gallium/u_threaded: sort cases in batch_execute by their occurrence

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33087>
This commit is contained in:
Marek Olšák 2025-01-17 19:47:08 -05:00
parent f8c730158b
commit 09aa19fb64
2 changed files with 56 additions and 44 deletions

View file

@ -5072,8 +5072,10 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, bool parsing)
if (!first)
batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
first = false;
} else if (call->call_id >= TC_CALL_draw_single &&
call->call_id <= TC_CALL_draw_vstate_multi) {
} else if (call->call_id == TC_CALL_draw_single ||
call->call_id == TC_CALL_draw_multi ||
(call->call_id >= TC_CALL_draw_single_drawid &&
call->call_id <= TC_CALL_draw_vstate_multi)) {
/* if a draw happens before a set_framebuffer_state on this batch,
* begin incrementing renderpass data
*/

View file

@ -1,4 +1,56 @@
/* These are sorted by the frequency of their occurrence for instruction cache
* hits between case statements in the batch_execute switch. The benchmark is
* VP2020/Catia1.
*
* This proves that such an optimization is potentially beneficial, though it's
* very close to being noise. The CPU time spent in tc_batch_execute is reduced
* by 0.8% in sysprof (not 0.8% from the original measured number, but 0.8%
* from the original measured number scaled to 100%). 99% of TC calls
* in the benchmark are just alternating between the first two.
*/
CALL(draw_single)
CALL(set_vertex_buffers)
CALL(set_constant_buffer)
CALL(bind_rasterizer_state)
CALL(buffer_unmap)
CALL(bind_vertex_elements_state)
CALL(bind_vs_state)
CALL(bind_fs_state)
CALL(bind_depth_stencil_alpha_state)
CALL(set_sampler_views)
CALL(set_viewport_states)
CALL(flush)
CALL(flush_resource)
CALL(resource_copy_region)
CALL(bind_sampler_states)
CALL(texture_unmap)
CALL(bind_blend_state)
CALL(draw_multi)
CALL(set_framebuffer_state)
CALL(clear)
CALL(set_shader_images)
CALL(set_shader_buffers)
CALL(delete_rasterizer_state)
CALL(delete_vs_state)
CALL(delete_vertex_elements_state)
CALL(delete_fs_state)
CALL(delete_depth_stencil_alpha_state)
CALL(set_min_samples)
CALL(delete_blend_state)
CALL(set_stream_output_targets)
CALL(set_stencil_ref)
CALL(set_sample_mask)
CALL(bind_tes_state)
CALL(bind_tcs_state)
CALL(bind_gs_state)
CALL(bind_compute_state)
CALL(set_tess_state)
CALL(set_polygon_stipple)
CALL(set_patch_vertices)
CALL(set_blend_color)
CALL(replace_buffer_storage)
CALL(delete_sampler_state)
CALL(flush_deferred)
CALL(callback)
CALL(fence_server_sync)
@ -7,53 +59,29 @@ CALL(begin_query)
CALL(end_query)
CALL(get_query_result_resource)
CALL(render_condition)
CALL(bind_sampler_states)
CALL(set_framebuffer_state)
CALL(set_tess_state)
CALL(set_patch_vertices)
CALL(set_constant_buffer)
CALL(set_inlinable_constants)
CALL(set_sample_locations)
CALL(set_scissor_states)
CALL(set_viewport_states)
CALL(set_window_rectangles)
CALL(set_sampler_views)
CALL(set_shader_images)
CALL(set_shader_buffers)
CALL(set_vertex_buffers)
CALL(set_stream_output_targets)
CALL(replace_buffer_storage)
CALL(transfer_flush_region)
CALL(buffer_unmap)
CALL(texture_unmap)
CALL(buffer_subdata)
CALL(texture_subdata)
CALL(emit_string_marker)
CALL(draw_single)
CALL(draw_single_drawid)
CALL(draw_multi)
CALL(draw_indirect)
CALL(draw_vstate_single)
CALL(draw_vstate_multi)
CALL(launch_grid)
CALL(resource_copy_region)
CALL(blit)
CALL(generate_mipmap)
CALL(flush_resource)
CALL(invalidate_resource)
CALL(clear)
CALL(clear_render_target)
CALL(clear_depth_stencil)
CALL(clear_buffer)
CALL(clear_texture)
CALL(resource_commit)
CALL(set_active_query_state)
CALL(set_blend_color)
CALL(set_stencil_ref)
CALL(set_clip_state)
CALL(set_sample_mask)
CALL(set_min_samples)
CALL(set_polygon_stipple)
CALL(texture_barrier)
CALL(memory_barrier)
CALL(delete_texture_handle)
@ -63,28 +91,10 @@ CALL(make_image_handle_resident)
CALL(set_context_param)
CALL(set_frontend_noop)
CALL(bind_blend_state)
CALL(bind_rasterizer_state)
CALL(bind_depth_stencil_alpha_state)
CALL(bind_compute_state)
CALL(bind_fs_state)
CALL(bind_vs_state)
CALL(bind_gs_state)
CALL(bind_tcs_state)
CALL(bind_tes_state)
CALL(bind_vertex_elements_state)
CALL(delete_blend_state)
CALL(delete_rasterizer_state)
CALL(delete_depth_stencil_alpha_state)
CALL(delete_compute_state)
CALL(delete_fs_state)
CALL(delete_vs_state)
CALL(delete_gs_state)
CALL(delete_tcs_state)
CALL(delete_tes_state)
CALL(delete_vertex_elements_state)
CALL(delete_sampler_state)
CALL(begin_intel_perf_query)
CALL(end_intel_perf_query)