Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (last synced 2025-12-21 09:20:12 +01:00).
The principle is the same as the load_ssbo_uniform_block_intel. Whenever we see a uniform offset, load the data only once in GRFs to reduce register pressure. Iris shader-db run on DG2 : total instructions in shared programs: 23001325 -> 23094969 (0.41%) instructions in affected programs: 1775989 -> 1869633 (5.27%) helped: 764 HURT: 2097 helped stats (abs) min: 1 max: 102 x̄: 6.96 x̃: 2 helped stats (rel) min: 0.03% max: 16.91% x̄: 1.36% x̃: 0.63% HURT stats (abs) min: 1 max: 2461 x̄: 47.19 x̃: 7 HURT stats (rel) min: <.01% max: 199.34% x̄: 5.91% x̃: 2.60% 95% mean confidence interval for instructions value: 25.43 40.03 95% mean confidence interval for instructions %-change: 3.60% 4.33% Instructions are HURT. total loops in shared programs: 5847 -> 5847 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cycles in shared programs: 839329852 -> 845491482 (0.73%) cycles in affected programs: 130229434 -> 136391064 (4.73%) helped: 1098 HURT: 2228 helped stats (abs) min: 1 max: 130102 x̄: 1340.64 x̃: 22 helped stats (rel) min: <.01% max: 64.25% x̄: 4.03% x̃: 0.71% HURT stats (abs) min: 1 max: 185309 x̄: 3426.24 x̃: 87 HURT stats (rel) min: <.01% max: 92.85% x̄: 8.12% x̃: 3.82% 95% mean confidence interval for cycles value: 1342.16 2362.97 95% mean confidence interval for cycles %-change: 3.70% 4.52% Cycles are HURT. total spills in shared programs: 10768 -> 11856 (10.10%) spills in affected programs: 9717 -> 10805 (11.20%) helped: 25 HURT: 28 total fills in shared programs: 13720 -> 16258 (18.50%) fills in affected programs: 12016 -> 14554 (21.12%) helped: 25 HURT: 28 total sends in shared programs: 1034790 -> 1031266 (-0.34%) sends in affected programs: 33416 -> 29892 (-10.55%) helped: 1005 HURT: 0 helped stats (abs) min: 1 max: 22 x̄: 3.51 x̃: 3 helped stats (rel) min: 1.69% max: 60.00% x̄: 15.20% x̃: 14.08% 95% mean confidence interval for sends value: -3.72 -3.29 95% mean confidence interval for sends %-change: -15.82% -14.57% Sends are helped. 
LOST: 26 GAINED: 183 shader-db on a number of VK/DX titles on DG2 : PERCENTAGE DELTAS Shaders Instrs Cycles age_of_wonders_III 1928 +0.02% -0.19% PERCENTAGE DELTAS Shaders Instrs Cycles Subgroup size Send messages Spill count Fill count Max live registers Max dispatch width assassins_creed_odyssey 2119 +1.12% -0.42% -0.03% -0.29% -9.10% -4.26% -0.64% +0.65% PERCENTAGE DELTAS Shaders Instrs Cycles Spill count Fill count Max live registers aztec_ruins_high 269 -0.05% -0.45% -0.29% -7.27% -0.33% PERCENTAGE DELTAS Shaders Instrs Cycles Max live registers Max dispatch width dark_souls_3_dxvk_g2 1420 +0.09% +0.24% +0.21% +0.12% (stats look bad, but it's just one shader affected) PERCENTAGE DELTAS Shaders Instrs Cycles Spill count Fill count Scratch Memory Size Max live registers fallout_4_dxvk_g2 1638 +0.67% +8.32% +16.02% +7.17% +100.00% +0.48% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Spill count Fill count Max live registers Max dispatch width red_dead_redemption2 5969 +0.16% -0.04% -0.04% +0.01% +0.05% -0.20% +0.04% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Max live registers Max dispatch width rise_of_the_tomb_raider_g2 12129 +2.19% +1.36% -1.23% -0.36% +2.04% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Max live registers shooter-game 693 +0.07% -0.89% -0.09% -0.09% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Max live registers Max dispatch width talos_g2 1140 +0.37% +3.80% -0.86% -0.67% +0.19% PERCENTAGE DELTAS Shaders Instrs Cycles Max live registers Max dispatch width total_war_warhammer2 477 +0.25% +0.66% -0.17% +0.10% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Max live registers Max dispatch width witcher_3_dxvk_g2 1074 +0.75% -10.45% -0.15% -0.16% -0.16% PERCENTAGE DELTAS Shaders Instrs Cycles Send messages Max live registers wolfenstein_youngblood 1111 +0.52% +0.66% -0.59% -0.03% Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: 
<https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23477> |
||
|---|---|---|
| .. | ||
| brw_cfg.cpp | ||
| brw_cfg.h | ||
| brw_clip.h | ||
| brw_clip_line.c | ||
| brw_clip_point.c | ||
| brw_clip_tri.c | ||
| brw_clip_unfilled.c | ||
| brw_clip_util.c | ||
| brw_compile_clip.c | ||
| brw_compile_ff_gs.c | ||
| brw_compile_sf.c | ||
| brw_compiler.c | ||
| brw_compiler.h | ||
| brw_dead_control_flow.cpp | ||
| brw_dead_control_flow.h | ||
| brw_debug_recompile.c | ||
| brw_disasm.c | ||
| brw_disasm_info.c | ||
| brw_disasm_info.h | ||
| brw_eu.c | ||
| brw_eu.h | ||
| brw_eu_compact.c | ||
| brw_eu_defines.h | ||
| brw_eu_emit.c | ||
| brw_eu_util.c | ||
| brw_eu_validate.c | ||
| brw_fs.cpp | ||
| brw_fs.h | ||
| brw_fs_bank_conflicts.cpp | ||
| brw_fs_builder.h | ||
| brw_fs_cmod_propagation.cpp | ||
| brw_fs_combine_constants.cpp | ||
| brw_fs_copy_propagation.cpp | ||
| brw_fs_cse.cpp | ||
| brw_fs_dead_code_eliminate.cpp | ||
| brw_fs_generator.cpp | ||
| brw_fs_live_variables.cpp | ||
| brw_fs_live_variables.h | ||
| brw_fs_lower_pack.cpp | ||
| brw_fs_lower_regioning.cpp | ||
| brw_fs_nir.cpp | ||
| brw_fs_reg_allocate.cpp | ||
| brw_fs_register_coalesce.cpp | ||
| brw_fs_saturate_propagation.cpp | ||
| brw_fs_scoreboard.cpp | ||
| brw_fs_sel_peephole.cpp | ||
| brw_fs_thread_payload.cpp | ||
| brw_fs_validate.cpp | ||
| brw_fs_visitor.cpp | ||
| brw_gfx_ver_enum.h | ||
| brw_inst.h | ||
| brw_interpolation_map.c | ||
| brw_ir.h | ||
| brw_ir_allocator.h | ||
| brw_ir_analysis.h | ||
| brw_ir_fs.h | ||
| brw_ir_performance.cpp | ||
| brw_ir_performance.h | ||
| brw_ir_vec4.h | ||
| brw_isa_info.h | ||
| brw_kernel.c | ||
| brw_kernel.h | ||
| brw_lower_logical_sends.cpp | ||
| brw_mesh.cpp | ||
| brw_nir.c | ||
| brw_nir.h | ||
| brw_nir_analyze_boolean_resolves.c | ||
| brw_nir_analyze_ubo_ranges.c | ||
| brw_nir_attribute_workarounds.c | ||
| brw_nir_blockify_uniform_loads.c | ||
| brw_nir_clamp_image_1d_2d_array_sizes.c | ||
| brw_nir_clamp_per_vertex_loads.c | ||
| brw_nir_lower_alpha_to_coverage.c | ||
| brw_nir_lower_conversions.c | ||
| brw_nir_lower_cs_intrinsics.c | ||
| brw_nir_lower_intersection_shader.c | ||
| brw_nir_lower_non_uniform_resource_intel.c | ||
| brw_nir_lower_ray_queries.c | ||
| brw_nir_lower_rt_intrinsics.c | ||
| brw_nir_lower_shader_calls.c | ||
| brw_nir_lower_shading_rate_output.c | ||
| brw_nir_lower_storage_image.c | ||
| brw_nir_opt_peephole_ffma.c | ||
| brw_nir_opt_peephole_imul32x16.c | ||
| brw_nir_rt.c | ||
| brw_nir_rt.h | ||
| brw_nir_rt_builder.h | ||
| brw_nir_tcs_workarounds.c | ||
| brw_nir_trig_workarounds.py | ||
| brw_packed_float.c | ||
| brw_predicated_break.cpp | ||
| brw_prim.h | ||
| brw_private.h | ||
| brw_reg.h | ||
| brw_reg_type.c | ||
| brw_reg_type.h | ||
| brw_rt.h | ||
| brw_schedule_instructions.cpp | ||
| brw_shader.cpp | ||
| brw_shader.h | ||
| brw_simd_selection.cpp | ||
| brw_vec4.cpp | ||
| brw_vec4.h | ||
| brw_vec4_builder.h | ||
| brw_vec4_cmod_propagation.cpp | ||
| brw_vec4_copy_propagation.cpp | ||
| brw_vec4_cse.cpp | ||
| brw_vec4_dead_code_eliminate.cpp | ||
| brw_vec4_generator.cpp | ||
| brw_vec4_gs_nir.cpp | ||
| brw_vec4_gs_visitor.cpp | ||
| brw_vec4_gs_visitor.h | ||
| brw_vec4_live_variables.cpp | ||
| brw_vec4_live_variables.h | ||
| brw_vec4_nir.cpp | ||
| brw_vec4_reg_allocate.cpp | ||
| brw_vec4_surface_builder.cpp | ||
| brw_vec4_surface_builder.h | ||
| brw_vec4_tcs.cpp | ||
| brw_vec4_tcs.h | ||
| brw_vec4_tes.cpp | ||
| brw_vec4_tes.h | ||
| brw_vec4_visitor.cpp | ||
| brw_vec4_vs.h | ||
| brw_vec4_vs_visitor.cpp | ||
| brw_vue_map.c | ||
| gfx6_gs_visitor.cpp | ||
| gfx6_gs_visitor.h | ||
| intel_clc.c | ||
| meson.build | ||
| test_eu_compact.cpp | ||
| test_eu_validate.cpp | ||
| test_fs_cmod_propagation.cpp | ||
| test_fs_copy_propagation.cpp | ||
| test_fs_saturate_propagation.cpp | ||
| test_fs_scoreboard.cpp | ||
| test_simd_selection.cpp | ||
| test_vec4_cmod_propagation.cpp | ||
| test_vec4_copy_propagation.cpp | ||
| test_vec4_dead_code_eliminate.cpp | ||
| test_vec4_register_coalesce.cpp | ||
| test_vf_float_conversions.cpp | ||