From 20cce95ce5839c4d711a8de529888975687825c9 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 11 Feb 2025 13:32:15 -0800 Subject: [PATCH] brw/opt: Don't call brw_opt_copy_propagation before brw_lower_load_reg On a 36c/72t Xeon system, performance of replaying hogwarts_legacy.dx12vk-ultra.foz was improved 1.3% +/- 0.77% (n=10). I picked MTL for the fossil-db results because it was the most negative. shader-db: All Intel platforms had fairly similar results. (Lunar Lake) total instructions in shared programs: 16964217 -> 16964216 (<.01%) instructions in affected programs: 51777 -> 51776 (<.01%) helped: 20 / HURT: 27 total cycles in shared programs: 892934916 -> 893041912 (0.01%) cycles in affected programs: 51245298 -> 51352294 (0.21%) helped: 96 / HURT: 78 fossil-db: All Intel platforms had similar results. (Meteor Lake shown) Totals: Instrs: 233678547 -> 233678944 (+0.00%); split: -0.00%, +0.00% Cycle count: 24398049850 -> 24400490877 (+0.01%); split: -0.01%, +0.02% Max live registers: 42145052 -> 42145038 (-0.00%); split: -0.00%, +0.00% Totals from 1141 (0.14% of 805934) affected shaders: Instrs: 1546001 -> 1546398 (+0.03%); split: -0.01%, +0.03% Cycle count: 1201746062 -> 1204187089 (+0.20%); split: -0.14%, +0.34% Max live registers: 84247 -> 84233 (-0.02%); split: -0.03%, +0.01% Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_opt.cpp | 3 +- .../compiler/brw_opt_copy_propagation.cpp | 32 ++++--------------- 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/src/intel/compiler/brw_opt.cpp b/src/intel/compiler/brw_opt.cpp index ad35c4f7637..90d125116b5 100644 --- a/src/intel/compiler/brw_opt.cpp +++ b/src/intel/compiler/brw_opt.cpp @@ -75,8 +75,7 @@ brw_optimize(brw_shader &s) OPT(brw_opt_algebraic); OPT(brw_opt_cse_defs); - if (!OPT(brw_opt_copy_propagation_defs)) - OPT(brw_opt_copy_propagation); + OPT(brw_opt_copy_propagation_defs); OPT(brw_opt_cmod_propagation); OPT(brw_opt_dead_code_eliminate); OPT(brw_opt_saturate_propagation); 
diff --git a/src/intel/compiler/brw_opt_copy_propagation.cpp b/src/intel/compiler/brw_opt_copy_propagation.cpp index dd811e881c7..f0165e6fc12 100644 --- a/src/intel/compiler/brw_opt_copy_propagation.cpp +++ b/src/intel/compiler/brw_opt_copy_propagation.cpp @@ -660,25 +660,13 @@ instruction_requires_packed_data(brw_inst *inst) } static bool -try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst, +try_copy_propagate(brw_shader &s, brw_inst *inst, acp_entry *entry, int arg, uint8_t max_polygons) { if (inst->src[arg].file != VGRF) return false; - /* Do not copy propage a load_reg value to a different block through a - * non-def. This can occur when `entry` is the loop counter, and `inst` is - * a use of the loop counter outside the loop. If the use outside the loop - * is replaced with the def from the load_reg, def analysis will later - * determine that the load_reg does not produce a def. - */ - const brw_inst *const def = defs.get(entry->src); - if (def != NULL && def->opcode == SHADER_OPCODE_LOAD_REG && - def->block != inst->block) { - return false; - } - const struct intel_device_info *devinfo = s.devinfo; assert(entry->src.file == VGRF || entry->src.file == UNIFORM || @@ -770,16 +758,6 @@ try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst, if (instruction_requires_packed_data(inst) && entry_stride != 1) return false; - /* load_reg loads a whole VGRF into a def. It is not allowed for the source - * to have a stride or a non-zero offset (unless stride == 0). It is - * allowed for the source to to be uniform. - */ - if (inst->opcode == SHADER_OPCODE_LOAD_REG && - !is_uniform(entry->src) && - (entry->src.offset != 0 || entry_stride > 1)) { - return false; - } - const brw_reg_type dst_type = (has_source_modifiers && entry->dst.type != inst->src[arg].type) ? 
entry->dst.type : inst->dst.type; @@ -1401,10 +1379,14 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx, uint8_t max_polygons) { const struct intel_device_info *devinfo = s.devinfo; - const brw_def_analysis &defs = s.def_analysis.require(); bool progress = false; foreach_inst_in_block(brw_inst, inst, block) { + /* The non-defs copy propagation passes should not be called while + * LOAD_REG instructions still exist. + */ + assert(inst->opcode != SHADER_OPCODE_LOAD_REG); + /* Try propagating into this instruction. */ bool constant_progress = false; for (int i = inst->sources - 1; i >= 0; i--) { @@ -1420,7 +1402,7 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx, break; } } else { - if (try_copy_propagate(s, defs, inst, *iter, i, max_polygons)) { + if (try_copy_propagate(s, inst, *iter, i, max_polygons)) { progress = true; break; }