brw/opt: Don't call brw_opt_copy_propagation before brw_lower_load_reg
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

On a 36c/72t Xeon system, performance of replaying
hogwarts_legacy.dx12vk-ultra.foz improved by 1.3% +/- 0.77% (n=10).

I picked MTL for the fossil-db results because it was the most negative.

shader-db:

All Intel platforms had fairly similar results. (Lunar Lake shown)
total instructions in shared programs: 16964217 -> 16964216 (<.01%)
instructions in affected programs: 51777 -> 51776 (<.01%)
helped: 20 / HURT: 27

total cycles in shared programs: 892934916 -> 893041912 (0.01%)
cycles in affected programs: 51245298 -> 51352294 (0.21%)
helped: 96 / HURT: 78

fossil-db:

All Intel platforms had similar results. (Meteor Lake shown)
Totals:
Instrs: 233678547 -> 233678944 (+0.00%); split: -0.00%, +0.00%
Cycle count: 24398049850 -> 24400490877 (+0.01%); split: -0.01%, +0.02%
Max live registers: 42145052 -> 42145038 (-0.00%); split: -0.00%, +0.00%

Totals from 1141 (0.14% of 805934) affected shaders:
Instrs: 1546001 -> 1546398 (+0.03%); split: -0.01%, +0.03%
Cycle count: 1201746062 -> 1204187089 (+0.20%); split: -0.14%, +0.34%
Max live registers: 84247 -> 84233 (-0.02%); split: -0.03%, +0.01%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31497>
This commit is contained in:
Ian Romanick 2025-02-11 13:32:15 -08:00 committed by Marge Bot
parent 991a2f510b
commit 20cce95ce5
2 changed files with 8 additions and 27 deletions

View file

@ -75,8 +75,7 @@ brw_optimize(brw_shader &s)
OPT(brw_opt_algebraic); OPT(brw_opt_algebraic);
OPT(brw_opt_cse_defs); OPT(brw_opt_cse_defs);
if (!OPT(brw_opt_copy_propagation_defs)) OPT(brw_opt_copy_propagation_defs);
OPT(brw_opt_copy_propagation);
OPT(brw_opt_cmod_propagation); OPT(brw_opt_cmod_propagation);
OPT(brw_opt_dead_code_eliminate); OPT(brw_opt_dead_code_eliminate);
OPT(brw_opt_saturate_propagation); OPT(brw_opt_saturate_propagation);

View file

@ -660,25 +660,13 @@ instruction_requires_packed_data(brw_inst *inst)
} }
static bool static bool
try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst, try_copy_propagate(brw_shader &s, brw_inst *inst,
acp_entry *entry, int arg, acp_entry *entry, int arg,
uint8_t max_polygons) uint8_t max_polygons)
{ {
if (inst->src[arg].file != VGRF) if (inst->src[arg].file != VGRF)
return false; return false;
/* Do not copy propagate a load_reg value to a different block through a
* non-def. This can occur when `entry` is the loop counter, and `inst` is
* a use of the loop counter outside the loop. If the use outside the loop
* is replaced with the def from the load_reg, def analysis will later
* determine that the load_reg does not produce a def.
*/
const brw_inst *const def = defs.get(entry->src);
if (def != NULL && def->opcode == SHADER_OPCODE_LOAD_REG &&
def->block != inst->block) {
return false;
}
const struct intel_device_info *devinfo = s.devinfo; const struct intel_device_info *devinfo = s.devinfo;
assert(entry->src.file == VGRF || entry->src.file == UNIFORM || assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
@ -770,16 +758,6 @@ try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst,
if (instruction_requires_packed_data(inst) && entry_stride != 1) if (instruction_requires_packed_data(inst) && entry_stride != 1)
return false; return false;
/* load_reg loads a whole VGRF into a def. It is not allowed for the source
* to have a stride or a non-zero offset (unless stride == 0). It is
* allowed for the source to be uniform.
*/
if (inst->opcode == SHADER_OPCODE_LOAD_REG &&
!is_uniform(entry->src) &&
(entry->src.offset != 0 || entry_stride > 1)) {
return false;
}
const brw_reg_type dst_type = (has_source_modifiers && const brw_reg_type dst_type = (has_source_modifiers &&
entry->dst.type != inst->src[arg].type) ? entry->dst.type != inst->src[arg].type) ?
entry->dst.type : inst->dst.type; entry->dst.type : inst->dst.type;
@ -1401,10 +1379,14 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx,
uint8_t max_polygons) uint8_t max_polygons)
{ {
const struct intel_device_info *devinfo = s.devinfo; const struct intel_device_info *devinfo = s.devinfo;
const brw_def_analysis &defs = s.def_analysis.require();
bool progress = false; bool progress = false;
foreach_inst_in_block(brw_inst, inst, block) { foreach_inst_in_block(brw_inst, inst, block) {
/* The non-defs copy propagation passes should not be called while
* LOAD_REG instructions still exist.
*/
assert(inst->opcode != SHADER_OPCODE_LOAD_REG);
/* Try propagating into this instruction. */ /* Try propagating into this instruction. */
bool constant_progress = false; bool constant_progress = false;
for (int i = inst->sources - 1; i >= 0; i--) { for (int i = inst->sources - 1; i >= 0; i--) {
@ -1420,7 +1402,7 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx,
break; break;
} }
} else { } else {
if (try_copy_propagate(s, defs, inst, *iter, i, max_polygons)) { if (try_copy_propagate(s, inst, *iter, i, max_polygons)) {
progress = true; progress = true;
break; break;
} }