mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 13:20:14 +01:00
brw/opt: Don't call brw_opt_copy_propagation before brw_lower_load_reg
On a 36c/72t Xeon system, performance of replaying hogwarts_legacy.dx12vk-ultra.foz was improved 1.3% +/- 0.77% (n=10).

I picked MTL for the fossil-db results because it was the most negative.

shader-db:

All Intel platforms had fairly similar results. (Lunar Lake)
total instructions in shared programs: 16964217 -> 16964216 (<.01%)
instructions in affected programs: 51777 -> 51776 (<.01%)
helped: 20 / HURT: 27

total cycles in shared programs: 892934916 -> 893041912 (0.01%)
cycles in affected programs: 51245298 -> 51352294 (0.21%)
helped: 96 / HURT: 78

fossil-db:

All Intel platforms had similar results. (Meteor Lake shown)
Totals:
Instrs: 233678547 -> 233678944 (+0.00%); split: -0.00%, +0.00%
Cycle count: 24398049850 -> 24400490877 (+0.01%); split: -0.01%, +0.02%
Max live registers: 42145052 -> 42145038 (-0.00%); split: -0.00%, +0.00%

Totals from 1141 (0.14% of 805934) affected shaders:
Instrs: 1546001 -> 1546398 (+0.03%); split: -0.01%, +0.03%
Cycle count: 1201746062 -> 1204187089 (+0.20%); split: -0.14%, +0.34%
Max live registers: 84247 -> 84233 (-0.02%); split: -0.03%, +0.01%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31497>
This commit is contained in:
parent
991a2f510b
commit
20cce95ce5
2 changed files with 8 additions and 27 deletions
|
|
@ -75,8 +75,7 @@ brw_optimize(brw_shader &s)
|
|||
|
||||
OPT(brw_opt_algebraic);
|
||||
OPT(brw_opt_cse_defs);
|
||||
if (!OPT(brw_opt_copy_propagation_defs))
|
||||
OPT(brw_opt_copy_propagation);
|
||||
OPT(brw_opt_copy_propagation_defs);
|
||||
OPT(brw_opt_cmod_propagation);
|
||||
OPT(brw_opt_dead_code_eliminate);
|
||||
OPT(brw_opt_saturate_propagation);
|
||||
|
|
|
|||
|
|
@ -660,25 +660,13 @@ instruction_requires_packed_data(brw_inst *inst)
|
|||
}
|
||||
|
||||
static bool
|
||||
try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst,
|
||||
try_copy_propagate(brw_shader &s, brw_inst *inst,
|
||||
acp_entry *entry, int arg,
|
||||
uint8_t max_polygons)
|
||||
{
|
||||
if (inst->src[arg].file != VGRF)
|
||||
return false;
|
||||
|
||||
/* Do not copy propagate a load_reg value to a different block through a
|
||||
* non-def. This can occur when `entry` is the loop counter, and `inst` is
|
||||
* a use of the loop counter outside the loop. If the use outside the loop
|
||||
* is replaced with the def from the load_reg, def analysis will later
|
||||
* determine that the load_reg does not produce a def.
|
||||
*/
|
||||
const brw_inst *const def = defs.get(entry->src);
|
||||
if (def != NULL && def->opcode == SHADER_OPCODE_LOAD_REG &&
|
||||
def->block != inst->block) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
|
||||
assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
|
||||
|
|
@ -770,16 +758,6 @@ try_copy_propagate(brw_shader &s, const brw_def_analysis &defs, brw_inst *inst,
|
|||
if (instruction_requires_packed_data(inst) && entry_stride != 1)
|
||||
return false;
|
||||
|
||||
/* load_reg loads a whole VGRF into a def. It is not allowed for the source
|
||||
* to have a stride or a non-zero offset (unless stride == 0). It is
|
||||
* allowed for the source to be uniform.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_LOAD_REG &&
|
||||
!is_uniform(entry->src) &&
|
||||
(entry->src.offset != 0 || entry_stride > 1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const brw_reg_type dst_type = (has_source_modifiers &&
|
||||
entry->dst.type != inst->src[arg].type) ?
|
||||
entry->dst.type : inst->dst.type;
|
||||
|
|
@ -1401,10 +1379,14 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx,
|
|||
uint8_t max_polygons)
|
||||
{
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
const brw_def_analysis &defs = s.def_analysis.require();
|
||||
bool progress = false;
|
||||
|
||||
foreach_inst_in_block(brw_inst, inst, block) {
|
||||
/* The non-defs copy propagation passes should not be called while
|
||||
* LOAD_REG instructions still exist.
|
||||
*/
|
||||
assert(inst->opcode != SHADER_OPCODE_LOAD_REG);
|
||||
|
||||
/* Try propagating into this instruction. */
|
||||
bool constant_progress = false;
|
||||
for (int i = inst->sources - 1; i >= 0; i--) {
|
||||
|
|
@ -1420,7 +1402,7 @@ opt_copy_propagation_local(brw_shader &s, linear_ctx *lin_ctx,
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if (try_copy_propagate(s, defs, inst, *iter, i, max_polygons)) {
|
||||
if (try_copy_propagate(s, inst, *iter, i, max_polygons)) {
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue