mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
nir/copy_prop_vars: Mask out no-op writes to variables.
The pass previously supported removing complete no-op writes, but we can do better by noticing if any channel being written is the current channel's value and masking off those writes. I noticed this happening in Stray's fragment shader, where it looked like some translation layer had turned var[x].zw = vec2(a, b) into var[x] = vec4(var[x].x, var[x].y, a, b). This in turn lets nir_shrink_vec_array_vars be more effective. Totals: MaxWaves: 22158876 -> 22156696 (-0.01%); split: +0.00%, -0.01% Instrs: 401167243 -> 401007996 (-0.04%); split: -0.04%, +0.00% CodeSize: 1004397302 -> 1004133728 (-0.03%); split: -0.03%, +0.00% STPs: 369810 -> 234618 (-36.56%) LDPs: 209430 -> 172011 (-17.87%) Totals from 1884 (0.12% of 1560230) affected shaders: MaxWaves: 12686 -> 10506 (-17.18%); split: +6.97%, -24.15% Instrs: 2099486 -> 1940239 (-7.59%); split: -7.64%, +0.06% CodeSize: 4570472 -> 4306898 (-5.77%); split: -5.81%, +0.05% NOPs: 334399 -> 270881 (-18.99%); split: -20.58%, +1.58% MOVs: 131003 -> 148034 (+13.00%); split: -11.59%, +24.59% COVs: 14512 -> 16921 (+16.60%); split: -0.23%, +16.83% Full: 58120 -> 72399 (+24.57%); split: -6.75%, +31.31% (ss): 79215 -> 45331 (-42.77%); split: -48.46%, +5.68% (sy): 33081 -> 11119 (-66.39%); split: -66.56%, +0.18% (ss)-stall: 302152 -> 115528 (-61.76%); split: -64.34%, +2.57% (sy)-stall: 2706110 -> 498998 (-81.56%); split: -81.68%, +0.12% STPs: 212045 -> 76853 (-63.76%) LDPs: 47337 -> 9918 (-79.05%) Preamble Instrs: 413954 -> 413630 (-0.08%); split: -0.21%, +0.13% Cat0: 370362 -> 306844 (-17.15%); split: -18.58%, +1.43% Cat1: 145629 -> 165003 (+13.30%); split: -10.51%, +23.81% Cat2: 687947 -> 683992 (-0.57%); split: -0.61%, +0.04% Cat3: 362919 -> 360690 (-0.61%); split: -0.72%, +0.11% Cat6: 461411 -> 352375 (-23.63%) Cat7: 16857 -> 16974 (+0.69%); split: -0.35%, +1.04% Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37313>
This commit is contained in:
parent
537cc4e0ff
commit
0781edc30f
1 changed files with 42 additions and 7 deletions
|
|
@ -123,20 +123,22 @@ struct copy_prop_var_state {
|
|||
bool progress;
|
||||
};
|
||||
|
||||
static bool
|
||||
static nir_component_mask_t
|
||||
value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
assert(intrin->intrinsic == nir_intrinsic_store_deref);
|
||||
nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
|
||||
nir_component_mask_t equals_mask = 0;
|
||||
|
||||
for (unsigned i = 0; i < intrin->num_components; i++) {
|
||||
nir_scalar src = nir_scalar_resolved(intrin->src[1].ssa, i);
|
||||
if ((write_mask & (1 << i)) &&
|
||||
(value->ssa.def[i] != intrin->src[1].ssa ||
|
||||
value->ssa.component[i] != i))
|
||||
return false;
|
||||
(value->ssa.def[i] == src.def &&
|
||||
value->ssa.component[i] == src.comp))
|
||||
equals_mask |= 1 << i;
|
||||
}
|
||||
|
||||
return true;
|
||||
return equals_mask;
|
||||
}
|
||||
|
||||
static struct vars_written *
|
||||
|
|
@ -1195,13 +1197,46 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
|
|||
|
||||
struct copy_entry *entry =
|
||||
lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL);
|
||||
if (entry && value_equals_store_src(&entry->src, intrin)) {
|
||||
nir_component_mask_t equals_mask = entry ? value_equals_store_src(&entry->src, intrin) : 0;
|
||||
if (equals_mask == nir_intrinsic_write_mask(intrin)) {
|
||||
/* If we are storing the value from a load of the same var the
|
||||
* store is redundant so remove it.
|
||||
*/
|
||||
nir_instr_remove(instr);
|
||||
state->progress = true;
|
||||
} else {
|
||||
if (!(b->shader->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
(nir_deref_mode_may_be(dst.instr, nir_var_shader_out)))) {
|
||||
/* If any channels we wrote were already the dst's value, mask them
|
||||
* off (which can lead to other dead code elimination). Apparently
|
||||
* glslang does write masking with load-vec-store.
|
||||
*
|
||||
* Skip this for FS outputs, where multiple drivers don't like color
|
||||
* writes getting channels writemasked out based on copying the
|
||||
* fbfetched data through.
|
||||
*/
|
||||
nir_component_mask_t remove_mask = equals_mask & nir_intrinsic_write_mask(intrin);
|
||||
if (remove_mask) {
|
||||
nir_intrinsic_set_write_mask(intrin, nir_intrinsic_write_mask(intrin) & ~equals_mask);
|
||||
|
||||
/* For any channels we're trimming off the write mask, replace
|
||||
* them with undefs. This lets them be dead-code eliminated,
|
||||
* which no other pass would do on its own.
|
||||
*/
|
||||
b->cursor = nir_before_instr(instr);
|
||||
nir_def *channels[NIR_MAX_VEC_COMPONENTS];
|
||||
nir_def *undef = nir_undef(b, 1, intrin->src[1].ssa->bit_size);
|
||||
for (int i = 0; i < intrin->num_components; i++) {
|
||||
if (remove_mask & (1 << i)) {
|
||||
channels[i] = undef;
|
||||
} else {
|
||||
channels[i] = nir_channel(b, intrin->src[1].ssa, i);
|
||||
}
|
||||
}
|
||||
nir_src_rewrite(&intrin->src[1], nir_vec(b, channels, intrin->num_components));
|
||||
state->progress = true;
|
||||
}
|
||||
}
|
||||
struct value value = { 0 };
|
||||
value_set_ssa_components(&value, intrin->src[1].ssa,
|
||||
intrin->num_components);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue