From 0781edc30f9d6509edaaa7780687b18f7365af8f Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Wed, 10 Sep 2025 11:21:38 -0700 Subject: [PATCH] nir/copy_prop_vars: Mask out no-op writes to variables. The pass previously supported removing complete no-op writes, but we can do better by noticing if any channel being written is the current channel's value and masking off those writes. I noticed this happening in Stray's fragment shader, where it looked like some translation layer had turned var[x].zw = vec2(a, b) into var[x] = vec4(var[x].x, var[x].y, a, b). This in turn lets nir_shrink_vec_array_vars be more effective. Totals: MaxWaves: 22158876 -> 22156696 (-0.01%); split: +0.00%, -0.01% Instrs: 401167243 -> 401007996 (-0.04%); split: -0.04%, +0.00% CodeSize: 1004397302 -> 1004133728 (-0.03%); split: -0.03%, +0.00% STPs: 369810 -> 234618 (-36.56%) LDPs: 209430 -> 172011 (-17.87%) Totals from 1884 (0.12% of 1560230) affected shaders: MaxWaves: 12686 -> 10506 (-17.18%); split: +6.97%, -24.15% Instrs: 2099486 -> 1940239 (-7.59%); split: -7.64%, +0.06% CodeSize: 4570472 -> 4306898 (-5.77%); split: -5.81%, +0.05% NOPs: 334399 -> 270881 (-18.99%); split: -20.58%, +1.58% MOVs: 131003 -> 148034 (+13.00%); split: -11.59%, +24.59% COVs: 14512 -> 16921 (+16.60%); split: -0.23%, +16.83% Full: 58120 -> 72399 (+24.57%); split: -6.75%, +31.31% (ss): 79215 -> 45331 (-42.77%); split: -48.46%, +5.68% (sy): 33081 -> 11119 (-66.39%); split: -66.56%, +0.18% (ss)-stall: 302152 -> 115528 (-61.76%); split: -64.34%, +2.57% (sy)-stall: 2706110 -> 498998 (-81.56%); split: -81.68%, +0.12% STPs: 212045 -> 76853 (-63.76%) LDPs: 47337 -> 9918 (-79.05%) Preamble Instrs: 413954 -> 413630 (-0.08%); split: -0.21%, +0.13% Cat0: 370362 -> 306844 (-17.15%); split: -18.58%, +1.43% Cat1: 145629 -> 165003 (+13.30%); split: -10.51%, +23.81% Cat2: 687947 -> 683992 (-0.57%); split: -0.61%, +0.04% Cat3: 362919 -> 360690 (-0.61%); split: -0.72%, +0.11% Cat6: 461411 -> 352375 (-23.63%) Cat7: 16857 -> 16974 (+0.69%); split: -0.35%, +1.04% Part-of: --- src/compiler/nir/nir_opt_copy_prop_vars.c | 49 +++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 7c7b949524d..3aeae44b452 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -123,20 +123,22 @@ struct copy_prop_var_state { bool progress; }; -static bool +static nir_component_mask_t value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin) { assert(intrin->intrinsic == nir_intrinsic_store_deref); nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); + nir_component_mask_t equals_mask = 0; for (unsigned i = 0; i < intrin->num_components; i++) { + nir_scalar src = nir_scalar_resolved(intrin->src[1].ssa, i); if ((write_mask & (1 << i)) && - (value->ssa.def[i] != intrin->src[1].ssa || - value->ssa.component[i] != i)) - return false; + (value->ssa.def[i] == src.def && + value->ssa.component[i] == src.comp)) + equals_mask |= 1 << i; } - return true; + return equals_mask; } static struct vars_written * @@ -1110,7 +1112,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state, * rewrite the vecN itself. */ nir_def_rewrite_uses_after(&intrin->def, - value.ssa.def[0]); + value.ssa.def[0]); } else { nir_def_rewrite_uses(&intrin->def, value.ssa.def[0]); @@ -1195,13 +1197,46 @@ copy_prop_vars_block(struct copy_prop_var_state *state, struct copy_entry *entry = lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL); - if (entry && value_equals_store_src(&entry->src, intrin)) { + nir_component_mask_t equals_mask = entry ? value_equals_store_src(&entry->src, intrin) : 0; + if (equals_mask == nir_intrinsic_write_mask(intrin)) { /* If we are storing the value from a load of the same var the * store is redundant so remove it. */ nir_instr_remove(instr); state->progress = true; } else { + if (!(b->shader->info.stage == MESA_SHADER_FRAGMENT && + (nir_deref_mode_may_be(dst.instr, nir_var_shader_out)))) { + /* If any channels we wrote were already the dst's value, mask them + * off (which can lead to other dead code elimination). Apparently + * glslang does write masking with load-vec-store. + * + * Skip this for FS outputs, where multiple drivers don't like color + * writes getting channels writemasked out based on copying the + * fbfetched data through. + */ + nir_component_mask_t remove_mask = equals_mask & nir_intrinsic_write_mask(intrin); + if (remove_mask) { + nir_intrinsic_set_write_mask(intrin, nir_intrinsic_write_mask(intrin) & ~equals_mask); + + /* For any channels we're trimming off the write mask, replace + * them with undefs. This lets them be dead-code eliminated, + * which no other pass would do on its own. + */ + b->cursor = nir_before_instr(instr); + nir_def *channels[NIR_MAX_VEC_COMPONENTS]; + nir_def *undef = nir_undef(b, 1, intrin->src[1].ssa->bit_size); + for (int i = 0; i < intrin->num_components; i++) { + if (remove_mask & (1 << i)) { + channels[i] = undef; + } else { + channels[i] = nir_channel(b, intrin->src[1].ssa, i); + } + } + nir_src_rewrite(&intrin->src[1], nir_vec(b, channels, intrin->num_components)); + state->progress = true; + } + } struct value value = { 0 }; value_set_ssa_components(&value, intrin->src[1].ssa, intrin->num_components);