diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 7c7b949524d..3aeae44b452 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -123,20 +123,22 @@ struct copy_prop_var_state { bool progress; }; -static bool +static nir_component_mask_t value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin) { assert(intrin->intrinsic == nir_intrinsic_store_deref); nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); + nir_component_mask_t equals_mask = 0; for (unsigned i = 0; i < intrin->num_components; i++) { + nir_scalar src = nir_scalar_resolved(intrin->src[1].ssa, i); if ((write_mask & (1 << i)) && - (value->ssa.def[i] != intrin->src[1].ssa || - value->ssa.component[i] != i)) - return false; + (value->ssa.def[i] == src.def && + value->ssa.component[i] == src.comp)) + equals_mask |= 1 << i; } - return true; + return equals_mask; } static struct vars_written * @@ -1110,7 +1112,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state, * rewrite the vecN itself. */ nir_def_rewrite_uses_after(&intrin->def, - value.ssa.def[0]); + value.ssa.def[0]); } else { nir_def_rewrite_uses(&intrin->def, value.ssa.def[0]); @@ -1195,13 +1197,46 @@ copy_prop_vars_block(struct copy_prop_var_state *state, struct copy_entry *entry = lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL); - if (entry && value_equals_store_src(&entry->src, intrin)) { + nir_component_mask_t equals_mask = entry ? value_equals_store_src(&entry->src, intrin) : 0; + if (equals_mask == nir_intrinsic_write_mask(intrin)) { /* If we are storing the value from a load of the same var the * store is redundant so remove it. */ nir_instr_remove(instr); state->progress = true; } else { + if (!(b->shader->info.stage == MESA_SHADER_FRAGMENT && + (nir_deref_mode_may_be(dst.instr, nir_var_shader_out)))) { + /* If any channels we wrote were already the dst's value, mask them + * off (which can lead to other dead code elimination). Apparently + * glslang does write masking with load-vec-store. + * + * Skip this for FS outputs, where multiple drivers don't like color + * writes getting channels writemasked out based on copying the + * fbfetched data through. + */ + nir_component_mask_t remove_mask = equals_mask & nir_intrinsic_write_mask(intrin); + if (remove_mask) { + nir_intrinsic_set_write_mask(intrin, nir_intrinsic_write_mask(intrin) & ~equals_mask); + + /* For any channels we're trimming off the write mask, replace + * them with undefs. This lets them be dead-code eliminated, + * which no other pass would do on its own. + */ + b->cursor = nir_before_instr(instr); + nir_def *channels[NIR_MAX_VEC_COMPONENTS]; + nir_def *undef = nir_undef(b, 1, intrin->src[1].ssa->bit_size); + for (int i = 0; i < intrin->num_components; i++) { + if (remove_mask & (1 << i)) { + channels[i] = undef; + } else { + channels[i] = nir_channel(b, intrin->src[1].ssa, i); + } + } + nir_src_rewrite(&intrin->src[1], nir_vec(b, channels, intrin->num_components)); + state->progress = true; + } + } struct value value = { 0 }; value_set_ssa_components(&value, intrin->src[1].ssa, intrin->num_components);