mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 04:30:10 +01:00
nir: remove duplicate alu channels in nir_opt_shrink_vectors
This will clean up code like:

    vec3 32 ssa_8 = frcp ssa_7.www
    vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8

into

    vec1 32 ssa_8 = frcp ssa_7.w
    vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8.xxx

This helps the r300 driver because it can only process a single channel at a time for math ops, so the first version would result in three frcp instructions.

The nir_opt_shrink_vectors comments even claim the pass should be doing this; however, it actually does it only for nir_op_vecx instructions, so extend this to generic alu instructions.

RV530 shader-db:
total instructions in shared programs: 135032 -> 133707 (-0.98%)
instructions in affected programs: 46121 -> 44796 (-2.87%)
helped: 452
HURT: 26
total temps in shared programs: 17051 -> 17033 (-0.11%)
temps in affected programs: 1509 -> 1491 (-1.19%)
helped: 91
HURT: 30

12.02->12.08 (+0.5%) fps gain in Unigine Sanctuary (n=5) with RV530

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7051
Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20213>
This commit is contained in:
parent
980df9ede1
commit
cb7f201288
1 changed files with 39 additions and 28 deletions
|
|
@ -192,49 +192,60 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
|
|||
return false;
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(def);
|
||||
unsigned last_bit = util_last_bit(mask);
|
||||
unsigned num_components = util_bitcount(mask);
|
||||
|
||||
unsigned rounded = round_up_components(num_components);
|
||||
assert(rounded <= def->num_components);
|
||||
num_components = rounded;
|
||||
|
||||
/* return, if there is nothing to do */
|
||||
if (mask == 0 || num_components == def->num_components)
|
||||
if (mask == 0)
|
||||
return false;
|
||||
|
||||
const bool is_bitfield_mask = last_bit == num_components;
|
||||
if (is_bitfield_mask) {
|
||||
/* just reduce the number of components and return */
|
||||
def->num_components = num_components;
|
||||
instr->dest.write_mask = mask;
|
||||
return true;
|
||||
}
|
||||
|
||||
uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
|
||||
unsigned index = 0;
|
||||
for (unsigned i = 0; i < last_bit; i++) {
|
||||
unsigned num_components = 0;
|
||||
bool progress = false;
|
||||
for (unsigned i = 0; i < def->num_components; i++) {
|
||||
/* skip unused components */
|
||||
if (!((mask >> i) & 0x1))
|
||||
continue;
|
||||
|
||||
/* reswizzle the sources */
|
||||
for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
|
||||
instr->src[k].swizzle[index] = instr->src[k].swizzle[i];
|
||||
reswizzle[i] = index;
|
||||
/* Try to reuse a component with the same swizzles */
|
||||
unsigned j;
|
||||
for (j = 0; j < num_components; j++) {
|
||||
bool duplicate_channel = true;
|
||||
for (unsigned k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
|
||||
if (nir_op_infos[instr->op].input_sizes[k] != 0 ||
|
||||
instr->src[k].swizzle[i] != instr->src[k].swizzle[j]) {
|
||||
duplicate_channel = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (duplicate_channel) {
|
||||
reswizzle[i] = j;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, just append the value */
|
||||
if (j == num_components) {
|
||||
for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
|
||||
instr->src[k].swizzle[num_components] = instr->src[k].swizzle[i];
|
||||
}
|
||||
if (i != num_components)
|
||||
progress = true;
|
||||
reswizzle[i] = num_components++;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
assert(index == num_components);
|
||||
|
||||
unsigned rounded = round_up_components(num_components);
|
||||
assert(rounded <= def->num_components);
|
||||
|
||||
/* update dest */
|
||||
def->num_components = num_components;
|
||||
instr->dest.write_mask = BITFIELD_MASK(num_components);
|
||||
def->num_components = rounded;
|
||||
instr->dest.write_mask = BITFIELD_MASK(rounded);
|
||||
|
||||
/* update uses */
|
||||
reswizzle_alu_uses(def, reswizzle);
|
||||
if (progress)
|
||||
reswizzle_alu_uses(def, reswizzle);
|
||||
|
||||
return true;
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue