From cb7f20128874182629bdb0e5b2789096c0c0ec99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Sun, 4 Dec 2022 17:34:37 +0100 Subject: [PATCH] nir: remove duplicate alu channels in nir_opt_shrink_vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will clean code like: vec3 32 ssa_8 = frcp ssa_7.www vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8 into vec1 32 ssa_8 = frcp ssa_7.w vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8.xxx This helps r300 driver because we can only do single channel for math ops at a time, so the first version would result in three frcp instructions. The nir_opt_shrink_vectors comments even claim the pass should be doing this, however it actually does it only for nir_op_vecx instructions, so extend this for generic alu instructions. RV530 shader-db: total instructions in shared programs: 135032 -> 133707 (-0.98%) instructions in affected programs: 46121 -> 44796 (-2.87%) helped: 452 HURT: 26 total temps in shared programs: 17051 -> 17033 (-0.11%) temps in affected programs: 1509 -> 1491 (-1.19%) helped: 91 HURT: 30 12.02->12.08 (+0.5%) fps gain in Unigine Sanctuary (n=5) with RV530 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7051 Signed-off-by: Pavel Ondračka Reviewed-by: Gert Wollny Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_opt_shrink_vectors.c | 67 +++++++++++++---------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c b/src/compiler/nir/nir_opt_shrink_vectors.c index cc6e5add05e..bd842021cdc 100644 --- a/src/compiler/nir/nir_opt_shrink_vectors.c +++ b/src/compiler/nir/nir_opt_shrink_vectors.c @@ -192,49 +192,60 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) return false; unsigned mask = nir_ssa_def_components_read(def); - unsigned last_bit = util_last_bit(mask); - unsigned num_components = util_bitcount(mask); - - unsigned rounded = round_up_components(num_components); - 
assert(rounded <= def->num_components); - num_components = rounded; - /* return, if there is nothing to do */ - if (mask == 0 || num_components == def->num_components) + if (mask == 0) return false; - const bool is_bitfield_mask = last_bit == num_components; - if (is_bitfield_mask) { - /* just reduce the number of components and return */ - def->num_components = num_components; - instr->dest.write_mask = mask; - return true; - } - uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 }; - unsigned index = 0; - for (unsigned i = 0; i < last_bit; i++) { + unsigned num_components = 0; + bool progress = false; + for (unsigned i = 0; i < def->num_components; i++) { /* skip unused components */ if (!((mask >> i) & 0x1)) continue; - /* reswizzle the sources */ - for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) { - instr->src[k].swizzle[index] = instr->src[k].swizzle[i]; - reswizzle[i] = index; + /* Try reuse a component with the same swizzles */ + unsigned j; + for (j = 0; j < num_components; j++) { + bool duplicate_channel = true; + for (unsigned k = 0; k < nir_op_infos[instr->op].num_inputs; k++) { + if (nir_op_infos[instr->op].input_sizes[k] != 0 || + instr->src[k].swizzle[i] != instr->src[k].swizzle[j]) { + duplicate_channel = false; + break; + } + } + + if (duplicate_channel) { + reswizzle[i] = j; + progress = true; + break; + } + } + + /* Otherwise, just append the value */ + if (j == num_components) { + for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) { + instr->src[k].swizzle[num_components] = instr->src[k].swizzle[i]; + } + if (i != num_components) + progress = true; + reswizzle[i] = num_components++; } - index++; } - assert(index == num_components); + + unsigned rounded = round_up_components(num_components); + assert(rounded <= def->num_components); /* update dest */ - def->num_components = num_components; - instr->dest.write_mask = BITFIELD_MASK(num_components); + def->num_components = rounded; + instr->dest.write_mask = 
BITFIELD_MASK(rounded); /* update uses */ - reswizzle_alu_uses(def, reswizzle); + if (progress) + reswizzle_alu_uses(def, reswizzle); - return true; + return progress; } static bool