From e5963478c2f75d503d0acaeb2b20a4cb266670cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Tue, 17 Aug 2021 13:23:22 +0200
Subject: [PATCH] nir/opt_shrink_vectors: shrink load_const properly

This patch enables removal of arbitrary channels from load_const
instructions that are only used by ALU instructions: a channel is
removed if it is either unused or a duplicate of another channel.

Totals from 692 (0.51% of 134913) affected shaders: (GFX10.3)
VGPRs: 21832 -> 21544 (-1.32%)
CodeSize: 1322016 -> 1313080 (-0.68%); split: -0.68%, +0.01%
Instrs: 243635 -> 242231 (-0.58%); split: -0.58%, +0.00%
Latency: 1856138 -> 1857237 (+0.06%); split: -0.09%, +0.15%
InvThroughput: 424298 -> 421671 (-0.62%); split: -0.62%, +0.01%
VClause: 4580 -> 4583 (+0.07%); split: -0.02%, +0.09%
SClause: 14336 -> 14354 (+0.13%); split: -0.04%, +0.17%
Copies: 8897 -> 8859 (-0.43%); split: -0.45%, +0.02%
PreSGPRs: 20439 -> 20437 (-0.01%)
PreVGPRs: 16011 -> 15907 (-0.65%); split: -0.97%, +0.32%

i915g:
total instructions in shared programs: 396471 -> 396309 (-0.04%)
instructions in affected programs: 6408 -> 6246 (-2.53%)
total const in shared programs: 56458 -> 56422 (-0.06%)
const in affected programs: 407 -> 371 (-8.85%)
LOST: shaders/closed/steam/trine-2/fp-3.shader_test FS

r300:
total instructions in shared programs: 1164421 -> 1165059 (0.05%)
instructions in affected programs: 143981 -> 144619 (0.44%)
total temps in shared programs: 165488 -> 165497 (<.01%)
temps in affected programs: 318 -> 327 (2.83%)
total consts in shared programs: 922140 -> 921952 (-0.02%)
consts in affected programs: 12438 -> 12250 (-1.51%)

softpipe:
total instructions in shared programs: 2859978 -> 2860028 (<.01%)
instructions in affected programs: 183355 -> 183405 (0.03%)
total temps in shared programs: 517071 -> 516939 (-0.03%)
temps in affected programs: 1416 -> 1284 (-9.32%)
total imm in shared programs: 103601 -> 102767 (-0.81%)
imm in affected programs: 3928 -> 3094 (-21.23%)

Acked-by: Emma Anholt
Reviewed-by: Ian Romanick
Part-of: --- src/compiler/nir/nir_opt_shrink_vectors.c | 86 +++++++++++++++++++---- 1 file changed, 74 insertions(+), 12 deletions(-) diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c b/src/compiler/nir/nir_opt_shrink_vectors.c index 9cb186f6c25..3a77a200f77 100644 --- a/src/compiler/nir/nir_opt_shrink_vectors.c +++ b/src/compiler/nir/nir_opt_shrink_vectors.c @@ -70,6 +70,31 @@ shrink_dest_to_read_mask(nir_ssa_def *def) return false; } +static void +reswizzle_alu_uses(nir_ssa_def *def, uint8_t *reswizzle) +{ + nir_foreach_use(use_src, def) { + /* all uses must be ALU instructions */ + assert(use_src->parent_instr->type == nir_instr_type_alu); + nir_alu_src *alu_src = (nir_alu_src*)use_src; + + /* reswizzle ALU sources */ + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) + alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]]; + } +} + +static bool +is_only_used_by_alu(nir_ssa_def *def) +{ + nir_foreach_use(use_src, def) { + if (use_src->parent_instr->type != nir_instr_type_alu) + return false; + } + + return true; +} + static bool opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) { @@ -93,11 +118,9 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) break; } - /* don't remove any channels if used by an intrinsic */ - nir_foreach_use(use_src, def) { - if (use_src->parent_instr->type == nir_instr_type_intrinsic) - return false; - } + /* don't remove any channels if used by non-ALU */ + if (!is_only_used_by_alu(def)) + return false; unsigned mask = nir_ssa_def_components_read(def); unsigned last_bit = util_last_bit(mask); @@ -156,12 +179,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) assert(index == num_components); /* update uses */ - nir_foreach_use(use_src, def) { - assert(use_src->parent_instr->type == nir_instr_type_alu); - nir_alu_src *alu_src = (nir_alu_src*)use_src; - for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) - alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]]; - } + reswizzle_alu_uses(def, 
reswizzle); return true; } @@ -204,7 +222,51 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr) static bool opt_shrink_vectors_load_const(nir_load_const_instr *instr) { - return shrink_dest_to_read_mask(&instr->def); + nir_ssa_def *def = &instr->def; + + /* early out if there's nothing to do. */ + if (def->num_components == 1) + return false; + + /* don't remove any channels if used by non-ALU */ + if (!is_only_used_by_alu(def)) + return false; + + unsigned mask = nir_ssa_def_components_read(def); + + /* If nothing was read, leave it up to DCE. */ + if (!mask) + return false; + + uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 }; + unsigned num_components = 0; + for (unsigned i = 0; i < def->num_components; i++) { + if (!((mask >> i) & 0x1)) + continue; + + /* Try reuse a component with the same constant */ + unsigned j; + for (j = 0; j < num_components; j++) { + if (instr->value[i].u64 == instr->value[j].u64) { + reswizzle[i] = j; + break; + } + } + + /* Otherwise, just append the value */ + if (j == num_components) { + instr->value[num_components] = instr->value[i]; + reswizzle[i] = num_components++; + } + } + + if (num_components == def->num_components) + return false; + + def->num_components = num_components; + reswizzle_alu_uses(def, reswizzle); + + return true; } static bool