nir/opt_shrink_vectors: shrink load_const properly

This patch enables the removal of arbitrary channels from
load_const instructions when those channels are either unused
or duplicates of other channels, provided that the load_const
is only used by ALU instructions.

Totals from 692 (0.51% of 134913) affected shaders: (GFX10.3)
VGPRs: 21832 -> 21544 (-1.32%)
CodeSize: 1322016 -> 1313080 (-0.68%); split: -0.68%, +0.01%
Instrs: 243635 -> 242231 (-0.58%); split: -0.58%, +0.00%
Latency: 1856138 -> 1857237 (+0.06%); split: -0.09%, +0.15%
InvThroughput: 424298 -> 421671 (-0.62%); split: -0.62%, +0.01%
VClause: 4580 -> 4583 (+0.07%); split: -0.02%, +0.09%
SClause: 14336 -> 14354 (+0.13%); split: -0.04%, +0.17%
Copies: 8897 -> 8859 (-0.43%); split: -0.45%, +0.02%
PreSGPRs: 20439 -> 20437 (-0.01%)
PreVGPRs: 16011 -> 15907 (-0.65%); split: -0.97%, +0.32%

i915g:
total instructions in shared programs: 396471 -> 396309 (-0.04%)
instructions in affected programs: 6408 -> 6246 (-2.53%)
total const in shared programs: 56458 -> 56422 (-0.06%)
const in affected programs: 407 -> 371 (-8.85%)
LOST:   shaders/closed/steam/trine-2/fp-3.shader_test FS

r300:
total instructions in shared programs: 1164421 -> 1165059 (0.05%)
instructions in affected programs: 143981 -> 144619 (0.44%)
total temps in shared programs: 165488 -> 165497 (<.01%)
temps in affected programs: 318 -> 327 (2.83%)
total consts in shared programs: 922140 -> 921952 (-0.02%)
consts in affected programs: 12438 -> 12250 (-1.51%)

softpipe:
total instructions in shared programs: 2859978 -> 2860028 (<.01%)
instructions in affected programs: 183355 -> 183405 (0.03%)
total temps in shared programs: 517071 -> 516939 (-0.03%)
temps in affected programs: 1416 -> 1284 (-9.32%)
total imm in shared programs: 103601 -> 102767 (-0.81%)
imm in affected programs: 3928 -> 3094 (-21.23%)

Acked-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12468>
This commit is contained in:
Daniel Schürmann 2021-08-17 13:23:22 +02:00 committed by Marge Bot
parent a10b5d7086
commit e5963478c2

View file

@ -70,6 +70,31 @@ shrink_dest_to_read_mask(nir_ssa_def *def)
return false;
}
/*
 * Remap the swizzle of every use of `def` through the `reswizzle` table:
 * each swizzle entry holding old channel `c` is replaced by `reswizzle[c]`.
 *
 * Every use must belong to an ALU instruction (asserted below); the use is
 * then reinterpreted as a nir_alu_src to reach its swizzle array.
 */
static void
reswizzle_alu_uses(nir_ssa_def *def, uint8_t *reswizzle)
{
   nir_foreach_use(src, def) {
      /* the cast below is only meaningful for ALU users */
      assert(src->parent_instr->type == nir_instr_type_alu);

      nir_alu_src *alu = (nir_alu_src*)src;

      /* translate each swizzle slot from the old channel to the new one */
      for (unsigned chan = 0; chan < NIR_MAX_VEC_COMPONENTS; chan++) {
         const unsigned old_chan = alu->swizzle[chan];
         alu->swizzle[chan] = reswizzle[old_chan];
      }
   }
}
/*
 * Return true when every use of `def` visited by nir_foreach_use() belongs
 * to an ALU instruction, and false as soon as a use with any other parent
 * instruction type is found.
 */
static bool
is_only_used_by_alu(nir_ssa_def *def)
{
   nir_foreach_use(src, def) {
      if (src->parent_instr->type == nir_instr_type_alu)
         continue;

      /* found a non-ALU user */
      return false;
   }

   return true;
}
static bool
opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
{
@ -93,11 +118,9 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
break;
}
/* don't remove any channels if used by an intrinsic */
nir_foreach_use(use_src, def) {
if (use_src->parent_instr->type == nir_instr_type_intrinsic)
return false;
}
/* don't remove any channels if used by non-ALU */
if (!is_only_used_by_alu(def))
return false;
unsigned mask = nir_ssa_def_components_read(def);
unsigned last_bit = util_last_bit(mask);
@ -156,12 +179,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
assert(index == num_components);
/* update uses */
nir_foreach_use(use_src, def) {
assert(use_src->parent_instr->type == nir_instr_type_alu);
nir_alu_src *alu_src = (nir_alu_src*)use_src;
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]];
}
reswizzle_alu_uses(def, reswizzle);
return true;
}
@ -204,7 +222,51 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
/*
 * Shrink a load_const: drop channels that are never read, merge channels
 * that hold the same constant, and remap the swizzles of all users to the
 * compacted channel layout.
 *
 * Returns true if the instruction was modified, false otherwise.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The `return shrink_dest_to_read_mask(...)` line directly below appears to
 * be the pre-patch body that this change replaces; read literally it would
 * return before any of the following logic runs. Confirm against the
 * applied file.
 */
static bool
opt_shrink_vectors_load_const(nir_load_const_instr *instr)
{
return shrink_dest_to_read_mask(&instr->def);
nir_ssa_def *def = &instr->def;
/* early out if there's nothing to do. */
if (def->num_components == 1)
return false;
/* don't remove any channels if used by non-ALU:
 * reswizzle_alu_uses() below asserts that every use is an ALU source.
 */
if (!is_only_used_by_alu(def))
return false;
/* bit i of mask is tested below to decide whether channel i is kept */
unsigned mask = nir_ssa_def_components_read(def);
/* If nothing was read, leave it up to DCE. */
if (!mask)
return false;
/* reswizzle[old channel] = new channel; channels that are not read keep
 * the zero initializer.
 */
uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
/* number of channels kept so far; also the next free slot in instr->value */
unsigned num_components = 0;
for (unsigned i = 0; i < def->num_components; i++) {
/* skip channels that no use reads */
if (!((mask >> i) & 0x1))
continue;
/* Try to reuse a component with the same constant.
 * The comparison is on the full 64-bit payload; presumably constants of
 * narrower bit sizes have their upper bits zeroed -- TODO confirm.
 */
unsigned j;
for (j = 0; j < num_components; j++) {
if (instr->value[i].u64 == instr->value[j].u64) {
reswizzle[i] = j;
break;
}
}
/* Otherwise, just append the value.
 * num_components never exceeds i here, so this in-place compaction does
 * not overwrite a channel that has not been visited yet.
 */
if (j == num_components) {
instr->value[num_components] = instr->value[i];
reswizzle[i] = num_components++;
}
}
/* every channel was read and unique: nothing to shrink */
if (num_components == def->num_components)
return false;
def->num_components = num_components;
/* point all users at the compacted channels */
reswizzle_alu_uses(def, reswizzle);
return true;
}
static bool