mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-19 11:00:42 +01:00
nir/opt_shrink_vectors: shrink load_const properly
This patch enables removal of arbitrary channels in load_const instructions, if they are either unused or duplicates of other channels and only used by ALU.

Totals from 692 (0.51% of 134913) affected shaders: (GFX10.3)
VGPRs: 21832 -> 21544 (-1.32%)
CodeSize: 1322016 -> 1313080 (-0.68%); split: -0.68%, +0.01%
Instrs: 243635 -> 242231 (-0.58%); split: -0.58%, +0.00%
Latency: 1856138 -> 1857237 (+0.06%); split: -0.09%, +0.15%
InvThroughput: 424298 -> 421671 (-0.62%); split: -0.62%, +0.01%
VClause: 4580 -> 4583 (+0.07%); split: -0.02%, +0.09%
SClause: 14336 -> 14354 (+0.13%); split: -0.04%, +0.17%
Copies: 8897 -> 8859 (-0.43%); split: -0.45%, +0.02%
PreSGPRs: 20439 -> 20437 (-0.01%)
PreVGPRs: 16011 -> 15907 (-0.65%); split: -0.97%, +0.32%

i915g:
total instructions in shared programs: 396471 -> 396309 (-0.04%)
instructions in affected programs: 6408 -> 6246 (-2.53%)
total const in shared programs: 56458 -> 56422 (-0.06%)
const in affected programs: 407 -> 371 (-8.85%)
LOST: shaders/closed/steam/trine-2/fp-3.shader_test FS

r300:
total instructions in shared programs: 1164421 -> 1165059 (0.05%)
instructions in affected programs: 143981 -> 144619 (0.44%)
total temps in shared programs: 165488 -> 165497 (<.01%)
temps in affected programs: 318 -> 327 (2.83%)
total consts in shared programs: 922140 -> 921952 (-0.02%)
consts in affected programs: 12438 -> 12250 (-1.51%)

softpipe:
total instructions in shared programs: 2859978 -> 2860028 (<.01%)
instructions in affected programs: 183355 -> 183405 (0.03%)
total temps in shared programs: 517071 -> 516939 (-0.03%)
temps in affected programs: 1416 -> 1284 (-9.32%)
total imm in shared programs: 103601 -> 102767 (-0.81%)
imm in affected programs: 3928 -> 3094 (-21.23%)

Acked-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12468>
This commit is contained in:
parent
a10b5d7086
commit
e5963478c2
1 changed files with 74 additions and 12 deletions
|
|
@ -70,6 +70,31 @@ shrink_dest_to_read_mask(nir_ssa_def *def)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
reswizzle_alu_uses(nir_ssa_def *def, uint8_t *reswizzle)
|
||||
{
|
||||
nir_foreach_use(use_src, def) {
|
||||
/* all uses must be ALU instructions */
|
||||
assert(use_src->parent_instr->type == nir_instr_type_alu);
|
||||
nir_alu_src *alu_src = (nir_alu_src*)use_src;
|
||||
|
||||
/* reswizzle ALU sources */
|
||||
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
|
||||
alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]];
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_only_used_by_alu(nir_ssa_def *def)
|
||||
{
|
||||
nir_foreach_use(use_src, def) {
|
||||
if (use_src->parent_instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
|
||||
{
|
||||
|
|
@ -93,11 +118,9 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
/* don't remove any channels if used by an intrinsic */
|
||||
nir_foreach_use(use_src, def) {
|
||||
if (use_src->parent_instr->type == nir_instr_type_intrinsic)
|
||||
return false;
|
||||
}
|
||||
/* don't remove any channels if used by non-ALU */
|
||||
if (!is_only_used_by_alu(def))
|
||||
return false;
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(def);
|
||||
unsigned last_bit = util_last_bit(mask);
|
||||
|
|
@ -156,12 +179,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
|
|||
assert(index == num_components);
|
||||
|
||||
/* update uses */
|
||||
nir_foreach_use(use_src, def) {
|
||||
assert(use_src->parent_instr->type == nir_instr_type_alu);
|
||||
nir_alu_src *alu_src = (nir_alu_src*)use_src;
|
||||
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
|
||||
alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]];
|
||||
}
|
||||
reswizzle_alu_uses(def, reswizzle);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -204,7 +222,51 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
|
|||
static bool
|
||||
opt_shrink_vectors_load_const(nir_load_const_instr *instr)
|
||||
{
|
||||
return shrink_dest_to_read_mask(&instr->def);
|
||||
nir_ssa_def *def = &instr->def;
|
||||
|
||||
/* early out if there's nothing to do. */
|
||||
if (def->num_components == 1)
|
||||
return false;
|
||||
|
||||
/* don't remove any channels if used by non-ALU */
|
||||
if (!is_only_used_by_alu(def))
|
||||
return false;
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(def);
|
||||
|
||||
/* If nothing was read, leave it up to DCE. */
|
||||
if (!mask)
|
||||
return false;
|
||||
|
||||
uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
|
||||
unsigned num_components = 0;
|
||||
for (unsigned i = 0; i < def->num_components; i++) {
|
||||
if (!((mask >> i) & 0x1))
|
||||
continue;
|
||||
|
||||
/* Try reuse a component with the same constant */
|
||||
unsigned j;
|
||||
for (j = 0; j < num_components; j++) {
|
||||
if (instr->value[i].u64 == instr->value[j].u64) {
|
||||
reswizzle[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, just append the value */
|
||||
if (j == num_components) {
|
||||
instr->value[num_components] = instr->value[i];
|
||||
reswizzle[i] = num_components++;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_components == def->num_components)
|
||||
return false;
|
||||
|
||||
def->num_components = num_components;
|
||||
reswizzle_alu_uses(def, reswizzle);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue