pan/mdg: Scalarize LUT instructions in NIR

Simpler. Small shaderdb regressions from using IR registers instead of
SSA, but that's probably what we needed for correctness (given that SSA
is violated otherwise) hence the Cc.

total instructions in shared programs: 1520220 -> 1518127 (-0.14%)
instructions in affected programs: 167437 -> 165344 (-1.25%)
helped: 662
HURT: 206
helped stats (abs) min: 1.0 max: 46.0 x̄: 3.65 x̃: 2
helped stats (rel) min: 0.18% max: 22.22% x̄: 2.43% x̃: 1.71%
HURT stats (abs)   min: 1.0 max: 7.0 x̄: 1.56 x̃: 1
HURT stats (rel)   min: 0.17% max: 8.33% x̄: 2.66% x̃: 2.33%
95% mean confidence interval for instructions value: -2.65 -2.18
95% mean confidence interval for instructions %-change: -1.45% -0.99%
Instructions are helped.

total bundles in shared programs: 649844 -> 649345 (-0.08%)
bundles in affected programs: 59278 -> 58779 (-0.84%)
helped: 577
HURT: 249
helped stats (abs) min: 1.0 max: 39.0 x̄: 1.56 x̃: 1
helped stats (rel) min: 0.26% max: 30.00% x̄: 3.13% x̃: 2.19%
HURT stats (abs)   min: 1.0 max: 12.0 x̄: 1.61 x̃: 1
HURT stats (rel)   min: 0.58% max: 25.00% x̄: 5.25% x̃: 4.00%
95% mean confidence interval for bundles value: -0.78 -0.43
95% mean confidence interval for bundles %-change: -0.98% -0.23%
Bundles are helped.

total quadwords in shared programs: 1136767 -> 1134956 (-0.16%)
quadwords in affected programs: 141780 -> 139969 (-1.28%)
helped: 744
HURT: 311
helped stats (abs) min: 1.0 max: 9.0 x̄: 3.13 x̃: 2
helped stats (rel) min: 0.14% max: 26.67% x̄: 2.77% x̃: 2.13%
HURT stats (abs)   min: 1.0 max: 8.0 x̄: 1.68 x̃: 1
HURT stats (rel)   min: 0.35% max: 10.00% x̄: 3.17% x̃: 1.69%
95% mean confidence interval for quadwords value: -1.89 -1.54
95% mean confidence interval for quadwords %-change: -1.27% -0.77%
Quadwords are helped.

total registers in shared programs: 90461 -> 90273 (-0.21%)
registers in affected programs: 2833 -> 2645 (-6.64%)
helped: 250
HURT: 82
helped stats (abs) min: 1.0 max: 2.0 x̄: 1.08 x̃: 1
helped stats (rel) min: 6.67% max: 33.33% x̄: 14.06% x̃: 12.50%
HURT stats (abs)   min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 6.67% max: 50.00% x̄: 13.90% x̃: 12.50%
95% mean confidence interval for registers value: -0.67 -0.47
95% mean confidence interval for registers %-change: -8.62% -5.69%
Registers are helped.

total threads in shared programs: 55685 -> 55686 (<.01%)
threads in affected programs: 76 -> 77 (1.32%)
helped: 20
HURT: 17
helped stats (abs) min: 1.0 max: 2.0 x̄: 1.30 x̃: 1
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
HURT stats (abs)   min: 1.0 max: 2.0 x̄: 1.47 x̃: 1
HURT stats (rel)   min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00%
95% mean confidence interval for threads value: -0.47 0.52
95% mean confidence interval for threads %-change: 5.81% 56.35%
Inconclusive result (value mean confidence interval includes 0).

total spills in shared programs: 1387 -> 1379 (-0.58%)
spills in affected programs: 283 -> 275 (-2.83%)
helped: 5
HURT: 1

total fills in shared programs: 5256 -> 5176 (-1.52%)
fills in affected programs: 557 -> 477 (-14.36%)
helped: 5
HURT: 1

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19350>
This commit is contained in:
Alyssa Rosenzweig 2022-10-26 22:43:32 -04:00 committed by Marge Bot
parent 10759d1708
commit 23968aeeb5

View file

@ -303,6 +303,15 @@ mdg_should_scalarize(const nir_instr *instr, const void *_unused)
case nir_op_imul_high:
case nir_op_pack_half_2x16:
case nir_op_unpack_half_2x16:
/* The LUT unit is scalar */
case nir_op_fsqrt:
case nir_op_frcp:
case nir_op_frsq:
case nir_op_fsin_mdg:
case nir_op_fcos_mdg:
case nir_op_fexp2:
case nir_op_flog2:
return true;
default:
return false;
@ -1125,52 +1134,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
ins.is_pack = true;
}
if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
/* To avoid duplicating the lookup tables (probably), true LUT
* instructions can only operate as if they were scalars. Lower
* them here by changing the component. */
unsigned orig_mask = ins.mask;
unsigned swizzle_back[MIR_VEC_COMPONENTS];
memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back));
midgard_instruction ins_split[MIR_VEC_COMPONENTS];
unsigned ins_count = 0;
for (int i = 0; i < nr_components; ++i) {
/* Mask the associated component, dropping the
* instruction if needed */
ins.mask = 1 << i;
ins.mask &= orig_mask;
for (unsigned j = 0; j < ins_count; ++j) {
if (swizzle_back[i] == ins_split[j].swizzle[0][0]) {
ins_split[j].mask |= ins.mask;
ins.mask = 0;
break;
}
}
if (!ins.mask)
continue;
for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j)
ins.swizzle[0][j] =
swizzle_back[i]; /* Pull from the correct component */
ins_split[ins_count] = ins;
++ins_count;
}
for (unsigned i = 0; i < ins_count; ++i) {
emit_mir_instruction(ctx, ins_split[i]);
}
} else {
emit_mir_instruction(ctx, ins);
}
emit_mir_instruction(ctx, ins);
}
#undef ALU_CASE