pan/mdg: Scalarize LUT instructions in NIR

Simpler. Small shaderdb regressions from using IR registers instead of SSA, but that's probably what we needed for correctness (given that SSA is violated otherwise), hence the Cc.

total instructions in shared programs: 1520220 -> 1518127 (-0.14%)
instructions in affected programs: 167437 -> 165344 (-1.25%)
helped: 662
HURT: 206
helped stats (abs) min: 1.0 max: 46.0 x̄: 3.65 x̃: 2
helped stats (rel) min: 0.18% max: 22.22% x̄: 2.43% x̃: 1.71%
HURT stats (abs) min: 1.0 max: 7.0 x̄: 1.56 x̃: 1
HURT stats (rel) min: 0.17% max: 8.33% x̄: 2.66% x̃: 2.33%
95% mean confidence interval for instructions value: -2.65 -2.18
95% mean confidence interval for instructions %-change: -1.45% -0.99%
Instructions are helped.

total bundles in shared programs: 649844 -> 649345 (-0.08%)
bundles in affected programs: 59278 -> 58779 (-0.84%)
helped: 577
HURT: 249
helped stats (abs) min: 1.0 max: 39.0 x̄: 1.56 x̃: 1
helped stats (rel) min: 0.26% max: 30.00% x̄: 3.13% x̃: 2.19%
HURT stats (abs) min: 1.0 max: 12.0 x̄: 1.61 x̃: 1
HURT stats (rel) min: 0.58% max: 25.00% x̄: 5.25% x̃: 4.00%
95% mean confidence interval for bundles value: -0.78 -0.43
95% mean confidence interval for bundles %-change: -0.98% -0.23%
Bundles are helped.

total quadwords in shared programs: 1136767 -> 1134956 (-0.16%)
quadwords in affected programs: 141780 -> 139969 (-1.28%)
helped: 744
HURT: 311
helped stats (abs) min: 1.0 max: 9.0 x̄: 3.13 x̃: 2
helped stats (rel) min: 0.14% max: 26.67% x̄: 2.77% x̃: 2.13%
HURT stats (abs) min: 1.0 max: 8.0 x̄: 1.68 x̃: 1
HURT stats (rel) min: 0.35% max: 10.00% x̄: 3.17% x̃: 1.69%
95% mean confidence interval for quadwords value: -1.89 -1.54
95% mean confidence interval for quadwords %-change: -1.27% -0.77%
Quadwords are helped.
total registers in shared programs: 90461 -> 90273 (-0.21%)
registers in affected programs: 2833 -> 2645 (-6.64%)
helped: 250
HURT: 82
helped stats (abs) min: 1.0 max: 2.0 x̄: 1.08 x̃: 1
helped stats (rel) min: 6.67% max: 33.33% x̄: 14.06% x̃: 12.50%
HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
HURT stats (rel) min: 6.67% max: 50.00% x̄: 13.90% x̃: 12.50%
95% mean confidence interval for registers value: -0.67 -0.47
95% mean confidence interval for registers %-change: -8.62% -5.69%
Registers are helped.

total threads in shared programs: 55685 -> 55686 (<.01%)
threads in affected programs: 76 -> 77 (1.32%)
helped: 20
HURT: 17
helped stats (abs) min: 1.0 max: 2.0 x̄: 1.30 x̃: 1
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.47 x̃: 1
HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00%
95% mean confidence interval for threads value: -0.47 0.52
95% mean confidence interval for threads %-change: 5.81% 56.35%
Inconclusive result (value mean confidence interval includes 0).

total spills in shared programs: 1387 -> 1379 (-0.58%)
spills in affected programs: 283 -> 275 (-2.83%)
helped: 5
HURT: 1

total fills in shared programs: 5256 -> 5176 (-1.52%)
fills in affected programs: 557 -> 477 (-14.36%)
helped: 5
HURT: 1

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19350>
2026-05-06 05:08:08 +02:00 · 2022-10-26 22:43:32 -04:00 · 2022-10-26 22:43:32 -04:00 · 23968aeeb5
commit 23968aeeb5
parent 10759d1708
1 changed files with 10 additions and 46 deletions
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -303,6 +303,15 @@ mdg_should_scalarize(const nir_instr *instr, const void *_unused)
   case nir_op_imul_high:
   case nir_op_pack_half_2x16:
   case nir_op_unpack_half_2x16:
+
+   /* The LUT unit is scalar */
+   case nir_op_fsqrt:
+   case nir_op_frcp:
+   case nir_op_frsq:
+   case nir_op_fsin_mdg:
+   case nir_op_fcos_mdg:
+   case nir_op_fexp2:
+   case nir_op_flog2:
      return true;
   default:
      return false;
@@ -1125,52 +1134,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
      ins.is_pack = true;
   }

-   if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
-      /* To avoid duplicating the lookup tables (probably), true LUT
-       * instructions can only operate as if they were scalars. Lower
-       * them here by changing the component. */
-
-      unsigned orig_mask = ins.mask;
-
-      unsigned swizzle_back[MIR_VEC_COMPONENTS];
-      memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back));
-
-      midgard_instruction ins_split[MIR_VEC_COMPONENTS];
-      unsigned ins_count = 0;
-
-      for (int i = 0; i < nr_components; ++i) {
-         /* Mask the associated component, dropping the
-          * instruction if needed */
-
-         ins.mask = 1 << i;
-         ins.mask &= orig_mask;
-
-         for (unsigned j = 0; j < ins_count; ++j) {
-            if (swizzle_back[i] == ins_split[j].swizzle[0][0]) {
-               ins_split[j].mask |= ins.mask;
-               ins.mask = 0;
-               break;
-            }
-         }
-
-         if (!ins.mask)
-            continue;
-
-         for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j)
-            ins.swizzle[0][j] =
-               swizzle_back[i]; /* Pull from the correct component */
-
-         ins_split[ins_count] = ins;
-
-         ++ins_count;
-      }
-
-      for (unsigned i = 0; i < ins_count; ++i) {
-         emit_mir_instruction(ctx, ins_split[i]);
-      }
-   } else {
-      emit_mir_instruction(ctx, ins);
-   }
+   emit_mir_instruction(ctx, ins);
 }

 #undef ALU_CASE