From f267e7797fc5bb12367c8a151d3f976380a49c57 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 5 May 2026 16:16:04 +0200 Subject: [PATCH] ir3: allow const src0 for ldg.a/stg.a/ray_intersection While they allow a const src, they don't allow the full range of const registers since the encoding only uses 8 bits. Add a new helper (ir3_valid_const) that indicates this and use that in ir3_cp. Totals from 2278 (1.29% of 176266) affected shaders: MaxWaves: 30124 -> 30376 (+0.84%) Instrs: 1472806 -> 1468750 (-0.28%); split: -0.44%, +0.16% CodeSize: 2990430 -> 2967568 (-0.76%); split: -0.88%, +0.11% NOPs: 244627 -> 246385 (+0.72%); split: -1.71%, +2.43% MOVs: 51408 -> 45667 (-11.17%); split: -13.16%, +1.99% Full: 35639 -> 35216 (-1.19%) (ss): 46519 -> 42591 (-8.44%); split: -12.16%, +3.71% (sy): 16763 -> 17269 (+3.02%); split: -1.35%, +4.37% (ss)-stall: 187784 -> 180294 (-3.99%); split: -9.06%, +5.07% (sy)-stall: 922477 -> 936696 (+1.54%); split: -3.40%, +4.94% Cat0: 268783 -> 270558 (+0.66%); split: -1.54%, +2.20% Cat1: 63982 -> 58277 (-8.92%); split: -10.59%, +1.68% Cat2: 523798 -> 523707 (-0.02%) Cat7: 47476 -> 47441 (-0.07%); split: -0.11%, +0.03% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.c | 25 +++++++++++++++++++++++++ src/freedreno/ir3/ir3.h | 2 ++ src/freedreno/ir3/ir3_cp.c | 5 +++++ 3 files changed, 32 insertions(+) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index bc493a9dc76..7dc042c9a55 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1837,6 +1837,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) else if (n == 1) valid_flags |= IR3_REG_SHARED; } + if (compiler->gen >= 7 && + (instr->opc == OPC_LDG_A || instr->opc == OPC_STG_A || + instr->opc == OPC_RAY_INTERSECTION) && + n == 0) { + valid_flags |= IR3_REG_CONST; + } if (flags & ~valid_flags) return false; @@ -1964,6 +1970,25 @@ ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed) return !(immed & ~0x1ff) 
|| !(-(uint32_t)immed & ~0x1ff); } +/* Some instructions (e.g., cat6) don't support the full range of const + * registers as src. Returns true if const reg num fits src src_n of instr; + * ldg.a/stg.a/ray_intersection src 0 only allows num < 256 (8-bit field). */ +bool +ir3_valid_const(struct ir3_instruction *instr, unsigned src_n, unsigned num) +{ + assert(ir3_valid_flags(instr, src_n, IR3_REG_CONST)); + + switch (instr->opc) { + case OPC_LDG_A: + case OPC_STG_A: + case OPC_RAY_INTERSECTION: + assert(src_n == 0); + return num < (1 << 8); + default: + return true; + } +} + struct ir3_instruction * ir3_get_cond_for_nonzero_compare(struct ir3_instruction *instr) { diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 9d5351e6386..83517828918 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -998,6 +998,8 @@ int ir3_flut(struct ir3_register *src_reg); bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags); bool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed); +bool ir3_valid_const(struct ir3_instruction *instr, unsigned src_n, + unsigned num); /** * Given an instruction whose result we want to test for nonzero, return a diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 7647265828d..6fd9955fe1d 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -409,6 +409,11 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, * dependency. */ if (src_reg->flags & IR3_REG_CONST) { + if (!(src_reg->flags & IR3_REG_RELATIV) && + !ir3_valid_const(instr, n, src_reg->num)) { + return false; + } + /* an instruction cannot reference two different * address registers: */