ir3: allow const src0 for ldg.a/stg.a/ray_intersection

While they allow a const src, they don't allow the full range of const
registers since the encoding only uses 8 bits. Add a new helper
(ir3_valid_const) that indicates this and use that in ir3_cp.

Totals from 2278 (1.29% of 176266) affected shaders:
MaxWaves: 30124 -> 30376 (+0.84%)
Instrs: 1472806 -> 1468750 (-0.28%); split: -0.44%, +0.16%
CodeSize: 2990430 -> 2967568 (-0.76%); split: -0.88%, +0.11%
NOPs: 244627 -> 246385 (+0.72%); split: -1.71%, +2.43%
MOVs: 51408 -> 45667 (-11.17%); split: -13.16%, +1.99%
Full: 35639 -> 35216 (-1.19%)
(ss): 46519 -> 42591 (-8.44%); split: -12.16%, +3.71%
(sy): 16763 -> 17269 (+3.02%); split: -1.35%, +4.37%
(ss)-stall: 187784 -> 180294 (-3.99%); split: -9.06%, +5.07%
(sy)-stall: 922477 -> 936696 (+1.54%); split: -3.40%, +4.94%
Cat0: 268783 -> 270558 (+0.66%); split: -1.54%, +2.20%
Cat1: 63982 -> 58277 (-8.92%); split: -10.59%, +1.68%
Cat2: 523798 -> 523707 (-0.02%)
Cat7: 47476 -> 47441 (-0.07%); split: -0.11%, +0.03%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41365>
This commit is contained in:
Job Noorman 2026-05-05 16:16:04 +02:00 committed by Marge Bot
parent 8bd0212874
commit f267e7797f
3 changed files with 32 additions and 0 deletions

View file

@ -1837,6 +1837,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
else if (n == 1)
valid_flags |= IR3_REG_SHARED;
}
if (compiler->gen >= 7 &&
(instr->opc == OPC_LDG_A || instr->opc == OPC_STG_A ||
instr->opc == OPC_RAY_INTERSECTION) &&
n == 0) {
valid_flags |= IR3_REG_CONST;
}
if (flags & ~valid_flags)
return false;
@ -1964,6 +1970,25 @@ ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed)
return !(immed & ~0x1ff) || !(-(uint32_t)immed & ~0x1ff);
}
/* Report whether const register number `num` can be used as source `src_n`
 * of `instr`.
 *
 * Callers must already have established that this source slot accepts
 * IR3_REG_CONST at all (checked by the assert below); this helper only
 * validates the register number. Some instructions (e.g., cat6) don't
 * support the full range of const registers as src: ldg.a, stg.a and
 * ray_intersection only take a const in src 0, and the encoding only has
 * 8 bits for it. Every other opcode accepts any const register number.
 */
bool
ir3_valid_const(struct ir3_instruction *instr, unsigned src_n, unsigned num)
{
   assert(ir3_valid_flags(instr, src_n, IR3_REG_CONST));

   const bool narrow_const_src = instr->opc == OPC_LDG_A ||
                                 instr->opc == OPC_STG_A ||
                                 instr->opc == OPC_RAY_INTERSECTION;

   /* No extra restriction on the const register number. */
   if (!narrow_const_src)
      return true;

   /* These opcodes only ever take a const in their first source. */
   assert(src_n == 0);

   /* Valid only when nothing is set above the low 8 bits. */
   return !(num & ~0xffu);
}
struct ir3_instruction *
ir3_get_cond_for_nonzero_compare(struct ir3_instruction *instr)
{

View file

@ -998,6 +998,8 @@ int ir3_flut(struct ir3_register *src_reg);
bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
bool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed);
/**
 * Check whether const register number `num` can be used as source `src_n`
 * of `instr`. Asserts that ir3_valid_flags() accepts IR3_REG_CONST for that
 * source. For ldg.a, stg.a and ray_intersection only `num` < 256 is
 * accepted (and `src_n` is asserted to be 0); all other opcodes return true
 * for any `num`.
 */
bool ir3_valid_const(struct ir3_instruction *instr, unsigned src_n,
unsigned num);
/**
* Given an instruction whose result we want to test for nonzero, return a

View file

@ -409,6 +409,11 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
* dependency.
*/
if (src_reg->flags & IR3_REG_CONST) {
if (!(src_reg->flags & IR3_REG_RELATIV) &&
!ir3_valid_const(instr, n, src_reg->num)) {
return false;
}
/* an instruction cannot reference two different
* address registers:
*/