From 75604ff945a8ee60267bc974350ab6b1638f2ceb Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 16 Oct 2025 17:42:05 +0200 Subject: [PATCH] ir3: allow (neg) on sel.b on a6xx gen4+ Setting the (neg) flag on a sel.b source behaves as fneg on a6xx gen4+. Totals from 22733 (13.80% of 164705) affected shaders: MaxWaves: 277060 -> 277292 (+0.08%); split: +0.11%, -0.03% Instrs: 17676148 -> 17634471 (-0.24%); split: -0.47%, +0.24% CodeSize: 34824114 -> 34693740 (-0.37%); split: -0.44%, +0.06% NOPs: 3466984 -> 3487984 (+0.61%); split: -1.17%, +1.78% MOVs: 521091 -> 522791 (+0.33%); split: -1.87%, +2.20% Full: 315929 -> 315699 (-0.07%); split: -0.09%, +0.02% (ss): 473545 -> 472947 (-0.13%); split: -1.36%, +1.23% (sy): 195612 -> 195743 (+0.07%); split: -1.00%, +1.07% (ss)-stall: 1928887 -> 1922757 (-0.32%); split: -1.96%, +1.64% (sy)-stall: 4965071 -> 4972119 (+0.14%); split: -1.43%, +1.57% STPs: 777 -> 762 (-1.93%) LDPs: 2168 -> 2117 (-2.35%) Preamble Instrs: 3465691 -> 3462635 (-0.09%); split: -0.09%, +0.00% Last helper: 4666320 -> 4593331 (-1.56%); split: -2.81%, +1.24% Last baryf: 235724 -> 230049 (-2.41%); split: -4.56%, +2.15% Subgroup size: 2021248 -> 2021952 (+0.03%); split: +0.07%, -0.03% Cat0: 3792738 -> 3814197 (+0.57%); split: -1.08%, +1.64% Cat1: 757480 -> 759260 (+0.23%); split: -1.28%, +1.52% Cat2: 6960677 -> 6897218 (-0.91%) Cat6: 78290 -> 78224 (-0.08%) Cat7: 422101 -> 420710 (-0.33%); split: -1.41%, +1.09% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.c | 13 +++++++------ src/freedreno/ir3/ir3.h | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 887ef8a22f7..94a9bfaf71e 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1773,8 +1773,8 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) } break; case 3: - valid_flags = - ir3_cat3_absneg(instr->opc, n) | IR3_REG_RELATIV | IR3_REG_SHARED; + valid_flags = ir3_cat3_absneg(compiler, instr->opc, n) | IR3_REG_RELATIV | + IR3_REG_SHARED; switch (instr->opc) { case OPC_SHRM: @@ -2128,7 +2128,7 @@ ir3_cat2_absneg(opc_t opc) /* map cat3 instructions to valid abs/neg flags: */ inline unsigned -ir3_cat3_absneg(opc_t opc, unsigned src_n) +ir3_cat3_absneg(struct ir3_compiler *compiler, opc_t opc, unsigned src_n) { switch (opc) { case OPC_MAD_F16: @@ -2137,6 +2137,10 @@ ir3_cat3_absneg(opc_t opc, unsigned src_n) case OPC_SEL_F32: return IR3_REG_FNEG; + case OPC_SEL_B16: + case OPC_SEL_B32: + return compiler->has_sel_b_fneg ? IR3_REG_FNEG : 0; + case OPC_SAD_S16: case OPC_SAD_S32: return src_n == 1 ? IR3_REG_SNEG : 0; @@ -2151,9 +2155,6 @@ ir3_cat3_absneg(opc_t opc, unsigned src_n) case OPC_SEL_S32: /* neg *may* work on 3rd src.. */ - case OPC_SEL_B16: - case OPC_SEL_B32: - case OPC_SHRM: case OPC_SHLM: case OPC_SHRG: diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index da479e27d86..89c63f11ab9 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1753,7 +1753,8 @@ ir3_cat2_int(opc_t opc) unsigned ir3_cat2_absneg(opc_t opc); /* map cat3 instructions to valid abs/neg flags: */ -unsigned ir3_cat3_absneg(opc_t opc, unsigned src_n); +unsigned ir3_cat3_absneg(struct ir3_compiler *compiler, opc_t opc, + unsigned src_n); /* Return the type (float, int, or uint) the op uses when converting from the * internal result of the op (which is assumed to be the same size as the