From c63c750380dae74dd76a81bde0876c9ab8475e5c Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 1 Jun 2024 20:42:31 +0200
Subject: [PATCH] aco/gfx11+: fix inline constants for v_pk_fmac_f16
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On newer hardware, the hi operation reads the lo half of the inline constant.
On older hardware, it reads the hi half (zero).

I tested this on Navi31 for gfx11 and Raphael for gfx10.

Foz-DB Navi31:
Totals from 4 (0.01% of 79395) affected shaders:
CodeSize: 36832 -> 36448 (-1.04%)
Latency: 20362 -> 20334 (-0.14%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 9edd8c4d2c4..f1aa44a0131 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2567,8 +2567,16 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr)
       return false;
 
    if (instr->isVOP3P()) {
-      if (instr->valu().opsel_lo != 0 || instr->valu().opsel_hi != 0x7)
-         return false;
+      for (unsigned i = 0; i < 3; i++) {
+         if (instr->valu().opsel_lo[i])
+            return false;
+
+         /* v_pk_fmac_f16 inline constants are replicated to hi bits starting with gfx11. */
+         if (instr->valu().opsel_hi[i] ==
+             (instr->operands[i].isConstant() && !instr->operands[i].isLiteral() &&
+              ctx.program->gfx_level >= GFX11))
+            return false;
+      }
    } else {
       if (instr->valu().opsel & (ctx.program->gfx_level < GFX11 ? 0xf : ~0x3))
          return false;