aco: avoid wraparound for smem global loads with both offsets

fossil-db (gfx1201):
Totals from 296 (0.37% of 79839) affected shaders:
Instrs: 382593 -> 380149 (-0.64%)
CodeSize: 1981452 -> 1970988 (-0.53%); split: -0.53%, +0.00%
Latency: 1575286 -> 1574252 (-0.07%)
InvThroughput: 215839 -> 215818 (-0.01%)
SClause: 8679 -> 8677 (-0.02%); split: -0.03%, +0.01%
Copies: 19642 -> 19641 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 14521 -> 14515 (-0.04%)
SALU: 57097 -> 55718 (-2.42%)

fossil-db (polaris10):
Totals from 30 (0.05% of 62201) affected shaders:
Instrs: 23341 -> 23379 (+0.16%); split: -0.01%, +0.18%
CodeSize: 121316 -> 121516 (+0.16%); split: -0.01%, +0.17%
SGPRs: 2368 -> 2384 (+0.68%)
Latency: 235153 -> 235374 (+0.09%); split: -0.01%, +0.11%
InvThroughput: 92582 -> 92566 (-0.02%)
SClause: 616 -> 619 (+0.49%)
Copies: 2717 -> 2720 (+0.11%)
PreSGPRs: 1204 -> 1213 (+0.75%)
SALU: 3654 -> 3692 (+1.04%); split: -0.08%, +1.12%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Backport-to: 25.2
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37301>
This commit is contained in:
Rhys Perry 2025-08-25 16:17:10 +01:00 committed by Marge Bot
parent 7f63279307
commit 6d71521ecd

View file

@@ -540,7 +540,8 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_needed
*/
RegClass rc(RegType::sgpr, DIV_ROUND_UP(util_next_power_of_two(bytes_needed), 4u));
aco_ptr<Instruction> load{create_instruction(op, Format::SMEM, 2, 1)};
bool soe = !buffer && offset.id() && const_offset && bld.program->gfx_level >= GFX9;
aco_ptr<Instruction> load{create_instruction(op, Format::SMEM, 2 + soe, 1)};
if (buffer) {
if (const_offset)
offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
@@ -549,13 +550,17 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_needed
load->operands[1] = Operand(offset);
} else {
load->operands[0] = Operand(addr);
if (offset.id() && const_offset)
load->operands[1] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
Operand::c32(const_offset));
else if (offset.id())
load->operands[1] = Operand(offset);
else
if (soe) {
load->operands[1] = Operand::c32(const_offset);
load->operands[2] = Operand(offset);
} else if (offset.id() && const_offset) {
load->operands[0] = Operand(add64_32(bld, addr, Operand::c32(const_offset)));
load->operands[1] = Operand(offset);
} else if (offset.id()) {
load->operands[1] = Operand(offset);
} else {
load->operands[1] = Operand::c32(const_offset);
}
}
Temp val = info.dst.regClass() == rc && rc.bytes() == bytes_needed ? info.dst : bld.tmp(rc);
load->definitions[0] = Definition(val);