aco/insert_NOPs: remove redundant VALUReadSGPRHazard waits

Mostly removes SALU->VALU waits if the VALU writes an SGPR.

Foz-DB GFX1201:
Totals from 18553 (22.51% of 82419) affected shaders:
Instrs: 27388414 -> 27321118 (-0.25%)
CodeSize: 145389276 -> 145118128 (-0.19%); split: -0.19%, +0.00%
Latency: 200288087 -> 200252583 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 36311237 -> 36307369 (-0.01%); split: -0.01%, +0.00%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38445>
This commit is contained in:
Georg Lehmann 2025-11-14 10:46:58 +01:00 committed by Marge Bot
parent b1d730982e
commit 018f45f981

View file

@@ -1610,7 +1610,8 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
for (unsigned i = 0; i < op.size(); i++) {
PhysReg reg = op.physReg().advance(i * 4);
if (ctx.sgpr_read_by_valu_then_wr_by_salu.get(reg) < expiry_count) {
if (ctx.sgpr_read_by_valu_then_wr_by_salu.get(reg) < expiry_count &&
wait.sa_sdst > 0) {
imm &= 0xfffe;
wait.sa_sdst = 0;
}
@@ -1620,11 +1621,13 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
/* s_wait_alu on va_sdst (if non-VCC SGPR) or va_vcc (if VCC SGPR) */
if (ctx.sgpr_read_by_valu_then_wr_by_valu[reg]) {
bool is_vcc = reg == vcc || reg == vcc_hi;
imm &= is_vcc ? 0xfffd : 0xf1ff;
if (is_vcc)
if (is_vcc && wait.va_vcc > 0) {
imm &= 0xfffd;
wait.va_vcc = 0;
else
} else if (!is_vcc && wait.va_sdst > 0) {
imm &= 0xf1ff;
wait.va_sdst = 0;
}
}
}
}