aco: init vm_vsrc/sa_sdst from depctr_wait

fossil-db (navi31):
Totals from 5805 (7.31% of 79377) affected shaders:
Instrs: 14229621 -> 14207115 (-0.16%); split: -0.16%, +0.00%
CodeSize: 75358724 -> 75268624 (-0.12%); split: -0.12%, +0.00%
Latency: 133637034 -> 133624262 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 22067819 -> 22066213 (-0.01%); split: -0.01%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34529>
This commit is contained in:
Rhys Perry 2025-04-15 15:40:11 +01:00 committed by Marge Bot
parent ce2be5ab8e
commit 3d6fa6996c
2 changed files with 22 additions and 20 deletions

View file

@@ -1395,22 +1395,14 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
}
depctr_wait wait = parse_depctr_wait(instr.get());
unsigned va_vdst = wait.va_vdst;
unsigned vm_vsrc = 7;
unsigned sa_sdst = 1;
if (debug_flags & DEBUG_FORCE_WAITDEPS)
wait = parse_depctr_wait(bld.sopp(aco_opcode::s_waitcnt_depctr, 0x0000));
else if (instr->isLDSDIR() && state.program->gfx_level >= GFX12)
wait.vm_vsrc = instr->ldsdir().wait_vsrc ? 7 : 0;
if (debug_flags & DEBUG_FORCE_WAITDEPS) {
bld.sopp(aco_opcode::s_waitcnt_depctr, 0x0000);
va_vdst = 0;
vm_vsrc = 0;
sa_sdst = 0;
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
/* va_vdst already obtained through parse_depctr_wait(). */
vm_vsrc = (instr->salu().imm >> 2) & 0x7;
sa_sdst = instr->salu().imm & 0x1;
} else if (instr->isLDSDIR() && state.program->gfx_level >= GFX12) {
vm_vsrc = instr->ldsdir().wait_vsrc ? 7 : 0;
}
unsigned va_vdst = wait.va_vdst;
unsigned vm_vsrc = wait.vm_vsrc;
unsigned sa_sdst = wait.sa_sdst;
if (instr->isLDSDIR()) {
unsigned count = handle_lds_direct_valu_hazard(state, instr);

View file

@@ -2224,18 +2224,28 @@ BEGIN_TEST(insert_nops.setpc_gfx12)
//! p_unit_test 9
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
//! v1: %0:v[1] = v_mov_b32 %0:s[5]
//! v1: %0:v[2] = v_mov_b32 %0:vcc_lo
//! s1: %0:s[4] = s_mov_b32 0
//! s1: %0:s[5] = v_readfirstlane_b32 %0:v[0]
//! s1: %0:vcc_lo = v_readfirstlane_b32 %0:v[1]
//! s_waitcnt_depctr va_vdst(0) va_sdst(0) va_vcc(0) sa_sdst(0)
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr va_vdst(0) va_vcc(0) sa_sdst(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(258), v1), Operand(PhysReg(vcc), s1));
bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(vcc, s1), Operand(PhysReg(257), v1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
//! p_unit_test 10
//! v1: %0:v[1] = v_mov_b32 %0:s[5]
//! v1: %0:v[2] = v_mov_b32 %0:vcc_lo
//! s1: %0:s[5] = v_readfirstlane_b32 %0:v[0]
//! s1: %0:vcc_lo = v_readfirstlane_b32 %0:v[1]
//! s_waitcnt_depctr va_vdst(0) va_sdst(0) va_vcc(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(5), s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(258), v1), Operand(PhysReg(vcc), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(PhysReg(5), s1), Operand(PhysReg(256), v1));
bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(vcc, s1), Operand(PhysReg(257), v1));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));