mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 22:10:11 +01:00
aco: don't consider sa_sdst=0 before SALU write to fix VALUMaskWriteHazard
LLVM does but that's probably a bug. fossil-db (navi31): Totals from 311 (0.39% of 79395) affected shaders: Instrs: 380453 -> 381075 (+0.16%) CodeSize: 1961012 -> 1964744 (+0.19%) Latency: 4799095 -> 4800313 (+0.03%) InvThroughput: 958358 -> 958904 (+0.06%) VALU: 242322 -> 242633 (+0.13%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Backport-to: 24.1 Backport-to: 24.2 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30818>
This commit is contained in:
parent
8f5ee70d85
commit
b1ba7d1b99
3 changed files with 35 additions and 8 deletions
|
|
@ -375,5 +375,5 @@ Triggered by:
|
|||
SALU writing then SALU or VALU reading a SGPR that was previously used as a lane mask for a VALU.
|
||||
|
||||
Mitigated by:
|
||||
A VALU instruction reading a non-exec SGPR before the SALU write, or a sa_sdst=0 wait:
|
||||
`s_waitcnt_depctr 0xfffe`
|
||||
A VALU instruction reading a non-exec SGPR before the SALU write, or a sa_sdst=0 wait after the
|
||||
SALU write: `s_waitcnt_depctr 0xfffe`
|
||||
|
|
|
|||
|
|
@ -1631,6 +1631,7 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
|||
Builder bld(state.program, &new_instructions);
|
||||
|
||||
unsigned waitcnt_depctr = 0xffff;
|
||||
bool valu_read_sgpr = false;
|
||||
|
||||
/* LdsDirectVALUHazard/VALUPartialForwardingHazard/VALUTransUseHazard */
|
||||
bool has_vdst0_since_valu = true;
|
||||
|
|
@ -1651,12 +1652,15 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
|||
}
|
||||
|
||||
/* VALUMaskWriteHazard */
|
||||
if (state.program->gfx_level < GFX12 && state.program->wave_size == 64 &&
|
||||
(ctx.sgpr_read_by_valu_as_lanemask.any() ||
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.any())) {
|
||||
waitcnt_depctr &= 0xfffe;
|
||||
ctx.sgpr_read_by_valu_as_lanemask.reset();
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset();
|
||||
if (state.program->gfx_level < GFX12 && state.program->wave_size == 64) {
|
||||
if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.any()) {
|
||||
waitcnt_depctr &= 0xfffe;
|
||||
ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset();
|
||||
}
|
||||
if (ctx.sgpr_read_by_valu_as_lanemask.any()) {
|
||||
valu_read_sgpr = true;
|
||||
ctx.sgpr_read_by_valu_as_lanemask.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/* LdsDirectVMEMHazard */
|
||||
|
|
@ -1671,6 +1675,16 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
|||
|
||||
if (waitcnt_depctr != 0xffff)
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, waitcnt_depctr);
|
||||
|
||||
if (valu_read_sgpr) {
|
||||
/* This has to be after the s_waitcnt_depctr so that the instruction is not involved in any
|
||||
* other hazards. */
|
||||
bld.vop3(aco_opcode::v_xor3_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
|
||||
Operand(PhysReg(0), s1), Operand(PhysReg(0), s1));
|
||||
|
||||
/* workaround possible LdsDirectVALUHazard/VALUPartialForwardingHazard */
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, 0x0fff);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
|
|
|
|||
|
|
@ -1646,9 +1646,22 @@ BEGIN_TEST(insert_nops.setpc_gfx11)
|
|||
/* VALUMaskWriteHazard */
|
||||
//! p_unit_test 4
|
||||
//! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc
|
||||
//! s1: %0:vcc_hi = s_mov_b32 0
|
||||
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(vcc, s2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(vcc_hi, s1), Operand::c32(0));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
//! p_unit_test 8
|
||||
//! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc
|
||||
//! s_waitcnt_depctr va_vdst(0)
|
||||
//! v1: %0:v[0] = v_xor3_b32 %0:v[0], %0:s[0], %0:s[0]
|
||||
//! s_waitcnt_depctr va_vdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
|
||||
Operand::zero(), Operand(vcc, s2));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue