From 8f5ee70d8581ca2bc58b488e9fcdb47c1b4709bb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 22 Aug 2024 15:39:08 +0100 Subject: [PATCH] aco: also consider VALU reads for VALUMaskWriteHazard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fossil-db (navi31): Totals from 9776 (12.31% of 79395) affected shaders: Instrs: 19348258 -> 19383680 (+0.18%); split: -0.00%, +0.19% CodeSize: 101223460 -> 101366964 (+0.14%); split: -0.01%, +0.15% Latency: 172853115 -> 172866070 (+0.01%); split: -0.01%, +0.01% InvThroughput: 27590468 -> 27592390 (+0.01%); split: -0.00%, +0.01% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11550 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11436 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11337 Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11738 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11741 Backport-to: 24.1 Backport-to: 24.2 Part-of: --- src/amd/compiler/README-ISA.md | 2 +- src/amd/compiler/aco_insert_NOPs.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index 3bd7dfeb873..20a5d9b1ec6 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -372,7 +372,7 @@ A va_vdst=0 wait: `s_waitcnt_deptr 0x0fff` ### VALUMaskWriteHazard Triggered by: -SALU writing then reading a SGPR that was previously used as a lane mask for a VALU. +SALU writing then SALU or VALU reading a SGPR that was previously used as a lane mask for a VALU. Mitigated by: A VALU instruction reading a non-exec SGPR before the SALU write, or a sa_sdst=0 wait: diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index b9744434fd5..2c506da584b 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1449,13 +1449,14 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& if (state.program->gfx_level < GFX12) { /* VALUMaskWriteHazard - * VALU reads SGPR as a lane mask and later written by SALU cannot safely be read by SALU. + * VALU reads SGPR as a lane mask and later written by SALU cannot safely be read by SALU or + * VALU. */ if (state.program->wave_size == 64 && instr->isSALU() && check_written_regs(instr, ctx.sgpr_read_by_valu_as_lanemask)) { ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu = ctx.sgpr_read_by_valu_as_lanemask; ctx.sgpr_read_by_valu_as_lanemask.reset(); - } else if (state.program->wave_size == 64 && instr->isSALU() && + } else if (state.program->wave_size == 64 && (instr->isSALU() || instr->isVALU()) && check_read_regs(instr, ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu)) { bld.sopp(aco_opcode::s_waitcnt_depctr, 0xfffe); sa_sdst = 0;