From 1a9840b3416bb67b9a641705c0f065b7053f4399 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 14 Jul 2022 15:27:57 +0100
Subject: [PATCH] aco/gfx11: workaround VcmpxPermlaneHazard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same as GFX10, but in a separate pass because it's the only hazard that's shared.

No fossil-db changes.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/README-ISA.md       |  6 +++++
 src/amd/compiler/aco_insert_NOPs.cpp | 36 +++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md
index f2474366d37..040d2815353 100644
--- a/src/amd/compiler/README-ISA.md
+++ b/src/amd/compiler/README-ISA.md
@@ -288,3 +288,9 @@ Only `s_waitcnt_vscnt null, 0`. Needed even if the first instruction is a load.
 
 NSA MIMG instructions should be limited to 3 dwords before GFX10.3 to avoid
 stability issues: https://reviews.llvm.org/D103348
+
+## RDNA3 / GFX11 hazards
+
+### VcmpxPermlaneHazard
+
+Same as GFX10.
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index f4eb8165483..02308055fe3 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -193,6 +193,15 @@ struct NOP_ctx_gfx10 {
    }
 };
 
+struct NOP_ctx_gfx11 {
+   /* VcmpxPermlaneHazard: a VOPC wrote exec and no permlane or non-v_nop VALU has followed. */
+   bool has_Vcmpx = false;
+
+   void join(const NOP_ctx_gfx11& other) { has_Vcmpx |= other.has_Vcmpx; }
+
+   bool operator==(const NOP_ctx_gfx11& other) const { return has_Vcmpx == other.has_Vcmpx; }
+};
+
 int
 get_wait_states(aco_ptr<Instruction>& instr)
 {
@@ -856,6 +865,29 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
    }
 }
 
+void
+handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>& instr,
+                         std::vector<aco_ptr<Instruction>>& new_instructions)
+{
+   Builder bld(state.program, &new_instructions);
+
+   /* VcmpxPermlaneHazard
+    * Handle any permlane following a VOPC instruction writing exec, insert v_mov between them.
+    */
+   if (instr->isVOPC() && instr->definitions[0].physReg() == exec) {
+      ctx.has_Vcmpx = true;
+   } else if (ctx.has_Vcmpx && (instr->opcode == aco_opcode::v_permlane16_b32 ||
+                                instr->opcode == aco_opcode::v_permlanex16_b32)) {
+      ctx.has_Vcmpx = false;
+
+      /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
+      bld.vop1(aco_opcode::v_mov_b32, Definition(instr->operands[0].physReg(), v1),
+               Operand(instr->operands[0].physReg(), v1));
+   } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
+      ctx.has_Vcmpx = false;
+   }
+}
+
 template <typename Ctx>
 using HandleInstr = void (*)(State& state, Ctx&, aco_ptr<Instruction>&,
                              std::vector<aco_ptr<Instruction>>&);
@@ -925,7 +957,9 @@ mitigate_hazards(Program* program)
 void
 insert_NOPs(Program* program)
 {
-   if (program->gfx_level >= GFX10_3)
+   if (program->gfx_level >= GFX11)
+      mitigate_hazards<NOP_ctx_gfx11, handle_instruction_gfx11>(program);
+   else if (program->gfx_level >= GFX10_3)
       ; /* no hazards/bugs to mitigate */
    else if (program->gfx_level >= GFX10)
       mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10>(program);