From 8e99bf53805c4fa3e40817c0aeb441a02a8814de Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 27 Jan 2026 15:12:52 +0100 Subject: [PATCH] aco: add a helper function for non supported DPP opcodes Cc: mesa-stable Reviewed-by: Rhys Perry Part-of: --- /* Review notes. They sit after the three-dash separator, which is where a mailed patch carries free-form remarks.
 * Summary: the opcode checks that used to end can_use_DPP() are moved into a new helper,
 *   opcode_supports_dpp(gfx_level, opcode, vop3p), and can_use_DPP() now ends by returning
 *   opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P()).
 * Helper results as written in the added switch:
 *   - v_pk_fmac_f16: gfx_level < GFX11
 *   - the case list from v_madmk_f32 through v_writelane_b32_e64: false
 *   - v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16, p_v_fma_mixlo_f16_rtz, p_v_fma_mixhi_f16_rtz,
 *     v_dot2_f32_f16, v_dot2_f32_bf16: gfx_level >= GFX11
 *   - any other opcode: !vop3p (the vop3p flag is only consulted in the default case)
 * NOTE(review): two inputs give a different result than the removed code:
 *   (a) the mix and dot2 opcodes returned true before and now return gfx_level >= GFX11;
 *   (b) v_pk_fmac_f16 with vop3p set returned false before and now returns gfx_level < GFX11.
 *   The earlier part of can_use_DPP() is outside this hunk, so confirm that neither case is
 *   reachable with a differing result.
 * NOTE(review): the aco_ir.h hunk adds two lines; the second added line, a can_use_DPP prototype,
 *   reads the same as the context line that follows it. Looks like a redundant redeclaration;
 *   confirm whether only the opcode_supports_dpp prototype was meant to be added.
 */ src/amd/compiler/aco_ir.cpp | 80 +++++++++++++++++++++---------------- src/amd/compiler/aco_ir.h | 2 + 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index d43a37a2631..6edd4eb81d6 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -391,6 +391,50 @@ convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr& instr) return tmp; } +bool +opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p) +{ + switch (opcode) { + case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11; + /* there are more cases but those all take 64-bit inputs */ + case aco_opcode::v_madmk_f32: + case aco_opcode::v_madak_f32: + case aco_opcode::v_madmk_f16: + case aco_opcode::v_madak_f16: + case aco_opcode::v_fmamk_f32: + case aco_opcode::v_fmaak_f32: + case aco_opcode::v_fmamk_f16: + case aco_opcode::v_fmaak_f16: + case aco_opcode::v_readfirstlane_b32: + case aco_opcode::v_cvt_f64_i32: + case aco_opcode::v_cvt_f64_f32: + case aco_opcode::v_cvt_f64_u32: + case aco_opcode::v_mul_lo_u32: + case aco_opcode::v_mul_lo_i32: + case aco_opcode::v_mul_hi_u32: + case aco_opcode::v_mul_hi_i32: + case aco_opcode::v_qsad_pk_u16_u8: + case aco_opcode::v_mqsad_pk_u16_u8: + case aco_opcode::v_mqsad_u32_u8: + case aco_opcode::v_mad_u64_u32: + case aco_opcode::v_mad_i64_i32: + case aco_opcode::v_permlane16_b32: + case aco_opcode::v_permlanex16_b32: + case aco_opcode::v_permlane64_b32: + case aco_opcode::v_readlane_b32_e64: + case aco_opcode::v_writelane_b32_e64: return false; + /* simpler than listing all VOP3P opcodes which do not support DPP */ + case aco_opcode::v_fma_mix_f32: + case aco_opcode::v_fma_mixlo_f16: + case aco_opcode::v_fma_mixhi_f16: + 
case aco_opcode::p_v_fma_mixlo_f16_rtz: + case aco_opcode::p_v_fma_mixhi_f16_rtz: + case aco_opcode::v_dot2_f32_f16: + case aco_opcode::v_dot2_f32_bf16: return gfx_level >= GFX11; + default: return !vop3p; + } +} + bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp8) { @@ -433,41 +477,7 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp if (instr->writes_exec()) return false; - /* simpler than listing all VOP3P opcodes which do not support DPP */ - if (instr->isVOP3P()) { - return instr->opcode == aco_opcode::v_fma_mix_f32 || - instr->opcode == aco_opcode::v_fma_mixlo_f16 || - instr->opcode == aco_opcode::v_fma_mixhi_f16 || - instr->opcode == aco_opcode::p_v_fma_mixlo_f16_rtz || - instr->opcode == aco_opcode::p_v_fma_mixhi_f16_rtz || - instr->opcode == aco_opcode::v_dot2_f32_f16 || - instr->opcode == aco_opcode::v_dot2_f32_bf16; - } - - if (instr->opcode == aco_opcode::v_pk_fmac_f16) - return gfx_level < GFX11; - - /* there are more cases but those all take 64-bit inputs */ - return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 && - instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 && - instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 && - instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 && - instr->opcode != aco_opcode::v_readfirstlane_b32 && - instr->opcode != aco_opcode::v_cvt_f64_i32 && - instr->opcode != aco_opcode::v_cvt_f64_f32 && - instr->opcode != aco_opcode::v_cvt_f64_u32 && instr->opcode != aco_opcode::v_mul_lo_u32 && - instr->opcode != aco_opcode::v_mul_lo_i32 && instr->opcode != aco_opcode::v_mul_hi_u32 && - instr->opcode != aco_opcode::v_mul_hi_i32 && - instr->opcode != aco_opcode::v_qsad_pk_u16_u8 && - instr->opcode != aco_opcode::v_mqsad_pk_u16_u8 && - instr->opcode != aco_opcode::v_mqsad_u32_u8 && - instr->opcode != aco_opcode::v_mad_u64_u32 && - instr->opcode != 
aco_opcode::v_mad_i64_i32 && - instr->opcode != aco_opcode::v_permlane16_b32 && - instr->opcode != aco_opcode::v_permlanex16_b32 && - instr->opcode != aco_opcode::v_permlane64_b32 && - instr->opcode != aco_opcode::v_readlane_b32_e64 && - instr->opcode != aco_opcode::v_writelane_b32_e64; + return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P()); } aco_ptr diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 23bdb12e1d3..76ffc718534 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2040,6 +2040,8 @@ bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx); bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op); uint8_t get_gfx11_true16_mask(aco_opcode op); bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr& instr, bool pre_ra); +bool opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p); +bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp8); bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp8); bool can_write_m0(const aco_ptr& instr); /* updates "instr" and returns the old instruction (or NULL if no update was needed) */