aco: add a helper function for unsupported DPP opcodes

Cc: mesa-stable

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
This commit is contained in:
Georg Lehmann 2026-01-27 15:12:52 +01:00 committed by Marge Bot
parent d12e3454e6
commit 8e99bf5380
2 changed files with 47 additions and 35 deletions

View file

@ -391,6 +391,50 @@ convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
return tmp;
}
/* Opcode-level DPP check: reports whether "opcode" passes the per-opcode DPP
 * restrictions on "gfx_level".
 *
 * "vop3p" says whether the instruction is in the VOP3P format; it only matters
 * for opcodes that are not named explicitly below, which are accepted exactly
 * when "vop3p" is false.
 */
bool
opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p)
{
   const bool gfx11_or_newer = gfx_level >= GFX11;

   switch (opcode) {
   /* simpler than listing all VOP3P opcodes which do not support DPP */
   case aco_opcode::v_fma_mix_f32:
   case aco_opcode::v_fma_mixlo_f16:
   case aco_opcode::v_fma_mixhi_f16:
   case aco_opcode::p_v_fma_mixlo_f16_rtz:
   case aco_opcode::p_v_fma_mixhi_f16_rtz:
   case aco_opcode::v_dot2_f32_f16:
   case aco_opcode::v_dot2_f32_bf16:
      return gfx11_or_newer;

   /* accepted only before GFX11 */
   case aco_opcode::v_pk_fmac_f16:
      return !gfx11_or_newer;

   /* rejected on every generation;
    * there are more cases but those all take 64-bit inputs */
   case aco_opcode::v_madmk_f32:
   case aco_opcode::v_madak_f32:
   case aco_opcode::v_madmk_f16:
   case aco_opcode::v_madak_f16:
   case aco_opcode::v_fmamk_f32:
   case aco_opcode::v_fmaak_f32:
   case aco_opcode::v_fmamk_f16:
   case aco_opcode::v_fmaak_f16:
   case aco_opcode::v_readfirstlane_b32:
   case aco_opcode::v_cvt_f64_i32:
   case aco_opcode::v_cvt_f64_f32:
   case aco_opcode::v_cvt_f64_u32:
   case aco_opcode::v_mul_lo_u32:
   case aco_opcode::v_mul_lo_i32:
   case aco_opcode::v_mul_hi_u32:
   case aco_opcode::v_mul_hi_i32:
   case aco_opcode::v_qsad_pk_u16_u8:
   case aco_opcode::v_mqsad_pk_u16_u8:
   case aco_opcode::v_mqsad_u32_u8:
   case aco_opcode::v_mad_u64_u32:
   case aco_opcode::v_mad_i64_i32:
   case aco_opcode::v_permlane16_b32:
   case aco_opcode::v_permlanex16_b32:
   case aco_opcode::v_permlane64_b32:
   case aco_opcode::v_readlane_b32_e64:
   case aco_opcode::v_writelane_b32_e64:
      return false;

   default:
      break;
   }

   /* every opcode not named above: accepted unless it is VOP3P */
   return !vop3p;
}
bool
can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8)
{
@ -433,41 +477,7 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
if (instr->writes_exec())
return false;
/* simpler than listing all VOP3P opcodes which do not support DPP */
if (instr->isVOP3P()) {
return instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
instr->opcode == aco_opcode::p_v_fma_mixlo_f16_rtz ||
instr->opcode == aco_opcode::p_v_fma_mixhi_f16_rtz ||
instr->opcode == aco_opcode::v_dot2_f32_f16 ||
instr->opcode == aco_opcode::v_dot2_f32_bf16;
}
if (instr->opcode == aco_opcode::v_pk_fmac_f16)
return gfx_level < GFX11;
/* there are more cases but those all take 64-bit inputs */
return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
instr->opcode != aco_opcode::v_readfirstlane_b32 &&
instr->opcode != aco_opcode::v_cvt_f64_i32 &&
instr->opcode != aco_opcode::v_cvt_f64_f32 &&
instr->opcode != aco_opcode::v_cvt_f64_u32 && instr->opcode != aco_opcode::v_mul_lo_u32 &&
instr->opcode != aco_opcode::v_mul_lo_i32 && instr->opcode != aco_opcode::v_mul_hi_u32 &&
instr->opcode != aco_opcode::v_mul_hi_i32 &&
instr->opcode != aco_opcode::v_qsad_pk_u16_u8 &&
instr->opcode != aco_opcode::v_mqsad_pk_u16_u8 &&
instr->opcode != aco_opcode::v_mqsad_u32_u8 &&
instr->opcode != aco_opcode::v_mad_u64_u32 &&
instr->opcode != aco_opcode::v_mad_i64_i32 &&
instr->opcode != aco_opcode::v_permlane16_b32 &&
instr->opcode != aco_opcode::v_permlanex16_b32 &&
instr->opcode != aco_opcode::v_permlane64_b32 &&
instr->opcode != aco_opcode::v_readlane_b32_e64 &&
instr->opcode != aco_opcode::v_writelane_b32_e64;
return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P());
}
aco_ptr<Instruction>

View file

@ -2040,6 +2040,8 @@ bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
uint8_t get_gfx11_true16_mask(aco_opcode op);
bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
bool opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p);
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
bool can_write_m0(const aco_ptr<Instruction>& instr);
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */