mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-06 03:50:26 +01:00
aco: add a helper function for non supported DPP opcodes
Cc: mesa-stable Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
This commit is contained in:
parent
d12e3454e6
commit
8e99bf5380
2 changed files with 47 additions and 35 deletions
|
|
@ -391,6 +391,50 @@ convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
bool
|
||||
opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p)
|
||||
{
|
||||
switch (opcode) {
|
||||
case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11;
|
||||
/* there are more cases but those all take 64-bit inputs */
|
||||
case aco_opcode::v_madmk_f32:
|
||||
case aco_opcode::v_madak_f32:
|
||||
case aco_opcode::v_madmk_f16:
|
||||
case aco_opcode::v_madak_f16:
|
||||
case aco_opcode::v_fmamk_f32:
|
||||
case aco_opcode::v_fmaak_f32:
|
||||
case aco_opcode::v_fmamk_f16:
|
||||
case aco_opcode::v_fmaak_f16:
|
||||
case aco_opcode::v_readfirstlane_b32:
|
||||
case aco_opcode::v_cvt_f64_i32:
|
||||
case aco_opcode::v_cvt_f64_f32:
|
||||
case aco_opcode::v_cvt_f64_u32:
|
||||
case aco_opcode::v_mul_lo_u32:
|
||||
case aco_opcode::v_mul_lo_i32:
|
||||
case aco_opcode::v_mul_hi_u32:
|
||||
case aco_opcode::v_mul_hi_i32:
|
||||
case aco_opcode::v_qsad_pk_u16_u8:
|
||||
case aco_opcode::v_mqsad_pk_u16_u8:
|
||||
case aco_opcode::v_mqsad_u32_u8:
|
||||
case aco_opcode::v_mad_u64_u32:
|
||||
case aco_opcode::v_mad_i64_i32:
|
||||
case aco_opcode::v_permlane16_b32:
|
||||
case aco_opcode::v_permlanex16_b32:
|
||||
case aco_opcode::v_permlane64_b32:
|
||||
case aco_opcode::v_readlane_b32_e64:
|
||||
case aco_opcode::v_writelane_b32_e64: return false;
|
||||
/* simpler than listing all VOP3P opcodes which do not support DPP */
|
||||
case aco_opcode::v_fma_mix_f32:
|
||||
case aco_opcode::v_fma_mixlo_f16:
|
||||
case aco_opcode::v_fma_mixhi_f16:
|
||||
case aco_opcode::p_v_fma_mixlo_f16_rtz:
|
||||
case aco_opcode::p_v_fma_mixhi_f16_rtz:
|
||||
case aco_opcode::v_dot2_f32_f16:
|
||||
case aco_opcode::v_dot2_f32_bf16: return gfx_level >= GFX11;
|
||||
default: return !vop3p;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8)
|
||||
{
|
||||
|
|
@ -433,41 +477,7 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
|
|||
if (instr->writes_exec())
|
||||
return false;
|
||||
|
||||
/* simpler than listing all VOP3P opcodes which do not support DPP */
|
||||
if (instr->isVOP3P()) {
|
||||
return instr->opcode == aco_opcode::v_fma_mix_f32 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
|
||||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
|
||||
instr->opcode == aco_opcode::p_v_fma_mixlo_f16_rtz ||
|
||||
instr->opcode == aco_opcode::p_v_fma_mixhi_f16_rtz ||
|
||||
instr->opcode == aco_opcode::v_dot2_f32_f16 ||
|
||||
instr->opcode == aco_opcode::v_dot2_f32_bf16;
|
||||
}
|
||||
|
||||
if (instr->opcode == aco_opcode::v_pk_fmac_f16)
|
||||
return gfx_level < GFX11;
|
||||
|
||||
/* there are more cases but those all take 64-bit inputs */
|
||||
return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
|
||||
instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
|
||||
instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
|
||||
instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
|
||||
instr->opcode != aco_opcode::v_readfirstlane_b32 &&
|
||||
instr->opcode != aco_opcode::v_cvt_f64_i32 &&
|
||||
instr->opcode != aco_opcode::v_cvt_f64_f32 &&
|
||||
instr->opcode != aco_opcode::v_cvt_f64_u32 && instr->opcode != aco_opcode::v_mul_lo_u32 &&
|
||||
instr->opcode != aco_opcode::v_mul_lo_i32 && instr->opcode != aco_opcode::v_mul_hi_u32 &&
|
||||
instr->opcode != aco_opcode::v_mul_hi_i32 &&
|
||||
instr->opcode != aco_opcode::v_qsad_pk_u16_u8 &&
|
||||
instr->opcode != aco_opcode::v_mqsad_pk_u16_u8 &&
|
||||
instr->opcode != aco_opcode::v_mqsad_u32_u8 &&
|
||||
instr->opcode != aco_opcode::v_mad_u64_u32 &&
|
||||
instr->opcode != aco_opcode::v_mad_i64_i32 &&
|
||||
instr->opcode != aco_opcode::v_permlane16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlane64_b32 &&
|
||||
instr->opcode != aco_opcode::v_readlane_b32_e64 &&
|
||||
instr->opcode != aco_opcode::v_writelane_b32_e64;
|
||||
return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P());
|
||||
}
|
||||
|
||||
aco_ptr<Instruction>
|
||||
|
|
|
|||
|
|
@ -2040,6 +2040,8 @@ bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
|
|||
bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
|
||||
uint8_t get_gfx11_true16_mask(aco_opcode op);
|
||||
bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
|
||||
bool opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p);
|
||||
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
|
||||
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
|
||||
bool can_write_m0(const aco_ptr<Instruction>& instr);
|
||||
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue