aco,nir: support subdword v_permlane16_b32

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38389>
This commit is contained in:
Georg Lehmann 2025-09-06 21:02:18 +02:00 committed by Marge Bot
parent a4a0d28ea6
commit 3a175b54a4
2 changed files with 3 additions and 2 deletions

View file

@ -4707,7 +4707,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
if (src.regClass() == s1) {
bld.copy(Definition(dst), src);
} else if (dst.regClass() == v1 && src.regClass() == v1) {
} else if (dst.type() == RegType::vgpr && src.type() == RegType::vgpr && dst.size() == 1 &&
src.size() == 1) {
bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src,
bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)),
bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)));

View file

@ -623,7 +623,7 @@ intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src
# src = [ mask, addition ]
intrinsic("mbcnt_amd", src_comp=[1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_REORDER, CAN_ELIMINATE])
# Compiled to v_permlane16_b32. src = [ value, lanesel_lo, lanesel_hi ]
intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=src0, flags=[CAN_ELIMINATE])
# subgroup shuffle up/down with cluster size 16.
# base in [-15, -1]: DPP_ROW_SR
# base in [ 1, 15]: DPP_ROW_SL, otherwise invalid.