nir,aco,ac/llvm: add nir_op_alignbyte_amd

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31904>
This commit is contained in:
Rhys Perry 2024-09-10 12:21:30 +01:00 committed by Marge Bot
parent db0cbb7e9b
commit 0619e4db63
4 changed files with 20 additions and 1 deletion

View file

@ -3634,6 +3634,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
// nir_op_alignbyte_amd: selected as v_alignbyte_b32 through
// emit_vop3a_instruction. Only a destination of register class v1 is handled;
// any other register class is reported via isel_err.
case nir_op_alignbyte_amd: {
if (dst.regClass() == v1) {
// NOTE(review): the trailing `false, 3u` presumably mean "no operand
// reordering flag" and "three source operands" — confirm against the
// emit_vop3a_instruction declaration, which is not visible here.
emit_vop3a_instruction(ctx, instr, aco_opcode::v_alignbyte_b32, dst, false, 3u);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
case nir_op_fquantize2f16: {
Temp src = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == v1) {

View file

@ -445,7 +445,8 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_udot_2x16_uadd:
case nir_op_sdot_2x16_iadd:
case nir_op_udot_2x16_uadd_sat:
case nir_op_sdot_2x16_iadd_sat: type = RegType::vgpr; break;
case nir_op_sdot_2x16_iadd_sat:
case nir_op_alignbyte_amd: type = RegType::vgpr; break;
case nir_op_fmul:
case nir_op_ffma:
case nir_op_fadd:

View file

@ -1245,6 +1245,11 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
(LLVMValueRef[]){src[0], src[1], src[2]}, 3, 0);
break;
/* nir_op_alignbyte_amd: built as a call to the "llvm.amdgcn.alignbyte"
 * intrinsic, forwarding src[0], src[1] and src[2] in that order.
 * NOTE(review): ctx->ac.i32 is presumably the intrinsic's result type and the
 * final 0 an attribute mask — confirm against ac_build_intrinsic.
 */
case nir_op_alignbyte_amd:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32,
(LLVMValueRef[]){src[0], src[1], src[2]}, 3, 0);
break;
default:
fprintf(stderr, "Unknown NIR alu instr: ");
nir_print_instr(&instr->instr, stderr);

View file

@ -1307,6 +1307,11 @@ unop_horiz("cube_amd", 4, tfloat32, 3, tfloat32, """
unop("fsin_amd", tfloat, "sinf(6.2831853 * src0)")
unop("fcos_amd", tfloat, "cosf(6.2831853 * src0)")
# alignbyte_amd: three tuint32 sources, one tuint32 result.
# The expression below forms a 64-bit value with src0 in the upper 32 bits and
# src1 in the lower 32 bits, shifts it right by (src2 & 0x3) bytes (i.e. by
# 0, 8, 16 or 24 bits), and assigns the shifted value to dst.
opcode("alignbyte_amd", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
uint64_t src = src1 | ((uint64_t)src0 << 32);
dst = src >> ((src2 & 0x3) * 8);
""")
# Midgard specific sin and cos
# These expect their inputs to be divided by pi.
unop("fsin_mdg", tfloat, "sinf(3.141592653589793 * src0)")