From cea6723243d7b22e8d2c92790eba4e27bf2a0e06 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 17 Oct 2022 12:49:33 +0200
Subject: [PATCH] ac/llvm: Implement signed idot on GFX11.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Georg Lehmann
Reviewed-by: Marek Olšák
Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/llvm/ac_llvm_build.c  | 16 ++++++++++++++++
 src/amd/llvm/ac_llvm_build.h  |  3 +++
 src/amd/llvm/ac_nir_to_llvm.c | 25 +++++++++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 0758fee8b01..8fecb47c830 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -2762,6 +2762,22 @@ LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef
    return result;
 }
 
+LLVMValueRef ac_build_sudot_4x8(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+                                LLVMValueRef s2, bool clamp, unsigned neg_lo)
+{
+   const char *name = "llvm.amdgcn.sudot4";
+   LLVMValueRef src[6];
+
+   src[0] = LLVMConstInt(ctx->i1, !!(neg_lo & 0x1), false);
+   src[1] = s0;
+   src[2] = LLVMConstInt(ctx->i1, !!(neg_lo & 0x2), false);
+   src[3] = s1;
+   src[4] = s2;
+   src[5] = LLVMConstInt(ctx->i1, clamp, false);
+
+   return ac_build_intrinsic(ctx, name, ctx->i32, src, 6, AC_FUNC_ATTR_READNONE);
+}
+
 void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
 {
    LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index cf06fba1a4f..823005ebb37 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -463,6 +463,9 @@ LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
 
 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0);
 
+LLVMValueRef ac_build_sudot_4x8(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+                                LLVMValueRef s2, bool clamp, unsigned neg_lo);
+
 void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
 
 void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 4a5a5dbc73f..36b0c651e15 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1312,14 +1312,27 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
    }
 
    case nir_op_sdot_4x8_iadd:
+   case nir_op_sdot_4x8_iadd_sat: {
+      if (ctx->ac.gfx_level >= GFX11) {
+         result = ac_build_sudot_4x8(&ctx->ac, src[0], src[1], src[2],
+                                     instr->op == nir_op_sdot_4x8_iadd_sat, 0x3);
+      } else {
+         const char *name = "llvm.amdgcn.sdot4";
+         src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_4x8_iadd_sat, false);
+         result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
+      }
+      break;
+   }
+   case nir_op_sudot_4x8_iadd:
+   case nir_op_sudot_4x8_iadd_sat: {
+      result = ac_build_sudot_4x8(&ctx->ac, src[0], src[1], src[2],
+                                  instr->op == nir_op_sudot_4x8_iadd_sat, 0x1);
+      break;
+   }
    case nir_op_udot_4x8_uadd:
-   case nir_op_sdot_4x8_iadd_sat:
    case nir_op_udot_4x8_uadd_sat: {
-      const char *name = instr->op == nir_op_sdot_4x8_iadd ||
-                         instr->op == nir_op_sdot_4x8_iadd_sat
-                         ? "llvm.amdgcn.sdot4" : "llvm.amdgcn.udot4";
-      src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_4x8_iadd_sat ||
-                                        instr->op == nir_op_udot_4x8_uadd_sat, false);
+      const char *name = "llvm.amdgcn.udot4";
+      src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_udot_4x8_uadd_sat, false);
       result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
       break;
    }