ac/llvm: Enable helper invocations for quad OPs

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9239
cc: mesa-stable
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27110>
(cherry picked from commit 220c912080)
2026-05-09 08:58:02 +02:00 · 2023-09-19 16:07:46 +02:00 · 2023-09-19 16:07:46 +02:00 · 13b0648cc5
commit 13b0648cc5
parent f948ccf791
5 changed files with 20 additions and 5 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1184,7 +1184,7 @@
        "description": "ac/llvm: Enable helper invocations for quad OPs",
        "nominated": true,
        "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": null,
        "notes": null
--- a/src/amd/ci/radv-navi21-llvm-fails.txt
+++ b/src/amd/ci/radv-navi21-llvm-fails.txt
@@ -3,8 +3,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.tanh_d
 dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail

-dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail
-
 dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail
 dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail
 dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -2989,7 +2989,7 @@ LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
 }

-static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+static LLVMValueRef ac_build_mode(struct ac_llvm_context *ctx, LLVMValueRef src, const char *mode)
 {
   LLVMTypeRef src_type = LLVMTypeOf(src);
   unsigned bitsize = ac_get_elem_bits(ctx, src_type);
@@ -3002,7 +3002,7 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
      src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");

   ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
-   snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
+   snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type);
   ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0);

   if (bitsize < 32)
@@ -3011,6 +3011,16 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
 }

+static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+   return ac_build_mode(ctx, src, "wwm");
+}
+
+LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+   return ac_build_mode(ctx, src, "wqm");
+}
+
 static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
                                          LLVMValueRef inactive)
 {
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -469,6 +469,8 @@ LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, L
 LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src);
 LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);

+LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src);
+
 LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);

 LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3415,21 +3415,26 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
   case nir_intrinsic_quad_broadcast: {
      unsigned lane = nir_src_as_uint(instr->src[1]);
      result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
+      result = ac_build_wqm(&ctx->ac, result);
      break;
   }
   case nir_intrinsic_quad_swap_horizontal:
      result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
+      result = ac_build_wqm(&ctx->ac, result);
      break;
   case nir_intrinsic_quad_swap_vertical:
      result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
+      result = ac_build_wqm(&ctx->ac, result);
      break;
   case nir_intrinsic_quad_swap_diagonal:
      result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
+      result = ac_build_wqm(&ctx->ac, result);
      break;
   case nir_intrinsic_quad_swizzle_amd: {
      uint32_t mask = nir_intrinsic_swizzle_mask(instr);
      result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
                                     (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
+      result = ac_build_wqm(&ctx->ac, result);
      break;
   }
   case nir_intrinsic_masked_swizzle_amd: {