From 13b0648cc53939911f171cd965e898c3c4ed0f24 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Tue, 19 Sep 2023 16:07:46 +0200 Subject: [PATCH] ac/llvm: Enable helper invocations for quad OPs Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9239 cc: mesa-stable Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: (cherry picked from commit 220c91208037a499ff7a553f263d20e5844094a4) --- .pick_status.json | 2 +- src/amd/ci/radv-navi21-llvm-fails.txt | 2 -- src/amd/llvm/ac_llvm_build.c | 14 ++++++++++++-- src/amd/llvm/ac_llvm_build.h | 2 ++ src/amd/llvm/ac_nir_to_llvm.c | 5 +++++ 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 3e8514513a8..1e6b273450a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1184,7 +1184,7 @@ "description": "ac/llvm: Enable helper invocations for quad OPs", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/ci/radv-navi21-llvm-fails.txt b/src/amd/ci/radv-navi21-llvm-fails.txt index 15922d1f7cd..12cd7f2dfc9 100644 --- a/src/amd/ci/radv-navi21-llvm-fails.txt +++ b/src/amd/ci/radv-navi21-llvm-fails.txt @@ -3,8 +3,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.tanh_d dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail -dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail - dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index d63653e6f49..25013b7667c 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -2989,7 +2989,7 @@ LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } -static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) +static LLVMValueRef ac_build_mode(struct ac_llvm_context *ctx, LLVMValueRef src, const char *mode) { LLVMTypeRef src_type = LLVMTypeOf(src); unsigned bitsize = ac_get_elem_bits(ctx, src_type); @@ -3002,7 +3002,7 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); + snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type); ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0); if (bitsize < 32) @@ -3011,6 +3011,16 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } +static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) +{ + return ac_build_mode(ctx, src, "wwm"); +} + +LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src) +{ + return ac_build_mode(ctx, src, "wqm"); +} + static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef inactive) { diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index bede3ff2e0e..0ec793fef50 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -469,6 +469,8 @@ LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, L LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src); LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask); +LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src); + LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index c2fdb9f88ce..98589f0dc8c 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3415,21 +3415,26 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_quad_broadcast: { unsigned lane = nir_src_as_uint(instr->src[1]); result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane); + result = ac_build_wqm(&ctx->ac, result); break; } case nir_intrinsic_quad_swap_horizontal: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swap_vertical: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swap_diagonal: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swizzle_amd: { uint32_t mask = nir_intrinsic_swizzle_mask(instr); result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3, (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3); + result = ac_build_wqm(&ctx->ac, result); break; } case nir_intrinsic_masked_swizzle_amd: {