ac/llvm: Enable helper invocations for quad OPs

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9239
cc: mesa-stable

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27110>
(cherry picked from commit 220c912080)
This commit is contained in:
Konstantin Seurer 2023-09-19 16:07:46 +02:00 committed by Eric Engestrom
parent f948ccf791
commit 13b0648cc5
5 changed files with 20 additions and 5 deletions

View file

@ -1184,7 +1184,7 @@
"description": "ac/llvm: Enable helper invocations for quad OPs",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -3,8 +3,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.tanh_d
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail
dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail

View file

@ -2989,7 +2989,7 @@ LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
static LLVMValueRef ac_build_mode(struct ac_llvm_context *ctx, LLVMValueRef src, const char *mode)
{
LLVMTypeRef src_type = LLVMTypeOf(src);
unsigned bitsize = ac_get_elem_bits(ctx, src_type);
@ -3002,7 +3002,7 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type);
ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0);
if (bitsize < 32)
@ -3011,6 +3011,16 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
/* Thin wrapper: forwards src to ac_build_mode() with the mode string "wwm",
 * which ac_build_mode() splices into the intrinsic name
 * "llvm.amdgcn.<mode>.<type>". Returns whatever ac_build_mode() returns.
 */
static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
{
return ac_build_mode(ctx, src, "wwm");
}
/* Thin wrapper: forwards src to ac_build_mode() with the mode string "wqm",
 * which ac_build_mode() splices into the intrinsic name
 * "llvm.amdgcn.<mode>.<type>". Returns whatever ac_build_mode() returns.
 * Non-static: it is declared in the header and applied to the results of the
 * quad broadcast/swap/swizzle intrinsics in visit_intrinsic().
 */
LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src)
{
return ac_build_mode(ctx, src, "wqm");
}
static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
LLVMValueRef inactive)
{

View file

@ -469,6 +469,8 @@ LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, L
LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src);
LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src);
LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);

View file

@ -3415,21 +3415,26 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_quad_broadcast: {
unsigned lane = nir_src_as_uint(instr->src[1]);
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
result = ac_build_wqm(&ctx->ac, result);
break;
}
case nir_intrinsic_quad_swap_horizontal:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swap_vertical:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swap_diagonal:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swizzle_amd: {
uint32_t mask = nir_intrinsic_swizzle_mask(instr);
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
(mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
result = ac_build_wqm(&ctx->ac, result);
break;
}
case nir_intrinsic_masked_swizzle_amd: {