mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
ac/llvm: Enable helper invocations for quad OPs
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9239
cc: mesa-stable
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27110>
(cherry picked from commit 220c912080)
This commit is contained in:
parent
f948ccf791
commit
13b0648cc5
5 changed files with 20 additions and 5 deletions
|
|
@@ -1184,7 +1184,7 @@
|
|||
"description": "ac/llvm: Enable helper invocations for quad OPs",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@@ -3,8 +3,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.tanh_d
|
|||
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail
|
||||
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail
|
||||
|
||||
dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail
|
||||
|
||||
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail
|
||||
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail
|
||||
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail
|
||||
|
|
|
|||
|
|
@@ -2989,7 +2989,7 @@ LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
|
|||
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
|
||||
static LLVMValueRef ac_build_mode(struct ac_llvm_context *ctx, LLVMValueRef src, const char *mode)
|
||||
{
|
||||
LLVMTypeRef src_type = LLVMTypeOf(src);
|
||||
unsigned bitsize = ac_get_elem_bits(ctx, src_type);
|
||||
|
|
@@ -3002,7 +3002,7 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
|
|||
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
|
||||
|
||||
ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
|
||||
snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
|
||||
snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type);
|
||||
ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0);
|
||||
|
||||
if (bitsize < 32)
|
||||
|
|
@@ -3011,6 +3011,16 @@ static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
|
|||
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
|
||||
{
|
||||
return ac_build_mode(ctx, src, "wwm");
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src)
|
||||
{
|
||||
return ac_build_mode(ctx, src, "wqm");
|
||||
}
|
||||
|
||||
static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
LLVMValueRef inactive)
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -469,6 +469,8 @@ LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, L
|
|||
LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src);
|
||||
LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
|
||||
|
||||
LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src);
|
||||
|
||||
LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
|
||||
|
||||
LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
|
||||
|
|
|
|||
|
|
@@ -3415,21 +3415,26 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
case nir_intrinsic_quad_broadcast: {
|
||||
unsigned lane = nir_src_as_uint(instr->src[1]);
|
||||
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
|
||||
result = ac_build_wqm(&ctx->ac, result);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
|
||||
result = ac_build_wqm(&ctx->ac, result);
|
||||
break;
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
|
||||
result = ac_build_wqm(&ctx->ac, result);
|
||||
break;
|
||||
case nir_intrinsic_quad_swap_diagonal:
|
||||
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
|
||||
result = ac_build_wqm(&ctx->ac, result);
|
||||
break;
|
||||
case nir_intrinsic_quad_swizzle_amd: {
|
||||
uint32_t mask = nir_intrinsic_swizzle_mask(instr);
|
||||
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
|
||||
(mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
|
||||
result = ac_build_wqm(&ctx->ac, result);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_masked_swizzle_amd: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue