mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 12:50:10 +01:00
radv/llvm: fix subgroup shuffle for chips without bpermute
bpermute only exists on GFX8+, and on GFX10 only with Wave32. On chips without it, we instead have to use readlane with a waterfall loop to defeat the LLVM backend.

This fixes DOOM Eternal, which requires subgroup shuffle.

Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
This commit is contained in:
parent
2a70a1d69d
commit
7ac8bb33cd
2 changed files with 30 additions and 5 deletions
|
|
@@ -3950,8 +3950,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_shuffle:
|
||||
result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[1]));
|
||||
if (ctx->ac.chip_class == GFX8 ||
|
||||
ctx->ac.chip_class == GFX9 ||
|
||||
(ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
|
||||
result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
|
||||
get_src(ctx, instr->src[1]));
|
||||
} else {
|
||||
LLVMValueRef src = get_src(ctx, instr->src[0]);
|
||||
LLVMValueRef index = get_src(ctx, instr->src[1]);
|
||||
LLVMTypeRef type = LLVMTypeOf(src);
|
||||
struct waterfall_context wctx;
|
||||
LLVMValueRef index_val;
|
||||
|
||||
index_val = enter_waterfall(ctx, &wctx, index, true);
|
||||
|
||||
src = LLVMBuildZExt(ctx->ac.builder, src,
|
||||
ctx->ac.i32, "");
|
||||
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
|
||||
ctx->ac.i32,
|
||||
(LLVMValueRef []) { src, index_val }, 2,
|
||||
AC_FUNC_ATTR_READNONE |
|
||||
AC_FUNC_ATTR_CONVERGENT);
|
||||
|
||||
result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
|
||||
|
||||
result = exit_waterfall(ctx, &wctx, result);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_reduce:
|
||||
result = ac_build_reduce(&ctx->ac,
|
||||
|
|
|
|||
|
|
@@ -1481,9 +1481,9 @@ radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
|
|||
VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
|
||||
VK_SUBGROUP_FEATURE_QUAD_BIT;
|
||||
|
||||
if (pdevice->rad_info.chip_class == GFX8 ||
|
||||
pdevice->rad_info.chip_class == GFX9 ||
|
||||
(pdevice->rad_info.chip_class == GFX10 && pdevice->use_aco)) {
|
||||
if (((pdevice->rad_info.chip_class == GFX6 ||
|
||||
pdevice->rad_info.chip_class == GFX7) && !pdevice->use_aco) ||
|
||||
pdevice->rad_info.chip_class >= GFX8) {
|
||||
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue