radv/llvm: fix subgroup shuffle for chips without bpermute

bpermute only exists on GFX8+, and on GFX10 only with Wave32. On chips
without it, we have to use readlane with a waterfall loop instead, to
defeat the LLVM backend.

This fixes DOOM Eternal, which requires subgroup shuffle.

Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
This commit is contained in:
Samuel Pitoiset 2020-03-23 12:02:15 +01:00 committed by Marge Bot
parent 2a70a1d69d
commit 7ac8bb33cd
2 changed files with 30 additions and 5 deletions

View file

@ -3950,8 +3950,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
break;
}
case nir_intrinsic_shuffle:
result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]));
if (ctx->ac.chip_class == GFX8 ||
ctx->ac.chip_class == GFX9 ||
(ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]));
} else {
LLVMValueRef src = get_src(ctx, instr->src[0]);
LLVMValueRef index = get_src(ctx, instr->src[1]);
LLVMTypeRef type = LLVMTypeOf(src);
struct waterfall_context wctx;
LLVMValueRef index_val;
index_val = enter_waterfall(ctx, &wctx, index, true);
src = LLVMBuildZExt(ctx->ac.builder, src,
ctx->ac.i32, "");
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
ctx->ac.i32,
(LLVMValueRef []) { src, index_val }, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
result = exit_waterfall(ctx, &wctx, result);
}
break;
case nir_intrinsic_reduce:
result = ac_build_reduce(&ctx->ac,

View file

@ -1481,9 +1481,9 @@ radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT;
if (pdevice->rad_info.chip_class == GFX8 ||
pdevice->rad_info.chip_class == GFX9 ||
(pdevice->rad_info.chip_class == GFX10 && pdevice->use_aco)) {
if (((pdevice->rad_info.chip_class == GFX6 ||
pdevice->rad_info.chip_class == GFX7) && !pdevice->use_aco) ||
pdevice->rad_info.chip_class >= GFX8) {
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
}