mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
radv/gfx10: fix implementation of exclusive scans
This implementation is loosely based on ROCm.
https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/master/ockl/src/wfredscan.cl

This fixes dEQP-VK.subgroups.arithmetic.*.subgroupexclusive* on GFX10.

Fixes: 227c29a80d ("amd/common/gfx10: implement scan & reduce operations")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
(cherry picked from commit c9aa843961)

Conflicts resolved by Dylan Baker
This commit is contained in:
parent
a3869c14c0
commit
5c98b36577
1 changed files with 58 additions and 25 deletions
|
|
@ -4218,8 +4218,43 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu
|
||||||
{
|
{
|
||||||
LLVMValueRef result, tmp;
|
LLVMValueRef result, tmp;
|
||||||
|
|
||||||
if (ctx->chip_class >= GFX10) {
|
if (inclusive) {
|
||||||
result = inclusive ? src : identity;
|
result = src;
|
||||||
|
} else if (ctx->chip_class >= GFX10) {
|
||||||
|
/* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
|
||||||
|
LLVMValueRef active, tmp1, tmp2;
|
||||||
|
LLVMValueRef tid = ac_get_thread_id(ctx);
|
||||||
|
|
||||||
|
tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
|
||||||
|
|
||||||
|
tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
|
||||||
|
|
||||||
|
if (maxprefix > 32) {
|
||||||
|
active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
|
||||||
|
LLVMConstInt(ctx->i32, 32, false), "");
|
||||||
|
|
||||||
|
tmp2 = LLVMBuildSelect(ctx->builder, active,
|
||||||
|
ac_build_readlane(ctx, src,
|
||||||
|
LLVMConstInt(ctx->i32, 31, false)),
|
||||||
|
tmp2, "");
|
||||||
|
|
||||||
|
active = LLVMBuildOr(ctx->builder, active,
|
||||||
|
LLVMBuildICmp(ctx->builder, LLVMIntEQ,
|
||||||
|
LLVMBuildAnd(ctx->builder, tid,
|
||||||
|
LLVMConstInt(ctx->i32, 0x1f, false), ""),
|
||||||
|
LLVMConstInt(ctx->i32, 0x10, false), ""), "");
|
||||||
|
src = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
|
||||||
|
} else if (maxprefix > 16) {
|
||||||
|
active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
|
||||||
|
LLVMConstInt(ctx->i32, 16, false), "");
|
||||||
|
|
||||||
|
src = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
result = src;
|
||||||
|
} else if (ctx->chip_class >= GFX8) {
|
||||||
|
src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
|
||||||
|
result = src;
|
||||||
} else {
|
} else {
|
||||||
if (!inclusive)
|
if (!inclusive)
|
||||||
src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
|
src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
|
||||||
|
|
@ -4249,33 +4284,31 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
if (ctx->chip_class >= GFX10) {
|
if (ctx->chip_class >= GFX10) {
|
||||||
/* dpp_row_bcast{15,31} are not supported on gfx10. */
|
|
||||||
LLVMBuilderRef builder = ctx->builder;
|
|
||||||
LLVMValueRef tid = ac_get_thread_id(ctx);
|
LLVMValueRef tid = ac_get_thread_id(ctx);
|
||||||
LLVMValueRef cc;
|
LLVMValueRef active;
|
||||||
/* TODO-GFX10: Can we get better code-gen by putting this into
|
|
||||||
* a branch so that LLVM generates EXEC mask manipulations? */
|
tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
|
||||||
if (inclusive)
|
|
||||||
tmp = result;
|
active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
|
||||||
else
|
LLVMBuildAnd(ctx->builder, tid,
|
||||||
tmp = ac_build_alu_op(ctx, result, src, op);
|
LLVMConstInt(ctx->i32, 16, false), ""),
|
||||||
tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
|
ctx->i32_0, "");
|
||||||
tmp = ac_build_alu_op(ctx, result, tmp, op);
|
|
||||||
cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), "");
|
tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
|
||||||
cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
|
|
||||||
result = LLVMBuildSelect(builder, cc, tmp, result, "");
|
result = ac_build_alu_op(ctx, result, tmp, op);
|
||||||
|
|
||||||
if (maxprefix <= 32)
|
if (maxprefix <= 32)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
if (inclusive)
|
tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
|
||||||
tmp = result;
|
|
||||||
else
|
active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid,
|
||||||
tmp = ac_build_alu_op(ctx, result, src, op);
|
LLVMConstInt(ctx->i32, 32, false), "");
|
||||||
tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
|
|
||||||
tmp = ac_build_alu_op(ctx, result, tmp, op);
|
tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
|
||||||
cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
|
|
||||||
LLVMConstInt(ctx->i32, 32, false), "");
|
result = ac_build_alu_op(ctx, result, tmp, op);
|
||||||
result = LLVMBuildSelect(builder, cc, tmp, result, "");
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue