ac/llvm: allow ac_build_optimization_barrier with SGPRs, pointers, and metadata

sgpr=true prevents moving the value to a VGPR.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
Marek Olšák 2021-05-08 04:24:58 -04:00 committed by Marge Bot
parent 5f33f80dc7
commit 57e182c75b
4 changed files with 36 additions and 15 deletions

View file

@ -401,25 +401,46 @@ void ac_build_s_barrier(struct ac_llvm_context *ctx)
* Optionally, a value can be passed through the inline assembly to prevent
* LLVM from hoisting calls to ReadNone functions.
*/
void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr)
void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pgpr, bool sgpr)
{
static int counter = 0;
LLVMBuilderRef builder = ctx->builder;
char code[16];
const char *constraint = sgpr ? "=s,0" : "=v,0";
snprintf(code, sizeof(code), "; %d", (int)p_atomic_inc_return(&counter));
if (!pvgpr) {
if (!pgpr) {
LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
LLVMBuildCall(builder, inlineasm, NULL, 0, "");
} else if (LLVMTypeOf(*pgpr) == ctx->i32) {
/* Simple version for i32 that allows the caller to set LLVM metadata on the call
* instruction. */
LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
*pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
} else if (LLVMTypeOf(*pgpr) == ctx->i16) {
/* Simple version for i16 that allows the caller to set LLVM metadata on the call
* instruction. */
LLVMTypeRef ftype = LLVMFunctionType(ctx->i16, &ctx->i16, 1, false);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
*pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
} else if (LLVMGetTypeKind(LLVMTypeOf(*pgpr)) == LLVMPointerTypeKind) {
LLVMTypeRef type = LLVMTypeOf(*pgpr);
LLVMTypeRef ftype = LLVMFunctionType(type, &type, 1, false);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
*pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
} else {
LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
LLVMTypeRef type = LLVMTypeOf(*pvgpr);
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
LLVMTypeRef type = LLVMTypeOf(*pgpr);
unsigned bitsize = ac_get_elem_bits(ctx, type);
LLVMValueRef vgpr = *pvgpr;
LLVMValueRef vgpr = *pgpr;
LLVMTypeRef vgpr_type;
unsigned vgpr_size;
LLVMValueRef vgpr0;
@ -441,7 +462,7 @@ void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pv
if (bitsize < 32)
vgpr = LLVMBuildTrunc(builder, vgpr, type, "");
*pvgpr = vgpr;
*pgpr = vgpr;
}
}
@ -471,7 +492,7 @@ LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value)
/* We currently have no other way to prevent LLVM from lifting the icmp
* calls to a dominating basic block.
*/
ac_build_optimization_barrier(ctx, &args[0]);
ac_build_optimization_barrier(ctx, &args[0], false);
args[0] = ac_to_integer(ctx, args[0]);
@ -3366,7 +3387,7 @@ static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef
LLVMValueRef result;
if (with_opt_barrier)
ac_build_optimization_barrier(ctx, &src);
ac_build_optimization_barrier(ctx, &src, false);
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
if (lane)
@ -4061,7 +4082,7 @@ LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef s
return result;
}
ac_build_optimization_barrier(ctx, &src);
ac_build_optimization_barrier(ctx, &src, false);
LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
@ -4083,7 +4104,7 @@ LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef s
return result;
}
ac_build_optimization_barrier(ctx, &src);
ac_build_optimization_barrier(ctx, &src, false);
LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
@ -4098,7 +4119,7 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_
{
if (cluster_size == 1)
return src;
ac_build_optimization_barrier(ctx, &src);
ac_build_optimization_barrier(ctx, &src, false);
LLVMValueRef result, swap;
LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
@ -4219,7 +4240,7 @@ void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan
{
tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
ac_build_optimization_barrier(ctx, &tmp);
ac_build_optimization_barrier(ctx, &tmp, false);
bbs[1] = LLVMGetInsertBlock(builder);
phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);

View file

@ -175,7 +175,7 @@ LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigne
LLVMValueRef *values, LLVMBasicBlockRef *blocks);
void ac_build_s_barrier(struct ac_llvm_context *ctx);
void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr);
void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pgpr, bool sgpr);
LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope);

View file

@ -538,7 +538,7 @@ static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_
* operation into the break block.
*/
LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
ac_build_optimization_barrier(&ctx->ac, &cc);
ac_build_optimization_barrier(&ctx->ac, &cc, false);
LLVMValueRef active =
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");

View file

@ -675,7 +675,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
ac_cull_triangle(&ctx->ac, pos, prim_restart_accepted, vp_scale, vp_translate,
ac_get_arg(&ctx->ac, param_smallprim_precision), &options);
ac_build_optimization_barrier(&ctx->ac, &accepted);
ac_build_optimization_barrier(&ctx->ac, &accepted, false);
LLVMValueRef accepted_threadmask = ac_get_i1_sgpr_mask(&ctx->ac, accepted);
/* Count the number of active threads by doing bitcount(accepted). */