mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 14:18:07 +02:00
ac/llvm: use new s_wait instructions and split the existing ones for gfx12
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29007>
This commit is contained in:
parent
12bca6123a
commit
a6c46509cc
5 changed files with 66 additions and 46 deletions
|
|
@ -2066,44 +2066,60 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
|
|||
if (!wait_flags)
|
||||
return;
|
||||
|
||||
unsigned expcnt = 7;
|
||||
unsigned lgkmcnt = 63;
|
||||
unsigned vmcnt = ctx->gfx_level >= GFX9 ? 63 : 15;
|
||||
unsigned vscnt = 63;
|
||||
if (ctx->gfx_level >= GFX12) {
|
||||
if (wait_flags & AC_WAIT_DS)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.dscnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_KM)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.kmcnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_EXP)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.expcnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_LOAD)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.loadcnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_STORE)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.storecnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_SAMPLE)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.samplecnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
if (wait_flags & AC_WAIT_BVH)
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.wait.bvhcnt", ctx->voidt, &ctx->i16_0, 1, 0);
|
||||
} else {
|
||||
unsigned expcnt = 7;
|
||||
unsigned lgkmcnt = 63;
|
||||
unsigned vmcnt = ctx->gfx_level >= GFX9 ? 63 : 15;
|
||||
unsigned vscnt = 63;
|
||||
|
||||
if (wait_flags & AC_WAIT_EXP)
|
||||
expcnt = 0;
|
||||
if (wait_flags & AC_WAIT_LGKM)
|
||||
lgkmcnt = 0;
|
||||
if (wait_flags & AC_WAIT_VLOAD)
|
||||
vmcnt = 0;
|
||||
|
||||
if (wait_flags & AC_WAIT_VSTORE) {
|
||||
if (ctx->gfx_level >= GFX10)
|
||||
vscnt = 0;
|
||||
else
|
||||
if (wait_flags & AC_WAIT_EXP)
|
||||
expcnt = 0;
|
||||
if (wait_flags & (AC_WAIT_DS | AC_WAIT_KM))
|
||||
lgkmcnt = 0;
|
||||
if (wait_flags & (AC_WAIT_LOAD | AC_WAIT_SAMPLE | AC_WAIT_BVH))
|
||||
vmcnt = 0;
|
||||
|
||||
if (wait_flags & AC_WAIT_STORE) {
|
||||
if (ctx->gfx_level >= GFX10)
|
||||
vscnt = 0;
|
||||
else
|
||||
vmcnt = 0;
|
||||
}
|
||||
|
||||
/* There is no intrinsic for vscnt(0), so use a fence. It waits for everything except expcnt. */
|
||||
if (vscnt == 0) {
|
||||
assert(!(wait_flags & AC_WAIT_EXP));
|
||||
LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned simm16;
|
||||
|
||||
if (ctx->gfx_level >= GFX11)
|
||||
simm16 = expcnt | (lgkmcnt << 4) | (vmcnt << 10);
|
||||
else
|
||||
simm16 = (lgkmcnt << 8) | (expcnt << 4) | (vmcnt & 0xf) | ((vmcnt >> 4) << 14);
|
||||
|
||||
LLVMValueRef args[1] = {
|
||||
LLVMConstInt(ctx->i32, simm16, false),
|
||||
};
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
|
||||
}
|
||||
|
||||
/* There is no intrinsic for vscnt(0), so use a fence. */
|
||||
if ((wait_flags & AC_WAIT_LGKM && wait_flags & AC_WAIT_VLOAD && wait_flags & AC_WAIT_VSTORE) ||
|
||||
vscnt == 0) {
|
||||
assert(!(wait_flags & AC_WAIT_EXP));
|
||||
LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned simm16;
|
||||
|
||||
if (ctx->gfx_level >= GFX11)
|
||||
simm16 = expcnt | (lgkmcnt << 4) | (vmcnt << 10);
|
||||
else
|
||||
simm16 = (lgkmcnt << 8) | (expcnt << 4) | (vmcnt & 0xf) | ((vmcnt >> 4) << 14);
|
||||
|
||||
LLVMValueRef args[1] = {
|
||||
LLVMConstInt(ctx->i32, simm16, false),
|
||||
};
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
|
|
|
|||
|
|
@ -30,10 +30,14 @@ enum
|
|||
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
|
||||
};
|
||||
|
||||
#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
|
||||
#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
|
||||
#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
|
||||
#define AC_WAIT_EXP (1 << 3) /* EXP instructions */
|
||||
/* Fine-grained wait flags for GFX12, older chips merge them. */
|
||||
#define AC_WAIT_DS (1 << 0) /* s_wait_dscnt (GFX12) or lgkmcnt (GFX6-11): LDS (all gens), GDS (GFX6-11) */
|
||||
#define AC_WAIT_KM (1 << 1) /* s_wait_kmcnt (GFX12) or lgkmcnt (GFX6-11): SMEM, message */
|
||||
#define AC_WAIT_EXP (1 << 2) /* s_wait_expcnt: Exports */
|
||||
#define AC_WAIT_LOAD (1 << 3) /* s_wait_loadcnt (GFX12) or vmcnt (GFX6-11): VMEM loads */
|
||||
#define AC_WAIT_STORE (1 << 4) /* s_wait_storecnt (GFX12) or vscnt (GFX10-11) or vmcnt (GFX6-9): VMEM stores */
|
||||
#define AC_WAIT_SAMPLE (1 << 5) /* s_wait_samplecnt (GFX12) or vmcnt (GFX6-11): VMEM sample/gather */
|
||||
#define AC_WAIT_BVH (1 << 6) /* s_wait_bvhcnt (GFX12) or vmcnt (GFX6-11): BVH */
|
||||
|
||||
struct ac_llvm_flow;
|
||||
struct ac_llvm_compiler;
|
||||
|
|
|
|||
|
|
@ -3208,9 +3208,9 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
|
||||
unsigned wait_flags = 0;
|
||||
if (modes & (nir_var_mem_global | nir_var_mem_ssbo | nir_var_image))
|
||||
wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
|
||||
wait_flags |= AC_WAIT_LOAD | AC_WAIT_STORE;
|
||||
if (modes & nir_var_mem_shared)
|
||||
wait_flags |= AC_WAIT_LGKM;
|
||||
wait_flags |= AC_WAIT_DS;
|
||||
|
||||
if (wait_flags)
|
||||
ac_build_waitcnt(&ctx->ac, wait_flags);
|
||||
|
|
@ -3624,7 +3624,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
/* Set release=0 to start a GDS mutex. Set done=0 because it's not the last one. */
|
||||
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ds.ordered.add", ctx->ac.i32,
|
||||
args, ARRAY_SIZE(args), 0);
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_DS);
|
||||
|
||||
LLVMValueRef global_count[4];
|
||||
LLVMValueRef count_vec = get_src(ctx, instr->src[1]);
|
||||
|
|
@ -3644,7 +3644,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
}
|
||||
}
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_DS);
|
||||
|
||||
/* Set release=1 to end a GDS mutex. Set done=1 because it's the last one. */
|
||||
args[6] = args[7] = ctx->ac.i1true;
|
||||
|
|
|
|||
|
|
@ -351,7 +351,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir
|
|||
* and contains a barrier, it will wait there and then
|
||||
* reach s_endpgm.
|
||||
*/
|
||||
ac_build_waitcnt(&ctx.ac, AC_WAIT_LGKM);
|
||||
ac_build_waitcnt(&ctx.ac, AC_WAIT_DS);
|
||||
ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -704,7 +704,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
shader->selector->info.base.inputs_read &
|
||||
~shader->selector->info.tcs_vgpr_only_inputs) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_DS);
|
||||
|
||||
/* If both input and output patches are wholly in one wave, we don't need a barrier.
|
||||
* That's true when both VS and TCS have the same number of patch vertices and
|
||||
|
|
@ -715,7 +715,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_DS);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue