mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
aco: Add Primitive Ordered Pixel Shading waitcnt rules
When letting the overlapping waves enter their ordered sections, no memory accesses to resources that need primitive-ordered access may still be pending; otherwise there would be a race between the current wave and the overlapping waves. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22250>
This commit is contained in:
parent
a87628cd08
commit
e0f4b52559
4 changed files with 37 additions and 0 deletions
|
|
@ -509,6 +509,25 @@ kill(wait_imm& imm, alu_delay_info& delay, Instruction* instr, wait_ctx& ctx,
|
|||
force_waitcnt(ctx, imm);
|
||||
}
|
||||
|
||||
/* Make sure POPS coherent memory accesses have reached the L2 cache before letting the
|
||||
* overlapping waves proceed into the ordered section.
|
||||
*/
|
||||
if (ctx.program->has_pops_overlapped_waves_wait &&
|
||||
(ctx.gfx_level >= GFX11 ? instr->isEXP() && instr->exp().done
|
||||
: (instr->opcode == aco_opcode::s_sendmsg &&
|
||||
instr->sopp().imm == sendmsg_ordered_ps_done))) {
|
||||
if (ctx.vm_cnt)
|
||||
imm.vm = 0;
|
||||
if (ctx.gfx_level >= GFX10 && ctx.vs_cnt)
|
||||
imm.vs = 0;
|
||||
/* Await SMEM loads too, as it's possible for an application to create them, e.g. by using a
|
||||
* scalarization loop - pointless and suboptimal for an inherently divergent address of
|
||||
* per-pixel data, but it can still be done, at least synthetically, and must be handled correctly.
|
||||
*/
|
||||
if (ctx.program->has_smem_buffer_or_global_loads && ctx.lgkm_cnt)
|
||||
imm.lgkm = 0;
|
||||
}
|
||||
|
||||
check_instr(ctx, imm, delay, instr);
|
||||
|
||||
/* It's required to wait for scalar stores before "writing back" data.
|
||||
|
|
|
|||
|
|
@ -4398,6 +4398,8 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
|
|||
{
|
||||
assert(align >= 4u);
|
||||
|
||||
bld.program->has_smem_buffer_or_global_loads = true;
|
||||
|
||||
bool buffer = info.resource.id() && info.resource.bytes() == 16;
|
||||
Temp addr = info.resource;
|
||||
if (!buffer && !addr.id()) {
|
||||
|
|
|
|||
|
|
@ -2118,6 +2118,7 @@ public:
|
|||
Stage stage;
|
||||
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
|
||||
bool needs_wqm = false; /* there exists a p_wqm instruction */
|
||||
bool has_smem_buffer_or_global_loads = false;
|
||||
bool has_pops_overlapped_waves_wait = false;
|
||||
bool has_color_exports = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -2462,6 +2462,21 @@ lower_to_hw_instr(Program* program)
|
|||
block = &program->blocks[block_idx];
|
||||
|
||||
bld.reset(discard_block);
|
||||
if (program->has_pops_overlapped_waves_wait &&
|
||||
(program->gfx_level >= GFX11 || discard_sends_pops_done)) {
|
||||
/* If this discard early exit potentially exits the POPS ordered section, do
|
||||
* the waitcnt necessary before resuming overlapping waves as the normal
|
||||
* waitcnt insertion doesn't work in a discard early exit block.
|
||||
*/
|
||||
if (program->gfx_level >= GFX10)
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
wait_imm pops_exit_wait_imm;
|
||||
pops_exit_wait_imm.vm = 0;
|
||||
if (program->has_smem_buffer_or_global_loads)
|
||||
pops_exit_wait_imm.lgkm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, -1,
|
||||
pops_exit_wait_imm.pack(program->gfx_level));
|
||||
}
|
||||
if (discard_sends_pops_done)
|
||||
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_ordered_ps_done);
|
||||
unsigned target = V_008DFC_SQ_EXP_NULL;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue