aco: Add Primitive Ordered Pixel Shading scheduling rules

Implement the acquire/release semantics of the fragment shader interlock
ordered section in Vulkan, and prevent memory accesses that require
primitive ordering from being reordered out of the ordered section.

Also, the ordered section should be as short as possible, so don't reorder
the instructions awaiting overlapped waves upwards, or the exit from the
ordered section downwards.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22250>
This commit is contained in:
Vitaliy Triang3l Kuzmin 2023-04-03 21:27:47 +03:00 committed by Marge Bot
parent 6082e126eb
commit 2194e8bd82
2 changed files with 33 additions and 1 deletions

View file

@ -223,6 +223,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
memory_sync_info
get_sync_info(const Instruction* instr)
{
/* Primitive Ordered Pixel Shading barriers necessary for accesses to memory shared between
* overlapping waves in the queue family.
*/
if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done ||
(instr->opcode == aco_opcode::s_wait_event &&
!(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) {
return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily);
} else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily);
}
switch (instr->format) {
case Format::SMEM: return instr->smem().sync;
case Format::MUBUF: return instr->mubuf().sync;

View file

@ -571,6 +571,21 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
if (!upwards && instr->opcode == aco_opcode::p_exit_early_if)
return hazard_fail_unreorderable;
/* In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and notify
* overlapping waves that they can continue execution as early as possible.
*/
if (upwards) {
if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
(instr->opcode == aco_opcode::s_wait_event &&
!(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) {
return hazard_fail_unreorderable;
}
} else {
if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
return hazard_fail_unreorderable;
}
}
if (query->uses_exec || query->writes_exec) {
for (const Definition& def : instr->definitions) {
if (def.isFixed() && def.physReg() == exec)
@ -580,7 +595,13 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
if (query->writes_exec && needs_exec_mask(instr))
return hazard_fail_exec;
/* don't move exports so that they stay closer together */
/* Don't move exports so that they stay closer together.
* Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export exits the ordered
* section, so it must not be moved above either of the following:
* - the memory accesses preceding the queue family scope (more precisely, fragment interlock
*   scope, but it's not available in ACO) release barrier that is expected to be inserted
*   before the export;
* - any `s_wait_event export_ready`, which enters the ordered section.
*/
if (instr->isEXP())
return hazard_fail_export;