From 2194e8bd82c1bcbcc2002e7d8a03e831c31d870a Mon Sep 17 00:00:00 2001 From: Vitaliy Triang3l Kuzmin Date: Mon, 3 Apr 2023 21:27:47 +0300 Subject: [PATCH] aco: Add Primitive Ordered Pixel Shading scheduling rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementing the acquire/release semantics of fragment shader interlock ordered section in Vulkan, and preventing reordering of memory accesses requiring primitive ordering out of the ordered section. Also, the ordered section should be as short as possible, so not reordering the instructions awaiting overlapped waves upwards, and the exit from the ordered section downwards. Reviewed-by: Timur Kristóf Signed-off-by: Vitaliy Triang3l Kuzmin Part-of: --- src/amd/compiler/aco_ir.cpp | 11 +++++++++++ src/amd/compiler/aco_scheduler.cpp | 23 ++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ea5e1f229d6..5119890c848 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -223,6 +223,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, memory_sync_info get_sync_info(const Instruction* instr) { + /* Primitive Ordered Pixel Shading barriers necessary for accesses to memory shared between + * overlapping waves in the queue family. + */ + if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done || + (instr->opcode == aco_opcode::s_wait_event && + !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) { + return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily); + } else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) { + return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily); + } + switch (instr->format) { case Format::SMEM: return instr->smem().sync; case Format::MUBUF: return instr->mubuf().sync; diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 2e43fa2d62a..d0cd09db362 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -571,6 +571,21 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) if (!upwards && instr->opcode == aco_opcode::p_exit_early_if) return hazard_fail_unreorderable; + /* In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and notify + * overlapping waves that they can continue execution as early as possible. + */ + if (upwards) { + if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id || + (instr->opcode == aco_opcode::s_wait_event && + !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) { + return hazard_fail_unreorderable; + } + } else { + if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) { + return hazard_fail_unreorderable; + } + } + if (query->uses_exec || query->writes_exec) { for (const Definition& def : instr->definitions) { if (def.isFixed() && def.physReg() == exec) @@ -580,7 +595,13 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) if (query->writes_exec && needs_exec_mask(instr)) return hazard_fail_exec; - /* don't move exports so that they stay closer together */ + /* Don't move exports so that they stay closer together. + * Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export must not be moved + * above the memory accesses before the queue family scope (more precisely, fragment interlock + * scope, but it's not available in ACO) release barrier that is expected to be inserted before + * the export, as well as before any `s_wait_event export_ready` which enters the ordered + * section, because the `done` export exits the ordered section. + */ if (instr->isEXP()) return hazard_fail_export;