diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ea5e1f229d6..5119890c848 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -223,6 +223,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, memory_sync_info get_sync_info(const Instruction* instr) { + /* Primitive Ordered Pixel Shading barriers necessary for accesses to memory shared between + * overlapping waves in the queue family. + */ + if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done || + (instr->opcode == aco_opcode::s_wait_event && + !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) { + return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily); + } else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) { + return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily); + } + switch (instr->format) { case Format::SMEM: return instr->smem().sync; case Format::MUBUF: return instr->mubuf().sync; diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 2e43fa2d62a..d0cd09db362 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -571,6 +571,21 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) if (!upwards && instr->opcode == aco_opcode::p_exit_early_if) return hazard_fail_unreorderable; + /* In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and notify + * overlapping waves that they can continue execution as early as possible. + */ + if (upwards) { + if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id || + (instr->opcode == aco_opcode::s_wait_event && + !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) { + return hazard_fail_unreorderable; + } + } else { + if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) { + return hazard_fail_unreorderable; + } + } + if (query->uses_exec || query->writes_exec) { for (const Definition& def : instr->definitions) { if (def.isFixed() && def.physReg() == exec) @@ -580,7 +595,13 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) if (query->writes_exec && needs_exec_mask(instr)) return hazard_fail_exec; - /* don't move exports so that they stay closer together */ + /* Don't move exports so that they stay closer together. + * Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export must not be moved + * above the memory accesses before the queue family scope (more precisely, fragment interlock + * scope, but it's not available in ACO) release barrier that is expected to be inserted before + * the export, as well as before any `s_wait_event export_ready` which enters the ordered + * section, because the `done` export exits the ordered section. + */ if (instr->isEXP()) return hazard_fail_export;