diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 3da8040b80e..9f83cf86e37 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -293,6 +293,11 @@ is_atomic_or_control_instr(Program* program, const Instruction* instr, memory_sy } uint16_t cls = BITFIELD_MASK(storage_count); + if (is_acquire) { + if (is_wait_export_ready(program->gfx_level, instr) || + instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id) + return cls & ~storage_shared; + } if (is_release) { if (is_done_sendmsg(program->gfx_level, instr) || is_pos_prim_export(program->gfx_level, instr)) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index c11ba95954b..6c336c89f2a 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -64,7 +64,8 @@ enum memory_semantics : uint8_t { semantic_none = 0x0, /* for loads: don't move any access after this load to before this load (even other loads) * for barriers: don't move any access after the barrier to before any - * atomic_loads/control_barriers before the barrier */ + * atomic_loads/control_barriers/p_pops_gfx9_add_exiting_wave_id or + * certain s_wait_event before the barrier */ semantic_acquire = 0x1, /* for stores: don't move any access before this store to after this store * for barriers: don't move any access before the barrier to after any