diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index a4d4682be36..acdd1f673e2 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2891,8 +2891,12 @@ lower_to_hw_instr(Program* program) program->workgroup_size > program->wave_size; bld.insert(std::move(instr)); - if (emit_s_barrier) + if (emit_s_barrier && ctx.program->gfx_level >= GFX12) { + bld.sop1(aco_opcode::s_barrier_signal, Operand::c32(-1)); + bld.sopp(aco_opcode::s_barrier_wait, UINT16_MAX); + } else if (emit_s_barrier) { bld.sopp(aco_opcode::s_barrier); + } } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) { float_mode new_mode = block->fp_mode; new_mode.round16_64 = fp_round_ne; diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp index c0bb7895f3d..44a3442c50b 100644 --- a/src/amd/compiler/aco_scheduler_ilp.cpp +++ b/src/amd/compiler/aco_scheduler_ilp.cpp @@ -97,6 +97,12 @@ can_reorder(const Instruction* const instr) case aco_opcode::s_set_gpr_idx_idx: case aco_opcode::s_sendmsg_rtn_b32: case aco_opcode::s_sendmsg_rtn_b64: + case aco_opcode::s_barrier_signal: + case aco_opcode::s_barrier_signal_isfirst: + case aco_opcode::s_get_barrier_state: + case aco_opcode::s_barrier_init: + case aco_opcode::s_barrier_join: + case aco_opcode::s_wakeup_barrier: /* SOPK */ case aco_opcode::s_cbranch_i_fork: case aco_opcode::s_getreg_b32: