diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index c976649692d..5a9e0b8b24e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11498,7 +11498,10 @@ pops_await_overlapped_waves(isel_context* ctx)
       /* GFX11+ - waiting for the export from the overlapped waves.
-       * Await the export_ready event (bit wait_event_imm_dont_wait_export_ready clear).
+       * Await the export_ready event: GFX11 requests the wait by leaving
+       * wait_event_imm_dont_wait_export_ready_gfx11 clear, GFX12+ by setting
+       * wait_event_imm_wait_export_ready_gfx12.
        */
-      bld.sopp(aco_opcode::s_wait_event, 0);
+      bld.sopp(aco_opcode::s_wait_event,
+               ctx->program->gfx_level >= GFX12 ? wait_event_imm_wait_export_ready_gfx12 : 0);
       return;
    }
 
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 4dd55de7edb..05f0745e6a7 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -209,6 +209,19 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
    program->next_fp_mode.round32 = fp_round_ne;
 }
 
+/* Returns whether instr is an s_wait_event that awaits the export_ready event.
+ * The immediate is decoded per generation: before GFX12 the wait is performed unless
+ * wait_event_imm_dont_wait_export_ready_gfx11 is set, while on GFX12+ it is performed only if
+ * wait_event_imm_wait_export_ready_gfx12 is set.
+ */
+bool
+is_wait_export_ready(amd_gfx_level gfx_level, const Instruction* instr)
+{
+   return instr->opcode == aco_opcode::s_wait_event &&
+          (gfx_level >= GFX12 ? (instr->salu().imm & wait_event_imm_wait_export_ready_gfx12)
+                              : !(instr->salu().imm & wait_event_imm_dont_wait_export_ready_gfx11));
+}
+
 memory_sync_info
 get_sync_info(const Instruction* instr)
 {
@@ -216,8 +229,9 @@ get_sync_info(const Instruction* instr)
     * overlapping waves in the queue family.
+    * Every s_wait_event is treated as an acquire here: decoding its immediate requires the gfx
+    * level (see is_wait_export_ready()), which this function does not receive.
     */
    if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done ||
-       (instr->opcode == aco_opcode::s_wait_event &&
-        !(instr->salu().imm & wait_event_imm_dont_wait_export_ready))) {
+       instr->opcode == aco_opcode::s_wait_event) {
       return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily);
    } else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
       return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index d1a2678e54e..bbac03eb410 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -240,7 +240,8 @@ enum wait_event_imm : uint16_t {
     * their ordered sections (by performing the `done` export), and that the current wave may enter
     * its ordered section.
     */
-   wait_event_imm_dont_wait_export_ready = 0x1,
+   wait_event_imm_dont_wait_export_ready_gfx11 = 0x1,
+   wait_event_imm_wait_export_ready_gfx12 = 0x2,
 };
 
 constexpr Format
@@ -1727,6 +1728,7 @@ is_phi(aco_ptr& instr)
    return is_phi(instr.get());
 }
 
+bool is_wait_export_ready(amd_gfx_level gfx_level, const Instruction* instr);
 memory_sync_info get_sync_info(const Instruction* instr);
 
 inline bool
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index ee893295247..e34abb9b5fd 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -395,8 +395,8 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
          break;
       }
       case aco_opcode::s_wait_event: {
-         if (!(imm & wait_event_imm_dont_wait_export_ready))
-            fprintf(output, " export_ready");
+         if (is_wait_export_ready(gfx_level, instr))
+            fprintf(output, " wait_export_ready");
          break;
       }
       default: {
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index bc471c5316e..44a73c81313 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -562,8 +562,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
     */
    if (upwards) {
       if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
-          (instr->opcode == aco_opcode::s_wait_event &&
-           !(instr->salu().imm & wait_event_imm_dont_wait_export_ready))) {
+          is_wait_export_ready(query->gfx_level, instr)) {
          return hazard_fail_unreorderable;
       }
    } else {