diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index 6bc0185bfbe..22e17f27ad7 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -136,6 +136,22 @@ enum bperm_swiz {
    bperm_255 = 13,
 };
 
+/* Wait kinds; the IR printer casts the 4-bit wait fields of s_delay_alu's immediate to this. */
+enum class alu_delay_wait {
+   NO_DEP = 0,
+   VALU_DEP_1 = 1,
+   VALU_DEP_2 = 2,
+   VALU_DEP_3 = 3,
+   VALU_DEP_4 = 4,
+   TRANS32_DEP_1 = 5,
+   TRANS32_DEP_2 = 6,
+   TRANS32_DEP_3 = 7,
+   FMA_ACCUM_CYCLE_1 = 8,
+   SALU_CYCLE_1 = 9,
+   SALU_CYCLE_2 = 10,
+   SALU_CYCLE_3 = 11,
+};
+
 class Builder {
 public:
    struct Result {
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index ad44b9ef483..9643a9e2f8a 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -22,6 +22,7 @@
  *
  */
 
+#include "aco_builder.h"
 #include "aco_ir.h"
 #include "common/sid.h"
 
@@ -167,21 +168,6 @@ struct alu_delay_info {
    }
 };
 
-enum class alu_delay_wait {
-   NO_DEP,
-   VALU_DEP_1,
-   VALU_DEP_2,
-   VALU_DEP_3,
-   VALU_DEP_4,
-   TRANS32_DEP_1,
-   TRANS32_DEP_2,
-   TRANS32_DEP_3,
-   FMA_ACCUM_CYCLE_1,
-   SALU_CYCLE_1,
-   SALU_CYCLE_2,
-   SALU_CYCLE_3
-};
-
 uint8_t
 get_counters_for_event(wait_event ev)
 {
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index c069baa368c..37db33230e4 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -322,6 +322,36 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
          fprintf(output, " sa_sdst(%d)", sa_sdst);
          break;
       }
+      case aco_opcode::s_delay_alu: {
+         /* imm layout: bits [3:0] first wait, bits [6:4] skip field,
+          * bits [10:7] second wait. The skip field is three bits wide (the
+          * second wait starts at bit 7), so it is masked with 0x7. */
+         unsigned delay[2] = {imm & 0xfu, (imm >> 7) & 0xfu};
+         unsigned skip = (imm >> 4) & 0x7;
+         for (unsigned i = 0; i < 2; i++) {
+            /* The skip annotation is printed between the two waits. */
+            if (i == 1 && skip) {
+               if (skip == 1)
+                  fprintf(output, " next");
+               else
+                  fprintf(output, " skip_%u", skip - 1);
+            }
+
+            /* NO_DEP and values outside the enum ranges print nothing. */
+            alu_delay_wait wait = (alu_delay_wait)delay[i];
+            if (wait >= alu_delay_wait::VALU_DEP_1 && wait <= alu_delay_wait::VALU_DEP_4)
+               fprintf(output, " valu_dep_%u", delay[i]);
+            else if (wait >= alu_delay_wait::TRANS32_DEP_1 && wait <= alu_delay_wait::TRANS32_DEP_3)
+               fprintf(output, " trans32_dep_%u",
+                       delay[i] - (unsigned)alu_delay_wait::TRANS32_DEP_1 + 1);
+            else if (wait == alu_delay_wait::FMA_ACCUM_CYCLE_1)
+               fprintf(output, " fma_accum_cycle_1");
+            else if (wait >= alu_delay_wait::SALU_CYCLE_1 && wait <= alu_delay_wait::SALU_CYCLE_3)
+               fprintf(output, " salu_cycle_%u",
+                       delay[i] - (unsigned)alu_delay_wait::SALU_CYCLE_1 + 1);
+         }
+         break;
+      }
       case aco_opcode::s_endpgm:
       case aco_opcode::s_endpgm_saved:
       case aco_opcode::s_endpgm_ordered_ps_done: