aco: update sendmsg enum from LLVM

Add GFX11 enums and some new ones that apparently existed before. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17710>
2025-12-30 18:40:13 +01:00 · 2022-07-21 15:45:11 +01:00 · 2022-07-21 15:45:11 +01:00 · 6407d783ea
commit 6407d783ea
parent 7cecc81683
3 changed files with 62 additions and 38 deletions
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@ -85,15 +85,26 @@ aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);

 enum sendmsg {
   sendmsg_none = 0,
-   _sendmsg_gs = 2,
-   _sendmsg_gs_done = 3,
-   sendmsg_save_wave = 4,
-   sendmsg_stall_wave_gen = 5,
-   sendmsg_halt_waves = 6,
-   sendmsg_ordered_ps_done = 7,
-   sendmsg_early_prim_dealloc = 8,
-   sendmsg_gs_alloc_req = 9,
-   sendmsg_id_mask = 0xf,
+   _sendmsg_gs = 2, /* gfx6 to gfx10.3 */
+   _sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
+   sendmsg_hs_tessfactor = 2, /* gfx11+ */
+   sendmsg_dealloc_vgprs = 3, /* gfx11+ */
+   sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
+   sendmsg_stall_wave_gen = 5, /* gfx9+ */
+   sendmsg_halt_waves = 6, /* gfx9+ */
+   sendmsg_ordered_ps_done = 7, /* gfx9+ */
+   sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
+   sendmsg_gs_alloc_req = 9, /* gfx9+ */
+   sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
+   sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
+   sendmsg_rtn_get_doorbell = 128, /* gfx11+ */
+   sendmsg_rtn_get_ddid = 129, /* gfx11+ */
+   sendmsg_rtn_get_tma = 130, /* gfx11+ */
+   sendmsg_rtn_get_realtime = 131, /* gfx11+ */
+   sendmsg_rtn_save_wave = 132, /* gfx11+ */
+   sendmsg_rtn_get_tba = 133, /* gfx11+ */
+   sendmsg_id_mask_gfx6 = 0xf,
+   sendmsg_id_mask_gfx11 = 0xff,
 };

 inline sendmsg
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@ -333,16 +333,25 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
         break;
      }
      case aco_opcode::s_sendmsg: {
-         unsigned id = imm & sendmsg_id_mask;
+         unsigned id =
+            gfx_level >= GFX11 ? (imm & sendmsg_id_mask_gfx11) : (imm & sendmsg_id_mask_gfx6);
+         static_assert(_sendmsg_gs == sendmsg_hs_tessfactor);
+         static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs);
         switch (id) {
         case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
         case _sendmsg_gs:
-            fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
-                    imm & 0x20 ? ", emit" : "", imm >> 8);
+            if (gfx_level >= GFX11)
+               fprintf(output, " sendmsg(hs_tessfactor)");
+            else
+               fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
+                       imm & 0x20 ? ", emit" : "", imm >> 8);
            break;
         case _sendmsg_gs_done:
-            fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
-                    imm & 0x20 ? ", emit" : "", imm >> 8);
+            if (gfx_level >= GFX11)
+               fprintf(output, " sendmsg(dealloc_vgprs)");
+            else
+               fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
+                       imm & 0x20 ? ", emit" : "", imm >> 8);
            break;
         case sendmsg_save_wave: fprintf(output, " sendmsg(save_wave)"); break;
         case sendmsg_stall_wave_gen: fprintf(output, " sendmsg(stall_wave_gen)"); break;
@ -350,6 +359,15 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
         case sendmsg_ordered_ps_done: fprintf(output, " sendmsg(ordered_ps_done)"); break;
         case sendmsg_early_prim_dealloc: fprintf(output, " sendmsg(early_prim_dealloc)"); break;
         case sendmsg_gs_alloc_req: fprintf(output, " sendmsg(gs_alloc_req)"); break;
+         case sendmsg_get_doorbell: fprintf(output, " sendmsg(get_doorbell)"); break;
+         case sendmsg_get_ddid: fprintf(output, " sendmsg(get_ddid)"); break;
+         case sendmsg_rtn_get_doorbell: fprintf(output, " sendmsg(rtn_get_doorbell)"); break;
+         case sendmsg_rtn_get_ddid: fprintf(output, " sendmsg(rtn_get_ddid)"); break;
+         case sendmsg_rtn_get_tma: fprintf(output, " sendmsg(rtn_get_Tma)"); break;
+         case sendmsg_rtn_get_realtime: fprintf(output, " sendmsg(rtn_get_realtime)"); break;
+         case sendmsg_rtn_save_wave: fprintf(output, " sendmsg(rtn_save_wave)"); break;
+         case sendmsg_rtn_get_tba: fprintf(output, " sendmsg(rtn_get_Tba)"); break;
+         default: fprintf(output, " imm:%u", imm);
         }
         break;
      }
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@ -122,6 +122,7 @@ struct MoveState {
 };

 struct sched_ctx {
+   amd_gfx_level gfx_level;
   int16_t num_waves;
   int16_t last_SMEM_stall;
   int last_SMEM_dep_idx;
@ -420,20 +421,10 @@ MoveState::upwards_skip(UpwardsCursor& cursor)
 }

 bool
-is_gs_or_done_sendmsg(const Instruction* instr)
+is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
 {
-   if (instr->opcode == aco_opcode::s_sendmsg) {
-      uint16_t imm = instr->sopp().imm;
-      return (imm & sendmsg_id_mask) == _sendmsg_gs || (imm & sendmsg_id_mask) == _sendmsg_gs_done;
-   }
-   return false;
-}
-
-bool
-is_done_sendmsg(const Instruction* instr)
-{
-   if (instr->opcode == aco_opcode::s_sendmsg)
-      return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
+   if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
+      return (instr->sopp().imm & sendmsg_id_mask_gfx6) == _sendmsg_gs_done;
   return false;
 }

@ -464,6 +455,7 @@ struct memory_event_set {
 };

 struct hazard_query {
+   amd_gfx_level gfx_level;
   bool contains_spill;
   bool contains_sendmsg;
   bool uses_exec;
@ -473,8 +465,9 @@ struct hazard_query {
 };

 void
-init_hazard_query(hazard_query* query)
+init_hazard_query(const sched_ctx& ctx, hazard_query* query)
 {
+   query->gfx_level = ctx.gfx_level;
   query->contains_spill = false;
   query->contains_sendmsg = false;
   query->uses_exec = false;
@ -484,9 +477,10 @@ init_hazard_query(hazard_query* query)
 }

 void
-add_memory_event(memory_event_set* set, Instruction* instr, memory_sync_info* sync)
+add_memory_event(amd_gfx_level gfx_level, memory_event_set* set, Instruction* instr,
+                 memory_sync_info* sync)
 {
-   set->has_control_barrier |= is_done_sendmsg(instr);
+   set->has_control_barrier |= is_done_sendmsg(gfx_level, instr);
   if (instr->opcode == aco_opcode::p_barrier) {
      Pseudo_barrier_instruction& bar = instr->barrier();
      if (bar.sync.semantics & semantic_acquire)
@ -524,7 +518,7 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)

   memory_sync_info sync = get_sync_info_with_hack(instr);

-   add_memory_event(&query->mem_events, instr, &sync);
+   add_memory_event(query->gfx_level, &query->mem_events, instr, &sync);

   if (!(sync.semantics & semantic_can_reorder)) {
      unsigned storage = sync.storage;
@ -580,7 +574,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
   memory_event_set instr_set;
   memset(&instr_set, 0, sizeof(instr_set));
   memory_sync_info sync = get_sync_info_with_hack(instr);
-   add_memory_event(&instr_set, instr, &sync);
+   add_memory_event(query->gfx_level, &instr_set, instr, &sync);

   memory_event_set* first = &instr_set;
   memory_event_set* second = &query->mem_events;
@ -655,7 +649,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe

   /* first, check if we have instructions before current to move down */
   hazard_query hq;
-   init_hazard_query(&hq);
+   init_hazard_query(ctx, &hq);
   add_to_hazard_query(&hq, current);

   DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
@ -751,7 +745,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
      if (is_dependency) {
         if (!found_dependency) {
            ctx.mv.upwards_update_insert_idx(up_cursor);
-            init_hazard_query(&hq);
+            init_hazard_query(ctx, &hq);
            found_dependency = true;
         }
      }
@ -797,8 +791,8 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
   /* first, check if we have instructions before current to move down */
   hazard_query indep_hq;
   hazard_query clause_hq;
-   init_hazard_query(&indep_hq);
-   init_hazard_query(&clause_hq);
+   init_hazard_query(ctx, &indep_hq);
+   init_hazard_query(ctx, &clause_hq);
   add_to_hazard_query(&indep_hq, current);

   DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
@ -923,7 +917,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
      if (is_dependency) {
         if (!found_dependency) {
            ctx.mv.upwards_update_insert_idx(up_cursor);
-            init_hazard_query(&indep_hq);
+            init_hazard_query(ctx, &indep_hq);
            found_dependency = true;
         }
      } else if (is_vmem) {
@ -967,7 +961,7 @@ schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDeman
   DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);

   hazard_query hq;
-   init_hazard_query(&hq);
+   init_hazard_query(ctx, &hq);
   add_to_hazard_query(&hq, current);

   for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
@ -1054,6 +1048,7 @@ schedule_program(Program* program, live& live_vars)
   demand.vgpr += program->config->num_shared_vgprs / 2;

   sched_ctx ctx;
+   ctx.gfx_level = program->gfx_level;
   ctx.mv.depends_on.resize(program->peekAllocationId());
   ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
   ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());