winsys/amdgpu: make the csc context an array

Instead of the separate csc1 and csc2 members, store the submission contexts
in an array. Use current_csc_index to point to the csc that is currently being
filled with commands.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33661>
Author:  Yogesh Mohan Marimuthu
Date:    2025-02-17 10:51:38 +05:30 (committed by Marge Bot)
Parent:  eb5bd057a1
Commit:  fc36840c04
3 changed files with 75 additions and 70 deletions
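
The change itself is mechanical, but the pattern it sets up is worth spelling out: two submission contexts double-buffered behind a single index, with small accessors for "the one being filled" and "the one being submitted". A minimal standalone sketch of that pattern, using made-up types (submission_ctx, cmdbuf) rather than the real amdgpu structures:

#include <stdbool.h>

/* Illustrative stand-ins; not the real amdgpu_cs_context / amdgpu_cs. */
struct submission_ctx {
   bool busy;                       /* placeholder payload */
};

struct cmdbuf {
   struct submission_ctx csc[2];    /* one being filled, one being consumed */
   int current_csc_index;           /* index of the context being filled */
};

static inline struct submission_ctx *ctx_current(struct cmdbuf *cb)
{
   return &cb->csc[cb->current_csc_index];
}

static inline struct submission_ctx *ctx_submitted(struct cmdbuf *cb)
{
   return &cb->csc[!cb->current_csc_index];
}

static inline void ctx_swap(struct cmdbuf *cb)
{
   /* Valid because the array has exactly two slots (index is 0 or 1). */
   cb->current_csc_index = !cb->current_csc_index;
}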


@@ -649,7 +649,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
/* Don't use the "domains" parameter. Amdgpu doesn't support changing
* the buffer placement during command submission.
*/
struct amdgpu_cs_context *cs = amdgpu_cs(rcs)->csc;
struct amdgpu_cs_context *cs = amdgpu_csc_get_current(amdgpu_cs(rcs));
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_cs_buffer *buffer;
@@ -743,7 +743,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *aws,
struct amdgpu_ib *main_ib,
struct amdgpu_cs *cs)
{
struct drm_amdgpu_cs_chunk_ib *chunk_ib = &cs->csc->chunk_ib[IB_MAIN];
struct drm_amdgpu_cs_chunk_ib *chunk_ib = &amdgpu_csc_get_current(cs)->chunk_ib[IB_MAIN];
/* This is the minimum size of a contiguous IB. */
unsigned ib_size = 16 * 1024;
@@ -787,7 +787,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *aws,
rcs->current.buf = (uint32_t*)(main_ib->big_buffer_cpu_ptr + main_ib->used_ib_space);
cs->csc->ib_main_addr = rcs->current.buf;
amdgpu_csc_get_current(cs)->ib_main_addr = rcs->current.buf;
ib_size = main_ib->big_buffer->size - main_ib->used_ib_space;
rcs->current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs);
@@ -906,8 +906,8 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
radeon_bo_reference(&cs->aws->dummy_sws.base, &cs->preamble_ib_bo, NULL);
radeon_bo_reference(&cs->aws->dummy_sws.base, &cs->main_ib.big_buffer, NULL);
FREE(rcs->prev);
amdgpu_destroy_cs_context(cs->aws, &cs->csc1);
amdgpu_destroy_cs_context(cs->aws, &cs->csc2);
for (unsigned i = 0; i < ARRAY_SIZE(cs->csc); i++)
amdgpu_destroy_cs_context(cs->aws, &cs->csc[i]);
amdgpu_fence_reference(&cs->next_fence, NULL);
FREE(cs);
}
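
ARRAY_SIZE above is the usual element-count macro from Mesa's util headers; a minimal equivalent, shown only to make the loop bound concrete (the local name is hypothetical):

/* Element count of a fixed-size array; equivalent in spirit to Mesa's
 * ARRAY_SIZE. Evaluates to 2 for cs->csc here. */
#define ELEM_COUNT(a) (sizeof(a) / sizeof((a)[0]))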
@@ -964,29 +964,20 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
ac_drm_cs_chunk_fence_info_to_data(cs->ctx->user_fence_bo_kms_handle, cs->ip_type * 4,
(struct drm_amdgpu_cs_chunk_data*)&cs->fence_chunk);
if (!amdgpu_init_cs_context(ctx->aws, &cs->csc1, ip_type)) {
FREE(cs);
return false;
}
if (!amdgpu_init_cs_context(ctx->aws, &cs->csc2, ip_type)) {
amdgpu_destroy_cs_context(ctx->aws, &cs->csc1);
FREE(cs);
return false;
}
memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
/* Set the first submission context as current. */
cs->csc = &cs->csc1;
cs->cst = &cs->csc2;
for (unsigned i = 0; i < ARRAY_SIZE(cs->csc); i++) {
if (!amdgpu_init_cs_context(ctx->aws, &cs->csc[i], ip_type)) {
if (i)
amdgpu_destroy_cs_context(ctx->aws, &cs->csc[0]);
FREE(cs);
return false;
}
/* Assign to both amdgpu_cs_context; only csc will use it. */
cs->csc1.buffer_indices_hashlist = cs->buffer_indices_hashlist;
cs->csc2.buffer_indices_hashlist = cs->buffer_indices_hashlist;
cs->csc1.aws = ctx->aws;
cs->csc2.aws = ctx->aws;
/* only csc will use for buffer_indices_hashlist. */
cs->csc[i].buffer_indices_hashlist = cs->buffer_indices_hashlist;
cs->csc[i].aws = ctx->aws;
}
p_atomic_inc(&ctx->aws->num_cs);
rcs->priv = cs;
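
Note that the new initialization loop also rolls back on partial failure: if csc[1] fails to initialize, the already-initialized csc[0] is destroyed before returning. The same idea in a generalized, hedged sketch (struct ctx, init_one() and destroy_one() are hypothetical placeholders, not winsys functions):

#include <stdbool.h>

struct ctx { int payload; };            /* hypothetical context type */

static bool init_one(struct ctx *c)     /* hypothetical: false on failure */
{
   c->payload = 0;
   return true;
}

static void destroy_one(struct ctx *c)  /* hypothetical */
{
   (void)c;
}

static bool init_all(struct ctx *array, unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      if (!init_one(&array[i])) {
         /* Undo every entry initialized before the failure. */
         while (i--)
            destroy_one(&array[i]);
         return false;
      }
   }
   return true;
}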
@@ -1013,7 +1004,6 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *aws = cs->aws;
struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
unsigned size = align(preamble_num_dw * 4, aws->info.ip[AMD_IP_GFX].ib_alignment);
struct pb_buffer_lean *preamble_bo;
uint32_t *map;
@@ -1041,11 +1031,11 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
amdgpu_pad_gfx_compute_ib(aws, cs->ip_type, map, &preamble_num_dw, 0);
amdgpu_bo_unmap(&aws->dummy_sws.base, preamble_bo);
for (unsigned i = 0; i < 2; i++) {
csc[i]->chunk_ib[IB_PREAMBLE].va_start = amdgpu_bo_get_va(preamble_bo);
csc[i]->chunk_ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
for (unsigned i = 0; i < ARRAY_SIZE(cs->csc); i++) {
cs->csc[i].chunk_ib[IB_PREAMBLE].va_start = amdgpu_bo_get_va(preamble_bo);
cs->csc[i].chunk_ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
csc[i]->chunk_ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
cs->csc[i].chunk_ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
}
assert(!cs->preamble_ib_bo);
@@ -1167,7 +1157,7 @@ static void amdgpu_add_slab_backing_buffers(struct amdgpu_cs_context *cs)
static unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
struct radeon_bo_list_item *list)
{
struct amdgpu_cs_context *cs = amdgpu_cs(rcs)->csc;
struct amdgpu_cs_context *cs = amdgpu_csc_get_current(amdgpu_cs(rcs));
/* We do this in the CS thread, but since we need to return the final usage of all buffers
* here, do it here too. There is no harm in doing it again in the CS thread.
@@ -1212,7 +1202,7 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rcs,
struct pipe_fence_handle *pfence)
{
struct amdgpu_cs *acs = amdgpu_cs(rcs);
struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_cs_context *cs = amdgpu_csc_get_current(acs);
struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
util_queue_fence_wait(&fence->submitted);
@@ -1264,7 +1254,7 @@ static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
struct pipe_fence_handle *fence)
{
struct amdgpu_cs *acs = amdgpu_cs(rws);
struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_cs_context *cs = amdgpu_csc_get_current(acs);
add_fence_to_list(&cs->syncobj_to_signal, (struct amdgpu_fence*)fence);
}
@@ -1275,7 +1265,7 @@ static int amdgpu_cs_submit_ib_kernelq(struct amdgpu_cs *acs,
uint64_t *seq_no)
{
struct amdgpu_winsys *aws = acs->aws;
struct amdgpu_cs_context *cs = acs->cst;
struct amdgpu_cs_context *cs = amdgpu_csc_get_submitted(acs);
struct drm_amdgpu_bo_list_in bo_list_in;
struct drm_amdgpu_cs_chunk chunks[8];
unsigned num_chunks = 0;
@@ -1470,7 +1460,7 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq,
{
int r = 0;
struct amdgpu_winsys *aws = acs->aws;
struct amdgpu_cs_context *cs = acs->cst;
struct amdgpu_cs_context *cs = amdgpu_csc_get_submitted(acs);
/* Syncobj dependencies. */
unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num;
@@ -1592,7 +1582,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
{
struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
struct amdgpu_winsys *aws = acs->aws;
struct amdgpu_cs_context *cs = acs->cst;
struct amdgpu_cs_context *cs = amdgpu_csc_get_submitted(acs);
int r;
uint64_t seq_no = 0;
bool has_user_fence = amdgpu_cs_has_user_fence(acs);
@@ -2073,6 +2063,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *aws = cs->aws;
struct amdgpu_cs_context *csc_current = amdgpu_csc_get_current(cs);
int error_code = 0;
uint32_t ib_pad_dw_mask = aws->info.ip[cs->ip_type].ib_pad_dw_mask;
@@ -2128,26 +2119,25 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
if (likely(radeon_emitted(rcs, 0) &&
rcs->current.cdw <= rcs->current.max_dw &&
!(flags & RADEON_FLUSH_NOOP))) {
struct amdgpu_cs_context *cur = cs->csc;
/* Set IB sizes. */
amdgpu_ib_finalize(aws, rcs, &cs->main_ib, cs->ip_type);
/* Create a fence. */
amdgpu_fence_reference(&cur->fence, NULL);
amdgpu_fence_reference(&csc_current->fence, NULL);
if (cs->next_fence) {
/* just move the reference */
cur->fence = cs->next_fence;
csc_current->fence = cs->next_fence;
cs->next_fence = NULL;
} else {
cur->fence = amdgpu_fence_create(cs);
csc_current->fence = amdgpu_fence_create(cs);
}
if (fence)
amdgpu_fence_reference(fence, cur->fence);
amdgpu_fence_reference(fence, csc_current->fence);
for (unsigned i = 0; i < ARRAY_SIZE(cur->buffer_lists); i++) {
unsigned num_buffers = cur->buffer_lists[i].num_buffers;
struct amdgpu_cs_buffer *buffers = cur->buffer_lists[i].buffers;
for (unsigned i = 0; i < ARRAY_SIZE(csc_current->buffer_lists); i++) {
unsigned num_buffers = csc_current->buffer_lists[i].num_buffers;
struct amdgpu_cs_buffer *buffers = csc_current->buffer_lists[i].buffers;
for (unsigned j = 0; j < num_buffers; j++)
p_atomic_inc(&buffers[j].bo->num_active_ioctls);
@@ -2155,19 +2145,19 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
amdgpu_cs_sync_flush(rcs);
cur->chunk_ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
csc_current->chunk_ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
if (cs->noop && cs->ip_type == AMD_IP_GFX) {
/* Reduce the IB size and fill it with NOP to make it like an empty IB. */
unsigned noop_dw_size = aws->info.ip[AMD_IP_GFX].ib_pad_dw_mask + 1;
assert(cur->chunk_ib[IB_MAIN].ib_bytes / 4 >= noop_dw_size);
assert(csc_current->chunk_ib[IB_MAIN].ib_bytes / 4 >= noop_dw_size);
cur->ib_main_addr[0] = PKT3(PKT3_NOP, noop_dw_size - 2, 0);
cur->chunk_ib[IB_MAIN].ib_bytes = noop_dw_size * 4;
csc_current->ib_main_addr[0] = PKT3(PKT3_NOP, noop_dw_size - 2, 0);
csc_current->chunk_ib[IB_MAIN].ib_bytes = noop_dw_size * 4;
}
/* Swap command streams. "cst" is going to be submitted. */
cs->csc = cs->cst;
cs->cst = cur;
amdgpu_csc_swap(cs);
csc_current = amdgpu_csc_get_current(cs);
struct amdgpu_cs_context *csc_submitted = amdgpu_csc_get_submitted(cs);
/* only gfx, compute and sdma queues are supported in userqueues. */
if (aws->info.use_userq && cs->ip_type <= AMD_IP_SDMA) {
@@ -2182,23 +2172,23 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
}
if (flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION)
cs->csc->secure = !cs->cst->secure;
csc_current->secure = !csc_submitted->secure;
else
cs->csc->secure = cs->cst->secure;
csc_current->secure = csc_submitted->secure;
if (!(flags & PIPE_FLUSH_ASYNC)) {
amdgpu_cs_sync_flush(rcs);
error_code = cur->error_code;
error_code = csc_submitted->error_code;
}
} else {
if (flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION)
cs->csc->secure = !cs->csc->secure;
csc_current->secure = !csc_current->secure;
amdgpu_cs_context_cleanup_buffers(aws, cs->csc);
amdgpu_cs_context_cleanup(aws, cs->csc);
amdgpu_cs_context_cleanup_buffers(aws, csc_current);
amdgpu_cs_context_cleanup(aws, csc_current);
}
memset(cs->csc->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
memset(csc_current->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
amdgpu_get_new_ib(aws, rcs, &cs->main_ib, cs);
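
The flush path above is where the array pays off: the context that was being filled is finalized, amdgpu_csc_swap() flips current_csc_index, and the submit side then works on amdgpu_csc_get_submitted() while recording continues into the new current context. A hedged sketch of that handoff, reusing the illustrative cmdbuf/submission_ctx types from the sketch near the top (submit_async() and reset_ctx() are placeholders, not winsys functions):

void submit_async(struct submission_ctx *c);   /* placeholder prototype */
void reset_ctx(struct submission_ctx *c);      /* placeholder prototype */

static void flush_sketch(struct cmdbuf *cb)
{
   struct submission_ctx *filled = ctx_current(cb);

   /* Hand the filled context to the submit side... */
   ctx_swap(cb);
   submit_async(filled);                /* filled == ctx_submitted(cb) now */

   /* ...and keep recording into the other one. */
   reset_ctx(ctx_current(cb));
}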


@@ -132,15 +132,12 @@ struct amdgpu_cs {
*/
bool uses_alt_fence;
/* We flip between these two CS. While one is being consumed
* by the kernel in another thread, the other one is being filled
* by the pipe driver. */
struct amdgpu_cs_context csc1;
struct amdgpu_cs_context csc2;
/* The currently-used CS. */
struct amdgpu_cs_context *csc;
/* The CS being currently-owned by the other thread. */
struct amdgpu_cs_context *cst;
/* Max AMDGPU_FENCE_RING_SIZE jobs can be submitted. Commands are being filled and submitted
* between the two csc till AMDGPU_FENCE_RING_SIZE jobs are in queue. current_csc_index will
* point to csc that will be filled by commands.
*/
struct amdgpu_cs_context csc[2];
int current_csc_index;
/* buffer_indices_hashlist[hash(bo)] returns -1 if the bo
* isn't part of any buffer lists or the index where the bo could be found.
* Since 1) hash collisions of 2 different bo can happen and 2) we use a
@@ -185,6 +182,24 @@ struct amdgpu_fence {
uint_seq_no queue_seq_no; /* winsys-generated sequence number */
};
static inline struct amdgpu_cs_context *
amdgpu_csc_get_current(struct amdgpu_cs *acs)
{
return &acs->csc[acs->current_csc_index];
}
static inline struct amdgpu_cs_context *
amdgpu_csc_get_submitted(struct amdgpu_cs *acs)
{
return &acs->csc[!acs->current_csc_index];
}
static inline void
amdgpu_csc_swap(struct amdgpu_cs *acs)
{
acs->current_csc_index = !acs->current_csc_index;
}
void amdgpu_fence_destroy(struct amdgpu_fence *fence);
static inline void amdgpu_ctx_reference(struct amdgpu_ctx **dst, struct amdgpu_ctx *src)
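
The three helpers above keep the invariant that the two array entries trade roles on every swap; a check-only sketch of that invariant (the helper function name is made up, and acs is assumed to point at a live struct amdgpu_cs):

#include <assert.h>

static void check_csc_swap_invariant(struct amdgpu_cs *acs)
{
   struct amdgpu_cs_context *was_current = amdgpu_csc_get_current(acs);
   struct amdgpu_cs_context *was_submitted = amdgpu_csc_get_submitted(acs);

   amdgpu_csc_swap(acs);

   /* After one swap, the roles are exchanged. */
   assert(amdgpu_csc_get_current(acs) == was_submitted);
   assert(amdgpu_csc_get_submitted(acs) == was_current);

   amdgpu_csc_swap(acs);   /* a second swap restores the original roles */
}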
@@ -249,7 +264,7 @@ static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo)
{
return amdgpu_lookup_buffer_any_type(cs->csc, bo) != NULL;
return amdgpu_lookup_buffer_any_type(amdgpu_csc_get_current(cs), bo) != NULL;
}
static inline unsigned get_buf_list_idx(struct amdgpu_winsys_bo *bo)
@@ -264,7 +279,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo,
unsigned usage)
{
struct amdgpu_cs_buffer *buffer = amdgpu_lookup_buffer_any_type(cs->csc, bo);
struct amdgpu_cs_buffer *buffer = amdgpu_lookup_buffer_any_type(amdgpu_csc_get_current(cs), bo);
return buffer && (buffer->usage & usage) != 0;
}


@@ -307,7 +307,7 @@ static bool kms_handle_equals(const void *a, const void *b)
static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
return cs->csc->secure;
return amdgpu_csc_get_current(cs)->secure;
}
static uint32_t