nvk: Use nvk_queue_submit_simple() for nvk_queue_state_update()

This change has two main advantages. First is that the previous code had a subtle bug where state updates would drop the previous state pushbuf without first guaranteeing that the queue was idle, potentially leading to a fault. This worked when I first wrote the code because we were using the old nouveau UAPI which took a list of BOs on each submit and the kernel would interanlly reference count them. With the modern UAPI, there is no such safety net. The second effect of this commit is that we're now only submitting the pushbuf when one of the pointers or ranges actually changes. This should hopefully reduce GPU stalling as setting those pointers is a pretty heavy-weight operation. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30596>
2026-05-05 03:08:05 +02:00 · 2024-08-09 15:37:14 -05:00 · 2024-08-09 15:37:14 -05:00 · f49a3e0a68
commit f49a3e0a68
parent 5fa5c5a300
2 changed files with 18 additions and 53 deletions
--- a/src/nouveau/vulkan/nvk_queue.c
+++ b/src/nouveau/vulkan/nvk_queue.c
@ -19,6 +19,10 @@
 #include "nv_push_clc3c0.h"
 #include "nv_push_clc397.h"

+static VkResult
+nvk_queue_submit_simple(struct nvk_queue *queue,
+                        uint32_t dw_count, const uint32_t *dw);
+
 static void
 nvk_queue_state_init(struct nvk_queue_state *qs)
 {
@ -35,21 +39,6 @@ nvk_queue_state_finish(struct nvk_device *dev,
      nvkmd_mem_unref(qs->samplers.mem);
   if (qs->slm.mem)
      nvkmd_mem_unref(qs->slm.mem);
-   if (qs->push.mem)
-      nvkmd_mem_unref(qs->push.mem);
-}
-
-static void
-nvk_queue_state_dump_push(struct nvk_device *dev,
-                          struct nvk_queue_state *qs, FILE *fp)
-{
-   struct nvk_physical_device *pdev = nvk_device_physical(dev);
-
-   struct nv_push push = {
-      .start = (uint32_t *)qs->push.mem->map,
-      .end = (uint32_t *)qs->push.mem->map + qs->push.dw_count,
-   };
-   vk_push_print(fp, &push, &pdev->info);
 }

 static VkResult
@ -103,25 +92,12 @@ nvk_queue_state_update(struct nvk_queue *queue,
         nvkmd_mem_unref(mem);
   }

-   /* TODO: We're currently depending on kernel reference counting to protect
-    * us here.  If we ever stop reference counting in the kernel, we will
-    * either need to delay destruction or hold on to our extra BO references
-    * and insert a GPU stall here if anything has changed before dropping our
-    * old references.
-    */
-
   if (!dirty)
      return VK_SUCCESS;

-   struct nvkmd_mem *push_mem;
-   VkResult result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
-                                                256 * 4, 0, NVKMD_MEM_LOCAL,
-                                                NVKMD_MEM_MAP_WR, &push_mem);
-   if (result != VK_SUCCESS)
-      return result;
-
+   uint32_t push_data[64];
   struct nv_push push;
-   nv_push_init(&push, push_mem->map, 256);
+   nv_push_init(&push, push_data, 64);
   struct nv_push *p = &push;

   if (qs->images.mem) {
@ -238,13 +214,19 @@ nvk_queue_state_update(struct nvk_queue *queue,
      P_IMMD(p, NV9097, SET_SHADER_LOCAL_MEMORY_WINDOW, 0xff << 24);
   }

-   if (qs->push.mem)
-      nvkmd_mem_unref(qs->push.mem);
+   /* From nvc0_screen.c:
+    *
+    *    "Reduce likelihood of collision with real buffers by placing the
+    *    hole at the top of the 4G area. This will have to be dealt with
+    *    for real eventually by blocking off that area from the VM."
+    *
+    * Really?!?  TODO: Fix this for realz.  Annoyingly, we only have a
+    * 32-bit pointer for this in 3D rather than a full 48 like we have for
+    * compute.
+    */
+   P_IMMD(p, NV9097, SET_SHADER_LOCAL_MEMORY_WINDOW, 0xff << 24);

-   qs->push.mem = push_mem;
-   qs->push.dw_count = nv_push_dw_count(&push);
-
-   return VK_SUCCESS;
+   return nvk_queue_submit_simple(queue, nv_push_dw_count(p), push_data);
 }

 static VkResult
@ -299,16 +281,6 @@ nvk_queue_submit_exec(struct nvk_queue *queue,
      if (result != VK_SUCCESS)
         return result;

-      if (queue->state.push.mem != NULL) {
-         struct nvkmd_ctx_exec exec = {
-            .addr = queue->state.push.mem->va->addr,
-            .size_B = queue->state.push.dw_count * 4,
-         };
-         result = nvkmd_ctx_exec(queue->exec_ctx, &queue->vk.base, 1, &exec);
-         if (result != VK_SUCCESS)
-            goto fail;
-      }
-
      uint64_t upload_time_point;
      result = nvk_upload_queue_flush(dev, &dev->upload, &upload_time_point);
      if (result != VK_SUCCESS)
@ -374,8 +346,6 @@ nvk_queue_submit_exec(struct nvk_queue *queue,
 fail:
   if ((sync && result != VK_SUCCESS) ||
       (pdev->debug_flags & NVK_DEBUG_PUSH_DUMP)) {
-      nvk_queue_state_dump_push(dev, &queue->state, stderr);
-
      for (unsigned i = 0; i < submit->command_buffer_count; i++) {
         struct nvk_cmd_buffer *cmd =
            container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);
--- a/src/nouveau/vulkan/nvk_queue.h
+++ b/src/nouveau/vulkan/nvk_queue.h
@ -34,11 +34,6 @@ struct nvk_queue_state {
      uint32_t bytes_per_warp;
      uint32_t bytes_per_tpc;
   } slm;
-
-   struct {
-      struct nvkmd_mem *mem;
-      uint32_t dw_count;
-   } push;
 };

 struct nvk_queue {