nvk: Multiply by local_size for CS invocations in DGC codepath

We were entirely missing the local size in the accounting for CS invocations.

Cc: mesa-stable
Signed-off-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41553>
2026-05-23 23:48:18 +02:00 · 2026-05-13 17:35:34 +02:00 · 2026-05-13 17:35:34 +02:00 · 662a346ce3
commit 662a346ce3
parent dfa779c559
3 changed files with 47 additions and 1 deletions
--- a/src/nouveau/compiler/nak.h
+++ b/src/nouveau/compiler/nak.h
@ -284,6 +284,10 @@ struct nak_qmd_dispatch_size_layout {
   uint16_t x_start, x_end;
   uint16_t y_start, y_end;
   uint16_t z_start, z_end;
+
+   uint16_t local_x_start, local_x_end;
+   uint16_t local_y_start, local_y_end;
+   uint16_t local_z_start, local_z_end;
 };

 struct nak_qmd_dispatch_size_layout
--- a/src/nouveau/compiler/nak/qmd.rs
+++ b/src/nouveau/compiler/nak/qmd.rs
@ -80,6 +80,11 @@ macro_rules! qmd_impl_common {
            let w = paste! {$c::[<$s _CTA_RASTER_WIDTH>]};
            let h = paste! {$c::[<$s _CTA_RASTER_HEIGHT>]};
            let d = paste! {$c::[<$s _CTA_RASTER_DEPTH>]};
+
+            let local_w = paste! {$c::[<$s _CTA_THREAD_DIMENSION0>]};
+            let local_h = paste! {$c::[<$s _CTA_THREAD_DIMENSION1>]};
+            let local_d = paste! {$c::[<$s _CTA_THREAD_DIMENSION2>]};
+
            nak_qmd_dispatch_size_layout {
                x_start: w.start as u16,
                x_end: w.end as u16,
@ -87,6 +92,12 @@ macro_rules! qmd_impl_common {
                y_end: h.end as u16,
                z_start: d.start as u16,
                z_end: d.end as u16,
+                local_x_start: local_w.start as u16,
+                local_x_end: local_w.end as u16,
+                local_y_start: local_h.start as u16,
+                local_y_end: local_h.end as u16,
+                local_z_start: local_d.start as u16,
+                local_z_end: local_d.end as u16,
            }
        };

--- a/src/nouveau/vulkan/nvk_cmd_indirect.c
+++ b/src/nouveau/vulkan/nvk_cmd_indirect.c
@ -394,12 +394,43 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
               nir_ior(b, load_global_dw(b, shader_qmd_addr, cb0_addr_hi_dw),
                          nir_unpack_64_2x32_split_y(b, root_addr_shifted));

+            /* Ensure each local size parameter fits entirely within a single
+             * 32-bit dword */
+            assert(qmd_layout.local_x_start / 32 ==
+                   (qmd_layout.local_x_end - 1) / 32);
+            assert(qmd_layout.local_y_start / 32 ==
+                   (qmd_layout.local_y_end - 1) / 32);
+            assert(qmd_layout.local_z_start / 32 ==
+                   (qmd_layout.local_z_end - 1) / 32);
+
+            /* Preload local size parameters as we are going to use them */
+            qmd_repl[qmd_layout.local_x_start / 32] = load_global_dw(
+               b, shader_qmd_addr, qmd_layout.local_x_start / 32);
+            qmd_repl[qmd_layout.local_y_start / 32] = load_global_dw(
+               b, shader_qmd_addr, qmd_layout.local_y_start / 32);
+            qmd_repl[qmd_layout.local_z_start / 32] = load_global_dw(
+               b, shader_qmd_addr, qmd_layout.local_z_start / 32);
+
            copy_repl_global_dw(b, qmd_addr, shader_qmd_addr,
                                qmd_repl, qmd_repl_count);

-            /* Now emit commands */
+            /* Pull each local (workgroup) size component out of its QMD dword.
+             * The layout ranges are half-open (the asserts above check
+             * (end - 1) / 32), so a field is (end - start) bits wide.
+             * nir_ubitfield_extract_imm() takes (offset, width), not an end
+             * position: passing end % 32 yields a width of 0 for any field
+             * that ends on a dword boundary.
+             */
+            nir_def *local_x = nir_ubitfield_extract_imm(
+               b, qmd_repl[qmd_layout.local_x_start / 32],
+               qmd_layout.local_x_start % 32,
+               qmd_layout.local_x_end - qmd_layout.local_x_start);
+            nir_def *local_y = nir_ubitfield_extract_imm(
+               b, qmd_repl[qmd_layout.local_y_start / 32],
+               qmd_layout.local_y_start % 32,
+               qmd_layout.local_y_end - qmd_layout.local_y_start);
+            nir_def *local_z = nir_ubitfield_extract_imm(
+               b, qmd_repl[qmd_layout.local_z_start / 32],
+               qmd_layout.local_z_start % 32,
+               qmd_layout.local_z_end - qmd_layout.local_z_start);
+
+            /* Threads per workgroup: x * y * z */
+            nir_def *local_size =
+               nir_imul(b, nir_imul(b, local_x, local_y), local_z);
+
            nir_def *invoc = nir_imul_2x32_64(b, disp_size_x, disp_size_y);
            invoc = nir_imul(b, invoc, nir_u2u64(b, disp_size_z));
+            invoc = nir_imul(b, invoc, nir_u2u64(b, local_size));
+
+            /* Now emit commands */
            if (pdev->info.cls_compute >= AMPERE_COMPUTE_B)
               nvk_nir_P_1INC(b, p, NVC7C0, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS), 2);
            else