From 662a346ce3a1f557a61d745b70116be1e16607e9 Mon Sep 17 00:00:00 2001
From: Mary Guillemard
Date: Wed, 13 May 2026 17:35:34 +0200
Subject: [PATCH] nvk: Multiply by local_size for CS invocations in DGC codepath

We were entirely missing the local size in the accounting for CS
invocations.

Cc: mesa-stable
Signed-off-by: Mary Guillemard
Reviewed-by: Mel Henning
Part-of:
---
 src/nouveau/compiler/nak.h            |  4 ++++
 src/nouveau/compiler/nak/qmd.rs       | 11 +++++++++
 src/nouveau/vulkan/nvk_cmd_indirect.c | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h
index 6e952b00e98..bdceef7e22b 100644
--- a/src/nouveau/compiler/nak.h
+++ b/src/nouveau/compiler/nak.h
@@ -284,6 +284,10 @@ struct nak_qmd_dispatch_size_layout {
    uint16_t x_start, x_end;
    uint16_t y_start, y_end;
    uint16_t z_start, z_end;
+
+   uint16_t local_x_start, local_x_end;
+   uint16_t local_y_start, local_y_end;
+   uint16_t local_z_start, local_z_end;
 };

 struct nak_qmd_dispatch_size_layout
diff --git a/src/nouveau/compiler/nak/qmd.rs b/src/nouveau/compiler/nak/qmd.rs
index 281937afc28..d91d6a32841 100644
--- a/src/nouveau/compiler/nak/qmd.rs
+++ b/src/nouveau/compiler/nak/qmd.rs
@@ -80,6 +80,11 @@ macro_rules! qmd_impl_common {
             let w = paste! {$c::[<$s _CTA_RASTER_WIDTH>]};
             let h = paste! {$c::[<$s _CTA_RASTER_HEIGHT>]};
             let d = paste! {$c::[<$s _CTA_RASTER_DEPTH>]};
+
+            let local_w = paste! {$c::[<$s _CTA_THREAD_DIMENSION0>]};
+            let local_h = paste! {$c::[<$s _CTA_THREAD_DIMENSION1>]};
+            let local_d = paste! {$c::[<$s _CTA_THREAD_DIMENSION2>]};
+
             nak_qmd_dispatch_size_layout {
                 x_start: w.start as u16,
                 x_end: w.end as u16,
@@ -87,6 +92,12 @@ macro_rules! qmd_impl_common {
                 y_end: h.end as u16,
                 z_start: d.start as u16,
                 z_end: d.end as u16,
+                local_x_start: local_w.start as u16,
+                local_x_end: local_w.end as u16,
+                local_y_start: local_h.start as u16,
+                local_y_end: local_h.end as u16,
+                local_z_start: local_d.start as u16,
+                local_z_end: local_d.end as u16,
             }
         };

diff --git a/src/nouveau/vulkan/nvk_cmd_indirect.c b/src/nouveau/vulkan/nvk_cmd_indirect.c
index 5b0be386fa7..cac8b0d4d3c 100644
--- a/src/nouveau/vulkan/nvk_cmd_indirect.c
+++ b/src/nouveau/vulkan/nvk_cmd_indirect.c
@@ -394,12 +394,43 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
       nir_ior(b, load_global_dw(b, shader_qmd_addr, cb0_addr_hi_dw),
                  nir_unpack_64_2x32_split_y(b, root_addr_shifted));

+   /* Each local size field must sit inside one 32-bit dword so that a
+    * single (offset, width) bitfield extract can read it back */
+   assert(qmd_layout.local_x_start / 32 ==
+          (qmd_layout.local_x_end - 1) / 32);
+   assert(qmd_layout.local_y_start / 32 ==
+          (qmd_layout.local_y_end - 1) / 32);
+   assert(qmd_layout.local_z_start / 32 ==
+          (qmd_layout.local_z_end - 1) / 32);
+
+   /* Preload local size parameters as we are going to use them */
+   qmd_repl[qmd_layout.local_x_start / 32] = load_global_dw(
+      b, shader_qmd_addr, qmd_layout.local_x_start / 32);
+   qmd_repl[qmd_layout.local_y_start / 32] = load_global_dw(
+      b, shader_qmd_addr, qmd_layout.local_y_start / 32);
+   qmd_repl[qmd_layout.local_z_start / 32] = load_global_dw(
+      b, shader_qmd_addr, qmd_layout.local_z_start / 32);
+
    copy_repl_global_dw(b, qmd_addr, shader_qmd_addr,
                        qmd_repl, qmd_repl_count);

-   /* Now emit commands */
+   nir_def *local_x = nir_ubitfield_extract_imm(
+      b, qmd_repl[qmd_layout.local_x_start / 32], qmd_layout.local_x_start % 32,
+      qmd_layout.local_x_end - qmd_layout.local_x_start);
+   nir_def *local_y = nir_ubitfield_extract_imm(
+      b, qmd_repl[qmd_layout.local_y_start / 32], qmd_layout.local_y_start % 32,
+      qmd_layout.local_y_end - qmd_layout.local_y_start);
+   nir_def *local_z = nir_ubitfield_extract_imm(
+      b, qmd_repl[qmd_layout.local_z_start / 32], qmd_layout.local_z_start % 32,
+      qmd_layout.local_z_end - qmd_layout.local_z_start);
+   nir_def *local_size =
+      nir_imul(b, nir_imul(b, local_x, local_y), local_z);
+
    nir_def *invoc = nir_imul_2x32_64(b, disp_size_x, disp_size_y);
    invoc = nir_imul(b, invoc, nir_u2u64(b, disp_size_z));
+   invoc = nir_imul(b, invoc, nir_u2u64(b, local_size));
+
+   /* Now emit commands */
    if (pdev->info.cls_compute >= AMPERE_COMPUTE_B)
       nvk_nir_P_1INC(b, p, NVC7C0, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS), 2);
    else