nvk: Add support for mesh and task shader binding

Signed-off-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Tested-by: Thomas H.P. Andersen <phomes@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27196>
2026-06-09 23:08:18 +02:00 · 2026-02-05 09:21:53 +01:00 · 2026-02-05 09:21:53 +01:00 · 96ade67e2b
commit 96ade67e2b
parent 3286990481
5 changed files with 119 additions and 11 deletions
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@ -1893,6 +1893,15 @@ nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
   if (cmd->state.gfx.shaders[stage] == shader)
      return;

+   /* IA state changes depending on whether a mesh shader is bound (see
+    * nvk_flush_ia_state) */
+   if (stage == MESA_SHADER_MESH &&
+       (cmd->state.gfx.shaders[stage] == NULL) != (shader == NULL)) {
+      struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
+      BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY);
+      BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
+   }
+
   cmd->state.gfx.shaders[stage] = shader;
   cmd->state.gfx.shaders_dirty |= mesa_to_vk_shader_stage(stage);
 }
@ -2227,7 +2236,7 @@ nvk_cmd_flush_gfx_shaders(struct nvk_cmd_buffer *cmd)
   u_foreach_bit(s, cmd->state.gfx.shaders_dirty &
                    NVK_SHADER_STAGE_GRAPHICS_BITS) {
      mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
-      uint32_t type = mesa_to_nv9097_shader_type(stage);
+      uint32_t type = mesa_to_nv9097_shader_type(stage, has_task_shader);
      types_dirty |= BITFIELD_BIT(type);

      /* Only copy non-NULL shaders because mesh/task alias with vertex and
@ -2478,16 +2487,26 @@ nvk_flush_ia_state(struct nvk_cmd_buffer *cmd)
   const struct vk_dynamic_graphics_state *dyn =
      &cmd->vk.dynamic_graphics_state;

+   /* Mesh shaders are affected by IA state:
+    * - SET_PRIMITIVE_TOPOLOGY takes precedence over SET_MESH_SHADER_A topology.
+    * - SET_DA_PRIMITIVE_RESTART affects mesh shaders.
+    *
+    * So when a mesh shader is bound, we disable primitive restart and force
+    * the point list topology, like the proprietary driver does.
+    */
+   const bool has_mesh_shader = cmd->state.gfx.shaders[MESA_SHADER_MESH];
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
+      uint8_t topology = has_mesh_shader ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST
+                                         : dyn->ia.primitive_topology;
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
      P_MTHD(p, NV9097, SET_PRIMITIVE_TOPOLOGY);
-      P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology));
+      P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(topology));
   }

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
      P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART,
-             dyn->ia.primitive_restart_enable);
+             dyn->ia.primitive_restart_enable && !has_mesh_shader);
   }
 }

--- a/src/nouveau/vulkan/nvk_cmd_indirect.c
+++ b/src/nouveau/vulkan/nvk_cmd_indirect.c
@ -500,7 +500,8 @@ build_gfx_set_exec(nir_builder *b, struct nvk_nir_push *p, nir_def *token_addr,
         if (stage != MESA_SHADER_FRAGMENT)
            last_vtgm = stage;

-         uint32_t type = mesa_to_nv9097_shader_type(stage);
+         uint32_t type = mesa_to_nv9097_shader_type(
+            stage, token->shaderStages & VK_SHADER_STAGE_TASK_BIT_EXT);
         type_stage[type] = stage;
         type_shader_idx[type] = load_global_dw(b, token_addr, i++);
      }
@ -1150,7 +1151,8 @@ nvk_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer,
         uint8_t set_types = 0;
         u_foreach_bit(s, layout->set_stages) {
            mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
-            uint32_t type = mesa_to_nv9097_shader_type(stage);
+            uint32_t type = mesa_to_nv9097_shader_type(
+               stage, layout->set_stages & VK_SHADER_STAGE_TASK_BIT_EXT);
            set_types |= BITFIELD_BIT(type);
         }

--- a/src/nouveau/vulkan/nvk_indirect_execution_set.c
+++ b/src/nouveau/vulkan/nvk_indirect_execution_set.c
@ -128,7 +128,8 @@ nvk_ies_set_gfx_pipeline(struct nvk_device *dev,
      if (stage != MESA_SHADER_FRAGMENT)
         last_vtgm = stage;

-      uint32_t type = mesa_to_nv9097_shader_type(stage);
+      uint32_t type = mesa_to_nv9097_shader_type(
+         stage, pipeline->stages & VK_SHADER_STAGE_TASK_BIT_EXT);
      type_shader[type] = shader;
   }

--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@ -32,6 +32,7 @@
 #include "nv_push_cl9097.h"
 #include "nv_push_clb197.h"
 #include "nv_push_clc397.h"
+#include "nv_push_clc597.h"
 #include "nv_push_clc797.h"

 const struct nak_constant_offset_info nak_const_offsets_base = {
@ -686,14 +687,19 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
 }

 uint32_t
-mesa_to_nv9097_shader_type(mesa_shader_stage stage)
+mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader)
 {
+   if (stage == MESA_SHADER_MESH && !has_task_shader)
+      stage = MESA_SHADER_TASK;
+
   static const uint32_t mesa_to_nv9097[] = {
      [MESA_SHADER_VERTEX]    = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
      [MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
      [MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
      [MESA_SHADER_GEOMETRY]  = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
      [MESA_SHADER_FRAGMENT]  = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
+      [MESA_SHADER_TASK]      = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
+      [MESA_SHADER_MESH]      = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
   };
   assert(stage < ARRAY_SIZE(mesa_to_nv9097));
   return mesa_to_nv9097[stage];
@ -721,12 +727,22 @@ nvk_max_shader_push_dw(const struct nvk_physical_device *pdev,

   uint16_t max_dw_count = 9;

+   if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TASK ||
+       stage == MESA_SHADER_MESH)
+      max_dw_count += 2;
+
   if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL)
      max_dw_count += 2;

   if (stage == MESA_SHADER_FRAGMENT)
      max_dw_count += 13;

+   if (stage == MESA_SHADER_TASK)
+      max_dw_count += 2;
+
+   if (stage == MESA_SHADER_MESH)
+      max_dw_count += 15;
+
   if (last_vtgm) {
      max_dw_count += 8;
      max_dw_count += 4 * (5 + (128 / 4));
@ -750,7 +766,8 @@ nvk_shader_fill_push(struct nvk_device *dev,

   bool has_task_shader = shader->info.stage == MESA_SHADER_MESH &&
                          shader->info.mesh.has_task_shader;
-   const uint32_t type = mesa_to_nv9097_shader_type(shader->info.stage);
+   const uint32_t type =
+      mesa_to_nv9097_shader_type(shader->info.stage, has_task_shader);

   /* We always map index == type */
   const uint32_t idx = type;
@ -800,7 +817,74 @@ nvk_shader_fill_push(struct nvk_device *dev,
                                           shader->info.ts.point_mode));
   }

-   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
+   bool could_be_first_stage = shader->info.stage == MESA_SHADER_VERTEX ||
+                               shader->info.stage == MESA_SHADER_TASK ||
+                               shader->info.stage == MESA_SHADER_MESH;
+   bool is_first_stage =
+      could_be_first_stage && (shader->info.stage != MESA_SHADER_MESH ||
+                               !shader->info.mesh.has_task_shader);
+
+   if (could_be_first_stage) {
+      max_dw_count += 2;
+
+      if (pdev->info.cls_eng3d >= TURING_A && is_first_stage)
+         P_IMMD(p, NVC597, SET_MESH_CONTROL,
+                shader->info.stage != MESA_SHADER_VERTEX);
+   }
+
+   if (shader->info.stage == MESA_SHADER_TASK) {
+      max_dw_count += 2;
+      uint16_t smem_lines = DIV_ROUND_UP(shader->info.task.smem_size, 128);
+      uint16_t task_smem_lines = DIV_ROUND_UP(shader->info.task.payload_smem_size, 128);
+
+      /* Task payload should be part of shared memory */
+      assert(task_smem_lines <= smem_lines);
+
+      P_IMMD(p, NVC597, SET_MESH_INIT_SHADER, {
+         .thread_count = shader->info.task.local_size,
+         .local_buffer_lines = smem_lines,
+         .output_to_m_s_lines = task_smem_lines,
+      });
+   } else if (shader->info.stage == MESA_SHADER_MESH) {
+      max_dw_count += 15;
+
+      assert(shader->info.mesh.max_vertices != 0);
+      assert(shader->info.mesh.max_primitives != 0);
+
+      /* On Turing only, if a task+mesh pipeline was previously bound and we
+       * bind a mesh-only pipeline after it, the hardware will misbehave in
+       * TRACK_WITH_FILTER mode and assume that the vertex stage has a task
+       * shader instead.
+       *
+       * The NVIDIA proprietary driver applies this workaround on all
+       * generations, so we do the same here just in case.
+       */
+      P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK);
+      P_MTHD(p, NVC597, SET_MESH_SHADER_A);
+      P_NVC597_SET_MESH_SHADER_A(p, {
+         .output_topology = shader->info.mesh.topology,
+         .max_vertex = shader->info.mesh.max_vertices,
+         .max_primitive = shader->info.mesh.max_primitives,
+      });
+      P_NVC597_SET_MESH_SHADER_B(p, {
+         .shared_mem_lines = DIV_ROUND_UP(shader->info.mesh.smem_size, 128),
+         .thread_count = shader->info.mesh.local_size,
+      });
+      P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK_WITH_FILTER);
+
+      if (shader->info.mesh.has_gs_sph) {
+         P_IMMD(p, NV9097, SET_PIPELINE_SHADER(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY), {
+            .enable  = shader->info.mesh.has_gs_sph,
+            .type    = TYPE_GEOMETRY,
+         });
+
+         uint64_t gs_hdr_addr = shader->gs_hdr_addr;
+         P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY));
+         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr >> 32);
+         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr);
+         P_IMMD(p, NVC397, SET_GS_MODE, TYPE_ANY);
+      }
+   } else if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      max_dw_count += 13;

      P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);
--- a/src/nouveau/vulkan/nvk_shader.h
+++ b/src/nouveau/vulkan/nvk_shader.h
@ -34,7 +34,9 @@ struct vk_shader_module;
   (VK_SHADER_STAGE_VERTEX_BIT | \
    VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | \
    VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | \
-    VK_SHADER_STAGE_GEOMETRY_BIT)
+    VK_SHADER_STAGE_GEOMETRY_BIT | \
+    VK_SHADER_STAGE_TASK_BIT_EXT | \
+    VK_SHADER_STAGE_MESH_BIT_EXT)

 #define NVK_SHADER_STAGE_GRAPHICS_BITS \
   (NVK_SHADER_STAGE_VTGM_BITS | VK_SHADER_STAGE_FRAGMENT_BIT)
@ -161,7 +163,7 @@ nvk_compile_nir_shader(struct nvk_device *dev, nir_shader *nir,
                       const VkAllocationCallbacks *alloc,
                       struct nvk_shader **shader_out);

-uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage);
+uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader);
 uint32_t nvk_pipeline_bind_group(mesa_shader_stage stage, bool has_task_shader);

 #endif