nvk: Add support for mesh and task shader binding

Signed-off-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Tested-by: Thomas H.P. Andersen <phomes@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27196>
This commit is contained in:
Mary Guillemard 2026-02-05 09:21:53 +01:00 committed by Marge Bot
parent 3286990481
commit 96ade67e2b
5 changed files with 119 additions and 11 deletions

View file

@ -1893,6 +1893,15 @@ nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
if (cmd->state.gfx.shaders[stage] == shader)
return;
/* IA state changes depending on whether a mesh shader is bound (see
* nvk_flush_ia_state) */
if (stage == MESA_SHADER_MESH &&
(cmd->state.gfx.shaders[stage] == NULL) != (shader == NULL)) {
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY);
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
}
cmd->state.gfx.shaders[stage] = shader;
cmd->state.gfx.shaders_dirty |= mesa_to_vk_shader_stage(stage);
}
@ -2227,7 +2236,7 @@ nvk_cmd_flush_gfx_shaders(struct nvk_cmd_buffer *cmd)
u_foreach_bit(s, cmd->state.gfx.shaders_dirty &
NVK_SHADER_STAGE_GRAPHICS_BITS) {
mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
uint32_t type = mesa_to_nv9097_shader_type(stage);
uint32_t type = mesa_to_nv9097_shader_type(stage, has_task_shader);
types_dirty |= BITFIELD_BIT(type);
/* Only copy non-NULL shaders because mesh/task alias with vertex and
@ -2478,16 +2487,26 @@ nvk_flush_ia_state(struct nvk_cmd_buffer *cmd)
const struct vk_dynamic_graphics_state *dyn =
&cmd->vk.dynamic_graphics_state;
/* Mesh shaders are affected by IA state:
* - SET_PRIMITIVE_TOPOLOGY takes precedence over SET_MESH_SHADER_A topology.
* - SET_DA_PRIMITIVE_RESTART affects mesh shaders.
*
* So when a mesh shader is bound, we disable primitive restart and force
* the point-list topology, as the proprietary driver does.
*/
const bool has_mesh_shader = cmd->state.gfx.shaders[MESA_SHADER_MESH];
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
uint8_t topology = has_mesh_shader ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST
: dyn->ia.primitive_topology;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
P_MTHD(p, NV9097, SET_PRIMITIVE_TOPOLOGY);
P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology));
P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(topology));
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART,
dyn->ia.primitive_restart_enable);
dyn->ia.primitive_restart_enable && !has_mesh_shader);
}
}

View file

@ -500,7 +500,8 @@ build_gfx_set_exec(nir_builder *b, struct nvk_nir_push *p, nir_def *token_addr,
if (stage != MESA_SHADER_FRAGMENT)
last_vtgm = stage;
uint32_t type = mesa_to_nv9097_shader_type(stage);
uint32_t type = mesa_to_nv9097_shader_type(
stage, token->shaderStages & VK_SHADER_STAGE_TASK_BIT_EXT);
type_stage[type] = stage;
type_shader_idx[type] = load_global_dw(b, token_addr, i++);
}
@ -1150,7 +1151,8 @@ nvk_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer,
uint8_t set_types = 0;
u_foreach_bit(s, layout->set_stages) {
mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s);
uint32_t type = mesa_to_nv9097_shader_type(stage);
uint32_t type = mesa_to_nv9097_shader_type(
stage, layout->set_stages & VK_SHADER_STAGE_TASK_BIT_EXT);
set_types |= BITFIELD_BIT(type);
}

View file

@ -128,7 +128,8 @@ nvk_ies_set_gfx_pipeline(struct nvk_device *dev,
if (stage != MESA_SHADER_FRAGMENT)
last_vtgm = stage;
uint32_t type = mesa_to_nv9097_shader_type(stage);
uint32_t type = mesa_to_nv9097_shader_type(
stage, pipeline->stages & VK_SHADER_STAGE_TASK_BIT_EXT);
type_shader[type] = shader;
}

View file

@ -32,6 +32,7 @@
#include "nv_push_cl9097.h"
#include "nv_push_clb197.h"
#include "nv_push_clc397.h"
#include "nv_push_clc597.h"
#include "nv_push_clc797.h"
const struct nak_constant_offset_info nak_const_offsets_base = {
@ -686,14 +687,19 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
}
uint32_t
mesa_to_nv9097_shader_type(mesa_shader_stage stage)
mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader)
{
if (stage == MESA_SHADER_MESH && !has_task_shader)
stage = MESA_SHADER_TASK;
static const uint32_t mesa_to_nv9097[] = {
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
[MESA_SHADER_TASK] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_MESH] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
};
assert(stage < ARRAY_SIZE(mesa_to_nv9097));
return mesa_to_nv9097[stage];
@ -721,12 +727,22 @@ nvk_max_shader_push_dw(const struct nvk_physical_device *pdev,
uint16_t max_dw_count = 9;
if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TASK ||
stage == MESA_SHADER_MESH)
max_dw_count += 2;
if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL)
max_dw_count += 2;
if (stage == MESA_SHADER_FRAGMENT)
max_dw_count += 13;
if (stage == MESA_SHADER_TASK)
max_dw_count += 2;
if (stage == MESA_SHADER_MESH)
max_dw_count += 15;
if (last_vtgm) {
max_dw_count += 8;
max_dw_count += 4 * (5 + (128 / 4));
@ -750,7 +766,8 @@ nvk_shader_fill_push(struct nvk_device *dev,
bool has_task_shader = shader->info.stage == MESA_SHADER_MESH &&
shader->info.mesh.has_task_shader;
const uint32_t type = mesa_to_nv9097_shader_type(shader->info.stage);
const uint32_t type =
mesa_to_nv9097_shader_type(shader->info.stage, has_task_shader);
/* We always map index == type */
const uint32_t idx = type;
@ -800,7 +817,74 @@ nvk_shader_fill_push(struct nvk_device *dev,
shader->info.ts.point_mode));
}
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
bool could_be_first_stage = shader->info.stage == MESA_SHADER_VERTEX ||
shader->info.stage == MESA_SHADER_TASK ||
shader->info.stage == MESA_SHADER_MESH;
bool is_first_stage =
could_be_first_stage && (shader->info.stage != MESA_SHADER_MESH ||
!shader->info.mesh.has_task_shader);
if (could_be_first_stage) {
max_dw_count += 2;
if (pdev->info.cls_eng3d >= TURING_A && is_first_stage)
P_IMMD(p, NVC597, SET_MESH_CONTROL,
shader->info.stage != MESA_SHADER_VERTEX);
}
if (shader->info.stage == MESA_SHADER_TASK) {
max_dw_count += 2;
uint16_t smem_lines = DIV_ROUND_UP(shader->info.task.smem_size, 128);
uint16_t task_smem_lines = DIV_ROUND_UP(shader->info.task.payload_smem_size, 128);
/* Task payload should be part of shared memory */
assert(task_smem_lines <= smem_lines);
P_IMMD(p, NVC597, SET_MESH_INIT_SHADER, {
.thread_count = shader->info.task.local_size,
.local_buffer_lines = smem_lines,
.output_to_m_s_lines = task_smem_lines,
});
} else if (shader->info.stage == MESA_SHADER_MESH) {
max_dw_count += 15;
assert(shader->info.mesh.max_vertices != 0);
assert(shader->info.mesh.max_primitives != 0);
/* On Turing only, if a task+mesh pipeline was previously bound and we
* then bind a mesh-only pipeline, the hardware misbehaves in
* TRACK_WITH_FILTER mode and assumes that the vertex stage still has a
* task shader.
*
* The NVIDIA proprietary driver applies this workaround on all
* generations, so we do the same here just in case.
*/
P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK);
P_MTHD(p, NVC597, SET_MESH_SHADER_A);
P_NVC597_SET_MESH_SHADER_A(p, {
.output_topology = shader->info.mesh.topology,
.max_vertex = shader->info.mesh.max_vertices,
.max_primitive = shader->info.mesh.max_primitives,
});
P_NVC597_SET_MESH_SHADER_B(p, {
.shared_mem_lines = DIV_ROUND_UP(shader->info.mesh.smem_size, 128),
.thread_count = shader->info.mesh.local_size,
});
P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK_WITH_FILTER);
if (shader->info.mesh.has_gs_sph) {
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY), {
.enable = shader->info.mesh.has_gs_sph,
.type = TYPE_GEOMETRY,
});
uint64_t gs_hdr_addr = shader->gs_hdr_addr;
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY));
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr >> 32);
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr);
P_IMMD(p, NVC397, SET_GS_MODE, TYPE_ANY);
}
} else if (shader->info.stage == MESA_SHADER_FRAGMENT) {
max_dw_count += 13;
P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);

View file

@ -34,7 +34,9 @@ struct vk_shader_module;
(VK_SHADER_STAGE_VERTEX_BIT | \
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | \
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | \
VK_SHADER_STAGE_GEOMETRY_BIT)
VK_SHADER_STAGE_GEOMETRY_BIT | \
VK_SHADER_STAGE_TASK_BIT_EXT | \
VK_SHADER_STAGE_MESH_BIT_EXT)
#define NVK_SHADER_STAGE_GRAPHICS_BITS \
(NVK_SHADER_STAGE_VTGM_BITS | VK_SHADER_STAGE_FRAGMENT_BIT)
@ -161,7 +163,7 @@ nvk_compile_nir_shader(struct nvk_device *dev, nir_shader *nir,
const VkAllocationCallbacks *alloc,
struct nvk_shader **shader_out);
uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage);
uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader);
uint32_t nvk_pipeline_bind_group(mesa_shader_stage stage, bool has_task_shader);
#endif