diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index fca0bbae671..f9aec38a810 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -1893,6 +1893,15 @@ nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd, if (cmd->state.gfx.shaders[stage] == shader) return; + /* IA state changes depending on whether a mesh shader is bound (see + * nvk_flush_ia_state) */ + if (stage == MESA_SHADER_MESH && + (cmd->state.gfx.shaders[stage] == NULL) != (shader == NULL)) { + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE); + } + cmd->state.gfx.shaders[stage] = shader; cmd->state.gfx.shaders_dirty |= mesa_to_vk_shader_stage(stage); } @@ -2227,7 +2236,7 @@ nvk_cmd_flush_gfx_shaders(struct nvk_cmd_buffer *cmd) u_foreach_bit(s, cmd->state.gfx.shaders_dirty & NVK_SHADER_STAGE_GRAPHICS_BITS) { mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s); - uint32_t type = mesa_to_nv9097_shader_type(stage); + uint32_t type = mesa_to_nv9097_shader_type(stage, has_task_shader); types_dirty |= BITFIELD_BIT(type); /* Only copy non-NULL shaders because mesh/task alias with vertex and @@ -2478,16 +2487,26 @@ nvk_flush_ia_state(struct nvk_cmd_buffer *cmd) const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + /* Mesh shaders are affected by IA state: + * - SET_PRIMITIVE_TOPOLOGY takes precedence over SET_MESH_SHADER_A topology. + * - SET_DA_PRIMITIVE_RESTART affects mesh shaders. + * + * So in case we have mesh shader enabled, we disable primitive restart and + * force point list like what the proprietary driver does. + */ + const bool has_mesh_shader = cmd->state.gfx.shaders[MESA_SHADER_MESH]; if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) { + uint8_t topology = has_mesh_shader ? 
VK_PRIMITIVE_TOPOLOGY_POINT_LIST + : dyn->ia.primitive_topology; struct nv_push *p = nvk_cmd_buffer_push(cmd, 2); P_MTHD(p, NV9097, SET_PRIMITIVE_TOPOLOGY); - P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(dyn->ia.primitive_topology)); + P_INLINE_DATA(p, vk_to_nv9097_primitive_topology(topology)); } if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { struct nv_push *p = nvk_cmd_buffer_push(cmd, 2); P_IMMD(p, NV9097, SET_DA_PRIMITIVE_RESTART, - dyn->ia.primitive_restart_enable); + dyn->ia.primitive_restart_enable && !has_mesh_shader); } } diff --git a/src/nouveau/vulkan/nvk_cmd_indirect.c b/src/nouveau/vulkan/nvk_cmd_indirect.c index cac8b0d4d3c..a059eda5c37 100644 --- a/src/nouveau/vulkan/nvk_cmd_indirect.c +++ b/src/nouveau/vulkan/nvk_cmd_indirect.c @@ -500,7 +500,8 @@ build_gfx_set_exec(nir_builder *b, struct nvk_nir_push *p, nir_def *token_addr, if (stage != MESA_SHADER_FRAGMENT) last_vtgm = stage; - uint32_t type = mesa_to_nv9097_shader_type(stage); + uint32_t type = mesa_to_nv9097_shader_type( + stage, token->shaderStages & VK_SHADER_STAGE_TASK_BIT_EXT); type_stage[type] = stage; type_shader_idx[type] = load_global_dw(b, token_addr, i++); } @@ -1150,7 +1151,8 @@ nvk_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, uint8_t set_types = 0; u_foreach_bit(s, layout->set_stages) { mesa_shader_stage stage = vk_to_mesa_shader_stage(1 << s); - uint32_t type = mesa_to_nv9097_shader_type(stage); + uint32_t type = mesa_to_nv9097_shader_type( + stage, layout->set_stages & VK_SHADER_STAGE_TASK_BIT_EXT); set_types |= BITFIELD_BIT(type); } diff --git a/src/nouveau/vulkan/nvk_indirect_execution_set.c b/src/nouveau/vulkan/nvk_indirect_execution_set.c index 8d3455e4a26..155ed8a98f7 100644 --- a/src/nouveau/vulkan/nvk_indirect_execution_set.c +++ b/src/nouveau/vulkan/nvk_indirect_execution_set.c @@ -128,7 +128,8 @@ nvk_ies_set_gfx_pipeline(struct nvk_device *dev, if (stage != MESA_SHADER_FRAGMENT) last_vtgm = stage; - uint32_t type = 
mesa_to_nv9097_shader_type(stage); + uint32_t type = mesa_to_nv9097_shader_type( + stage, pipeline->stages & VK_SHADER_STAGE_TASK_BIT_EXT); type_shader[type] = shader; } diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index 0aff769d729..4053b8c634c 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -32,6 +32,7 @@ #include "nv_push_cl9097.h" #include "nv_push_clb197.h" #include "nv_push_clc397.h" +#include "nv_push_clc597.h" #include "nv_push_clc797.h" const struct nak_constant_offset_info nak_const_offsets_base = { @@ -686,14 +687,19 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader) } uint32_t -mesa_to_nv9097_shader_type(mesa_shader_stage stage) +mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader) { + if (stage == MESA_SHADER_MESH && !has_task_shader) + stage = MESA_SHADER_TASK; + static const uint32_t mesa_to_nv9097[] = { [MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX, [MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT, [MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION, [MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, [MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL, + [MESA_SHADER_TASK] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX, + [MESA_SHADER_MESH] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION, }; assert(stage < ARRAY_SIZE(mesa_to_nv9097)); return mesa_to_nv9097[stage]; @@ -721,12 +727,22 @@ nvk_max_shader_push_dw(const struct nvk_physical_device *pdev, uint16_t max_dw_count = 9; + if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TASK || + stage == MESA_SHADER_MESH) + max_dw_count += 2; + if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL) max_dw_count += 2; if (stage == MESA_SHADER_FRAGMENT) max_dw_count += 13; + if (stage == MESA_SHADER_TASK) + max_dw_count += 2; + + if (stage == MESA_SHADER_MESH) + max_dw_count += 15; + if (last_vtgm) 
{ max_dw_count += 8; max_dw_count += 4 * (5 + (128 / 4)); @@ -750,7 +766,8 @@ nvk_shader_fill_push(struct nvk_device *dev, bool has_task_shader = shader->info.stage == MESA_SHADER_MESH && shader->info.mesh.has_task_shader; - const uint32_t type = mesa_to_nv9097_shader_type(shader->info.stage); + const uint32_t type = + mesa_to_nv9097_shader_type(shader->info.stage, has_task_shader); /* We always map index == type */ const uint32_t idx = type; @@ -800,7 +817,74 @@ nvk_shader_fill_push(struct nvk_device *dev, shader->info.ts.point_mode)); } - if (shader->info.stage == MESA_SHADER_FRAGMENT) { + bool could_be_first_stage = shader->info.stage == MESA_SHADER_VERTEX || + shader->info.stage == MESA_SHADER_TASK || + shader->info.stage == MESA_SHADER_MESH; + bool is_first_stage = + could_be_first_stage && (shader->info.stage != MESA_SHADER_MESH || + !shader->info.mesh.has_task_shader); + + if (could_be_first_stage) { + max_dw_count += 2; + + if (pdev->info.cls_eng3d >= TURING_A && is_first_stage) + P_IMMD(p, NVC597, SET_MESH_CONTROL, + shader->info.stage != MESA_SHADER_VERTEX); + } + + if (shader->info.stage == MESA_SHADER_TASK) { + max_dw_count += 2; + uint16_t smem_lines = DIV_ROUND_UP(shader->info.task.smem_size, 128); + uint16_t task_smem_lines = DIV_ROUND_UP(shader->info.task.payload_smem_size, 128); + + /* Task payload should be part of shared memory */ + assert(task_smem_lines <= smem_lines); + + P_IMMD(p, NVC597, SET_MESH_INIT_SHADER, { + .thread_count = shader->info.task.local_size, + .local_buffer_lines = smem_lines, + .output_to_m_s_lines = task_smem_lines, + }); + } else if (shader->info.stage == MESA_SHADER_MESH) { + max_dw_count += 15; + + assert(shader->info.mesh.max_vertices != 0); + assert(shader->info.mesh.max_primitives != 0); + + /* On Turing only, if a task+mesh pipeline was previously bound and we + * bind a mesh only pipeline after it, the hardware will misbehave in + * TRACK_WITH_FILTER mode and assume that the vertex stage has a task + * shader 
instead. + * + * The NVIDIA proprietary driver applies this workaround on all generations, so + * we do the same here just in case. + */ + P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK); + P_MTHD(p, NVC597, SET_MESH_SHADER_A); + P_NVC597_SET_MESH_SHADER_A(p, { + .output_topology = shader->info.mesh.topology, + .max_vertex = shader->info.mesh.max_vertices, + .max_primitive = shader->info.mesh.max_primitives, + }); + P_NVC597_SET_MESH_SHADER_B(p, { + .shared_mem_lines = DIV_ROUND_UP(shader->info.mesh.smem_size, 128), + .thread_count = shader->info.mesh.local_size, + }); + P_IMMD(p, NV9097, SET_MME_SHADOW_RAM_CONTROL, MODE_METHOD_TRACK_WITH_FILTER); + + if (shader->info.mesh.has_gs_sph) { + P_IMMD(p, NV9097, SET_PIPELINE_SHADER(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY), { + .enable = shader->info.mesh.has_gs_sph, + .type = TYPE_GEOMETRY, + }); + + uint64_t gs_hdr_addr = shader->gs_hdr_addr; + P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY)); + P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr >> 32); + P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY, gs_hdr_addr); + P_IMMD(p, NVC397, SET_GS_MODE, TYPE_ANY); + } + } else if (shader->info.stage == MESA_SHADER_FRAGMENT) { max_dw_count += 13; P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A); diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h index 82de2bd2cff..be9aebec05c 100644 --- a/src/nouveau/vulkan/nvk_shader.h +++ b/src/nouveau/vulkan/nvk_shader.h @@ -34,7 +34,9 @@ struct vk_shader_module; (VK_SHADER_STAGE_VERTEX_BIT | \ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | \ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | \ - VK_SHADER_STAGE_GEOMETRY_BIT) + VK_SHADER_STAGE_GEOMETRY_BIT | \ + VK_SHADER_STAGE_TASK_BIT_EXT | \ + VK_SHADER_STAGE_MESH_BIT_EXT) #define NVK_SHADER_STAGE_GRAPHICS_BITS \ (NVK_SHADER_STAGE_VTGM_BITS | VK_SHADER_STAGE_FRAGMENT_BIT) @@ -161,7
+163,7 @@ nvk_compile_nir_shader(struct nvk_device *dev, nir_shader *nir, const VkAllocationCallbacks *alloc, struct nvk_shader **shader_out); -uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage); +uint32_t mesa_to_nv9097_shader_type(mesa_shader_stage stage, bool has_task_shader); uint32_t nvk_pipeline_bind_group(mesa_shader_stage stage, bool has_task_shader); #endif