diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index dde4392de6b..589e1d98789 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1396,18 +1396,30 @@ radv_pipeline_is_blend_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo) } static uint64_t -radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) +radv_pipeline_needed_dynamic_state(const struct radv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) { bool has_color_att = radv_pipeline_has_color_attachments(pCreateInfo); + bool has_static_rasterizer_discard = + pCreateInfo->pRasterizationState->rasterizerDiscardEnable && + !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT); uint64_t states = RADV_DYNAMIC_ALL; + /* Disable dynamic states that are useless to mesh shading. */ + if (radv_pipeline_has_mesh(pipeline)) { + if (has_static_rasterizer_discard) + return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; + + states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | + RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE); + } + /* If rasterization is disabled we do not care about any of the * dynamic states, since they are all rasterization related only, * except primitive topology, primitive restart enable, vertex * binding stride and rasterization discard itself. */ - if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable && - !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) { + if (has_static_rasterizer_discard) { return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_DYNAMIC_VERTEX_INPUT; @@ -1572,7 +1584,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra) { - uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo); + uint64_t needed_states = radv_pipeline_needed_dynamic_state(pipeline, pCreateInfo); uint64_t states = needed_states; pipeline->dynamic_state = default_dynamic_state; @@ -1640,9 +1652,14 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { - dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); - if (extra && extra->use_rectlist) { - dynamic->primitive_topology = V_008958_DI_PT_RECTLIST; + if (radv_pipeline_has_mesh(pipeline)) { + dynamic->primitive_topology = V_008958_DI_PT_POINTLIST; + } else { + dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); + + if (extra && extra->use_rectlist) { + dynamic->primitive_topology = V_008958_DI_PT_RECTLIST; + } } } @@ -2053,6 +2070,57 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); } +static void +gfx10_get_ngg_ms_info(nir_shader ** nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg) +{ + /* Special case for mesh shader workgroups. + * + * Mesh shaders don't have any real vertex input, but they can produce + * an arbitrary number of vertices and primitives (up to 256). + * We need to precisely control the number of mesh shader workgroups + * that are launched from draw calls. + * + * To achieve that, we set: + * - input primitive topology to point list + * - input vertex and primitive count to 1 + * - max output vertex count and primitive amplification factor + * to the boundaries of the shader + * + * With that, in the draw call: + * - drawing 1 input vertex ~ launching 1 mesh shader workgroup + * + * In the shader: + * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader) + * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D) + * + * Notes: + * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work + * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs) + * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index + * + */ + nir_shader *ms = nir[MESA_SHADER_MESH]; + + ngg->enable_vertex_grouping = true; + ngg->esgs_ring_size = 1; + ngg->hw_max_esverts = 1; + ngg->max_gsprims = 1; + ngg->max_out_verts = ms->info.mesh.max_vertices_out; + ngg->max_vert_out_per_gs_instance = false; + ngg->ngg_emit_size = 0; + ngg->prim_amp_factor = ms->info.mesh.max_primitives_out; + ngg->vgt_esgs_ring_itemsize = 1; + + unsigned min_ngg_workgroup_size = + ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims, + ngg->max_out_verts, ngg->prim_amp_factor); + + unsigned api_workgroup_size = + ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX); + + infos[MESA_SHADER_MESH].workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size); +} + static void gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline, nir_shader **nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg) @@ -2358,6 +2426,10 @@ get_vs_output_info(const struct radv_pipeline *pipeline) static bool radv_nir_stage_uses_xfb(const nir_shader *nir) { + /* Mesh shaders don't support XFB. */ + if (nir->info.stage == MESA_SHADER_MESH) + return false; + nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); bool uses_xfb = !!xfb; @@ -2422,6 +2494,9 @@ radv_link_shaders(struct radv_pipeline *pipeline, if (shaders[MESA_SHADER_VERTEX]) { ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX]; } + if (shaders[MESA_SHADER_MESH]) { + ordered_shaders[shader_count++] = shaders[MESA_SHADER_MESH]; + } if (shaders[MESA_SHADER_COMPUTE]) { ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE]; } @@ -2515,7 +2590,8 @@ radv_link_shaders(struct radv_pipeline *pipeline, info->stage == pipeline->graphics.last_vgt_api_stage && ((info->stage == MESA_SHADER_VERTEX && pipeline_key->vs.topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) || (info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) || - (info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS)); + (info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS) || + (info->stage == MESA_SHADER_MESH && info->mesh.primitive_type == GL_POINTS)); nir_variable *psiz_var = nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ); @@ -2559,6 +2635,7 @@ radv_link_shaders(struct radv_pipeline *pipeline, } if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL || + ordered_shaders[i]->info.stage == MESA_SHADER_MESH || (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) || (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) { nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out); @@ -2744,7 +2821,7 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline, } } - if (!key.vs.dynamic_input_state) { + if (!key.vs.dynamic_input_state && pCreateInfo->pVertexInputState) { const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState; const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = vk_find_struct_const( input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); @@ -2845,7 +2922,7 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline, } if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { - key.vs.topology = pCreateInfo->pInputAssemblyState->topology; + key.vs.topology = pCreateInfo->pInputAssemblyState ? pCreateInfo->pInputAssemblyState->topology : 0; const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info = @@ -2903,7 +2980,9 @@ radv_determine_ngg_settings(struct radv_pipeline *pipeline, { struct radv_device *device = pipeline->device; - if (!nir[MESA_SHADER_GEOMETRY] && pipeline->graphics.last_vgt_api_stage != MESA_SHADER_NONE) { + /* Shader settings for VS or TES without GS. */ + if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX || + pipeline->graphics.last_vgt_api_stage == MESA_SHADER_TESS_EVAL) { uint64_t ps_inputs_read = nir[MESA_SHADER_FRAGMENT] ? nir[MESA_SHADER_FRAGMENT]->info.inputs_read : 0; gl_shader_stage es_stage = pipeline->graphics.last_vgt_api_stage; @@ -2972,8 +3051,10 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, if (pipeline_key->use_ngg) { if (nir[MESA_SHADER_TESS_CTRL]) { infos[MESA_SHADER_TESS_EVAL].is_ngg = true; - } else { + } else if (nir[MESA_SHADER_VERTEX]) { infos[MESA_SHADER_VERTEX].is_ngg = true; + } else if (nir[MESA_SHADER_MESH]) { + infos[MESA_SHADER_MESH].is_ngg = true; } if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_GEOMETRY] && @@ -3571,11 +3652,24 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout modules[i]->sha1); pipeline->active_stages |= mesa_to_vk_shader_stage(i); - if (i < MESA_SHADER_FRAGMENT) + if (i < MESA_SHADER_FRAGMENT || i == MESA_SHADER_MESH) pipeline->graphics.last_vgt_api_stage = i; } } + ASSERTED bool primitive_shading = + modules[MESA_SHADER_VERTEX] || modules[MESA_SHADER_TESS_CTRL] || + modules[MESA_SHADER_TESS_EVAL] || modules[MESA_SHADER_GEOMETRY]; + ASSERTED bool mesh_shading = + modules[MESA_SHADER_MESH]; + + /* Primitive and mesh shading must not be mixed in the same pipeline. */ + assert(!primitive_shading || !mesh_shading); + /* Mesh shaders are mandatory in mesh shading pipelines. */ + assert(mesh_shading == !!modules[MESA_SHADER_MESH]); + /* Mesh shaders always need NGG. */ + assert(!mesh_shading || pipeline_key->use_ngg); + if (custom_hash) memcpy(hash, custom_hash, 20); else { @@ -3647,7 +3741,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout radv_fill_shader_info(pipeline, pipeline_layout, pStages, pipeline_key, infos, nir); bool pipeline_has_ngg = (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) || - (nir[MESA_SHADER_TESS_EVAL] && infos[MESA_SHADER_TESS_EVAL].is_ngg); + (nir[MESA_SHADER_TESS_EVAL] && infos[MESA_SHADER_TESS_EVAL].is_ngg) || + (nir[MESA_SHADER_MESH] && infos[MESA_SHADER_MESH].is_ngg); if (pipeline_has_ngg) { struct gfx10_ngg_info *ngg_info; @@ -3656,10 +3751,17 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info; else if (nir[MESA_SHADER_TESS_CTRL]) ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info; - else + else if (nir[MESA_SHADER_VERTEX]) ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info; + else if (nir[MESA_SHADER_MESH]) + ngg_info = &infos[MESA_SHADER_MESH].ngg_info; + else + unreachable("Missing NGG shader stage."); - gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info); + if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_MESH) + gfx10_get_ngg_ms_info(nir, infos, ngg_info); + else + gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info); } else if (nir[MESA_SHADER_GEOMETRY]) { struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info; @@ -3949,6 +4051,9 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta } else { return R_00B130_SPI_SHADER_USER_DATA_VS_0; } + case MESA_SHADER_MESH: + assert(has_ngg); + return R_00B230_SPI_SHADER_USER_DATA_GS_0; default: unreachable("unknown shader"); } @@ -4713,10 +4818,9 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf { uint64_t va = radv_shader_get_va(shader); gl_shader_stage es_type = + radv_pipeline_has_mesh(pipeline) ? MESA_SHADER_MESH : radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; - struct radv_shader *es = es_type == MESA_SHADER_TESS_EVAL - ? pipeline->shaders[MESA_SHADER_TESS_EVAL] - : pipeline->shaders[MESA_SHADER_VERTEX]; + struct radv_shader *es = pipeline->shaders[es_type]; const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); @@ -5113,6 +5217,18 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct rade radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); } +static void +radv_pipeline_generate_mesh_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_pipeline *pipeline) +{ + struct radv_shader *ms = pipeline->shaders[MESA_SHADER_MESH]; + if (!ms) + return; + + radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, ms); + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size); +} + static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16) { @@ -5352,6 +5468,9 @@ radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs, stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); } else if (radv_pipeline_has_gs(pipeline)) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); + } else if (radv_pipeline_has_mesh(pipeline)) { + assert(!radv_pipeline_has_ngg_passthrough(pipeline)); + stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1); } else if (radv_pipeline_has_ngg(pipeline)) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); } @@ -5383,6 +5502,8 @@ radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs, vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size; else if (pipeline->shaders[MESA_SHADER_VERTEX]) vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size; + else if (pipeline->shaders[MESA_SHADER_MESH]) + vs_size = gs_size = pipeline->shaders[MESA_SHADER_MESH]->info.wave_size; if (radv_pipeline_has_ngg(pipeline)) { assert(!radv_pipeline_has_gs_copy_shader(pipeline)); @@ -5483,6 +5604,9 @@ radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, gs_out = si_conv_gl_prim_to_gs_out( pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode); } + } else if (radv_pipeline_has_mesh(pipeline)) { + gs_out = + si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_MESH]->info.ms.output_prim); } else { gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology); } @@ -5596,6 +5720,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline, radv_pipeline_generate_multisample_state(ctx_cs, pipeline); radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline); + radv_pipeline_generate_mesh_shader(ctx_cs, cs, pipeline); if (radv_pipeline_has_tess(pipeline)) { radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline); @@ -5715,16 +5840,21 @@ radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline) } } + gl_shader_stage first_stage = + radv_pipeline_has_mesh(pipeline) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX; + struct radv_userdata_info *loc = - radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE); + radv_lookup_user_sgpr(pipeline, first_stage, AC_UD_VS_BASE_VERTEX_START_INSTANCE); if (loc->sgpr_idx != -1) { - pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX]; + pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[first_stage]; pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4; pipeline->graphics.vtx_emit_num = loc->num_sgprs; pipeline->graphics.uses_drawid = - radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id; + radv_get_shader(pipeline, first_stage)->info.vs.needs_draw_id; pipeline->graphics.uses_baseinstance = - radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance; + radv_get_shader(pipeline, first_stage)->info.vs.needs_base_instance; + + assert(first_stage != MESA_SHADER_MESH || !pipeline->graphics.uses_baseinstance); } } @@ -5770,7 +5900,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo); - radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra); + if (!radv_pipeline_has_mesh(pipeline)) + radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra); radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra); radv_pipeline_init_raster_state(pipeline, pCreateInfo); radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo); @@ -5825,7 +5956,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, pCreateInfo->pTessellationState->patchControlPoints; } - radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo, &key); + if (!radv_pipeline_has_mesh(pipeline)) + radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo, &key); + radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend); radv_pipeline_init_shader_stages_state(pipeline); radv_pipeline_init_scratch(device, pipeline);