From 45f04dae7522a17d6dda44104e1ece8e3c3c9155 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 23 Aug 2022 11:42:49 +0200 Subject: [PATCH] radv: move more MS info to gather_shader_info_ms() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the workgroup size computation remains at the same place, but I think it should be computed in a separate helper later. Signed-off-by: Samuel Pitoiset Reviewed-by: Timur Kristóf Part-of: --- src/amd/vulkan/radv_pipeline.c | 55 +------------------------------ src/amd/vulkan/radv_shader_info.c | 47 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 54 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 0ff2ab986fd..bfe81a54c31 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2055,57 +2055,6 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); } -static void -gfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg) -{ - /* Special case for mesh shader workgroups. - * - * Mesh shaders don't have any real vertex input, but they can produce - * an arbitrary number of vertices and primitives (up to 256). - * We need to precisely control the number of mesh shader workgroups - * that are launched from draw calls. - * - * To achieve that, we set: - * - input primitive topology to point list - * - input vertex and primitive count to 1 - * - max output vertex count and primitive amplification factor - * to the boundaries of the shader - * - * With that, in the draw call: - * - drawing 1 input vertex ~ launching 1 mesh shader workgroup - * - * In the shader: - * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader) - * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D) - * - * Notes: - * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work - * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs) - * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index - * - */ - nir_shader *ms = stage->nir; - - ngg->enable_vertex_grouping = true; - ngg->esgs_ring_size = 1; - ngg->hw_max_esverts = 1; - ngg->max_gsprims = 1; - ngg->max_out_verts = ms->info.mesh.max_vertices_out; - ngg->max_vert_out_per_gs_instance = false; - ngg->ngg_emit_size = 0; - ngg->prim_amp_factor = ms->info.mesh.max_primitives_out; - ngg->vgt_esgs_ring_itemsize = 1; - - unsigned min_ngg_workgroup_size = - ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims, - ngg->max_out_verts, ngg->prim_amp_factor); - - unsigned api_workgroup_size = - ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX); - - stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size); -} - static void gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline, struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg) @@ -4624,9 +4573,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout else unreachable("Missing NGG shader stage."); - if (*last_vgt_api_stage == MESA_SHADER_MESH) - gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info); - else + if (*last_vgt_api_stage != MESA_SHADER_MESH) gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info); } else if (stages[MESA_SHADER_GEOMETRY].nir) { struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info; diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 29a8c1f7cec..537d2ce8439 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -414,7 +414,54 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info) static void gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info) { + struct gfx10_ngg_info *ngg_info = &info->ngg_info; + info->ms.output_prim = nir->info.mesh.primitive_type; + + /* Special case for mesh shader workgroups. + * + * Mesh shaders don't have any real vertex input, but they can produce + * an arbitrary number of vertices and primitives (up to 256). + * We need to precisely control the number of mesh shader workgroups + * that are launched from draw calls. + * + * To achieve that, we set: + * - input primitive topology to point list + * - input vertex and primitive count to 1 + * - max output vertex count and primitive amplification factor + * to the boundaries of the shader + * + * With that, in the draw call: + * - drawing 1 input vertex ~ launching 1 mesh shader workgroup + * + * In the shader: + * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader) + * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D) + * + * Notes: + * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work + * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs) + * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index + * + */ + ngg_info->enable_vertex_grouping = true; + ngg_info->esgs_ring_size = 1; + ngg_info->hw_max_esverts = 1; + ngg_info->max_gsprims = 1; + ngg_info->max_out_verts = nir->info.mesh.max_vertices_out; + ngg_info->max_vert_out_per_gs_instance = false; + ngg_info->ngg_emit_size = 0; + ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out; + ngg_info->vgt_esgs_ring_itemsize = 1; + + unsigned min_ngg_workgroup_size = + ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims, + ngg_info->max_out_verts, ngg_info->prim_amp_factor); + + unsigned api_workgroup_size = + ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); + + info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size); } static void