ac/nir/ngg: Pass radeon_info to mesh shader lowering.

Same idea as the VS/TES and GS lowering: Make shader compilation decisions based on the features of the current GPU instead of ad-hoc deciding according to GFX level. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33218>
2026-05-09 02:28:10 +02:00 · 2025-01-27 12:47:52 +01:00 · 2025-01-27 12:47:52 +01:00 · f7305f776e
commit f7305f776e
parent b8204c8df9
3 changed files with 33 additions and 32 deletions
--- a/src/amd/common/nir/ac_nir.h
+++ b/src/amd/common/nir/ac_nir.h
@ -183,16 +183,16 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options)

 void
 ac_nir_lower_ngg_mesh(nir_shader *shader,
-                    enum amd_gfx_level gfx_level,
-                    uint32_t clipdist_enable_mask,
-                    const uint8_t *vs_output_param_offset,
-                    bool has_param_exports,
-                    bool *out_needs_scratch_ring,
-                    unsigned wave_size,
-                    unsigned workgroup_size,
-                    bool multiview,
-                    bool has_query,
-                    bool fast_launch_2);
+                      const struct radeon_info *hw_info,
+                      uint32_t clipdist_enable_mask,
+                      const uint8_t *vs_output_param_offset,
+                      bool has_param_exports,
+                      bool *out_needs_scratch_ring,
+                      unsigned wave_size,
+                      unsigned workgroup_size,
+                      bool multiview,
+                      bool has_query,
+                      bool fast_launch_2);

 void
 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
--- a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c
+++ b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c
@ -6,6 +6,7 @@

 #include "ac_nir.h"
 #include "ac_nir_helpers.h"
+#include "ac_gpu_info.h"

 #include "nir_builder.h"

@ -89,7 +90,7 @@ typedef struct

 typedef struct
 {
-   enum amd_gfx_level gfx_level;
+   const struct radeon_info *hw_info;
   bool fast_launch_2;
   bool vert_multirow_export;
   bool prim_multirow_export;
@ -799,7 +800,7 @@ ms_prim_exp_arg_ch1(nir_builder *b, nir_def *invocation_index, nir_def *num_vtx,
      indices[i] = nir_umin(b, indices[i], max_vtx_idx);
   }

-   return ac_nir_pack_ngg_prim_exp_arg(b, s->vertices_per_prim, indices, cull_flag, s->gfx_level);
+   return ac_nir_pack_ngg_prim_exp_arg(b, s->vertices_per_prim, indices, cull_flag, s->hw_info->gfx_level);
 }

 static nir_def *
@ -826,7 +827,7 @@ ms_prim_exp_arg_ch2(nir_builder *b, uint64_t outputs_mask, lower_ngg_ms_state *s

      if (outputs_mask & VARYING_BIT_LAYER) {
         nir_def *layer =
-            nir_ishl_imm(b, s->out.outputs[VARYING_SLOT_LAYER][0], s->gfx_level >= GFX11 ? 0 : 17);
+            nir_ishl_imm(b, s->out.outputs[VARYING_SLOT_LAYER][0], s->hw_info->gfx_level >= GFX11 ? 0 : 17);
         prim_exp_arg_ch2 = nir_ior(b, prim_exp_arg_ch2, layer);
      }

@ -890,7 +891,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
   ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s);

   if (exports) {
-      ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask,
+      ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask,
                             !s->has_param_exports, false, true,
                             s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row);
   }
@ -899,12 +900,12 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
      /* Export generic attributes on GFX10.3
       * (On GFX11 they are already stored in the attribute ring.)
       */
-      if (s->has_param_exports && s->gfx_level == GFX10_3) {
+      if (s->has_param_exports && s->hw_info->gfx_level == GFX10_3) {
         ac_nir_export_parameters(b, s->vs_output_param_offset, per_vertex_outputs, 0, &s->out);
      }

      /* GFX11+: also store special outputs to the attribute ring so PS can load them. */
-      if (s->gfx_level >= GFX11 && (per_vertex_outputs & MS_VERT_ARG_EXP_MASK))
+      if (s->hw_info->gfx_level >= GFX11 && (per_vertex_outputs & MS_VERT_ARG_EXP_MASK))
         ms_emit_attribute_ring_output_stores(b, per_vertex_outputs & MS_VERT_ARG_EXP_MASK, index, s);
   }
 }
@ -937,12 +938,12 @@ emit_ms_primitive(nir_builder *b, nir_def *index, nir_def *row, bool exports, bo
      /* Export generic attributes on GFX10.3
       * (On GFX11 they are already stored in the attribute ring.)
       */
-      if (s->has_param_exports && s->gfx_level == GFX10_3) {
+      if (s->has_param_exports && s->hw_info->gfx_level == GFX10_3) {
         ac_nir_export_parameters(b, s->vs_output_param_offset, per_primitive_outputs, 0, &s->out);
      }

      /* GFX11+: also store special outputs to the attribute ring so PS can load them. */
-      if (s->gfx_level >= GFX11)
+      if (s->hw_info->gfx_level >= GFX11)
         ms_emit_attribute_ring_output_stores(b, per_primitive_outputs & MS_PRIM_ARG_EXP_MASK, index, s);
   }
 }
@ -1045,7 +1046,7 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s)
      (per_vertex_outputs & MS_VERT_ARG_EXP_MASK) ||
      (per_primitive_outputs & MS_PRIM_ARG_EXP_MASK);

-   const bool wait_attr_ring = must_wait_attr_ring(s->gfx_level, has_special_param_exports);
+   const bool wait_attr_ring = must_wait_attr_ring(s->hw_info->gfx_level, has_special_param_exports);

   /* Export vertices. */
   if ((per_vertex_outputs & ~VARYING_BIT_POS) || !wait_attr_ring) {
@ -1350,16 +1351,16 @@ ms_calculate_output_layout(enum amd_gfx_level gfx_level, unsigned api_shared_siz

 void
 ac_nir_lower_ngg_mesh(nir_shader *shader,
-                    enum amd_gfx_level gfx_level,
-                    uint32_t clipdist_enable_mask,
-                    const uint8_t *vs_output_param_offset,
-                    bool has_param_exports,
-                    bool *out_needs_scratch_ring,
-                    unsigned wave_size,
-                    unsigned hw_workgroup_size,
-                    bool multiview,
-                    bool has_query,
-                    bool fast_launch_2)
+                      const struct radeon_info *hw_info,
+                      uint32_t clipdist_enable_mask,
+                      const uint8_t *vs_output_param_offset,
+                      bool has_param_exports,
+                      bool *out_needs_scratch_ring,
+                      unsigned wave_size,
+                      unsigned hw_workgroup_size,
+                      bool multiview,
+                      bool has_query,
+                      bool fast_launch_2)
 {
   unsigned vertices_per_prim =
      mesa_vertices_per_prim(shader->info.mesh.primitive_type);
@ -1379,7 +1380,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
   unsigned max_primitives = shader->info.mesh.max_primitives_out;

   ms_out_mem_layout layout = ms_calculate_output_layout(
-      gfx_level, shader->info.shared_size, per_vertex_outputs, per_primitive_outputs,
+      hw_info->gfx_level, shader->info.shared_size, per_vertex_outputs, per_primitive_outputs,
      cross_invocation_access, max_vertices, max_primitives, vertices_per_prim);

   shader->info.shared_size = layout.lds.total_size;
@ -1406,7 +1407,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
      .hw_workgroup_size = hw_workgroup_size,
      .insert_layer_output = multiview && !(shader->info.outputs_written & VARYING_BIT_LAYER),
      .uses_cull_flags = uses_cull,
-      .gfx_level = gfx_level,
+      .hw_info = hw_info,
      .fast_launch_2 = fast_launch_2,
      .vert_multirow_export = fast_launch_2 && max_vertices > hw_workgroup_size,
      .prim_multirow_export = fast_launch_2 && max_primitives > hw_workgroup_size,
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -802,7 +802,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
      unsigned hw_workgroup_size = ALIGN(info->workgroup_size, info->wave_size);

      bool scratch_ring = false;
-      NIR_PASS_V(nir, ac_nir_lower_ngg_mesh, pdev->info.gfx_level, options.clip_cull_dist_mask,
+      NIR_PASS_V(nir, ac_nir_lower_ngg_mesh, &pdev->info, options.clip_cull_dist_mask,
                 options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
                 hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
      ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;