radv: rework configuring VGT_SHADER_STAGES_EN

For shader objects. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22991>
2026-04-27 02:20:38 +02:00 · 2023-05-15 13:02:26 +02:00 · 2023-05-15 13:02:26 +02:00 · 55df7b6415
commit 55df7b6415
parent 978d80fbe2
2 changed files with 89 additions and 49 deletions
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@ -3523,43 +3523,87 @@ radv_pipeline_emit_vgt_vertex_reuse(const struct radv_device *device, struct rad
                          S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
 }

+static struct radv_vgt_shader_key
+radv_pipeline_generate_vgt_shader_key(const struct radv_device *device,
+                                      const struct radv_graphics_pipeline *pipeline)
+{
+   uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
+   struct radv_vgt_shader_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
+      hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
+
+   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
+      vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
+      if (radv_pipeline_has_gs_copy_shader(&pipeline->base))
+         vs_size = pipeline->base.gs_copy_shader->info.wave_size;
+   } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL])
+      vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
+   else if (pipeline->base.shaders[MESA_SHADER_VERTEX])
+      vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size;
+   else if (pipeline->base.shaders[MESA_SHADER_MESH])
+      vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size;
+
+   if (radv_pipeline_has_ngg(pipeline)) {
+      assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base));
+      gs_size = vs_size;
+   }
+
+   key.tess = radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL);
+   key.gs = radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY);
+   if (radv_pipeline_has_ngg(pipeline)) {
+      key.ngg = 1;
+      key.ngg_passthrough = radv_pipeline_has_ngg_passthrough(pipeline);
+   }
+   key.streamout = !!pipeline->streamout_shader;
+   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
+      key.mesh = 1;
+      key.mesh_scratch_ring =
+         pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
+   }
+
+   key.hs_wave32 = hs_size == 32;
+   key.vs_wave32 = vs_size == 32;
+   key.gs_wave32 = gs_size == 32;
+
+   return key;
+}
+
 static void
-radv_pipeline_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
-                                     const struct radv_graphics_pipeline *pipeline)
+radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
+                            const struct radv_vgt_shader_key *key)
 {
   const struct radv_physical_device *pdevice = device->physical_device;
   uint32_t stages = 0;
-   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
+
+   if (key->tess) {
      stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);

-      if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
+      if (key->gs)
         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
-      else if (radv_pipeline_has_ngg(pipeline))
+      else if (key->ngg)
         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
      else
         stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
-   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
+   } else if (key->gs) {
      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
-   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
-      assert(!radv_pipeline_has_ngg_passthrough(pipeline));
-      stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1);
-
-      if (pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring)
-         stages |= S_028B54_NGG_WAVE_ID_EN(1);
-   } else if (radv_pipeline_has_ngg(pipeline)) {
+   } else if (key->mesh) {
+      assert(!key->ngg_passthrough);
+      stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) |
+                S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
+   } else if (key->ngg) {
      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
   }

-   if (radv_pipeline_has_ngg(pipeline)) {
-      stages |= S_028B54_PRIMGEN_EN(1);
-      if (pipeline->streamout_shader)
-         stages |= S_028B54_NGG_WAVE_ID_EN(1);
-      if (radv_pipeline_has_ngg_passthrough(pipeline)) {
-         stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
-         if (pdevice->rad_info.family >= CHIP_NAVI23)
-            stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1);
-      }
-   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
+   if (key->ngg) {
+      stages |= S_028B54_PRIMGEN_EN(1) |
+                S_028B54_NGG_WAVE_ID_EN(key->streamout) |
+                S_028B54_PRIMGEN_PASSTHRU_EN(key->ngg_passthrough) |
+                S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough &&
+                                                 pdevice->rad_info.family >= CHIP_NAVI23);
+   } else if (key->gs) {
      stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
   }

@ -3567,31 +3611,11 @@ radv_pipeline_emit_vgt_shader_config(const struct radv_device *device, struct ra
      stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);

   if (pdevice->rad_info.gfx_level >= GFX10) {
-      uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
-
-      if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
-         hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
-
-      if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
-         vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
-         if (radv_pipeline_has_gs_copy_shader(&pipeline->base))
-            vs_size = pipeline->base.gs_copy_shader->info.wave_size;
-      } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL])
-         vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
-      else if (pipeline->base.shaders[MESA_SHADER_VERTEX])
-         vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size;
-      else if (pipeline->base.shaders[MESA_SHADER_MESH])
-         vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size;
-
-      if (radv_pipeline_has_ngg(pipeline)) {
-         assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base));
-         gs_size = vs_size;
-      }
-
-      /* legacy GS only supports Wave64 */
-      stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
-                S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
-                S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
+      stages |= S_028B54_HS_W32_EN(key->hs_wave32) |
+                S_028B54_GS_W32_EN(key->gs_wave32) |
+                S_028B54_VS_W32_EN(pdevice->rad_info.gfx_level < GFX11 && key->vs_wave32);
+      /* Legacy GS only supports Wave64. Read it as an implication. */
+      assert(!(key->gs && !key->ngg) || !key->gs_wave32);
   }

   radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
@ -3711,6 +3735,9 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
   cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
   ctx_cs->buf = cs->buf + cs->max_dw;

+   struct radv_vgt_shader_key vgt_shader_key =
+      radv_pipeline_generate_vgt_shader_key(device, pipeline);
+
   radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend);
   radv_pipeline_emit_vgt_gs_mode(device, ctx_cs, pipeline);

@ -3753,7 +3780,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
   }

   radv_pipeline_emit_vgt_vertex_reuse(device, ctx_cs, pipeline);
-   radv_pipeline_emit_vgt_shader_config(device, ctx_cs, pipeline);
+   radv_emit_vgt_shader_config(device, ctx_cs, &vgt_shader_key);
   radv_pipeline_emit_vgt_gs_out(device, ctx_cs, pipeline, vgt_gs_out_prim_type);

   if (pdevice->rad_info.gfx_level >= GFX10_3) {
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@ -1916,6 +1916,19 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx
                                unsigned data_sel, uint64_t va, uint32_t new_fence,
                                uint64_t gfx9_eop_bug_va);

+struct radv_vgt_shader_key {
+   uint8_t tess : 1;
+   uint8_t gs : 1;
+   uint8_t mesh_scratch_ring : 1;
+   uint8_t mesh : 1;
+   uint8_t ngg_passthrough : 1;
+   uint8_t ngg : 1;       /* gfx10+ */
+   uint8_t streamout : 1; /* only used with NGG */
+   uint8_t hs_wave32 : 1;
+   uint8_t gs_wave32 : 1;
+   uint8_t vs_wave32 : 1;
+};
+
 void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
                      uint32_t mask);
 void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,