radv: move load_grid_size_from_user_sgpr to radv_physical_device

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41022>
2026-05-07 11:28:05 +02:00 · 2026-04-17 11:05:09 +01:00 · 2026-04-17 11:05:09 +01:00 · 5f3b73b2f0
commit 5f3b73b2f0
parent 48645f21b5
6 changed files with 16 additions and 13 deletions
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7616,7 +7616,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2

   if (dst_flags & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)) {
      /* SMEM loads are used to read compute dispatch size in shaders */
-      if ((dst_flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT) && !device->load_grid_size_from_user_sgpr) {
+      if ((dst_flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT) && !pdev->load_grid_size_from_user_sgpr) {
         flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
      }

@@ -14022,7 +14022,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
      if (grid_size_offset) {
         radeon_begin(cs);

-         if (device->load_grid_size_from_user_sgpr) {
+         if (pdev->load_grid_size_from_user_sgpr) {
            assert(pdev->info.gfx_level >= GFX10_3);

            radeon_emit(PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
@@ -14128,7 +14128,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
      }

      if (grid_size_offset) {
-         if (device->load_grid_size_from_user_sgpr) {
+         if (pdev->load_grid_size_from_user_sgpr) {
            radeon_begin(cs);
            radeon_set_sh_reg_seq(grid_size_offset, 3);
            radeon_emit(blocks[0]);
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1197,7 +1197,7 @@ radv_device_init_compiler_info(struct radv_device *device)
      /* Shader features */
      .device_robustness_state = &device->vk.robustness_state,
      .use_ngg = pdev->use_ngg,
-      .load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr,
+      .load_grid_size_from_user_sgpr = pdev->load_grid_size_from_user_sgpr,
      .emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat,
      .primitives_generated_query = device->cache_key.primitives_generated_query,
      .mesh_shader_queries = device->cache_key.mesh_shader_queries,
@@ -1379,9 +1379,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
      fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso));
   }

-   /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
-   device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3;
-
   device->ws = pdev->ws;
   device->vk.sync = device->ws->get_sync_provider(device->ws);

--- a/src/amd/vulkan/radv_device.h
+++ b/src/amd/vulkan/radv_device.h
@@ -200,9 +200,6 @@ struct radv_device {
   /* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
   bool shader_use_invisible_vram;

-   /* Whether to inline the compute dispatch size in user sgprs. */
-   bool load_grid_size_from_user_sgpr;
-
   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

--- a/src/amd/vulkan/radv_dgc.c
+++ b/src/amd/vulkan/radv_dgc.c
@@ -260,6 +260,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
                               uint32_t *upload_size)
 {
   const struct radv_device *device = container_of(layout->vk.base.device, struct radv_device, vk);
+   const struct radv_physical_device *pdev = radv_device_physical(device);

   const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
      vk_find_struct_const(pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
@@ -291,7 +292,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
   }

   if (uses_grid_base_sgpr) {
-      if (device->load_grid_size_from_user_sgpr) {
+      if (pdev->load_grid_size_from_user_sgpr) {
         /* PKT3_SET_SH_REG for immediate values */
         *cmd_size += 5 * 4;
      } else {
@@ -409,6 +410,7 @@ radv_get_sequence_size_rt(const struct radv_indirect_command_layout *layout, con
                          uint32_t *upload_size)
 {
   const struct radv_device *device = container_of(layout->vk.base.device, struct radv_device, vk);
+   const struct radv_physical_device *pdev = radv_device_physical(device);

   const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
      vk_find_struct_const(pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
@@ -421,7 +423,7 @@ radv_get_sequence_size_rt(const struct radv_indirect_command_layout *layout, con

   const struct radv_userdata_info *cs_grid_size_loc = radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_GRID_SIZE);
   if (cs_grid_size_loc->sgpr_idx != -1) {
-      if (device->load_grid_size_from_user_sgpr) {
+      if (pdev->load_grid_size_from_user_sgpr) {
         /* PKT3_LOAD_SH_REG_INDEX */
         *cmd_size += 5 * 4;
      } else {
@@ -2159,13 +2161,14 @@ dgc_emit_dispatch_direct(struct dgc_cmdbuf *cs, nir_def *wg_x, nir_def *wg_y, ni
                         bool is_rt)
 {
   const struct radv_device *device = cs->dev;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
   nir_builder *b = cs->b;

   nir_push_if(b, nir_iand(b, nir_ine_imm(b, wg_x, 0), nir_iand(b, nir_ine_imm(b, wg_y, 0), nir_ine_imm(b, wg_z, 0))));
   {
      nir_push_if(b, nir_ine_imm(b, grid_sgpr, 0));
      {
-         if (device->load_grid_size_from_user_sgpr) {
+         if (pdev->load_grid_size_from_user_sgpr) {
            dgc_emit_grid_size_user_sgpr(cs, grid_sgpr, wg_x, wg_y, wg_z);
         } else {
            dgc_emit_grid_size_pointer(cs, grid_sgpr, size_va);
--- a/src/amd/vulkan/radv_physical_device.c
+++ b/src/amd/vulkan/radv_physical_device.c
@@ -2629,6 +2629,9 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm

   pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;

+   /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
+   pdev->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3;
+
   /* Determine the number of threads per wave for all stages. */
   pdev->cs_wave_size = 64;
   pdev->ps_wave_size = 64;
--- a/src/amd/vulkan/radv_physical_device.h
+++ b/src/amd/vulkan/radv_physical_device.h
@@ -141,6 +141,9 @@ struct radv_physical_device {
   /* Whether to emulate mesh/task shader queries. */
   bool emulate_mesh_shader_queries;

+   /* Whether to inline the compute dispatch size in user sgprs. */
+   bool load_grid_size_from_user_sgpr;
+
   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;