From 5f3b73b2f0f3383db522428b3676db12531e5797 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 17 Apr 2026 11:05:09 +0100 Subject: [PATCH] radv: move load_grid_size_from_user_sgpr to radv_physical_device Signed-off-by: Rhys Perry Reviewed-by: Konstantin Seurer Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 6 +++--- src/amd/vulkan/radv_device.c | 5 +---- src/amd/vulkan/radv_device.h | 3 --- src/amd/vulkan/radv_dgc.c | 9 ++++++--- src/amd/vulkan/radv_physical_device.c | 3 +++ src/amd/vulkan/radv_physical_device.h | 3 +++ 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 9743c6cf92f..47b18cf5eba 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -7616,7 +7616,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 if (dst_flags & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)) { /* SMEM loads are used to read compute dispatch size in shaders */ - if ((dst_flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT) && !device->load_grid_size_from_user_sgpr) { + if ((dst_flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT) && !pdev->load_grid_size_from_user_sgpr) { flush_bits |= RADV_CMD_FLAG_INV_SCACHE; } @@ -14022,7 +14022,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv if (grid_size_offset) { radeon_begin(cs); - if (device->load_grid_size_from_user_sgpr) { + if (pdev->load_grid_size_from_user_sgpr) { assert(pdev->info.gfx_level >= GFX10_3); radeon_emit(PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0)); @@ -14128,7 +14128,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv } if (grid_size_offset) { - if (device->load_grid_size_from_user_sgpr) { + if (pdev->load_grid_size_from_user_sgpr) { radeon_begin(cs); radeon_set_sh_reg_seq(grid_size_offset, 3); radeon_emit(blocks[0]); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a191396ae63..c365dbcc02d 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1197,7 +1197,7 @@ radv_device_init_compiler_info(struct radv_device *device) /* Shader features */ .device_robustness_state = &device->vk.robustness_state, .use_ngg = pdev->use_ngg, - .load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr, + .load_grid_size_from_user_sgpr = pdev->load_grid_size_from_user_sgpr, .emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat, .primitives_generated_query = device->cache_key.primitives_generated_query, .mesh_shader_queries = device->cache_key.mesh_shader_queries, @@ -1379,9 +1379,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso)); } - /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */ - device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3; - device->ws = pdev->ws; device->vk.sync = device->ws->get_sync_provider(device->ws); diff --git a/src/amd/vulkan/radv_device.h b/src/amd/vulkan/radv_device.h index f5f66b650c6..dac7ffd07a4 100644 --- a/src/amd/vulkan/radv_device.h +++ b/src/amd/vulkan/radv_device.h @@ -200,9 +200,6 @@ struct radv_device { /* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */ bool shader_use_invisible_vram; - /* Whether to inline the compute dispatch size in user sgprs. */ - bool load_grid_size_from_user_sgpr; - /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */ int force_aniso; diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index 303dd9b1330..47462ade7f3 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -260,6 +260,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout uint32_t *upload_size) { const struct radv_device *device = container_of(layout->vk.base.device, struct radv_device, vk); + const struct radv_physical_device *pdev = radv_device_physical(device); const VkGeneratedCommandsPipelineInfoEXT *pipeline_info = vk_find_struct_const(pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT); @@ -291,7 +292,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout } if (uses_grid_base_sgpr) { - if (device->load_grid_size_from_user_sgpr) { + if (pdev->load_grid_size_from_user_sgpr) { /* PKT3_SET_SH_REG for immediate values */ *cmd_size += 5 * 4; } else { @@ -409,6 +410,7 @@ radv_get_sequence_size_rt(const struct radv_indirect_command_layout *layout, con uint32_t *upload_size) { const struct radv_device *device = container_of(layout->vk.base.device, struct radv_device, vk); + const struct radv_physical_device *pdev = radv_device_physical(device); const VkGeneratedCommandsPipelineInfoEXT *pipeline_info = vk_find_struct_const(pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT); @@ -421,7 +423,7 @@ radv_get_sequence_size_rt(const struct radv_indirect_command_layout *layout, con const struct radv_userdata_info *cs_grid_size_loc = radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_GRID_SIZE); if (cs_grid_size_loc->sgpr_idx != -1) { - if (device->load_grid_size_from_user_sgpr) { + if (pdev->load_grid_size_from_user_sgpr) { /* PKT3_LOAD_SH_REG_INDEX */ *cmd_size += 5 * 4; } else { @@ -2159,13 +2161,14 @@ dgc_emit_dispatch_direct(struct dgc_cmdbuf *cs, nir_def *wg_x, nir_def *wg_y, ni bool is_rt) { const struct radv_device *device = cs->dev; + const struct radv_physical_device *pdev = radv_device_physical(device); nir_builder *b = cs->b; nir_push_if(b, nir_iand(b, nir_ine_imm(b, wg_x, 0), nir_iand(b, nir_ine_imm(b, wg_y, 0), nir_ine_imm(b, wg_z, 0)))); { nir_push_if(b, nir_ine_imm(b, grid_sgpr, 0)); { - if (device->load_grid_size_from_user_sgpr) { + if (pdev->load_grid_size_from_user_sgpr) { dgc_emit_grid_size_user_sgpr(cs, grid_sgpr, wg_x, wg_y, wg_z); } else { dgc_emit_grid_size_pointer(cs, grid_sgpr, size_va); diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 5d4979de98c..35ea75e2548 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -2629,6 +2629,9 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3; + /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */ + pdev->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3; + /* Determine the number of threads per wave for all stages. */ pdev->cs_wave_size = 64; pdev->ps_wave_size = 64; diff --git a/src/amd/vulkan/radv_physical_device.h b/src/amd/vulkan/radv_physical_device.h index 75b5f0ab681..680ad681769 100644 --- a/src/amd/vulkan/radv_physical_device.h +++ b/src/amd/vulkan/radv_physical_device.h @@ -141,6 +141,9 @@ struct radv_physical_device { /* Whether to emulate mesh/task shader queries. */ bool emulate_mesh_shader_queries; + /* Whether to inline the compute dispatch size in user sgprs. */ + bool load_grid_size_from_user_sgpr; + /* Number of threads per wave. */ uint8_t ps_wave_size; uint8_t cs_wave_size;