From a47952d495ed6f1f6044f7e2bb1639daa6dcf1b9 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 2 Sep 2025 11:51:58 +0200 Subject: [PATCH] radv: upload and emit dynamic descriptors separately from push constants Dynamic descriptors are rarely used and this will allow more optimizations for push constants, like gathering the size from shaders themselves instead of using the pipeline layout. fossils-db (GFX1201): Totals from 21740 (27.30% of 79646) affected shaders: Instrs: 11186407 -> 11192061 (+0.05%); split: -0.05%, +0.10% CodeSize: 59842068 -> 59864412 (+0.04%); split: -0.04%, +0.08% Latency: 56333136 -> 56325208 (-0.01%); split: -0.03%, +0.02% InvThroughput: 8576452 -> 8576516 (+0.00%); split: -0.00%, +0.00% SClause: 279186 -> 279713 (+0.19%); split: -0.06%, +0.25% Copies: 577854 -> 581735 (+0.67%); split: -0.28%, +0.95% PreSGPRs: 867163 -> 866409 (-0.09%) SALU: 1391187 -> 1395055 (+0.28%); split: -0.12%, +0.39% Signed-off-by: Samuel Pitoiset Part-of: --- .../nir/radv_nir_apply_pipeline_layout.c | 4 +- src/amd/vulkan/radv_cmd_buffer.c | 87 ++++++++++++++++--- src/amd/vulkan/radv_cmd_buffer.h | 3 +- src/amd/vulkan/radv_shader_info.c | 4 +- 4 files changed, 81 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c index df074c9233a..6cbb26964a1 100644 --- a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c +++ b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c @@ -69,8 +69,8 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri nir_def *set_ptr; if (vk_descriptor_type_is_dynamic(layout->binding[binding].type)) { unsigned idx = state->layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset; - set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants); - offset = state->layout->push_constant_size + idx * 16; + set_ptr = get_scalar_arg(b, 1, state->args->ac.dynamic_descriptors); + 
offset = idx * 16; stride = 16; } else { set_ptr = load_desc_ptr(b, state, desc_set); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index dbb4ad843ec..0f1b2775f15 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1285,6 +1285,7 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { cmd_buffer->descriptors[i].dirty = 0; cmd_buffer->descriptors[i].valid = 0; + cmd_buffer->descriptors[i].dirty_dynamic = false; } radv_cmd_buffer_reset_rendering(cmd_buffer); @@ -6285,7 +6286,7 @@ radv_must_flush_constants(const struct radv_cmd_buffer *cmd_buffer, VkShaderStag { const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point); - if (push_constants->size || push_constants->dynamic_offset_count) + if (push_constants->size) return stages & cmd_buffer->push_constant_stages; return 0; @@ -6331,16 +6332,15 @@ radv_emit_push_constants_per_stage(const struct radv_device *device, struct radv static void radv_upload_push_constants(struct radv_cmd_buffer *cmd_buffer, const struct radv_push_constant_state *pc_state, - const struct radv_descriptor_state *descriptors_state, uint64_t *va) + uint64_t *va) { unsigned offset; void *ptr; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, pc_state->size + 16 * pc_state->dynamic_offset_count, &offset, &ptr)) + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, pc_state->size, &offset, &ptr)) return; memcpy(ptr, cmd_buffer->push_constants, pc_state->size); - memcpy((char *)ptr + pc_state->size, descriptors_state->dynamic_buffers, 16 * pc_state->dynamic_offset_count); *va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; } @@ -6350,7 +6350,6 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; - struct 
radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point); uint64_t va = 0; uint32_t internal_stages = stages; @@ -6368,7 +6367,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag } if (push_constants->need_upload) { - radv_upload_push_constants(cmd_buffer, push_constants, descriptors_state, &va); + radv_upload_push_constants(cmd_buffer, push_constants, &va); } if (internal_stages & VK_SHADER_STAGE_COMPUTE_BIT) { @@ -6400,6 +6399,58 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag cmd_buffer->push_constant_stages &= ~stages; } +static void +radv_upload_dynamic_descriptors(struct radv_cmd_buffer *cmd_buffer, + const struct radv_descriptor_state *descriptors_state, uint64_t *va) +{ + const uint32_t size = descriptors_state->dynamic_offset_count * 16; + unsigned offset; + void *ptr; + + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr)) + return; + + memcpy(ptr, descriptors_state->dynamic_buffers, size); + + *va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; +} + +static void +radv_flush_dynamic_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, + VkPipelineBindPoint bind_point) +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_cmd_stream *cs = cmd_buffer->cs; + uint64_t va = 0; + + radv_upload_dynamic_descriptors(cmd_buffer, descriptors_state, &va); + + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MESA_VULKAN_SHADER_STAGES * 4); + + if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { + const struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE + ? 
cmd_buffer->state.shaders[MESA_SHADER_COMPUTE] + : cmd_buffer->state.rt_prolog; + + radv_emit_userdata_address(device, cs, compute_shader, AC_UD_DYNAMIC_DESCRIPTORS, va); + } else { + radv_foreach_stage (stage, stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) { + if (!cmd_buffer->state.shaders[stage]) + continue; + + radv_emit_userdata_address(device, cs, cmd_buffer->state.shaders[stage], AC_UD_DYNAMIC_DESCRIPTORS, va); + } + + if (stages & VK_SHADER_STAGE_TASK_BIT_EXT) { + radv_emit_userdata_address(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], + AC_UD_DYNAMIC_DESCRIPTORS, va); + } + } + + assert(cs->b->cdw <= cdw_max); +} + ALWAYS_INLINE void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t idx, struct radv_vbo_info *vbo_info) { @@ -6717,6 +6768,11 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) descriptors_state->dirty = 0; } + if (descriptors_state->dirty_dynamic && descriptors_state->dynamic_offset_count) { + radv_flush_dynamic_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); + descriptors_state->dirty_dynamic = false; + } + const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -7773,7 +7829,7 @@ radv_bind_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, const VkBindDescri ac_build_raw_buffer_descriptor(pdev->info.gfx_level, va, size, dst); } - cmd_buffer->push_constant_stages |= set->header.layout->dynamic_shader_stages; + descriptors_state->dirty_dynamic = true; } } } @@ -8667,8 +8723,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].size = pipeline->push_constant_size; cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].need_upload = pipeline->need_push_constants_upload; - 
cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = - pipeline->dynamic_offset_count; + cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count; cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptors = pipeline->need_indirect_descriptors; } @@ -12356,9 +12411,9 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) struct radv_push_constant_state *pc_state = &cmd_buffer->push_constant_state[VK_PIPELINE_BIND_POINT_GRAPHICS]; descriptors_state->need_indirect_descriptors = need_indirect_descriptors; + descriptors_state->dynamic_offset_count = dynamic_offset_count; pc_state->need_upload = need_push_constants_upload; pc_state->size = push_constant_size; - pc_state->dynamic_offset_count = dynamic_offset_count; if (pdev->info.gfx_level <= GFX9) { cmd_buffer->state.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param(device, cmd_buffer->state.shaders); @@ -12491,6 +12546,11 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ descriptors_state->dirty = 0; } + if (descriptors_state->dirty_dynamic && descriptors_state->dynamic_offset_count) { + radv_flush_dynamic_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); + descriptors_state->dirty_dynamic = false; + } + const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -13343,6 +13403,11 @@ radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPip descriptors_state->dirty = 0; } + if (descriptors_state->dirty_dynamic && descriptors_state->dynamic_offset_count) { + radv_flush_dynamic_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, bind_point); + descriptors_state->dirty_dynamic = false; + } + const VkShaderStageFlags stages = bind_point == 
VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? RADV_RT_STAGE_BITS : VK_SHADER_STAGE_COMPUTE_BIT; const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, bind_point); @@ -15329,9 +15394,9 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ struct radv_push_constant_state *pc_state = &cmd_buffer->push_constant_state[VK_PIPELINE_BIND_POINT_COMPUTE]; descriptors_state->need_indirect_descriptors = radv_shader_need_indirect_descriptors(shader); + descriptors_state->dynamic_offset_count = shader_obj->dynamic_offset_count; pc_state->need_upload = radv_shader_need_push_constants_upload(shader); pc_state->size = shader_obj->push_constant_size; - pc_state->dynamic_offset_count = shader_obj->dynamic_offset_count; assert(cs->b->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 966c911a94b..265eacae600 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -244,6 +244,8 @@ struct radv_descriptor_state { uint32_t valid; struct radv_push_descriptor_set push_set; uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; + uint32_t dynamic_offset_count; + bool dirty_dynamic; uint64_t descriptor_buffers[MAX_SETS]; bool need_indirect_descriptors; uint64_t indirect_descriptor_sets_va; @@ -251,7 +253,6 @@ struct radv_descriptor_state { struct radv_push_constant_state { uint32_t size; - uint32_t dynamic_offset_count; bool need_upload; }; diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index f64b44a0bea..d3730ac6a08 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -1034,10 +1034,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n const struct radv_physical_device *pdev = radv_device_physical(device); struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); - if (layout->use_dynamic_descriptors) { - 
info->loads_push_constants = true; + if (layout->use_dynamic_descriptors) info->loads_dynamic_offsets = true; - } nir_foreach_block (block, func->impl) { gather_info_block(nir, block, info, gfx_state, stage_key, consider_force_vrs);