diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 22b43179c17..a4a59f2c9c0 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -13066,6 +13066,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t push_constant_size = 0, dynamic_offset_count = 0; + bool need_dynamic_descriptors_offset_addr = false; bool need_indirect_descriptors = false; bool need_push_constants_upload = false; @@ -13101,6 +13102,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) /* Compute push constants/indirect descriptors state. */ need_indirect_descriptors |= radv_shader_need_indirect_descriptors(shader); + need_dynamic_descriptors_offset_addr |= radv_shader_need_dynamic_descriptors_offset_addr(shader); need_push_constants_upload |= radv_shader_need_push_constants_upload(shader); push_constant_size = MAX2(push_constant_size, shader->info.push_constant_size); dynamic_offset_count += shader_obj->dynamic_offset_count; @@ -13144,6 +13146,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) struct radv_push_constant_state *pc_state = &cmd_buffer->push_constant_state[VK_PIPELINE_BIND_POINT_GRAPHICS]; descriptors_state->need_indirect_descriptors = need_indirect_descriptors; + descriptors_state->need_dynamic_descriptors_offset_addr = need_dynamic_descriptors_offset_addr; descriptors_state->dynamic_offset_count = dynamic_offset_count; pc_state->need_upload = need_push_constants_upload; pc_state->size = align(push_constant_size, 4); @@ -16284,6 +16287,7 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ struct radv_push_constant_state *pc_state = &cmd_buffer->push_constant_state[VK_PIPELINE_BIND_POINT_COMPUTE]; descriptors_state->need_indirect_descriptors = radv_shader_need_indirect_descriptors(shader); + descriptors_state->need_dynamic_descriptors_offset_addr = radv_shader_need_dynamic_descriptors_offset_addr(shader); descriptors_state->dynamic_offset_count = shader_obj->dynamic_offset_count; pc_state->need_upload = radv_shader_need_push_constants_upload(shader); pc_state->size = align(shader->info.push_constant_size, 4); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index c66ee7aa5ff..5bc4e905746 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -168,7 +168,7 @@ declare_global_input_sgprs(struct radv_shader_args_state *state, const enum amd_ if (info->merged_shader_compiled_separately || info->loads_dynamic_offsets) { RADV_ADD_UD_ARG(state, 1, AC_ARG_CONST_ADDR, ac.dynamic_descriptors, AC_UD_DYNAMIC_DESCRIPTORS); - if (info->loads_dynamic_descriptors_offset_addr) { + if (info->merged_shader_compiled_separately || info->loads_dynamic_descriptors_offset_addr) { RADV_ADD_UD_ARG(state, 1, AC_ARG_CONST_ADDR, ac.dynamic_descriptors_offset_addr, AC_UD_DYNAMIC_DESCRIPTORS_OFFSET_ADDR); } @@ -462,6 +462,7 @@ declare_unmerged_vs_tcs_args(struct radv_shader_args_state *state, const enum am ac_add_preserved(&state->args->ac, &state->args->descriptors[0]); ac_add_preserved(&state->args->ac, &state->args->ac.push_constants); ac_add_preserved(&state->args->ac, &state->args->ac.dynamic_descriptors); + ac_add_preserved(&state->args->ac, &state->args->ac.dynamic_descriptors_offset_addr); ac_add_preserved(&state->args->ac, &state->args->ac.view_index); ac_add_preserved(&state->args->ac, &state->args->ac.tcs_offchip_layout); ac_add_preserved(&state->args->ac, &state->args->epilog_pc); @@ -528,6 +529,7 @@ declare_unmerged_vs_tes_gs_args(struct radv_shader_args_state *state, const enum ac_add_preserved(&state->args->ac, &state->args->descriptors[0]); ac_add_preserved(&state->args->ac, &state->args->ac.push_constants); ac_add_preserved(&state->args->ac, &state->args->ac.dynamic_descriptors); + ac_add_preserved(&state->args->ac, &state->args->ac.dynamic_descriptors_offset_addr); ac_add_preserved(&state->args->ac, &state->args->streamout_buffers); if (gfx_level >= GFX12) ac_add_preserved(&state->args->ac, &state->args->streamout_state); diff --git a/src/amd/vulkan/radv_shader_object.c b/src/amd/vulkan/radv_shader_object.c index 1c1dcc7a68e..a6b9a8e42c2 100644 --- a/src/amd/vulkan/radv_shader_object.c +++ b/src/amd/vulkan/radv_shader_object.c @@ -15,6 +15,10 @@ #include "radv_pipeline_graphics.h" #include "radv_shader_object.h" +struct radv_shader_object_metadata { + uint32_t dynamic_offset_count; +}; + static void radv_shader_object_destroy_variant(struct radv_device *device, VkShaderCodeTypeEXT code_type, struct radv_shader *shader, struct radv_shader_binary *binary) @@ -54,6 +58,21 @@ radv_DestroyShaderEXT(VkDevice _device, VkShaderEXT shader, const VkAllocationCa radv_shader_object_destroy(device, shader_obj, pAllocator); } +static void +radv_shader_layout_add_set(struct radv_shader_layout *layout, uint32_t set_idx, + struct radv_descriptor_set_layout *set_layout) +{ + if (layout->set[set_idx].layout) + return; + + layout->num_sets = MAX2(set_idx + 1, layout->num_sets); + + layout->set[set_idx].layout = set_layout; + layout->set[set_idx].dynamic_offset_start = layout->dynamic_offset_count; + + layout->dynamic_offset_count += set_layout->dynamic_offset_count; +} + static void radv_get_shader_layout(const VkShaderCreateInfoEXT *pCreateInfo, struct radv_shader_layout *layout) { @@ -67,18 +86,29 @@ radv_get_shader_layout(const VkShaderCreateInfoEXT *pCreateInfo, struct radv_sha if (set_layout == NULL) continue; - layout->num_sets = MAX2(i + 1, layout->num_sets); + radv_shader_layout_add_set(layout, i, set_layout); - layout->set[i].layout = set_layout; - layout->set[i].dynamic_offset_start = layout->dynamic_offset_count; - - layout->dynamic_offset_count += set_layout->dynamic_offset_count; dynamic_shader_stages |= set_layout->dynamic_shader_stages; } if (layout->dynamic_offset_count && (dynamic_shader_stages & pCreateInfo->stage)) { layout->use_dynamic_descriptors = true; } + + layout->independent_sets = !!(pCreateInfo->flags & VK_SHADER_CREATE_INDEPENDENT_SETS_BIT_KHR); +} + +static void +radv_merge_shader_layout(const struct radv_shader_layout *src, struct radv_shader_layout *dst) +{ + for (uint32_t s = 0; s < src->num_sets; s++) { + if (!src->set[s].layout) + continue; + + radv_shader_layout_add_set(dst, s, src->set[s].layout); + } + + dst->use_dynamic_descriptors |= src->use_dynamic_descriptors; } static void @@ -286,14 +316,10 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic const VkShaderCreateInfoEXT *pCreateInfo) { const struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_shader_layout layout = {0}; VkResult result; - radv_get_shader_layout(pCreateInfo, &layout); - shader_obj->stage = vk_to_mesa_shader_stage(pCreateInfo->stage); shader_obj->code_type = pCreateInfo->codeType; - shader_obj->dynamic_offset_count = layout.dynamic_offset_count; if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) { if (pCreateInfo->codeSize < VK_UUID_SIZE + sizeof(uint32_t)) { @@ -308,6 +334,11 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic if (memcmp(cache_uuid, pdev->cache_uuid, VK_UUID_SIZE)) return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT; + const struct radv_shader_object_metadata *md = + (struct radv_shader_object_metadata *)blob_read_bytes(&blob, sizeof(struct radv_shader_object_metadata)); + + shader_obj->dynamic_offset_count = md->dynamic_offset_count; + const bool has_main_binary = blob_read_uint32(&blob); if (has_main_binary) { @@ -350,8 +381,14 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic } } } else { + struct radv_shader_layout layout = {0}; + assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT); + radv_get_shader_layout(pCreateInfo, &layout); + + shader_obj->dynamic_offset_count = layout.dynamic_offset_count; + if (pCreateInfo->stage == VK_SHADER_STAGE_COMPUTE_BIT) { result = radv_shader_object_init_compute(shader_obj, device, pCreateInfo); } else { @@ -465,6 +502,21 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con default: assert(0); } + + if (stages[i].layout.independent_sets) { + /* Merge layouts for merged stages with independent sets. */ + if (stages[i].stage == MESA_SHADER_VERTEX) { + if (stages[i].next_stage == MESA_SHADER_TESS_CTRL) { + radv_merge_shader_layout(&stages[MESA_SHADER_VERTEX].layout, &stages[MESA_SHADER_TESS_CTRL].layout); + } else if (stages[i].next_stage == MESA_SHADER_GEOMETRY) { + radv_merge_shader_layout(&stages[MESA_SHADER_VERTEX].layout, &stages[MESA_SHADER_GEOMETRY].layout); + } + } + + if (stages[i].stage == MESA_SHADER_TESS_EVAL && stages[i].next_stage == MESA_SHADER_GEOMETRY) { + radv_merge_shader_layout(&stages[MESA_SHADER_TESS_EVAL].layout, &stages[MESA_SHADER_GEOMETRY].layout); + } + } } struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES] = {NULL}; @@ -650,6 +702,7 @@ radv_get_shader_object_size(const struct radv_shader_object *shader_obj) { size_t size = VK_UUID_SIZE; + size += sizeof(struct radv_shader_object_metadata); size += radv_get_shader_binary_size(shader_obj->binary); if (shader_obj->stage == MESA_SHADER_VERTEX) { @@ -664,6 +717,16 @@ radv_get_shader_object_size(const struct radv_shader_object *shader_obj) return size; } +static void +radv_write_shader_object_metadata(struct blob *blob, const struct radv_shader_object *shader_obj) +{ + struct radv_shader_object_metadata md = { + .dynamic_offset_count = shader_obj->dynamic_offset_count, + }; + + blob_write_bytes(blob, &md, sizeof(md)); +} + static void radv_write_shader_binary(struct blob *blob, const struct radv_shader_binary *binary) { @@ -702,6 +765,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS blob_init_fixed(&blob, pData, *pDataSize); blob_write_bytes(&blob, pdev->cache_uuid, VK_UUID_SIZE); + radv_write_shader_object_metadata(&blob, shader_obj); radv_write_shader_binary(&blob, shader_obj->binary); if (shader_obj->stage == MESA_SHADER_VERTEX) {