diff --git a/src/panfrost/ci/panfrost-g52-fails.txt b/src/panfrost/ci/panfrost-g52-fails.txt index 3377b22343b..83919de02a1 100644 --- a/src/panfrost/ci/panfrost-g52-fails.txt +++ b/src/panfrost/ci/panfrost-g52-fails.txt @@ -1101,28 +1101,6 @@ dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r16g16_sint.gradient.unnormal dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r32_sfloat.gradient.normalized_coords.edge_right,Fail dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r32_sfloat.gradient.unnormalized_coords.edge_right,Fail -# We store image/buffer information in a UBO, and -# maxDescriptorSetSampledImages * sizeof(struct panvk_image_desc) exceeds the -# maximum UBO size. -# The easy fix would be to further limit the max value in the physical device -# limit, but given this UBO contains information for storage buffers too, it -# implies putting a constraint on -# maxDescriptorSetStorageBuffers+maxDescriptorSetSampledImages. -# The proper fix would be to use a global pointer/size pair, and lower -# descriptor info loads to global loads. -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_16384,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_32768,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_65535,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_16384,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_32768,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_65535,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_16384,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_32768,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_65535,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_16384,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_32768,Crash -dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_65535,Crash - # resolve operations not supported yet dEQP-VK.pipeline.monolithic.framebuffer_attachment.2d_19x27_32x32_ms,Crash dEQP-VK.pipeline.monolithic.framebuffer_attachment.2d_32x32_39x41_ms,Crash diff --git a/src/panfrost/vulkan/bifrost/panvk_cmd_desc_state.h b/src/panfrost/vulkan/bifrost/panvk_cmd_desc_state.h index 4c53633d9d6..e37cd809c9f 100644 --- a/src/panfrost/vulkan/bifrost/panvk_cmd_desc_state.h +++ b/src/panfrost/vulkan/bifrost/panvk_cmd_desc_state.h @@ -22,52 +22,18 @@ #include "pan_pool.h" -struct panvk_descriptor_state { - const struct panvk_descriptor_set *sets[MAX_SETS]; - struct panvk_push_descriptor_set *push_sets[MAX_SETS]; - - struct { - struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS]; - struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS]; - } dyn; - mali_ptr ubos; - mali_ptr textures; - mali_ptr samplers; - mali_ptr dyn_desc_ubo; - - struct { - mali_ptr attribs; - mali_ptr attrib_bufs; - } img; +struct panvk_shader_desc_state { + mali_ptr tables[PANVK_BIFROST_DESC_TABLE_COUNT]; + mali_ptr img_attrib_table; + mali_ptr dyn_ssbos; }; -void panvk_per_arch(cmd_prepare_push_sets)( - struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline); +struct panvk_descriptor_state { + const struct panvk_descriptor_set *sets[MAX_SETS]; + struct panvk_descriptor_set *push_sets[MAX_SETS]; -void panvk_per_arch(cmd_unprepare_push_sets)( - struct 
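/* The per-stage prepare helpers removed here (ubos/textures/samplers/
 * img_attribs) are superseded by cmd_prepare_shader_desc_tables(),
 * which emits one GPU table per descriptor class. The
 * PANVK_BIFROST_DESC_TABLE_* indices are not defined in this hunk;
 * judging from the call sites, they presumably look like:
 *
 *    enum panvk_bifrost_desc_table {
 *       PANVK_BIFROST_DESC_TABLE_UBO,     // 8-byte UBO descriptors
 *       PANVK_BIFROST_DESC_TABLE_IMG,     // images, lowered to attributes
 *       PANVK_BIFROST_DESC_TABLE_TEXTURE, // texture descriptors
 *       PANVK_BIFROST_DESC_TABLE_SAMPLER, // sampler descriptors
 *       PANVK_BIFROST_DESC_TABLE_COUNT,
 *    };
 */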
panvk_descriptor_state *desc_state); - -void panvk_per_arch(cmd_prepare_ubos)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline); - -void panvk_per_arch(cmd_prepare_textures)( - struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline); - -void panvk_per_arch(cmd_prepare_samplers)( - struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline); - -void panvk_per_arch(fill_img_attribs)(struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline, - void *attrib_bufs, void *attribs, - unsigned first_buf); - -void panvk_per_arch(prepare_img_attribs)( - struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline); + uint32_t dyn_buf_offsets[MAX_SETS][MAX_DYNAMIC_BUFFERS]; +}; void panvk_per_arch(cmd_desc_state_reset)( struct panvk_descriptor_state *gfx_desc_state, @@ -84,8 +50,22 @@ void panvk_per_arch(cmd_desc_state_bind_sets)( const VkDescriptorSet *desc_sets, uint32_t dyn_offset_count, const uint32_t *dyn_offsets); -struct panvk_push_descriptor_set *panvk_per_arch(cmd_push_descriptors)( +struct panvk_descriptor_set *panvk_per_arch(cmd_push_descriptors)( struct vk_command_buffer *cmdbuf, struct panvk_descriptor_state *desc_state, uint32_t set); +void panvk_per_arch(cmd_prepare_dyn_ssbos)( + struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline_shader *shader, + struct panvk_shader_desc_state *shader_desc_state); + +void panvk_per_arch(cmd_prepare_shader_desc_tables)( + struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline_shader *shader, + struct panvk_shader_desc_state *shader_desc_state); + +void panvk_per_arch(cmd_prepare_push_descs)( + struct pan_pool *desc_pool, struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline *pipeline); + #endif diff --git a/src/panfrost/vulkan/bifrost/panvk_descriptor_set.h b/src/panfrost/vulkan/bifrost/panvk_descriptor_set.h deleted file mode 100644 index 846a04028aa..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_descriptor_set.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * SPDX-License-Identifier: MIT - */ - -#ifndef PANVK_DESCRIPTOR_SET_H -#define PANVK_DESCRIPTOR_SET_H - -#ifndef PAN_ARCH -#error "PAN_ARCH must be defined" -#endif - -#include - -#include "vk_object.h" - -#include "panvk_macros.h" - -#define PANVK_MAX_PUSH_DESCS 32 -#define PANVK_MAX_DESC_SIZE 32 -#define PANVK_MAX_DESC_UBO_STRIDE 8 - -struct panvk_cmd_buffer; -struct panvk_descriptor_pool; -struct panvk_descriptor_set_layout; -struct panvk_priv_bo; - -struct panvk_descriptor_pool { - struct vk_object_base base; -}; - -VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_pool, base, VkDescriptorPool, - VK_OBJECT_TYPE_DESCRIPTOR_POOL) - -/* This has to match nir_address_format_64bit_bounded_global */ -struct panvk_ssbo_addr { - uint64_t base_addr; - uint32_t size; - uint32_t zero; /* Must be zero! 
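The triple is read back as a uvec4 (.xy = 64-bit base address, .z = size, .w = running offset), and nir_address_format_64bit_bounded_global expects the initial offset component to be 0.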
*/ -}; - -struct panvk_bview_desc { - uint32_t elems; -}; - -struct panvk_image_desc { - uint16_t width; - uint16_t height; - uint16_t depth; - uint8_t levels; - uint8_t samples; -}; - -struct panvk_buffer_desc { - struct panvk_buffer *buffer; - VkDeviceSize offset; - VkDeviceSize size; -}; - -struct panvk_descriptor_set { - struct vk_object_base base; - struct panvk_descriptor_pool *pool; - const struct panvk_descriptor_set_layout *layout; - struct panvk_buffer_desc *dyn_ssbos; - void *ubos; - struct panvk_buffer_desc *dyn_ubos; - void *samplers; - void *textures; - void *img_attrib_bufs; - uint32_t *img_fmts; - - struct { - struct panvk_priv_bo *bo; - struct { - uint64_t dev; - void *host; - } addr; - } desc_ubo; -}; - -VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set, base, VkDescriptorSet, - VK_OBJECT_TYPE_DESCRIPTOR_SET) - -struct panvk_push_descriptor_set { - struct { - uint8_t descs[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_SIZE]; - uint8_t desc_ubo[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_UBO_STRIDE]; - uint32_t img_fmts[PANVK_MAX_PUSH_DESCS]; - } storage; - struct panvk_descriptor_set set; -}; - -#ifdef PAN_ARCH -void panvk_per_arch(push_descriptor_set_assign_layout)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout); - -void panvk_per_arch(push_descriptor_set)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout, uint32_t write_count, - const VkWriteDescriptorSet *writes); - -void panvk_per_arch(push_descriptor_set_with_template)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout, - VkDescriptorUpdateTemplate templ, const void *data); -#endif - -#endif diff --git a/src/panfrost/vulkan/bifrost/panvk_descriptor_set_layout.h b/src/panfrost/vulkan/bifrost/panvk_descriptor_set_layout.h deleted file mode 100644 index 1516cd7d280..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_descriptor_set_layout.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. 
- * SPDX-License-Identifier: MIT - */ - -#ifndef PANVK_DESCRIPTOR_SET_LAYOUT_H -#define PANVK_DESCRIPTOR_SET_LAYOUT_H - -#ifndef PAN_ARCH -#error "PAN_ARCH must be defined" -#endif - -#include - -#include "vk_descriptor_set_layout.h" - -struct panvk_descriptor_set_binding_layout { - VkDescriptorType type; - - /* Number of array elements in this binding */ - unsigned array_size; - - /* Indices in the desc arrays */ - union { - struct { - union { - unsigned sampler_idx; - unsigned img_idx; - }; - unsigned tex_idx; - }; - unsigned dyn_ssbo_idx; - unsigned ubo_idx; - unsigned dyn_ubo_idx; - }; - - /* Offset into the descriptor UBO where this binding starts */ - uint32_t desc_ubo_offset; - - /* Stride between descriptors in this binding in the UBO */ - uint16_t desc_ubo_stride; - - /* Shader stages affected by this set+binding */ - uint16_t shader_stages; - - struct panvk_sampler **immutable_samplers; -}; - -struct panvk_descriptor_set_layout { - struct vk_descriptor_set_layout vk; - VkDescriptorSetLayoutCreateFlags flags; - - /* Shader stages affected by this descriptor set */ - uint16_t shader_stages; - - unsigned num_samplers; - unsigned num_textures; - unsigned num_ubos; - unsigned num_dyn_ubos; - unsigned num_dyn_ssbos; - unsigned num_imgs; - - /* Size of the descriptor UBO */ - uint32_t desc_ubo_size; - - /* Index of the descriptor UBO */ - unsigned desc_ubo_index; - - /* Number of bindings in this descriptor set */ - uint32_t binding_count; - - /* Bindings in this descriptor set */ - struct panvk_descriptor_set_binding_layout bindings[0]; -}; - -VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set_layout, vk.base, - VkDescriptorSetLayout, - VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) - -static inline const struct panvk_descriptor_set_layout * -vk_to_panvk_descriptor_set_layout(const struct vk_descriptor_set_layout *layout) -{ - return container_of(layout, const struct panvk_descriptor_set_layout, vk); -} - -#endif diff --git a/src/panfrost/vulkan/bifrost/panvk_pipeline.h b/src/panfrost/vulkan/bifrost/panvk_pipeline.h index 062352fbf67..1aa5c726d84 100644 --- a/src/panfrost/vulkan/bifrost/panvk_pipeline.h +++ b/src/panfrost/vulkan/bifrost/panvk_pipeline.h @@ -36,7 +36,23 @@ struct panvk_pipeline_shader { } varyings; struct pan_shader_info info; - bool has_img_access; + + struct { + uint32_t used_set_mask; + + struct { + uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS]; + uint32_t count; + } dyn_ubos; + struct { + uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS]; + uint32_t count; + } dyn_ssbos; + struct { + mali_ptr map; + uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT]; + } others; + } desc_info; }; enum panvk_pipeline_type { @@ -48,7 +64,7 @@ struct panvk_pipeline { struct vk_object_base base; enum panvk_pipeline_type type; - const struct panvk_pipeline_layout *layout; + const struct vk_pipeline_layout *layout; struct panvk_pool bin_pool; struct panvk_pool desc_pool; diff --git a/src/panfrost/vulkan/bifrost/panvk_pipeline_layout.h b/src/panfrost/vulkan/bifrost/panvk_pipeline_layout.h deleted file mode 100644 index bcb89feecba..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_pipeline_layout.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. 
- * SPDX-License-Identifier: MIT - */ - -#ifndef PANVK_PIPELINE_LAYOUT_H -#define PANVK_PIPELINE_LAYOUT_H - -#ifndef PAN_ARCH -#error "PAN_ARCH must be defined" -#endif - -#include - -#include "vk_pipeline_layout.h" - -#include "panvk_descriptor_set_layout.h" -#include "panvk_macros.h" - -#define MAX_SETS 4 -#define MAX_DYNAMIC_UNIFORM_BUFFERS 16 -#define MAX_DYNAMIC_STORAGE_BUFFERS 8 -#define MAX_DYNAMIC_BUFFERS \ - (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) - -struct panvk_pipeline_layout { - struct vk_pipeline_layout vk; - - unsigned char sha1[20]; - - unsigned num_samplers; - unsigned num_textures; - unsigned num_ubos; - unsigned num_dyn_ubos; - unsigned num_dyn_ssbos; - uint32_t num_imgs; - - struct { - uint32_t size; - } push_constants; - - struct { - unsigned sampler_offset; - unsigned tex_offset; - unsigned ubo_offset; - unsigned dyn_ubo_offset; - unsigned dyn_ssbo_offset; - unsigned img_offset; - unsigned dyn_desc_ubo_offset; - } sets[MAX_SETS]; -}; - -VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_layout, vk.base, VkPipelineLayout, - VK_OBJECT_TYPE_PIPELINE_LAYOUT) - -unsigned panvk_per_arch(pipeline_layout_ubo_start)( - const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic); - -unsigned panvk_per_arch(pipeline_layout_ubo_index)( - const struct panvk_pipeline_layout *layout, unsigned set, unsigned binding, - unsigned array_index); - -unsigned panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)( - const struct panvk_pipeline_layout *layout); - -unsigned panvk_per_arch(pipeline_layout_dyn_ubos_offset)( - const struct panvk_pipeline_layout *layout); - -unsigned panvk_per_arch(pipeline_layout_total_ubo_count)( - const struct panvk_pipeline_layout *layout); - -#endif diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_cmd_desc_state.c b/src/panfrost/vulkan/bifrost/panvk_vX_cmd_desc_state.c index 4268cd75d91..2adab26296f 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_cmd_desc_state.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_cmd_desc_state.c @@ -15,7 +15,6 @@ #include "panvk_cmd_desc_state.h" #include "panvk_entrypoints.h" #include "panvk_pipeline.h" -#include "panvk_pipeline_layout.h" #include "pan_pool.h" @@ -25,246 +24,6 @@ #include "vk_command_buffer.h" #include "vk_command_pool.h" -void -panvk_per_arch(cmd_prepare_push_sets)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - const struct panvk_pipeline_layout *playout = pipeline->layout; - - for (unsigned i = 0; i < playout->vk.set_count; i++) { - const struct panvk_descriptor_set_layout *slayout = - vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[i]); - bool is_push_set = - slayout->flags & - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; - - if (desc_state->sets[i] || !is_push_set || !desc_state->push_sets[i]) - continue; - - struct panvk_descriptor_set *set = &desc_state->push_sets[i]->set; - - panvk_per_arch(push_descriptor_set_assign_layout)( - desc_state->push_sets[i], slayout); - if (slayout->desc_ubo_size) { - struct panfrost_ptr desc_ubo = - pan_pool_alloc_aligned(desc_pool_base, slayout->desc_ubo_size, 16); - struct mali_uniform_buffer_packed *ubos = set->ubos; - - memcpy(desc_ubo.cpu, set->desc_ubo.addr.host, slayout->desc_ubo_size); - set->desc_ubo.addr.dev = desc_ubo.gpu; - set->desc_ubo.addr.host = desc_ubo.cpu; - - pan_pack(&ubos[slayout->desc_ubo_index], UNIFORM_BUFFER, cfg) { - cfg.pointer = set->desc_ubo.addr.dev; - cfg.entries = DIV_ROUND_UP(slayout->desc_ubo_size, 16); - } - } - - 
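/* In the rewritten flow, cmd_push_descriptors() exposes the push set in
 * desc_state->sets[] as soon as descriptors are pushed, and only the
 * GPU upload is deferred, to cmd_prepare_push_descs(). */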
desc_state->sets[i] = &desc_state->push_sets[i]->set; - } -} - -void -panvk_per_arch(cmd_unprepare_push_sets)( - struct panvk_descriptor_state *desc_state) -{ - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { - if (desc_state->push_sets[i] && - &desc_state->push_sets[i]->set == desc_state->sets[i]) - desc_state->sets[i] = NULL; - } -} - -static void -panvk_cmd_prepare_dyn_ssbos(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - if (!pipeline->layout->num_dyn_ssbos || desc_state->dyn_desc_ubo) - return; - - struct panfrost_ptr ssbo_descs = - pan_pool_alloc_aligned(desc_pool_base, sizeof(desc_state->dyn.ssbos), 16); - - memcpy(ssbo_descs.cpu, desc_state->dyn.ssbos, sizeof(desc_state->dyn.ssbos)); - - desc_state->dyn_desc_ubo = ssbo_descs.gpu; -} - -void -panvk_per_arch(cmd_prepare_ubos)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - unsigned ubo_count = - panvk_per_arch(pipeline_layout_total_ubo_count)(pipeline->layout); - - if (!ubo_count || desc_state->ubos) - return; - - panvk_cmd_prepare_dyn_ssbos(desc_pool_base, desc_state, pipeline); - - struct panfrost_ptr ubos = - pan_pool_alloc_desc_array(desc_pool_base, ubo_count, UNIFORM_BUFFER); - struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu; - - for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { - const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); - const struct panvk_descriptor_set *set = desc_state->sets[s]; - - unsigned ubo_start = - panvk_per_arch(pipeline_layout_ubo_start)(pipeline->layout, s, false); - - if (!set) { - memset(&ubo_descs[ubo_start], 0, - set_layout->num_ubos * sizeof(*ubo_descs)); - } else { - memcpy(&ubo_descs[ubo_start], set->ubos, - set_layout->num_ubos * sizeof(*ubo_descs)); - } - } - - unsigned dyn_ubos_offset = - panvk_per_arch(pipeline_layout_dyn_ubos_offset)(pipeline->layout); - - memcpy(&ubo_descs[dyn_ubos_offset], desc_state->dyn.ubos, - pipeline->layout->num_dyn_ubos * sizeof(*ubo_descs)); - - if (pipeline->layout->num_dyn_ssbos) { - unsigned dyn_desc_ubo = - panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(pipeline->layout); - - pan_pack(&ubo_descs[dyn_desc_ubo], UNIFORM_BUFFER, cfg) { - cfg.pointer = desc_state->dyn_desc_ubo; - cfg.entries = - pipeline->layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr); - } - } - - desc_state->ubos = ubos.gpu; -} - -void -panvk_per_arch(cmd_prepare_textures)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - unsigned num_textures = pipeline->layout->num_textures; - - if (!num_textures || desc_state->textures) - return; - - struct panfrost_ptr textures = pan_pool_alloc_aligned( - desc_pool_base, num_textures * pan_size(TEXTURE), pan_size(TEXTURE)); - - void *texture = textures.cpu; - - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { - if (!desc_state->sets[i]) - continue; - - memcpy(texture, desc_state->sets[i]->textures, - desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE)); - - texture += desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE); - } - - desc_state->textures = textures.gpu; -} - -void -panvk_per_arch(cmd_prepare_samplers)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - unsigned num_samplers = 
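/* Slot 0 of the sampler table was reserved for the dummy sampler packed
 * just below. The replacement, cmd_prepare_shader_desc_tables(), only
 * emits a dummy sampler when a shader reads textures but binds no
 * sampler. */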
pipeline->layout->num_samplers; - - if (!num_samplers || desc_state->samplers) - return; - - struct panfrost_ptr samplers = - pan_pool_alloc_desc_array(desc_pool_base, num_samplers, SAMPLER); - - void *sampler = samplers.cpu; - - /* Prepare the dummy sampler */ - pan_pack(sampler, SAMPLER, cfg) { - cfg.seamless_cube_map = false; - cfg.magnify_nearest = true; - cfg.minify_nearest = true; - cfg.normalized_coordinates = false; - } - - sampler += pan_size(SAMPLER); - - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { - if (!desc_state->sets[i]) - continue; - - memcpy(sampler, desc_state->sets[i]->samplers, - desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER)); - - sampler += desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER); - } - - desc_state->samplers = samplers.gpu; -} - -void -panvk_per_arch(fill_img_attribs)(struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline, - void *attrib_bufs, void *attribs, - unsigned first_buf) -{ - for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { - const struct panvk_descriptor_set *set = desc_state->sets[s]; - - if (!set) - continue; - - const struct panvk_descriptor_set_layout *layout = set->layout; - unsigned img_idx = pipeline->layout->sets[s].img_offset; - unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2; - unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2; - - memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size); - - offset = img_idx * pan_size(ATTRIBUTE); - for (unsigned i = 0; i < layout->num_imgs; i++) { - pan_pack(attribs + offset, ATTRIBUTE, cfg) { - cfg.buffer_index = first_buf + (img_idx + i) * 2; - cfg.format = desc_state->sets[s]->img_fmts[i]; - cfg.offset_enable = false; - } - offset += pan_size(ATTRIBUTE); - } - } -} - -void -panvk_per_arch(prepare_img_attribs)(struct pan_pool *desc_pool_base, - struct panvk_descriptor_state *desc_state, - const struct panvk_pipeline *pipeline) -{ - if (desc_state->img.attribs) - return; - - unsigned attrib_count = pipeline->layout->num_imgs; - unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2); - struct panfrost_ptr bufs = pan_pool_alloc_desc_array( - desc_pool_base, attrib_buf_count + 1, ATTRIBUTE_BUFFER); - struct panfrost_ptr attribs = - pan_pool_alloc_desc_array(desc_pool_base, attrib_count, ATTRIBUTE); - - panvk_per_arch(fill_img_attribs)(desc_state, pipeline, bufs.cpu, attribs.cpu, - 0); - - desc_state->img.attrib_bufs = bufs.gpu; - desc_state->img.attribs = attribs.gpu; -} - void panvk_per_arch(cmd_desc_state_reset)( struct panvk_descriptor_state *gfx_desc_state, @@ -288,62 +47,6 @@ panvk_per_arch(cmd_desc_state_cleanup)( } } -static void -panvk_emit_dyn_ubo(struct panvk_descriptor_state *desc_state, - const struct panvk_descriptor_set *desc_set, - unsigned binding, unsigned array_idx, uint32_t dyn_offset, - unsigned dyn_ubo_slot) -{ - struct mali_uniform_buffer_packed *ubo = &desc_state->dyn.ubos[dyn_ubo_slot]; - const struct panvk_descriptor_set_layout *slayout = desc_set->layout; - VkDescriptorType type = slayout->bindings[binding].type; - - assert(type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC); - assert(dyn_ubo_slot < ARRAY_SIZE(desc_state->dyn.ubos)); - - const unsigned dyn_ubo_idx = slayout->bindings[binding].dyn_ubo_idx; - const struct panvk_buffer_desc *bdesc = - &desc_set->dyn_ubos[dyn_ubo_idx + array_idx]; - mali_ptr address = - panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset + dyn_offset); - size_t size = panvk_buffer_range(bdesc->buffer, bdesc->offset + 
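/* The dynamic offset used to be folded into the descriptor at bind
 * time here. The new code keeps the raw offsets in
 * desc_state->dyn_buf_offsets[][] and applies them when the tables are
 * built, in cmd_prepare_dyn_ssbos() and
 * cmd_prepare_shader_desc_tables(). */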
dyn_offset, - bdesc->size); - - if (size) { - pan_pack(ubo, UNIFORM_BUFFER, cfg) { - cfg.pointer = address; - cfg.entries = DIV_ROUND_UP(size, 16); - } - } else { - memset(ubo, 0, sizeof(*ubo)); - } -} - -static void -panvk_emit_dyn_ssbo(struct panvk_descriptor_state *desc_state, - const struct panvk_descriptor_set *desc_set, - unsigned binding, unsigned array_idx, uint32_t dyn_offset, - unsigned dyn_ssbo_slot) -{ - struct panvk_ssbo_addr *ssbo = &desc_state->dyn.ssbos[dyn_ssbo_slot]; - const struct panvk_descriptor_set_layout *slayout = desc_set->layout; - VkDescriptorType type = slayout->bindings[binding].type; - - assert(type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); - assert(dyn_ssbo_slot < ARRAY_SIZE(desc_state->dyn.ssbos)); - - const unsigned dyn_ssbo_idx = slayout->bindings[binding].dyn_ssbo_idx; - const struct panvk_buffer_desc *bdesc = - &desc_set->dyn_ssbos[dyn_ssbo_idx + array_idx]; - - *ssbo = (struct panvk_ssbo_addr){ - .base_addr = - panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset + dyn_offset), - .size = panvk_buffer_range(bdesc->buffer, bdesc->offset + dyn_offset, - bdesc->size), - }; -} - void panvk_per_arch(cmd_desc_state_bind_sets)( struct panvk_descriptor_state *desc_state, VkPipelineLayout layout, @@ -351,77 +54,193 @@ panvk_per_arch(cmd_desc_state_bind_sets)( const VkDescriptorSet *desc_sets, uint32_t dyn_offset_count, const uint32_t *dyn_offsets) { - VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout); - unsigned dynoffset_idx = 0; for (unsigned i = 0; i < desc_set_count; ++i) { - unsigned idx = i + first_set; + unsigned set_idx = i + first_set; VK_FROM_HANDLE(panvk_descriptor_set, set, desc_sets[i]); - desc_state->sets[idx] = set; + /* Invalidate the push set. */ + if (desc_state->sets[set_idx] && + desc_state->sets[set_idx] == desc_state->push_sets[set_idx]) + desc_state->push_sets[set_idx]->descs.dev = 0; - if (set->layout->num_dyn_ssbos || set->layout->num_dyn_ubos) { - unsigned dyn_ubo_slot = playout->sets[idx].dyn_ubo_offset; - unsigned dyn_ssbo_slot = playout->sets[idx].dyn_ssbo_offset; + desc_state->sets[set_idx] = set; - for (unsigned b = 0; b < set->layout->binding_count; b++) { - for (unsigned e = 0; e < set->layout->bindings[b].array_size; e++) { - VkDescriptorType type = set->layout->bindings[b].type; + if (!set || !set->layout->dyn_buf_count) + continue; - if (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { - panvk_emit_dyn_ubo(desc_state, set, b, e, - dyn_offsets[dynoffset_idx++], - dyn_ubo_slot++); - } else if (type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - panvk_emit_dyn_ssbo(desc_state, set, b, e, - dyn_offsets[dynoffset_idx++], - dyn_ssbo_slot++); - } - } + for (unsigned b = 0; b < set->layout->binding_count; b++) { + VkDescriptorType type = set->layout->bindings[b].type; + + if (type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && + type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + continue; + + unsigned dyn_buf_idx = set->layout->bindings[b].desc_idx; + for (unsigned e = 0; e < set->layout->bindings[b].desc_count; e++) { + desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx++] = + dyn_offsets[dynoffset_idx++]; } } } - /* Unconditionally reset all previously emitted descriptors tables. - * TODO: we could be smarter by checking which part of the pipeline layout - * are compatible with the previouly bound descriptor sets. 
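 * The rewritten cmd_desc_state_bind_sets() above drops this wholesale
 * reset: it only invalidates the uploaded copy of a push set whose
 * slot is rebound, and re-emission is otherwise driven by the
 * zero-initialized per-shader panvk_shader_desc_state. It also relies
 * on new set-layout fields (desc_idx, desc_count, dyn_buf_count) whose
 * definitions are not part of this hunk.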
- */ - desc_state->ubos = 0; - desc_state->textures = 0; - desc_state->samplers = 0; - desc_state->dyn_desc_ubo = 0; - desc_state->img.attrib_bufs = 0; - desc_state->img.attribs = 0; - assert(dynoffset_idx == dyn_offset_count); } -struct panvk_push_descriptor_set * +struct panvk_descriptor_set * panvk_per_arch(cmd_push_descriptors)(struct vk_command_buffer *cmdbuf, struct panvk_descriptor_state *desc_state, - uint32_t set) + uint32_t set_idx) { - assert(set < MAX_SETS); - if (unlikely(desc_state->push_sets[set] == NULL)) { - desc_state->push_sets[set] = - vk_zalloc(&cmdbuf->pool->alloc, sizeof(*desc_state->push_sets[0]), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (unlikely(desc_state->push_sets[set] == NULL)) { + assert(set_idx < MAX_SETS); + + if (unlikely(desc_state->push_sets[set_idx] == NULL)) { + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct panvk_descriptor_set, set, 1); + VK_MULTIALLOC_DECL(&ma, struct panvk_opaque_desc, descs, MAX_PUSH_DESCS); + + if (unlikely(!vk_multialloc_zalloc(&ma, &cmdbuf->pool->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))) { vk_command_buffer_set_error(cmdbuf, VK_ERROR_OUT_OF_HOST_MEMORY); return NULL; } + + desc_state->push_sets[set_idx] = set; + set->descs.host = descs; } - /* Pushing descriptors replaces whatever sets are bound */ - desc_state->sets[set] = NULL; + struct panvk_descriptor_set *set = desc_state->push_sets[set_idx]; - /* Reset all descs to force emission of new tables on the next draw/dispatch. - * TODO: Be smarter and only reset those when required. - */ - desc_state->ubos = 0; - desc_state->textures = 0; - desc_state->samplers = 0; - desc_state->img.attrib_bufs = 0; - desc_state->img.attribs = 0; - return desc_state->push_sets[set]; + /* Pushing descriptors replaces whatever sets are bound */ + desc_state->sets[set_idx] = set; + return set; +} + +void +panvk_per_arch(cmd_prepare_dyn_ssbos)( + struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline_shader *shader, + struct panvk_shader_desc_state *shader_desc_state) +{ + if (!shader->desc_info.dyn_ssbos.count || shader_desc_state->dyn_ssbos) + return; + + struct panfrost_ptr ptr = pan_pool_alloc_aligned( + desc_pool, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE, + PANVK_DESCRIPTOR_SIZE); + + struct panvk_ssbo_addr *ssbos = ptr.cpu; + for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) { + uint32_t src_handle = shader->desc_info.dyn_ssbos.map[i]; + uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle); + uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle); + const struct panvk_descriptor_set *set = desc_state->sets[set_idx]; + const uint32_t dyn_buf_offset = + desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx]; + + assert(set_idx < MAX_SETS); + assert(set); + + ssbos[i] = (struct panvk_ssbo_addr){ + .base_addr = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset, + .size = set->dyn_bufs[dyn_buf_idx].size, + }; + } + + shader_desc_state->dyn_ssbos = ptr.gpu; +} + +static void +panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline_shader *shader, + struct mali_uniform_buffer_packed *ubos, + uint32_t ubo_count) +{ + for (uint32_t i = 0; i < shader->desc_info.dyn_ubos.count; i++) { + uint32_t src_handle = shader->desc_info.dyn_ubos.map[i]; + uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle); + uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle); + uint32_t ubo_idx = + i + 
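/* Dynamic UBOs are appended after the shader's regular UBO
 * descriptors, so entry i of the dyn_ubos map lands at
 * others.count[PANVK_BIFROST_DESC_TABLE_UBO] + i in the table
 * (matching the desc_count computation in
 * cmd_prepare_shader_desc_tables() below). The
 * COPY_DESC_HANDLE_EXTRACT_* helpers are not defined in this hunk;
 * they presumably unpack a (table/set, index) pair from one 32-bit
 * handle, along the lines of:
 *
 *    #define COPY_DESC_HANDLE(table, idx)           (((table) << 28) | (idx))
 *    #define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
 *    #define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
 */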
shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO]; + const struct panvk_descriptor_set *set = desc_state->sets[set_idx]; + const uint32_t dyn_buf_offset = + desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx]; + + assert(set_idx < MAX_SETS); + assert(set); + assert(ubo_idx < ubo_count); + + pan_pack(&ubos[ubo_idx], UNIFORM_BUFFER, cfg) { + cfg.pointer = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset; + cfg.entries = DIV_ROUND_UP(set->dyn_bufs[dyn_buf_idx].size, 16); + } + } +} + +void +panvk_per_arch(cmd_prepare_shader_desc_tables)( + struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline_shader *shader, + struct panvk_shader_desc_state *shader_desc_state) +{ + for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) { + uint32_t desc_count = + shader->desc_info.others.count[i] + + (i == PANVK_BIFROST_DESC_TABLE_UBO ? shader->desc_info.dyn_ubos.count + : 0); + uint32_t desc_size = + i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE; + + if (!desc_count || shader_desc_state->tables[i]) + continue; + + struct panfrost_ptr ptr = pan_pool_alloc_aligned( + desc_pool, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE); + + shader_desc_state->tables[i] = ptr.gpu; + + if (i == PANVK_BIFROST_DESC_TABLE_UBO) + panvk_cmd_fill_dyn_ubos(desc_state, shader, ptr.cpu, desc_count); + + if (i == PANVK_BIFROST_DESC_TABLE_IMG) { + assert(!shader_desc_state->img_attrib_table); + + ptr = pan_pool_alloc_desc_array(desc_pool, desc_count, ATTRIBUTE); + shader_desc_state->img_attrib_table = ptr.gpu; + } + } + + uint32_t tex_count = + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_TEXTURE]; + uint32_t sampler_count = + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER]; + + if (tex_count && !sampler_count) { + struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER); + + /* Emit a dummy sampler if we have to. */ + pan_pack(sampler.cpu, SAMPLER, _) { + } + + shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu; + } +} + +void +panvk_per_arch(cmd_prepare_push_descs)(struct pan_pool *desc_pool, + struct panvk_descriptor_state *desc_state, + const struct panvk_pipeline *pipeline) +{ + const struct vk_pipeline_layout *playout = pipeline->layout; + + for (unsigned i = 0; i < playout->set_count; i++) { + struct panvk_descriptor_set *push_set = desc_state->push_sets[i]; + + if (!push_set || desc_state->sets[i] != push_set || push_set->descs.dev) + continue; + + push_set->descs.dev = pan_pool_upload_aligned( + desc_pool, push_set->descs.host, + push_set->desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); + } } diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set.c deleted file mode 100644 index 0a784b70834..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set.c +++ /dev/null @@ -1,995 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * - * Derived from: - * Copyright © 2016 Red Hat. 
- * Copyright © 2016 Bas Nieuwenhuizen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "genxml/gen_macros.h" - -#include "panvk_buffer_view.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_image.h" -#include "panvk_image_view.h" -#include "panvk_priv_bo.h" - -#include -#include -#include -#include -#include - -#include "util/mesa-sha1.h" -#include "vk_alloc.h" -#include "vk_descriptor_update_template.h" -#include "vk_descriptors.h" -#include "vk_format.h" -#include "vk_log.h" -#include "vk_util.h" - -#include "panvk_buffer.h" -#include "panvk_descriptor_set.h" -#include "panvk_descriptor_set_layout.h" -#include "panvk_sampler.h" - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(CreateDescriptorPool)( - VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - struct panvk_descriptor_pool *pool; - - pool = vk_object_zalloc(&device->vk, pAllocator, - sizeof(struct panvk_descriptor_pool), - VK_OBJECT_TYPE_DESCRIPTOR_POOL); - if (!pool) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - *pDescriptorPool = panvk_descriptor_pool_to_handle(pool); - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(DestroyDescriptorPool)(VkDevice _device, VkDescriptorPool _pool, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_descriptor_pool, pool, _pool); - - if (pool) - vk_object_free(&device->vk, pAllocator, pool); -} - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(ResetDescriptorPool)(VkDevice _device, VkDescriptorPool _pool, - VkDescriptorPoolResetFlags flags) -{ - return VK_SUCCESS; -} - -static void -panvk_descriptor_set_destroy(struct panvk_device *device, - struct panvk_descriptor_pool *pool, - struct panvk_descriptor_set *set) -{ - if (set->desc_ubo.bo) - panvk_priv_bo_destroy(set->desc_ubo.bo, NULL); - - vk_object_free(&device->vk, NULL, set); -} - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(FreeDescriptorSets)(VkDevice _device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet *pDescriptorSets) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_descriptor_pool, pool, descriptorPool); - - for (unsigned i = 0; i < count; i++) { - VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorSets[i]); - - if (set) - 
panvk_descriptor_set_destroy(device, pool, set); - } - return VK_SUCCESS; -} - -static void -panvk_fill_bview_desc(struct panvk_bview_desc *desc, - struct panvk_buffer_view *view) -{ - desc->elems = view->vk.elements; -} - -static void -panvk_fill_image_desc(struct panvk_image_desc *desc, - struct panvk_image_view *view) -{ - desc->width = view->vk.extent.width - 1; - desc->height = view->vk.extent.height - 1; - desc->depth = view->vk.extent.depth - 1; - desc->levels = view->vk.level_count; - desc->samples = view->vk.image->samples; - - /* Stick array layer count after the last valid size component */ - if (view->vk.image->image_type == VK_IMAGE_TYPE_1D) - desc->height = view->vk.layer_count - 1; - else if (view->vk.image->image_type == VK_IMAGE_TYPE_2D) - desc->depth = view->vk.layer_count - 1; -} - -static void panvk_write_sampler_desc_raw(struct panvk_descriptor_set *set, - uint32_t binding, uint32_t elem, - struct panvk_sampler *sampler); - -static struct panvk_descriptor_set * -panvk_descriptor_set_alloc(const struct panvk_descriptor_set_layout *layout, - const VkAllocationCallbacks *alloc, - VkSystemAllocationScope scope) -{ - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct panvk_descriptor_set, set, 1); - VK_MULTIALLOC_DECL(&ma, struct panvk_buffer_desc, dyn_ssbos, - layout->num_dyn_ssbos); - VK_MULTIALLOC_DECL(&ma, struct mali_uniform_buffer_packed, ubos, - layout->num_ubos); - VK_MULTIALLOC_DECL(&ma, struct panvk_buffer_desc, dyn_ubos, - layout->num_dyn_ubos); - VK_MULTIALLOC_DECL(&ma, struct mali_sampler_packed, samplers, - layout->num_samplers); - VK_MULTIALLOC_DECL(&ma, struct mali_texture_packed, textures, - layout->num_textures); - VK_MULTIALLOC_DECL(&ma, struct mali_attribute_buffer_packed, img_attrib_bufs, - layout->num_imgs * 2); - VK_MULTIALLOC_DECL(&ma, uint32_t, img_fmts, layout->num_imgs); - - if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return NULL; - - set->layout = layout; - - if (layout->num_dyn_ssbos) - set->dyn_ssbos = dyn_ssbos; - - if (layout->num_ubos) - set->ubos = ubos; - - if (layout->num_dyn_ubos) - set->dyn_ubos = dyn_ubos; - - if (layout->num_samplers) - set->samplers = samplers; - - if (layout->num_textures) - set->textures = textures; - - if (layout->num_imgs) { - set->img_attrib_bufs = img_attrib_bufs; - set->img_fmts = img_fmts; - } - - return set; -} - -static VkResult -panvk_per_arch(descriptor_set_create)( - struct panvk_device *device, struct panvk_descriptor_pool *pool, - const struct panvk_descriptor_set_layout *layout, - struct panvk_descriptor_set **out_set) -{ - /* TODO: Allocate from the pool! 
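 (the pool is a stub vk_object at this point: each set gets its own host allocation plus a private BO for its descriptor UBO, and ResetDescriptorPool is a no-op)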
*/ - struct panvk_descriptor_set *set = panvk_descriptor_set_alloc( - layout, &device->vk.alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); - - if (layout->desc_ubo_size) { - set->desc_ubo.bo = - panvk_priv_bo_create(device, layout->desc_ubo_size, 0, NULL, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set->desc_ubo.bo) - goto err_free_set; - - struct mali_uniform_buffer_packed *ubos = set->ubos; - - set->desc_ubo.addr.dev = set->desc_ubo.bo->addr.dev; - set->desc_ubo.addr.host = set->desc_ubo.bo->addr.host; - pan_pack(&ubos[layout->desc_ubo_index], UNIFORM_BUFFER, cfg) { - cfg.pointer = set->desc_ubo.addr.dev; - cfg.entries = DIV_ROUND_UP(layout->desc_ubo_size, 16); - } - } - - for (unsigned i = 0; i < layout->binding_count; i++) { - if (!layout->bindings[i].immutable_samplers) - continue; - - for (unsigned j = 0; j < layout->bindings[i].array_size; j++) { - struct panvk_sampler *sampler = - layout->bindings[i].immutable_samplers[j]; - panvk_write_sampler_desc_raw(set, i, j, sampler); - } - } - - *out_set = set; - return VK_SUCCESS; - -err_free_set: - if (set->desc_ubo.bo) - panvk_priv_bo_destroy(set->desc_ubo.bo, NULL); - vk_object_free(&device->vk, NULL, set); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); -} - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(AllocateDescriptorSets)( - VkDevice _device, const VkDescriptorSetAllocateInfo *pAllocateInfo, - VkDescriptorSet *pDescriptorSets) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool); - VkResult result; - unsigned i; - - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { - VK_FROM_HANDLE(panvk_descriptor_set_layout, layout, - pAllocateInfo->pSetLayouts[i]); - struct panvk_descriptor_set *set = NULL; - - result = - panvk_per_arch(descriptor_set_create)(device, pool, layout, &set); - if (result != VK_SUCCESS) - goto err_free_sets; - - pDescriptorSets[i] = panvk_descriptor_set_to_handle(set); - } - - return VK_SUCCESS; - -err_free_sets: - panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, - pDescriptorSets); - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) - pDescriptorSets[i] = VK_NULL_HANDLE; - - return result; -} - -static void * -panvk_desc_ubo_data(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - /* Dynamic SSBO info are stored in a separate UBO allocated from the - * cmd_buffer descriptor pool. 
- */ - assert(binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); - - return (char *)set->desc_ubo.addr.host + binding_layout->desc_ubo_offset + - elem * binding_layout->desc_ubo_stride; -} - -static struct mali_sampler_packed * -panvk_sampler_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - uint32_t sampler_idx = binding_layout->sampler_idx + elem; - - return &((struct mali_sampler_packed *)set->samplers)[sampler_idx]; -} - -static void -panvk_write_sampler_desc_raw(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, struct panvk_sampler *sampler) -{ - memcpy(panvk_sampler_desc(set, binding, elem), &sampler->desc, - sizeof(sampler->desc)); -} - -static void -panvk_write_sampler_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, - const VkDescriptorImageInfo *const pImageInfo) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - bool push_set = set->layout->flags & - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; - - if (binding_layout->immutable_samplers && !push_set) - return; - - struct panvk_sampler *sampler = - binding_layout->immutable_samplers - ? binding_layout->immutable_samplers[elem] - : panvk_sampler_from_handle(pImageInfo->sampler); - - panvk_write_sampler_desc_raw(set, binding, elem, sampler); -} - -static void -panvk_copy_sampler_desc(struct panvk_descriptor_set *dst_set, - uint32_t dst_binding, uint32_t dst_elem, - struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - const struct panvk_descriptor_set_binding_layout *dst_binding_layout = - &dst_set->layout->bindings[dst_binding]; - - if (dst_binding_layout->immutable_samplers) - return; - - memcpy(panvk_sampler_desc(dst_set, dst_binding, dst_elem), - panvk_sampler_desc(src_set, src_binding, src_elem), - sizeof(struct mali_sampler_packed)); -} - -static struct mali_texture_packed * -panvk_tex_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - unsigned tex_idx = binding_layout->tex_idx + elem; - - return &((struct mali_texture_packed *)set->textures)[tex_idx]; -} - -static void -panvk_write_tex_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, - const VkDescriptorImageInfo *const pImageInfo) -{ - VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView); - - memcpy(panvk_tex_desc(set, binding, elem), view->descs.tex.opaque, - pan_size(TEXTURE)); - - panvk_fill_image_desc(panvk_desc_ubo_data(set, binding, elem), view); -} - -static void -panvk_copy_tex_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding, - uint32_t dst_elem, struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - *panvk_tex_desc(dst_set, dst_binding, dst_elem) = - *panvk_tex_desc(src_set, src_binding, src_elem); - - /* Descriptor UBO data gets copied automatically */ -} - -static void -panvk_write_tex_buf_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, const VkBufferView bufferView) -{ - VK_FROM_HANDLE(panvk_buffer_view, view, bufferView); - - memcpy(panvk_tex_desc(set, binding, elem), view->descs.tex.opaque, - pan_size(TEXTURE)); - - panvk_fill_bview_desc(panvk_desc_ubo_data(set, binding, elem), view); -} - -static uint32_t -panvk_img_idx(struct 
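/* Images are bound through attribute descriptors on Bifrost: each
 * image owns a pair of ATTRIBUTE_BUFFER slots, the second presumably
 * holding the continuation descriptor for the surface, which is why
 * the offsets below are scaled by pan_size(ATTRIBUTE_BUFFER) * 2. */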
panvk_descriptor_set *set, uint32_t binding, uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - return binding_layout->img_idx + elem; -} - -static void -panvk_write_img_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, const VkDescriptorImageInfo *pImageInfo) -{ - VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView); - - unsigned img_idx = panvk_img_idx(set, binding, elem); - void *attrib_buf = (uint8_t *)set->img_attrib_bufs + - (pan_size(ATTRIBUTE_BUFFER) * 2 * img_idx); - - set->img_fmts[img_idx] = - GENX(panfrost_format_from_pipe_format)(view->pview.format)->hw; - memcpy(attrib_buf, view->descs.img_attrib_buf, - pan_size(ATTRIBUTE_BUFFER) * 2); - - panvk_fill_image_desc(panvk_desc_ubo_data(set, binding, elem), view); -} - -static void -panvk_copy_img_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding, - uint32_t dst_elem, struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - unsigned dst_img_idx = panvk_img_idx(dst_set, dst_binding, dst_elem); - unsigned src_img_idx = panvk_img_idx(src_set, src_binding, src_elem); - - void *dst_attrib_buf = (uint8_t *)dst_set->img_attrib_bufs + - (pan_size(ATTRIBUTE_BUFFER) * 2 * dst_img_idx); - void *src_attrib_buf = (uint8_t *)src_set->img_attrib_bufs + - (pan_size(ATTRIBUTE_BUFFER) * 2 * src_img_idx); - - dst_set->img_fmts[dst_img_idx] = src_set->img_fmts[src_img_idx]; - memcpy(dst_attrib_buf, src_attrib_buf, pan_size(ATTRIBUTE_BUFFER) * 2); - - /* Descriptor UBO data gets copied automatically */ -} - -static void -panvk_write_img_buf_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, const VkBufferView bufferView) -{ - VK_FROM_HANDLE(panvk_buffer_view, view, bufferView); - - unsigned img_idx = panvk_img_idx(set, binding, elem); - void *attrib_buf = (uint8_t *)set->img_attrib_bufs + - (pan_size(ATTRIBUTE_BUFFER) * 2 * img_idx); - enum pipe_format pfmt = vk_format_to_pipe_format(view->vk.format); - - set->img_fmts[img_idx] = GENX(panfrost_format_from_pipe_format)(pfmt)->hw; - memcpy(attrib_buf, view->descs.img_attrib_buf, - pan_size(ATTRIBUTE_BUFFER) * 2); - - panvk_fill_bview_desc(panvk_desc_ubo_data(set, binding, elem), view); -} - -static struct mali_uniform_buffer_packed * -panvk_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - unsigned ubo_idx = binding_layout->ubo_idx + elem; - - return &((struct mali_uniform_buffer_packed *)set->ubos)[ubo_idx]; -} - -static void -panvk_write_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - - mali_ptr ptr = panvk_buffer_gpu_ptr(buffer, pBufferInfo->offset); - size_t size = - panvk_buffer_range(buffer, pBufferInfo->offset, pBufferInfo->range); - - pan_pack(panvk_ubo_desc(set, binding, elem), UNIFORM_BUFFER, cfg) { - cfg.pointer = ptr; - cfg.entries = DIV_ROUND_UP(size, 16); - } -} - -static void -panvk_copy_ubo_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding, - uint32_t dst_elem, struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - *panvk_ubo_desc(dst_set, dst_binding, dst_elem) = - *panvk_ubo_desc(src_set, src_binding, src_elem); -} - -static struct panvk_buffer_desc * -panvk_dyn_ubo_desc(struct panvk_descriptor_set 
*set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - return &set->dyn_ubos[binding_layout->dyn_ubo_idx + elem]; -} - -static void -panvk_write_dyn_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, - const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - - *panvk_dyn_ubo_desc(set, binding, elem) = (struct panvk_buffer_desc){ - .buffer = buffer, - .offset = pBufferInfo->offset, - .size = pBufferInfo->range, - }; -} - -static void -panvk_copy_dyn_ubo_desc(struct panvk_descriptor_set *dst_set, - uint32_t dst_binding, uint32_t dst_elem, - struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - *panvk_dyn_ubo_desc(dst_set, dst_binding, dst_elem) = - *panvk_dyn_ubo_desc(src_set, src_binding, src_elem); -} - -static void -panvk_write_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - - struct panvk_ssbo_addr *desc = panvk_desc_ubo_data(set, binding, elem); - *desc = (struct panvk_ssbo_addr){ - .base_addr = panvk_buffer_gpu_ptr(buffer, pBufferInfo->offset), - .size = - panvk_buffer_range(buffer, pBufferInfo->offset, pBufferInfo->range), - }; -} - -static void -panvk_copy_ssbo_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding, - uint32_t dst_elem, struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - /* Descriptor UBO data gets copied automatically */ -} - -static struct panvk_buffer_desc * -panvk_dyn_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem) -{ - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set->layout->bindings[binding]; - - return &set->dyn_ssbos[binding_layout->dyn_ssbo_idx + elem]; -} - -static void -panvk_write_dyn_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding, - uint32_t elem, - const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - - *panvk_dyn_ssbo_desc(set, binding, elem) = (struct panvk_buffer_desc){ - .buffer = buffer, - .offset = pBufferInfo->offset, - .size = pBufferInfo->range, - }; -} - -static void -panvk_copy_dyn_ssbo_desc(struct panvk_descriptor_set *dst_set, - uint32_t dst_binding, uint32_t dst_elem, - struct panvk_descriptor_set *src_set, - uint32_t src_binding, uint32_t src_elem) -{ - *panvk_dyn_ssbo_desc(dst_set, dst_binding, dst_elem) = - *panvk_dyn_ssbo_desc(src_set, src_binding, src_elem); -} - -static void -panvk_descriptor_set_write(struct panvk_descriptor_set *set, - const VkWriteDescriptorSet *write) -{ - switch (write->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_sampler_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pImageInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_sampler_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pImageInfo[j]); - panvk_write_tex_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pImageInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_tex_desc(set, 
write->dstBinding, - write->dstArrayElement + j, - &write->pImageInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_img_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pImageInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_tex_buf_desc(set, write->dstBinding, - write->dstArrayElement + j, - write->pTexelBufferView[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_img_buf_desc(set, write->dstBinding, - write->dstArrayElement + j, - write->pTexelBufferView[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_ubo_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pBufferInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_dyn_ubo_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pBufferInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_ssbo_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pBufferInfo[j]); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - panvk_write_dyn_ssbo_desc(set, write->dstBinding, - write->dstArrayElement + j, - &write->pBufferInfo[j]); - } - break; - - default: - unreachable("Unsupported descriptor type"); - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(UpdateDescriptorSets)( - VkDevice _device, uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, - const VkCopyDescriptorSet *pDescriptorCopies) -{ - for (unsigned i = 0; i < descriptorWriteCount; i++) { - const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - VK_FROM_HANDLE(panvk_descriptor_set, set, write->dstSet); - - panvk_descriptor_set_write(set, write); - } - - for (unsigned i = 0; i < descriptorCopyCount; i++) { - const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - VK_FROM_HANDLE(panvk_descriptor_set, src_set, copy->srcSet); - VK_FROM_HANDLE(panvk_descriptor_set, dst_set, copy->dstSet); - - const struct panvk_descriptor_set_binding_layout *dst_binding_layout = - &dst_set->layout->bindings[copy->dstBinding]; - const struct panvk_descriptor_set_binding_layout *src_binding_layout = - &src_set->layout->bindings[copy->srcBinding]; - - assert(dst_binding_layout->type == src_binding_layout->type); - - /* Dynamic SSBO info are stored in a separate UBO allocated from the - * cmd_buffer descriptor pool. 
- */ - bool src_has_data_in_desc_ubo = - src_binding_layout->desc_ubo_stride > 0 && - src_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; - bool dst_has_data_in_desc_ubo = - dst_binding_layout->desc_ubo_stride > 0 && - dst_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; - - if (src_has_data_in_desc_ubo && dst_has_data_in_desc_ubo) { - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - memcpy(panvk_desc_ubo_data(dst_set, copy->dstBinding, - copy->dstArrayElement + j), - panvk_desc_ubo_data(src_set, copy->srcBinding, - copy->srcArrayElement + j), - MIN2(dst_binding_layout->desc_ubo_stride, - src_binding_layout->desc_ubo_stride)); - } - } - - switch (src_binding_layout->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_sampler_desc( - dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_sampler_desc( - dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - panvk_copy_tex_desc(dst_set, copy->dstBinding, - copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_tex_desc(dst_set, copy->dstBinding, - copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_img_desc(dst_set, copy->dstBinding, - copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_ubo_desc(dst_set, copy->dstBinding, - copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_dyn_ubo_desc( - dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_ssbo_desc(dst_set, copy->dstBinding, - copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - panvk_copy_dyn_ssbo_desc( - dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set, - copy->srcBinding, copy->srcArrayElement + j); - } - break; - - default: - unreachable("Unsupported descriptor type"); - } - } -} - -static void -panvk_descriptor_set_update_with_template(struct panvk_descriptor_set *set, - VkDescriptorUpdateTemplate templ, - const void *data) -{ - VK_FROM_HANDLE(vk_descriptor_update_template, template, templ); - - for (uint32_t i = 0; i < template->entry_count; i++) { - const struct vk_descriptor_template_entry *entry = &template->entries[i]; - - switch (entry->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorImageInfo *info = - data + entry->offset + j * entry->stride; - - if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLER || - entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - panvk_write_sampler_desc(set, entry->binding, - entry->array_element + j, info); - } - - if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || - entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || - entry->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) { - panvk_write_tex_desc(set, entry->binding, - entry->array_element + j, info); - } - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorImageInfo *info = - data + entry->offset + j * entry->stride; - - panvk_write_img_desc(set, entry->binding, entry->array_element + j, - info); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkBufferView *view = data + entry->offset + j * entry->stride; - - panvk_write_tex_buf_desc(set, entry->binding, - entry->array_element + j, *view); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkBufferView *view = data + entry->offset + j * entry->stride; - - panvk_write_img_buf_desc(set, entry->binding, - entry->array_element + j, *view); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorBufferInfo *info = - data + entry->offset + j * entry->stride; - - panvk_write_ubo_desc(set, entry->binding, entry->array_element + j, - info); - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorBufferInfo *info = - data + entry->offset + j * entry->stride; - - panvk_write_dyn_ubo_desc(set, entry->binding, - entry->array_element + j, info); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorBufferInfo *info = - data + entry->offset + j * entry->stride; - - panvk_write_ssbo_desc(set, entry->binding, entry->array_element + j, - info); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (unsigned j = 0; j < entry->array_count; j++) { - const VkDescriptorBufferInfo *info = - data + entry->offset + j * entry->stride; - - panvk_write_dyn_ssbo_desc(set, entry->binding, - entry->array_element + j, info); - } - break; - default: - unreachable("Invalid type"); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(UpdateDescriptorSetWithTemplate)( - VkDevice _device, VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *data) -{ - VK_FROM_HANDLE(panvk_descriptor_set, set, descriptorSet); - - panvk_descriptor_set_update_with_template(set, descriptorUpdateTemplate, - data); -} - -void -panvk_per_arch(push_descriptor_set_assign_layout)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout) -{ - ASSERTED unsigned num_descs = layout->num_samplers + layout->num_textures + - layout->num_ubos + layout->num_imgs; - struct panvk_descriptor_set *set = &push_set->set; - unsigned desc_offset = 0; - - set->layout = layout; - assert(layout->num_dyn_ubos == 0); - assert(layout->num_dyn_ssbos == 0); 
- assert(num_descs <= PANVK_MAX_PUSH_DESCS); - assert(layout->desc_ubo_size <= sizeof(push_set->storage.desc_ubo)); - - if (layout->num_ubos) { - set->ubos = (void *)(push_set->storage.descs + desc_offset); - desc_offset += PANVK_MAX_DESC_SIZE * layout->num_ubos; - } - - if (layout->num_samplers) { - set->samplers = (void *)(push_set->storage.descs + desc_offset); - desc_offset += PANVK_MAX_DESC_SIZE * layout->num_samplers; - } - - if (layout->num_textures) { - set->textures = (void *)(push_set->storage.descs + desc_offset); - desc_offset += PANVK_MAX_DESC_SIZE * layout->num_textures; - } - - if (layout->num_imgs) { - set->img_attrib_bufs = (void *)(push_set->storage.descs + desc_offset); - desc_offset += PANVK_MAX_DESC_SIZE * layout->num_imgs; - set->img_fmts = push_set->storage.img_fmts; - } - - if (layout->desc_ubo_size) - set->desc_ubo.addr.host = push_set->storage.desc_ubo; -} - -void -panvk_per_arch(push_descriptor_set)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout, uint32_t write_count, - const VkWriteDescriptorSet *writes) -{ - panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout); - for (unsigned i = 0; i < write_count; i++) { - const VkWriteDescriptorSet *write = &writes[i]; - - panvk_descriptor_set_write(&push_set->set, write); - } -} - -void -panvk_per_arch(push_descriptor_set_with_template)( - struct panvk_push_descriptor_set *push_set, - const struct panvk_descriptor_set_layout *layout, - VkDescriptorUpdateTemplate templ, const void *data) -{ - panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout); - panvk_descriptor_set_update_with_template(&push_set->set, templ, data); -} diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set_layout.c b/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set_layout.c deleted file mode 100644 index 287af23c204..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_vX_descriptor_set_layout.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. 
- * SPDX-License-Identifier: MIT - */ - -#include "vk_descriptors.h" -#include "vk_log.h" - -#include "panvk_descriptor_set.h" -#include "panvk_descriptor_set_layout.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_pipeline_layout.h" -#include "panvk_sampler.h" - -#define PANVK_DESCRIPTOR_ALIGN 8 - -/* FIXME: make sure those values are correct */ -#define PANVK_MAX_TEXTURES (1 << 16) -#define PANVK_MAX_IMAGES (1 << 8) -#define PANVK_MAX_SAMPLERS (1 << 16) -#define PANVK_MAX_UBOS 255 - -VKAPI_ATTR void VKAPI_CALL -panvk_per_arch(GetDescriptorSetLayoutSupport)( - VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - VkDescriptorSetLayoutSupport *pSupport) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - - pSupport->supported = false; - - VkDescriptorSetLayoutBinding *bindings; - VkResult result = vk_create_sorted_bindings( - pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); - if (result != VK_SUCCESS) { - vk_error(device, result); - return; - } - - unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0; - unsigned img_idx = 0; - UNUSED unsigned dynoffset_idx = 0; - - for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) { - const VkDescriptorSetLayoutBinding *binding = &bindings[i]; - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - sampler_idx += binding->descriptorCount; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - sampler_idx += binding->descriptorCount; - tex_idx += binding->descriptorCount; - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - tex_idx += binding->descriptorCount; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - dynoffset_idx += binding->descriptorCount; - FALLTHROUGH; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - ubo_idx += binding->descriptorCount; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - dynoffset_idx += binding->descriptorCount; - FALLTHROUGH; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - img_idx += binding->descriptorCount; - break; - default: - unreachable("Invalid descriptor type"); - } - } - free(bindings); - - /* The maximum values apply to all sets attached to a pipeline since all - * sets descriptors have to be merged in a single array. 
- */ - if (tex_idx > PANVK_MAX_TEXTURES / MAX_SETS || - sampler_idx > PANVK_MAX_SAMPLERS / MAX_SETS || - ubo_idx > PANVK_MAX_UBOS / MAX_SETS || - img_idx > PANVK_MAX_IMAGES / MAX_SETS) - return; - - pSupport->supported = true; -} - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(CreateDescriptorSetLayout)( - VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - struct panvk_descriptor_set_layout *set_layout; - VkDescriptorSetLayoutBinding *bindings = NULL; - unsigned num_bindings = 0; - VkResult result; - - if (pCreateInfo->bindingCount) { - result = vk_create_sorted_bindings(pCreateInfo->pBindings, - pCreateInfo->bindingCount, &bindings); - if (result != VK_SUCCESS) - return vk_error(device, result); - - num_bindings = bindings[pCreateInfo->bindingCount - 1].binding + 1; - } - - unsigned num_immutable_samplers = 0; - for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) { - if (bindings[i].pImmutableSamplers) - num_immutable_samplers += bindings[i].descriptorCount; - } - - size_t size = - sizeof(*set_layout) + - (sizeof(struct panvk_descriptor_set_binding_layout) * num_bindings) + - (sizeof(struct panvk_sampler *) * num_immutable_samplers); - set_layout = vk_descriptor_set_layout_zalloc(&device->vk, size); - if (!set_layout) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto err_free_bindings; - } - - set_layout->flags = pCreateInfo->flags; - - struct panvk_sampler **immutable_samplers = - (struct panvk_sampler **)((uint8_t *)set_layout + sizeof(*set_layout) + - (sizeof( - struct panvk_descriptor_set_binding_layout) * - num_bindings)); - - set_layout->binding_count = num_bindings; - - unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0; - unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0; - uint32_t desc_ubo_size = 0, dyn_desc_ubo_size = 0; - - for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) { - const VkDescriptorSetLayoutBinding *binding = &bindings[i]; - struct panvk_descriptor_set_binding_layout *binding_layout = - &set_layout->bindings[binding->binding]; - - binding_layout->type = binding->descriptorType; - binding_layout->array_size = binding->descriptorCount; - binding_layout->shader_stages = binding->stageFlags; - binding_layout->desc_ubo_stride = 0; - if (binding->pImmutableSamplers) { - binding_layout->immutable_samplers = immutable_samplers; - immutable_samplers += binding_layout->array_size; - for (unsigned j = 0; j < binding_layout->array_size; j++) { - VK_FROM_HANDLE(panvk_sampler, sampler, - binding->pImmutableSamplers[j]); - binding_layout->immutable_samplers[j] = sampler; - } - } - - switch (binding_layout->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - binding_layout->sampler_idx = sampler_idx; - sampler_idx += binding_layout->array_size; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - binding_layout->sampler_idx = sampler_idx; - binding_layout->tex_idx = tex_idx; - sampler_idx += binding_layout->array_size; - tex_idx += binding_layout->array_size; - binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc); - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - binding_layout->tex_idx = tex_idx; - tex_idx += binding_layout->array_size; - binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - binding_layout->tex_idx = tex_idx; - tex_idx += binding_layout->array_size; - 
binding_layout->desc_ubo_stride = sizeof(struct panvk_bview_desc); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - binding_layout->dyn_ubo_idx = dyn_ubo_idx; - dyn_ubo_idx += binding_layout->array_size; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - binding_layout->ubo_idx = ubo_idx; - ubo_idx += binding_layout->array_size; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - binding_layout->dyn_ssbo_idx = dyn_ssbo_idx; - dyn_ssbo_idx += binding_layout->array_size; - binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - binding_layout->img_idx = img_idx; - img_idx += binding_layout->array_size; - binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - binding_layout->img_idx = img_idx; - img_idx += binding_layout->array_size; - binding_layout->desc_ubo_stride = sizeof(struct panvk_bview_desc); - break; - default: - unreachable("Invalid descriptor type"); - } - - if (binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - binding_layout->desc_ubo_offset = dyn_desc_ubo_size; - dyn_desc_ubo_size += - binding_layout->desc_ubo_stride * binding_layout->array_size; - } else { - desc_ubo_size = ALIGN_POT(desc_ubo_size, PANVK_DESCRIPTOR_ALIGN); - binding_layout->desc_ubo_offset = desc_ubo_size; - desc_ubo_size += - binding_layout->desc_ubo_stride * binding_layout->array_size; - } - } - - set_layout->desc_ubo_size = desc_ubo_size; - if (desc_ubo_size > 0) - set_layout->desc_ubo_index = ubo_idx++; - - set_layout->num_samplers = sampler_idx; - set_layout->num_textures = tex_idx; - set_layout->num_ubos = ubo_idx; - set_layout->num_dyn_ubos = dyn_ubo_idx; - set_layout->num_dyn_ssbos = dyn_ssbo_idx; - set_layout->num_imgs = img_idx; - - free(bindings); - *pSetLayout = panvk_descriptor_set_layout_to_handle(set_layout); - return VK_SUCCESS; - -err_free_bindings: - free(bindings); - return vk_error(device, result); -} diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c new file mode 100644 index 00000000000..d5d8642e0fd --- /dev/null +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -0,0 +1,375 @@ +/* + * Copyright © 2024 Collabora Ltd. 
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include
+#include
+
+#include "genxml/gen_macros.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#include "pan_encoder.h"
+#include "pan_shader.h"
+
+#include "panvk_cmd_buffer.h"
+#include "panvk_device.h"
+#include "panvk_pipeline.h"
+#include "panvk_shader.h"
+
+struct pan_nir_desc_copy_info {
+   mali_ptr sets[MAX_SETS];
+   mali_ptr tables[PANVK_BIFROST_DESC_TABLE_COUNT];
+   mali_ptr img_attrib_table;
+   struct {
+      mali_ptr table;
+      uint32_t limits[PANVK_BIFROST_DESC_TABLE_COUNT];
+      uint32_t attrib_buf_idx_offset;
+   } desc_copy;
+   uint32_t set_desc_counts[MAX_SETS];
+};
+
+#define get_input_field(b, name) \
+   nir_load_push_constant( \
+      b, 1, sizeof(((struct pan_nir_desc_copy_info *)0)->name) * 8, \
+      nir_imm_int(b, 0), \
+      .base = offsetof(struct pan_nir_desc_copy_info, name), \
+      .range = sizeof(((struct pan_nir_desc_copy_info *)0)->name))
+
+#define get_input_array_slot(b, name, index) \
+   nir_load_push_constant( \
+      b, 1, sizeof(((struct pan_nir_desc_copy_info *)0)->name[0]) * 8, \
+      nir_imul_imm(b, index, \
+                   sizeof(((struct pan_nir_desc_copy_info *)0)->name[0])), \
+      .base = offsetof(struct pan_nir_desc_copy_info, name), \
+      .range = sizeof(((struct pan_nir_desc_copy_info *)0)->name))
+
+static void
+extract_desc_info_from_handle(nir_builder *b, nir_def *handle, nir_def **table,
+                              nir_def **desc_idx)
+{
+   *table = nir_ushr_imm(b, handle, 28);
+   *desc_idx = nir_iand_imm(b, handle, 0xfffffff);
+}
+
+static void
+set_to_table_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
+                  nir_def *src_desc_idx, nir_def *table_ptr,
+                  nir_def *dst_desc_idx, unsigned element_size)
+{
+   /* The last binding can have
+    * VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT set, we need to make
+    * sure we don't do an out-of-bound access on the source set. */
+   nir_def *dst_offset =
+      nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, element_size));
+
+   nir_push_if(b, nir_ult(b, src_desc_idx, set_desc_count));
+   {
+      nir_def *src_offset =
+         nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
+      nir_def *desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
+                                      element_size, element_size / 4, 32);
+      nir_store_global(b, nir_iadd(b, table_ptr, dst_offset), element_size,
+                       desc, ~0);
+   }
+   nir_push_else(b, NULL);
+   {
+      nir_const_value v[] = {
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+      };
+
+      nir_def *desc = nir_build_imm(b, element_size / 4, 32, v);
+      nir_store_global(b, nir_iadd(b, table_ptr, dst_offset), element_size,
+                       desc, ~0);
+   }
+   nir_pop_if(b, NULL);
+}
+
+static void
+set_to_table_img_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
+                      nir_def *src_desc_idx, nir_def *attrib_table_ptr,
+                      nir_def *attrib_buf_table_ptr, nir_def *dst_desc_idx)
+{
+   /* The last binding can have
+    * VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT set, we need to make
+    * sure we don't do an out-of-bound access on the source set.
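 */

/* For reference, a host-side sketch of the copy-handle layout decoded by
 * extract_desc_info_from_handle() above: the set index lives in the top 4
 * bits, the descriptor index in the low 28 bits. The helper names below are
 * illustrative, not part of the driver; the real encoding macro is
 * COPY_DESC_HANDLE(), used by fill_copy_descs_for_binding() further down. */
#include <assert.h>
#include <stdint.h>

static inline uint32_t
encode_copy_handle(uint32_t set, uint32_t desc_idx)
{
   assert(set < 16 && desc_idx < (1u << 28));
   return (set << 28) | desc_idx;
}

static inline void
decode_copy_handle(uint32_t handle, uint32_t *set, uint32_t *desc_idx)
{
   *set = handle >> 28;            /* nir_ushr_imm(b, handle, 28) */
   *desc_idx = handle & 0xfffffff; /* nir_iand_imm(b, handle, 0xfffffff) */
}

int
main(void)
{
   uint32_t set, idx;
   decode_copy_handle(encode_copy_handle(3, 42), &set, &idx);
   assert(set == 3 && idx == 42); /* round-trips losslessly */
   return 0;
}

/*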
*/
+   const unsigned element_size = pan_size(ATTRIBUTE_BUFFER) * 2;
+   const unsigned attrib_buf_comps = element_size / 4;
+   const unsigned attrib_comps = pan_size(ATTRIBUTE) / 4;
+   nir_def *attrib_offset =
+      nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, pan_size(ATTRIBUTE)));
+   nir_def *attrib_buf_offset =
+      nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, element_size));
+
+   nir_push_if(b, nir_ult(b, src_desc_idx, set_desc_count));
+   {
+      nir_def *attr_buf_idx_offset =
+         get_input_field(b, desc_copy.attrib_buf_idx_offset);
+      nir_def *src_offset =
+         nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
+      nir_def *src_desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
+                                          element_size, element_size / 4, 32);
+      nir_def *fmt = nir_iand_imm(b, nir_channel(b, src_desc, 2), 0xfffffc00);
+
+      /* Each image descriptor takes two attribute buffer slots, and we need
+       * to add the attribute buffer offset so that images also work in
+       * vertex shaders. */
+      nir_def *buf_idx =
+         nir_iadd(b, nir_imul_imm(b, dst_desc_idx, 2), attr_buf_idx_offset);
+
+      nir_def *attrib_w1 = nir_ior(b, buf_idx, fmt);
+
+      nir_def *attrib_desc = nir_vec2(b, attrib_w1, nir_imm_int(b, 0));
+
+      nir_store_global(b, nir_iadd(b, attrib_table_ptr, attrib_offset),
+                       pan_size(ATTRIBUTE), attrib_desc,
+                       nir_component_mask(attrib_comps));
+
+      nir_def *attrib_buf_desc = nir_vec8(
+         b, nir_channel(b, src_desc, 0), nir_channel(b, src_desc, 1),
+         nir_iand_imm(b, nir_channel(b, src_desc, 2), BITFIELD_MASK(10)),
+         nir_channel(b, src_desc, 3), nir_channel(b, src_desc, 4),
+         nir_channel(b, src_desc, 5), nir_channel(b, src_desc, 6),
+         nir_channel(b, src_desc, 7));
+      nir_store_global(b, nir_iadd(b, attrib_buf_table_ptr, attrib_buf_offset),
+                       element_size, attrib_buf_desc,
+                       nir_component_mask(attrib_buf_comps));
+   }
+   nir_push_else(b, NULL);
+   {
+      nir_const_value v[] = {
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+         nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
+      };
+
+      nir_def *desc =
+         nir_build_imm(b, MAX2(attrib_buf_comps, attrib_comps), 32, v);
+
+      nir_store_global(b, nir_iadd(b, attrib_buf_table_ptr, attrib_buf_offset),
+                       element_size, desc,
+                       nir_component_mask(attrib_buf_comps));
+      nir_store_global(b, nir_iadd(b, attrib_table_ptr, attrib_offset),
+                       pan_size(ATTRIBUTE), desc,
+                       nir_component_mask(attrib_comps));
+   }
+   nir_pop_if(b, NULL);
+}
+
+static void
+single_desc_copy(nir_builder *b, nir_def *desc_copy_idx)
+{
+   nir_def *desc_copy_offset = nir_imul_imm(b, desc_copy_idx, sizeof(uint32_t));
+   nir_def *desc_copy_ptr = nir_iadd(b, get_input_field(b, desc_copy.table),
+                                     nir_u2u64(b, desc_copy_offset));
+   nir_def *src_copy_handle = nir_load_global(b, desc_copy_ptr, 4, 1, 32);
+
+   nir_def *set_idx, *src_desc_idx;
+   extract_desc_info_from_handle(b, src_copy_handle, &set_idx, &src_desc_idx);
+
+   nir_def *set_ptr = get_input_array_slot(b, sets, set_idx);
+   nir_def *set_desc_count = get_input_array_slot(b, set_desc_counts, set_idx);
+   nir_def *ubo_end =
+      get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_UBO]);
+   nir_def *img_end =
+      get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_IMG]);
+   nir_def *tex_end =
+      get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_TEXTURE]);
+   nir_def *sampler_end =
+      get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_SAMPLER]);
+
+   nir_push_if(b, nir_ult(b, desc_copy_idx, ubo_end));
+   {
nir_def *table_ptr =
+         get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_UBO]);
+
+      set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx, table_ptr,
+                        desc_copy_idx,
+                        sizeof(struct mali_uniform_buffer_packed));
+   }
+   nir_push_else(b, NULL);
+   {
+      nir_push_if(b, nir_ult(b, desc_copy_idx, img_end));
+      {
+         nir_def *table_ptr =
+            get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_IMG]);
+         nir_def *attrib_table_ptr = get_input_field(b, img_attrib_table);
+         nir_def *attrib_buf_table_ptr = table_ptr;
+
+         set_to_table_img_copy(b, set_ptr, set_desc_count, src_desc_idx,
+                               attrib_table_ptr, attrib_buf_table_ptr,
+                               nir_isub(b, desc_copy_idx, ubo_end));
+      }
+      nir_push_else(b, NULL);
+      {
+         nir_push_if(b, nir_ult(b, desc_copy_idx, tex_end));
+         {
+            nir_def *table_ptr =
+               get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]);
+
+            set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx,
+                              table_ptr, nir_isub(b, desc_copy_idx, img_end),
+                              sizeof(struct mali_texture_packed));
+         }
+         nir_push_else(b, NULL);
+         {
+            nir_push_if(b, nir_ult(b, desc_copy_idx, sampler_end));
+            {
+               nir_def *table_ptr =
+                  get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]);
+
+               set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx,
+                                 table_ptr, nir_isub(b, desc_copy_idx, tex_end),
+                                 sizeof(struct mali_sampler_packed));
+            }
+            nir_pop_if(b, NULL);
+         }
+         nir_pop_if(b, NULL);
+      }
+      nir_pop_if(b, NULL);
+   }
+   nir_pop_if(b, NULL);
+}
+
+static mali_ptr
+panvk_meta_desc_copy_shader(struct panvk_device *dev,
+                            struct pan_shader_info *shader_info)
+{
+   struct panvk_physical_device *phys_dev =
+      to_panvk_physical_device(dev->vk.physical);
+
+   nir_builder b = nir_builder_init_simple_shader(
+      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s",
+      "desc_copy");
+
+   /* We actually customize the invocation count at execution time to issue
+    * the exact number of jobs.
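 */

/* A CPU-side reference for the branch cascade in single_desc_copy() above:
 * limits[] holds cumulative per-table copy counts (see
 * meta_get_copy_desc_job() below), so table i owns the half-open index range
 * [limits[i-1], limits[i]). Names below are illustrative only, a sketch of
 * the routing logic rather than driver code. */
#include <assert.h>
#include <stdint.h>

enum copy_table { T_UBO, T_IMG, T_TEX, T_SAMPLER, T_COUNT };

static enum copy_table
route_copy(uint32_t idx, const uint32_t limits[T_COUNT], uint32_t *local_idx)
{
   for (int t = T_UBO; t < T_COUNT; t++) {
      if (idx < limits[t]) {
         /* Subtract the previous cumulative limit to get the index within
          * the destination table, like the nir_isub() calls above. */
         *local_idx = t == T_UBO ? idx : idx - limits[t - 1];
         return (enum copy_table)t;
      }
   }
   assert(!"copy index out of range");
   return T_COUNT;
}

int
main(void)
{
   /* 4 UBO copies, then 2 images, 3 textures, 3 samplers. */
   const uint32_t limits[T_COUNT] = {4, 6, 9, 12};
   uint32_t local;
   assert(route_copy(5, limits, &local) == T_IMG && local == 1);
   assert(route_copy(9, limits, &local) == T_SAMPLER && local == 0);
   return 0;
}

/*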
*/ + b.shader->info.workgroup_size[0] = 1; + b.shader->info.workgroup_size[1] = 1; + b.shader->info.workgroup_size[2] = 1; + + nir_def *desc_copy_id = + nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); + single_desc_copy(&b, desc_copy_id); + + struct panfrost_compile_inputs inputs = { + .gpu_id = phys_dev->kmod.props.gpu_prod_id, + .no_ubo_to_push = true, + }; + struct util_dynarray binary; + + util_dynarray_init(&binary, NULL); + pan_shader_preprocess(b.shader, inputs.gpu_id); + GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); + ralloc_free(b.shader); + + shader_info->push.count = + DIV_ROUND_UP(sizeof(struct pan_nir_desc_copy_info), 4); + + mali_ptr shader = pan_pool_upload_aligned(&dev->meta.bin_pool.base, + binary.data, binary.size, 128); + + util_dynarray_fini(&binary); + return shader; +} + +void +panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev) +{ + struct pan_shader_info shader_info; + + mali_ptr shader = panvk_meta_desc_copy_shader(dev, &shader_info); + struct panfrost_ptr rsd = + pan_pool_alloc_desc(&dev->meta.desc_pool.base, RENDERER_STATE); + + pan_pack(rsd.cpu, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(&shader_info, shader, &cfg); + } + + dev->meta.desc_copy.rsd = rsd.gpu; +} + +struct panfrost_ptr +panvk_per_arch(meta_get_copy_desc_job)( + struct panvk_device *dev, struct pan_pool *desc_pool, + const struct panvk_pipeline_shader *shader, + const struct panvk_descriptor_state *desc_state, + const struct panvk_shader_desc_state *shader_desc_state) +{ + mali_ptr copy_table = shader->desc_info.others.map; + if (!copy_table) + return (struct panfrost_ptr){0}; + + struct pan_nir_desc_copy_info copy_info = { + .img_attrib_table = shader_desc_state->img_attrib_table, + .desc_copy = { + .table = copy_table, + .attrib_buf_idx_offset = + shader->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0, + }, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(copy_info.desc_copy.limits); i++) + copy_info.desc_copy.limits[i] = + shader->desc_info.others.count[i] + + (i > 0 ? copy_info.desc_copy.limits[i - 1] : 0); + + for (uint32_t i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { + const struct panvk_descriptor_set *set = desc_state->sets[i]; + + if (!set) + continue; + + copy_info.sets[i] = set->descs.dev; + copy_info.set_desc_counts[i] = set->desc_count; + } + + for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) { + uint32_t desc_count = shader->desc_info.others.count[i]; + + if (!desc_count) + continue; + + copy_info.tables[i] = shader_desc_state->tables[i]; + } + + mali_ptr push_uniforms = + pan_pool_upload_aligned(desc_pool, ©_info, sizeof(copy_info), 16); + + struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB); + + /* Given the per-stage max descriptors limit, we should never + * reach the workgroup dimension limit. 
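 */

/* The limits[] consumed by the copy shader are a running sum of the
 * per-table copy counts, mirroring the loop in meta_get_copy_desc_job()
 * above; the grand total also becomes the X dimension of the compute job,
 * which the driver bounds with assert(copy_count - 1 < BITFIELD_MASK(10)).
 * A minimal sketch of that prefix sum (helper name is illustrative): */
#include <assert.h>
#include <stdint.h>

#define TABLE_COUNT 4

static uint32_t
build_limits(const uint32_t count[TABLE_COUNT], uint32_t limits[TABLE_COUNT])
{
   for (uint32_t i = 0; i < TABLE_COUNT; i++)
      limits[i] = count[i] + (i > 0 ? limits[i - 1] : 0);

   uint32_t copy_count = limits[TABLE_COUNT - 1];
   assert(copy_count && copy_count - 1 < 1023); /* mirrors BITFIELD_MASK(10) */
   return copy_count;
}

int
main(void)
{
   const uint32_t count[TABLE_COUNT] = {4, 2, 3, 3};
   uint32_t limits[TABLE_COUNT];
   assert(build_limits(count, limits) == 12 && limits[1] == 6);
   return 0;
}

/*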
*/ + uint32_t copy_count = + copy_info.desc_copy.limits[PANVK_BIFROST_DESC_TABLE_COUNT - 1]; + + assert(copy_count - 1 < BITFIELD_MASK(10)); + + panfrost_pack_work_groups_compute( + pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), 1, 1, 1, copy_count, 1, + 1, false, false); + + pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = util_logbase2_ceil(copy_count + 1) + + util_logbase2_ceil(1 + 1) + + util_logbase2_ceil(1 + 1); + } + + struct pan_tls_info tlsinfo = {0}; + struct panfrost_ptr tls = pan_pool_alloc_desc(desc_pool, LOCAL_STORAGE); + + GENX(pan_emit_tls)(&tlsinfo, tls.cpu); + + pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.state = dev->meta.desc_copy.rsd; + cfg.push_uniforms = push_uniforms; + cfg.thread_storage = tls.gpu; + } + + return job; +} diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/bifrost/panvk_vX_nir_lower_descriptors.c index e9d061a167e..0319db78480 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_nir_lower_descriptors.c @@ -28,24 +28,26 @@ */ #include "panvk_device.h" -#include "panvk_pipeline_layout.h" #include "panvk_shader.h" +#include "vk_pipeline_layout.h" + +#include "util/bitset.h" #include "nir.h" #include "nir_builder.h" -struct apply_descriptors_ctx { - const struct panvk_pipeline_layout *layout; +struct lower_desc_ctx { + const struct panvk_descriptor_set_layout *set_layouts[MAX_SETS]; + struct panvk_shader_desc_info *desc_info; + struct hash_table *ht; bool add_bounds_checks; - bool has_img_access; - nir_address_format desc_addr_format; nir_address_format ubo_addr_format; nir_address_format ssbo_addr_format; }; static nir_address_format addr_format_for_desc_type(VkDescriptorType desc_type, - const struct apply_descriptors_ctx *ctx) + const struct lower_desc_ctx *ctx) { switch (desc_type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: @@ -62,18 +64,130 @@ addr_format_for_desc_type(VkDescriptorType desc_type, } static const struct panvk_descriptor_set_layout * -get_set_layout(uint32_t set, const struct apply_descriptors_ctx *ctx) +get_set_layout(uint32_t set, const struct lower_desc_ctx *ctx) { - return vk_to_panvk_descriptor_set_layout(ctx->layout->vk.set_layouts[set]); + return ctx->set_layouts[set]; } static const struct panvk_descriptor_set_binding_layout * get_binding_layout(uint32_t set, uint32_t binding, - const struct apply_descriptors_ctx *ctx) + const struct lower_desc_ctx *ctx) { return &get_set_layout(set, ctx)->bindings[binding]; } +#define DELETED_KEY (void *)(uintptr_t)1 + +struct desc_id { + uint32_t set; + uint32_t binding; + uint32_t subdesc; +}; + +static void * +desc_id_to_key(struct desc_id id) +{ + assert(id.set <= BITFIELD_MASK(4)); + assert(id.subdesc <= BITFIELD_MASK(1)); + assert(id.binding <= BITFIELD_MASK(27)); + + uint32_t handle = (id.set << 28) | (id.subdesc << 27) | id.binding; + assert(handle < UINT32_MAX - 2); + return (void *)(uintptr_t)(handle + 2); +} + +static struct desc_id +key_to_desc_id(const void *key) +{ + uint32_t handle = (uintptr_t)key - 2; + + return (struct desc_id){ + .set = handle >> 28, + .subdesc = (handle & BITFIELD_BIT(27)) ? 1 : 0, + .binding = handle & BITFIELD_MASK(27), + }; +} + +static enum panvk_bifrost_desc_table_type +desc_type_to_table_type(VkDescriptorType type, unsigned subdesc_idx) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return subdesc_idx == 1 ? 
PANVK_BIFROST_DESC_TABLE_SAMPLER + : PANVK_BIFROST_DESC_TABLE_TEXTURE; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return PANVK_BIFROST_DESC_TABLE_TEXTURE; + case VK_DESCRIPTOR_TYPE_SAMPLER: + return PANVK_BIFROST_DESC_TABLE_SAMPLER; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return PANVK_BIFROST_DESC_TABLE_IMG; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + return PANVK_BIFROST_DESC_TABLE_UBO; + default: + return PANVK_BIFROST_DESC_TABLE_INVALID; + } +} + +static uint32_t +get_subdesc_idx(const struct panvk_descriptor_set_binding_layout *bind_layout, + VkDescriptorType subdesc_type) +{ + if (bind_layout->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + assert(subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || + subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLER); + return subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLER ? 1 : 0; + } + + return 0; +} + +static uint32_t +shader_desc_idx(uint32_t set, uint32_t binding, VkDescriptorType subdesc_type, + const struct lower_desc_ctx *ctx) +{ + const struct panvk_descriptor_set_layout *set_layout = + get_set_layout(set, ctx); + const struct panvk_descriptor_set_binding_layout *bind_layout = + &set_layout->bindings[binding]; + struct desc_id src = { + .set = set, + .subdesc = get_subdesc_idx(bind_layout, subdesc_type), + .binding = binding, + }; + struct hash_entry *he = + _mesa_hash_table_search(ctx->ht, desc_id_to_key(src)); + + assert(he); + + const struct panvk_shader_desc_map *map; + uint32_t *entry = he->data; + + if (bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { + map = &ctx->desc_info->dyn_ubos; + } else if (bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + map = &ctx->desc_info->dyn_ssbos; + } else { + uint32_t table = desc_type_to_table_type(bind_layout->type, src.subdesc); + + assert(table < PANVK_BIFROST_DESC_TABLE_COUNT); + map = &ctx->desc_info->others[table]; + } + + assert(entry >= map->map && entry < map->map + map->count); + + uint32_t idx = entry - map->map; + + /* Adjust the destination index for all dynamic UBOs, which are laid out + * just after the regular UBOs in the UBO table. 
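 */

/* The resulting UBO-table layout, in index form: regular UBO copies come
 * first, dynamic UBOs are appended after them, which is what the adjustment
 * above implements. A tiny illustration (function name and counts are made
 * up for the example): */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint32_t
ubo_table_idx(bool dynamic, uint32_t entry_idx, uint32_t regular_ubo_count)
{
   return entry_idx + (dynamic ? regular_ubo_count : 0);
}

int
main(void)
{
   /* With 5 regular UBO copies, dynamic UBO entry #2 lands in slot 7. */
   assert(ubo_table_idx(false, 3, 5) == 3);
   assert(ubo_table_idx(true, 2, 5) == 7);
   return 0;
}

/*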
*/ + if (bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) + idx += ctx->desc_info->others[PANVK_BIFROST_DESC_TABLE_UBO].count; + + return idx; +} + /** Build a Vulkan resource index * * A "resource index" is the term used by our SPIR-V parser and the relevant @@ -97,24 +211,22 @@ get_binding_layout(uint32_t set, uint32_t binding, static nir_def * build_res_index(nir_builder *b, uint32_t set, uint32_t binding, nir_def *array_index, nir_address_format addr_format, - const struct apply_descriptors_ctx *ctx) + const struct lower_desc_ctx *ctx) { const struct panvk_descriptor_set_layout *set_layout = get_set_layout(set, ctx); const struct panvk_descriptor_set_binding_layout *bind_layout = &set_layout->bindings[binding]; - - uint32_t array_size = bind_layout->array_size; + uint32_t array_size = bind_layout->desc_count; switch (bind_layout->type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { assert(addr_format == nir_address_format_32bit_index_offset); - const unsigned ubo_idx = panvk_per_arch(pipeline_layout_ubo_index)( - ctx->layout, set, binding, 0); - - const uint32_t packed = (array_size - 1) << 16 | ubo_idx; + const uint32_t packed = + (array_size - 1) << 16 | + shader_desc_idx(set, binding, bind_layout->type, ctx); return nir_vec2(b, nir_imm_int(b, packed), array_index); } @@ -124,24 +236,31 @@ build_res_index(nir_builder *b, uint32_t set, uint32_t binding, assert(addr_format == nir_address_format_64bit_bounded_global || addr_format == nir_address_format_64bit_global_32bit_offset); - const bool is_dynamic = - bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; - const unsigned desc_ubo_idx = - is_dynamic - ? panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(ctx->layout) - : panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set, - false) + - set_layout->desc_ubo_index; - const unsigned desc_ubo_offset = - bind_layout->desc_ubo_offset + - (is_dynamic ? ctx->layout->sets[set].dyn_desc_ubo_offset : 0); + unsigned base_addr_sysval_offs; + uint32_t desc_idx = + bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC + ? shader_desc_idx(set, binding, bind_layout->type, ctx) + : bind_layout->desc_idx; - const uint32_t packed = - (bind_layout->desc_ubo_stride << 16) | desc_ubo_idx; + if (b->shader->info.stage == MESA_SHADER_COMPUTE) + base_addr_sysval_offs = + bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + ? offsetof(struct panvk_compute_sysvals, desc.sets[set]) + : offsetof(struct panvk_compute_sysvals, desc.dyn_ssbos); + else if (b->shader->info.stage == MESA_SHADER_VERTEX) + base_addr_sysval_offs = + bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + ? offsetof(struct panvk_graphics_sysvals, desc.sets[set]) + : offsetof(struct panvk_graphics_sysvals, desc.vs_dyn_ssbos); + else + base_addr_sysval_offs = + bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + ? 
offsetof(struct panvk_graphics_sysvals, desc.sets[set])
+            : offsetof(struct panvk_graphics_sysvals, desc.fs_dyn_ssbos);
 
-      return nir_vec4(b, nir_imm_int(b, packed),
-                      nir_imm_int(b, desc_ubo_offset),
-                      nir_imm_int(b, array_size - 1), array_index);
+      return nir_vec4(b, nir_imm_int(b, base_addr_sysval_offs),
+                      nir_imm_int(b, desc_idx), array_index,
+                      nir_imm_int(b, array_size - 1));
    }
 
    default:
@@ -169,8 +288,8 @@ build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
    case nir_address_format_64bit_bounded_global:
    case nir_address_format_64bit_global_32bit_offset:
       return nir_vec4(b, nir_channel(b, orig, 0), nir_channel(b, orig, 1),
-                      nir_channel(b, orig, 2),
-                      nir_iadd(b, nir_channel(b, orig, 3), delta));
+                      nir_iadd(b, nir_channel(b, orig, 2), delta),
+                      nir_channel(b, orig, 3));
 
    default:
       unreachable("Unhandled address format");
@@ -187,40 +306,40 @@ static nir_def *
 build_buffer_addr_for_res_index(nir_builder *b, nir_def *res_index,
                                 nir_address_format addr_format,
-                                const struct apply_descriptors_ctx *ctx)
+                                const struct lower_desc_ctx *ctx)
 {
    switch (addr_format) {
    case nir_address_format_32bit_index_offset: {
       nir_def *packed = nir_channel(b, res_index, 0);
       nir_def *array_index = nir_channel(b, res_index, 1);
 
-      nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
+      nir_def *first_desc_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
       nir_def *array_max = nir_extract_u16(b, packed, nir_imm_int(b, 1));
 
       if (ctx->add_bounds_checks)
         array_index = nir_umin(b, array_index, array_max);
 
-      return nir_vec2(b, nir_iadd(b, surface_index, array_index),
+      return nir_vec2(b, nir_iadd(b, first_desc_index, array_index),
                       nir_imm_int(b, 0));
    }
 
    case nir_address_format_64bit_bounded_global:
    case nir_address_format_64bit_global_32bit_offset: {
-      nir_def *packed = nir_channel(b, res_index, 0);
-      nir_def *desc_ubo_offset = nir_channel(b, res_index, 1);
-      nir_def *array_max = nir_channel(b, res_index, 2);
-      nir_def *array_index = nir_channel(b, res_index, 3);
-
-      nir_def *desc_ubo_idx = nir_extract_u16(b, packed, nir_imm_int(b, 0));
-      nir_def *desc_ubo_stride = nir_extract_u16(b, packed, nir_imm_int(b, 1));
+      nir_def *base_addr_sysval_offset = nir_channel(b, res_index, 0);
+      nir_def *first_desc_index = nir_channel(b, res_index, 1);
+      nir_def *array_index = nir_channel(b, res_index, 2);
+      nir_def *array_max = nir_channel(b, res_index, 3);
 
       if (ctx->add_bounds_checks)
         array_index = nir_umin(b, array_index, array_max);
 
-      desc_ubo_offset = nir_iadd(b, desc_ubo_offset,
-                                 nir_imul(b, array_index, desc_ubo_stride));
+      nir_def *desc_offset = nir_imul_imm(
+         b, nir_iadd(b, array_index, first_desc_index), PANVK_DESCRIPTOR_SIZE);
 
-      nir_def *desc = nir_load_ubo(b, 4, 32, desc_ubo_idx, desc_ubo_offset,
-                                   .align_mul = 16, .range = ~0);
+      nir_def *base_addr = nir_load_push_constant(
+         b, 1, 64, base_addr_sysval_offset, .base = 256, .range = 256);
+      nir_def *desc_addr = nir_iadd(b, base_addr, nir_u2u64(b, desc_offset));
+      nir_def *desc =
+         nir_load_global(b, desc_addr, PANVK_DESCRIPTOR_SIZE, 4, 32);
 
       /* The offset in the descriptor is guaranteed to be zero when it's
        * written into the descriptor set.
This lets us avoid some unnecessary @@ -237,7 +356,7 @@ build_buffer_addr_for_res_index(nir_builder *b, nir_def *res_index, static bool lower_res_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, - const struct apply_descriptors_ctx *ctx) + const struct lower_desc_ctx *ctx) { b->cursor = nir_before_instr(&intrin->instr); @@ -277,17 +396,24 @@ lower_res_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, static void get_resource_deref_binding(nir_deref_instr *deref, uint32_t *set, uint32_t *binding, uint32_t *index_imm, - nir_def **index_ssa) + nir_def **index_ssa, uint32_t *max_idx) { *index_imm = 0; + *max_idx = 0; *index_ssa = NULL; if (deref->deref_type == nir_deref_type_array) { - if (nir_src_is_const(deref->arr.index)) + if (nir_src_is_const(deref->arr.index)) { *index_imm = nir_src_as_uint(deref->arr.index); - else + *max_idx = *index_imm; + } else { *index_ssa = deref->arr.index.ssa; + /* Zero means variable array. The minus one should give us UINT32_MAX, + * which matches what we want. */ + *max_idx = glsl_array_size(nir_deref_instr_parent(deref)->type) - 1; + } + deref = nir_deref_instr_parent(deref); } @@ -300,80 +426,151 @@ get_resource_deref_binding(nir_deref_instr *deref, uint32_t *set, static nir_def * load_resource_deref_desc(nir_builder *b, nir_deref_instr *deref, - unsigned desc_offset, unsigned num_components, - unsigned bit_size, - const struct apply_descriptors_ctx *ctx) + VkDescriptorType subdesc_type, unsigned desc_offset, + unsigned num_components, unsigned bit_size, + const struct lower_desc_ctx *ctx) { - uint32_t set, binding, index_imm; + uint32_t set, binding, index_imm, max_idx; nir_def *index_ssa; - get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa); + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); const struct panvk_descriptor_set_layout *set_layout = get_set_layout(set, ctx); const struct panvk_descriptor_set_binding_layout *bind_layout = &set_layout->bindings[binding]; + unsigned subdesc_idx = get_subdesc_idx(bind_layout, subdesc_type); assert(index_ssa == NULL || index_imm == 0); if (index_ssa == NULL) index_ssa = nir_imm_int(b, index_imm); - const unsigned set_ubo_idx = - panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set, false) + - set_layout->desc_ubo_index; + unsigned desc_stride = panvk_get_desc_stride(bind_layout->type); + unsigned set_base_addr_sysval_offs = + b->shader->info.stage == MESA_SHADER_COMPUTE + ? 
offsetof(struct panvk_compute_sysvals, desc.sets[set]) + : offsetof(struct panvk_graphics_sysvals, desc.sets[set]); + nir_def *set_base_addr = nir_load_push_constant( + b, 1, 64, nir_imm_int(b, 0), .base = 256 + set_base_addr_sysval_offs, + .range = 8); + nir_def *set_offset = + nir_imul_imm(b, + nir_iadd_imm(b, nir_imul_imm(b, index_ssa, desc_stride), + bind_layout->desc_idx + subdesc_idx), + PANVK_DESCRIPTOR_SIZE); - nir_def *desc_ubo_offset = - nir_iadd_imm(b, nir_imul_imm(b, index_ssa, bind_layout->desc_ubo_stride), - bind_layout->desc_ubo_offset + desc_offset); + set_offset = nir_iadd_imm(b, set_offset, desc_offset); - assert(bind_layout->desc_ubo_stride > 0); - unsigned desc_align = (1 << (ffs(bind_layout->desc_ubo_stride) - 1)); - desc_align = MIN2(desc_align, 16); + unsigned desc_align = 1 << (ffs(PANVK_DESCRIPTOR_SIZE + desc_offset) - 1); - return nir_load_ubo(b, num_components, bit_size, nir_imm_int(b, set_ubo_idx), - desc_ubo_offset, .align_mul = desc_align, - .align_offset = (desc_offset % desc_align), .range = ~0); + return nir_load_global(b, + nir_iadd(b, set_base_addr, nir_u2u64(b, set_offset)), + desc_align, num_components, bit_size); } static nir_def * -load_tex_img_size(nir_builder *b, nir_deref_instr *deref, - enum glsl_sampler_dim dim, - const struct apply_descriptors_ctx *ctx) +load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, + bool is_array, const struct lower_desc_ctx *ctx) { if (dim == GLSL_SAMPLER_DIM_BUF) { - return load_resource_deref_desc(b, deref, 0, 1, 32, ctx); + nir_def *tex_w = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx); + + /* S dimension is 16 bits wide. We don't support combining S,T dimensions + * to allow large buffers yet. */ + return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); } else { - nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx); + nir_def *tex_w_h = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 2, 16, ctx); + nir_def *tex_depth_or_layer_count = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + dim == GLSL_SAMPLER_DIM_3D ? 28 : 24, 1, 16, ctx); + + nir_def *tex_sz = + is_array && dim == GLSL_SAMPLER_DIM_1D + ? nir_vec2(b, nir_channel(b, tex_w_h, 0), tex_depth_or_layer_count) + : nir_vec3(b, nir_channel(b, tex_w_h, 0), + nir_channel(b, tex_w_h, 1), tex_depth_or_layer_count); + + tex_sz = nir_pad_vector_imm_int(b, tex_sz, 0, 4); /* The sizes are provided as 16-bit values with 1 subtracted so * convert to 32-bit and add 1. */ - return nir_iadd_imm(b, nir_u2u32(b, desc), 1); + return nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1); } } static nir_def * -load_tex_img_levels(nir_builder *b, nir_deref_instr *deref, - enum glsl_sampler_dim dim, - const struct apply_descriptors_ctx *ctx) +load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, + bool is_array, const struct lower_desc_ctx *ctx) { - assert(dim != GLSL_SAMPLER_DIM_BUF); - nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx); - return nir_u2u32(b, nir_iand_imm(b, nir_channel(b, desc, 3), 0xff)); + if (dim == GLSL_SAMPLER_DIM_BUF) { + nir_def *tex_w = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 1, 16, ctx); + + /* S dimension is 16 bits wide. We don't support combining S,T dimensions + * to allow large buffers yet. 
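 */

/* All dimensions in the texture/image descriptors are 16-bit fields with a
 * minus(1) encoding, which is what the u2u32 + iadd_imm(1) pairs in this
 * file undo. Host-side equivalent of that decode (helper name is
 * illustrative, not a driver function): */
#include <assert.h>
#include <stdint.h>

static uint32_t
decode_dim(uint16_t minus_one_field)
{
   return (uint32_t)minus_one_field + 1;
}

int
main(void)
{
   assert(decode_dim(0) == 1);          /* smallest encodable size */
   assert(decode_dim(0xffff) == 65536); /* 16-bit S-dimension maximum */
   return 0;
}

/*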
*/
+      return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
+   } else {
+      nir_def *tex_sz = load_resource_deref_desc(
+         b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 3, 16, ctx);
+
+      if (is_array && dim == GLSL_SAMPLER_DIM_1D)
+         tex_sz =
+            nir_vec2(b, nir_channel(b, tex_sz, 0), nir_channel(b, tex_sz, 2));
+
+      tex_sz = nir_pad_vector_imm_int(b, tex_sz, 0, 4);
+
+      /* The sizes are provided as 16-bit values with 1 subtracted so
+       * convert to 32-bit and add 1.
+       */
+      return nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1);
+   }
+}
+
 static nir_def *
-load_tex_img_levels(nir_builder *b, nir_deref_instr *deref,
-                    enum glsl_sampler_dim dim,
-                    const struct apply_descriptors_ctx *ctx)
+load_tex_levels(nir_builder *b, nir_deref_instr *deref,
+                enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
 {
-   assert(dim != GLSL_SAMPLER_DIM_BUF);
-   nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx);
-   return nir_u2u32(b, nir_iand_imm(b, nir_channel(b, desc, 3), 0xff));
+   assert(dim != GLSL_SAMPLER_DIM_BUF);
+
+   /* LOD count is stored in word2[16:21] and has a minus(1) modifier. */
+   nir_def *tex_word2 = load_resource_deref_desc(
+      b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 8, 1, 32, ctx);
+   nir_def *lod_count = nir_iand_imm(b, nir_ushr_imm(b, tex_word2, 16), 0x1f);
+   return nir_iadd_imm(b, lod_count, 1);
+}
+
+static nir_def *
+load_tex_samples(nir_builder *b, nir_deref_instr *deref,
+                 enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
+{
+   assert(dim != GLSL_SAMPLER_DIM_BUF);
+
+   /* Sample count is stored in word3[13:16], and has a log2 modifier. */
+   nir_def *tex_word3 = load_resource_deref_desc(
+      b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 12, 1, 32, ctx);
+   nir_def *sample_count = nir_iand_imm(b, nir_ushr_imm(b, tex_word3, 13), 0x7);
+   return nir_ishl(b, nir_imm_int(b, 1), sample_count);
+}
+
+static nir_def *
+load_img_samples(nir_builder *b, nir_deref_instr *deref,
+                 enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
+{
+   assert(dim != GLSL_SAMPLER_DIM_BUF);
+
+   /* Sample count is stored in the image depth field.
+    * FIXME: This won't work for 2DMSArray images, but those are already
+    * broken.
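 */

/* Host-side equivalents of the two texture-descriptor decodes above, using
 * the same bit positions as the lowering code: LOD count is
 * ((word2 >> 16) & 0x1f) + 1, sample count is 1 << ((word3 >> 13) & 0x7).
 * Helper names are illustrative, not part of the driver. */
#include <assert.h>
#include <stdint.h>

static uint32_t
decode_lod_count(uint32_t word2)
{
   return ((word2 >> 16) & 0x1f) + 1; /* minus(1) modifier */
}

static uint32_t
decode_sample_count(uint32_t word3)
{
   return 1u << ((word3 >> 13) & 0x7); /* log2 modifier */
}

int
main(void)
{
   assert(decode_lod_count(12u << 16) == 13);
   assert(decode_sample_count(2u << 13) == 4); /* log2(samples) == 2 */
   return 0;
}

/*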
*/ + nir_def *sample_count = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 22, 1, 16, ctx); + return nir_iadd_imm(b, nir_u2u32(b, sample_count), 1); } static bool -lower_tex(nir_builder *b, nir_tex_instr *tex, - const struct apply_descriptors_ctx *ctx) +lower_tex(nir_builder *b, nir_tex_instr *tex, const struct lower_desc_ctx *ctx) { bool progress = false; @@ -390,16 +587,16 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, nir_def *res; switch (tex->op) { case nir_texop_txs: - res = nir_channels(b, load_tex_img_size(b, deref, dim, ctx), + res = nir_channels(b, load_tex_size(b, deref, dim, tex->is_array, ctx), nir_component_mask(tex->def.num_components)); break; case nir_texop_query_levels: assert(tex->def.num_components == 1); - res = load_tex_img_levels(b, deref, dim, ctx); + res = load_tex_levels(b, deref, dim, ctx); break; case nir_texop_texture_samples: assert(tex->def.num_components == 1); - res = load_tex_img_samples(b, deref, dim, ctx); + res = load_tex_samples(b, deref, dim, ctx); break; default: unreachable("Unsupported texture query op"); @@ -416,15 +613,14 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src); nir_tex_instr_remove_src(tex, sampler_src_idx); - uint32_t set, binding, index_imm; + uint32_t set, binding, index_imm, max_idx; nir_def *index_ssa; - get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa); + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); - const struct panvk_descriptor_set_binding_layout *bind_layout = - get_binding_layout(set, binding, ctx); - - tex->sampler_index = ctx->layout->sets[set].sampler_offset + - bind_layout->sampler_idx + index_imm; + tex->sampler_index = + shader_desc_idx(set, binding, VK_DESCRIPTOR_TYPE_SAMPLER, ctx) + + index_imm; if (index_ssa != NULL) { nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset, index_ssa); @@ -437,15 +633,14 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src); nir_tex_instr_remove_src(tex, tex_src_idx); - uint32_t set, binding, index_imm; + uint32_t set, binding, index_imm, max_idx; nir_def *index_ssa; - get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa); - - const struct panvk_descriptor_set_binding_layout *bind_layout = - get_binding_layout(set, binding, ctx); + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); tex->texture_index = - ctx->layout->sets[set].tex_offset + bind_layout->tex_idx + index_imm; + shader_desc_idx(set, binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ctx) + + index_imm; if (index_ssa != NULL) { nir_tex_instr_add_src(tex, nir_tex_src_texture_offset, index_ssa); @@ -458,11 +653,12 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, static nir_def * get_img_index(nir_builder *b, nir_deref_instr *deref, - const struct apply_descriptors_ctx *ctx) + const struct lower_desc_ctx *ctx) { - uint32_t set, binding, index_imm; + uint32_t set, binding, index_imm, max_idx; nir_def *index_ssa; - get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa); + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); const struct panvk_descriptor_set_binding_layout *bind_layout = get_binding_layout(set, binding, ctx); @@ -470,8 +666,7 @@ get_img_index(nir_builder *b, nir_deref_instr *deref, bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER || bind_layout->type == 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); - unsigned img_offset = - ctx->layout->sets[set].img_offset + bind_layout->img_idx; + unsigned img_offset = shader_desc_idx(set, binding, bind_layout->type, ctx); if (index_ssa == NULL) { return nir_imm_int(b, img_offset + index_imm); @@ -483,7 +678,7 @@ get_img_index(nir_builder *b, nir_deref_instr *deref, static bool lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, - struct apply_descriptors_ctx *ctx) + struct lower_desc_ctx *ctx) { b->cursor = nir_before_instr(&intr->instr); nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); @@ -491,15 +686,16 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, if (intr->intrinsic == nir_intrinsic_image_deref_size || intr->intrinsic == nir_intrinsic_image_deref_samples) { const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr); + bool is_array = nir_intrinsic_image_array(intr); nir_def *res; switch (intr->intrinsic) { case nir_intrinsic_image_deref_size: - res = nir_channels(b, load_tex_img_size(b, deref, dim, ctx), + res = nir_channels(b, load_img_size(b, deref, dim, is_array, ctx), nir_component_mask(intr->def.num_components)); break; case nir_intrinsic_image_deref_samples: - res = load_tex_img_samples(b, deref, dim, ctx); + res = load_img_samples(b, deref, dim, ctx); break; default: unreachable("Unsupported image query op"); @@ -509,7 +705,6 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, nir_instr_remove(&intr->instr); } else { nir_rewrite_image_intrinsic(intr, get_img_index(b, deref, ctx), false); - ctx->has_img_access = true; } return true; @@ -517,7 +712,7 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, - struct apply_descriptors_ctx *ctx) + struct lower_desc_ctx *ctx) { switch (intr->intrinsic) { case nir_intrinsic_vulkan_resource_index: @@ -539,7 +734,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, static bool lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *data) { - struct apply_descriptors_ctx *ctx = data; + struct lower_desc_ctx *ctx = data; switch (instr->type) { case nir_instr_type_tex: @@ -551,25 +746,276 @@ lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *data) } } +static void +record_binding(struct lower_desc_ctx *ctx, unsigned set, unsigned binding, + VkDescriptorType subdesc_type, uint32_t max_idx) +{ + const struct panvk_descriptor_set_layout *set_layout = ctx->set_layouts[set]; + const struct panvk_descriptor_set_binding_layout *binding_layout = + &set_layout->bindings[binding]; + uint32_t subdesc_idx = get_subdesc_idx(binding_layout, subdesc_type); + uint32_t desc_stride = panvk_get_desc_stride(binding_layout->type); + + assert(desc_stride == 1 || desc_stride == 2); + ctx->desc_info->used_set_mask |= BITFIELD_BIT(set); + + /* SSBOs are accessed directly from the sets, no need to record accesses + * to such resources. */ + if (binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) + return; + + assert(subdesc_idx < desc_stride); + assert(!(binding & BITFIELD_BIT(27))); + + struct desc_id src = { + .set = set, + .subdesc = subdesc_idx, + .binding = binding, + }; + const void *key = desc_id_to_key(src); + struct hash_entry *he = _mesa_hash_table_search(ctx->ht, key); + uint32_t old_desc_count = 0; + uint32_t new_desc_count = + max_idx == UINT32_MAX ? 
binding_layout->desc_count : max_idx + 1;
+
+   assert(new_desc_count <= binding_layout->desc_count);
+
+   if (!he)
+      he = _mesa_hash_table_insert(ctx->ht, key,
+                                   (void *)(uintptr_t)new_desc_count);
+   else
+      old_desc_count = (uintptr_t)he->data;
+
+   if (old_desc_count >= new_desc_count)
+      return;
+
+   uint32_t desc_count_diff = new_desc_count - old_desc_count;
+
+   if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
+      ctx->desc_info->dyn_ubos.count += desc_count_diff;
+   } else if (binding_layout->type ==
+              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+      ctx->desc_info->dyn_ssbos.count += desc_count_diff;
+   } else {
+      uint32_t table =
+         desc_type_to_table_type(binding_layout->type, subdesc_idx);
+
+      assert(table < PANVK_BIFROST_DESC_TABLE_COUNT);
+      ctx->desc_info->others[table].count += desc_count_diff;
+   }
+
+   he->data = (void *)(uintptr_t)new_desc_count;
+}
+
+static uint32_t *
+fill_copy_descs_for_binding(struct lower_desc_ctx *ctx, unsigned set,
+                            unsigned binding, uint32_t subdesc_idx,
+                            uint32_t desc_count)
+{
+   assert(desc_count);
+
+   const struct panvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
+   const struct panvk_descriptor_set_binding_layout *binding_layout =
+      &set_layout->bindings[binding];
+   uint32_t desc_stride = panvk_get_desc_stride(binding_layout->type);
+   uint32_t *first_entry = NULL;
+
+   assert(desc_count <= binding_layout->desc_count);
+
+   for (uint32_t i = 0; i < desc_count; i++) {
+      uint32_t src_idx =
+         binding_layout->desc_idx + (i * desc_stride) + subdesc_idx;
+      struct panvk_shader_desc_map *map;
+
+      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
+         map = &ctx->desc_info->dyn_ubos;
+      } else if (binding_layout->type ==
+                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+         map = &ctx->desc_info->dyn_ssbos;
+      } else {
+         uint32_t dst_table =
+            desc_type_to_table_type(binding_layout->type, subdesc_idx);
+
+         assert(dst_table < PANVK_BIFROST_DESC_TABLE_COUNT);
+         map = &ctx->desc_info->others[dst_table];
+      }
+
+      if (!first_entry)
+         first_entry = &map->map[map->count];
+
+      map->map[map->count++] = COPY_DESC_HANDLE(set, src_idx);
+   }
+
+   return first_entry;
+}
+
+static void
+create_copy_table(struct lower_desc_ctx *ctx)
+{
+   struct panvk_shader_desc_info *desc_info = ctx->desc_info;
+   uint32_t *copy_table;
+   uint32_t copy_count;
+
+   copy_count = desc_info->dyn_ubos.count + desc_info->dyn_ssbos.count;
+   for (uint32_t i = 0; i < PANVK_BIFROST_DESC_TABLE_COUNT; i++)
+      copy_count += desc_info->others[i].count;
+
+   if (copy_count == 0)
+      return;
+
+   copy_table = calloc(copy_count, sizeof(*copy_table));
+   assert(copy_table);
+
+   desc_info->dyn_ubos.map = copy_table;
+   copy_table += desc_info->dyn_ubos.count;
+   desc_info->dyn_ubos.count = 0;
+   desc_info->dyn_ssbos.map = copy_table;
+   copy_table += desc_info->dyn_ssbos.count;
+   desc_info->dyn_ssbos.count = 0;
+   for (uint32_t i = 0; i < PANVK_BIFROST_DESC_TABLE_COUNT; i++) {
+      desc_info->others[i].map = copy_table;
+      copy_table += desc_info->others[i].count;
+      desc_info->others[i].count = 0;
+   }
+
+   hash_table_foreach(ctx->ht, he) {
+      /* We use the upper binding bit to encode the subdesc index. */
+      uint32_t desc_count = (uintptr_t)he->data;
+      struct desc_id src = key_to_desc_id(he->key);
+
+      /* Until now, we were just using the hash table to track descriptor
+       * counts, but after that point, it's a <binding> -> <copy table entries>
+       * map.
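 */

/* The sub-allocation scheme used by create_copy_table() above, reduced to
 * two tables: one allocation is carved into per-table windows, and each
 * count is reset so fill_copy_descs_for_binding() can reuse it as an append
 * cursor. Struct and function names below are illustrative only. */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct sub_map {
   uint32_t *map;
   uint32_t count;
};

static uint32_t *
carve(struct sub_map *a, struct sub_map *b)
{
   uint32_t *storage = calloc(a->count + b->count, sizeof(*storage));
   assert(storage);

   a->map = storage;
   storage += a->count;
   a->count = 0; /* becomes the append cursor */

   b->map = storage;
   b->count = 0;
   return a->map;
}

int
main(void)
{
   struct sub_map a = {.count = 3}, b = {.count = 2};
   uint32_t *base = carve(&a, &b);
   assert(b.map == base + 3); /* b's window starts after a's 3 slots */
   free(base);
   return 0;
}

/*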
*/ + he->data = fill_copy_descs_for_binding(ctx, src.set, src.binding, + src.subdesc, desc_count); + } +} + +/* TODO: Texture instructions support bindless through DTSEL_IMM(63), + * which would save us copies of the texture/sampler descriptors. */ +static bool +collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex, + struct lower_desc_ctx *ctx) +{ + bool recorded = false; + int sampler_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + if (sampler_src_idx >= 0) { + nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src); + + uint32_t set, binding, index_imm, max_idx; + nir_def *index_ssa; + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); + + record_binding(ctx, set, binding, VK_DESCRIPTOR_TYPE_SAMPLER, max_idx); + recorded = true; + } + + int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (tex_src_idx >= 0) { + nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src); + + uint32_t set, binding, index_imm, max_idx; + nir_def *index_ssa; + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); + + record_binding(ctx, set, binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + max_idx); + recorded = true; + } + + return recorded; +} + +static bool +collect_intr_desc_access(nir_builder *b, nir_intrinsic_instr *intrin, + struct lower_desc_ctx *ctx) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: { + unsigned set, binding; + + set = nir_intrinsic_desc_set(intrin); + binding = nir_intrinsic_binding(intrin); + + /* TODO: walk the reindex chain from load_vulkan_descriptor() to try and + * guess the max index. */ + record_binding(ctx, set, binding, ~0, UINT32_MAX); + return true; + } + + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + unsigned set, binding, index_imm, max_idx; + nir_def *index_ssa; + + get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa, + &max_idx); + record_binding(ctx, set, binding, ~0, max_idx); + return true; + } + default: + return false; + } +} + +static bool +collect_instr_desc_access(nir_builder *b, nir_instr *instr, void *data) +{ + struct lower_desc_ctx *ctx = data; + + switch (instr->type) { + case nir_instr_type_tex: + return collect_tex_desc_access(b, nir_instr_as_tex(instr), ctx); + case nir_instr_type_intrinsic: + return collect_intr_desc_access(b, nir_instr_as_intrinsic(instr), ctx); + default: + return false; + } +} + bool panvk_per_arch(nir_lower_descriptors)(nir_shader *nir, struct panvk_device *dev, - const struct panvk_pipeline_layout *layout, - bool *has_img_access_out) + const struct vk_pipeline_layout *layout, + struct panvk_shader_desc_info *desc_info) { - struct apply_descriptors_ctx ctx = { - .layout = layout, - .desc_addr_format = nir_address_format_32bit_index_offset, + struct lower_desc_ctx ctx = { + .desc_info = desc_info, .ubo_addr_format = nir_address_format_32bit_index_offset, .ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess ? 
nir_address_format_64bit_bounded_global : nir_address_format_64bit_global_32bit_offset, }; - bool progress = nir_shader_instructions_pass( - nir, lower_descriptors_instr, - nir_metadata_block_index | nir_metadata_dominance, (void *)&ctx); - if (has_img_access_out) - *has_img_access_out = ctx.has_img_access; + ctx.ht = _mesa_hash_table_create_u32_keys(NULL); + assert(ctx.ht); + _mesa_hash_table_set_deleted_key(ctx.ht, DELETED_KEY); + + for (uint32_t i = 0; i < layout->set_count; i++) { + ctx.set_layouts[i] = + to_panvk_descriptor_set_layout(layout->set_layouts[i]); + } + + bool progress = nir_shader_instructions_pass(nir, collect_instr_desc_access, + nir_metadata_all, &ctx); + if (!progress) + goto out; + + create_copy_table(&ctx); + + progress = nir_shader_instructions_pass( + nir, lower_descriptors_instr, + nir_metadata_block_index | nir_metadata_dominance, &ctx); + +out: + _mesa_hash_table_destroy(ctx.ht, NULL); return progress; } diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c index 736eb9d3d52..a00f738669c 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c @@ -30,7 +30,6 @@ #include "panvk_device.h" #include "panvk_entrypoints.h" #include "panvk_pipeline.h" -#include "panvk_pipeline_layout.h" #include "panvk_priv_bo.h" #include "panvk_shader.h" @@ -44,6 +43,7 @@ #include "vk_blend.h" #include "vk_format.h" #include "vk_pipeline_cache.h" +#include "vk_pipeline_layout.h" #include "vk_render_pass.h" #include "vk_util.h" @@ -76,7 +76,32 @@ init_pipeline_shader(struct panvk_pipeline *pipeline, } pshader->info = shader->info; - pshader->has_img_access = shader->has_img_access; + pshader->desc_info.used_set_mask = shader->desc_info.used_set_mask; + + uint32_t copy_count = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others); i++) { + pshader->desc_info.others.count[i] = shader->desc_info.others[i].count; + copy_count += shader->desc_info.others[i].count; + } + + if (copy_count) { + pshader->desc_info.others.map = pan_pool_upload_aligned( + &pipeline->desc_pool.base, shader->desc_info.others[0].map, + copy_count * sizeof(uint32_t), sizeof(uint32_t)); + } + + assert(shader->desc_info.dyn_ubos.count < + ARRAY_SIZE(pshader->desc_info.dyn_ubos.map)); + pshader->desc_info.dyn_ubos.count = shader->desc_info.dyn_ubos.count; + memcpy(pshader->desc_info.dyn_ubos.map, shader->desc_info.dyn_ubos.map, + shader->desc_info.dyn_ubos.count * + sizeof(*pshader->desc_info.dyn_ubos.map)); + assert(shader->desc_info.dyn_ssbos.count < + ARRAY_SIZE(pshader->desc_info.dyn_ssbos.map)); + pshader->desc_info.dyn_ssbos.count = shader->desc_info.dyn_ssbos.count; + memcpy(pshader->desc_info.dyn_ssbos.map, shader->desc_info.dyn_ssbos.map, + shader->desc_info.dyn_ssbos.count * + sizeof(*pshader->desc_info.dyn_ssbos.map)); if (stage_info->stage == VK_SHADER_STAGE_COMPUTE_BIT) { struct panvk_compute_pipeline *compute_pipeline = @@ -303,7 +328,7 @@ panvk_graphics_pipeline_create(struct panvk_device *dev, const VkAllocationCallbacks *alloc, struct panvk_pipeline **out) { - VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout); + VK_FROM_HANDLE(vk_pipeline_layout, layout, create_info->layout); struct vk_graphics_pipeline_all_state all; struct vk_graphics_pipeline_state state = {}; VkResult result; @@ -401,7 +426,7 @@ panvk_compute_pipeline_create(struct panvk_device *dev, const VkAllocationCallbacks *alloc, struct panvk_pipeline **out) { - VK_FROM_HANDLE(panvk_pipeline_layout, 
layout, create_info->layout); + VK_FROM_HANDLE(vk_pipeline_layout, layout, create_info->layout); struct panvk_compute_pipeline *compute_pipeline = vk_object_zalloc( &dev->vk, alloc, sizeof(*compute_pipeline), VK_OBJECT_TYPE_PIPELINE); diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline_layout.c b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline_layout.c deleted file mode 100644 index da875df9fa2..00000000000 --- a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline_layout.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright © 2021 Collabora Ltd. - * - * SPDX-License-Identifier: MIT - */ - -#include "genxml/gen_macros.h" - -#include "vk_log.h" - -#include "panvk_descriptor_set.h" -#include "panvk_device.h" -#include "panvk_entrypoints.h" -#include "panvk_macros.h" -#include "panvk_pipeline_layout.h" -#include "panvk_sampler.h" -#include "panvk_shader.h" - -#include "util/mesa-sha1.h" - -/* - * Pipeline layouts. These have nothing to do with the pipeline. They are - * just multiple descriptor set layouts pasted together. - */ - -VKAPI_ATTR VkResult VKAPI_CALL -panvk_per_arch(CreatePipelineLayout)( - VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - struct panvk_pipeline_layout *layout; - struct mesa_sha1 ctx; - - layout = - vk_pipeline_layout_zalloc(&device->vk, sizeof(*layout), pCreateInfo); - if (layout == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - _mesa_sha1_init(&ctx); - - unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0; - unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0; - unsigned dyn_desc_ubo_offset = 0; - for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) { - const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(layout->vk.set_layouts[set]); - - layout->sets[set].sampler_offset = sampler_idx; - layout->sets[set].tex_offset = tex_idx; - layout->sets[set].ubo_offset = ubo_idx; - layout->sets[set].dyn_ubo_offset = dyn_ubo_idx; - layout->sets[set].dyn_ssbo_offset = dyn_ssbo_idx; - layout->sets[set].img_offset = img_idx; - layout->sets[set].dyn_desc_ubo_offset = dyn_desc_ubo_offset; - sampler_idx += set_layout->num_samplers; - tex_idx += set_layout->num_textures; - ubo_idx += set_layout->num_ubos; - dyn_ubo_idx += set_layout->num_dyn_ubos; - dyn_ssbo_idx += set_layout->num_dyn_ssbos; - img_idx += set_layout->num_imgs; - dyn_desc_ubo_offset += - set_layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr); - - for (unsigned b = 0; b < set_layout->binding_count; b++) { - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set_layout->bindings[b]; - - if (binding_layout->immutable_samplers) { - for (unsigned s = 0; s < binding_layout->array_size; s++) { - struct panvk_sampler *sampler = - binding_layout->immutable_samplers[s]; - - _mesa_sha1_update(&ctx, &sampler->desc, sizeof(sampler->desc)); - } - } - _mesa_sha1_update(&ctx, &binding_layout->type, - sizeof(binding_layout->type)); - _mesa_sha1_update(&ctx, &binding_layout->array_size, - sizeof(binding_layout->array_size)); - _mesa_sha1_update(&ctx, &binding_layout->shader_stages, - sizeof(binding_layout->shader_stages)); - } - } - - for (unsigned range = 0; range < pCreateInfo->pushConstantRangeCount; - range++) { - layout->push_constants.size = - MAX2(pCreateInfo->pPushConstantRanges[range].offset + - pCreateInfo->pPushConstantRanges[range].size, - layout->push_constants.size); - } - - 
layout->num_samplers = sampler_idx; - layout->num_textures = tex_idx; - layout->num_ubos = ubo_idx; - layout->num_dyn_ubos = dyn_ubo_idx; - layout->num_dyn_ssbos = dyn_ssbo_idx; - layout->num_imgs = img_idx; - - /* Some NIR texture operations don't require a sampler, but Bifrost/Midgard - * ones always expect one. Add a dummy sampler to deal with this limitation. - */ - if (layout->num_textures) { - layout->num_samplers++; - for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) - layout->sets[set].sampler_offset++; - } - - _mesa_sha1_final(&ctx, layout->sha1); - - *pPipelineLayout = panvk_pipeline_layout_to_handle(layout); - return VK_SUCCESS; -} - -unsigned -panvk_per_arch(pipeline_layout_ubo_start)( - const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic) -{ - if (is_dynamic) - return layout->num_ubos + layout->sets[set].dyn_ubo_offset; - - return layout->sets[set].ubo_offset; -} - -unsigned -panvk_per_arch(pipeline_layout_ubo_index)( - const struct panvk_pipeline_layout *layout, unsigned set, unsigned binding, - unsigned array_index) -{ - const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(layout->vk.set_layouts[set]); - const struct panvk_descriptor_set_binding_layout *binding_layout = - &set_layout->bindings[binding]; - - const bool is_dynamic = - binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - const uint32_t ubo_idx = - is_dynamic ? binding_layout->dyn_ubo_idx : binding_layout->ubo_idx; - - return panvk_per_arch(pipeline_layout_ubo_start)(layout, set, is_dynamic) + - ubo_idx + array_index; -} - -unsigned -panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)( - const struct panvk_pipeline_layout *layout) -{ - return layout->num_ubos + layout->num_dyn_ubos; -} - -unsigned -panvk_per_arch(pipeline_layout_total_ubo_count)( - const struct panvk_pipeline_layout *layout) -{ - return layout->num_ubos + layout->num_dyn_ubos + - (layout->num_dyn_ssbos ? 
1 : 0); -} - -unsigned -panvk_per_arch(pipeline_layout_dyn_ubos_offset)( - const struct panvk_pipeline_layout *layout) -{ - return layout->num_ubos; -} diff --git a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h index 3a4f50f4bb2..9164a44843f 100644 --- a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h @@ -93,11 +93,17 @@ struct panvk_cmd_graphics_state { struct { mali_ptr rsd; +#if PAN_ARCH <= 7 + struct panvk_shader_desc_state desc; +#endif } fs; struct { mali_ptr attribs; mali_ptr attrib_bufs; +#if PAN_ARCH <= 7 + struct panvk_shader_desc_state desc; +#endif } vs; struct { @@ -141,6 +147,11 @@ struct panvk_cmd_compute_state { const struct panvk_compute_pipeline *pipeline; struct panvk_compute_sysvals sysvals; mali_ptr push_uniforms; +#if PAN_ARCH <= 7 + struct { + struct panvk_shader_desc_state desc; + } cs; +#endif }; struct panvk_cmd_buffer { diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 94966a7df06..5ba507f2d09 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -38,7 +38,6 @@ #include "panvk_instance.h" #include "panvk_physical_device.h" #include "panvk_pipeline.h" -#include "panvk_pipeline_layout.h" #include "panvk_priv_bo.h" #include "pan_blitter.h" @@ -298,6 +297,10 @@ panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf, panvk_per_arch(cmd_desc_state_reset)(&cmdbuf->state.gfx.desc_state, &cmdbuf->state.compute.desc_state); + memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); + memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + memset(&cmdbuf->state.compute.cs.desc, 0, + sizeof(cmdbuf->state.compute.cs.desc)); } static void @@ -401,6 +404,15 @@ panvk_per_arch(CmdBindDescriptorSets)( panvk_per_arch(cmd_desc_state_bind_sets)( desc_state, layout, firstSet, descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); + + /* TODO: Invalidate only if the shader tables are disturbed */ + if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); + memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + } else { + memset(&cmdbuf->state.compute.cs.desc, 0, + sizeof(cmdbuf->state.compute.cs.desc)); + } } VKAPI_ATTR void VKAPI_CALL @@ -439,12 +451,16 @@ panvk_per_arch(CmdBindPipeline)(VkCommandBuffer commandBuffer, cmdbuf->state.gfx.fs.rsd = 0; cmdbuf->state.gfx.pipeline = gfx_pipeline; + memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); + memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); break; } case VK_PIPELINE_BIND_POINT_COMPUTE: cmdbuf->state.compute.pipeline = panvk_pipeline_to_compute_pipeline(pipeline); + memset(&cmdbuf->state.compute.cs.desc, 0, + sizeof(cmdbuf->state.compute.cs.desc)); break; default: @@ -460,18 +476,33 @@ panvk_per_arch(CmdPushDescriptorSetKHR)( const VkWriteDescriptorSet *pDescriptorWrites) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout); + VK_FROM_HANDLE(vk_pipeline_layout, playout, layout); const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]); + to_panvk_descriptor_set_layout(playout->set_layouts[set]); struct panvk_descriptor_state *desc_state = panvk_cmd_get_desc_state(cmdbuf, pipelineBindPoint); - struct 
panvk_push_descriptor_set *push_set = + struct panvk_descriptor_set *push_set = panvk_per_arch(cmd_push_descriptors)(&cmdbuf->vk, desc_state, set); if (!push_set) return; - panvk_per_arch(push_descriptor_set)(push_set, set_layout, - descriptorWriteCount, pDescriptorWrites); + push_set->layout = set_layout; + push_set->desc_count = set_layout->desc_count; + + for (uint32_t i = 0; i < descriptorWriteCount; i++) + panvk_per_arch(descriptor_set_write)(push_set, &pDescriptorWrites[i], + true); + + push_set->descs.dev = 0; + push_set->layout = NULL; + + if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); + memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + } else { + memset(&cmdbuf->state.compute.cs.desc, 0, + sizeof(cmdbuf->state.compute.cs.desc)); + } } VKAPI_ATTR void VKAPI_CALL @@ -483,16 +514,30 @@ panvk_per_arch(CmdPushDescriptorSetWithTemplateKHR)( VK_FROM_HANDLE(vk_descriptor_update_template, template, descriptorUpdateTemplate); VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout); + VK_FROM_HANDLE(vk_pipeline_layout, playout, layout); const struct panvk_descriptor_set_layout *set_layout = - vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]); + to_panvk_descriptor_set_layout(playout->set_layouts[set]); struct panvk_descriptor_state *desc_state = panvk_cmd_get_desc_state(cmdbuf, template->bind_point); - struct panvk_push_descriptor_set *push_set = + struct panvk_descriptor_set *push_set = panvk_per_arch(cmd_push_descriptors)(&cmdbuf->vk, desc_state, set); if (!push_set) return; - panvk_per_arch(push_descriptor_set_with_template)( - push_set, set_layout, descriptorUpdateTemplate, pData); + push_set->layout = set_layout; + push_set->desc_count = set_layout->desc_count; + + panvk_per_arch(descriptor_set_write_template)(push_set, template, pData, + true); + + push_set->descs.dev = 0; + push_set->layout = NULL; + + if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc)); + memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc)); + } else { + memset(&cmdbuf->state.compute.cs.desc, 0, + sizeof(cmdbuf->state.compute.cs.desc)); + } } diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index a5651809911..a4f18320276 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -27,13 +27,8 @@ struct panvk_dispatch_info { struct pan_compute_dim wg_count; - mali_ptr attributes; - mali_ptr attribute_bufs; mali_ptr tsd; - mali_ptr ubos; mali_ptr push_uniforms; - mali_ptr textures; - mali_ptr samplers; }; VKAPI_ATTR void VKAPI_CALL @@ -53,12 +48,16 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, struct panvk_descriptor_state *desc_state = &cmdbuf->state.compute.desc_state; + struct panvk_shader_desc_state *cs_desc_state = + &cmdbuf->state.compute.cs.desc; const struct panvk_compute_pipeline *pipeline = cmdbuf->state.compute.pipeline; - struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base; - struct panfrost_ptr job = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); - util_dynarray_append(&batch->jobs, void *, job.cpu); + + panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); + dispatch.tsd = batch->tls.gpu; + + panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, 
desc_state, + &pipeline->base); struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; sysvals->num_work_groups.x = x; @@ -67,38 +66,38 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, sysvals->local_group_size.x = pipeline->local_size.x; sysvals->local_group_size.y = pipeline->local_size.y; sysvals->local_group_size.z = pipeline->local_size.z; + panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, + &pipeline->cs, cs_desc_state); + sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos; - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - dispatch.tsd = batch->tls.gpu; + for (uint32_t i = 0; i < MAX_SETS; i++) { + if (pipeline->cs.desc_info.used_set_mask & BITFIELD_BIT(i)) + sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; + } - panvk_per_arch(cmd_prepare_push_sets)(desc_pool_base, desc_state, - &pipeline->base); - - if (pipeline->cs.has_img_access) - panvk_per_arch(prepare_img_attribs)(desc_pool_base, desc_state, - &pipeline->base); - - dispatch.attributes = desc_state->img.attribs; - dispatch.attribute_bufs = desc_state->img.attrib_bufs; - - panvk_per_arch(cmd_prepare_ubos)(desc_pool_base, desc_state, - &pipeline->base); - dispatch.ubos = desc_state->ubos; + cmdbuf->state.compute.push_uniforms = 0; if (!cmdbuf->state.compute.push_uniforms) { cmdbuf->state.compute.push_uniforms = panvk_cmd_prepare_push_uniforms( - desc_pool_base, &cmdbuf->state.push_constants, + &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants, &cmdbuf->state.compute.sysvals, sizeof(cmdbuf->state.compute.sysvals)); } + dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms; - panvk_per_arch(cmd_prepare_textures)(desc_pool_base, desc_state, - &pipeline->base); - dispatch.textures = desc_state->textures; + panvk_per_arch(cmd_prepare_shader_desc_tables)( + &cmdbuf->desc_pool.base, desc_state, &pipeline->cs, cs_desc_state); - panvk_per_arch(cmd_prepare_samplers)(desc_pool_base, desc_state, - &pipeline->base); - dispatch.samplers = desc_state->samplers; + struct panfrost_ptr copy_desc_job = panvk_per_arch(meta_get_copy_desc_job)( + dev, &cmdbuf->desc_pool.base, &pipeline->cs, + &cmdbuf->state.compute.desc_state, cs_desc_state); + + if (copy_desc_job.cpu) + util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu); + + struct panfrost_ptr job = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + util_dynarray_append(&batch->jobs, void *, job.cpu); panfrost_pack_work_groups_compute( pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), dispatch.wg_count.x, @@ -113,17 +112,24 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { cfg.state = pipeline->cs.rsd; - cfg.attributes = dispatch.attributes; - cfg.attribute_buffers = dispatch.attribute_bufs; + cfg.attributes = cs_desc_state->img_attrib_table; + cfg.attribute_buffers = + cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG]; cfg.thread_storage = dispatch.tsd; - cfg.uniform_buffers = dispatch.ubos; + cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; cfg.push_uniforms = dispatch.push_uniforms; - cfg.textures = dispatch.textures; - cfg.samplers = dispatch.samplers; + cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; + cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]; } - pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, &job, - false); + unsigned copy_desc_dep = + copy_desc_job.gpu + ? 
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, + &copy_desc_job, false) + : 0; + + pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, + copy_desc_dep, &job, false); batch->tlsinfo.tls.size = pipeline->cs.info.tls_size; batch->tlsinfo.wls.size = pipeline->cs.info.wls_size; @@ -137,7 +143,6 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, } panvk_per_arch(cmd_close_batch)(cmdbuf); - panvk_per_arch(cmd_unprepare_push_sets)(desc_state); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 6511bbe7304..c9f7b2d3c31 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -20,6 +20,7 @@ #include "panvk_image_view.h" #include "panvk_instance.h" #include "panvk_pipeline.h" +#include "panvk_priv_bo.h" #include "panvk_shader.h" #include "pan_desc.h" @@ -32,6 +33,7 @@ #include "pan_shader.h" #include "vk_format.h" +#include "vk_pipeline_layout.h" struct panvk_draw_info { unsigned first_index; @@ -54,14 +56,9 @@ struct panvk_draw_info { struct { mali_ptr rsd; mali_ptr varyings; - mali_ptr attributes; - mali_ptr attribute_bufs; } fs; mali_ptr push_uniforms; mali_ptr varying_bufs; - mali_ptr textures; - mali_ptr samplers; - mali_ptr ubos; mali_ptr position; mali_ptr indices; union { @@ -73,7 +70,9 @@ struct panvk_draw_info { const struct pan_tiler_context *tiler_ctx; mali_ptr viewport; struct { + struct panfrost_ptr vertex_copy_desc; struct panfrost_ptr vertex; + struct panfrost_ptr frag_copy_desc; struct panfrost_ptr tiler; } jobs; }; @@ -90,6 +89,10 @@ static void panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { + const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline; + struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; + struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; + struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals; struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; @@ -138,6 +141,21 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, sysvals->viewport.offset.z = viewport->minDepth; cmdbuf->state.gfx.push_uniforms = 0; } + + panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, + &pipeline->vs, vs_desc_state); + sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos; + panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, + &pipeline->fs, fs_desc_state); + sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos; + + for (uint32_t i = 0; i < MAX_SETS; i++) { + uint32_t used_set_mask = pipeline->vs.desc_info.used_set_mask | + pipeline->fs.desc_info.used_set_mask; + + if (used_set_mask & BITFIELD_BIT(i)) + sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; + } } static bool @@ -626,12 +644,11 @@ static void panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline; const struct vk_vertex_input_state *vi = cmdbuf->vk.dynamic_graphics_state.vi; unsigned num_imgs = - pipeline->vs.has_img_access ? 
pipeline->base.layout->num_imgs : 0; + pipeline->vs.desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG]; unsigned num_vs_attribs = util_last_bit(vi->attributes_valid); unsigned num_vbs = util_last_bit(vi->bindings_valid); unsigned attrib_count = @@ -639,7 +656,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, bool dirty = is_dirty(cmdbuf, VI) || is_dirty(cmdbuf, VI_BINDINGS_VALID) || is_dirty(cmdbuf, VI_BINDING_STRIDES) || - (num_imgs && !desc_state->img.attribs) || + (num_imgs && !cmdbuf->state.gfx.vs.desc.img_attrib_table) || (cmdbuf->state.gfx.vb.count && !cmdbuf->state.gfx.vs.attrib_bufs) || (attrib_count && !cmdbuf->state.gfx.vs.attribs); @@ -675,50 +692,28 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, } } - if (num_imgs) { - /* Image load/store are passed a fixed offset, so we can make vertex input - * dynamic. Images are always placed after all potential vertex - * attributes. Buffers are tightly packed since they don't interfere with - * the vertex shader. - */ - unsigned attribs_offset = MAX_VS_ATTRIBS * pan_size(ATTRIBUTE); - unsigned bufs_offset = num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2; - - memset(attribs.cpu + num_vs_attribs * pan_size(ATTRIBUTE), 0, - (MAX_VS_ATTRIBS - num_vs_attribs) * pan_size(ATTRIBUTE)); - panvk_per_arch(fill_img_attribs)( - desc_state, &pipeline->base, bufs.cpu + bufs_offset, - attribs.cpu + attribs_offset, num_vbs * 2); - desc_state->img.attrib_bufs = bufs.gpu + bufs_offset; - desc_state->img.attribs = attribs.gpu + attribs_offset; - } - /* A NULL entry is needed to stop prefetching on Bifrost */ memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0, pan_size(ATTRIBUTE_BUFFER)); cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu; cmdbuf->state.gfx.vs.attribs = attribs.gpu; + + if (num_imgs) { + cmdbuf->state.gfx.vs.desc.img_attrib_table = + attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE)); + cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] = + bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2); + } } static void panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; - const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline; - panvk_draw_prepare_vs_attribs(cmdbuf, draw); draw->vs.attributes = cmdbuf->state.gfx.vs.attribs; draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs; - - if (pipeline->fs.has_img_access) { - struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base; - panvk_per_arch(prepare_img_attribs)(desc_pool_base, desc_state, - &pipeline->base); - draw->fs.attributes = desc_state->img.attribs; - draw->fs.attribute_bufs = desc_state->img.attrib_bufs; - } } void @@ -780,11 +775,25 @@ static void panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline; + struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base, + &cmdbuf->state.gfx.desc_state, + &pipeline->vs, vs_desc_state); + + struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)( + dev, &cmdbuf->desc_pool.base, &pipeline->vs, + &cmdbuf->state.gfx.desc_state, 
vs_desc_state); + + if (ptr.cpu) + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + + draw->jobs.vertex_copy_desc = ptr; + + ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.vertex = ptr; @@ -805,10 +814,10 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, cfg.offset_start = draw->offset_start; cfg.instance_size = draw->instance_count > 1 ? draw->padded_vertex_count : 1; - cfg.uniform_buffers = draw->ubos; + cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; cfg.push_uniforms = draw->push_uniforms; - cfg.textures = draw->textures; - cfg.samplers = draw->samplers; + cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; + cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]; } } @@ -909,6 +918,7 @@ static void panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *draw, void *dcd) { + struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; const struct vk_rasterization_state *rs = &cmdbuf->vk.dynamic_graphics_state.rs; const struct vk_input_assembly_state *ia = @@ -920,8 +930,9 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0; cfg.position = draw->position; cfg.state = draw->fs.rsd; - cfg.attributes = draw->fs.attributes; - cfg.attribute_buffers = draw->fs.attribute_bufs; + cfg.attributes = fs_desc_state->img_attrib_table; + cfg.attribute_buffers = + fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG]; cfg.viewport = draw->viewport; cfg.varyings = draw->fs.varyings; cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0; @@ -938,10 +949,10 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, cfg.offset_start = draw->offset_start; cfg.instance_size = draw->instance_count > 1 ? draw->padded_vertex_count : 1; - cfg.uniform_buffers = draw->ubos; + cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; cfg.push_uniforms = draw->push_uniforms; - cfg.textures = draw->textures; - cfg.samplers = draw->samplers; + cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; + cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]; /* TODO: occlusion queries */ } @@ -951,14 +962,30 @@ static void panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB); /* If the vertex job doesn't write the position, we don't need a tiler job. 
*/ if (!draw->position) return; + const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline; + struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; + + panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base, + &cmdbuf->state.gfx.desc_state, + &pipeline->fs, fs_desc_state); + + struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)( + dev, &cmdbuf->desc_pool.base, &pipeline->fs, + &cmdbuf->state.gfx.desc_state, fs_desc_state); + + if (ptr.cpu) + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + + draw->jobs.frag_copy_desc = ptr; + + ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB); util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.tiler = ptr; @@ -1015,10 +1042,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) const struct vk_rasterization_state *rs = &cmdbuf->vk.dynamic_graphics_state.rs; - /* There are only 16 bits in the descriptor for the job ID, make sure all - * the 3 (2 in Bifrost) jobs in this draw are in the same batch. + /* There are only 16 bits in the descriptor for the job ID. Each job has a + * pilot shader dealing with descriptor copies, and we need one + * <vertex,tiler> pair per draw. */ - if (batch->jc.job_index >= (UINT16_MAX - 3)) { + if (batch->jc.job_index >= (UINT16_MAX - 4)) { panvk_per_arch(cmd_close_batch)(cmdbuf); panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf); batch = panvk_per_arch(cmd_open_batch)(cmdbuf); @@ -1029,32 +1057,20 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); + panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state, + &pipeline->base); panvk_cmd_prepare_draw_sysvals(cmdbuf, draw); - struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base; - panvk_per_arch(cmd_prepare_push_sets)(desc_pool_base, desc_state, - &pipeline->base); - if (!cmdbuf->state.gfx.push_uniforms) { cmdbuf->state.gfx.push_uniforms = panvk_cmd_prepare_push_uniforms( - desc_pool_base, &cmdbuf->state.push_constants, + &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants, &cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals)); } - panvk_per_arch(cmd_prepare_ubos)(desc_pool_base, desc_state, - &pipeline->base); - panvk_per_arch(cmd_prepare_textures)(desc_pool_base, desc_state, - &pipeline->base); - panvk_per_arch(cmd_prepare_samplers)(desc_pool_base, desc_state, - &pipeline->base); - /* TODO: indexed draws */ draw->tls = batch->tls.gpu; draw->fb = batch->fb.desc.gpu; - draw->ubos = desc_state->ubos; draw->push_uniforms = cmdbuf->state.gfx.push_uniforms; - draw->textures = desc_state->textures; - draw->samplers = desc_state->samplers; panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range, draw->instance_count, 1, 1, 1, true, @@ -1071,17 +1087,31 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) MAX3(pipeline->vs.info.tls_size, pipeline->fs.info.tls_size, batch->tlsinfo.tls.size); - unsigned vjob_id = pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_VERTEX, false, - false, 0, 0, &draw->jobs.vertex, false); + unsigned copy_desc_job_id = + draw->jobs.vertex_copy_desc.gpu + ? 
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, + &draw->jobs.vertex_copy_desc, false) + : 0; + + unsigned vjob_id = + pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_VERTEX, false, false, 0, + copy_desc_job_id, &draw->jobs.vertex, false); if (!rs->rasterizer_discard_enable && draw->position) { + /* We don't need to add frag_copy_desc as a dependency, because the + * tiler job doesn't execute the fragment shader. The fragment job + * will, and the tiler/fragment synchronization happens at the batch + * level. */ + if (draw->jobs.frag_copy_desc.gpu) + pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, + &draw->jobs.frag_copy_desc, false); + pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_TILER, false, false, vjob_id, 0, &draw->jobs.tiler, false); } /* Clear the dirty flags all at once */ cmdbuf->state.gfx.dirty = 0; - panvk_per_arch(cmd_unprepare_push_sets)(desc_state); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta.c b/src/panfrost/vulkan/jm/panvk_vX_meta.c index 29fb3f55731..7ed1cba63a1 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_meta.c +++ b/src/panfrost/vulkan/jm/panvk_vX_meta.c @@ -57,6 +57,7 @@ panvk_per_arch(meta_init)(struct panvk_device *dev) panvk_per_arch(meta_blit_init)(dev); panvk_per_arch(meta_copy_init)(dev); panvk_per_arch(meta_clear_init)(dev); + panvk_per_arch(meta_desc_copy_init)(dev); } void diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index c7c2d20ddd5..556e46a5944 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -57,19 +57,14 @@ bifrost_archs = [6, 7] bifrost_inc_dir = ['bifrost'] bifrost_files = [ 'bifrost/panvk_vX_cmd_desc_state.c', - 'bifrost/panvk_vX_descriptor_set.c', - 'bifrost/panvk_vX_descriptor_set_layout.c', + 'bifrost/panvk_vX_meta_desc_copy.c', 'bifrost/panvk_vX_nir_lower_descriptors.c', 'bifrost/panvk_vX_pipeline.c', - 'bifrost/panvk_vX_pipeline_layout.c', ] valhall_archs = [9, 10] valhall_inc_dir = ['valhall'] -valhall_files = [ - 'valhall/panvk_vX_descriptor_set.c', - 'valhall/panvk_vX_descriptor_set_layout.c', -] +valhall_files = [] jm_archs = [6, 7] jm_inc_dir = ['jm'] @@ -89,6 +84,8 @@ common_per_arch_files = [ panvk_entrypoints[0], 'panvk_vX_blend.c', 'panvk_vX_buffer_view.c', + 'panvk_vX_descriptor_set.c', + 'panvk_vX_descriptor_set_layout.c', 'panvk_vX_device.c', 'panvk_vX_image_view.c', 'panvk_vX_sampler.c', @@ -101,6 +98,8 @@ foreach arch : [6, 7, 9, 10] # Just add the v9/v10 entrypoints for now. 
per_arch_files = [ panvk_entrypoints[0], + 'panvk_vX_descriptor_set.c', + 'panvk_vX_descriptor_set_layout.c', ] else per_arch_files = common_per_arch_files diff --git a/src/panfrost/vulkan/valhall/panvk_descriptor_set.h b/src/panfrost/vulkan/panvk_descriptor_set.h similarity index 100% rename from src/panfrost/vulkan/valhall/panvk_descriptor_set.h rename to src/panfrost/vulkan/panvk_descriptor_set.h diff --git a/src/panfrost/vulkan/valhall/panvk_descriptor_set_layout.h b/src/panfrost/vulkan/panvk_descriptor_set_layout.h similarity index 100% rename from src/panfrost/vulkan/valhall/panvk_descriptor_set_layout.h rename to src/panfrost/vulkan/panvk_descriptor_set_layout.h diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 7c74f80906b..7257756aaf1 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -39,6 +39,11 @@ struct panvk_device { struct panvk_blend_shader_cache blend_shader_cache; struct panvk_meta meta; + struct { + struct panvk_priv_bo *shader_bo; + struct panvk_priv_bo *rsd_bo; + } desc_copy; + struct vk_device_dispatch_table cmd_dispatch; struct panvk_queue *queues[PANVK_MAX_QUEUE_FAMILIES]; diff --git a/src/panfrost/vulkan/panvk_meta.h b/src/panfrost/vulkan/panvk_meta.h index ef7b64c1e5f..61b723ce781 100644 --- a/src/panfrost/vulkan/panvk_meta.h +++ b/src/panfrost/vulkan/panvk_meta.h @@ -68,9 +68,26 @@ struct panvk_meta { mali_ptr rsd; } fillbuf; } copy; + + struct { + mali_ptr rsd; + } desc_copy; }; #if PAN_ARCH + +#if PAN_ARCH <= 7 +struct panvk_descriptor_state; +struct panvk_pipeline_shader; +struct panvk_shader_desc_state; + +struct panfrost_ptr panvk_per_arch(meta_get_copy_desc_job)( + struct panvk_device *dev, struct pan_pool *desc_pool, + const struct panvk_pipeline_shader *shader, + const struct panvk_descriptor_state *desc_state, + const struct panvk_shader_desc_state *shader_desc_state); +#endif + void panvk_per_arch(meta_init)(struct panvk_device *dev); void panvk_per_arch(meta_cleanup)(struct panvk_device *dev); @@ -86,6 +103,8 @@ void panvk_per_arch(meta_blit_init)(struct panvk_device *dev); void panvk_per_arch(meta_blit_cleanup)(struct panvk_device *dev); void panvk_per_arch(meta_copy_init)(struct panvk_device *dev); + +void panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev); #endif #endif diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index b3f130aa324..1552391ff62 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -290,7 +290,6 @@ get_device_properties(const struct panvk_physical_device *device, */ .maxBoundDescriptorSets = 4, /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ - .maxPerStageDescriptorSamplers = UINT16_MAX, .maxDescriptorSetSamplers = UINT16_MAX, /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots * for our internal UBOs. @@ -304,25 +303,31 @@ get_device_properties(const struct panvk_physical_device *device, * a minus(1) modifier, which gives a maximum of 2^12 SSBO * descriptors. */ - .maxPerStageDescriptorStorageBuffers = 1 << 12, .maxDescriptorSetStorageBuffers = 1 << 12, /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ - .maxPerStageDescriptorSampledImages = UINT16_MAX, .maxDescriptorSetSampledImages = UINT16_MAX, /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images. 
*/ - .maxPerStageDescriptorStorageImages = 1 << 8, .maxDescriptorSetStorageImages = 1 << 8, /* A maximum of 8 color render targets, and one depth-stencil render * target. */ - .maxPerStageDescriptorInputAttachments = 9, .maxDescriptorSetInputAttachments = 9, - /* Could be the sum of all maxPerStageXxx values, but we limit ourselves - * to 2^16 to make things simpler. + + /* We could theoretically use the maxDescriptor values here (except for + * UBOs where we're really limited to 256 on the shader side), but on + * Bifrost we have to copy some tables around, which comes at an extra + * memory/processing cost, so let's pick something smaller. */ - .maxPerStageResources = 1 << 16, + .maxPerStageDescriptorInputAttachments = 9, + .maxPerStageDescriptorSampledImages = 256, + .maxPerStageDescriptorSamplers = 128, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorStorageImages = 32, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64, + /* Software limits to keep VkCommandBuffer tracking sane. */ .maxDescriptorSetUniformBuffersDynamic = 16, .maxDescriptorSetStorageBuffersDynamic = 8, diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index e0165ad4c90..0d756d6fef7 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -18,7 +18,8 @@ #include "panvk_descriptor_set.h" #include "panvk_macros.h" -#include "panvk_pipeline_layout.h" + +#include "vk_pipeline_layout.h" #define MAX_VS_ATTRIBS 16 @@ -51,6 +52,14 @@ struct panvk_graphics_sysvals { uint32_t base_vertex; uint32_t base_instance; } vs; + +#if PAN_ARCH <= 7 + struct { + uint64_t sets[MAX_SETS]; + uint64_t vs_dyn_ssbos; + uint64_t fs_dyn_ssbos; + } desc; +#endif }; struct panvk_compute_sysvals { @@ -60,13 +69,58 @@ struct panvk_compute_sysvals { struct { uint32_t x, y, z; } local_group_size; + +#if PAN_ARCH <= 7 + struct { + uint64_t sets[MAX_SETS]; + uint64_t dyn_ssbos; + } desc; +#endif +}; + +enum panvk_bifrost_desc_table_type { + PANVK_BIFROST_DESC_TABLE_INVALID = -1, + + /* A UBO descriptor is encoded in 8 bytes */ + PANVK_BIFROST_DESC_TABLE_UBO = 0, + + /* Images use a <3DAttributeBuffer,Attribute> pair, each element + * stored in a separate table. */ + PANVK_BIFROST_DESC_TABLE_IMG, + + /* Texture and sampler descriptors are encoded in 32 bytes each */ + PANVK_BIFROST_DESC_TABLE_TEXTURE, + PANVK_BIFROST_DESC_TABLE_SAMPLER, + + PANVK_BIFROST_DESC_TABLE_COUNT, +}; + +#define COPY_DESC_HANDLE(table, idx) (((table) << 28) | (idx)) +#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28)) +#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28) + +struct panvk_shader_desc_map { + /* The index of the map serves as the table offset; the value of the + * entry is a COPY_DESC_HANDLE() encoding the source set and the + * index of the descriptor in the set. */ + uint32_t *map; + + /* Number of entries in the map array. 
*/ + uint32_t count; +}; + +struct panvk_shader_desc_info { + uint32_t used_set_mask; + struct panvk_shader_desc_map dyn_ubos; + struct panvk_shader_desc_map dyn_ssbos; + struct panvk_shader_desc_map others[PANVK_BIFROST_DESC_TABLE_COUNT]; }; struct panvk_shader { struct pan_shader_info info; struct util_dynarray binary; struct pan_compute_dim local_size; - bool has_img_access; + struct panvk_shader_desc_info desc_info; }; bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev, @@ -75,15 +129,15 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev, struct panvk_shader *panvk_per_arch(shader_create)( struct panvk_device *dev, const VkPipelineShaderStageCreateInfo *stage_info, - const struct panvk_pipeline_layout *layout, - const VkAllocationCallbacks *alloc); + const struct vk_pipeline_layout *layout, const VkAllocationCallbacks *alloc); void panvk_per_arch(shader_destroy)(struct panvk_device *dev, struct panvk_shader *shader, const VkAllocationCallbacks *alloc); bool panvk_per_arch(nir_lower_descriptors)( - struct nir_shader *nir, struct panvk_device *dev, - const struct panvk_pipeline_layout *layout, bool *has_img_access_out); + nir_shader *nir, struct panvk_device *dev, + const struct vk_pipeline_layout *layout, + struct panvk_shader_desc_info *shader_desc_info); #endif diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index 5a12f1cc8f0..9d45a671d6b 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -110,9 +110,20 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, unsigned blksz = vk_format_get_blocksize(pCreateInfo->format); pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) { + /* The format is the only thing we lack to emit attribute descriptors + * when copying from the set to the attribute tables. Instead of + * growing the descriptor to store an extra format field, we pack + * the 22-bit format with the texel stride, which is expected to + * fit in the remaining 10 bits. + */ + uint32_t hw_fmt = GENX(panfrost_format_from_pipe_format)(pfmt)->hw; + + assert(blksz < BITFIELD_MASK(10)); + assert(hw_fmt < BITFIELD_MASK(22)); + cfg.type = MALI_ATTRIBUTE_TYPE_3D_LINEAR; cfg.pointer = address; - cfg.stride = blksz; + cfg.stride = blksz | (hw_fmt << 10); cfg.size = view->vk.elements * blksz; } diff --git a/src/panfrost/vulkan/valhall/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c similarity index 100% rename from src/panfrost/vulkan/valhall/panvk_vX_descriptor_set.c rename to src/panfrost/vulkan/panvk_vX_descriptor_set.c diff --git a/src/panfrost/vulkan/valhall/panvk_vX_descriptor_set_layout.c b/src/panfrost/vulkan/panvk_vX_descriptor_set_layout.c similarity index 100% rename from src/panfrost/vulkan/valhall/panvk_vX_descriptor_set_layout.c rename to src/panfrost/vulkan/panvk_vX_descriptor_set_layout.c diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index 53a44f9f0a0..077bb39e9c2 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -167,11 +167,24 @@ panvk_per_arch(CreateImageView)(VkDevice _device, is_3d ? view->pview.first_layer : 0); pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) { + /* The format is the only thing we lack to emit attribute descriptors + * when copying from the set to the attribute tables. 
Instead of + * growing the descriptor to store an extra format field, we pack + * the 22-bit format with the texel stride, which is expected to + * fit in the remaining 10 bits. + */ + uint32_t fmt_blksize = util_format_get_blocksize(view->pview.format); + uint32_t hw_fmt = + GENX(panfrost_format_from_pipe_format)(view->pview.format)->hw; + + assert(fmt_blksize < BITFIELD_MASK(10)); + assert(hw_fmt < BITFIELD_MASK(22)); + cfg.type = image->pimage.layout.modifier == DRM_FORMAT_MOD_LINEAR ? MALI_ATTRIBUTE_TYPE_3D_LINEAR : MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED; cfg.pointer = image->pimage.data.base + offset; - cfg.stride = util_format_get_blocksize(view->pview.format); + cfg.stride = fmt_blksize | (hw_fmt << 10); cfg.size = pan_kmod_bo_size(image->bo) - offset; } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index ee4fa894768..36132e696fe 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -32,8 +32,6 @@ #include "panvk_device.h" #include "panvk_instance.h" #include "panvk_physical_device.h" -#include "panvk_pipeline.h" -#include "panvk_pipeline_layout.h" #include "panvk_shader.h" #include "spirv/nir_spirv.h" @@ -49,6 +47,7 @@ #include "pan_shader.h" #include "vk_pipeline.h" +#include "vk_pipeline_layout.h" #include "vk_util.h" static nir_def * @@ -196,7 +195,7 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) struct panvk_shader * panvk_per_arch(shader_create)(struct panvk_device *dev, const VkPipelineShaderStageCreateInfo *stage_info, - const struct panvk_pipeline_layout *layout, + const struct vk_pipeline_layout *layout, const VkAllocationCallbacks *alloc) { struct panvk_physical_device *phys_dev = @@ -289,7 +288,7 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors), dev, layout, - &shader->has_img_access); + &shader->desc_info); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, nir_address_format_32bit_index_offset); @@ -365,9 +364,16 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, /* Patch the descriptor count */ shader->info.ubo_count = - panvk_per_arch(pipeline_layout_total_ubo_count)(layout); - shader->info.sampler_count = layout->num_samplers; - shader->info.texture_count = layout->num_textures; + shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_UBO].count + + shader->desc_info.dyn_ubos.count; + shader->info.texture_count = + shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_TEXTURE].count; + shader->info.sampler_count = + shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_SAMPLER].count; + + /* Dummy sampler. */ + if (!shader->info.sampler_count && shader->info.texture_count) + shader->info.sampler_count++; if (stage == MESA_SHADER_VERTEX) { /* We leave holes in the attribute locations, but pan_shader.c assumes the @@ -384,9 +390,10 @@ panvk_per_arch(shader_create)(struct panvk_device *dev, /* Image attributes start at MAX_VS_ATTRIBS in the VS attribute table, * and zero in other stages. */ - if (shader->has_img_access) + if (shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_IMG].count > 0) shader->info.attribute_count = - layout->num_imgs + (stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0); + shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_IMG].count + + (stage == MESA_SHADER_VERTEX ? 
MAX_VS_ATTRIBS : 0); shader->local_size.x = nir->info.workgroup_size[0]; shader->local_size.y = nir->info.workgroup_size[1]; @@ -403,6 +410,7 @@ panvk_per_arch(shader_destroy)(struct panvk_device *dev, const VkAllocationCallbacks *alloc) { util_dynarray_fini(&shader->binary); + /* dyn_ubos.map holds the base pointer of the single allocation backing + * all the copy maps (see create_copy_table()), so freeing it releases + * every map at once. */ + free(shader->desc_info.dyn_ubos.map); vk_free2(&dev->vk.alloc, alloc, shader); }
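
A note on the copy-descriptor handles used by the NIR lowering pass and the copy tables above: each 32-bit entry packs the source set in the top 4 bits and the descriptor index within that set in the low 28 bits. The standalone sketch below mirrors the COPY_DESC_HANDLE macros from panvk_shader.h; the main() harness is illustrative only, not part of the driver.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BITFIELD_MASK(b) ((1u << (b)) - 1u)

/* Same shape as the macros in panvk_shader.h: 4 bits of table/set,
 * 28 bits of descriptor index. */
#define COPY_DESC_HANDLE(table, idx)           ((uint32_t)(table) << 28 | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)

int main(void)
{
   /* Descriptor 137 of set 2, as recorded by fill_copy_descs_for_binding(). */
   uint32_t handle = COPY_DESC_HANDLE(2, 137);

   assert(COPY_DESC_HANDLE_EXTRACT_TABLE(handle) == 2);
   assert(COPY_DESC_HANDLE_EXTRACT_INDEX(handle) == 137);
   printf("set=%u idx=%u\n", COPY_DESC_HANDLE_EXTRACT_TABLE(handle),
          COPY_DESC_HANDLE_EXTRACT_INDEX(handle));
   return 0;
}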
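
The buffer/image view changes reuse the 32-bit ATTRIBUTE_BUFFER stride field to carry two values: the texel stride in the low 10 bits and the 22-bit hardware format above it. A minimal sketch of that split follows; the pack/unpack helper names are hypothetical, since the driver itself only packs (cfg.stride = blksz | (hw_fmt << 10)) and lets the descriptor-copy path recover the format.

#include <assert.h>
#include <stdint.h>

/* Low 10 bits: texel stride (block size). Upper 22 bits: hardware format. */
static uint32_t pack_stride_fmt(uint32_t blksz, uint32_t hw_fmt)
{
   assert(blksz < (1u << 10) && hw_fmt < (1u << 22));
   return blksz | (hw_fmt << 10);
}

static void unpack_stride_fmt(uint32_t packed, uint32_t *blksz,
                              uint32_t *hw_fmt)
{
   *blksz = packed & ((1u << 10) - 1u);
   *hw_fmt = packed >> 10;
}

int main(void)
{
   uint32_t blksz, hw_fmt;

   unpack_stride_fmt(pack_stride_fmt(16, 0x2abcd), &blksz, &hw_fmt);
   assert(blksz == 16 && hw_fmt == 0x2abcd);
   return 0;
}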
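
create_copy_table() and shader_destroy() also imply a single-allocation ownership model: one calloc() backs every map, dyn_ubos.map keeps the base pointer, and the remaining maps alias into the same block, which is why a single free() suffices. A simplified sketch of the pattern, with illustrative struct and function names:

#include <stdint.h>
#include <stdlib.h>

struct desc_map {
   uint32_t *map;
   uint32_t count;
};

/* Carve n maps out of one allocation. maps[0].map receives the base
 * pointer, so free(maps[0].map) releases every map at once, exactly
 * like free(shader->desc_info.dyn_ubos.map) above. */
static int carve_copy_maps(struct desc_map *maps, unsigned n)
{
   uint32_t total = 0;
   for (unsigned i = 0; i < n; i++)
      total += maps[i].count;

   if (!total)
      return 0; /* every map stays NULL; free(NULL) is a no-op */

   uint32_t *slice = calloc(total, sizeof(*slice));
   if (!slice)
      return -1;

   for (unsigned i = 0; i < n; i++) {
      maps[i].map = slice;
      slice += maps[i].count;
   }
   return 0;
}

int main(void)
{
   struct desc_map maps[3] = {{NULL, 2}, {NULL, 0}, {NULL, 5}};

   if (carve_copy_maps(maps, 3))
      return 1;
   free(maps[0].map); /* also releases maps[1] and maps[2] */
   return 0;
}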
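
Finally, the UINT16_MAX - 4 test in panvk_cmd_draw() is a plain counter-headroom guard: job indices are 16-bit, and a Bifrost draw can now emit up to four jobs (vertex copy-desc pilot, vertex, fragment copy-desc pilot, tiler). A tiny sketch of the check, with an assumed JOBS_PER_DRAW constant:

#include <stdbool.h>
#include <stdint.h>

#define JOBS_PER_DRAW 4 /* copy-desc + vertex + copy-desc + tiler */

/* Returns true when the current batch can still accommodate a full
 * draw without overflowing the 16-bit job index. */
static bool batch_has_job_headroom(uint32_t job_index)
{
   return job_index < UINT16_MAX - JOBS_PER_DRAW;
}

int main(void)
{
   return batch_has_job_headroom(0) ? 0 : 1;
}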