panvk: Overhaul the Bifrost descriptor set implementation

Turns out the current approach makes implementation of advanced features
like update-after-bind or shader modules quite challenging. Instead of
adding hacks all over the place to support these features, let's use
the Valhall descriptor model.

Each shader now gets its own descriptor tables, which are fed by pilot
shaders copying the descriptors used by the shader from the descriptor
sets currently bound to the command buffer.
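
The copy step can be pictured as a simple gather over the bound sets. The
sketch below is purely illustrative (hypothetical names, not the actual
panvk code, and the handle bit-split is an assumption): each shader records
which (set, index) pairs it uses, and before a draw/dispatch those
descriptors are copied into the shader's private table.

#include <stdint.h>

#define MAX_SETS 4

/* Opaque 32-byte hardware descriptor (illustrative only). */
struct desc {
   uint64_t words[4];
};

struct desc_set {
   const struct desc *descs;
   uint32_t desc_count;
};

/* Per-shader copy map recorded at compile time. Each entry encodes
 * (set index << 28) | (descriptor index within the set); the bit split
 * here is illustrative, not the real COPY_DESC_HANDLE encoding. */
struct shader_desc_info {
   const uint32_t *map;
   uint32_t count;
};

/* What the pilot shader conceptually does before a draw/dispatch:
 * gather the descriptors this shader uses from the bound sets into
 * the shader's private table. */
static void
fill_shader_table(const struct desc_set *sets[MAX_SETS],
                  const struct shader_desc_info *info,
                  struct desc *table)
{
   for (uint32_t i = 0; i < info->count; i++) {
      uint32_t set_idx = info->map[i] >> 28;
      uint32_t desc_idx = info->map[i] & 0x0fffffffu;
      table[i] = sets[set_idx]->descs[desc_idx];
   }
}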

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29654>
Boris Brezillon 2024-05-30 13:05:43 +02:00 committed by Marge Bot
parent ad86990056
commit 7bea6f8612
30 changed files with 1541 additions and 2369 deletions


@@ -1101,28 +1101,6 @@ dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r16g16_sint.gradient.unnormal
dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r32_sfloat.gradient.normalized_coords.edge_right,Fail
dEQP-VK.pipeline.monolithic.sampler.exact_sampling.r32_sfloat.gradient.unnormalized_coords.edge_right,Fail
# We store image/buffer information in a UBO, and
# maxDescriptorSetSampledImages * sizeof(struct panvk_image_desc) exceeds the
# maximum UBO size.
# The easy fix would be to further limit the max value in the physical device
# limit, but given this UBO contains information for storage buffers too, it
# implies putting a constraint on
# maxDescriptorSetStorageBuffers+maxDescriptorSetSampledImages.
# The proper fix would be to use a global pointer/size pair, and lower
# descriptor info loads to global loads.
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_16384,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_32768,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.sampled_images_65535,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_16384,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_32768,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.compute_shader.samplers_65535,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_16384,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_32768,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.sampled_images_65535,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_16384,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_32768,Crash
dEQP-VK.pipeline.monolithic.descriptor_limits.fragment_shader.samplers_65535,Crash
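
For a sense of scale behind the comment above: struct panvk_image_desc (as
defined in the removed panvk_descriptor_set.h further down) packs three
uint16_t extents and two uint8_t fields into 8 bytes, so the 65535-image
cases need roughly 512 KiB of descriptor data. A rough check is below; the
64 KiB UBO range it assumes is an illustration, not a value taken from this
diff.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the removed struct panvk_image_desc: 8 bytes with natural packing. */
struct panvk_image_desc {
   uint16_t width, height, depth;
   uint8_t levels, samples;
};

int main(void)
{
   const uint32_t max_ubo_size = 64 * 1024; /* assumed UBO range limit */
   const uint32_t sampled_images = 65535;   /* worst dEQP case above */
   uint64_t needed =
      (uint64_t)sampled_images * sizeof(struct panvk_image_desc);

   /* Prints roughly 512 KiB needed vs 64 KiB available. */
   printf("need %llu bytes for %u image descs, vs %u bytes of UBO space\n",
          (unsigned long long)needed, sampled_images, max_ubo_size);
   return 0;
}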
# resolve operations not supported yet
dEQP-VK.pipeline.monolithic.framebuffer_attachment.2d_19x27_32x32_ms,Crash
dEQP-VK.pipeline.monolithic.framebuffer_attachment.2d_32x32_39x41_ms,Crash


@@ -22,52 +22,18 @@
#include "pan_pool.h"
struct panvk_descriptor_state {
const struct panvk_descriptor_set *sets[MAX_SETS];
struct panvk_push_descriptor_set *push_sets[MAX_SETS];
struct {
struct mali_uniform_buffer_packed ubos[MAX_DYNAMIC_UNIFORM_BUFFERS];
struct panvk_ssbo_addr ssbos[MAX_DYNAMIC_STORAGE_BUFFERS];
} dyn;
mali_ptr ubos;
mali_ptr textures;
mali_ptr samplers;
mali_ptr dyn_desc_ubo;
struct {
mali_ptr attribs;
mali_ptr attrib_bufs;
} img;
struct panvk_shader_desc_state {
mali_ptr tables[PANVK_BIFROST_DESC_TABLE_COUNT];
mali_ptr img_attrib_table;
mali_ptr dyn_ssbos;
};
void panvk_per_arch(cmd_prepare_push_sets)(
struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
struct panvk_descriptor_state {
const struct panvk_descriptor_set *sets[MAX_SETS];
struct panvk_descriptor_set *push_sets[MAX_SETS];
void panvk_per_arch(cmd_unprepare_push_sets)(
struct panvk_descriptor_state *desc_state);
void panvk_per_arch(cmd_prepare_ubos)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
void panvk_per_arch(cmd_prepare_textures)(
struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
void panvk_per_arch(cmd_prepare_samplers)(
struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
void panvk_per_arch(fill_img_attribs)(struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline,
void *attrib_bufs, void *attribs,
unsigned first_buf);
void panvk_per_arch(prepare_img_attribs)(
struct pan_pool *desc_pool_base, struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
uint32_t dyn_buf_offsets[MAX_SETS][MAX_DYNAMIC_BUFFERS];
};
void panvk_per_arch(cmd_desc_state_reset)(
struct panvk_descriptor_state *gfx_desc_state,
@@ -84,8 +50,22 @@ void panvk_per_arch(cmd_desc_state_bind_sets)(
const VkDescriptorSet *desc_sets, uint32_t dyn_offset_count,
const uint32_t *dyn_offsets);
struct panvk_push_descriptor_set *panvk_per_arch(cmd_push_descriptors)(
struct panvk_descriptor_set *panvk_per_arch(cmd_push_descriptors)(
struct vk_command_buffer *cmdbuf, struct panvk_descriptor_state *desc_state,
uint32_t set);
void panvk_per_arch(cmd_prepare_dyn_ssbos)(
struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline_shader *shader,
struct panvk_shader_desc_state *shader_desc_state);
void panvk_per_arch(cmd_prepare_shader_desc_tables)(
struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline_shader *shader,
struct panvk_shader_desc_state *shader_desc_state);
void panvk_per_arch(cmd_prepare_push_descs)(
struct pan_pool *desc_pool, struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline);
#endif


@@ -1,109 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_DESCRIPTOR_SET_H
#define PANVK_DESCRIPTOR_SET_H
#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif
#include <stdint.h>
#include "vk_object.h"
#include "panvk_macros.h"
#define PANVK_MAX_PUSH_DESCS 32
#define PANVK_MAX_DESC_SIZE 32
#define PANVK_MAX_DESC_UBO_STRIDE 8
struct panvk_cmd_buffer;
struct panvk_descriptor_pool;
struct panvk_descriptor_set_layout;
struct panvk_priv_bo;
struct panvk_descriptor_pool {
struct vk_object_base base;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_pool, base, VkDescriptorPool,
VK_OBJECT_TYPE_DESCRIPTOR_POOL)
/* This has to match nir_address_format_64bit_bounded_global */
struct panvk_ssbo_addr {
uint64_t base_addr;
uint32_t size;
uint32_t zero; /* Must be zero! */
};
struct panvk_bview_desc {
uint32_t elems;
};
struct panvk_image_desc {
uint16_t width;
uint16_t height;
uint16_t depth;
uint8_t levels;
uint8_t samples;
};
struct panvk_buffer_desc {
struct panvk_buffer *buffer;
VkDeviceSize offset;
VkDeviceSize size;
};
struct panvk_descriptor_set {
struct vk_object_base base;
struct panvk_descriptor_pool *pool;
const struct panvk_descriptor_set_layout *layout;
struct panvk_buffer_desc *dyn_ssbos;
void *ubos;
struct panvk_buffer_desc *dyn_ubos;
void *samplers;
void *textures;
void *img_attrib_bufs;
uint32_t *img_fmts;
struct {
struct panvk_priv_bo *bo;
struct {
uint64_t dev;
void *host;
} addr;
} desc_ubo;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set, base, VkDescriptorSet,
VK_OBJECT_TYPE_DESCRIPTOR_SET)
struct panvk_push_descriptor_set {
struct {
uint8_t descs[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_SIZE];
uint8_t desc_ubo[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_UBO_STRIDE];
uint32_t img_fmts[PANVK_MAX_PUSH_DESCS];
} storage;
struct panvk_descriptor_set set;
};
#ifdef PAN_ARCH
void panvk_per_arch(push_descriptor_set_assign_layout)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout);
void panvk_per_arch(push_descriptor_set)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout, uint32_t write_count,
const VkWriteDescriptorSet *writes);
void panvk_per_arch(push_descriptor_set_with_template)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
VkDescriptorUpdateTemplate templ, const void *data);
#endif
#endif


@@ -1,86 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_DESCRIPTOR_SET_LAYOUT_H
#define PANVK_DESCRIPTOR_SET_LAYOUT_H
#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif
#include <stdint.h>
#include "vk_descriptor_set_layout.h"
struct panvk_descriptor_set_binding_layout {
VkDescriptorType type;
/* Number of array elements in this binding */
unsigned array_size;
/* Indices in the desc arrays */
union {
struct {
union {
unsigned sampler_idx;
unsigned img_idx;
};
unsigned tex_idx;
};
unsigned dyn_ssbo_idx;
unsigned ubo_idx;
unsigned dyn_ubo_idx;
};
/* Offset into the descriptor UBO where this binding starts */
uint32_t desc_ubo_offset;
/* Stride between descriptors in this binding in the UBO */
uint16_t desc_ubo_stride;
/* Shader stages affected by this set+binding */
uint16_t shader_stages;
struct panvk_sampler **immutable_samplers;
};
struct panvk_descriptor_set_layout {
struct vk_descriptor_set_layout vk;
VkDescriptorSetLayoutCreateFlags flags;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
unsigned num_samplers;
unsigned num_textures;
unsigned num_ubos;
unsigned num_dyn_ubos;
unsigned num_dyn_ssbos;
unsigned num_imgs;
/* Size of the descriptor UBO */
uint32_t desc_ubo_size;
/* Index of the descriptor UBO */
unsigned desc_ubo_index;
/* Number of bindings in this descriptor set */
uint32_t binding_count;
/* Bindings in this descriptor set */
struct panvk_descriptor_set_binding_layout bindings[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set_layout, vk.base,
VkDescriptorSetLayout,
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
static inline const struct panvk_descriptor_set_layout *
vk_to_panvk_descriptor_set_layout(const struct vk_descriptor_set_layout *layout)
{
return container_of(layout, const struct panvk_descriptor_set_layout, vk);
}
#endif


@@ -36,7 +36,23 @@ struct panvk_pipeline_shader {
} varyings;
struct pan_shader_info info;
bool has_img_access;
struct {
uint32_t used_set_mask;
struct {
uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
uint32_t count;
} dyn_ubos;
struct {
uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
uint32_t count;
} dyn_ssbos;
struct {
mali_ptr map;
uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
} others;
} desc_info;
};
enum panvk_pipeline_type {
@@ -48,7 +64,7 @@ struct panvk_pipeline {
struct vk_object_base base;
enum panvk_pipeline_type type;
const struct panvk_pipeline_layout *layout;
const struct vk_pipeline_layout *layout;
struct panvk_pool bin_pool;
struct panvk_pool desc_pool;


@@ -1,72 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_PIPELINE_LAYOUT_H
#define PANVK_PIPELINE_LAYOUT_H
#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif
#include <stdint.h>
#include "vk_pipeline_layout.h"
#include "panvk_descriptor_set_layout.h"
#include "panvk_macros.h"
#define MAX_SETS 4
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS \
(MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
struct panvk_pipeline_layout {
struct vk_pipeline_layout vk;
unsigned char sha1[20];
unsigned num_samplers;
unsigned num_textures;
unsigned num_ubos;
unsigned num_dyn_ubos;
unsigned num_dyn_ssbos;
uint32_t num_imgs;
struct {
uint32_t size;
} push_constants;
struct {
unsigned sampler_offset;
unsigned tex_offset;
unsigned ubo_offset;
unsigned dyn_ubo_offset;
unsigned dyn_ssbo_offset;
unsigned img_offset;
unsigned dyn_desc_ubo_offset;
} sets[MAX_SETS];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_layout, vk.base, VkPipelineLayout,
VK_OBJECT_TYPE_PIPELINE_LAYOUT)
unsigned panvk_per_arch(pipeline_layout_ubo_start)(
const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic);
unsigned panvk_per_arch(pipeline_layout_ubo_index)(
const struct panvk_pipeline_layout *layout, unsigned set, unsigned binding,
unsigned array_index);
unsigned panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
const struct panvk_pipeline_layout *layout);
unsigned panvk_per_arch(pipeline_layout_dyn_ubos_offset)(
const struct panvk_pipeline_layout *layout);
unsigned panvk_per_arch(pipeline_layout_total_ubo_count)(
const struct panvk_pipeline_layout *layout);
#endif


@@ -15,7 +15,6 @@
#include "panvk_cmd_desc_state.h"
#include "panvk_entrypoints.h"
#include "panvk_pipeline.h"
#include "panvk_pipeline_layout.h"
#include "pan_pool.h"
@@ -25,246 +24,6 @@
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
void
panvk_per_arch(cmd_prepare_push_sets)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
const struct panvk_pipeline_layout *playout = pipeline->layout;
for (unsigned i = 0; i < playout->vk.set_count; i++) {
const struct panvk_descriptor_set_layout *slayout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[i]);
bool is_push_set =
slayout->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
if (desc_state->sets[i] || !is_push_set || !desc_state->push_sets[i])
continue;
struct panvk_descriptor_set *set = &desc_state->push_sets[i]->set;
panvk_per_arch(push_descriptor_set_assign_layout)(
desc_state->push_sets[i], slayout);
if (slayout->desc_ubo_size) {
struct panfrost_ptr desc_ubo =
pan_pool_alloc_aligned(desc_pool_base, slayout->desc_ubo_size, 16);
struct mali_uniform_buffer_packed *ubos = set->ubos;
memcpy(desc_ubo.cpu, set->desc_ubo.addr.host, slayout->desc_ubo_size);
set->desc_ubo.addr.dev = desc_ubo.gpu;
set->desc_ubo.addr.host = desc_ubo.cpu;
pan_pack(&ubos[slayout->desc_ubo_index], UNIFORM_BUFFER, cfg) {
cfg.pointer = set->desc_ubo.addr.dev;
cfg.entries = DIV_ROUND_UP(slayout->desc_ubo_size, 16);
}
}
desc_state->sets[i] = &desc_state->push_sets[i]->set;
}
}
void
panvk_per_arch(cmd_unprepare_push_sets)(
struct panvk_descriptor_state *desc_state)
{
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
if (desc_state->push_sets[i] &&
&desc_state->push_sets[i]->set == desc_state->sets[i])
desc_state->sets[i] = NULL;
}
}
static void
panvk_cmd_prepare_dyn_ssbos(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
if (!pipeline->layout->num_dyn_ssbos || desc_state->dyn_desc_ubo)
return;
struct panfrost_ptr ssbo_descs =
pan_pool_alloc_aligned(desc_pool_base, sizeof(desc_state->dyn.ssbos), 16);
memcpy(ssbo_descs.cpu, desc_state->dyn.ssbos, sizeof(desc_state->dyn.ssbos));
desc_state->dyn_desc_ubo = ssbo_descs.gpu;
}
void
panvk_per_arch(cmd_prepare_ubos)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
unsigned ubo_count =
panvk_per_arch(pipeline_layout_total_ubo_count)(pipeline->layout);
if (!ubo_count || desc_state->ubos)
return;
panvk_cmd_prepare_dyn_ssbos(desc_pool_base, desc_state, pipeline);
struct panfrost_ptr ubos =
pan_pool_alloc_desc_array(desc_pool_base, ubo_count, UNIFORM_BUFFER);
struct mali_uniform_buffer_packed *ubo_descs = ubos.cpu;
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
const struct panvk_descriptor_set *set = desc_state->sets[s];
unsigned ubo_start =
panvk_per_arch(pipeline_layout_ubo_start)(pipeline->layout, s, false);
if (!set) {
memset(&ubo_descs[ubo_start], 0,
set_layout->num_ubos * sizeof(*ubo_descs));
} else {
memcpy(&ubo_descs[ubo_start], set->ubos,
set_layout->num_ubos * sizeof(*ubo_descs));
}
}
unsigned dyn_ubos_offset =
panvk_per_arch(pipeline_layout_dyn_ubos_offset)(pipeline->layout);
memcpy(&ubo_descs[dyn_ubos_offset], desc_state->dyn.ubos,
pipeline->layout->num_dyn_ubos * sizeof(*ubo_descs));
if (pipeline->layout->num_dyn_ssbos) {
unsigned dyn_desc_ubo =
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(pipeline->layout);
pan_pack(&ubo_descs[dyn_desc_ubo], UNIFORM_BUFFER, cfg) {
cfg.pointer = desc_state->dyn_desc_ubo;
cfg.entries =
pipeline->layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr);
}
}
desc_state->ubos = ubos.gpu;
}
void
panvk_per_arch(cmd_prepare_textures)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
unsigned num_textures = pipeline->layout->num_textures;
if (!num_textures || desc_state->textures)
return;
struct panfrost_ptr textures = pan_pool_alloc_aligned(
desc_pool_base, num_textures * pan_size(TEXTURE), pan_size(TEXTURE));
void *texture = textures.cpu;
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
if (!desc_state->sets[i])
continue;
memcpy(texture, desc_state->sets[i]->textures,
desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE));
texture += desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE);
}
desc_state->textures = textures.gpu;
}
void
panvk_per_arch(cmd_prepare_samplers)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
unsigned num_samplers = pipeline->layout->num_samplers;
if (!num_samplers || desc_state->samplers)
return;
struct panfrost_ptr samplers =
pan_pool_alloc_desc_array(desc_pool_base, num_samplers, SAMPLER);
void *sampler = samplers.cpu;
/* Prepare the dummy sampler */
pan_pack(sampler, SAMPLER, cfg) {
cfg.seamless_cube_map = false;
cfg.magnify_nearest = true;
cfg.minify_nearest = true;
cfg.normalized_coordinates = false;
}
sampler += pan_size(SAMPLER);
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
if (!desc_state->sets[i])
continue;
memcpy(sampler, desc_state->sets[i]->samplers,
desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER));
sampler += desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER);
}
desc_state->samplers = samplers.gpu;
}
void
panvk_per_arch(fill_img_attribs)(struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline,
void *attrib_bufs, void *attribs,
unsigned first_buf)
{
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set *set = desc_state->sets[s];
if (!set)
continue;
const struct panvk_descriptor_set_layout *layout = set->layout;
unsigned img_idx = pipeline->layout->sets[s].img_offset;
unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;
memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);
offset = img_idx * pan_size(ATTRIBUTE);
for (unsigned i = 0; i < layout->num_imgs; i++) {
pan_pack(attribs + offset, ATTRIBUTE, cfg) {
cfg.buffer_index = first_buf + (img_idx + i) * 2;
cfg.format = desc_state->sets[s]->img_fmts[i];
cfg.offset_enable = false;
}
offset += pan_size(ATTRIBUTE);
}
}
}
void
panvk_per_arch(prepare_img_attribs)(struct pan_pool *desc_pool_base,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
if (desc_state->img.attribs)
return;
unsigned attrib_count = pipeline->layout->num_imgs;
unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
desc_pool_base, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
struct panfrost_ptr attribs =
pan_pool_alloc_desc_array(desc_pool_base, attrib_count, ATTRIBUTE);
panvk_per_arch(fill_img_attribs)(desc_state, pipeline, bufs.cpu, attribs.cpu,
0);
desc_state->img.attrib_bufs = bufs.gpu;
desc_state->img.attribs = attribs.gpu;
}
void
panvk_per_arch(cmd_desc_state_reset)(
struct panvk_descriptor_state *gfx_desc_state,
@@ -288,62 +47,6 @@ panvk_per_arch(cmd_desc_state_cleanup)(
}
}
static void
panvk_emit_dyn_ubo(struct panvk_descriptor_state *desc_state,
const struct panvk_descriptor_set *desc_set,
unsigned binding, unsigned array_idx, uint32_t dyn_offset,
unsigned dyn_ubo_slot)
{
struct mali_uniform_buffer_packed *ubo = &desc_state->dyn.ubos[dyn_ubo_slot];
const struct panvk_descriptor_set_layout *slayout = desc_set->layout;
VkDescriptorType type = slayout->bindings[binding].type;
assert(type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
assert(dyn_ubo_slot < ARRAY_SIZE(desc_state->dyn.ubos));
const unsigned dyn_ubo_idx = slayout->bindings[binding].dyn_ubo_idx;
const struct panvk_buffer_desc *bdesc =
&desc_set->dyn_ubos[dyn_ubo_idx + array_idx];
mali_ptr address =
panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset + dyn_offset);
size_t size = panvk_buffer_range(bdesc->buffer, bdesc->offset + dyn_offset,
bdesc->size);
if (size) {
pan_pack(ubo, UNIFORM_BUFFER, cfg) {
cfg.pointer = address;
cfg.entries = DIV_ROUND_UP(size, 16);
}
} else {
memset(ubo, 0, sizeof(*ubo));
}
}
static void
panvk_emit_dyn_ssbo(struct panvk_descriptor_state *desc_state,
const struct panvk_descriptor_set *desc_set,
unsigned binding, unsigned array_idx, uint32_t dyn_offset,
unsigned dyn_ssbo_slot)
{
struct panvk_ssbo_addr *ssbo = &desc_state->dyn.ssbos[dyn_ssbo_slot];
const struct panvk_descriptor_set_layout *slayout = desc_set->layout;
VkDescriptorType type = slayout->bindings[binding].type;
assert(type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
assert(dyn_ssbo_slot < ARRAY_SIZE(desc_state->dyn.ssbos));
const unsigned dyn_ssbo_idx = slayout->bindings[binding].dyn_ssbo_idx;
const struct panvk_buffer_desc *bdesc =
&desc_set->dyn_ssbos[dyn_ssbo_idx + array_idx];
*ssbo = (struct panvk_ssbo_addr){
.base_addr =
panvk_buffer_gpu_ptr(bdesc->buffer, bdesc->offset + dyn_offset),
.size = panvk_buffer_range(bdesc->buffer, bdesc->offset + dyn_offset,
bdesc->size),
};
}
void
panvk_per_arch(cmd_desc_state_bind_sets)(
struct panvk_descriptor_state *desc_state, VkPipelineLayout layout,
@@ -351,77 +54,193 @@ panvk_per_arch(cmd_desc_state_bind_sets)(
const VkDescriptorSet *desc_sets, uint32_t dyn_offset_count,
const uint32_t *dyn_offsets)
{
VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout);
unsigned dynoffset_idx = 0;
for (unsigned i = 0; i < desc_set_count; ++i) {
unsigned idx = i + first_set;
unsigned set_idx = i + first_set;
VK_FROM_HANDLE(panvk_descriptor_set, set, desc_sets[i]);
desc_state->sets[idx] = set;
/* Invalidate the push set. */
if (desc_state->sets[set_idx] &&
desc_state->sets[set_idx] == desc_state->push_sets[set_idx])
desc_state->push_sets[set_idx]->descs.dev = 0;
if (set->layout->num_dyn_ssbos || set->layout->num_dyn_ubos) {
unsigned dyn_ubo_slot = playout->sets[idx].dyn_ubo_offset;
unsigned dyn_ssbo_slot = playout->sets[idx].dyn_ssbo_offset;
desc_state->sets[set_idx] = set;
for (unsigned b = 0; b < set->layout->binding_count; b++) {
for (unsigned e = 0; e < set->layout->bindings[b].array_size; e++) {
VkDescriptorType type = set->layout->bindings[b].type;
if (!set || !set->layout->dyn_buf_count)
continue;
if (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
panvk_emit_dyn_ubo(desc_state, set, b, e,
dyn_offsets[dynoffset_idx++],
dyn_ubo_slot++);
} else if (type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
panvk_emit_dyn_ssbo(desc_state, set, b, e,
dyn_offsets[dynoffset_idx++],
dyn_ssbo_slot++);
}
}
for (unsigned b = 0; b < set->layout->binding_count; b++) {
VkDescriptorType type = set->layout->bindings[b].type;
if (type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
continue;
unsigned dyn_buf_idx = set->layout->bindings[b].desc_idx;
for (unsigned e = 0; e < set->layout->bindings[b].desc_count; e++) {
desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx++] =
dyn_offsets[dynoffset_idx++];
}
}
}
/* Unconditionally reset all previously emitted descriptor tables.
* TODO: we could be smarter by checking which parts of the pipeline layout
* are compatible with the previously bound descriptor sets.
*/
desc_state->ubos = 0;
desc_state->textures = 0;
desc_state->samplers = 0;
desc_state->dyn_desc_ubo = 0;
desc_state->img.attrib_bufs = 0;
desc_state->img.attribs = 0;
assert(dynoffset_idx == dyn_offset_count);
}
struct panvk_push_descriptor_set *
struct panvk_descriptor_set *
panvk_per_arch(cmd_push_descriptors)(struct vk_command_buffer *cmdbuf,
struct panvk_descriptor_state *desc_state,
uint32_t set)
uint32_t set_idx)
{
assert(set < MAX_SETS);
if (unlikely(desc_state->push_sets[set] == NULL)) {
desc_state->push_sets[set] =
vk_zalloc(&cmdbuf->pool->alloc, sizeof(*desc_state->push_sets[0]), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (unlikely(desc_state->push_sets[set] == NULL)) {
assert(set_idx < MAX_SETS);
if (unlikely(desc_state->push_sets[set_idx] == NULL)) {
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct panvk_descriptor_set, set, 1);
VK_MULTIALLOC_DECL(&ma, struct panvk_opaque_desc, descs, MAX_PUSH_DESCS);
if (unlikely(!vk_multialloc_zalloc(&ma, &cmdbuf->pool->alloc,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))) {
vk_command_buffer_set_error(cmdbuf, VK_ERROR_OUT_OF_HOST_MEMORY);
return NULL;
}
desc_state->push_sets[set_idx] = set;
set->descs.host = descs;
}
/* Pushing descriptors replaces whatever sets are bound */
desc_state->sets[set] = NULL;
struct panvk_descriptor_set *set = desc_state->push_sets[set_idx];
/* Reset all descs to force emission of new tables on the next draw/dispatch.
* TODO: Be smarter and only reset those when required.
*/
desc_state->ubos = 0;
desc_state->textures = 0;
desc_state->samplers = 0;
desc_state->img.attrib_bufs = 0;
desc_state->img.attribs = 0;
return desc_state->push_sets[set];
/* Pushing descriptors replaces whatever sets are bound */
desc_state->sets[set_idx] = set;
return set;
}
void
panvk_per_arch(cmd_prepare_dyn_ssbos)(
struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline_shader *shader,
struct panvk_shader_desc_state *shader_desc_state)
{
if (!shader->desc_info.dyn_ssbos.count || shader_desc_state->dyn_ssbos)
return;
struct panfrost_ptr ptr = pan_pool_alloc_aligned(
desc_pool, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE,
PANVK_DESCRIPTOR_SIZE);
struct panvk_ssbo_addr *ssbos = ptr.cpu;
for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) {
uint32_t src_handle = shader->desc_info.dyn_ssbos.map[i];
uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
const uint32_t dyn_buf_offset =
desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];
assert(set_idx < MAX_SETS);
assert(set);
ssbos[i] = (struct panvk_ssbo_addr){
.base_addr = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset,
.size = set->dyn_bufs[dyn_buf_idx].size,
};
}
shader_desc_state->dyn_ssbos = ptr.gpu;
}
static void
panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline_shader *shader,
struct mali_uniform_buffer_packed *ubos,
uint32_t ubo_count)
{
for (uint32_t i = 0; i < shader->desc_info.dyn_ubos.count; i++) {
uint32_t src_handle = shader->desc_info.dyn_ubos.map[i];
uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
uint32_t ubo_idx =
i + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO];
const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
const uint32_t dyn_buf_offset =
desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];
assert(set_idx < MAX_SETS);
assert(set);
assert(ubo_idx < ubo_count);
pan_pack(&ubos[ubo_idx], UNIFORM_BUFFER, cfg) {
cfg.pointer = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
cfg.entries = DIV_ROUND_UP(set->dyn_bufs[dyn_buf_idx].size, 16);
}
}
}
void
panvk_per_arch(cmd_prepare_shader_desc_tables)(
struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline_shader *shader,
struct panvk_shader_desc_state *shader_desc_state)
{
for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
uint32_t desc_count =
shader->desc_info.others.count[i] +
(i == PANVK_BIFROST_DESC_TABLE_UBO ? shader->desc_info.dyn_ubos.count
: 0);
uint32_t desc_size =
i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE;
if (!desc_count || shader_desc_state->tables[i])
continue;
struct panfrost_ptr ptr = pan_pool_alloc_aligned(
desc_pool, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE);
shader_desc_state->tables[i] = ptr.gpu;
if (i == PANVK_BIFROST_DESC_TABLE_UBO)
panvk_cmd_fill_dyn_ubos(desc_state, shader, ptr.cpu, desc_count);
if (i == PANVK_BIFROST_DESC_TABLE_IMG) {
assert(!shader_desc_state->img_attrib_table);
ptr = pan_pool_alloc_desc_array(desc_pool, desc_count, ATTRIBUTE);
shader_desc_state->img_attrib_table = ptr.gpu;
}
}
uint32_t tex_count =
shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_TEXTURE];
uint32_t sampler_count =
shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER];
if (tex_count && !sampler_count) {
struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER);
/* Emit a dummy sampler if we have to. */
pan_pack(sampler.cpu, SAMPLER, _) {
}
shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu;
}
}
void
panvk_per_arch(cmd_prepare_push_descs)(struct pan_pool *desc_pool,
struct panvk_descriptor_state *desc_state,
const struct panvk_pipeline *pipeline)
{
const struct vk_pipeline_layout *playout = pipeline->layout;
for (unsigned i = 0; i < playout->set_count; i++) {
struct panvk_descriptor_set *push_set = desc_state->push_sets[i];
if (!push_set || desc_state->sets[i] != push_set || push_set->descs.dev)
continue;
push_set->descs.dev = pan_pool_upload_aligned(
desc_pool, push_set->descs.host,
push_set->desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
}
}


@@ -1,995 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "genxml/gen_macros.h"
#include "panvk_buffer_view.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_image.h"
#include "panvk_image_view.h"
#include "panvk_priv_bo.h"
#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include "util/mesa-sha1.h"
#include "vk_alloc.h"
#include "vk_descriptor_update_template.h"
#include "vk_descriptors.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_util.h"
#include "panvk_buffer.h"
#include "panvk_descriptor_set.h"
#include "panvk_descriptor_set_layout.h"
#include "panvk_sampler.h"
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(CreateDescriptorPool)(
VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_descriptor_pool *pool;
pool = vk_object_zalloc(&device->vk, pAllocator,
sizeof(struct panvk_descriptor_pool),
VK_OBJECT_TYPE_DESCRIPTOR_POOL);
if (!pool)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
*pDescriptorPool = panvk_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(DestroyDescriptorPool)(VkDevice _device, VkDescriptorPool _pool,
const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_descriptor_pool, pool, _pool);
if (pool)
vk_object_free(&device->vk, pAllocator, pool);
}
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(ResetDescriptorPool)(VkDevice _device, VkDescriptorPool _pool,
VkDescriptorPoolResetFlags flags)
{
return VK_SUCCESS;
}
static void
panvk_descriptor_set_destroy(struct panvk_device *device,
struct panvk_descriptor_pool *pool,
struct panvk_descriptor_set *set)
{
if (set->desc_ubo.bo)
panvk_priv_bo_destroy(set->desc_ubo.bo, NULL);
vk_object_free(&device->vk, NULL, set);
}
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(FreeDescriptorSets)(VkDevice _device,
VkDescriptorPool descriptorPool,
uint32_t count,
const VkDescriptorSet *pDescriptorSets)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_descriptor_pool, pool, descriptorPool);
for (unsigned i = 0; i < count; i++) {
VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorSets[i]);
if (set)
panvk_descriptor_set_destroy(device, pool, set);
}
return VK_SUCCESS;
}
static void
panvk_fill_bview_desc(struct panvk_bview_desc *desc,
struct panvk_buffer_view *view)
{
desc->elems = view->vk.elements;
}
static void
panvk_fill_image_desc(struct panvk_image_desc *desc,
struct panvk_image_view *view)
{
desc->width = view->vk.extent.width - 1;
desc->height = view->vk.extent.height - 1;
desc->depth = view->vk.extent.depth - 1;
desc->levels = view->vk.level_count;
desc->samples = view->vk.image->samples;
/* Stick array layer count after the last valid size component */
if (view->vk.image->image_type == VK_IMAGE_TYPE_1D)
desc->height = view->vk.layer_count - 1;
else if (view->vk.image->image_type == VK_IMAGE_TYPE_2D)
desc->depth = view->vk.layer_count - 1;
}
static void panvk_write_sampler_desc_raw(struct panvk_descriptor_set *set,
uint32_t binding, uint32_t elem,
struct panvk_sampler *sampler);
static struct panvk_descriptor_set *
panvk_descriptor_set_alloc(const struct panvk_descriptor_set_layout *layout,
const VkAllocationCallbacks *alloc,
VkSystemAllocationScope scope)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct panvk_descriptor_set, set, 1);
VK_MULTIALLOC_DECL(&ma, struct panvk_buffer_desc, dyn_ssbos,
layout->num_dyn_ssbos);
VK_MULTIALLOC_DECL(&ma, struct mali_uniform_buffer_packed, ubos,
layout->num_ubos);
VK_MULTIALLOC_DECL(&ma, struct panvk_buffer_desc, dyn_ubos,
layout->num_dyn_ubos);
VK_MULTIALLOC_DECL(&ma, struct mali_sampler_packed, samplers,
layout->num_samplers);
VK_MULTIALLOC_DECL(&ma, struct mali_texture_packed, textures,
layout->num_textures);
VK_MULTIALLOC_DECL(&ma, struct mali_attribute_buffer_packed, img_attrib_bufs,
layout->num_imgs * 2);
VK_MULTIALLOC_DECL(&ma, uint32_t, img_fmts, layout->num_imgs);
if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
return NULL;
set->layout = layout;
if (layout->num_dyn_ssbos)
set->dyn_ssbos = dyn_ssbos;
if (layout->num_ubos)
set->ubos = ubos;
if (layout->num_dyn_ubos)
set->dyn_ubos = dyn_ubos;
if (layout->num_samplers)
set->samplers = samplers;
if (layout->num_textures)
set->textures = textures;
if (layout->num_imgs) {
set->img_attrib_bufs = img_attrib_bufs;
set->img_fmts = img_fmts;
}
return set;
}
static VkResult
panvk_per_arch(descriptor_set_create)(
struct panvk_device *device, struct panvk_descriptor_pool *pool,
const struct panvk_descriptor_set_layout *layout,
struct panvk_descriptor_set **out_set)
{
/* TODO: Allocate from the pool! */
struct panvk_descriptor_set *set = panvk_descriptor_set_alloc(
layout, &device->vk.alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (layout->desc_ubo_size) {
set->desc_ubo.bo =
panvk_priv_bo_create(device, layout->desc_ubo_size, 0, NULL,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set->desc_ubo.bo)
goto err_free_set;
struct mali_uniform_buffer_packed *ubos = set->ubos;
set->desc_ubo.addr.dev = set->desc_ubo.bo->addr.dev;
set->desc_ubo.addr.host = set->desc_ubo.bo->addr.host;
pan_pack(&ubos[layout->desc_ubo_index], UNIFORM_BUFFER, cfg) {
cfg.pointer = set->desc_ubo.addr.dev;
cfg.entries = DIV_ROUND_UP(layout->desc_ubo_size, 16);
}
}
for (unsigned i = 0; i < layout->binding_count; i++) {
if (!layout->bindings[i].immutable_samplers)
continue;
for (unsigned j = 0; j < layout->bindings[i].array_size; j++) {
struct panvk_sampler *sampler =
layout->bindings[i].immutable_samplers[j];
panvk_write_sampler_desc_raw(set, i, j, sampler);
}
}
*out_set = set;
return VK_SUCCESS;
err_free_set:
if (set->desc_ubo.bo)
panvk_priv_bo_destroy(set->desc_ubo.bo, NULL);
vk_object_free(&device->vk, NULL, set);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(AllocateDescriptorSets)(
VkDevice _device, const VkDescriptorSetAllocateInfo *pAllocateInfo,
VkDescriptorSet *pDescriptorSets)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
VkResult result;
unsigned i;
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
VK_FROM_HANDLE(panvk_descriptor_set_layout, layout,
pAllocateInfo->pSetLayouts[i]);
struct panvk_descriptor_set *set = NULL;
result =
panvk_per_arch(descriptor_set_create)(device, pool, layout, &set);
if (result != VK_SUCCESS)
goto err_free_sets;
pDescriptorSets[i] = panvk_descriptor_set_to_handle(set);
}
return VK_SUCCESS;
err_free_sets:
panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i,
pDescriptorSets);
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
pDescriptorSets[i] = VK_NULL_HANDLE;
return result;
}
static void *
panvk_desc_ubo_data(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
/* Dynamic SSBO info are stored in a separate UBO allocated from the
* cmd_buffer descriptor pool.
*/
assert(binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
return (char *)set->desc_ubo.addr.host + binding_layout->desc_ubo_offset +
elem * binding_layout->desc_ubo_stride;
}
static struct mali_sampler_packed *
panvk_sampler_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
uint32_t sampler_idx = binding_layout->sampler_idx + elem;
return &((struct mali_sampler_packed *)set->samplers)[sampler_idx];
}
static void
panvk_write_sampler_desc_raw(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, struct panvk_sampler *sampler)
{
memcpy(panvk_sampler_desc(set, binding, elem), &sampler->desc,
sizeof(sampler->desc));
}
static void
panvk_write_sampler_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem,
const VkDescriptorImageInfo *const pImageInfo)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
bool push_set = set->layout->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
if (binding_layout->immutable_samplers && !push_set)
return;
struct panvk_sampler *sampler =
binding_layout->immutable_samplers
? binding_layout->immutable_samplers[elem]
: panvk_sampler_from_handle(pImageInfo->sampler);
panvk_write_sampler_desc_raw(set, binding, elem, sampler);
}
static void
panvk_copy_sampler_desc(struct panvk_descriptor_set *dst_set,
uint32_t dst_binding, uint32_t dst_elem,
struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
const struct panvk_descriptor_set_binding_layout *dst_binding_layout =
&dst_set->layout->bindings[dst_binding];
if (dst_binding_layout->immutable_samplers)
return;
memcpy(panvk_sampler_desc(dst_set, dst_binding, dst_elem),
panvk_sampler_desc(src_set, src_binding, src_elem),
sizeof(struct mali_sampler_packed));
}
static struct mali_texture_packed *
panvk_tex_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
unsigned tex_idx = binding_layout->tex_idx + elem;
return &((struct mali_texture_packed *)set->textures)[tex_idx];
}
static void
panvk_write_tex_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem,
const VkDescriptorImageInfo *const pImageInfo)
{
VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
memcpy(panvk_tex_desc(set, binding, elem), view->descs.tex.opaque,
pan_size(TEXTURE));
panvk_fill_image_desc(panvk_desc_ubo_data(set, binding, elem), view);
}
static void
panvk_copy_tex_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding,
uint32_t dst_elem, struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
*panvk_tex_desc(dst_set, dst_binding, dst_elem) =
*panvk_tex_desc(src_set, src_binding, src_elem);
/* Descriptor UBO data gets copied automatically */
}
static void
panvk_write_tex_buf_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, const VkBufferView bufferView)
{
VK_FROM_HANDLE(panvk_buffer_view, view, bufferView);
memcpy(panvk_tex_desc(set, binding, elem), view->descs.tex.opaque,
pan_size(TEXTURE));
panvk_fill_bview_desc(panvk_desc_ubo_data(set, binding, elem), view);
}
static uint32_t
panvk_img_idx(struct panvk_descriptor_set *set, uint32_t binding, uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
return binding_layout->img_idx + elem;
}
static void
panvk_write_img_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
unsigned img_idx = panvk_img_idx(set, binding, elem);
void *attrib_buf = (uint8_t *)set->img_attrib_bufs +
(pan_size(ATTRIBUTE_BUFFER) * 2 * img_idx);
set->img_fmts[img_idx] =
GENX(panfrost_format_from_pipe_format)(view->pview.format)->hw;
memcpy(attrib_buf, view->descs.img_attrib_buf,
pan_size(ATTRIBUTE_BUFFER) * 2);
panvk_fill_image_desc(panvk_desc_ubo_data(set, binding, elem), view);
}
static void
panvk_copy_img_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding,
uint32_t dst_elem, struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
unsigned dst_img_idx = panvk_img_idx(dst_set, dst_binding, dst_elem);
unsigned src_img_idx = panvk_img_idx(src_set, src_binding, src_elem);
void *dst_attrib_buf = (uint8_t *)dst_set->img_attrib_bufs +
(pan_size(ATTRIBUTE_BUFFER) * 2 * dst_img_idx);
void *src_attrib_buf = (uint8_t *)src_set->img_attrib_bufs +
(pan_size(ATTRIBUTE_BUFFER) * 2 * src_img_idx);
dst_set->img_fmts[dst_img_idx] = src_set->img_fmts[src_img_idx];
memcpy(dst_attrib_buf, src_attrib_buf, pan_size(ATTRIBUTE_BUFFER) * 2);
/* Descriptor UBO data gets copied automatically */
}
static void
panvk_write_img_buf_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, const VkBufferView bufferView)
{
VK_FROM_HANDLE(panvk_buffer_view, view, bufferView);
unsigned img_idx = panvk_img_idx(set, binding, elem);
void *attrib_buf = (uint8_t *)set->img_attrib_bufs +
(pan_size(ATTRIBUTE_BUFFER) * 2 * img_idx);
enum pipe_format pfmt = vk_format_to_pipe_format(view->vk.format);
set->img_fmts[img_idx] = GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
memcpy(attrib_buf, view->descs.img_attrib_buf,
pan_size(ATTRIBUTE_BUFFER) * 2);
panvk_fill_bview_desc(panvk_desc_ubo_data(set, binding, elem), view);
}
static struct mali_uniform_buffer_packed *
panvk_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
unsigned ubo_idx = binding_layout->ubo_idx + elem;
return &((struct mali_uniform_buffer_packed *)set->ubos)[ubo_idx];
}
static void
panvk_write_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
mali_ptr ptr = panvk_buffer_gpu_ptr(buffer, pBufferInfo->offset);
size_t size =
panvk_buffer_range(buffer, pBufferInfo->offset, pBufferInfo->range);
pan_pack(panvk_ubo_desc(set, binding, elem), UNIFORM_BUFFER, cfg) {
cfg.pointer = ptr;
cfg.entries = DIV_ROUND_UP(size, 16);
}
}
static void
panvk_copy_ubo_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding,
uint32_t dst_elem, struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
*panvk_ubo_desc(dst_set, dst_binding, dst_elem) =
*panvk_ubo_desc(src_set, src_binding, src_elem);
}
static struct panvk_buffer_desc *
panvk_dyn_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
return &set->dyn_ubos[binding_layout->dyn_ubo_idx + elem];
}
static void
panvk_write_dyn_ubo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
*panvk_dyn_ubo_desc(set, binding, elem) = (struct panvk_buffer_desc){
.buffer = buffer,
.offset = pBufferInfo->offset,
.size = pBufferInfo->range,
};
}
static void
panvk_copy_dyn_ubo_desc(struct panvk_descriptor_set *dst_set,
uint32_t dst_binding, uint32_t dst_elem,
struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
*panvk_dyn_ubo_desc(dst_set, dst_binding, dst_elem) =
*panvk_dyn_ubo_desc(src_set, src_binding, src_elem);
}
static void
panvk_write_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem, const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
struct panvk_ssbo_addr *desc = panvk_desc_ubo_data(set, binding, elem);
*desc = (struct panvk_ssbo_addr){
.base_addr = panvk_buffer_gpu_ptr(buffer, pBufferInfo->offset),
.size =
panvk_buffer_range(buffer, pBufferInfo->offset, pBufferInfo->range),
};
}
static void
panvk_copy_ssbo_desc(struct panvk_descriptor_set *dst_set, uint32_t dst_binding,
uint32_t dst_elem, struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
/* Descriptor UBO data gets copied automatically */
}
static struct panvk_buffer_desc *
panvk_dyn_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem)
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
return &set->dyn_ssbos[binding_layout->dyn_ssbo_idx + elem];
}
static void
panvk_write_dyn_ssbo_desc(struct panvk_descriptor_set *set, uint32_t binding,
uint32_t elem,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
*panvk_dyn_ssbo_desc(set, binding, elem) = (struct panvk_buffer_desc){
.buffer = buffer,
.offset = pBufferInfo->offset,
.size = pBufferInfo->range,
};
}
static void
panvk_copy_dyn_ssbo_desc(struct panvk_descriptor_set *dst_set,
uint32_t dst_binding, uint32_t dst_elem,
struct panvk_descriptor_set *src_set,
uint32_t src_binding, uint32_t src_elem)
{
*panvk_dyn_ssbo_desc(dst_set, dst_binding, dst_elem) =
*panvk_dyn_ssbo_desc(src_set, src_binding, src_elem);
}
static void
panvk_descriptor_set_write(struct panvk_descriptor_set *set,
const VkWriteDescriptorSet *write)
{
switch (write->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_sampler_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pImageInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_sampler_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pImageInfo[j]);
panvk_write_tex_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pImageInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_tex_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pImageInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_img_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pImageInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_tex_buf_desc(set, write->dstBinding,
write->dstArrayElement + j,
write->pTexelBufferView[j]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_img_buf_desc(set, write->dstBinding,
write->dstArrayElement + j,
write->pTexelBufferView[j]);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_ubo_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pBufferInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_dyn_ubo_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pBufferInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_ssbo_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pBufferInfo[j]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
panvk_write_dyn_ssbo_desc(set, write->dstBinding,
write->dstArrayElement + j,
&write->pBufferInfo[j]);
}
break;
default:
unreachable("Unsupported descriptor type");
}
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSets)(
VkDevice _device, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
for (unsigned i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
VK_FROM_HANDLE(panvk_descriptor_set, set, write->dstSet);
panvk_descriptor_set_write(set, write);
}
for (unsigned i = 0; i < descriptorCopyCount; i++) {
const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
VK_FROM_HANDLE(panvk_descriptor_set, src_set, copy->srcSet);
VK_FROM_HANDLE(panvk_descriptor_set, dst_set, copy->dstSet);
const struct panvk_descriptor_set_binding_layout *dst_binding_layout =
&dst_set->layout->bindings[copy->dstBinding];
const struct panvk_descriptor_set_binding_layout *src_binding_layout =
&src_set->layout->bindings[copy->srcBinding];
assert(dst_binding_layout->type == src_binding_layout->type);
/* Dynamic SSBO info are stored in a separate UBO allocated from the
* cmd_buffer descriptor pool.
*/
bool src_has_data_in_desc_ubo =
src_binding_layout->desc_ubo_stride > 0 &&
src_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
bool dst_has_data_in_desc_ubo =
dst_binding_layout->desc_ubo_stride > 0 &&
dst_binding_layout->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
if (src_has_data_in_desc_ubo && dst_has_data_in_desc_ubo) {
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
memcpy(panvk_desc_ubo_data(dst_set, copy->dstBinding,
copy->dstArrayElement + j),
panvk_desc_ubo_data(src_set, copy->srcBinding,
copy->srcArrayElement + j),
MIN2(dst_binding_layout->desc_ubo_stride,
src_binding_layout->desc_ubo_stride));
}
}
switch (src_binding_layout->type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_sampler_desc(
dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_sampler_desc(
dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
panvk_copy_tex_desc(dst_set, copy->dstBinding,
copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_tex_desc(dst_set, copy->dstBinding,
copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_img_desc(dst_set, copy->dstBinding,
copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_ubo_desc(dst_set, copy->dstBinding,
copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_dyn_ubo_desc(
dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_ssbo_desc(dst_set, copy->dstBinding,
copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
panvk_copy_dyn_ssbo_desc(
dst_set, copy->dstBinding, copy->dstArrayElement + j, src_set,
copy->srcBinding, copy->srcArrayElement + j);
}
break;
default:
unreachable("Unsupported descriptor type");
}
}
}
static void
panvk_descriptor_set_update_with_template(struct panvk_descriptor_set *set,
VkDescriptorUpdateTemplate templ,
const void *data)
{
VK_FROM_HANDLE(vk_descriptor_update_template, template, templ);
for (uint32_t i = 0; i < template->entry_count; i++) {
const struct vk_descriptor_template_entry *entry = &template->entries[i];
switch (entry->type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorImageInfo *info =
data + entry->offset + j * entry->stride;
if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLER ||
entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
panvk_write_sampler_desc(set, entry->binding,
entry->array_element + j, info);
}
if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
entry->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
panvk_write_tex_desc(set, entry->binding,
entry->array_element + j, info);
}
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorImageInfo *info =
data + entry->offset + j * entry->stride;
panvk_write_img_desc(set, entry->binding, entry->array_element + j,
info);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkBufferView *view = data + entry->offset + j * entry->stride;
panvk_write_tex_buf_desc(set, entry->binding,
entry->array_element + j, *view);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkBufferView *view = data + entry->offset + j * entry->stride;
panvk_write_img_buf_desc(set, entry->binding,
entry->array_element + j, *view);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorBufferInfo *info =
data + entry->offset + j * entry->stride;
panvk_write_ubo_desc(set, entry->binding, entry->array_element + j,
info);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorBufferInfo *info =
data + entry->offset + j * entry->stride;
panvk_write_dyn_ubo_desc(set, entry->binding,
entry->array_element + j, info);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorBufferInfo *info =
data + entry->offset + j * entry->stride;
panvk_write_ssbo_desc(set, entry->binding, entry->array_element + j,
info);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (unsigned j = 0; j < entry->array_count; j++) {
const VkDescriptorBufferInfo *info =
data + entry->offset + j * entry->stride;
panvk_write_dyn_ssbo_desc(set, entry->binding,
entry->array_element + j, info);
}
break;
default:
unreachable("Invalid type");
}
}
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSetWithTemplate)(
VkDevice _device, VkDescriptorSet descriptorSet,
VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *data)
{
VK_FROM_HANDLE(panvk_descriptor_set, set, descriptorSet);
panvk_descriptor_set_update_with_template(set, descriptorUpdateTemplate,
data);
}
void
panvk_per_arch(push_descriptor_set_assign_layout)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout)
{
ASSERTED unsigned num_descs = layout->num_samplers + layout->num_textures +
layout->num_ubos + layout->num_imgs;
struct panvk_descriptor_set *set = &push_set->set;
unsigned desc_offset = 0;
set->layout = layout;
assert(layout->num_dyn_ubos == 0);
assert(layout->num_dyn_ssbos == 0);
assert(num_descs <= PANVK_MAX_PUSH_DESCS);
assert(layout->desc_ubo_size <= sizeof(push_set->storage.desc_ubo));
if (layout->num_ubos) {
set->ubos = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_ubos;
}
if (layout->num_samplers) {
set->samplers = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_samplers;
}
if (layout->num_textures) {
set->textures = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_textures;
}
if (layout->num_imgs) {
set->img_attrib_bufs = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_imgs;
set->img_fmts = push_set->storage.img_fmts;
}
if (layout->desc_ubo_size)
set->desc_ubo.addr.host = push_set->storage.desc_ubo;
}
void
panvk_per_arch(push_descriptor_set)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout, uint32_t write_count,
const VkWriteDescriptorSet *writes)
{
panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout);
for (unsigned i = 0; i < write_count; i++) {
const VkWriteDescriptorSet *write = &writes[i];
panvk_descriptor_set_write(&push_set->set, write);
}
}
void
panvk_per_arch(push_descriptor_set_with_template)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
VkDescriptorUpdateTemplate templ, const void *data)
{
panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout);
panvk_descriptor_set_update_with_template(&push_set->set, templ, data);
}

View file

@ -1,246 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#include "vk_descriptors.h"
#include "vk_log.h"
#include "panvk_descriptor_set.h"
#include "panvk_descriptor_set_layout.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_pipeline_layout.h"
#include "panvk_sampler.h"
#define PANVK_DESCRIPTOR_ALIGN 8
/* FIXME: make sure those values are correct */
#define PANVK_MAX_TEXTURES (1 << 16)
#define PANVK_MAX_IMAGES (1 << 8)
#define PANVK_MAX_SAMPLERS (1 << 16)
#define PANVK_MAX_UBOS 255
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetDescriptorSetLayoutSupport)(
VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
VkDescriptorSetLayoutSupport *pSupport)
{
VK_FROM_HANDLE(panvk_device, device, _device);
pSupport->supported = false;
VkDescriptorSetLayoutBinding *bindings;
VkResult result = vk_create_sorted_bindings(
pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
if (result != VK_SUCCESS) {
vk_error(device, result);
return;
}
unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0;
unsigned img_idx = 0;
UNUSED unsigned dynoffset_idx = 0;
for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
const VkDescriptorSetLayoutBinding *binding = &bindings[i];
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
sampler_idx += binding->descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
sampler_idx += binding->descriptorCount;
tex_idx += binding->descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
tex_idx += binding->descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
dynoffset_idx += binding->descriptorCount;
FALLTHROUGH;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
ubo_idx += binding->descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
dynoffset_idx += binding->descriptorCount;
FALLTHROUGH;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
img_idx += binding->descriptorCount;
break;
default:
unreachable("Invalid descriptor type");
}
}
free(bindings);
/* The maximum values apply to all sets attached to a pipeline since the
* descriptors of all sets have to be merged into a single array.
*/
if (tex_idx > PANVK_MAX_TEXTURES / MAX_SETS ||
sampler_idx > PANVK_MAX_SAMPLERS / MAX_SETS ||
ubo_idx > PANVK_MAX_UBOS / MAX_SETS ||
img_idx > PANVK_MAX_IMAGES / MAX_SETS)
return;
pSupport->supported = true;
}
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(CreateDescriptorSetLayout)(
VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_descriptor_set_layout *set_layout;
VkDescriptorSetLayoutBinding *bindings = NULL;
unsigned num_bindings = 0;
VkResult result;
if (pCreateInfo->bindingCount) {
result = vk_create_sorted_bindings(pCreateInfo->pBindings,
pCreateInfo->bindingCount, &bindings);
if (result != VK_SUCCESS)
return vk_error(device, result);
num_bindings = bindings[pCreateInfo->bindingCount - 1].binding + 1;
}
unsigned num_immutable_samplers = 0;
for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
if (bindings[i].pImmutableSamplers)
num_immutable_samplers += bindings[i].descriptorCount;
}
size_t size =
sizeof(*set_layout) +
(sizeof(struct panvk_descriptor_set_binding_layout) * num_bindings) +
(sizeof(struct panvk_sampler *) * num_immutable_samplers);
set_layout = vk_descriptor_set_layout_zalloc(&device->vk, size);
if (!set_layout) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto err_free_bindings;
}
set_layout->flags = pCreateInfo->flags;
struct panvk_sampler **immutable_samplers =
(struct panvk_sampler **)((uint8_t *)set_layout + sizeof(*set_layout) +
(sizeof(
struct panvk_descriptor_set_binding_layout) *
num_bindings));
set_layout->binding_count = num_bindings;
unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0;
unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0;
uint32_t desc_ubo_size = 0, dyn_desc_ubo_size = 0;
for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
const VkDescriptorSetLayoutBinding *binding = &bindings[i];
struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding->binding];
binding_layout->type = binding->descriptorType;
binding_layout->array_size = binding->descriptorCount;
binding_layout->shader_stages = binding->stageFlags;
binding_layout->desc_ubo_stride = 0;
if (binding->pImmutableSamplers) {
binding_layout->immutable_samplers = immutable_samplers;
immutable_samplers += binding_layout->array_size;
for (unsigned j = 0; j < binding_layout->array_size; j++) {
VK_FROM_HANDLE(panvk_sampler, sampler,
binding->pImmutableSamplers[j]);
binding_layout->immutable_samplers[j] = sampler;
}
}
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
binding_layout->sampler_idx = sampler_idx;
sampler_idx += binding_layout->array_size;
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
binding_layout->sampler_idx = sampler_idx;
binding_layout->tex_idx = tex_idx;
sampler_idx += binding_layout->array_size;
tex_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
binding_layout->tex_idx = tex_idx;
tex_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
binding_layout->tex_idx = tex_idx;
tex_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_bview_desc);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
binding_layout->dyn_ubo_idx = dyn_ubo_idx;
dyn_ubo_idx += binding_layout->array_size;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
binding_layout->ubo_idx = ubo_idx;
ubo_idx += binding_layout->array_size;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
binding_layout->dyn_ssbo_idx = dyn_ssbo_idx;
dyn_ssbo_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
binding_layout->desc_ubo_stride = sizeof(struct panvk_ssbo_addr);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
binding_layout->img_idx = img_idx;
img_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_image_desc);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
binding_layout->img_idx = img_idx;
img_idx += binding_layout->array_size;
binding_layout->desc_ubo_stride = sizeof(struct panvk_bview_desc);
break;
default:
unreachable("Invalid descriptor type");
}
if (binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
binding_layout->desc_ubo_offset = dyn_desc_ubo_size;
dyn_desc_ubo_size +=
binding_layout->desc_ubo_stride * binding_layout->array_size;
} else {
desc_ubo_size = ALIGN_POT(desc_ubo_size, PANVK_DESCRIPTOR_ALIGN);
binding_layout->desc_ubo_offset = desc_ubo_size;
desc_ubo_size +=
binding_layout->desc_ubo_stride * binding_layout->array_size;
}
}
set_layout->desc_ubo_size = desc_ubo_size;
if (desc_ubo_size > 0)
set_layout->desc_ubo_index = ubo_idx++;
set_layout->num_samplers = sampler_idx;
set_layout->num_textures = tex_idx;
set_layout->num_ubos = ubo_idx;
set_layout->num_dyn_ubos = dyn_ubo_idx;
set_layout->num_dyn_ssbos = dyn_ssbo_idx;
set_layout->num_imgs = img_idx;
free(bindings);
*pSetLayout = panvk_descriptor_set_layout_to_handle(set_layout);
return VK_SUCCESS;
err_free_bindings:
free(bindings);
return vk_error(device, result);
}

View file

@ -0,0 +1,375 @@
/*
* Copyright © 2024 Collabora Ltd.
*
* SPDX-License-Identifier: MIT
*/
#include <stddef.h>
#include <stdint.h>
#include "genxml/gen_macros.h"
#include "nir.h"
#include "nir_builder.h"
#include "pan_encoder.h"
#include "pan_shader.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_pipeline.h"
#include "panvk_shader.h"
struct pan_nir_desc_copy_info {
mali_ptr sets[MAX_SETS];
mali_ptr tables[PANVK_BIFROST_DESC_TABLE_COUNT];
mali_ptr img_attrib_table;
struct {
mali_ptr table;
uint32_t limits[PANVK_BIFROST_DESC_TABLE_COUNT];
uint32_t attrib_buf_idx_offset;
} desc_copy;
uint32_t set_desc_counts[MAX_SETS];
};
#define get_input_field(b, name) \
nir_load_push_constant( \
b, 1, sizeof(((struct pan_nir_desc_copy_info *)0)->name) * 8, \
nir_imm_int(b, 0), \
.base = offsetof(struct pan_nir_desc_copy_info, name), \
.range = sizeof(((struct pan_nir_desc_copy_info *)0)->name))
#define get_input_array_slot(b, name, index) \
nir_load_push_constant( \
b, 1, sizeof(((struct pan_nir_desc_copy_info *)0)->name[0]) * 8, \
nir_imul_imm(b, index, \
sizeof(((struct pan_nir_desc_copy_info *)0)->name[0])), \
.base = offsetof(struct pan_nir_desc_copy_info, name), \
.range = sizeof(((struct pan_nir_desc_copy_info *)0)->name))
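/* Illustrative expansion (a sketch, not part of the change): with the struct
* above, get_input_field(b, desc_copy.attrib_buf_idx_offset) turns into a
* 32-bit push-constant load based at
* offsetof(struct pan_nir_desc_copy_info, desc_copy.attrib_buf_idx_offset),
* while get_input_array_slot(b, sets, set_idx) is a 64-bit load at
* offsetof(struct pan_nir_desc_copy_info, sets) + set_idx * sizeof(mali_ptr). */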
static void
extract_desc_info_from_handle(nir_builder *b, nir_def *handle, nir_def **table,
nir_def **desc_idx)
{
*table = nir_ushr_imm(b, handle, 28);
*desc_idx = nir_iand_imm(b, handle, 0xfffffff);
}
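/* Worked example (illustrative only): a handle of 0x30000007 splits into an
* upper-4-bit index of 3 and a lower-28-bit descriptor index of 7, assuming
* the COPY_DESC_HANDLE() encoding packs the set (or table) in the top four
* bits. */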
static void
set_to_table_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
nir_def *src_desc_idx, nir_def *table_ptr,
nir_def *dst_desc_idx, unsigned element_size)
{
/* The last binding can have
* VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT set, so we need to make
* sure we don't do an out-of-bound access on the source set. */
nir_def *dst_offset =
nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, element_size));
nir_push_if(b, nir_ult(b, src_desc_idx, set_desc_count));
{
nir_def *src_offset =
nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
nir_def *desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
element_size, element_size / 4, 32);
nir_store_global(b, nir_iadd(b, table_ptr, dst_offset), element_size,
desc, ~0);
}
nir_push_else(b, NULL);
{
nir_const_value v[] = {
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
};
nir_def *desc = nir_build_imm(b, element_size / 4, 32, v);
nir_store_global(b, nir_iadd(b, table_ptr, dst_offset), element_size,
desc, ~0);
}
nir_pop_if(b, NULL);
}
static void
set_to_table_img_copy(nir_builder *b, nir_def *set_ptr, nir_def *set_desc_count,
nir_def *src_desc_idx, nir_def *attrib_table_ptr,
nir_def *attrib_buf_table_ptr, nir_def *dst_desc_idx)
{
/* The last binding can have
* VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT set, we need to make
* sure we don't do an out-of-bound access on the source set. */
const unsigned element_size = pan_size(ATTRIBUTE_BUFFER) * 2;
const unsigned attrib_buf_comps = element_size / 4;
const unsigned attrib_comps = pan_size(ATTRIBUTE) / 4;
nir_def *attrib_offset =
nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, pan_size(ATTRIBUTE)));
nir_def *attrib_buf_offset =
nir_u2u64(b, nir_imul_imm(b, dst_desc_idx, element_size));
nir_push_if(b, nir_ult(b, src_desc_idx, set_desc_count));
{
nir_def *attr_buf_idx_offset =
get_input_field(b, desc_copy.attrib_buf_idx_offset);
nir_def *src_offset =
nir_u2u64(b, nir_imul_imm(b, src_desc_idx, PANVK_DESCRIPTOR_SIZE));
nir_def *src_desc = nir_load_global(b, nir_iadd(b, set_ptr, src_offset),
element_size, element_size / 4, 32);
nir_def *fmt = nir_iand_imm(b, nir_channel(b, src_desc, 2), 0xfffffc00);
/* Each image descriptor takes two attribute buffer slots, and we need
* to add the attribute buffer offset to make images work in vertex
* shaders. */
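/* Worked example (illustrative): with dst_desc_idx = 3 in a vertex shader
* (attrib_buf_idx_offset = MAX_VS_ATTRIBS), buf_idx = 3 * 2 + MAX_VS_ATTRIBS,
* so the image's two attribute-buffer slots land right after the vertex
* attribute buffers. */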
nir_def *buf_idx =
nir_iadd(b, nir_imul_imm(b, dst_desc_idx, 2), attr_buf_idx_offset);
nir_def *attrib_w1 = nir_ior(b, buf_idx, fmt);
nir_def *attrib_desc = nir_vec2(b, attrib_w1, nir_imm_int(b, 0));
nir_store_global(b, nir_iadd(b, attrib_table_ptr, attrib_offset),
pan_size(ATTRIBUTE), attrib_desc,
nir_component_mask(attrib_comps));
nir_def *attrib_buf_desc = nir_vec8(
b, nir_channel(b, src_desc, 0), nir_channel(b, src_desc, 1),
nir_iand_imm(b, nir_channel(b, src_desc, 2), BITFIELD_MASK(10)),
nir_channel(b, src_desc, 3), nir_channel(b, src_desc, 4),
nir_channel(b, src_desc, 5), nir_channel(b, src_desc, 6),
nir_channel(b, src_desc, 7));
nir_store_global(b, nir_iadd(b, attrib_buf_table_ptr, attrib_buf_offset),
element_size, attrib_buf_desc,
nir_component_mask(attrib_buf_comps));
}
nir_push_else(b, NULL);
{
nir_const_value v[] = {
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
nir_const_value_for_uint(0, 32), nir_const_value_for_uint(0, 32),
};
nir_def *desc =
nir_build_imm(b, MAX2(attrib_buf_comps, attrib_comps), 32, v);
nir_store_global(b, nir_iadd(b, attrib_buf_table_ptr, attrib_buf_offset),
pan_size(ATTRIBUTE), desc,
nir_component_mask(attrib_buf_comps));
nir_store_global(b, nir_iadd(b, attrib_table_ptr, attrib_offset),
element_size, desc, nir_component_mask(attrib_comps));
}
nir_pop_if(b, NULL);
}
static void
single_desc_copy(nir_builder *b, nir_def *desc_copy_idx)
{
nir_def *desc_copy_offset = nir_imul_imm(b, desc_copy_idx, sizeof(uint32_t));
nir_def *desc_copy_ptr = nir_iadd(b, get_input_field(b, desc_copy.table),
nir_u2u64(b, desc_copy_offset));
nir_def *src_copy_handle = nir_load_global(b, desc_copy_ptr, 4, 1, 32);
nir_def *set_idx, *src_desc_idx;
extract_desc_info_from_handle(b, src_copy_handle, &set_idx, &src_desc_idx);
nir_def *set_ptr = get_input_array_slot(b, sets, set_idx);
nir_def *set_desc_count = get_input_array_slot(b, set_desc_counts, set_idx);
nir_def *ubo_end =
get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_UBO]);
nir_def *img_end =
get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_IMG]);
nir_def *tex_end =
get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_TEXTURE]);
nir_def *sampler_end =
get_input_field(b, desc_copy.limits[PANVK_BIFROST_DESC_TABLE_SAMPLER]);
nir_push_if(b, nir_ult(b, desc_copy_idx, ubo_end));
{
nir_def *table_ptr =
get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_UBO]);
set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx, table_ptr,
desc_copy_idx, sizeof(struct mali_attribute_packed));
}
nir_push_else(b, NULL);
{
nir_push_if(b, nir_ult(b, desc_copy_idx, img_end));
{
nir_def *table_ptr =
get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_IMG]);
nir_def *attrib_table_ptr = get_input_field(b, img_attrib_table);
nir_def *attrib_buf_table_ptr = table_ptr;
set_to_table_img_copy(b, set_ptr, set_desc_count, src_desc_idx,
attrib_table_ptr, attrib_buf_table_ptr,
nir_isub(b, desc_copy_idx, ubo_end));
}
nir_push_else(b, NULL);
{
nir_push_if(b, nir_ult(b, desc_copy_idx, tex_end));
{
nir_def *table_ptr =
get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]);
set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx,
table_ptr, nir_isub(b, desc_copy_idx, img_end),
sizeof(struct mali_texture_packed));
}
nir_push_else(b, NULL);
{
nir_push_if(b, nir_ult(b, desc_copy_idx, sampler_end));
{
nir_def *table_ptr =
get_input_field(b, tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]);
set_to_table_copy(b, set_ptr, set_desc_count, src_desc_idx,
table_ptr, nir_isub(b, desc_copy_idx, tex_end),
sizeof(struct mali_sampler_packed));
}
nir_pop_if(b, NULL);
}
nir_pop_if(b, NULL);
}
nir_pop_if(b, NULL);
}
nir_pop_if(b, NULL);
}
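/* Worked example (illustrative only): assume a shader using 4 UBOs, 1 storage
* image, 3 textures and 2 samplers, with the table order UBO, IMG, TEXTURE,
* SAMPLER. The cumulative limits pushed by the CPU are then {4, 5, 8, 10},
* and a desc_copy_idx of 6 falls in the texture range, so the copy lands in
* texture slot 6 - 5 = 1 of the texture table. */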
static mali_ptr
panvk_meta_desc_copy_shader(struct panvk_device *dev,
struct pan_shader_info *shader_info)
{
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(dev->vk.physical);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s",
"desc_copy");
/* The real invocation count is set at execution time so that we issue
* exactly one job per descriptor copy. */
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
nir_def *desc_copy_id =
nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
single_desc_copy(&b, desc_copy_id);
struct panfrost_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_prod_id,
.no_ubo_to_push = true,
};
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
pan_shader_preprocess(b.shader, inputs.gpu_id);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
ralloc_free(b.shader);
shader_info->push.count =
DIV_ROUND_UP(sizeof(struct pan_nir_desc_copy_info), 4);
mali_ptr shader = pan_pool_upload_aligned(&dev->meta.bin_pool.base,
binary.data, binary.size, 128);
util_dynarray_fini(&binary);
return shader;
}
void
panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev)
{
struct pan_shader_info shader_info;
mali_ptr shader = panvk_meta_desc_copy_shader(dev, &shader_info);
struct panfrost_ptr rsd =
pan_pool_alloc_desc(&dev->meta.desc_pool.base, RENDERER_STATE);
pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, shader, &cfg);
}
dev->meta.desc_copy.rsd = rsd.gpu;
}
struct panfrost_ptr
panvk_per_arch(meta_get_copy_desc_job)(
struct panvk_device *dev, struct pan_pool *desc_pool,
const struct panvk_pipeline_shader *shader,
const struct panvk_descriptor_state *desc_state,
const struct panvk_shader_desc_state *shader_desc_state)
{
mali_ptr copy_table = shader->desc_info.others.map;
if (!copy_table)
return (struct panfrost_ptr){0};
struct pan_nir_desc_copy_info copy_info = {
.img_attrib_table = shader_desc_state->img_attrib_table,
.desc_copy = {
.table = copy_table,
.attrib_buf_idx_offset =
shader->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0,
},
};
for (uint32_t i = 0; i < ARRAY_SIZE(copy_info.desc_copy.limits); i++)
copy_info.desc_copy.limits[i] =
shader->desc_info.others.count[i] +
(i > 0 ? copy_info.desc_copy.limits[i - 1] : 0);
for (uint32_t i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
const struct panvk_descriptor_set *set = desc_state->sets[i];
if (!set)
continue;
copy_info.sets[i] = set->descs.dev;
copy_info.set_desc_counts[i] = set->desc_count;
}
for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
uint32_t desc_count = shader->desc_info.others.count[i];
if (!desc_count)
continue;
copy_info.tables[i] = shader_desc_state->tables[i];
}
mali_ptr push_uniforms =
pan_pool_upload_aligned(desc_pool, &copy_info, sizeof(copy_info), 16);
struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);
/* Given the per-stage max descriptors limit, we should never
* reach the workgroup dimension limit. */
uint32_t copy_count =
copy_info.desc_copy.limits[PANVK_BIFROST_DESC_TABLE_COUNT - 1];
assert(copy_count - 1 < BITFIELD_MASK(10));
panfrost_pack_work_groups_compute(
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), 1, 1, 1, copy_count, 1,
1, false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = util_logbase2_ceil(copy_count + 1) +
util_logbase2_ceil(1 + 1) +
util_logbase2_ceil(1 + 1);
}
struct pan_tls_info tlsinfo = {0};
struct panfrost_ptr tls = pan_pool_alloc_desc(desc_pool, LOCAL_STORAGE);
GENX(pan_emit_tls)(&tlsinfo, tls.cpu);
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = dev->meta.desc_copy.rsd;
cfg.push_uniforms = push_uniforms;
cfg.thread_storage = tls.gpu;
}
return job;
}

View file

@ -28,24 +28,26 @@
*/
#include "panvk_device.h"
#include "panvk_pipeline_layout.h"
#include "panvk_shader.h"
#include "vk_pipeline_layout.h"
#include "util/bitset.h"
#include "nir.h"
#include "nir_builder.h"
struct apply_descriptors_ctx {
const struct panvk_pipeline_layout *layout;
struct lower_desc_ctx {
const struct panvk_descriptor_set_layout *set_layouts[MAX_SETS];
struct panvk_shader_desc_info *desc_info;
struct hash_table *ht;
bool add_bounds_checks;
bool has_img_access;
nir_address_format desc_addr_format;
nir_address_format ubo_addr_format;
nir_address_format ssbo_addr_format;
};
static nir_address_format
addr_format_for_desc_type(VkDescriptorType desc_type,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
switch (desc_type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@ -62,18 +64,130 @@ addr_format_for_desc_type(VkDescriptorType desc_type,
}
static const struct panvk_descriptor_set_layout *
get_set_layout(uint32_t set, const struct apply_descriptors_ctx *ctx)
get_set_layout(uint32_t set, const struct lower_desc_ctx *ctx)
{
return vk_to_panvk_descriptor_set_layout(ctx->layout->vk.set_layouts[set]);
return ctx->set_layouts[set];
}
static const struct panvk_descriptor_set_binding_layout *
get_binding_layout(uint32_t set, uint32_t binding,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
return &get_set_layout(set, ctx)->bindings[binding];
}
#define DELETED_KEY (void *)(uintptr_t)1
struct desc_id {
uint32_t set;
uint32_t binding;
uint32_t subdesc;
};
static void *
desc_id_to_key(struct desc_id id)
{
assert(id.set <= BITFIELD_MASK(4));
assert(id.subdesc <= BITFIELD_MASK(1));
assert(id.binding <= BITFIELD_MASK(27));
uint32_t handle = (id.set << 28) | (id.subdesc << 27) | id.binding;
assert(handle < UINT32_MAX - 2);
return (void *)(uintptr_t)(handle + 2);
}
static struct desc_id
key_to_desc_id(const void *key)
{
uint32_t handle = (uintptr_t)key - 2;
return (struct desc_id){
.set = handle >> 28,
.subdesc = (handle & BITFIELD_BIT(27)) ? 1 : 0,
.binding = handle & BITFIELD_MASK(27),
};
}
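/* Worked example (illustrative): set 2, binding 5, subdesc 1 (the sampler
* half of a combined image/sampler) encodes to (2 << 28) | (1 << 27) | 5 =
* 0x28000005, stored as key 0x28000007 after the +2 shift that keeps 0 (NULL)
* and 1 (DELETED_KEY) available for the hash table. */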
static enum panvk_bifrost_desc_table_type
desc_type_to_table_type(VkDescriptorType type, unsigned subdesc_idx)
{
switch (type) {
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
return subdesc_idx == 1 ? PANVK_BIFROST_DESC_TABLE_SAMPLER
: PANVK_BIFROST_DESC_TABLE_TEXTURE;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
return PANVK_BIFROST_DESC_TABLE_TEXTURE;
case VK_DESCRIPTOR_TYPE_SAMPLER:
return PANVK_BIFROST_DESC_TABLE_SAMPLER;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
return PANVK_BIFROST_DESC_TABLE_IMG;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
return PANVK_BIFROST_DESC_TABLE_UBO;
default:
return PANVK_BIFROST_DESC_TABLE_INVALID;
}
}
static uint32_t
get_subdesc_idx(const struct panvk_descriptor_set_binding_layout *bind_layout,
VkDescriptorType subdesc_type)
{
if (bind_layout->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
assert(subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLER);
return subdesc_type == VK_DESCRIPTOR_TYPE_SAMPLER ? 1 : 0;
}
return 0;
}
static uint32_t
shader_desc_idx(uint32_t set, uint32_t binding, VkDescriptorType subdesc_type,
const struct lower_desc_ctx *ctx)
{
const struct panvk_descriptor_set_layout *set_layout =
get_set_layout(set, ctx);
const struct panvk_descriptor_set_binding_layout *bind_layout =
&set_layout->bindings[binding];
struct desc_id src = {
.set = set,
.subdesc = get_subdesc_idx(bind_layout, subdesc_type),
.binding = binding,
};
struct hash_entry *he =
_mesa_hash_table_search(ctx->ht, desc_id_to_key(src));
assert(he);
const struct panvk_shader_desc_map *map;
uint32_t *entry = he->data;
if (bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
map = &ctx->desc_info->dyn_ubos;
} else if (bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
map = &ctx->desc_info->dyn_ssbos;
} else {
uint32_t table = desc_type_to_table_type(bind_layout->type, src.subdesc);
assert(table < PANVK_BIFROST_DESC_TABLE_COUNT);
map = &ctx->desc_info->others[table];
}
assert(entry >= map->map && entry < map->map + map->count);
uint32_t idx = entry - map->map;
/* Adjust the destination index for all dynamic UBOs, which are laid out
* just after the regular UBOs in the UBO table. */
if (bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
idx += ctx->desc_info->others[PANVK_BIFROST_DESC_TABLE_UBO].count;
return idx;
}
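/* Worked example (illustrative): if the shader uses 6 regular UBOs and this
* binding is the second entry recorded in dyn_ubos.map (entry - map->map ==
* 1), the index returned is 1 + 6 = 7, matching the layout where dynamic
* UBOs follow the regular ones in the UBO table. */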
/** Build a Vulkan resource index
*
* A "resource index" is the term used by our SPIR-V parser and the relevant
@ -97,24 +211,22 @@ get_binding_layout(uint32_t set, uint32_t binding,
static nir_def *
build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
nir_def *array_index, nir_address_format addr_format,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
const struct panvk_descriptor_set_layout *set_layout =
get_set_layout(set, ctx);
const struct panvk_descriptor_set_binding_layout *bind_layout =
&set_layout->bindings[binding];
uint32_t array_size = bind_layout->array_size;
uint32_t array_size = bind_layout->desc_count;
switch (bind_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
assert(addr_format == nir_address_format_32bit_index_offset);
const unsigned ubo_idx = panvk_per_arch(pipeline_layout_ubo_index)(
ctx->layout, set, binding, 0);
const uint32_t packed = (array_size - 1) << 16 | ubo_idx;
const uint32_t packed =
(array_size - 1) << 16 |
shader_desc_idx(set, binding, bind_layout->type, ctx);
return nir_vec2(b, nir_imm_int(b, packed), array_index);
}
@ -124,24 +236,31 @@ build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
assert(addr_format == nir_address_format_64bit_bounded_global ||
addr_format == nir_address_format_64bit_global_32bit_offset);
const bool is_dynamic =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
const unsigned desc_ubo_idx =
is_dynamic
? panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(ctx->layout)
: panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set,
false) +
set_layout->desc_ubo_index;
const unsigned desc_ubo_offset =
bind_layout->desc_ubo_offset +
(is_dynamic ? ctx->layout->sets[set].dyn_desc_ubo_offset : 0);
unsigned base_addr_sysval_offs;
uint32_t desc_idx =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
? shader_desc_idx(set, binding, bind_layout->type, ctx)
: bind_layout->desc_idx;
const uint32_t packed =
(bind_layout->desc_ubo_stride << 16) | desc_ubo_idx;
if (b->shader->info.stage == MESA_SHADER_COMPUTE)
base_addr_sysval_offs =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
? offsetof(struct panvk_compute_sysvals, desc.sets[set])
: offsetof(struct panvk_compute_sysvals, desc.dyn_ssbos);
else if (b->shader->info.stage == MESA_SHADER_VERTEX)
base_addr_sysval_offs =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
? offsetof(struct panvk_graphics_sysvals, desc.sets[set])
: offsetof(struct panvk_graphics_sysvals, desc.vs_dyn_ssbos);
else
base_addr_sysval_offs =
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
? offsetof(struct panvk_graphics_sysvals, desc.sets[set])
: offsetof(struct panvk_graphics_sysvals, desc.fs_dyn_ssbos);
return nir_vec4(b, nir_imm_int(b, packed),
nir_imm_int(b, desc_ubo_offset),
nir_imm_int(b, array_size - 1), array_index);
return nir_vec4(b, nir_imm_int(b, base_addr_sysval_offs),
nir_imm_int(b, desc_idx), array_index,
nir_imm_int(b, array_size - 1));
}
default:
@ -169,8 +288,8 @@ build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
case nir_address_format_64bit_bounded_global:
case nir_address_format_64bit_global_32bit_offset:
return nir_vec4(b, nir_channel(b, orig, 0), nir_channel(b, orig, 1),
nir_channel(b, orig, 2),
nir_iadd(b, nir_channel(b, orig, 3), delta));
nir_iadd(b, nir_channel(b, orig, 2), delta),
nir_channel(b, orig, 3));
default:
unreachable("Unhandled address format");
@ -187,40 +306,40 @@ build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
static nir_def *
build_buffer_addr_for_res_index(nir_builder *b, nir_def *res_index,
nir_address_format addr_format,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
switch (addr_format) {
case nir_address_format_32bit_index_offset: {
nir_def *packed = nir_channel(b, res_index, 0);
nir_def *array_index = nir_channel(b, res_index, 1);
nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
nir_def *first_desc_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
nir_def *array_max = nir_extract_u16(b, packed, nir_imm_int(b, 1));
if (ctx->add_bounds_checks)
array_index = nir_umin(b, array_index, array_max);
return nir_vec2(b, nir_iadd(b, surface_index, array_index),
return nir_vec2(b, nir_iadd(b, first_desc_index, array_index),
nir_imm_int(b, 0));
}
case nir_address_format_64bit_bounded_global:
case nir_address_format_64bit_global_32bit_offset: {
nir_def *packed = nir_channel(b, res_index, 0);
nir_def *desc_ubo_offset = nir_channel(b, res_index, 1);
nir_def *array_max = nir_channel(b, res_index, 2);
nir_def *array_index = nir_channel(b, res_index, 3);
nir_def *desc_ubo_idx = nir_extract_u16(b, packed, nir_imm_int(b, 0));
nir_def *desc_ubo_stride = nir_extract_u16(b, packed, nir_imm_int(b, 1));
nir_def *base_addr_sysval_offset = nir_channel(b, res_index, 0);
nir_def *first_desc_index = nir_channel(b, res_index, 1);
nir_def *array_index = nir_channel(b, res_index, 2);
nir_def *array_max = nir_channel(b, res_index, 3);
if (ctx->add_bounds_checks)
array_index = nir_umin(b, array_index, array_max);
desc_ubo_offset = nir_iadd(b, desc_ubo_offset,
nir_imul(b, array_index, desc_ubo_stride));
nir_def *desc_offset = nir_imul_imm(
b, nir_iadd(b, array_index, first_desc_index), PANVK_DESCRIPTOR_SIZE);
nir_def *desc = nir_load_ubo(b, 4, 32, desc_ubo_idx, desc_ubo_offset,
.align_mul = 16, .range = ~0);
nir_def *base_addr = nir_load_push_constant(
b, 1, 64, base_addr_sysval_offset, .base = 256, .range = 256);
nir_def *desc_addr = nir_iadd(b, base_addr, nir_u2u64(b, desc_offset));
nir_def *desc =
nir_load_global(b, desc_addr, PANVK_DESCRIPTOR_SIZE, 4, 32);
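/* Worked example (illustrative): with first_desc_index = 4 and array_index =
* 2, the descriptor is fetched at base_addr + (4 + 2) * PANVK_DESCRIPTOR_SIZE,
* i.e. 192 bytes into the descriptor heap if PANVK_DESCRIPTOR_SIZE is 32. */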
/* The offset in the descriptor is guaranteed to be zero when it's
* written into the descriptor set. This lets us avoid some unnecessary
@ -237,7 +356,7 @@ build_buffer_addr_for_res_index(nir_builder *b, nir_def *res_index,
static bool
lower_res_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
b->cursor = nir_before_instr(&intrin->instr);
@ -277,17 +396,24 @@ lower_res_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
static void
get_resource_deref_binding(nir_deref_instr *deref, uint32_t *set,
uint32_t *binding, uint32_t *index_imm,
nir_def **index_ssa)
nir_def **index_ssa, uint32_t *max_idx)
{
*index_imm = 0;
*max_idx = 0;
*index_ssa = NULL;
if (deref->deref_type == nir_deref_type_array) {
if (nir_src_is_const(deref->arr.index))
if (nir_src_is_const(deref->arr.index)) {
*index_imm = nir_src_as_uint(deref->arr.index);
else
*max_idx = *index_imm;
} else {
*index_ssa = deref->arr.index.ssa;
/* A size of zero means an unsized (variable-length) array; the minus one
* then wraps to UINT32_MAX, which is exactly what we want here. */
*max_idx = glsl_array_size(nir_deref_instr_parent(deref)->type) - 1;
}
deref = nir_deref_instr_parent(deref);
}
@ -300,80 +426,151 @@ get_resource_deref_binding(nir_deref_instr *deref, uint32_t *set,
static nir_def *
load_resource_deref_desc(nir_builder *b, nir_deref_instr *deref,
unsigned desc_offset, unsigned num_components,
unsigned bit_size,
const struct apply_descriptors_ctx *ctx)
VkDescriptorType subdesc_type, unsigned desc_offset,
unsigned num_components, unsigned bit_size,
const struct lower_desc_ctx *ctx)
{
uint32_t set, binding, index_imm;
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa);
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
const struct panvk_descriptor_set_layout *set_layout =
get_set_layout(set, ctx);
const struct panvk_descriptor_set_binding_layout *bind_layout =
&set_layout->bindings[binding];
unsigned subdesc_idx = get_subdesc_idx(bind_layout, subdesc_type);
assert(index_ssa == NULL || index_imm == 0);
if (index_ssa == NULL)
index_ssa = nir_imm_int(b, index_imm);
const unsigned set_ubo_idx =
panvk_per_arch(pipeline_layout_ubo_start)(ctx->layout, set, false) +
set_layout->desc_ubo_index;
unsigned desc_stride = panvk_get_desc_stride(bind_layout->type);
unsigned set_base_addr_sysval_offs =
b->shader->info.stage == MESA_SHADER_COMPUTE
? offsetof(struct panvk_compute_sysvals, desc.sets[set])
: offsetof(struct panvk_graphics_sysvals, desc.sets[set]);
nir_def *set_base_addr = nir_load_push_constant(
b, 1, 64, nir_imm_int(b, 0), .base = 256 + set_base_addr_sysval_offs,
.range = 8);
nir_def *set_offset =
nir_imul_imm(b,
nir_iadd_imm(b, nir_imul_imm(b, index_ssa, desc_stride),
bind_layout->desc_idx + subdesc_idx),
PANVK_DESCRIPTOR_SIZE);
nir_def *desc_ubo_offset =
nir_iadd_imm(b, nir_imul_imm(b, index_ssa, bind_layout->desc_ubo_stride),
bind_layout->desc_ubo_offset + desc_offset);
set_offset = nir_iadd_imm(b, set_offset, desc_offset);
assert(bind_layout->desc_ubo_stride > 0);
unsigned desc_align = (1 << (ffs(bind_layout->desc_ubo_stride) - 1));
desc_align = MIN2(desc_align, 16);
unsigned desc_align = 1 << (ffs(PANVK_DESCRIPTOR_SIZE + desc_offset) - 1);
return nir_load_ubo(b, num_components, bit_size, nir_imm_int(b, set_ubo_idx),
desc_ubo_offset, .align_mul = desc_align,
.align_offset = (desc_offset % desc_align), .range = ~0);
return nir_load_global(b,
nir_iadd(b, set_base_addr, nir_u2u64(b, set_offset)),
desc_align, num_components, bit_size);
}
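/* Worked example (illustrative): for a combined image/sampler binding
* (desc_stride == 2) whose descriptors start at desc_idx 10, loading array
* element 3 with subdesc_idx 1 reads at
* set_base_addr + (3 * 2 + 10 + 1) * PANVK_DESCRIPTOR_SIZE + desc_offset,
* i.e. descriptor slot 17 of the set. */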
static nir_def *
load_tex_img_size(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim,
const struct apply_descriptors_ctx *ctx)
load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim,
bool is_array, const struct lower_desc_ctx *ctx)
{
if (dim == GLSL_SAMPLER_DIM_BUF) {
return load_resource_deref_desc(b, deref, 0, 1, 32, ctx);
nir_def *tex_w = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx);
/* S dimension is 16 bits wide. We don't support combining S,T dimensions
* to allow large buffers yet. */
return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
} else {
nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx);
nir_def *tex_w_h = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 2, 16, ctx);
nir_def *tex_depth_or_layer_count = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
dim == GLSL_SAMPLER_DIM_3D ? 28 : 24, 1, 16, ctx);
nir_def *tex_sz =
is_array && dim == GLSL_SAMPLER_DIM_1D
? nir_vec2(b, nir_channel(b, tex_w_h, 0), tex_depth_or_layer_count)
: nir_vec3(b, nir_channel(b, tex_w_h, 0),
nir_channel(b, tex_w_h, 1), tex_depth_or_layer_count);
tex_sz = nir_pad_vector_imm_int(b, tex_sz, 0, 4);
/* The sizes are provided as 16-bit values with 1 subtracted so
* convert to 32-bit and add 1.
*/
return nir_iadd_imm(b, nir_u2u32(b, desc), 1);
return nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1);
}
}
static nir_def *
load_tex_img_levels(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim,
const struct apply_descriptors_ctx *ctx)
load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim,
bool is_array, const struct lower_desc_ctx *ctx)
{
assert(dim != GLSL_SAMPLER_DIM_BUF);
nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx);
return nir_u2u32(b, nir_iand_imm(b, nir_channel(b, desc, 3), 0xff));
if (dim == GLSL_SAMPLER_DIM_BUF) {
nir_def *tex_w = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 1, 16, ctx);
/* S dimension is 16 bits wide. We don't support combining S,T dimensions
* to allow large buffers yet. */
return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
} else {
nir_def *tex_sz = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 3, 16, ctx);
if (is_array && dim == GLSL_SAMPLER_DIM_1D)
tex_sz =
nir_vec2(b, nir_channel(b, tex_sz, 0), nir_channel(b, tex_sz, 2));
tex_sz = nir_pad_vector_imm_int(b, tex_sz, 0, 4);
/* The sizes are provided as 16-bit values with 1 subtracted so
* convert to 32-bit and add 1.
*/
return nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1);
}
}
static nir_def *
load_tex_img_samples(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim,
const struct apply_descriptors_ctx *ctx)
load_tex_levels(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
{
assert(dim != GLSL_SAMPLER_DIM_BUF);
nir_def *desc = load_resource_deref_desc(b, deref, 0, 4, 16, ctx);
return nir_u2u32(b, nir_ushr_imm(b, nir_channel(b, desc, 3), 8));
/* LOD count is stored in word2[16:21] and has a minus(1) modifier. */
nir_def *tex_word2 = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 8, 1, 32, ctx);
nir_def *lod_count = nir_iand_imm(b, nir_ushr_imm(b, tex_word2, 16), 0x1f);
return nir_iadd_imm(b, lod_count, 1);
}
static nir_def *
load_tex_samples(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
{
assert(dim != GLSL_SAMPLER_DIM_BUF);
/* Sample count is stored in word3[13:25], and has a log2 modifier. */
nir_def *tex_word3 = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 12, 1, 32, ctx);
nir_def *sample_count = nir_iand_imm(b, nir_ushr_imm(b, tex_word3, 13), 0x7);
return nir_ishl(b, nir_imm_int(b, 1), sample_count);
}
static nir_def *
load_img_samples(nir_builder *b, nir_deref_instr *deref,
enum glsl_sampler_dim dim, const struct lower_desc_ctx *ctx)
{
assert(dim != GLSL_SAMPLER_DIM_BUF);
/* Sample count is stored in the image depth field.
* FIXME: This won't work for 2DMSArray images, but those are already
* broken. */
nir_def *sample_count = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 22, 1, 16, ctx);
return nir_iadd_imm(b, nir_u2u32(b, sample_count), 1);
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
const struct apply_descriptors_ctx *ctx)
lower_tex(nir_builder *b, nir_tex_instr *tex, const struct lower_desc_ctx *ctx)
{
bool progress = false;
@ -390,16 +587,16 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
nir_def *res;
switch (tex->op) {
case nir_texop_txs:
res = nir_channels(b, load_tex_img_size(b, deref, dim, ctx),
res = nir_channels(b, load_tex_size(b, deref, dim, tex->is_array, ctx),
nir_component_mask(tex->def.num_components));
break;
case nir_texop_query_levels:
assert(tex->def.num_components == 1);
res = load_tex_img_levels(b, deref, dim, ctx);
res = load_tex_levels(b, deref, dim, ctx);
break;
case nir_texop_texture_samples:
assert(tex->def.num_components == 1);
res = load_tex_img_samples(b, deref, dim, ctx);
res = load_tex_samples(b, deref, dim, ctx);
break;
default:
unreachable("Unsupported texture query op");
@ -416,15 +613,14 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
nir_tex_instr_remove_src(tex, sampler_src_idx);
uint32_t set, binding, index_imm;
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa);
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
const struct panvk_descriptor_set_binding_layout *bind_layout =
get_binding_layout(set, binding, ctx);
tex->sampler_index = ctx->layout->sets[set].sampler_offset +
bind_layout->sampler_idx + index_imm;
tex->sampler_index =
shader_desc_idx(set, binding, VK_DESCRIPTOR_TYPE_SAMPLER, ctx) +
index_imm;
if (index_ssa != NULL) {
nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset, index_ssa);
@ -437,15 +633,14 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
nir_tex_instr_remove_src(tex, tex_src_idx);
uint32_t set, binding, index_imm;
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa);
const struct panvk_descriptor_set_binding_layout *bind_layout =
get_binding_layout(set, binding, ctx);
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
tex->texture_index =
ctx->layout->sets[set].tex_offset + bind_layout->tex_idx + index_imm;
shader_desc_idx(set, binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ctx) +
index_imm;
if (index_ssa != NULL) {
nir_tex_instr_add_src(tex, nir_tex_src_texture_offset, index_ssa);
@ -458,11 +653,12 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
static nir_def *
get_img_index(nir_builder *b, nir_deref_instr *deref,
const struct apply_descriptors_ctx *ctx)
const struct lower_desc_ctx *ctx)
{
uint32_t set, binding, index_imm;
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa);
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
const struct panvk_descriptor_set_binding_layout *bind_layout =
get_binding_layout(set, binding, ctx);
@ -470,8 +666,7 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ||
bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
unsigned img_offset =
ctx->layout->sets[set].img_offset + bind_layout->img_idx;
unsigned img_offset = shader_desc_idx(set, binding, bind_layout->type, ctx);
if (index_ssa == NULL) {
return nir_imm_int(b, img_offset + index_imm);
@ -483,7 +678,7 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
static bool
lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
struct apply_descriptors_ctx *ctx)
struct lower_desc_ctx *ctx)
{
b->cursor = nir_before_instr(&intr->instr);
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
@ -491,15 +686,16 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
if (intr->intrinsic == nir_intrinsic_image_deref_size ||
intr->intrinsic == nir_intrinsic_image_deref_samples) {
const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
bool is_array = nir_intrinsic_image_array(intr);
nir_def *res;
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_size:
res = nir_channels(b, load_tex_img_size(b, deref, dim, ctx),
res = nir_channels(b, load_img_size(b, deref, dim, is_array, ctx),
nir_component_mask(intr->def.num_components));
break;
case nir_intrinsic_image_deref_samples:
res = load_tex_img_samples(b, deref, dim, ctx);
res = load_img_samples(b, deref, dim, ctx);
break;
default:
unreachable("Unsupported image query op");
@ -509,7 +705,6 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
nir_instr_remove(&intr->instr);
} else {
nir_rewrite_image_intrinsic(intr, get_img_index(b, deref, ctx), false);
ctx->has_img_access = true;
}
return true;
@ -517,7 +712,7 @@ lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
struct apply_descriptors_ctx *ctx)
struct lower_desc_ctx *ctx)
{
switch (intr->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
@ -539,7 +734,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
static bool
lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *data)
{
struct apply_descriptors_ctx *ctx = data;
struct lower_desc_ctx *ctx = data;
switch (instr->type) {
case nir_instr_type_tex:
@ -551,25 +746,276 @@ lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *data)
}
}
static void
record_binding(struct lower_desc_ctx *ctx, unsigned set, unsigned binding,
VkDescriptorType subdesc_type, uint32_t max_idx)
{
const struct panvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding];
uint32_t subdesc_idx = get_subdesc_idx(binding_layout, subdesc_type);
uint32_t desc_stride = panvk_get_desc_stride(binding_layout->type);
assert(desc_stride == 1 || desc_stride == 2);
ctx->desc_info->used_set_mask |= BITFIELD_BIT(set);
/* SSBOs are accessed directly from the sets, no need to record accesses
* to such resources. */
if (binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
return;
assert(subdesc_idx < desc_stride);
assert(!(binding & BITFIELD_BIT(27)));
struct desc_id src = {
.set = set,
.subdesc = subdesc_idx,
.binding = binding,
};
const void *key = desc_id_to_key(src);
struct hash_entry *he = _mesa_hash_table_search(ctx->ht, key);
uint32_t old_desc_count = 0;
uint32_t new_desc_count =
max_idx == UINT32_MAX ? binding_layout->desc_count : max_idx + 1;
assert(new_desc_count <= binding_layout->desc_count);
if (!he)
he = _mesa_hash_table_insert(ctx->ht, key,
(void *)(uintptr_t)new_desc_count);
else
old_desc_count = (uintptr_t)he->data;
if (old_desc_count >= new_desc_count)
return;
uint32_t desc_count_diff = new_desc_count - old_desc_count;
if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
ctx->desc_info->dyn_ubos.count += desc_count_diff;
} else if (binding_layout->type ==
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
ctx->desc_info->dyn_ssbos.count += desc_count_diff;
} else {
uint32_t table =
desc_type_to_table_type(binding_layout->type, subdesc_idx);
assert(table < PANVK_BIFROST_DESC_TABLE_COUNT);
ctx->desc_info->others[table].count += desc_count_diff;
}
he->data = (void *)(uintptr_t)new_desc_count;
}
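/* Worked example (illustrative): a shader indexing texture[2] of a
* SAMPLED_IMAGE binding with desc_count == 8 records max_idx = 2, so only
* three texture descriptors get copy entries; a later access through a
* non-constant index (max_idx == UINT32_MAX) bumps that to the full 8. */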
static uint32_t *
fill_copy_descs_for_binding(struct lower_desc_ctx *ctx, unsigned set,
unsigned binding, uint32_t subdesc_idx,
uint32_t desc_count)
{
assert(desc_count);
const struct panvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding];
uint32_t desc_stride = panvk_get_desc_stride(binding_layout->type);
uint32_t *first_entry = NULL;
assert(desc_count <= binding_layout->desc_count);
for (uint32_t i = 0; i < desc_count; i++) {
uint32_t src_idx =
binding_layout->desc_idx + (i * desc_stride) + subdesc_idx;
struct panvk_shader_desc_map *map;
if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
map = &ctx->desc_info->dyn_ubos;
} else if (binding_layout->type ==
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
map = &ctx->desc_info->dyn_ssbos;
} else {
uint32_t dst_table =
desc_type_to_table_type(binding_layout->type, subdesc_idx);
assert(dst_table < PANVK_BIFROST_DESC_TABLE_COUNT);
map = &ctx->desc_info->others[dst_table];
}
if (!first_entry)
first_entry = &map->map[map->count];
map->map[map->count++] = COPY_DESC_HANDLE(set, src_idx);
}
return first_entry;
}
static void
create_copy_table(struct lower_desc_ctx *ctx)
{
struct panvk_shader_desc_info *desc_info = ctx->desc_info;
uint32_t *copy_table;
uint32_t copy_count;
copy_count = desc_info->dyn_ubos.count + desc_info->dyn_ssbos.count;
for (uint32_t i = 0; i < PANVK_BIFROST_DESC_TABLE_COUNT; i++)
copy_count += desc_info->others[i].count;
if (copy_count == 0)
return;
copy_table = calloc(copy_count, sizeof(*copy_table));
assert(copy_table);
desc_info->dyn_ubos.map = copy_table;
copy_table += desc_info->dyn_ubos.count;
desc_info->dyn_ubos.count = 0;
desc_info->dyn_ssbos.map = copy_table;
copy_table += desc_info->dyn_ssbos.count;
desc_info->dyn_ssbos.count = 0;
for (uint32_t i = 0; i < PANVK_BIFROST_DESC_TABLE_COUNT; i++) {
desc_info->others[i].map = copy_table;
copy_table += desc_info->others[i].count;
desc_info->others[i].count = 0;
}
hash_table_foreach(ctx->ht, he) {
/* We use the upper binding bit to encode the subdesc index. */
uint32_t desc_count = (uintptr_t)he->data;
struct desc_id src = key_to_desc_id(he->key);
/* Until now, we were just using the hash table to track descriptor
* counts; from this point on, it becomes a <set,binding> -> <table_index>
* map. */
he->data = fill_copy_descs_for_binding(ctx, src.set, src.binding,
src.subdesc, desc_count);
}
}
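/* Illustrative layout of the resulting copy table (one uint32_t handle per
* copied descriptor), carved out of a single allocation:
*
* [ dyn_ubos | dyn_ssbos | UBO | IMG | TEXTURE | SAMPLER ]
*
* The counts are reset to zero above so fill_copy_descs_for_binding() can
* reuse them as per-table write cursors while repopulating the maps. */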
/* TODO: Texture instructions support bindless through DTSEL_IMM(63),
* which would save us copies of the texture/sampler descriptors. */
static bool
collect_tex_desc_access(nir_builder *b, nir_tex_instr *tex,
struct lower_desc_ctx *ctx)
{
bool recorded = false;
int sampler_src_idx =
nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (sampler_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
record_binding(ctx, set, binding, VK_DESCRIPTOR_TYPE_SAMPLER, max_idx);
recorded = true;
}
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (tex_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
uint32_t set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
record_binding(ctx, set, binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
max_idx);
recorded = true;
}
return recorded;
}
static bool
collect_intr_desc_access(nir_builder *b, nir_intrinsic_instr *intrin,
struct lower_desc_ctx *ctx)
{
switch (intrin->intrinsic) {
case nir_intrinsic_vulkan_resource_index: {
unsigned set, binding;
set = nir_intrinsic_desc_set(intrin);
binding = nir_intrinsic_binding(intrin);
/* TODO: walk the reindex chain from load_vulkan_descriptor() to try and
* guess the max index. */
record_binding(ctx, set, binding, ~0, UINT32_MAX);
return true;
}
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples: {
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
unsigned set, binding, index_imm, max_idx;
nir_def *index_ssa;
get_resource_deref_binding(deref, &set, &binding, &index_imm, &index_ssa,
&max_idx);
record_binding(ctx, set, binding, ~0, max_idx);
return true;
}
default:
return false;
}
}
static bool
collect_instr_desc_access(nir_builder *b, nir_instr *instr, void *data)
{
struct lower_desc_ctx *ctx = data;
switch (instr->type) {
case nir_instr_type_tex:
return collect_tex_desc_access(b, nir_instr_as_tex(instr), ctx);
case nir_instr_type_intrinsic:
return collect_intr_desc_access(b, nir_instr_as_intrinsic(instr), ctx);
default:
return false;
}
}
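/* Lowering entry point: first walk the shader to record which <set,binding>
* pairs are actually accessed (collect_instr_desc_access), then build the
* per-table copy maps (create_copy_table), and finally rewrite resource
* intrinsics and texture/image instructions to per-shader table indices
* (lower_descriptors_instr). */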
bool
panvk_per_arch(nir_lower_descriptors)(nir_shader *nir, struct panvk_device *dev,
const struct panvk_pipeline_layout *layout,
bool *has_img_access_out)
const struct vk_pipeline_layout *layout,
struct panvk_shader_desc_info *desc_info)
{
struct apply_descriptors_ctx ctx = {
.layout = layout,
.desc_addr_format = nir_address_format_32bit_index_offset,
struct lower_desc_ctx ctx = {
.desc_info = desc_info,
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess
? nir_address_format_64bit_bounded_global
: nir_address_format_64bit_global_32bit_offset,
};
bool progress = nir_shader_instructions_pass(
nir, lower_descriptors_instr,
nir_metadata_block_index | nir_metadata_dominance, (void *)&ctx);
if (has_img_access_out)
*has_img_access_out = ctx.has_img_access;
ctx.ht = _mesa_hash_table_create_u32_keys(NULL);
assert(ctx.ht);
_mesa_hash_table_set_deleted_key(ctx.ht, DELETED_KEY);
for (uint32_t i = 0; i < layout->set_count; i++) {
ctx.set_layouts[i] =
to_panvk_descriptor_set_layout(layout->set_layouts[i]);
}
bool progress = nir_shader_instructions_pass(nir, collect_instr_desc_access,
nir_metadata_all, &ctx);
if (!progress)
goto out;
create_copy_table(&ctx);
progress = nir_shader_instructions_pass(
nir, lower_descriptors_instr,
nir_metadata_block_index | nir_metadata_dominance, &ctx);
out:
_mesa_hash_table_destroy(ctx.ht, NULL);
return progress;
}

View file

@ -30,7 +30,6 @@
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_pipeline.h"
#include "panvk_pipeline_layout.h"
#include "panvk_priv_bo.h"
#include "panvk_shader.h"
@ -44,6 +43,7 @@
#include "vk_blend.h"
#include "vk_format.h"
#include "vk_pipeline_cache.h"
#include "vk_pipeline_layout.h"
#include "vk_render_pass.h"
#include "vk_util.h"
@ -76,7 +76,32 @@ init_pipeline_shader(struct panvk_pipeline *pipeline,
}
pshader->info = shader->info;
pshader->has_img_access = shader->has_img_access;
pshader->desc_info.used_set_mask = shader->desc_info.used_set_mask;
uint32_t copy_count = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others); i++) {
pshader->desc_info.others.count[i] = shader->desc_info.others[i].count;
copy_count += shader->desc_info.others[i].count;
}
if (copy_count) {
pshader->desc_info.others.map = pan_pool_upload_aligned(
&pipeline->desc_pool.base, shader->desc_info.others[0].map,
copy_count * sizeof(uint32_t), sizeof(uint32_t));
}
assert(shader->desc_info.dyn_ubos.count <
ARRAY_SIZE(pshader->desc_info.dyn_ubos.map));
pshader->desc_info.dyn_ubos.count = shader->desc_info.dyn_ubos.count;
memcpy(pshader->desc_info.dyn_ubos.map, shader->desc_info.dyn_ubos.map,
shader->desc_info.dyn_ubos.count *
sizeof(*pshader->desc_info.dyn_ubos.map));
assert(shader->desc_info.dyn_ssbos.count <
ARRAY_SIZE(pshader->desc_info.dyn_ssbos.map));
pshader->desc_info.dyn_ssbos.count = shader->desc_info.dyn_ssbos.count;
memcpy(pshader->desc_info.dyn_ssbos.map, shader->desc_info.dyn_ssbos.map,
shader->desc_info.dyn_ssbos.count *
sizeof(*pshader->desc_info.dyn_ssbos.map));
if (stage_info->stage == VK_SHADER_STAGE_COMPUTE_BIT) {
struct panvk_compute_pipeline *compute_pipeline =
@ -303,7 +328,7 @@ panvk_graphics_pipeline_create(struct panvk_device *dev,
const VkAllocationCallbacks *alloc,
struct panvk_pipeline **out)
{
VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
VK_FROM_HANDLE(vk_pipeline_layout, layout, create_info->layout);
struct vk_graphics_pipeline_all_state all;
struct vk_graphics_pipeline_state state = {};
VkResult result;
@ -401,7 +426,7 @@ panvk_compute_pipeline_create(struct panvk_device *dev,
const VkAllocationCallbacks *alloc,
struct panvk_pipeline **out)
{
VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
VK_FROM_HANDLE(vk_pipeline_layout, layout, create_info->layout);
struct panvk_compute_pipeline *compute_pipeline = vk_object_zalloc(
&dev->vk, alloc, sizeof(*compute_pipeline), VK_OBJECT_TYPE_PIPELINE);

View file

@ -1,165 +0,0 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* SPDX-License-Identifier: MIT
*/
#include "genxml/gen_macros.h"
#include "vk_log.h"
#include "panvk_descriptor_set.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_macros.h"
#include "panvk_pipeline_layout.h"
#include "panvk_sampler.h"
#include "panvk_shader.h"
#include "util/mesa-sha1.h"
/*
* Pipeline layouts. These have nothing to do with the pipeline. They are
* just multiple descriptor set layouts pasted together.
*/
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(CreatePipelineLayout)(
VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_pipeline_layout *layout;
struct mesa_sha1 ctx;
layout =
vk_pipeline_layout_zalloc(&device->vk, sizeof(*layout), pCreateInfo);
if (layout == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
_mesa_sha1_init(&ctx);
unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0;
unsigned dyn_ubo_idx = 0, dyn_ssbo_idx = 0, img_idx = 0;
unsigned dyn_desc_ubo_offset = 0;
for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) {
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(layout->vk.set_layouts[set]);
layout->sets[set].sampler_offset = sampler_idx;
layout->sets[set].tex_offset = tex_idx;
layout->sets[set].ubo_offset = ubo_idx;
layout->sets[set].dyn_ubo_offset = dyn_ubo_idx;
layout->sets[set].dyn_ssbo_offset = dyn_ssbo_idx;
layout->sets[set].img_offset = img_idx;
layout->sets[set].dyn_desc_ubo_offset = dyn_desc_ubo_offset;
sampler_idx += set_layout->num_samplers;
tex_idx += set_layout->num_textures;
ubo_idx += set_layout->num_ubos;
dyn_ubo_idx += set_layout->num_dyn_ubos;
dyn_ssbo_idx += set_layout->num_dyn_ssbos;
img_idx += set_layout->num_imgs;
dyn_desc_ubo_offset +=
set_layout->num_dyn_ssbos * sizeof(struct panvk_ssbo_addr);
for (unsigned b = 0; b < set_layout->binding_count; b++) {
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[b];
if (binding_layout->immutable_samplers) {
for (unsigned s = 0; s < binding_layout->array_size; s++) {
struct panvk_sampler *sampler =
binding_layout->immutable_samplers[s];
_mesa_sha1_update(&ctx, &sampler->desc, sizeof(sampler->desc));
}
}
_mesa_sha1_update(&ctx, &binding_layout->type,
sizeof(binding_layout->type));
_mesa_sha1_update(&ctx, &binding_layout->array_size,
sizeof(binding_layout->array_size));
_mesa_sha1_update(&ctx, &binding_layout->shader_stages,
sizeof(binding_layout->shader_stages));
}
}
for (unsigned range = 0; range < pCreateInfo->pushConstantRangeCount;
range++) {
layout->push_constants.size =
MAX2(pCreateInfo->pPushConstantRanges[range].offset +
pCreateInfo->pPushConstantRanges[range].size,
layout->push_constants.size);
}
layout->num_samplers = sampler_idx;
layout->num_textures = tex_idx;
layout->num_ubos = ubo_idx;
layout->num_dyn_ubos = dyn_ubo_idx;
layout->num_dyn_ssbos = dyn_ssbo_idx;
layout->num_imgs = img_idx;
/* Some NIR texture operations don't require a sampler, but Bifrost/Midgard
* ones always expect one. Add a dummy sampler to deal with this limitation.
*/
if (layout->num_textures) {
layout->num_samplers++;
for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++)
layout->sets[set].sampler_offset++;
}
_mesa_sha1_final(&ctx, layout->sha1);
*pPipelineLayout = panvk_pipeline_layout_to_handle(layout);
return VK_SUCCESS;
}
unsigned
panvk_per_arch(pipeline_layout_ubo_start)(
const struct panvk_pipeline_layout *layout, unsigned set, bool is_dynamic)
{
if (is_dynamic)
return layout->num_ubos + layout->sets[set].dyn_ubo_offset;
return layout->sets[set].ubo_offset;
}
unsigned
panvk_per_arch(pipeline_layout_ubo_index)(
const struct panvk_pipeline_layout *layout, unsigned set, unsigned binding,
unsigned array_index)
{
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(layout->vk.set_layouts[set]);
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding];
const bool is_dynamic =
binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
const uint32_t ubo_idx =
is_dynamic ? binding_layout->dyn_ubo_idx : binding_layout->ubo_idx;
return panvk_per_arch(pipeline_layout_ubo_start)(layout, set, is_dynamic) +
ubo_idx + array_index;
}
unsigned
panvk_per_arch(pipeline_layout_dyn_desc_ubo_index)(
const struct panvk_pipeline_layout *layout)
{
return layout->num_ubos + layout->num_dyn_ubos;
}
unsigned
panvk_per_arch(pipeline_layout_total_ubo_count)(
const struct panvk_pipeline_layout *layout)
{
return layout->num_ubos + layout->num_dyn_ubos +
(layout->num_dyn_ssbos ? 1 : 0);
}
unsigned
panvk_per_arch(pipeline_layout_dyn_ubos_offset)(
const struct panvk_pipeline_layout *layout)
{
return layout->num_ubos;
}

View file

@ -93,11 +93,17 @@ struct panvk_cmd_graphics_state {
struct {
mali_ptr rsd;
#if PAN_ARCH <= 7
struct panvk_shader_desc_state desc;
#endif
} fs;
struct {
mali_ptr attribs;
mali_ptr attrib_bufs;
#if PAN_ARCH <= 7
struct panvk_shader_desc_state desc;
#endif
} vs;
struct {
@ -141,6 +147,11 @@ struct panvk_cmd_compute_state {
const struct panvk_compute_pipeline *pipeline;
struct panvk_compute_sysvals sysvals;
mali_ptr push_uniforms;
#if PAN_ARCH <= 7
struct {
struct panvk_shader_desc_state desc;
} cs;
#endif
};
struct panvk_cmd_buffer {

View file

@ -38,7 +38,6 @@
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_pipeline.h"
#include "panvk_pipeline_layout.h"
#include "panvk_priv_bo.h"
#include "pan_blitter.h"
@ -298,6 +297,10 @@ panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
panvk_per_arch(cmd_desc_state_reset)(&cmdbuf->state.gfx.desc_state,
&cmdbuf->state.compute.desc_state);
memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
memset(&cmdbuf->state.compute.cs.desc, 0,
sizeof(cmdbuf->state.compute.cs.desc));
}
static void
@ -401,6 +404,15 @@ panvk_per_arch(CmdBindDescriptorSets)(
panvk_per_arch(cmd_desc_state_bind_sets)(
desc_state, layout, firstSet, descriptorSetCount, pDescriptorSets,
dynamicOffsetCount, pDynamicOffsets);
/* TODO: Invalidate only if the shader tables are disturbed */
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
} else {
memset(&cmdbuf->state.compute.cs.desc, 0,
sizeof(cmdbuf->state.compute.cs.desc));
}
}
VKAPI_ATTR void VKAPI_CALL
@ -439,12 +451,16 @@ panvk_per_arch(CmdBindPipeline)(VkCommandBuffer commandBuffer,
cmdbuf->state.gfx.fs.rsd = 0;
cmdbuf->state.gfx.pipeline = gfx_pipeline;
memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
break;
}
case VK_PIPELINE_BIND_POINT_COMPUTE:
cmdbuf->state.compute.pipeline =
panvk_pipeline_to_compute_pipeline(pipeline);
memset(&cmdbuf->state.compute.cs.desc, 0,
sizeof(cmdbuf->state.compute.cs.desc));
break;
default:
@ -460,18 +476,33 @@ panvk_per_arch(CmdPushDescriptorSetKHR)(
const VkWriteDescriptorSet *pDescriptorWrites)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout);
VK_FROM_HANDLE(vk_pipeline_layout, playout, layout);
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]);
to_panvk_descriptor_set_layout(playout->set_layouts[set]);
struct panvk_descriptor_state *desc_state =
panvk_cmd_get_desc_state(cmdbuf, pipelineBindPoint);
struct panvk_push_descriptor_set *push_set =
struct panvk_descriptor_set *push_set =
panvk_per_arch(cmd_push_descriptors)(&cmdbuf->vk, desc_state, set);
if (!push_set)
return;
panvk_per_arch(push_descriptor_set)(push_set, set_layout,
descriptorWriteCount, pDescriptorWrites);
push_set->layout = set_layout;
push_set->desc_count = set_layout->desc_count;
for (uint32_t i = 0; i < descriptorWriteCount; i++)
panvk_per_arch(descriptor_set_write)(push_set, &pDescriptorWrites[i],
true);
push_set->descs.dev = 0;
push_set->layout = NULL;
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
} else {
memset(&cmdbuf->state.compute.cs.desc, 0,
sizeof(cmdbuf->state.compute.cs.desc));
}
}
VKAPI_ATTR void VKAPI_CALL
@ -483,16 +514,30 @@ panvk_per_arch(CmdPushDescriptorSetWithTemplateKHR)(
VK_FROM_HANDLE(vk_descriptor_update_template, template,
descriptorUpdateTemplate);
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout);
VK_FROM_HANDLE(vk_pipeline_layout, playout, layout);
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]);
to_panvk_descriptor_set_layout(playout->set_layouts[set]);
struct panvk_descriptor_state *desc_state =
panvk_cmd_get_desc_state(cmdbuf, template->bind_point);
struct panvk_push_descriptor_set *push_set =
struct panvk_descriptor_set *push_set =
panvk_per_arch(cmd_push_descriptors)(&cmdbuf->vk, desc_state, set);
if (!push_set)
return;
panvk_per_arch(push_descriptor_set_with_template)(
push_set, set_layout, descriptorUpdateTemplate, pData);
push_set->layout = set_layout;
push_set->desc_count = set_layout->desc_count;
panvk_per_arch(descriptor_set_write_template)(push_set, template, pData,
true);
push_set->descs.dev = 0;
push_set->layout = NULL;
if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
} else {
memset(&cmdbuf->state.compute.cs.desc, 0,
sizeof(cmdbuf->state.compute.cs.desc));
}
}
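/* Hypothetical helper, not part of this patch: the per-bind-point
 * invalidation above is repeated in CmdBindDescriptorSets, CmdBindPipeline
 * and both push-descriptor entry points, and could be factored like this
 * (struct/field names follow the command-buffer state used above). */
static void
panvk_invalidate_shader_desc_state(struct panvk_cmd_buffer *cmdbuf,
                                   VkPipelineBindPoint bind_point)
{
   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
      memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
   } else {
      memset(&cmdbuf->state.compute.cs.desc, 0,
             sizeof(cmdbuf->state.compute.cs.desc));
   }
}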

View file

@ -27,13 +27,8 @@
struct panvk_dispatch_info {
struct pan_compute_dim wg_count;
mali_ptr attributes;
mali_ptr attribute_bufs;
mali_ptr tsd;
mali_ptr ubos;
mali_ptr push_uniforms;
mali_ptr textures;
mali_ptr samplers;
};
VKAPI_ATTR void VKAPI_CALL
@ -53,12 +48,16 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
struct panvk_descriptor_state *desc_state =
&cmdbuf->state.compute.desc_state;
struct panvk_shader_desc_state *cs_desc_state =
&cmdbuf->state.compute.cs.desc;
const struct panvk_compute_pipeline *pipeline =
cmdbuf->state.compute.pipeline;
struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base;
struct panfrost_ptr job =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
util_dynarray_append(&batch->jobs, void *, job.cpu);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu;
panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state,
&pipeline->base);
struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals;
sysvals->num_work_groups.x = x;
@ -67,38 +66,38 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
sysvals->local_group_size.x = pipeline->local_size.x;
sysvals->local_group_size.y = pipeline->local_size.y;
sysvals->local_group_size.z = pipeline->local_size.z;
panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state,
&pipeline->cs, cs_desc_state);
sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu;
for (uint32_t i = 0; i < MAX_SETS; i++) {
if (pipeline->cs.desc_info.used_set_mask & BITFIELD_BIT(i))
sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev;
}
panvk_per_arch(cmd_prepare_push_sets)(desc_pool_base, desc_state,
&pipeline->base);
if (pipeline->cs.has_img_access)
panvk_per_arch(prepare_img_attribs)(desc_pool_base, desc_state,
&pipeline->base);
dispatch.attributes = desc_state->img.attribs;
dispatch.attribute_bufs = desc_state->img.attrib_bufs;
panvk_per_arch(cmd_prepare_ubos)(desc_pool_base, desc_state,
&pipeline->base);
dispatch.ubos = desc_state->ubos;
cmdbuf->state.compute.push_uniforms = 0;
if (!cmdbuf->state.compute.push_uniforms) {
cmdbuf->state.compute.push_uniforms = panvk_cmd_prepare_push_uniforms(
desc_pool_base, &cmdbuf->state.push_constants,
&cmdbuf->desc_pool.base, &cmdbuf->state.push_constants,
&cmdbuf->state.compute.sysvals, sizeof(cmdbuf->state.compute.sysvals));
}
dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms;
panvk_per_arch(cmd_prepare_textures)(desc_pool_base, desc_state,
&pipeline->base);
dispatch.textures = desc_state->textures;
panvk_per_arch(cmd_prepare_shader_desc_tables)(
&cmdbuf->desc_pool.base, desc_state, &pipeline->cs, cs_desc_state);
panvk_per_arch(cmd_prepare_samplers)(desc_pool_base, desc_state,
&pipeline->base);
dispatch.samplers = desc_state->samplers;
struct panfrost_ptr copy_desc_job = panvk_per_arch(meta_get_copy_desc_job)(
dev, &cmdbuf->desc_pool.base, &pipeline->cs,
&cmdbuf->state.compute.desc_state, cs_desc_state);
if (copy_desc_job.cpu)
util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu);
struct panfrost_ptr job =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
util_dynarray_append(&batch->jobs, void *, job.cpu);
panfrost_pack_work_groups_compute(
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), dispatch.wg_count.x,
@ -113,17 +112,24 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = pipeline->cs.rsd;
cfg.attributes = dispatch.attributes;
cfg.attribute_buffers = dispatch.attribute_bufs;
cfg.attributes = cs_desc_state->img_attrib_table;
cfg.attribute_buffers =
cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
cfg.thread_storage = dispatch.tsd;
cfg.uniform_buffers = dispatch.ubos;
cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
cfg.push_uniforms = dispatch.push_uniforms;
cfg.textures = dispatch.textures;
cfg.samplers = dispatch.samplers;
cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
}
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, &job,
false);
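/* The copy-desc job fills the compute shader's descriptor tables, so when
 * one was emitted the compute job below has to depend on it. */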
unsigned copy_desc_dep =
copy_desc_job.gpu
? pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
&copy_desc_job, false)
: 0;
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
copy_desc_dep, &job, false);
batch->tlsinfo.tls.size = pipeline->cs.info.tls_size;
batch->tlsinfo.wls.size = pipeline->cs.info.wls_size;
@ -137,7 +143,6 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
}
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_per_arch(cmd_unprepare_push_sets)(desc_state);
}
VKAPI_ATTR void VKAPI_CALL

View file

@ -20,6 +20,7 @@
#include "panvk_image_view.h"
#include "panvk_instance.h"
#include "panvk_pipeline.h"
#include "panvk_priv_bo.h"
#include "panvk_shader.h"
#include "pan_desc.h"
@ -32,6 +33,7 @@
#include "pan_shader.h"
#include "vk_format.h"
#include "vk_pipeline_layout.h"
struct panvk_draw_info {
unsigned first_index;
@ -54,14 +56,9 @@ struct panvk_draw_info {
struct {
mali_ptr rsd;
mali_ptr varyings;
mali_ptr attributes;
mali_ptr attribute_bufs;
} fs;
mali_ptr push_uniforms;
mali_ptr varying_bufs;
mali_ptr textures;
mali_ptr samplers;
mali_ptr ubos;
mali_ptr position;
mali_ptr indices;
union {
@ -73,7 +70,9 @@ struct panvk_draw_info {
const struct pan_tiler_context *tiler_ctx;
mali_ptr viewport;
struct {
struct panfrost_ptr vertex_copy_desc;
struct panfrost_ptr vertex;
struct panfrost_ptr frag_copy_desc;
struct panfrost_ptr tiler;
} jobs;
};
@ -90,6 +89,10 @@ static void
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
@ -138,6 +141,21 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
sysvals->viewport.offset.z = viewport->minDepth;
cmdbuf->state.gfx.push_uniforms = 0;
}
panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state,
&pipeline->vs, vs_desc_state);
sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos;
panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state,
&pipeline->fs, fs_desc_state);
sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos;
for (uint32_t i = 0; i < MAX_SETS; i++) {
uint32_t used_set_mask = pipeline->vs.desc_info.used_set_mask |
pipeline->fs.desc_info.used_set_mask;
if (used_set_mask & BITFIELD_BIT(i))
sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev;
}
}
static bool
@ -626,12 +644,11 @@ static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
const struct vk_vertex_input_state *vi =
cmdbuf->vk.dynamic_graphics_state.vi;
unsigned num_imgs =
pipeline->vs.has_img_access ? pipeline->base.layout->num_imgs : 0;
pipeline->vs.desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG];
unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
unsigned num_vbs = util_last_bit(vi->bindings_valid);
unsigned attrib_count =
@ -639,7 +656,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
bool dirty =
is_dirty(cmdbuf, VI) || is_dirty(cmdbuf, VI_BINDINGS_VALID) ||
is_dirty(cmdbuf, VI_BINDING_STRIDES) ||
(num_imgs && !desc_state->img.attribs) ||
(num_imgs && !cmdbuf->state.gfx.vs.desc.img_attrib_table) ||
(cmdbuf->state.gfx.vb.count && !cmdbuf->state.gfx.vs.attrib_bufs) ||
(attrib_count && !cmdbuf->state.gfx.vs.attribs);
@ -675,50 +692,28 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
}
}
if (num_imgs) {
/* Image load/store are passed a fixed offset, so we can make vertex input
* dynamic. Images are always placed after all potential vertex
* attributes. Buffers are tightly packed since they don't interfere with
* the vertex shader.
*/
unsigned attribs_offset = MAX_VS_ATTRIBS * pan_size(ATTRIBUTE);
unsigned bufs_offset = num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2;
memset(attribs.cpu + num_vs_attribs * pan_size(ATTRIBUTE), 0,
(MAX_VS_ATTRIBS - num_vs_attribs) * pan_size(ATTRIBUTE));
panvk_per_arch(fill_img_attribs)(
desc_state, &pipeline->base, bufs.cpu + bufs_offset,
attribs.cpu + attribs_offset, num_vbs * 2);
desc_state->img.attrib_bufs = bufs.gpu + bufs_offset;
desc_state->img.attribs = attribs.gpu + attribs_offset;
}
/* A NULL entry is needed to stop prefetching on Bifrost */

memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
pan_size(ATTRIBUTE_BUFFER));
cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu;
cmdbuf->state.gfx.vs.attribs = attribs.gpu;
if (num_imgs) {
cmdbuf->state.gfx.vs.desc.img_attrib_table =
attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE));
cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] =
bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2);
}
}
static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
panvk_draw_prepare_vs_attribs(cmdbuf, draw);
draw->vs.attributes = cmdbuf->state.gfx.vs.attribs;
draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
if (pipeline->fs.has_img_access) {
struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base;
panvk_per_arch(prepare_img_attribs)(desc_pool_base, desc_state,
&pipeline->base);
draw->fs.attributes = desc_state->img.attribs;
draw->fs.attribute_bufs = desc_state->img.attrib_bufs;
}
}
void
@ -780,11 +775,25 @@ static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
struct panvk_batch *batch = cmdbuf->cur_batch;
struct panfrost_ptr ptr =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base,
&cmdbuf->state.gfx.desc_state,
&pipeline->vs, vs_desc_state);
struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)(
dev, &cmdbuf->desc_pool.base, &pipeline->vs,
&cmdbuf->state.gfx.desc_state, vs_desc_state);
if (ptr.cpu)
util_dynarray_append(&batch->jobs, void *, ptr.cpu);
draw->jobs.vertex_copy_desc = ptr;
ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
util_dynarray_append(&batch->jobs, void *, ptr.cpu);
draw->jobs.vertex = ptr;
@ -805,10 +814,10 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
cfg.offset_start = draw->offset_start;
cfg.instance_size =
draw->instance_count > 1 ? draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
cfg.push_uniforms = draw->push_uniforms;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
}
}
@ -909,6 +918,7 @@ static void
panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_info *draw, void *dcd)
{
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
const struct vk_rasterization_state *rs =
&cmdbuf->vk.dynamic_graphics_state.rs;
const struct vk_input_assembly_state *ia =
@ -920,8 +930,9 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
cfg.position = draw->position;
cfg.state = draw->fs.rsd;
cfg.attributes = draw->fs.attributes;
cfg.attribute_buffers = draw->fs.attribute_bufs;
cfg.attributes = fs_desc_state->img_attrib_table;
cfg.attribute_buffers =
fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
cfg.viewport = draw->viewport;
cfg.varyings = draw->fs.varyings;
cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
@ -938,10 +949,10 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
cfg.offset_start = draw->offset_start;
cfg.instance_size =
draw->instance_count > 1 ? draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
cfg.push_uniforms = draw->push_uniforms;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
/* TODO: occlusion queries */
}
@ -951,14 +962,30 @@ static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_batch *batch = cmdbuf->cur_batch;
struct panfrost_ptr ptr =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);
/* If the vertex job doesn't write the position, we don't need a tiler job. */
if (!draw->position)
return;
const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base,
&cmdbuf->state.gfx.desc_state,
&pipeline->fs, fs_desc_state);
struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)(
dev, &cmdbuf->desc_pool.base, &pipeline->fs,
&cmdbuf->state.gfx.desc_state, fs_desc_state);
if (ptr.cpu)
util_dynarray_append(&batch->jobs, void *, ptr.cpu);
draw->jobs.frag_copy_desc = ptr;
ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);
util_dynarray_append(&batch->jobs, void *, ptr.cpu);
draw->jobs.tiler = ptr;
@ -1015,10 +1042,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
const struct vk_rasterization_state *rs =
&cmdbuf->vk.dynamic_graphics_state.rs;
/* There are only 16 bits in the descriptor for the job ID, make sure all
* the 3 (2 in Bifrost) jobs in this draw are in the same batch.
/* There are only 16 bits in the descriptor for the job ID. Each shader in
 * the draw comes with a pilot job doing its descriptor copies, and we emit
 * one <vertex,tiler> job pair per draw, so keep all four jobs in the same
 * batch.
*/
if (batch->jc.job_index >= (UINT16_MAX - 3)) {
if (batch->jc.job_index >= (UINT16_MAX - 4)) {
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
@ -1029,32 +1057,20 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state,
&pipeline->base);
panvk_cmd_prepare_draw_sysvals(cmdbuf, draw);
struct pan_pool *desc_pool_base = &cmdbuf->desc_pool.base;
panvk_per_arch(cmd_prepare_push_sets)(desc_pool_base, desc_state,
&pipeline->base);
if (!cmdbuf->state.gfx.push_uniforms) {
cmdbuf->state.gfx.push_uniforms = panvk_cmd_prepare_push_uniforms(
desc_pool_base, &cmdbuf->state.push_constants,
&cmdbuf->desc_pool.base, &cmdbuf->state.push_constants,
&cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals));
}
panvk_per_arch(cmd_prepare_ubos)(desc_pool_base, desc_state,
&pipeline->base);
panvk_per_arch(cmd_prepare_textures)(desc_pool_base, desc_state,
&pipeline->base);
panvk_per_arch(cmd_prepare_samplers)(desc_pool_base, desc_state,
&pipeline->base);
/* TODO: indexed draws */
draw->tls = batch->tls.gpu;
draw->fb = batch->fb.desc.gpu;
draw->ubos = desc_state->ubos;
draw->push_uniforms = cmdbuf->state.gfx.push_uniforms;
draw->textures = desc_state->textures;
draw->samplers = desc_state->samplers;
panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
draw->instance_count, 1, 1, 1, true,
@ -1071,17 +1087,31 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
MAX3(pipeline->vs.info.tls_size, pipeline->fs.info.tls_size,
batch->tlsinfo.tls.size);
unsigned vjob_id = pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_VERTEX, false,
false, 0, 0, &draw->jobs.vertex, false);
unsigned copy_desc_job_id =
draw->jobs.vertex_copy_desc.gpu
? pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
&draw->jobs.vertex_copy_desc, false)
: 0;
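/* The vertex job takes the vertex copy-desc job as a dependency (when one
 * exists) so the VS descriptor tables are populated before it runs. */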
unsigned vjob_id =
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_VERTEX, false, false, 0,
copy_desc_job_id, &draw->jobs.vertex, false);
if (!rs->rasterizer_discard_enable && draw->position) {
/* We don't need to add frag_copy_desc as a dependency, because the
* tiler job doesn't execute the fragment shader. The fragment job
* will, and the tiler/fragment synchronization happens at the batch
* level. */
if (draw->jobs.frag_copy_desc.gpu)
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
&draw->jobs.frag_copy_desc, false);
pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
&draw->jobs.tiler, false);
}
/* Clear the dirty flags all at once */
cmdbuf->state.gfx.dirty = 0;
panvk_per_arch(cmd_unprepare_push_sets)(desc_state);
}
VKAPI_ATTR void VKAPI_CALL

View file

@ -57,6 +57,7 @@ panvk_per_arch(meta_init)(struct panvk_device *dev)
panvk_per_arch(meta_blit_init)(dev);
panvk_per_arch(meta_copy_init)(dev);
panvk_per_arch(meta_clear_init)(dev);
panvk_per_arch(meta_desc_copy_init)(dev);
}
void

View file

@ -57,19 +57,14 @@ bifrost_archs = [6, 7]
bifrost_inc_dir = ['bifrost']
bifrost_files = [
'bifrost/panvk_vX_cmd_desc_state.c',
'bifrost/panvk_vX_descriptor_set.c',
'bifrost/panvk_vX_descriptor_set_layout.c',
'bifrost/panvk_vX_meta_desc_copy.c',
'bifrost/panvk_vX_nir_lower_descriptors.c',
'bifrost/panvk_vX_pipeline.c',
'bifrost/panvk_vX_pipeline_layout.c',
]
valhall_archs = [9, 10]
valhall_inc_dir = ['valhall']
valhall_files = [
'valhall/panvk_vX_descriptor_set.c',
'valhall/panvk_vX_descriptor_set_layout.c',
]
valhall_files = []
jm_archs = [6, 7]
jm_inc_dir = ['jm']
@ -89,6 +84,8 @@ common_per_arch_files = [
panvk_entrypoints[0],
'panvk_vX_blend.c',
'panvk_vX_buffer_view.c',
'panvk_vX_descriptor_set.c',
'panvk_vX_descriptor_set_layout.c',
'panvk_vX_device.c',
'panvk_vX_image_view.c',
'panvk_vX_sampler.c',
@ -101,6 +98,8 @@ foreach arch : [6, 7, 9, 10]
# Just add the v9/v10 entrypoints for now.
per_arch_files = [
panvk_entrypoints[0],
'panvk_vX_descriptor_set.c',
'panvk_vX_descriptor_set_layout.c',
]
else
per_arch_files = common_per_arch_files

View file

@ -39,6 +39,11 @@ struct panvk_device {
struct panvk_blend_shader_cache blend_shader_cache;
struct panvk_meta meta;
struct {
struct panvk_priv_bo *shader_bo;
struct panvk_priv_bo *rsd_bo;
} desc_copy;
struct vk_device_dispatch_table cmd_dispatch;
struct panvk_queue *queues[PANVK_MAX_QUEUE_FAMILIES];

View file

@ -68,9 +68,26 @@ struct panvk_meta {
mali_ptr rsd;
} fillbuf;
} copy;
struct {
mali_ptr rsd;
} desc_copy;
};
#if PAN_ARCH
#if PAN_ARCH <= 7
struct panvk_descriptor_state;
struct panvk_pipeline_shader;
struct panvk_shader_desc_state;
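/* Returns the job that copies the descriptors referenced by the shader's
 * copy maps into its per-shader tables. A zeroed panfrost_ptr means there is
 * nothing to copy; callers check .cpu/.gpu before queueing the job. */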
struct panfrost_ptr panvk_per_arch(meta_get_copy_desc_job)(
struct panvk_device *dev, struct pan_pool *desc_pool,
const struct panvk_pipeline_shader *shader,
const struct panvk_descriptor_state *desc_state,
const struct panvk_shader_desc_state *shader_desc_state);
#endif
void panvk_per_arch(meta_init)(struct panvk_device *dev);
void panvk_per_arch(meta_cleanup)(struct panvk_device *dev);
@ -86,6 +103,8 @@ void panvk_per_arch(meta_blit_init)(struct panvk_device *dev);
void panvk_per_arch(meta_blit_cleanup)(struct panvk_device *dev);
void panvk_per_arch(meta_copy_init)(struct panvk_device *dev);
void panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev);
#endif
#endif

View file

@ -290,7 +290,6 @@ get_device_properties(const struct panvk_physical_device *device,
*/
.maxBoundDescriptorSets = 4,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSamplers = UINT16_MAX,
.maxDescriptorSetSamplers = UINT16_MAX,
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
* for our internal UBOs.
@ -304,25 +303,31 @@ get_device_properties(const struct panvk_physical_device *device,
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
* descriptors.
*/
.maxPerStageDescriptorStorageBuffers = 1 << 12,
.maxDescriptorSetStorageBuffers = 1 << 12,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSampledImages = UINT16_MAX,
.maxDescriptorSetSampledImages = UINT16_MAX,
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
*/
.maxPerStageDescriptorStorageImages = 1 << 8,
.maxDescriptorSetStorageImages = 1 << 8,
/* A maximum of 8 color render targets, and one depth-stencil render
* target.
*/
.maxPerStageDescriptorInputAttachments = 9,
.maxDescriptorSetInputAttachments = 9,
/* Could be the sum of all maxPerStageXxx values, but we limit ourselves
* to 2^16 to make things simpler.
/* We could theoretically use the maxDescriptor values here (except for
* UBOs where we're really limited to 256 on the shader side), but on
* Bifrost we have to copy some tables around, which comes at an extra
* memory/processing cost, so let's pick something smaller.
*/
.maxPerStageResources = 1 << 16,
.maxPerStageDescriptorInputAttachments = 9,
.maxPerStageDescriptorSampledImages = 256,
.maxPerStageDescriptorSamplers = 128,
.maxPerStageDescriptorStorageBuffers = 64,
.maxPerStageDescriptorStorageImages = 32,
.maxPerStageDescriptorUniformBuffers = 64,
.maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64,
/* Software limits to keep VkCommandBuffer tracking sane. */
.maxDescriptorSetUniformBuffersDynamic = 16,
.maxDescriptorSetStorageBuffersDynamic = 8,

View file

@ -18,7 +18,8 @@
#include "panvk_descriptor_set.h"
#include "panvk_macros.h"
#include "panvk_pipeline_layout.h"
#include "vk_pipeline_layout.h"
#define MAX_VS_ATTRIBS 16
@ -51,6 +52,14 @@ struct panvk_graphics_sysvals {
uint32_t base_vertex;
uint32_t base_instance;
} vs;
#if PAN_ARCH <= 7
struct {
uint64_t sets[MAX_SETS];
uint64_t vs_dyn_ssbos;
uint64_t fs_dyn_ssbos;
} desc;
#endif
};
struct panvk_compute_sysvals {
@ -60,13 +69,58 @@ struct panvk_compute_sysvals {
struct {
uint32_t x, y, z;
} local_group_size;
#if PAN_ARCH <= 7
struct {
uint64_t sets[MAX_SETS];
uint64_t dyn_ssbos;
} desc;
#endif
};
enum panvk_bifrost_desc_table_type {
PANVK_BIFROST_DESC_TABLE_INVALID = -1,
/* A UBO descriptor is encoded in 8 bytes */
PANVK_BIFROST_DESC_TABLE_UBO = 0,
/* Images use a <3DAttributeBuffer,Attribute> pair, with each element
 * stored in a separate table. */
PANVK_BIFROST_DESC_TABLE_IMG,
/* Texture and sampler descriptors are each encoded in 32 bytes */
PANVK_BIFROST_DESC_TABLE_TEXTURE,
PANVK_BIFROST_DESC_TABLE_SAMPLER,
PANVK_BIFROST_DESC_TABLE_COUNT,
};
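/* Illustrative helper, not part of this patch: the element sizes quoted in
 * the comments above, kept in one place. The IMG table is special since each
 * image is emitted as an attribute/attribute-buffer descriptor pair rather
 * than a fixed-size entry, so it is left out of this sketch. */
static inline uint32_t
panvk_bifrost_desc_table_elem_size_sketch(enum panvk_bifrost_desc_table_type type)
{
   switch (type) {
   case PANVK_BIFROST_DESC_TABLE_UBO:
      return 8;
   case PANVK_BIFROST_DESC_TABLE_TEXTURE:
   case PANVK_BIFROST_DESC_TABLE_SAMPLER:
      return 32;
   default:
      return 0;
   }
}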
#define COPY_DESC_HANDLE(table, idx) ((table << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle)&BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
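/* Illustrative only (assumes assert() is available): a map entry referring
 * to descriptor #5 of set #2 round-trips through the pack/extract macros. */
static inline void
panvk_copy_desc_handle_example(void)
{
   const uint32_t handle = COPY_DESC_HANDLE(2, 5);

   assert(COPY_DESC_HANDLE_EXTRACT_TABLE(handle) == 2);
   assert(COPY_DESC_HANDLE_EXTRACT_INDEX(handle) == 5);
   (void)handle;
}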
struct panvk_shader_desc_map {
/* The index in the map serves as the table offset; the value of an
 * entry is a COPY_DESC_HANDLE() encoding the source set and the
 * index of the descriptor within that set. */
uint32_t *map;
/* Number of entries in the map array. */
uint32_t count;
};
struct panvk_shader_desc_info {
uint32_t used_set_mask;
struct panvk_shader_desc_map dyn_ubos;
struct panvk_shader_desc_map dyn_ssbos;
struct panvk_shader_desc_map others[PANVK_BIFROST_DESC_TABLE_COUNT];
};
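/* Rough CPU-side sketch (illustrative only, not the pilot shader from this
 * series) of what a desc_map describes: entry i gives the source (set,
 * descriptor) pair, and i itself is the destination slot in the per-shader
 * table. The uniform elem_size, the raw set_descs pointers and the use of
 * memcpy()/<string.h> are simplifying assumptions made just for this sketch. */
static inline void
panvk_copy_descs_sketch(const struct panvk_shader_desc_map *map,
                        const uint8_t *const set_descs[MAX_SETS],
                        uint8_t *table, uint32_t elem_size)
{
   for (uint32_t i = 0; i < map->count; i++) {
      const uint32_t src_set = COPY_DESC_HANDLE_EXTRACT_TABLE(map->map[i]);
      const uint32_t src_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(map->map[i]);

      memcpy(table + i * elem_size,
             set_descs[src_set] + src_idx * elem_size, elem_size);
   }
}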
struct panvk_shader {
struct pan_shader_info info;
struct util_dynarray binary;
struct pan_compute_dim local_size;
bool has_img_access;
struct panvk_shader_desc_info desc_info;
};
bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
@ -75,15 +129,15 @@ bool panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
struct panvk_shader *panvk_per_arch(shader_create)(
struct panvk_device *dev, const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
const VkAllocationCallbacks *alloc);
const struct vk_pipeline_layout *layout, const VkAllocationCallbacks *alloc);
void panvk_per_arch(shader_destroy)(struct panvk_device *dev,
struct panvk_shader *shader,
const VkAllocationCallbacks *alloc);
bool panvk_per_arch(nir_lower_descriptors)(
struct nir_shader *nir, struct panvk_device *dev,
const struct panvk_pipeline_layout *layout, bool *has_img_access_out);
nir_shader *nir, struct panvk_device *dev,
const struct vk_pipeline_layout *layout,
struct panvk_shader_desc_info *shader_desc_info);
#endif

View file

@ -110,9 +110,20 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
unsigned blksz = vk_format_get_blocksize(pCreateInfo->format);
pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) {
/* The format is the only thing we lack to emit attribute descriptors
* when copying from the set to the attribute tables. Instead of
* growing the descriptor to store an extra format field, we pack
* the 22-bit format with the texel stride, which is expected to
* fit in the remaining 10 bits.
*/
uint32_t hw_fmt = GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
assert(blksz < BITFIELD_MASK(10));
assert(hw_fmt < BITFIELD_MASK(22));
cfg.type = MALI_ATTRIBUTE_TYPE_3D_LINEAR;
cfg.pointer = address;
cfg.stride = blksz;
cfg.stride = blksz | (hw_fmt << 10);
cfg.size = view->vk.elements * blksz;
}
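/* For reference (illustrative, mirroring the packing above): the copy path
 * can split the two fields back out of the 32-bit stride word with
 *   blksz  = stride & BITFIELD_MASK(10);   (texel stride, low 10 bits)
 *   hw_fmt = stride >> 10;                 (hardware format, upper 22 bits)
 */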

View file

@ -167,11 +167,24 @@ panvk_per_arch(CreateImageView)(VkDevice _device,
is_3d ? view->pview.first_layer : 0);
pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) {
/* The format is the only thing we lack to emit attribute descriptors
* when copying from the set to the attribute tables. Instead of
* growing the descriptor to store an extra format field, we pack
* the 22-bit format with the texel stride, which is expected to
* fit in the remaining 10 bits.
*/
uint32_t fmt_blksize = util_format_get_blocksize(view->pview.format);
uint32_t hw_fmt =
GENX(panfrost_format_from_pipe_format)(view->pview.format)->hw;
assert(fmt_blksize < BITFIELD_MASK(10));
assert(hw_fmt < BITFIELD_MASK(22));
cfg.type = image->pimage.layout.modifier == DRM_FORMAT_MOD_LINEAR
? MALI_ATTRIBUTE_TYPE_3D_LINEAR
: MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED;
cfg.pointer = image->pimage.data.base + offset;
cfg.stride = util_format_get_blocksize(view->pview.format);
cfg.stride = fmt_blksize | (hw_fmt << 10);
cfg.size = pan_kmod_bo_size(image->bo) - offset;
}

View file

@ -32,8 +32,6 @@
#include "panvk_device.h"
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_pipeline.h"
#include "panvk_pipeline_layout.h"
#include "panvk_shader.h"
#include "spirv/nir_spirv.h"
@ -49,6 +47,7 @@
#include "pan_shader.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "vk_util.h"
static nir_def *
@ -196,7 +195,7 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
const struct vk_pipeline_layout *layout,
const VkAllocationCallbacks *alloc)
{
struct panvk_physical_device *phys_dev =
@ -289,7 +288,7 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors), dev, layout,
&shader->has_img_access);
&shader->desc_info);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
@ -365,9 +364,16 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
/* Patch the descriptor count */
shader->info.ubo_count =
panvk_per_arch(pipeline_layout_total_ubo_count)(layout);
shader->info.sampler_count = layout->num_samplers;
shader->info.texture_count = layout->num_textures;
shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_UBO].count +
shader->desc_info.dyn_ubos.count;
shader->info.texture_count =
shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_TEXTURE].count;
shader->info.sampler_count =
shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_SAMPLER].count;
/* Dummy sampler. */
if (!shader->info.sampler_count && shader->info.texture_count)
shader->info.sampler_count++;
if (stage == MESA_SHADER_VERTEX) {
/* We leave holes in the attribute locations, but pan_shader.c assumes the
@ -384,9 +390,10 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
/* Image attributes start at MAX_VS_ATTRIBS in the VS attribute table,
* and zero in other stages.
*/
if (shader->has_img_access)
if (shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_IMG].count > 0)
shader->info.attribute_count =
layout->num_imgs + (stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0);
shader->desc_info.others[PANVK_BIFROST_DESC_TABLE_IMG].count +
(stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0);
shader->local_size.x = nir->info.workgroup_size[0];
shader->local_size.y = nir->info.workgroup_size[1];
@ -403,6 +410,7 @@ panvk_per_arch(shader_destroy)(struct panvk_device *dev,
const VkAllocationCallbacks *alloc)
{
util_dynarray_fini(&shader->binary);
free(shader->desc_info.dyn_ubos.map);
vk_free2(&dev->vk.alloc, alloc, shader);
}