pvr, pco: dynamic buffer and immutable sampler support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
Simon Perretta, 2025-02-04 15:25:49 +00:00, committed by Marge Bot
parent 58e437781b
commit 0019b5ccaf
8 changed files with 297 additions and 32 deletions
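
For context, this wires up VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC / VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC and immutable samplers in the PowerVR driver. A minimal application-side sketch of the dynamic-offset path this implements (standard Vulkan API; the handles and the 256-byte stride are illustrative assumptions, not part of this commit):

```c
#include <vulkan/vulkan.h>

/* Bind one descriptor set containing a single dynamic uniform buffer,
 * selecting a per-frame slice of the buffer via pDynamicOffsets. */
static void bind_per_frame_slice(VkCommandBuffer cmd_buf,
                                 VkPipelineLayout layout,
                                 VkDescriptorSet set,
                                 uint32_t frame_index)
{
   /* One offset per dynamic descriptor, consumed in set/binding order. */
   const uint32_t dynamic_offset = frame_index * 256; /* assumed aligned stride */

   vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                           0 /* firstSet */, 1, &set,
                           1, &dynamic_offset);
}
```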


@@ -143,6 +143,7 @@ typedef struct _pco_binding_data {
/** PCO descriptor set data. */
typedef struct _pco_descriptor_set_data {
pco_range range; /** Descriptor location range. */
pco_range dynamic_range; /** Dynamic descriptor location range. */
unsigned binding_count; /** Number of bindings. */
pco_binding_data *bindings; /** Descriptor set bindings. */
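
The new dynamic_range gives each descriptor set a second contiguous run of shared registers, reserved for its dynamic buffer descriptors and filled in by pvr_setup_descriptors (last file in this commit). A minimal sketch of that bookkeeping, using simplified stand-ins for pco_range (start/count in dwords), not the driver's real types:

```c
/* Simplified stand-ins for pco_range and pco_descriptor_set_data. */
struct range { unsigned start, count; };
struct set_data { struct range range, dynamic_range; };

/* Reserve `count_dw` dwords of shared registers for a set's dynamic
 * descriptors, bumping the running shared-register allocator. */
static void reserve_dynamic_shareds(struct set_data *set,
                                    unsigned *shareds, unsigned count_dw)
{
   set->dynamic_range.start = *shareds;
   set->dynamic_range.count = count_dw;
   *shareds += count_dw;
}
```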


@@ -720,8 +720,23 @@ static pco_instr *trans_load_buffer(trans_ctx *tctx,
pco_ref_null(),
.s = true);
pco_ref addr_comps_dyn_off[2];
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps_dyn_off);
pco_ref dyn_off_reg = pco_ref_hwreg(sh_index, PCO_REG_CLASS_SHARED);
dyn_off_reg = pco_ref_offset(dyn_off_reg, 3);
pco_add64_32(&tctx->b,
addr_comps_dyn_off[0],
addr_comps_dyn_off[1],
addr_comps[0],
addr_comps[1],
dyn_off_reg,
pco_ref_null(),
.s = true);
pco_ref addr = pco_ref_new_ssa_addr(tctx->func);
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps), addr_comps);
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps_dyn_off), addr_comps_dyn_off);
return pco_ld(&tctx->b,
dest,
@@ -770,22 +785,40 @@ static pco_instr *trans_store_buffer(trans_ctx *tctx,
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
pco_ref addr_data_comps[3] = {
[2] = data_src,
};
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps);
pco_ref addr_comps[2];
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
pco_add64_32(&tctx->b,
addr_data_comps[0],
addr_data_comps[1],
addr_comps[0],
addr_comps[1],
base_addr[0],
base_addr[1],
offset_src,
pco_ref_null(),
.s = true);
pco_ref addr_data_comps_dyn_off[3] = {
[2] = data_src,
};
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps_dyn_off);
pco_ref dyn_off_reg = pco_ref_hwreg(sh_index, PCO_REG_CLASS_SHARED);
dyn_off_reg = pco_ref_offset(dyn_off_reg, 3);
pco_add64_32(&tctx->b,
addr_data_comps_dyn_off[0],
addr_data_comps_dyn_off[1],
addr_comps[0],
addr_comps[1],
dyn_off_reg,
pco_ref_null(),
.s = true);
pco_ref addr_data = pco_ref_new_ssa_addr_data(tctx->func, chans);
pco_vec(&tctx->b, addr_data, ARRAY_SIZE(addr_data_comps), addr_data_comps);
pco_vec(&tctx->b,
addr_data,
ARRAY_SIZE(addr_data_comps_dyn_off),
addr_data_comps_dyn_off);
pco_ref data_comp = pco_ref_new_ssa(tctx->func,
pco_ref_get_bits(data_src),
@@ -866,22 +899,40 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx,
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
pco_ref addr_data_comps[3] = {
[2] = data_src,
};
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps);
pco_ref addr_comps[2];
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
pco_add64_32(&tctx->b,
addr_data_comps[0],
addr_data_comps[1],
addr_comps[0],
addr_comps[1],
base_addr[0],
base_addr[1],
offset_src,
pco_ref_null(),
.s = true);
pco_ref addr_data_comps_dyn_off[3] = {
[2] = data_src,
};
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps_dyn_off);
pco_ref dyn_off_reg = pco_ref_hwreg(sh_index, PCO_REG_CLASS_SHARED);
dyn_off_reg = pco_ref_offset(dyn_off_reg, 3);
pco_add64_32(&tctx->b,
addr_data_comps_dyn_off[0],
addr_data_comps_dyn_off[1],
addr_comps[0],
addr_comps[1],
dyn_off_reg,
pco_ref_null(),
.s = true);
pco_ref addr_data = pco_ref_new_ssa_addr_data(tctx->func, chans);
pco_vec(&tctx->b, addr_data, ARRAY_SIZE(addr_data_comps), addr_data_comps);
pco_vec(&tctx->b,
addr_data,
ARRAY_SIZE(addr_data_comps_dyn_off),
addr_data_comps_dyn_off);
switch (bits) {
case 32:
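
Across the load, store and atomic buffer paths the lowered code now performs a second add64_32: the 64-bit base address from the descriptor plus the access offset, then plus a 32-bit dynamic offset read from the shared register three dwords into the buffer descriptor (the new offset field of pvr_buffer_descriptor shown later). A plain-C sketch of the resulting address arithmetic; the names are illustrative, and the assumption that non-dynamic descriptors leave that dword at zero is mine:

```c
#include <stdint.h>

/* Effective buffer address: descriptor base + access offset + dynamic
 * offset. Presumably the dynamic-offset dword is zero for non-dynamic
 * buffers, making the extra add a no-op. */
static uint64_t effective_buffer_address(uint32_t base_lo, uint32_t base_hi,
                                         uint32_t access_offset,
                                         uint32_t dynamic_offset)
{
   uint64_t addr = ((uint64_t)base_hi << 32) | base_lo;
   addr += access_offset;  /* first add64_32 */
   addr += dynamic_offset; /* second add64_32, source: shared reg + 3 dwords */
   return addr;
}
```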


@@ -1075,6 +1075,9 @@ struct pvr_const_map_entry_special_buffer {
uint8_t buffer_type;
uint32_t buffer_index;
uint32_t size_in_dwords;
uint32_t data;
} PVR_PACKED;
struct pvr_const_map_entry_doutu_address {


@@ -1574,6 +1574,7 @@ void pvr_pds_generate_descriptor_upload_program(
bool halt = last_dma && !input_program->secondary_program_present;
switch (buffer->type) {
case PVR_BUFFER_TYPE_DYNAMIC:
case PVR_BUFFER_TYPE_PUSH_CONSTS:
case PVR_BUFFER_TYPE_BLEND_CONSTS:
case PVR_BUFFER_TYPE_POINT_SAMPLER:
@@ -1587,6 +1588,16 @@ void pvr_pds_generate_descriptor_upload_program(
special_buffer_entry->type =
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
special_buffer_entry->buffer_type = buffer->type;
special_buffer_entry->size_in_dwords = buffer->size_in_dwords;
switch (buffer->type) {
case PVR_BUFFER_TYPE_DYNAMIC:
special_buffer_entry->data = buffer->desc_set;
break;
default:
break;
}
break;
}
case PVR_BUFFER_TYPE_COMPILE_TIME: {


@@ -2610,6 +2610,11 @@ void pvr_CmdBindDescriptorSets2KHR(
const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_pipeline_layout,
pipeline_layout,
pBindDescriptorSetsInfo->layout);
unsigned dyn_off = 0;
const bool has_dyn_offs = pBindDescriptorSetsInfo->dynamicOffsetCount > 0;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
@@ -2624,6 +2629,16 @@ void pvr_CmdBindDescriptorSets2KHR(
pBindDescriptorSetsInfo->pDescriptorSets[u]);
unsigned desc_set = u + pBindDescriptorSetsInfo->firstSet;
const struct pvr_descriptor_set_layout *set_layout =
vk_to_pvr_descriptor_set_layout(
pipeline_layout->set_layouts[desc_set]);
for (unsigned u = 0; u < set_layout->dynamic_buffer_count; ++u) {
set->dynamic_buffers[u].offset =
has_dyn_offs ? pBindDescriptorSetsInfo->pDynamicOffsets[dyn_off++]
: 0;
}
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
if (graphics_desc_state->sets[desc_set] != set) {
graphics_desc_state->sets[desc_set] = set;
@@ -2639,6 +2654,8 @@ void pvr_CmdBindDescriptorSets2KHR(
}
}
assert(dyn_off == pBindDescriptorSetsInfo->dynamicOffsetCount);
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
cmd_buffer->state.dirty.gfx_desc_dirty = true;
@@ -3650,6 +3667,32 @@ static VkResult pvr_setup_descriptor_mappings(
(struct pvr_const_map_entry_special_buffer *)entries;
switch (special_buff_entry->buffer_type) {
case PVR_BUFFER_TYPE_DYNAMIC: {
unsigned desc_set = special_buff_entry->data;
const struct pvr_descriptor_set *descriptor_set;
struct pvr_suballoc_bo *dynamic_desc_bo;
assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
descriptor_set = desc_state->sets[desc_set];
assert(descriptor_set);
result = pvr_cmd_buffer_upload_general(
cmd_buffer,
descriptor_set->dynamic_buffers,
special_buff_entry->size_in_dwords * sizeof(uint32_t),
&dynamic_desc_bo);
if (result != VK_SUCCESS)
return result;
PVR_WRITE(qword_buffer,
dynamic_desc_bo->dev_addr.addr,
special_buff_entry->const_offset,
pds_info->data_size_in_dwords);
break;
}
case PVR_BUFFER_TYPE_PUSH_CONSTS: {
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
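
Two halves of the runtime path appear in this file: pvr_CmdBindDescriptorSets2KHR copies pDynamicOffsets into each bound set's dynamic_buffers[].offset, and pvr_setup_descriptor_mappings later uploads that array with pvr_cmd_buffer_upload_general and patches its device address into the PDS data section. Vulkan consumes dynamic offsets in set order, then binding/array-element order; a simplified sketch of that consumption loop, with stand-in types rather than the driver's:

```c
#include <assert.h>
#include <stdint.h>

struct dyn_buffer { uint64_t addr; uint32_t size; uint32_t offset; };
struct desc_set { unsigned dynamic_buffer_count; struct dyn_buffer *dynamic_buffers; };

/* Mirror of the pvr_CmdBindDescriptorSets2KHR loop: each bound set takes
 * the next dynamic_buffer_count offsets, or 0 if none were provided. */
static void apply_dynamic_offsets(struct desc_set *const *sets, unsigned set_count,
                                  const uint32_t *offsets, unsigned offset_count)
{
   unsigned consumed = 0;

   for (unsigned s = 0; s < set_count; ++s)
      for (unsigned d = 0; d < sets[s]->dynamic_buffer_count; ++d)
         sets[s]->dynamic_buffers[d].offset =
            offset_count ? offsets[consumed++] : 0;

   assert(offset_count == 0 || consumed == offset_count);
}
```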


@@ -194,7 +194,7 @@ enum pvr_query_type {
struct pvr_buffer_descriptor {
uint64_t addr;
uint32_t size;
uint32_t rsvd;
uint32_t offset;
} PACKED;
static_assert(sizeof(struct pvr_buffer_descriptor) == 4 * sizeof(uint32_t),
"pvr_buffer_descriptor size is invalid.");
@@ -245,6 +245,7 @@ struct pvr_descriptor_set_layout_binding {
struct pvr_sampler **immutable_samplers;
unsigned offset; /** Offset within the descriptor set. */
unsigned dynamic_buffer_idx;
unsigned stride; /** Stride of each descriptor in this binding. */
};
@@ -302,6 +303,7 @@ struct pvr_descriptor {
struct pvr_descriptor_set {
struct vk_object_base base;
struct list_head link; /** Link in pvr_descriptor_pool::desc_sets. */
struct pvr_descriptor_set_layout *layout;
struct pvr_descriptor_pool *pool;
@@ -310,7 +312,7 @@ struct pvr_descriptor_set {
pvr_dev_addr_t dev_addr; /** Descriptor set device address. */
void *mapping; /** Descriptor set CPU mapping. */
struct list_head link; /** Link in pvr_descriptor_pool::desc_sets. */
struct pvr_buffer_descriptor dynamic_buffers[];
};
struct pvr_event {
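
The data-structure side of the change: pvr_buffer_descriptor repurposes its reserved dword as offset (still 16 bytes, as the static_assert checks), and dynamic buffer descriptors now live CPU-side in a flexible array member at the end of pvr_descriptor_set rather than in the pool's device memory. A minimal sketch of the flexible-array allocation used for such a set, with simplified types rather than the driver's actual structs:

```c
#include <stdint.h>
#include <stdlib.h>

struct buffer_descriptor { uint64_t addr; uint32_t size; uint32_t offset; };
_Static_assert(sizeof(struct buffer_descriptor) == 4 * sizeof(uint32_t),
               "buffer_descriptor size is invalid.");

struct descriptor_set {
   unsigned dynamic_buffer_count;
   /* ...layout, pool, mapping, etc. elided... */
   struct buffer_descriptor dynamic_buffers[]; /* one per dynamic descriptor */
};

static struct descriptor_set *alloc_set(unsigned dynamic_buffer_count)
{
   /* sizeof(*set) does not include the flexible array, so add it on. */
   struct descriptor_set *set =
      calloc(1, sizeof(*set) +
                dynamic_buffer_count * sizeof(struct buffer_descriptor));
   if (set)
      set->dynamic_buffer_count = dynamic_buffer_count;
   return set;
}
```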


@@ -77,6 +77,8 @@ static unsigned pvr_descriptor_size(VkDescriptorType type)
switch (type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
return sizeof(struct pvr_buffer_descriptor);
case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -178,6 +180,7 @@ VkResult pvr_CreateDescriptorSetLayout(
assert(!binding_flags_create_info ||
binding_flags_create_info->bindingCount == binding_count);
unsigned dynamic_buffer_idx = 0;
for (unsigned b = 0; b < pCreateInfo->bindingCount; ++b) {
const VkDescriptorSetLayoutBinding *binding = &bindings[b];
@@ -187,10 +190,19 @@ VkResult pvr_CreateDescriptorSetLayout(
struct pvr_descriptor_set_layout_binding *layout_binding =
&layout_bindings[binding->binding];
layout_binding->offset = layout->size;
layout_binding->stride = pvr_descriptor_size(binding->descriptorType);
layout->size += binding->descriptorCount * layout_binding->stride;
if (vk_descriptor_type_is_dynamic(binding->descriptorType)) {
layout_binding->offset = ~0;
layout_binding->dynamic_buffer_idx = dynamic_buffer_idx;
dynamic_buffer_idx += binding->descriptorCount;
} else {
layout_binding->dynamic_buffer_idx = ~0;
layout_binding->offset = layout->size;
layout->size += binding->descriptorCount * layout_binding->stride;
}
layout_binding->type = binding->descriptorType;
@@ -217,6 +229,8 @@ VkResult pvr_CreateDescriptorSetLayout(
}
}
assert(dynamic_buffer_count == dynamic_buffer_idx);
free(bindings);
*pSetLayout = pvr_descriptor_set_layout_to_handle(layout);
@@ -265,6 +279,9 @@ VkResult pvr_CreateDescriptorPool(VkDevice _device,
const uint32_t descriptor_count =
pCreateInfo->pPoolSizes[i].descriptorCount;
if (vk_descriptor_type_is_dynamic(type))
continue;
bo_size += descriptor_count * pvr_descriptor_size(type);
}
}
@@ -368,6 +385,12 @@ VkResult pvr_ResetDescriptorPool(VkDevice _device,
return VK_SUCCESS;
}
static void
write_sampler(const struct pvr_descriptor_set *set,
const VkDescriptorImageInfo *image_info,
const struct pvr_descriptor_set_layout_binding *binding,
uint32_t elem);
static VkResult
pvr_descriptor_set_create(struct pvr_device *device,
struct pvr_descriptor_pool *pool,
@@ -375,13 +398,18 @@ pvr_descriptor_set_create(struct pvr_device *device,
struct pvr_descriptor_set **const descriptor_set_out)
{
struct pvr_descriptor_set *set;
unsigned set_alloc_size;
VkResult result;
*descriptor_set_out = NULL;
set_alloc_size = sizeof(*set);
set_alloc_size +=
layout->dynamic_buffer_count * sizeof(*set->dynamic_buffers);
set = vk_object_zalloc(&device->vk,
&pool->alloc,
sizeof(*set),
set_alloc_size,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -399,6 +427,19 @@ pvr_descriptor_set_create(struct pvr_device *device,
list_addtail(&set->link, &pool->desc_sets);
/* Setup immutable samplers. */
for (unsigned u = 0; u < layout->binding_count; ++u) {
const struct pvr_descriptor_set_layout_binding *binding =
&layout->bindings[u];
if (binding->type == VK_DESCRIPTOR_TYPE_SAMPLER &&
binding->immutable_samplers) {
for (uint32_t j = 0; j < binding->descriptor_count; j++) {
write_sampler(set, NULL, binding, j);
}
}
}
*descriptor_set_out = set;
return VK_SUCCESS;
@@ -495,20 +536,45 @@ write_buffer(const struct pvr_descriptor_set *set,
memcpy(desc_mapping, &buffer_desc, sizeof(buffer_desc));
}
static void
write_dynamic_buffer(struct pvr_descriptor_set *set,
const VkDescriptorBufferInfo *buffer_info,
const struct pvr_descriptor_set_layout_binding *binding,
uint32_t elem)
{
VK_FROM_HANDLE(pvr_buffer, buffer, buffer_info->buffer);
assert(binding->dynamic_buffer_idx != ~0);
const unsigned desc_offset = binding->dynamic_buffer_idx + elem;
struct pvr_buffer_descriptor *desc_mapping =
&set->dynamic_buffers[desc_offset];
const pvr_dev_addr_t buffer_addr =
PVR_DEV_ADDR_OFFSET(buffer->dev_addr, buffer_info->offset);
UNUSED uint32_t range =
vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range);
desc_mapping->addr = buffer_addr.addr;
desc_mapping->size = range;
}
static void
write_sampler(const struct pvr_descriptor_set *set,
const VkDescriptorImageInfo *image_info,
const struct pvr_descriptor_set_layout_binding *binding,
uint32_t elem)
{
PVR_FROM_HANDLE(pvr_sampler, info_sampler, image_info->sampler);
const unsigned desc_offset = binding->offset + (elem * binding->stride);
void *desc_mapping = (uint8_t *)set->mapping + desc_offset;
struct pvr_sampler *sampler;
struct pvr_sampler *sampler = binding->immutable_sampler_count
? binding->immutable_samplers[elem]
: info_sampler;
if (binding->immutable_sampler_count) {
sampler = binding->immutable_samplers[elem];
} else {
assert(image_info);
PVR_FROM_HANDLE(pvr_sampler, info_sampler, image_info->sampler);
sampler = info_sampler;
}
struct pvr_sampler_descriptor sampler_desc = sampler->descriptor;
memcpy(desc_mapping, &sampler_desc, sizeof(sampler_desc));
@@ -670,6 +736,16 @@ void pvr_UpdateDescriptorSets(VkDevice _device,
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
write_dynamic_buffer(set,
&write->pBufferInfo[j],
binding,
write->dstArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
write_sampler(set,
@@ -760,6 +836,19 @@ void pvr_UpdateDescriptorSets(VkDevice _device,
assert(src_binding->stride == dst_binding->stride);
if (vk_descriptor_type_is_dynamic(src_binding->type)) {
const unsigned src_desc_offset =
src_binding->dynamic_buffer_idx + copy->srcArrayElement;
const unsigned dst_desc_offset =
dst_binding->dynamic_buffer_idx + copy->dstArrayElement;
memcpy(&dst_set->dynamic_buffers[dst_desc_offset],
&src_set->dynamic_buffers[src_desc_offset],
sizeof(*src_set->dynamic_buffers) * copy->descriptorCount);
continue;
}
if (src_binding->stride > 0) {
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
const unsigned src_desc_offset =
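
To round out the descriptor-set code: write_dynamic_buffer fills the host-side dynamic_buffers[] entry (address and range, with offset patched at bind time), and immutable samplers are written once at set creation, which is why write_sampler now tolerates a NULL image_info and gets a forward declaration. An application-side sketch of baking an immutable sampler into a layout, using only standard Vulkan API with handles assumed valid:

```c
#include <vulkan/vulkan.h>

/* Create a set layout whose binding 0 is a single immutable sampler.
 * The driver copies the sampler descriptor at set allocation, so later
 * vkUpdateDescriptorSets calls never need to supply it. */
static VkResult create_layout_with_immutable_sampler(VkDevice device,
                                                     VkSampler sampler,
                                                     VkDescriptorSetLayout *out)
{
   const VkDescriptorSetLayoutBinding binding = {
      .binding = 0,
      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
      .descriptorCount = 1,
      .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      .pImmutableSamplers = &sampler,
   };
   const VkDescriptorSetLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .bindingCount = 1,
      .pBindings = &binding,
   };
   return vkCreateDescriptorSetLayout(device, &info, NULL, out);
}
```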


@@ -532,6 +532,7 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
const pco_descriptor_set_data *desc_set_data =
&data->common.desc_sets[desc_set];
const pco_range *desc_set_range = &desc_set_data->range;
const pco_range *desc_set_dynamic_range = &desc_set_data->dynamic_range;
/* If the descriptor set isn't for this stage or is unused, skip it. */
if (!(BITFIELD_BIT(stage) & set_layout->stage_flags)) {
@@ -542,14 +543,23 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
if (!desc_set_data->used)
continue;
program.descriptor_sets[program.descriptor_set_count] =
(struct pvr_pds_descriptor_set){
.descriptor_set = desc_set,
.size_in_dwords = desc_set_range->count,
.destination = desc_set_range->start,
};
if (desc_set_range->count > 0) {
program.descriptor_sets[program.descriptor_set_count++] =
(struct pvr_pds_descriptor_set){
.descriptor_set = desc_set,
.size_in_dwords = desc_set_range->count,
.destination = desc_set_range->start,
};
}
program.descriptor_set_count++;
if (desc_set_dynamic_range->count > 0) {
program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
.type = PVR_BUFFER_TYPE_DYNAMIC,
.size_in_dwords = desc_set_dynamic_range->count,
.destination = desc_set_dynamic_range->start,
.desc_set = desc_set,
};
}
}
pds_info->entries = vk_alloc2(&device->vk.alloc,
@@ -2136,6 +2146,10 @@ static void pvr_setup_descriptors(pco_data *data,
&set_layout->bindings[binding];
pco_binding_data *binding_data = &desc_set_data->bindings[binding];
/* Skip dynamic buffer bindings. */
if (layout_binding->offset == ~0)
continue;
binding_data->range = (pco_range){
.start = desc_set_range->start +
(layout_binding->offset / sizeof(uint32_t)),
@@ -2147,6 +2161,57 @@ static void pvr_setup_descriptors(pco_data *data,
}
}
/* Allocate shareds for the dynamic descriptors. */
for (unsigned desc_set = 0; desc_set < layout->set_count; ++desc_set) {
const struct pvr_descriptor_set_layout *set_layout =
vk_to_pvr_descriptor_set_layout(layout->set_layouts[desc_set]);
const unsigned desc_set_dynamic_size_dw =
(set_layout->dynamic_buffer_count *
sizeof(struct pvr_buffer_descriptor)) /
sizeof(uint32_t);
pco_descriptor_set_data *desc_set_data =
&data->common.desc_sets[desc_set];
pco_range *desc_set_dynamic_range = &desc_set_data->dynamic_range;
if (!desc_set_dynamic_size_dw)
continue;
/* If the descriptor set isn't for this stage or is unused, skip it. */
if (!(BITFIELD_BIT(stage) & set_layout->stage_flags)) {
assert(!desc_set_data->used);
continue;
}
if (!desc_set_data->used)
continue;
desc_set_dynamic_range->start = data->common.shareds;
desc_set_dynamic_range->count = desc_set_dynamic_size_dw;
data->common.shareds += desc_set_dynamic_size_dw;
for (unsigned binding = 0; binding < set_layout->binding_count;
++binding) {
const struct pvr_descriptor_set_layout_binding *layout_binding =
&set_layout->bindings[binding];
pco_binding_data *binding_data = &desc_set_data->bindings[binding];
/* Skip non-dynamic bindings. */
if (layout_binding->dynamic_buffer_idx == ~0)
continue;
binding_data->range = (pco_range){
.start = desc_set_dynamic_range->start +
((layout_binding->dynamic_buffer_idx *
sizeof(struct pvr_buffer_descriptor)) /
sizeof(uint32_t)),
.count =
(layout_binding->stride * layout_binding->descriptor_count) /
sizeof(uint32_t),
.stride = layout_binding->stride / sizeof(uint32_t),
};
}
}
if (data->common.push_consts.used > 0) {
unsigned count = data->common.push_consts.used;
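
Finally, pvr_setup_descriptors reserves a run of shared registers for each used set's dynamic descriptors and records, per dynamic binding, where that binding's descriptors start within the run. Everything is counted in dwords and each pvr_buffer_descriptor is four dwords; a small sketch of the arithmetic, with illustrative helper names that are not part of the driver:

```c
#include <stdint.h>

#define BUFFER_DESC_DWORDS 4u /* sizeof(struct pvr_buffer_descriptor) / sizeof(uint32_t) */

/* Dwords of shareds a set needs for all of its dynamic descriptors. */
static unsigned dynamic_range_size_dw(unsigned dynamic_buffer_count)
{
   return dynamic_buffer_count * BUFFER_DESC_DWORDS;
}

/* Shared-register dword index where dynamic descriptor `dynamic_buffer_idx`
 * of a set starts, given the set's dynamic range start. */
static unsigned dynamic_desc_start_dw(unsigned dynamic_range_start,
                                      unsigned dynamic_buffer_idx)
{
   return dynamic_range_start + dynamic_buffer_idx * BUFFER_DESC_DWORDS;
}
```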