pvr: temporarily tweak support required for query programs

This enables the legacy query program implementation to keep functioning
with the updated descriptor set code, until that legacy implementation is
replaced with NIR/uscgen.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-07-01 15:13:15 +01:00 committed by Marge Bot
parent 35c38370c0
commit 61d5240850
4 changed files with 84 additions and 47 deletions

View file

@ -1501,6 +1501,7 @@ void pvr_pds_generate_descriptor_upload_program(
unsigned int next_const64;
unsigned int next_const32;
unsigned int instruction = 0;
uint32_t compile_time_buffer_index = 0;
unsigned int total_dma_count = 0;
unsigned int running_dma_count = 0;
@ -1584,6 +1585,18 @@ void pvr_pds_generate_descriptor_upload_program(
special_buffer_entry->buffer_type = buffer->type;
break;
}
case PVR_BUFFER_TYPE_COMPILE_TIME: {
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
special_buffer_entry =
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
sizeof(*special_buffer_entry));
special_buffer_entry->type =
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_COMPILE_TIME;
special_buffer_entry->buffer_index = compile_time_buffer_index++;
break;
}
}
entry_write_state.entry->const_offset = next_const64 * 2;

View file

@ -3754,7 +3754,7 @@ void pvr_compute_update_shared_private(
info = (struct pvr_compute_kernel_info){
.indirect_buffer_addr = PVR_DEV_ADDR_INVALID,
.usc_common_size =
DIV_ROUND_UP(const_shared_regs,
DIV_ROUND_UP(PVR_DW_TO_BYTES(const_shared_regs),
ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE),
.pds_data_size =
DIV_ROUND_UP(PVR_DW_TO_BYTES(pipeline->pds_shared_update_data_size_dw),

View file

@ -1360,6 +1360,16 @@ VkResult pvr_pds_compute_shader_create_and_upload(
PDS_GENERATE_CODE_SEGMENT,
dev_info);
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
unsigned offset = program->num_workgroups_constant_offset_in_dwords[0];
if (program->num_work_groups_regs[u] != PVR_PDS_REG_UNUSED)
data_buffer[offset + u] = 0;
offset = program->base_workgroup_constant_offset_in_dwords[0];
if (program->work_group_input_regs[u] != PVR_PDS_REG_UNUSED)
data_buffer[offset + u] = 0;
}
result = pvr_gpu_upload_pds(device,
data_buffer,
program->data_size,

View file

@ -66,7 +66,7 @@ static VkResult pvr_create_compute_secondary_prog(
VkResult result;
info->entries =
vk_alloc(&device->vk.alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
vk_zalloc(&device->vk.alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!info->entries)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -140,9 +140,11 @@ static VkResult pvr_create_compute_query_program(
{
const uint32_t cache_line_size =
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
struct pvr_pds_compute_shader_program pds_primary_prog;
struct pvr_pds_compute_shader_program pds_primary_prog = { 0 };
VkResult result;
memset(query_prog, 0, sizeof(*query_prog));
/* No support for query constant calc program. */
assert(shader_factory_info->const_calc_prog_inst_bytes == 0);
/* No support for query coefficient update program. */
@ -488,7 +490,8 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
const struct pvr_shader_factory_info *shader_factory_info;
struct pvr_sampler_descriptor sampler_state;
uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
struct pvr_image_descriptor image_descriptor;
const struct pvr_compute_query_shader *query_prog;
struct pvr_private_compute_pipeline pipeline;
const uint32_t buffer_count = core_count;
@ -498,7 +501,7 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
struct pvr_suballoc_bo *pvr_bo;
VkResult result;
pvr_csb_pack (&sampler_state.words[0], TEXSTATE_SAMPLER_WORD0, reg) {
pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER_WORD0, reg) {
reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
@ -509,7 +512,7 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
}
/* clang-format off */
pvr_csb_pack (&sampler_state.words[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
/* clang-format on */
switch (query_info->type) {
@ -576,11 +579,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
switch (query_info->type) {
case PVR_QUERY_TYPE_AVAILABILITY_WRITE: {
struct pvr_image_descriptor image_sampler_state[4];
uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
uint32_t image_sampler_idx = 0;
memcpy(&image_sampler_state[image_sampler_idx],
&sampler_state,
memcpy(&image_sampler_state[image_sampler_idx][0],
&sampler_state[0],
sizeof(sampler_state));
image_sampler_idx++;
@ -589,9 +592,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
num_query_indices,
query_info->availability_write.index_bo->dev_addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&image_sampler_state[image_sampler_idx][0],
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -605,9 +610,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
query_info->availability_write.num_queries,
query_info->availability_write.availability_bo->dev_addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&image_sampler_state[image_sampler_idx][0],
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -616,8 +623,8 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
image_sampler_idx++;
memcpy(&const_buffer[0],
&image_sampler_state[image_sampler_idx],
sizeof(image_sampler_state[image_sampler_idx]));
&image_sampler_state[0][0],
sizeof(image_sampler_state));
/* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */
assert(shader_factory_info->num_driver_consts ==
@ -635,14 +642,13 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
PVR_FROM_HANDLE(pvr_buffer,
buffer,
query_info->copy_query_results.dst_buffer);
const uint32_t image_sampler_state_arr_size = buffer_count + 2;
const uint32_t image_sampler_state_arr_size =
(buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
uint32_t image_sampler_idx = 0;
pvr_dev_addr_t addr;
uint64_t offset;
STACK_ARRAY(struct pvr_image_descriptor,
image_sampler_state,
image_sampler_state_arr_size);
STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
if (!image_sampler_state) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
@ -650,8 +656,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
VK_ERROR_OUT_OF_HOST_MEMORY);
}
memcpy(&image_sampler_state[image_sampler_idx],
&sampler_state,
/* Addresses element _j of entry _i in a flat array whose entries are each
 * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS elements long.
 *
 * Every argument and the whole expansion are parenthesised so that compound
 * arguments (e.g. `idx + 1`) and surrounding operators (e.g. a leading `&`)
 * bind as intended.
 */
#define SAMPLER_ARR_2D(_arr, _i, _j) \
   ((_arr)[(_i) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + (_j)])
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
&sampler_state[0],
sizeof(sampler_state));
image_sampler_idx++;
@ -661,9 +670,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -677,9 +688,10 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -689,8 +701,8 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
}
memcpy(&const_buffer[0],
image_sampler_state,
image_sampler_state_arr_size * sizeof(*image_sampler_state));
&SAMPLER_ARR_2D(image_sampler_state, 0, 0),
image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
STACK_ARRAY_FINISH(image_sampler_state);
@ -703,8 +715,6 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
addr = buffer->dev_addr;
addr.addr += query_info->copy_query_results.dst_offset;
addr.addr += query_info->copy_query_results.first_query *
query_info->copy_query_results.stride;
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices;
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr &
@ -727,14 +737,13 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
PVR_FROM_HANDLE(pvr_query_pool,
pool,
query_info->reset_query_pool.query_pool);
const uint32_t image_sampler_state_arr_size = buffer_count + 2;
const uint32_t image_sampler_state_arr_size =
(buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
uint32_t image_sampler_idx = 0;
pvr_dev_addr_t addr;
uint64_t offset;
STACK_ARRAY(struct pvr_image_descriptor,
image_sampler_state,
image_sampler_state_arr_size);
STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
if (!image_sampler_state) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
@ -742,8 +751,8 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
VK_ERROR_OUT_OF_HOST_MEMORY);
}
memcpy(&image_sampler_state[image_sampler_idx],
&sampler_state,
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
&sampler_state[0],
sizeof(sampler_state));
image_sampler_idx++;
@ -755,9 +764,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -770,9 +781,10 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
result = pvr_pack_tex_state(device,
&tex_info,
&image_sampler_state[image_sampler_idx]);
result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
image_descriptor.words,
sizeof(image_descriptor.words));
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
@ -780,9 +792,11 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
image_sampler_idx++;
#undef SAMPLER_ARR_2D
memcpy(&const_buffer[0],
image_sampler_state,
image_sampler_state_arr_size * sizeof(*image_sampler_state));
&image_sampler_state[0],
image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
STACK_ARRAY_FINISH(image_sampler_state);