diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 1eb6b9089df..869fbb716fb 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -360,11 +360,10 @@ pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer, return VK_SUCCESS; } -static VkResult -pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer, - const void *const data, - const size_t size, - struct pvr_bo **const pvr_bo_out) +VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer, + const void *const data, + const size_t size, + struct pvr_bo **const pvr_bo_out) { struct pvr_device *const device = cmd_buffer->device; const uint32_t cache_line_size = @@ -1391,10 +1390,9 @@ pvr_compute_generate_idfwdf(struct pvr_cmd_buffer *cmd_buffer, pvr_compute_generate_control_stream(csb, sub_cmd, &info); } -static void -pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, - struct pvr_sub_cmd_compute *const sub_cmd, - bool deallocate_shareds) +void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_compute *const sub_cmd, + bool deallocate_shareds) { const struct pvr_pds_upload *program = &cmd_buffer->device->pds_compute_fence_program; @@ -2512,18 +2510,6 @@ VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer, return VK_SUCCESS; } -#define PVR_WRITE(_buffer, _value, _offset, _max) \ - do { \ - __typeof__(_value) __value = _value; \ - uint64_t __offset = _offset; \ - uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \ - static_assert(__same_type(*_buffer, __value), \ - "Buffer and value type mismatch"); \ - assert((__offset + __nr_dwords) <= (_max)); \ - assert((__offset % __nr_dwords) == 0U); \ - _buffer[__offset / __nr_dwords] = __value; \ - } while (0) - static VkResult pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer, const struct pvr_graphics_pipeline *const gfx_pipeline) @@ -2872,8 +2858,6 @@ static VkResult pvr_setup_descriptor_mappings( return VK_SUCCESS; } -#undef PVR_WRITE - static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd_compute *const sub_cmd) { diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index e8312e5ee8c..4a5f47f095b 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -219,6 +219,12 @@ enum pvr_deferred_cs_command_type { PVR_DEFERRED_CS_COMMAND_TYPE_DBSC2, }; +enum pvr_query_type { + PVR_QUERY_TYPE_AVAILABILITY_WRITE, + PVR_QUERY_TYPE_RESET_QUERY_POOL, + PVR_QUERY_TYPE_COPY_QUERY_RESULTS, +}; + struct pvr_bo; struct pvr_bo_store; struct pvr_compute_ctx; @@ -1282,6 +1288,35 @@ struct pvr_private_compute_pipeline { pvr_dev_addr_t const_buffer_addr; }; +struct pvr_query_info { + enum pvr_query_type type; + + union { + struct { + uint32_t num_query_indices; + struct pvr_bo *index_bo; + uint32_t num_queries; + struct pvr_bo *availability_bo; + } availability_write; + + struct { + VkQueryPool query_pool; + uint32_t first_query; + uint32_t query_count; + } reset_query_pool; + + struct { + VkQueryPool query_pool; + uint32_t first_query; + uint32_t query_count; + VkBuffer dst_buffer; + VkDeviceSize dst_offset; + VkDeviceSize stride; + VkQueryResultFlags flags; + } copy_query_results; + }; +}; + struct pvr_render_target { struct pvr_rt_dataset *rt_dataset; @@ -1584,10 +1619,18 @@ VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device, uint32_t capacity, uint32_t size_in_bytes); +VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer, + const void *const data, + const size_t size, + struct pvr_bo **const pvr_bo_out); + VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, enum pvr_sub_cmd_type type); VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer); +void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_compute *const sub_cmd, + bool deallocate_shareds); void pvr_compute_update_shared_private( struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd_compute *const sub_cmd, @@ -1608,6 +1651,9 @@ VkResult pvr_pds_compute_shader_create_and_upload( VkResult pvr_device_create_compute_query_programs(struct pvr_device *device); void pvr_device_destroy_compute_query_programs(struct pvr_device *device); +VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, + const struct pvr_query_info *query_info); + #define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \ VK_FROM_HANDLE(__pvr_type, __name, __handle) @@ -1736,6 +1782,18 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass, } \ } while (false) +#define PVR_WRITE(_buffer, _value, _offset, _max) \ + do { \ + __typeof__(_value) __value = _value; \ + uint64_t __offset = _offset; \ + uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \ + static_assert(__same_type(*_buffer, __value), \ + "Buffer and value type mismatch"); \ + assert((__offset + __nr_dwords) <= (_max)); \ + assert((__offset % __nr_dwords) == 0U); \ + _buffer[__offset / __nr_dwords] = __value; \ + } while (0) + /* A non-fatal assert. Useful for debugging. */ #ifdef DEBUG # define pvr_assert(x) \ diff --git a/src/imagination/vulkan/pvr_query_compute.c b/src/imagination/vulkan/pvr_query_compute.c index e613cab974e..77cf1cb27a7 100644 --- a/src/imagination/vulkan/pvr_query_compute.c +++ b/src/imagination/vulkan/pvr_query_compute.c @@ -38,6 +38,7 @@ #include "pvr_tex_state.h" #include "vk_alloc.h" #include "vk_command_pool.h" +#include "vk_util.h" static inline void pvr_init_primary_compute_pds_program( struct pvr_pds_compute_shader_program *program) @@ -207,6 +208,147 @@ err_free_usc_bo: return result; } +/* TODO: See if we can dedup this with pvr_setup_descriptor_mappings() or + * pvr_setup_descriptor_mappings(). + */ +static VkResult pvr_write_compute_query_pds_data_section( + struct pvr_cmd_buffer *cmd_buffer, + const struct pvr_compute_query_shader *query_prog, + struct pvr_private_compute_pipeline *pipeline) +{ + const struct pvr_pds_info *const info = &query_prog->info; + const uint8_t *entries; + uint32_t *dword_buffer; + uint64_t *qword_buffer; + struct pvr_bo *pvr_bo; + VkResult result; + + result = pvr_cmd_buffer_alloc_mem(cmd_buffer, + cmd_buffer->device->heaps.pds_heap, + info->data_size_in_dwords << 2, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &pvr_bo); + if (result != VK_SUCCESS) + return result; + + dword_buffer = (uint32_t *)pvr_bo->bo->map; + qword_buffer = (uint64_t *)pvr_bo->bo->map; + + entries = (uint8_t *)info->entries; + + /* TODO: Remove this when we can test this path and make sure that this is + * not needed. If it's needed we should probably be using LITERAL entries for + * this instead. + */ + memset(dword_buffer, 0xFE, info->data_size_in_dwords << 2); + + pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords; + + for (uint32_t i = 0; i < info->entry_count; i++) { + const struct pvr_const_map_entry *const entry_header = + (struct pvr_const_map_entry *)entries; + + switch (entry_header->type) { + case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: { + const struct pvr_const_map_entry_literal32 *const literal = + (struct pvr_const_map_entry_literal32 *)entries; + + PVR_WRITE(dword_buffer, + literal->literal_value, + literal->const_offset, + info->data_size_in_dwords); + + entries += sizeof(*literal); + break; + } + case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64: { + const struct pvr_const_map_entry_literal64 *const literal = + (struct pvr_const_map_entry_literal64 *)entries; + + PVR_WRITE(qword_buffer, + literal->literal_value, + literal->const_offset, + info->data_size_in_dwords); + + entries += sizeof(*literal); + break; + } + case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: { + const struct pvr_const_map_entry_doutu_address *const doutu_addr = + (struct pvr_const_map_entry_doutu_address *)entries; + const pvr_dev_addr_t exec_addr = + PVR_DEV_ADDR_OFFSET(query_prog->pds_sec_code.pvr_bo->vma->dev_addr, + query_prog->pds_sec_code.code_offset); + uint64_t addr = 0ULL; + + pvr_set_usc_execution_address64(&addr, exec_addr.addr); + + PVR_WRITE(qword_buffer, + addr | doutu_addr->doutu_control, + doutu_addr->const_offset, + info->data_size_in_dwords); + + entries += sizeof(*doutu_addr); + break; + } + case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: { + const struct pvr_const_map_entry_special_buffer *special_buff_entry = + (struct pvr_const_map_entry_special_buffer *)entries; + + switch (special_buff_entry->buffer_type) { + case PVR_BUFFER_TYPE_COMPILE_TIME: { + uint64_t addr = pipeline->const_buffer_addr.addr; + + PVR_WRITE(qword_buffer, + addr, + special_buff_entry->const_offset, + info->data_size_in_dwords); + break; + } + + default: + unreachable("Unsupported special buffer type."); + } + + entries += sizeof(*special_buff_entry); + break; + } + default: + unreachable("Unsupported data section map"); + } + } + + pipeline->pds_shared_update_data_offset = + pvr_bo->vma->dev_addr.addr - + cmd_buffer->device->heaps.pds_heap->base_addr.addr; + + pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo); + + return VK_SUCCESS; +} + +static void pvr_write_private_compute_dispatch( + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_private_compute_pipeline *pipeline, + uint32_t num_query_indices) +{ + struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd; + const uint32_t workgroup_size[PVR_WORKGROUP_DIMENSIONS] = { + DIV_ROUND_UP(num_query_indices, 32), + 1, + 1, + }; + + assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY); + + pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline); + pvr_compute_update_kernel_private(cmd_buffer, + &sub_cmd->compute, + pipeline, + workgroup_size); + pvr_compute_generate_fence(cmd_buffer, &sub_cmd->compute, false); +} + static void pvr_destroy_compute_query_program(struct pvr_device *device, struct pvr_compute_query_shader *program) @@ -323,3 +465,411 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device) vk_free(&device->vk.alloc, device->copy_results_shaders); vk_free(&device->vk.alloc, device->reset_queries_shaders); } + +static void pvr_init_tex_info(const struct pvr_device_info *dev_info, + struct pvr_texture_state_info *tex_info, + uint32_t width, + pvr_dev_addr_t addr) +{ + const uint8_t *swizzle_arr = pvr_get_format_swizzle(tex_info->format); + bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) && + !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2); + + *tex_info = (struct pvr_texture_state_info){ + .format = VK_FORMAT_R32_UINT, + .mem_layout = PVR_MEMLAYOUT_LINEAR, + .flags = PVR_TEXFLAGS_INDEX_LOOKUP, + .type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D, + .is_cube = false, + .tex_state_type = PVR_TEXTURE_STATE_SAMPLE, + .extent = { .width = width, .height = 1, .depth = 0 }, + .array_size = 1, + .base_level = 0, + .mip_levels = 1, + .mipmaps_present = false, + .sample_count = 1, + .stride = width, + .offset = 0, + .swizzle = { [0] = swizzle_arr[0], + [1] = swizzle_arr[1], + [2] = swizzle_arr[2], + [3] = swizzle_arr[3] }, + .addr = addr, + + }; +} + +/* TODO: Split this function into per program type functions. */ +VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, + const struct pvr_query_info *query_info) +{ + struct pvr_device *device = cmd_buffer->device; + const uint32_t core_count = device->pdevice->dev_runtime_info.core_count; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + const struct pvr_shader_factory_info *shader_factory_info; + uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS]; + const struct pvr_compute_query_shader *query_prog; + struct pvr_private_compute_pipeline pipeline; + const uint32_t buffer_count = core_count; + struct pvr_texture_state_info tex_info; + uint32_t num_query_indices; + uint32_t *const_buffer; + struct pvr_bo *pvr_bo; + VkResult result; + + pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) { + reg.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); + reg.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); + reg.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); + reg.minfilter = PVRX(TEXSTATE_FILTER_POINT); + reg.magfilter = PVRX(TEXSTATE_FILTER_POINT); + reg.non_normalized_coords = true; + reg.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT); + } + + /* clang-format off */ + pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {} + /* clang-format on */ + + switch (query_info->type) { + case PVR_QUERY_TYPE_AVAILABILITY_WRITE: + /* Adds a compute shader (fenced on the last 3D) that writes a non-zero + * value in availability_bo at every index in index_bo. + */ + query_prog = &device->availability_shader; + shader_factory_info = &availability_query_write_info; + num_query_indices = query_info->availability_write.num_query_indices; + break; + + case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: + /* Adds a compute shader to copy availability and query value data. */ + query_prog = &device->copy_results_shaders[buffer_count - 1]; + shader_factory_info = copy_query_results_collection[buffer_count - 1]; + num_query_indices = query_info->copy_query_results.query_count; + break; + + case PVR_QUERY_TYPE_RESET_QUERY_POOL: + /* Adds a compute shader to reset availability and query value data. */ + query_prog = &device->reset_queries_shaders[buffer_count - 1]; + shader_factory_info = reset_query_collection[buffer_count - 1]; + num_query_indices = query_info->reset_query_pool.query_count; + break; + + default: + unreachable("Invalid query type"); + } + + result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, + PVR_SUB_CMD_TYPE_OCCLUSION_QUERY); + if (result != VK_SUCCESS) + return result; + + pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset; + pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset; + + pipeline.pds_shared_update_code_offset = + query_prog->pds_sec_code.code_offset; + pipeline.pds_data_size_dw = query_prog->primary_data_size_dw; + pipeline.pds_temps_used = query_prog->primary_num_temps; + + pipeline.coeff_regs_count = shader_factory_info->coeff_regs; + pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs; + + const_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc, + shader_factory_info->const_shared_regs << 2, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!const_buffer) { + cmd_buffer->state.status = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmd_buffer->state.status; + } + + /* clang-format off */ +#define DRIVER_CONST(index) \ + assert(shader_factory_info->driver_const_location_map[index] < \ + shader_factory_info->const_shared_regs); \ + const_buffer[shader_factory_info->driver_const_location_map[index]] + /* clang-format on */ + + switch (query_info->type) { + case PVR_QUERY_TYPE_AVAILABILITY_WRITE: { + uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS]; + uint32_t image_sampler_idx = 0; + + memcpy(&image_sampler_state[image_sampler_idx][0], + &sampler_state[0], + sizeof(sampler_state)); + image_sampler_idx++; + + pvr_init_tex_info(dev_info, + &tex_info, + num_query_indices, + query_info->availability_write.index_bo->vma->dev_addr); + + result = pvr_pack_tex_state(device, + &tex_info, + &image_sampler_state[image_sampler_idx][0]); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + + pvr_init_tex_info( + dev_info, + &tex_info, + query_info->availability_write.num_queries, + query_info->availability_write.availability_bo->vma->dev_addr); + + result = pvr_pack_tex_state(device, + &tex_info, + &image_sampler_state[image_sampler_idx][0]); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + + memcpy(&const_buffer[0], + &image_sampler_state[0][0], + sizeof(image_sampler_state)); + + /* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */ + assert(shader_factory_info->num_driver_consts == + PVR_QUERY_AVAILABILITY_WRITE_COUNT); + + DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) = + num_query_indices; + break; + } + + case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: { + PVR_FROM_HANDLE(pvr_query_pool, + pool, + query_info->copy_query_results.query_pool); + PVR_FROM_HANDLE(pvr_buffer, + buffer, + query_info->copy_query_results.dst_buffer); + const uint32_t image_sampler_state_arr_size = + (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS; + uint32_t image_sampler_idx = 0; + pvr_dev_addr_t addr; + uint64_t offset; + + STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size); + if (!image_sampler_state) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + + cmd_buffer->state.status = + vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmd_buffer->state.status; + } + +#define SAMPLER_ARR_2D(_arr, _i, _j) \ + _arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j] + + memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), + &sampler_state[0], + sizeof(sampler_state)); + image_sampler_idx++; + + offset = query_info->copy_query_results.first_query * sizeof(uint32_t); + + addr = + PVR_DEV_ADDR_OFFSET(pool->availability_buffer->vma->dev_addr, offset); + + pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); + + result = pvr_pack_tex_state( + device, + &tex_info, + &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0)); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + + for (uint32_t i = 0; i < buffer_count; i++) { + addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->vma->dev_addr, + offset + i * pool->result_stride); + + pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); + + result = pvr_pack_tex_state( + device, + &tex_info, + &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0)); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + } + + memcpy(&const_buffer[0], + &SAMPLER_ARR_2D(image_sampler_state, 0, 0), + image_sampler_state_arr_size * sizeof(image_sampler_state[0])); + + STACK_ARRAY_FINISH(image_sampler_state); + + /* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */ + assert(shader_factory_info->num_driver_consts == + PVR_COPY_QUERY_POOL_RESULTS_COUNT); + + /* Assert if no memory is bound to destination buffer. */ + assert(buffer->dev_addr.addr == 0); + + addr = buffer->dev_addr; + addr.addr += query_info->copy_query_results.dst_offset; + addr.addr += query_info->copy_query_results.first_query * + query_info->copy_query_results.stride; + + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr & + 0xFFFFFFFF; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >> + 32; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) = + query_info->copy_query_results.stride; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) = + query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) = + query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT; + DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) = + query_info->copy_query_results.flags & + VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; + break; + } + + case PVR_QUERY_TYPE_RESET_QUERY_POOL: { + PVR_FROM_HANDLE(pvr_query_pool, + pool, + query_info->reset_query_pool.query_pool); + const uint32_t image_sampler_state_arr_size = + (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS; + uint32_t image_sampler_idx = 0; + pvr_dev_addr_t addr; + uint64_t offset; + + STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size); + if (!image_sampler_state) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + + cmd_buffer->state.status = + vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmd_buffer->state.status; + } + + memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), + &sampler_state[0], + sizeof(sampler_state)); + image_sampler_idx++; + + offset = query_info->reset_query_pool.first_query * sizeof(uint32_t); + + for (uint32_t i = 0; i < buffer_count; i++) { + addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->vma->dev_addr, + offset + i * pool->result_stride); + + pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); + + result = pvr_pack_tex_state( + device, + &tex_info, + &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0)); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + } + + addr = + PVR_DEV_ADDR_OFFSET(pool->availability_buffer->vma->dev_addr, offset); + + pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); + + result = pvr_pack_tex_state( + device, + &tex_info, + &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0)); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + cmd_buffer->state.status = result; + return result; + } + + image_sampler_idx++; + +#undef SAMPLER_ARR_2D + + memcpy(&const_buffer[0], + &image_sampler_state[0], + image_sampler_state_arr_size * sizeof(image_sampler_state[0])); + + STACK_ARRAY_FINISH(image_sampler_state); + + /* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */ + assert(shader_factory_info->num_driver_consts == + PVR_RESET_QUERY_POOL_COUNT); + + DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices; + break; + } + + default: + unreachable("Invalid query type"); + } + +#undef DRIVER_CONST + + for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) { + const struct pvr_static_buffer *load = + &shader_factory_info->static_const_buffer[i]; + + /* Assert if static const is out of range. */ + assert(load->dst_idx < shader_factory_info->const_shared_regs); + const_buffer[load->dst_idx] = load->value; + } + + result = + pvr_cmd_buffer_upload_general(cmd_buffer, + const_buffer, + shader_factory_info->const_shared_regs << 2, + &pvr_bo); + if (result != VK_SUCCESS) { + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + + return result; + } + + pipeline.const_buffer_addr = pvr_bo->vma->dev_addr; + + vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + + /* PDS data section for the secondary/constant upload. */ + result = pvr_write_compute_query_pds_data_section(cmd_buffer, + query_prog, + &pipeline); + if (result != VK_SUCCESS) + return result; + + pipeline.workgroup_size.width = ROGUE_MAX_INSTANCES_PER_TASK; + pipeline.workgroup_size.height = 1; + pipeline.workgroup_size.depth = 1; + + pvr_write_private_compute_dispatch(cmd_buffer, &pipeline, num_query_indices); + + return pvr_cmd_buffer_end_sub_cmd(cmd_buffer); +}