mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
pvr: Add support to generate query programs.
This commit adds support to generate three types of query related programs. PVR_QUERY_TYPE_AVAILABILITY_WRITE allows to submit the queries, PVR_QUERY_TYPE_RESET_QUERY_POOL allows to reset the pool and PVR_QUERY_TYPE_COPY_QUERY_RESULTS is to copy the results. Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Reviewed-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19371>
This commit is contained in:
parent
963b696511
commit
d69362ae84
3 changed files with 615 additions and 23 deletions
|
|
@ -360,11 +360,10 @@ pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
const void *const data,
|
||||
const size_t size,
|
||||
struct pvr_bo **const pvr_bo_out)
|
||||
VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
const void *const data,
|
||||
const size_t size,
|
||||
struct pvr_bo **const pvr_bo_out)
|
||||
{
|
||||
struct pvr_device *const device = cmd_buffer->device;
|
||||
const uint32_t cache_line_size =
|
||||
|
|
@ -1391,10 +1390,9 @@ pvr_compute_generate_idfwdf(struct pvr_cmd_buffer *cmd_buffer,
|
|||
pvr_compute_generate_control_stream(csb, sub_cmd, &info);
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd,
|
||||
bool deallocate_shareds)
|
||||
void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd,
|
||||
bool deallocate_shareds)
|
||||
{
|
||||
const struct pvr_pds_upload *program =
|
||||
&cmd_buffer->device->pds_compute_fence_program;
|
||||
|
|
@ -2512,18 +2510,6 @@ VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#define PVR_WRITE(_buffer, _value, _offset, _max) \
|
||||
do { \
|
||||
__typeof__(_value) __value = _value; \
|
||||
uint64_t __offset = _offset; \
|
||||
uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
|
||||
static_assert(__same_type(*_buffer, __value), \
|
||||
"Buffer and value type mismatch"); \
|
||||
assert((__offset + __nr_dwords) <= (_max)); \
|
||||
assert((__offset % __nr_dwords) == 0U); \
|
||||
_buffer[__offset / __nr_dwords] = __value; \
|
||||
} while (0)
|
||||
|
||||
static VkResult
|
||||
pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_graphics_pipeline *const gfx_pipeline)
|
||||
|
|
@ -2872,8 +2858,6 @@ static VkResult pvr_setup_descriptor_mappings(
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#undef PVR_WRITE
|
||||
|
||||
static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -219,6 +219,12 @@ enum pvr_deferred_cs_command_type {
|
|||
PVR_DEFERRED_CS_COMMAND_TYPE_DBSC2,
|
||||
};
|
||||
|
||||
enum pvr_query_type {
|
||||
PVR_QUERY_TYPE_AVAILABILITY_WRITE,
|
||||
PVR_QUERY_TYPE_RESET_QUERY_POOL,
|
||||
PVR_QUERY_TYPE_COPY_QUERY_RESULTS,
|
||||
};
|
||||
|
||||
struct pvr_bo;
|
||||
struct pvr_bo_store;
|
||||
struct pvr_compute_ctx;
|
||||
|
|
@ -1282,6 +1288,35 @@ struct pvr_private_compute_pipeline {
|
|||
pvr_dev_addr_t const_buffer_addr;
|
||||
};
|
||||
|
||||
struct pvr_query_info {
|
||||
enum pvr_query_type type;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_query_indices;
|
||||
struct pvr_bo *index_bo;
|
||||
uint32_t num_queries;
|
||||
struct pvr_bo *availability_bo;
|
||||
} availability_write;
|
||||
|
||||
struct {
|
||||
VkQueryPool query_pool;
|
||||
uint32_t first_query;
|
||||
uint32_t query_count;
|
||||
} reset_query_pool;
|
||||
|
||||
struct {
|
||||
VkQueryPool query_pool;
|
||||
uint32_t first_query;
|
||||
uint32_t query_count;
|
||||
VkBuffer dst_buffer;
|
||||
VkDeviceSize dst_offset;
|
||||
VkDeviceSize stride;
|
||||
VkQueryResultFlags flags;
|
||||
} copy_query_results;
|
||||
};
|
||||
};
|
||||
|
||||
struct pvr_render_target {
|
||||
struct pvr_rt_dataset *rt_dataset;
|
||||
|
||||
|
|
@ -1584,10 +1619,18 @@ VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
|
|||
uint32_t capacity,
|
||||
uint32_t size_in_bytes);
|
||||
|
||||
VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
|
||||
const void *const data,
|
||||
const size_t size,
|
||||
struct pvr_bo **const pvr_bo_out);
|
||||
|
||||
VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
|
||||
enum pvr_sub_cmd_type type);
|
||||
VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer);
|
||||
|
||||
void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd,
|
||||
bool deallocate_shareds);
|
||||
void pvr_compute_update_shared_private(
|
||||
struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd,
|
||||
|
|
@ -1608,6 +1651,9 @@ VkResult pvr_pds_compute_shader_create_and_upload(
|
|||
VkResult pvr_device_create_compute_query_programs(struct pvr_device *device);
|
||||
void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
|
||||
|
||||
VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_query_info *query_info);
|
||||
|
||||
#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
|
||||
VK_FROM_HANDLE(__pvr_type, __name, __handle)
|
||||
|
||||
|
|
@ -1736,6 +1782,18 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
|
|||
} \
|
||||
} while (false)
|
||||
|
||||
#define PVR_WRITE(_buffer, _value, _offset, _max) \
|
||||
do { \
|
||||
__typeof__(_value) __value = _value; \
|
||||
uint64_t __offset = _offset; \
|
||||
uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
|
||||
static_assert(__same_type(*_buffer, __value), \
|
||||
"Buffer and value type mismatch"); \
|
||||
assert((__offset + __nr_dwords) <= (_max)); \
|
||||
assert((__offset % __nr_dwords) == 0U); \
|
||||
_buffer[__offset / __nr_dwords] = __value; \
|
||||
} while (0)
|
||||
|
||||
/* A non-fatal assert. Useful for debugging. */
|
||||
#ifdef DEBUG
|
||||
# define pvr_assert(x) \
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "pvr_tex_state.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_command_pool.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
static inline void pvr_init_primary_compute_pds_program(
|
||||
struct pvr_pds_compute_shader_program *program)
|
||||
|
|
@ -207,6 +208,147 @@ err_free_usc_bo:
|
|||
return result;
|
||||
}
|
||||
|
||||
/* TODO: See if we can dedup this with pvr_setup_descriptor_mappings() or
|
||||
* pvr_setup_descriptor_mappings().
|
||||
*/
|
||||
static VkResult pvr_write_compute_query_pds_data_section(
|
||||
struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_compute_query_shader *query_prog,
|
||||
struct pvr_private_compute_pipeline *pipeline)
|
||||
{
|
||||
const struct pvr_pds_info *const info = &query_prog->info;
|
||||
const uint8_t *entries;
|
||||
uint32_t *dword_buffer;
|
||||
uint64_t *qword_buffer;
|
||||
struct pvr_bo *pvr_bo;
|
||||
VkResult result;
|
||||
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
cmd_buffer->device->heaps.pds_heap,
|
||||
info->data_size_in_dwords << 2,
|
||||
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||
&pvr_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
dword_buffer = (uint32_t *)pvr_bo->bo->map;
|
||||
qword_buffer = (uint64_t *)pvr_bo->bo->map;
|
||||
|
||||
entries = (uint8_t *)info->entries;
|
||||
|
||||
/* TODO: Remove this when we can test this path and make sure that this is
|
||||
* not needed. If it's needed we should probably be using LITERAL entries for
|
||||
* this instead.
|
||||
*/
|
||||
memset(dword_buffer, 0xFE, info->data_size_in_dwords << 2);
|
||||
|
||||
pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords;
|
||||
|
||||
for (uint32_t i = 0; i < info->entry_count; i++) {
|
||||
const struct pvr_const_map_entry *const entry_header =
|
||||
(struct pvr_const_map_entry *)entries;
|
||||
|
||||
switch (entry_header->type) {
|
||||
case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
|
||||
const struct pvr_const_map_entry_literal32 *const literal =
|
||||
(struct pvr_const_map_entry_literal32 *)entries;
|
||||
|
||||
PVR_WRITE(dword_buffer,
|
||||
literal->literal_value,
|
||||
literal->const_offset,
|
||||
info->data_size_in_dwords);
|
||||
|
||||
entries += sizeof(*literal);
|
||||
break;
|
||||
}
|
||||
case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64: {
|
||||
const struct pvr_const_map_entry_literal64 *const literal =
|
||||
(struct pvr_const_map_entry_literal64 *)entries;
|
||||
|
||||
PVR_WRITE(qword_buffer,
|
||||
literal->literal_value,
|
||||
literal->const_offset,
|
||||
info->data_size_in_dwords);
|
||||
|
||||
entries += sizeof(*literal);
|
||||
break;
|
||||
}
|
||||
case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
|
||||
const struct pvr_const_map_entry_doutu_address *const doutu_addr =
|
||||
(struct pvr_const_map_entry_doutu_address *)entries;
|
||||
const pvr_dev_addr_t exec_addr =
|
||||
PVR_DEV_ADDR_OFFSET(query_prog->pds_sec_code.pvr_bo->vma->dev_addr,
|
||||
query_prog->pds_sec_code.code_offset);
|
||||
uint64_t addr = 0ULL;
|
||||
|
||||
pvr_set_usc_execution_address64(&addr, exec_addr.addr);
|
||||
|
||||
PVR_WRITE(qword_buffer,
|
||||
addr | doutu_addr->doutu_control,
|
||||
doutu_addr->const_offset,
|
||||
info->data_size_in_dwords);
|
||||
|
||||
entries += sizeof(*doutu_addr);
|
||||
break;
|
||||
}
|
||||
case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: {
|
||||
const struct pvr_const_map_entry_special_buffer *special_buff_entry =
|
||||
(struct pvr_const_map_entry_special_buffer *)entries;
|
||||
|
||||
switch (special_buff_entry->buffer_type) {
|
||||
case PVR_BUFFER_TYPE_COMPILE_TIME: {
|
||||
uint64_t addr = pipeline->const_buffer_addr.addr;
|
||||
|
||||
PVR_WRITE(qword_buffer,
|
||||
addr,
|
||||
special_buff_entry->const_offset,
|
||||
info->data_size_in_dwords);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported special buffer type.");
|
||||
}
|
||||
|
||||
entries += sizeof(*special_buff_entry);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Unsupported data section map");
|
||||
}
|
||||
}
|
||||
|
||||
pipeline->pds_shared_update_data_offset =
|
||||
pvr_bo->vma->dev_addr.addr -
|
||||
cmd_buffer->device->heaps.pds_heap->base_addr.addr;
|
||||
|
||||
pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void pvr_write_private_compute_dispatch(
|
||||
struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_private_compute_pipeline *pipeline,
|
||||
uint32_t num_query_indices)
|
||||
{
|
||||
struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
|
||||
const uint32_t workgroup_size[PVR_WORKGROUP_DIMENSIONS] = {
|
||||
DIV_ROUND_UP(num_query_indices, 32),
|
||||
1,
|
||||
1,
|
||||
};
|
||||
|
||||
assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
|
||||
|
||||
pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline);
|
||||
pvr_compute_update_kernel_private(cmd_buffer,
|
||||
&sub_cmd->compute,
|
||||
pipeline,
|
||||
workgroup_size);
|
||||
pvr_compute_generate_fence(cmd_buffer, &sub_cmd->compute, false);
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_destroy_compute_query_program(struct pvr_device *device,
|
||||
struct pvr_compute_query_shader *program)
|
||||
|
|
@ -323,3 +465,411 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device)
|
|||
vk_free(&device->vk.alloc, device->copy_results_shaders);
|
||||
vk_free(&device->vk.alloc, device->reset_queries_shaders);
|
||||
}
|
||||
|
||||
static void pvr_init_tex_info(const struct pvr_device_info *dev_info,
|
||||
struct pvr_texture_state_info *tex_info,
|
||||
uint32_t width,
|
||||
pvr_dev_addr_t addr)
|
||||
{
|
||||
const uint8_t *swizzle_arr = pvr_get_format_swizzle(tex_info->format);
|
||||
bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
|
||||
!PVR_HAS_FEATURE(dev_info, tpu_image_state_v2);
|
||||
|
||||
*tex_info = (struct pvr_texture_state_info){
|
||||
.format = VK_FORMAT_R32_UINT,
|
||||
.mem_layout = PVR_MEMLAYOUT_LINEAR,
|
||||
.flags = PVR_TEXFLAGS_INDEX_LOOKUP,
|
||||
.type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D,
|
||||
.is_cube = false,
|
||||
.tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
|
||||
.extent = { .width = width, .height = 1, .depth = 0 },
|
||||
.array_size = 1,
|
||||
.base_level = 0,
|
||||
.mip_levels = 1,
|
||||
.mipmaps_present = false,
|
||||
.sample_count = 1,
|
||||
.stride = width,
|
||||
.offset = 0,
|
||||
.swizzle = { [0] = swizzle_arr[0],
|
||||
[1] = swizzle_arr[1],
|
||||
[2] = swizzle_arr[2],
|
||||
[3] = swizzle_arr[3] },
|
||||
.addr = addr,
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
/* TODO: Split this function into per program type functions. */
|
||||
VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_query_info *query_info)
|
||||
{
|
||||
struct pvr_device *device = cmd_buffer->device;
|
||||
const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
const struct pvr_shader_factory_info *shader_factory_info;
|
||||
uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
|
||||
const struct pvr_compute_query_shader *query_prog;
|
||||
struct pvr_private_compute_pipeline pipeline;
|
||||
const uint32_t buffer_count = core_count;
|
||||
struct pvr_texture_state_info tex_info;
|
||||
uint32_t num_query_indices;
|
||||
uint32_t *const_buffer;
|
||||
struct pvr_bo *pvr_bo;
|
||||
VkResult result;
|
||||
|
||||
pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
|
||||
reg.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
reg.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
reg.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
reg.minfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
reg.magfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
reg.non_normalized_coords = true;
|
||||
reg.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
|
||||
/* clang-format on */
|
||||
|
||||
switch (query_info->type) {
|
||||
case PVR_QUERY_TYPE_AVAILABILITY_WRITE:
|
||||
/* Adds a compute shader (fenced on the last 3D) that writes a non-zero
|
||||
* value in availability_bo at every index in index_bo.
|
||||
*/
|
||||
query_prog = &device->availability_shader;
|
||||
shader_factory_info = &availability_query_write_info;
|
||||
num_query_indices = query_info->availability_write.num_query_indices;
|
||||
break;
|
||||
|
||||
case PVR_QUERY_TYPE_COPY_QUERY_RESULTS:
|
||||
/* Adds a compute shader to copy availability and query value data. */
|
||||
query_prog = &device->copy_results_shaders[buffer_count - 1];
|
||||
shader_factory_info = copy_query_results_collection[buffer_count - 1];
|
||||
num_query_indices = query_info->copy_query_results.query_count;
|
||||
break;
|
||||
|
||||
case PVR_QUERY_TYPE_RESET_QUERY_POOL:
|
||||
/* Adds a compute shader to reset availability and query value data. */
|
||||
query_prog = &device->reset_queries_shaders[buffer_count - 1];
|
||||
shader_factory_info = reset_query_collection[buffer_count - 1];
|
||||
num_query_indices = query_info->reset_query_pool.query_count;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid query type");
|
||||
}
|
||||
|
||||
result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer,
|
||||
PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset;
|
||||
pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset;
|
||||
|
||||
pipeline.pds_shared_update_code_offset =
|
||||
query_prog->pds_sec_code.code_offset;
|
||||
pipeline.pds_data_size_dw = query_prog->primary_data_size_dw;
|
||||
pipeline.pds_temps_used = query_prog->primary_num_temps;
|
||||
|
||||
pipeline.coeff_regs_count = shader_factory_info->coeff_regs;
|
||||
pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs;
|
||||
|
||||
const_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
|
||||
shader_factory_info->const_shared_regs << 2,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!const_buffer) {
|
||||
cmd_buffer->state.status = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return cmd_buffer->state.status;
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
#define DRIVER_CONST(index) \
|
||||
assert(shader_factory_info->driver_const_location_map[index] < \
|
||||
shader_factory_info->const_shared_regs); \
|
||||
const_buffer[shader_factory_info->driver_const_location_map[index]]
|
||||
/* clang-format on */
|
||||
|
||||
switch (query_info->type) {
|
||||
case PVR_QUERY_TYPE_AVAILABILITY_WRITE: {
|
||||
uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
|
||||
uint32_t image_sampler_idx = 0;
|
||||
|
||||
memcpy(&image_sampler_state[image_sampler_idx][0],
|
||||
&sampler_state[0],
|
||||
sizeof(sampler_state));
|
||||
image_sampler_idx++;
|
||||
|
||||
pvr_init_tex_info(dev_info,
|
||||
&tex_info,
|
||||
num_query_indices,
|
||||
query_info->availability_write.index_bo->vma->dev_addr);
|
||||
|
||||
result = pvr_pack_tex_state(device,
|
||||
&tex_info,
|
||||
&image_sampler_state[image_sampler_idx][0]);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
|
||||
pvr_init_tex_info(
|
||||
dev_info,
|
||||
&tex_info,
|
||||
query_info->availability_write.num_queries,
|
||||
query_info->availability_write.availability_bo->vma->dev_addr);
|
||||
|
||||
result = pvr_pack_tex_state(device,
|
||||
&tex_info,
|
||||
&image_sampler_state[image_sampler_idx][0]);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
|
||||
memcpy(&const_buffer[0],
|
||||
&image_sampler_state[0][0],
|
||||
sizeof(image_sampler_state));
|
||||
|
||||
/* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */
|
||||
assert(shader_factory_info->num_driver_consts ==
|
||||
PVR_QUERY_AVAILABILITY_WRITE_COUNT);
|
||||
|
||||
DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) =
|
||||
num_query_indices;
|
||||
break;
|
||||
}
|
||||
|
||||
case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: {
|
||||
PVR_FROM_HANDLE(pvr_query_pool,
|
||||
pool,
|
||||
query_info->copy_query_results.query_pool);
|
||||
PVR_FROM_HANDLE(pvr_buffer,
|
||||
buffer,
|
||||
query_info->copy_query_results.dst_buffer);
|
||||
const uint32_t image_sampler_state_arr_size =
|
||||
(buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
|
||||
uint32_t image_sampler_idx = 0;
|
||||
pvr_dev_addr_t addr;
|
||||
uint64_t offset;
|
||||
|
||||
STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
|
||||
if (!image_sampler_state) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
|
||||
cmd_buffer->state.status =
|
||||
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return cmd_buffer->state.status;
|
||||
}
|
||||
|
||||
#define SAMPLER_ARR_2D(_arr, _i, _j) \
|
||||
_arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j]
|
||||
|
||||
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
|
||||
&sampler_state[0],
|
||||
sizeof(sampler_state));
|
||||
image_sampler_idx++;
|
||||
|
||||
offset = query_info->copy_query_results.first_query * sizeof(uint32_t);
|
||||
|
||||
addr =
|
||||
PVR_DEV_ADDR_OFFSET(pool->availability_buffer->vma->dev_addr, offset);
|
||||
|
||||
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
|
||||
|
||||
result = pvr_pack_tex_state(
|
||||
device,
|
||||
&tex_info,
|
||||
&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
|
||||
for (uint32_t i = 0; i < buffer_count; i++) {
|
||||
addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->vma->dev_addr,
|
||||
offset + i * pool->result_stride);
|
||||
|
||||
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
|
||||
|
||||
result = pvr_pack_tex_state(
|
||||
device,
|
||||
&tex_info,
|
||||
&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
}
|
||||
|
||||
memcpy(&const_buffer[0],
|
||||
&SAMPLER_ARR_2D(image_sampler_state, 0, 0),
|
||||
image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
|
||||
|
||||
STACK_ARRAY_FINISH(image_sampler_state);
|
||||
|
||||
/* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */
|
||||
assert(shader_factory_info->num_driver_consts ==
|
||||
PVR_COPY_QUERY_POOL_RESULTS_COUNT);
|
||||
|
||||
/* Assert if no memory is bound to destination buffer. */
|
||||
assert(buffer->dev_addr.addr == 0);
|
||||
|
||||
addr = buffer->dev_addr;
|
||||
addr.addr += query_info->copy_query_results.dst_offset;
|
||||
addr.addr += query_info->copy_query_results.first_query *
|
||||
query_info->copy_query_results.stride;
|
||||
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr &
|
||||
0xFFFFFFFF;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >>
|
||||
32;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) =
|
||||
query_info->copy_query_results.stride;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) =
|
||||
query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) =
|
||||
query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT;
|
||||
DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) =
|
||||
query_info->copy_query_results.flags &
|
||||
VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
|
||||
break;
|
||||
}
|
||||
|
||||
case PVR_QUERY_TYPE_RESET_QUERY_POOL: {
|
||||
PVR_FROM_HANDLE(pvr_query_pool,
|
||||
pool,
|
||||
query_info->reset_query_pool.query_pool);
|
||||
const uint32_t image_sampler_state_arr_size =
|
||||
(buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
|
||||
uint32_t image_sampler_idx = 0;
|
||||
pvr_dev_addr_t addr;
|
||||
uint64_t offset;
|
||||
|
||||
STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
|
||||
if (!image_sampler_state) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
|
||||
cmd_buffer->state.status =
|
||||
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return cmd_buffer->state.status;
|
||||
}
|
||||
|
||||
memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
|
||||
&sampler_state[0],
|
||||
sizeof(sampler_state));
|
||||
image_sampler_idx++;
|
||||
|
||||
offset = query_info->reset_query_pool.first_query * sizeof(uint32_t);
|
||||
|
||||
for (uint32_t i = 0; i < buffer_count; i++) {
|
||||
addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->vma->dev_addr,
|
||||
offset + i * pool->result_stride);
|
||||
|
||||
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
|
||||
|
||||
result = pvr_pack_tex_state(
|
||||
device,
|
||||
&tex_info,
|
||||
&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
}
|
||||
|
||||
addr =
|
||||
PVR_DEV_ADDR_OFFSET(pool->availability_buffer->vma->dev_addr, offset);
|
||||
|
||||
pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
|
||||
|
||||
result = pvr_pack_tex_state(
|
||||
device,
|
||||
&tex_info,
|
||||
&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
cmd_buffer->state.status = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
image_sampler_idx++;
|
||||
|
||||
#undef SAMPLER_ARR_2D
|
||||
|
||||
memcpy(&const_buffer[0],
|
||||
&image_sampler_state[0],
|
||||
image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
|
||||
|
||||
STACK_ARRAY_FINISH(image_sampler_state);
|
||||
|
||||
/* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */
|
||||
assert(shader_factory_info->num_driver_consts ==
|
||||
PVR_RESET_QUERY_POOL_COUNT);
|
||||
|
||||
DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Invalid query type");
|
||||
}
|
||||
|
||||
#undef DRIVER_CONST
|
||||
|
||||
for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) {
|
||||
const struct pvr_static_buffer *load =
|
||||
&shader_factory_info->static_const_buffer[i];
|
||||
|
||||
/* Assert if static const is out of range. */
|
||||
assert(load->dst_idx < shader_factory_info->const_shared_regs);
|
||||
const_buffer[load->dst_idx] = load->value;
|
||||
}
|
||||
|
||||
result =
|
||||
pvr_cmd_buffer_upload_general(cmd_buffer,
|
||||
const_buffer,
|
||||
shader_factory_info->const_shared_regs << 2,
|
||||
&pvr_bo);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pipeline.const_buffer_addr = pvr_bo->vma->dev_addr;
|
||||
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
|
||||
|
||||
/* PDS data section for the secondary/constant upload. */
|
||||
result = pvr_write_compute_query_pds_data_section(cmd_buffer,
|
||||
query_prog,
|
||||
&pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
pipeline.workgroup_size.width = ROGUE_MAX_INSTANCES_PER_TASK;
|
||||
pipeline.workgroup_size.height = 1;
|
||||
pipeline.workgroup_size.depth = 1;
|
||||
|
||||
pvr_write_private_compute_dispatch(cmd_buffer, &pipeline, num_query_indices);
|
||||
|
||||
return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue