diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index f7078b2313c..f89c8de4d7c 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1579,6 +1579,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
    state->current_sub_cmd = NULL;
 
    if (query_pool) {
+      struct pvr_sub_cmd_event *sub_cmd;
       struct pvr_query_info query_info;
 
       assert(query_indices_bo);
@@ -1595,6 +1596,25 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
       query_info.availability_write.availability_bo =
          query_pool->availability_buffer;
 
+      /* Insert a barrier after the graphics sub command and before the
+       * query sub command so that the availability write program waits for the
+       * fragment shader to complete.
+       */
+
+      result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
+      if (result != VK_SUCCESS)
+         return result;
+
+      sub_cmd = &cmd_buffer->state.current_sub_cmd->event;
+
+      *sub_cmd = (struct pvr_sub_cmd_event) {
+         .type = PVR_EVENT_TYPE_BARRIER,
+         .barrier = {
+            .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
+            .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+         },
+      };
+
       return pvr_add_query_program(cmd_buffer, &query_info);
    }
 
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index 79dad4a1fdc..21c6a1f31ce 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -133,6 +133,7 @@ enum pvr_job_type {
    PVR_JOB_TYPE_FRAG,
    PVR_JOB_TYPE_COMPUTE,
    PVR_JOB_TYPE_TRANSFER,
+   PVR_JOB_TYPE_OCCLUSION_QUERY,
 
    PVR_JOB_TYPE_MAX
 };
@@ -147,6 +148,11 @@ enum pvr_pipeline_stage_bits {
    PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
    PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
    PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
+   /* Note that this does not map to any VkPipelineStageFlagBits value, so be
+    * careful when using it.
+    */
+   PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT =
+      BITFIELD_BIT(PVR_JOB_TYPE_OCCLUSION_QUERY),
 };
 
 #define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c
index eba4ec25894..8d343469f2d 100644
--- a/src/imagination/vulkan/pvr_query.c
+++ b/src/imagination/vulkan/pvr_query.c
@@ -237,6 +237,8 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
 {
    PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
    struct pvr_query_info query_info;
+   struct pvr_sub_cmd_event *sub_cmd;
+   VkResult result;
 
    PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
 
@@ -250,7 +252,50 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
    query_info.copy_query_results.stride = stride;
    query_info.copy_query_results.flags = flags;
 
+   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* The Vulkan 1.3.231 spec says:
+    *
+    *    "vkCmdCopyQueryPoolResults is considered to be a transfer operation,
+    *    and its writes to buffer memory must be synchronized using
+    *    VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT before
+    *    using the results."
+    *
+    * The copy query results program runs as a compute job, so we record a
+    * barrier event sub command on each side of it to stop it overlapping with
+    * transfer jobs: this one makes it wait for the transfer stage, and the
+    * one after it makes the transfer stage wait for it.
+    */
+
+   sub_cmd = &cmd_buffer->state.current_sub_cmd->event;
+   *sub_cmd = (struct pvr_sub_cmd_event) {
+      .type = PVR_EVENT_TYPE_BARRIER,
+      .barrier = {
+         .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
+         .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+      },
+   };
+
+   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return;
+
    pvr_add_query_program(cmd_buffer, &query_info);
+
+   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
+   if (result != VK_SUCCESS)
+      return;
+
+   sub_cmd = &cmd_buffer->state.current_sub_cmd->event;
+   *sub_cmd = (struct pvr_sub_cmd_event) {
+      .type = PVR_EVENT_TYPE_BARRIER,
+      .barrier = {
+         .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+         .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
+      },
+   };
 }
 
 void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c
index ae253b5c0a3..b22d1d21bee 100644
--- a/src/imagination/vulkan/pvr_queue.c
+++ b/src/imagination/vulkan/pvr_queue.c
@@ -364,6 +364,53 @@ pvr_process_transfer_cmds(struct pvr_device *device,
    return result;
 }
 
+static VkResult pvr_process_occlusion_query_cmd(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   struct pvr_sub_cmd_compute *sub_cmd,
+   struct vk_sync *barrier,
+   struct vk_sync **waits,
+   uint32_t wait_count,
+   uint32_t *stage_flags,
+   struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+{
+   struct vk_sync *sync;
+   VkResult result;
+
+   /* TODO: Currently we add barrier event sub commands to handle the sync
+    * necessary for the different occlusion query types. Would we get any speed
+    * up in processing the queue by doing that sync here without using event sub
+    * commands?
+    */
+
+   result = vk_sync_create(&device->vk,
+                           &device->pdevice->ws->syncobj_type,
+                           0U,
+                           0UL,
+                           &sync);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_compute_job_submit(queue->query_ctx,
+                                   sub_cmd,
+                                   barrier,
+                                   waits,
+                                   wait_count,
+                                   stage_flags,
+                                   sync);
+   if (result != VK_SUCCESS) {
+      vk_sync_destroy(&device->vk, sync);
+      return result;
+   }
+
+   if (completions[PVR_JOB_TYPE_OCCLUSION_QUERY])
+      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_OCCLUSION_QUERY]);
+
+   completions[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;
+
+   return result;
+}
+
 static VkResult pvr_process_event_cmd_barrier(
    struct pvr_device *device,
    struct pvr_sub_cmd_event *sub_cmd,
@@ -679,6 +726,10 @@ static VkResult pvr_process_cmd_buffer(
                              sub_cmd,
                              &cmd_buffer->sub_cmds,
                              link) {
+      /* TODO: Process PVR_SUB_COMMAND_FLAG_WAIT_ON_PREVIOUS_FRAG and
+       * PVR_SUB_COMMAND_FLAG_OCCLUSION_QUERY flags.
+       */
+
       switch (sub_cmd->type) {
       case PVR_SUB_CMD_TYPE_GRAPHICS:
          result = pvr_process_graphics_cmd(device,
@@ -716,7 +767,15 @@ static VkResult pvr_process_cmd_buffer(
          break;
 
       case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
-         pvr_finishme("Add support to occlusion query.");
+         result = pvr_process_occlusion_query_cmd(
+            device,
+            queue,
+            &sub_cmd->compute,
+            barriers[PVR_JOB_TYPE_OCCLUSION_QUERY],
+            waits,
+            wait_count,
+            stage_flags,
+            per_cmd_buffer_syncobjs);
          break;
 
       case PVR_SUB_CMD_TYPE_EVENT: