diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c index ef8d2275125..2f81499df22 100644 --- a/src/imagination/vulkan/pvr_job_compute.c +++ b/src/imagination/vulkan/pvr_job_compute.c @@ -179,10 +179,7 @@ pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info, static void pvr_compute_job_ws_submit_info_init( struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait, struct pvr_winsys_compute_submit_info *submit_info) { const struct pvr_device *const device = ctx->device; @@ -193,11 +190,7 @@ static void pvr_compute_job_ws_submit_info_init( submit_info->frame_num = device->global_queue_present_count; submit_info->job_num = device->global_cmd_buffer_submit_count; - submit_info->barrier = barrier; - - submit_info->waits = waits; - submit_info->wait_count = wait_count; - submit_info->stage_flags = stage_flags; + submit_info->wait = wait; pvr_submit_info_stream_init(ctx, sub_cmd, submit_info); pvr_submit_info_ext_stream_init(ctx, submit_info); @@ -206,22 +199,13 @@ static void pvr_compute_job_ws_submit_info_init( VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait, struct vk_sync *signal_sync) { struct pvr_winsys_compute_submit_info submit_info; struct pvr_device *device = ctx->device; - pvr_compute_job_ws_submit_info_init(ctx, - sub_cmd, - barrier, - waits, - wait_count, - stage_flags, - &submit_info); + pvr_compute_job_ws_submit_info_init(ctx, sub_cmd, wait, &submit_info); if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) { pvr_csb_dump(&sub_cmd->control_stream, diff --git a/src/imagination/vulkan/pvr_job_compute.h b/src/imagination/vulkan/pvr_job_compute.h index 72dfcd0c39a..ed715d51410 100644 --- a/src/imagination/vulkan/pvr_job_compute.h +++ b/src/imagination/vulkan/pvr_job_compute.h @@ -33,10 +33,7 @@ struct vk_sync; VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait, struct vk_sync *signal_sync); #endif /* PVR_JOB_COMPUTE_H */ diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index 32f49cbc718..a5baa3858d5 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1279,10 +1279,13 @@ static void pvr_geom_state_flags_init(const struct pvr_render_job *const job, static void pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, struct pvr_render_job *job, + struct vk_sync *wait, struct pvr_winsys_geometry_state *state) { pvr_geom_state_stream_init(ctx, job, state); pvr_geom_state_stream_ext_init(ctx, job, state); + + state->wait = wait; pvr_geom_state_flags_init(job, &state->flags); } @@ -1687,21 +1690,21 @@ static void pvr_frag_state_flags_init(const struct pvr_render_job *const job, static void pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, struct pvr_render_job *job, + struct vk_sync *wait, struct pvr_winsys_fragment_state *state) { pvr_frag_state_stream_init(ctx, job, state); pvr_frag_state_stream_ext_init(ctx, job, state); + + state->wait = wait; pvr_frag_state_flags_init(job, &state->flags); } static void pvr_render_job_ws_submit_info_init( struct pvr_render_ctx *ctx, struct pvr_render_job *job, - struct vk_sync *barrier_geom, - struct vk_sync *barrier_frag, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait_geom, + struct vk_sync *wait_frag, struct pvr_winsys_render_submit_info *submit_info) { memset(submit_info, 0, sizeof(*submit_info)); @@ -1712,29 +1715,25 @@ static void pvr_render_job_ws_submit_info_init( submit_info->frame_num = ctx->device->global_queue_present_count; submit_info->job_num = ctx->device->global_cmd_buffer_submit_count; - submit_info->barrier_geom = barrier_geom; - - submit_info->waits = waits; - submit_info->wait_count = wait_count; - submit_info->stage_flags = stage_flags; - - pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry); + pvr_render_job_ws_geometry_state_init(ctx, + job, + wait_geom, + &submit_info->geometry); if (job->run_frag) { submit_info->run_frag = true; - submit_info->barrier_frag = barrier_frag; - pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment); + pvr_render_job_ws_fragment_state_init(ctx, + job, + wait_frag, + &submit_info->fragment); } } VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, struct pvr_render_job *job, - struct vk_sync *barrier_geom, - struct vk_sync *barrier_frag, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait_geom, + struct vk_sync *wait_frag, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag) { @@ -1745,11 +1744,8 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, pvr_render_job_ws_submit_info_init(ctx, job, - barrier_geom, - barrier_frag, - waits, - wait_count, - stage_flags, + wait_geom, + wait_frag, &submit_info); if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) { diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h index e42422205af..406aa6dfe88 100644 --- a/src/imagination/vulkan/pvr_job_render.h +++ b/src/imagination/vulkan/pvr_job_render.h @@ -129,11 +129,8 @@ void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *dataset); VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, struct pvr_render_job *job, - struct vk_sync *barrier_geom, - struct vk_sync *barrier_frag, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait_geom, + struct vk_sync *wait_frag, struct vk_sync *signal_sync_geom, struct vk_sync *signal_sync_frag); diff --git a/src/imagination/vulkan/pvr_job_transfer.c b/src/imagination/vulkan/pvr_job_transfer.c index c4d6c1ba7dd..a0c2e793c99 100644 --- a/src/imagination/vulkan/pvr_job_transfer.c +++ b/src/imagination/vulkan/pvr_job_transfer.c @@ -39,37 +39,18 @@ VkResult pvr_transfer_job_submit(struct pvr_device *device, struct pvr_transfer_ctx *ctx, struct pvr_sub_cmd_transfer *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait_sync, struct vk_sync *signal_sync) { - /* Wait for transfer semaphores here before doing any transfers. */ - for (uint32_t i = 0U; i < wait_count; i++) { - if (stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) { - VkResult result = vk_sync_wait(&device->vk, - waits[i], - 0U, - VK_SYNC_WAIT_COMPLETE, - UINT64_MAX); - if (result != VK_SUCCESS) - return result; + VkResult result; - stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT; - } - } - - - if (barrier) { - VkResult result = vk_sync_wait(&device->vk, - barrier, - 0U, - VK_SYNC_WAIT_COMPLETE, - UINT64_MAX); - if (result != VK_SUCCESS) - return result; - } + result = vk_sync_wait(&device->vk, + wait_sync, + 0U, + VK_SYNC_WAIT_COMPLETE, + UINT64_MAX); + if (result != VK_SUCCESS) + return result; list_for_each_entry_safe (struct pvr_transfer_cmd, transfer_cmd, diff --git a/src/imagination/vulkan/pvr_job_transfer.h b/src/imagination/vulkan/pvr_job_transfer.h index 4550374efd9..0084969c061 100644 --- a/src/imagination/vulkan/pvr_job_transfer.h +++ b/src/imagination/vulkan/pvr_job_transfer.h @@ -35,10 +35,7 @@ struct vk_sync; VkResult pvr_transfer_job_submit(struct pvr_device *device, struct pvr_transfer_ctx *ctx, struct pvr_sub_cmd_transfer *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, + struct vk_sync *wait, struct vk_sync *signal_sync); #endif /* PVR_JOB_TRANSFER_H */ diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 8dfbbae644e..abff0008c46 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -131,14 +131,8 @@ struct pvr_queue { struct pvr_compute_ctx *query_ctx; struct pvr_transfer_ctx *transfer_ctx; - struct vk_sync *completion[PVR_JOB_TYPE_MAX]; - - /* Used to setup a job dependency from jobs previously submitted, onto - * the next job per job type. - * - * Used to create dependencies for pipeline barriers. - */ - struct vk_sync *job_dependancy[PVR_JOB_TYPE_MAX]; + struct vk_sync *last_job_signal_sync[PVR_JOB_TYPE_MAX]; + struct vk_sync *next_job_wait_sync[PVR_JOB_TYPE_MAX]; }; struct pvr_vertex_binding { @@ -279,6 +273,8 @@ struct pvr_device { VkPhysicalDeviceFeatures features; struct pvr_bo_store *bo_store; + + struct vk_sync *presignaled_sync; }; struct pvr_device_memory { diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c index e6d69d2cb55..00acb32cdee 100644 --- a/src/imagination/vulkan/pvr_queue.c +++ b/src/imagination/vulkan/pvr_queue.c @@ -54,6 +54,9 @@ #include "vk_sync_dummy.h" #include "vk_util.h" +static VkResult pvr_driver_queue_submit(struct vk_queue *queue, + struct vk_queue_submit *submit); + static VkResult pvr_queue_init(struct pvr_device *device, struct pvr_queue *queue, const VkDeviceQueueCreateInfo *pCreateInfo, @@ -101,6 +104,8 @@ static VkResult pvr_queue_init(struct pvr_device *device, queue->query_ctx = query_ctx; queue->transfer_ctx = transfer_ctx; + queue->vk.driver_submit = pvr_driver_queue_submit; + return VK_SUCCESS; err_query_ctx_destroy: @@ -157,14 +162,14 @@ err_queues_finish: static void pvr_queue_finish(struct pvr_queue *queue) { - for (uint32_t i = 0; i < ARRAY_SIZE(queue->job_dependancy); i++) { - if (queue->job_dependancy[i]) - vk_sync_destroy(&queue->device->vk, queue->job_dependancy[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) { + if (queue->next_job_wait_sync[i]) + vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]); } - for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) { - if (queue->completion[i]) - vk_sync_destroy(&queue->device->vk, queue->completion[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) { + if (queue->last_job_signal_sync[i]) + vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]); } pvr_render_ctx_destroy(queue->gfx_ctx); @@ -183,184 +188,32 @@ void pvr_queues_destroy(struct pvr_device *device) vk_free(&device->vk.alloc, device->queues); } -VkResult pvr_QueueWaitIdle(VkQueue _queue) +static VkResult pvr_process_graphics_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_gfx *sub_cmd) { - PVR_FROM_HANDLE(pvr_queue, queue, _queue); - - for (int i = 0U; i < ARRAY_SIZE(queue->completion); i++) { - VkResult result; - - if (!queue->completion[i]) - continue; - - result = vk_sync_wait(&queue->device->vk, - queue->completion[i], - 0U, - VK_SYNC_WAIT_COMPLETE, - UINT64_MAX); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -static VkResult -pvr_process_graphics_cmd_part(struct pvr_device *const device, - struct pvr_render_ctx *const gfx_ctx, - struct pvr_render_job *const job, - struct vk_sync *const geom_barrier, - struct vk_sync *const frag_barrier, - struct vk_sync **const geom_completion, - struct vk_sync **const frag_completion, - struct vk_sync **const waits, - const uint32_t wait_count, - uint32_t *const stage_flags) -{ - struct vk_sync *geom_sync = NULL; - struct vk_sync *frag_sync = NULL; + pvr_dev_addr_t original_ctrl_stream_addr = { 0 }; + struct vk_sync *geom_signal_sync; + struct vk_sync *frag_signal_sync; VkResult result; - /* For each of geom and frag, a completion sync is optional but only allowed - * iff barrier is present. - */ - assert(geom_barrier || !geom_completion); - assert(frag_barrier || !frag_completion); - - if (geom_barrier) { - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &geom_sync); - if (result != VK_SUCCESS) - goto err_out; - } - - if (frag_barrier) { - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &frag_sync); - if (result != VK_SUCCESS) - goto err_destroy_sync_geom; - } - - result = pvr_render_job_submit(gfx_ctx, - job, - geom_barrier, - frag_barrier, - waits, - wait_count, - stage_flags, - geom_sync, - frag_sync); - if (result != VK_SUCCESS) - goto err_destroy_sync_frag; - - /* Replace the completion fences. */ - if (geom_sync) { - if (*geom_completion) - vk_sync_destroy(&device->vk, *geom_completion); - - *geom_completion = geom_sync; - } - - if (frag_sync) { - if (*frag_completion) - vk_sync_destroy(&device->vk, *frag_completion); - - *frag_completion = frag_sync; - } - - return VK_SUCCESS; - -err_destroy_sync_frag: - if (frag_sync) - vk_sync_destroy(&device->vk, frag_sync); - -err_destroy_sync_geom: - if (geom_sync) - vk_sync_destroy(&device->vk, geom_sync); - -err_out: - return result; -} - -static VkResult -pvr_process_split_graphics_cmd(struct pvr_device *const device, - struct pvr_render_ctx *const gfx_ctx, - struct pvr_sub_cmd_gfx *sub_cmd, - struct vk_sync *const geom_barrier, - struct vk_sync *const frag_barrier, - struct vk_sync **const geom_completion, - struct vk_sync **const frag_completion, - struct vk_sync **const waits, - const uint32_t wait_count, - uint32_t *const stage_flags) -{ - struct pvr_render_job *const job = &sub_cmd->job; - const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr; - const bool original_geometry_terminate = job->geometry_terminate; - const bool original_run_frag = job->run_frag; - VkResult result; - - /* First submit must not touch fragment work. */ - job->geometry_terminate = false; - job->run_frag = false; - - result = pvr_process_graphics_cmd_part(device, - gfx_ctx, - job, - geom_barrier, - NULL, - geom_completion, - NULL, - waits, - wait_count, - stage_flags); - - job->geometry_terminate = original_geometry_terminate; - job->run_frag = original_run_frag; - + result = vk_sync_create(&device->vk, + &device->pdevice->ws->syncobj_type, + 0U, + 0UL, + &geom_signal_sync); if (result != VK_SUCCESS) return result; - /* Second submit contains only a trivial control stream to terminate the - * geometry work. - */ - assert(sub_cmd->terminate_ctrl_stream); - job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr; + result = vk_sync_create(&device->vk, + &device->pdevice->ws->syncobj_type, + 0U, + 0UL, + &frag_signal_sync); + if (result != VK_SUCCESS) + goto err_destroy_geom_sync; - result = pvr_process_graphics_cmd_part(device, - gfx_ctx, - job, - NULL, - frag_barrier, - NULL, - frag_completion, - waits, - wait_count, - stage_flags); - - job->ctrl_stream_addr = original_ctrl_stream_addr; - - return result; -} - -static VkResult -pvr_process_graphics_cmd(struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_cmd_buffer *cmd_buffer, - struct pvr_sub_cmd_gfx *sub_cmd, - struct vk_sync *barrier_geom, - struct vk_sync *barrier_frag, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX]) -{ /* FIXME: DoShadowLoadOrStore() */ /* Perform two render submits when using multiple framebuffer layers. The @@ -370,41 +223,83 @@ pvr_process_graphics_cmd(struct pvr_device *device, * could result in missing primitives. */ if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) { - return pvr_process_split_graphics_cmd(device, - queue->gfx_ctx, - sub_cmd, - barrier_geom, - barrier_frag, - &completions[PVR_JOB_TYPE_GEOM], - &completions[PVR_JOB_TYPE_FRAG], - waits, - wait_count, - stage_flags); + /* If fragment work shouldn't be run there's no need for a split, + * and if geometry_terminate is false this kick can't have a fragment + * stage without another terminating geometry kick. + */ + assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag); + + /* First submit must not touch fragment work. */ + sub_cmd->job.geometry_terminate = false; + sub_cmd->job.run_frag = false; + + result = + pvr_render_job_submit(queue->gfx_ctx, + &sub_cmd->job, + queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM], + NULL, + NULL, + NULL); + + sub_cmd->job.geometry_terminate = true; + sub_cmd->job.run_frag = true; + + if (result != VK_SUCCESS) + goto err_destroy_frag_sync; + + original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr; + + /* Second submit contains only a trivial control stream to terminate the + * geometry work. + */ + assert(sub_cmd->terminate_ctrl_stream); + sub_cmd->job.ctrl_stream_addr = + sub_cmd->terminate_ctrl_stream->vma->dev_addr; } - return pvr_process_graphics_cmd_part(device, - queue->gfx_ctx, - &sub_cmd->job, - barrier_geom, - barrier_frag, - &completions[PVR_JOB_TYPE_GEOM], - &completions[PVR_JOB_TYPE_FRAG], - waits, - wait_count, - stage_flags); + result = pvr_render_job_submit(queue->gfx_ctx, + &sub_cmd->job, + queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM], + queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG], + geom_signal_sync, + frag_signal_sync); + + if (original_ctrl_stream_addr.addr > 0) + sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr; + + if (result != VK_SUCCESS) + goto err_destroy_frag_sync; + + /* Replace the completion fences. */ + if (queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]) { + vk_sync_destroy(&device->vk, + queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]); + } + + queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM] = geom_signal_sync; + + if (queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]) { + vk_sync_destroy(&device->vk, + queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]); + } + + queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG] = frag_signal_sync; /* FIXME: DoShadowLoadOrStore() */ + + return VK_SUCCESS; + +err_destroy_frag_sync: + vk_sync_destroy(&device->vk, frag_signal_sync); +err_destroy_geom_sync: + vk_sync_destroy(&device->vk, geom_signal_sync); + + return result; } -static VkResult -pvr_process_compute_cmd(struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_compute_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_compute *sub_cmd) { struct vk_sync *sync; VkResult result; @@ -417,36 +312,30 @@ pvr_process_compute_cmd(struct pvr_device *device, if (result != VK_SUCCESS) return result; - result = pvr_compute_job_submit(queue->compute_ctx, - sub_cmd, - barrier, - waits, - wait_count, - stage_flags, - sync); + result = + pvr_compute_job_submit(queue->compute_ctx, + sub_cmd, + queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE], + sync); if (result != VK_SUCCESS) { vk_sync_destroy(&device->vk, sync); return result; } - /* Replace the completion fences. */ - if (completions[PVR_JOB_TYPE_COMPUTE]) - vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_COMPUTE]); + /* Replace the signal fence. */ + if (queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]) { + vk_sync_destroy(&device->vk, + queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]); + } - completions[PVR_JOB_TYPE_COMPUTE] = sync; + queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE] = sync; return result; } -static VkResult -pvr_process_transfer_cmds(struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_sub_cmd_transfer *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_transfer_cmds(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_transfer *sub_cmd) { struct vk_sync *sync; VkResult result; @@ -459,37 +348,32 @@ pvr_process_transfer_cmds(struct pvr_device *device, if (result != VK_SUCCESS) return result; - result = pvr_transfer_job_submit(device, - queue->transfer_ctx, - sub_cmd, - barrier, - waits, - wait_count, - stage_flags, - sync); + result = + pvr_transfer_job_submit(device, + queue->transfer_ctx, + sub_cmd, + queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER], + sync); if (result != VK_SUCCESS) { vk_sync_destroy(&device->vk, sync); return result; } - /* Replace the completion fences. */ - if (completions[PVR_JOB_TYPE_TRANSFER]) - vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_TRANSFER]); + /* Replace the signal syncs. */ + if (queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]) { + vk_sync_destroy(&device->vk, + queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]); + } - completions[PVR_JOB_TYPE_TRANSFER] = sync; + queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER] = sync; return result; } -static VkResult pvr_process_occlusion_query_cmd( - struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_sub_cmd_compute *sub_cmd, - struct vk_sync *barrier, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX]) +static VkResult +pvr_process_occlusion_query_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_compute *sub_cmd) { struct vk_sync *sync; VkResult result; @@ -508,42 +392,35 @@ static VkResult pvr_process_occlusion_query_cmd( if (result != VK_SUCCESS) return result; - result = pvr_compute_job_submit(queue->query_ctx, - sub_cmd, - barrier, - waits, - wait_count, - stage_flags, - sync); + result = pvr_compute_job_submit( + queue->query_ctx, + sub_cmd, + queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY], + sync); if (result != VK_SUCCESS) { vk_sync_destroy(&device->vk, sync); return result; } - if (completions[PVR_JOB_TYPE_OCCLUSION_QUERY]) - vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_OCCLUSION_QUERY]); + if (queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]) { + vk_sync_destroy( + &device->vk, + queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]); + } - completions[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync; + queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync; return result; } -static VkResult pvr_process_event_cmd_barrier( - struct pvr_device *device, - struct pvr_sub_cmd_event *sub_cmd, - struct vk_sync *barriers[static PVR_JOB_TYPE_MAX], - struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_event_cmd_barrier(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_event *sub_cmd) { const uint32_t src_mask = sub_cmd->barrier.wait_for_stage_mask; const uint32_t dst_mask = sub_cmd->barrier.wait_at_stage_mask; - const bool in_render_pass = sub_cmd->barrier.in_render_pass; - struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX] = { 0 }; - struct vk_sync *completions[PVR_JOB_TYPE_MAX] = { 0 }; - struct vk_sync *src_syncobjs[PVR_JOB_TYPE_MAX]; - uint32_t src_syncobj_count = 0; + struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1]; + uint32_t src_wait_count = 0; VkResult result; assert(sub_cmd->type == PVR_EVENT_TYPE_BARRIER); @@ -551,154 +428,79 @@ static VkResult pvr_process_event_cmd_barrier( assert(!(src_mask & ~PVR_PIPELINE_STAGE_ALL_BITS)); assert(!(dst_mask & ~PVR_PIPELINE_STAGE_ALL_BITS)); - /* TODO: We're likely over synchronizing here, but the kernel doesn't - * guarantee that jobs submitted on a context will execute and complete in - * order, even though in practice they will, so we play it safe and don't - * make any assumptions. If the kernel starts to offer this guarantee then - * remove the extra dependencies being added here. - */ - u_foreach_bit (stage, src_mask) { - struct vk_sync *syncobj; - - syncobj = per_cmd_buffer_syncobjs[stage]; - - if (!in_render_pass & !syncobj) { - if (per_submit_syncobjs[stage]) - syncobj = per_submit_syncobjs[stage]; - else if (queue_syncobjs[stage]) - syncobj = queue_syncobjs[stage]; - else if (previous_queue_syncobjs[stage]) - syncobj = previous_queue_syncobjs[stage]; + if (queue->last_job_signal_sync[stage]) { + wait_syncs[src_wait_count++] = (struct vk_sync_wait){ + .sync = queue->last_job_signal_sync[stage], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; } - - if (!syncobj) - continue; - - src_syncobjs[src_syncobj_count++] = syncobj; } /* No previous src jobs that need finishing so no need for a barrier. */ - if (src_syncobj_count == 0) + if (src_wait_count == 0) return VK_SUCCESS; u_foreach_bit (stage, dst_mask) { - struct vk_sync *completion; + uint32_t wait_count = src_wait_count; + struct vk_sync_signal signal; + struct vk_sync *signal_sync; result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, 0UL, - &completion); + &signal_sync); if (result != VK_SUCCESS) - goto err_destroy_completions; + return result; - result = device->ws->ops->null_job_submit(device->ws, - src_syncobjs, - src_syncobj_count, - completion); - if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, completion); + signal = (struct vk_sync_signal){ + .sync = signal_sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .signal_value = 0, + }; - goto err_destroy_completions; + if (queue->next_job_wait_sync[stage]) { + wait_syncs[wait_count++] = (struct vk_sync_wait){ + .sync = queue->next_job_wait_sync[stage], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; } - completions[stage] = completion; - } - - u_foreach_bit (stage, dst_mask) { - struct vk_sync *barrier_src_syncobjs[2]; - uint32_t barrier_src_syncobj_count = 0; - struct vk_sync *barrier; - VkResult result; - - assert(completions[stage]); - barrier_src_syncobjs[barrier_src_syncobj_count++] = completions[stage]; - - /* If there is a previous barrier we want to merge it with the new one. - * - * E.g. - * A , B , - * X , - * C - * Y , - * D - * - * X barriers A and B at D. Y barriers C at D. So we want to merge both - * X and Y graphics vk_sync barriers to pass to D. - * - * Note that this is the same as: - * A , B , C - * X , - * Y , - * D - * - */ - if (barriers[stage]) - barrier_src_syncobjs[barrier_src_syncobj_count++] = barriers[stage]; - - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &barrier); - if (result != VK_SUCCESS) - goto err_destroy_new_barriers; - result = device->ws->ops->null_job_submit(device->ws, - barrier_src_syncobjs, - barrier_src_syncobj_count, - barrier); + wait_syncs, + wait_count, + &signal); if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, barrier); - - goto err_destroy_new_barriers; + vk_sync_destroy(&device->vk, signal_sync); + return result; } - new_barriers[stage] = barrier; - } + if (queue->next_job_wait_sync[stage]) + vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]); - u_foreach_bit (stage, dst_mask) { - if (per_cmd_buffer_syncobjs[stage]) - vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]); - - per_cmd_buffer_syncobjs[stage] = completions[stage]; - - if (barriers[stage]) - vk_sync_destroy(&device->vk, barriers[stage]); - - barriers[stage] = new_barriers[stage]; + queue->next_job_wait_sync[stage] = signal_sync; } return VK_SUCCESS; - -err_destroy_new_barriers: - u_foreach_bit (stage, dst_mask) { - if (new_barriers[stage]) - vk_sync_destroy(&device->vk, new_barriers[stage]); - } - -err_destroy_completions: - u_foreach_bit (stage, dst_mask) { - if (completions[stage]) - vk_sync_destroy(&device->vk, completions[stage]); - } - - return result; } -static VkResult pvr_process_event_cmd_set_or_reset( - struct pvr_device *device, - struct pvr_sub_cmd_event *sub_cmd, - struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX]) +static VkResult +pvr_process_event_cmd_set_or_reset(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_event *sub_cmd) { /* Not PVR_JOB_TYPE_MAX since that also includes * PVR_JOB_TYPE_OCCLUSION_QUERY so no stage in the src mask. */ - struct vk_sync *src_syncobjs[PVR_NUM_SYNC_PIPELINE_STAGES]; - struct vk_sync *new_event_syncobj; - uint32_t src_syncobj_count = 0; + struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES]; + struct vk_sync_signal signal; + struct vk_sync *signal_sync; + uint32_t wait_for_stage_mask; + uint32_t wait_count = 0; VkResult result; assert(sub_cmd->type == PVR_EVENT_TYPE_SET || @@ -712,29 +514,34 @@ static VkResult pvr_process_event_cmd_set_or_reset( assert(!(wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS)); u_foreach_bit (stage, wait_for_stage_mask) { - if (!per_cmd_buffer_syncobjs[stage]) + if (!queue->last_job_signal_sync[stage]) continue; - src_syncobjs[src_syncobj_count++] = per_cmd_buffer_syncobjs[stage]; + waits[wait_count++] = (struct vk_sync_wait){ + .sync = queue->last_job_signal_sync[stage], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; } - assert(src_syncobj_count <= ARRAY_SIZE(src_syncobjs)); - result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, 0UL, - &new_event_syncobj); + &signal_sync); if (result != VK_SUCCESS) return result; - result = device->ws->ops->null_job_submit(device->ws, - src_syncobjs, - src_syncobj_count, - new_event_syncobj); - if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, new_event_syncobj); + signal = (struct vk_sync_signal){ + .sync = signal_sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .signal_value = 0, + }; + result = + device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal); + if (result != VK_SUCCESS) { + vk_sync_destroy(&device->vk, signal_sync); return result; } @@ -742,13 +549,13 @@ static VkResult pvr_process_event_cmd_set_or_reset( if (sub_cmd->set.event->sync) vk_sync_destroy(&device->vk, sub_cmd->set.event->sync); - sub_cmd->set.event->sync = new_event_syncobj; + sub_cmd->set.event->sync = signal_sync; sub_cmd->set.event->state = PVR_EVENT_STATE_SET_BY_DEVICE; } else { if (sub_cmd->reset.event->sync) vk_sync_destroy(&device->vk, sub_cmd->reset.event->sync); - sub_cmd->reset.event->sync = new_event_syncobj; + sub_cmd->reset.event->sync = signal_sync; sub_cmd->reset.event->state = PVR_EVENT_STATE_RESET_BY_DEVICE; } @@ -771,271 +578,109 @@ static VkResult pvr_process_event_cmd_set_or_reset( * \parma[in,out] per_cmd_buffer_syncobjs Completion syncobjs for the command * buffer being processed. */ -static VkResult pvr_process_event_cmd_wait( - struct pvr_device *device, - struct pvr_sub_cmd_event *sub_cmd, - struct vk_sync *barriers[static PVR_JOB_TYPE_MAX], - struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_event_cmd_wait(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_event *sub_cmd) { - /* +1 if there's a previous barrier which we need to merge. */ - struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX]; - struct vk_sync *completions[PVR_JOB_TYPE_MAX]; uint32_t dst_mask = 0; + VkResult result; - STACK_ARRAY(struct vk_sync *, src_syncobjs, sub_cmd->wait.count + 1); - if (!src_syncobjs) + STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->wait.count + 1); + if (!waits) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); for (uint32_t i = 0; i < sub_cmd->wait.count; i++) dst_mask |= sub_cmd->wait.wait_at_stage_masks[i]; u_foreach_bit (stage, dst_mask) { - uint32_t src_syncobj_count = 0; - struct vk_sync *completion; - struct vk_sync *barrier; - VkResult result; - - if (barriers[stage]) - src_syncobjs[src_syncobj_count++] = barriers[stage]; + struct vk_sync_signal signal; + struct vk_sync *signal_sync; + uint32_t wait_count = 0; for (uint32_t i = 0; i < sub_cmd->wait.count; i++) { - if (sub_cmd->wait.wait_at_stage_masks[i] & stage) - src_syncobjs[src_syncobj_count++] = sub_cmd->wait.events[i]->sync; + if (sub_cmd->wait.wait_at_stage_masks[i] & stage) { + waits[wait_count++] = (struct vk_sync_wait){ + .sync = sub_cmd->wait.events[i]->sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; + } } - /* Create completion. */ + if (!wait_count) + continue; + + if (queue->next_job_wait_sync[stage]) { + waits[wait_count++] = (struct vk_sync_wait){ + .sync = queue->next_job_wait_sync[stage], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; + } + + assert(wait_count <= (sub_cmd->wait.count + 1)); result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, 0UL, - &completion); - if (result != VK_SUCCESS) { - STACK_ARRAY_FINISH(src_syncobjs); - return result; - } + &signal_sync); + if (result != VK_SUCCESS) + goto err_free_waits; + + signal = (struct vk_sync_signal){ + .sync = signal_sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .signal_value = 0, + }; result = device->ws->ops->null_job_submit(device->ws, - src_syncobjs, - src_syncobj_count, - completion); + waits, + wait_count, + &signal); if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, completion); - STACK_ARRAY_FINISH(src_syncobjs); - return result; + vk_sync_destroy(&device->vk, signal.sync); + goto err_free_waits; } - completions[stage] = completion; + if (queue->next_job_wait_sync[stage]) + vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]); - /* Create barrier. */ - - /* We can't reuse the completion as a barrier since a barrier can be - * passed into multiple job submissions based on the dst mask while the - * completion gets replaced on each job submission so we'd end up in a - * case where the completion is replaced but other job submissions (of - * different type, i.e. different stages in the dst mask) get fed the - * freed barrier resulting in a use after free. - */ - - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &barrier); - if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, completion); - STACK_ARRAY_FINISH(src_syncobjs); - return result; - } - - result = - device->ws->ops->null_job_submit(device->ws, &completion, 1, barrier); - if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, barrier); - vk_sync_destroy(&device->vk, completion); - STACK_ARRAY_FINISH(src_syncobjs); - return result; - } - - new_barriers[stage] = barrier; + queue->next_job_wait_sync[stage] = signal.sync; } - u_foreach_bit (stage, dst_mask) { - if (per_cmd_buffer_syncobjs[stage]) - vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]); - - per_cmd_buffer_syncobjs[stage] = completions[stage]; - - if (barriers[stage]) - vk_sync_destroy(&device->vk, barriers[stage]); - - barriers[stage] = new_barriers[stage]; - } - - STACK_ARRAY_FINISH(src_syncobjs); + STACK_ARRAY_FINISH(waits); return VK_SUCCESS; + +err_free_waits: + STACK_ARRAY_FINISH(waits); + + return result; } -static VkResult pvr_process_event_cmd( - struct pvr_device *device, - struct pvr_sub_cmd_event *sub_cmd, - struct vk_sync *barriers[static PVR_JOB_TYPE_MAX], - struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_event_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_event *sub_cmd) { switch (sub_cmd->type) { case PVR_EVENT_TYPE_SET: case PVR_EVENT_TYPE_RESET: - return pvr_process_event_cmd_set_or_reset(device, - sub_cmd, - per_cmd_buffer_syncobjs); - + return pvr_process_event_cmd_set_or_reset(device, queue, sub_cmd); case PVR_EVENT_TYPE_WAIT: - return pvr_process_event_cmd_wait(device, - sub_cmd, - barriers, - per_cmd_buffer_syncobjs); - + return pvr_process_event_cmd_wait(device, queue, sub_cmd); case PVR_EVENT_TYPE_BARRIER: - return pvr_process_event_cmd_barrier(device, - sub_cmd, - barriers, - per_cmd_buffer_syncobjs, - per_submit_syncobjs, - queue_syncobjs, - previous_queue_syncobjs); - + return pvr_process_event_cmd_barrier(device, queue, sub_cmd); default: unreachable("Invalid event sub-command type."); }; } -static VkResult -pvr_set_semaphore_payloads(struct pvr_device *device, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX], - const VkSemaphore *signals, - uint32_t signal_count) +static VkResult pvr_process_cmd_buffer(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_cmd_buffer *cmd_buffer) { - struct vk_sync *sync; VkResult result; - int fd = -1; - - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &sync); - if (result != VK_SUCCESS) - return result; - - result = device->ws->ops->null_job_submit(device->ws, - completions, - PVR_JOB_TYPE_MAX, - sync); - if (result != VK_SUCCESS) - goto end_set_semaphore_payloads; - - /* If we have a single signal semaphore, we can simply move merged sync's - * payload to the signal semahpore's payload. - */ - if (signal_count == 1U) { - VK_FROM_HANDLE(vk_semaphore, sem, signals[0]); - struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem); - - result = vk_sync_move(&device->vk, sem_sync, sync); - goto end_set_semaphore_payloads; - } - - result = vk_sync_export_sync_file(&device->vk, sync, &fd); - if (result != VK_SUCCESS) - goto end_set_semaphore_payloads; - - for (uint32_t i = 0U; i < signal_count; i++) { - VK_FROM_HANDLE(vk_semaphore, sem, signals[i]); - struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem); - - result = vk_sync_import_sync_file(&device->vk, sem_sync, fd); - if (result != VK_SUCCESS) - goto end_set_semaphore_payloads; - } - -end_set_semaphore_payloads: - if (fd != -1) - close(fd); - - vk_sync_destroy(&device->vk, sync); - - return result; -} - -static VkResult -pvr_set_fence_payload(struct pvr_device *device, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX], - VkFence _fence) -{ - VK_FROM_HANDLE(vk_fence, fence, _fence); - struct vk_sync *fence_sync; - struct vk_sync *sync; - VkResult result; - - result = vk_sync_create(&device->vk, - &device->pdevice->ws->syncobj_type, - 0U, - 0UL, - &sync); - if (result != VK_SUCCESS) - return result; - - result = device->ws->ops->null_job_submit(device->ws, - completions, - PVR_JOB_TYPE_MAX, - sync); - if (result != VK_SUCCESS) { - vk_sync_destroy(&device->vk, sync); - return result; - } - - fence_sync = vk_fence_get_active_sync(fence); - result = vk_sync_move(&device->vk, fence_sync, sync); - vk_sync_destroy(&device->vk, sync); - - return result; -} - -static void pvr_update_syncobjs(struct pvr_device *device, - struct vk_sync *src[static PVR_JOB_TYPE_MAX], - struct vk_sync *dst[static PVR_JOB_TYPE_MAX]) -{ - for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { - if (src[i]) { - if (dst[i]) - vk_sync_destroy(&device->vk, dst[i]); - - dst[i] = src[i]; - } - } -} - -static VkResult pvr_process_cmd_buffer( - struct pvr_device *device, - struct pvr_queue *queue, - VkCommandBuffer commandBuffer, - struct vk_sync *barriers[static PVR_JOB_TYPE_MAX], - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX], - struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX]) -{ - struct vk_sync *per_cmd_buffer_syncobjs[PVR_JOB_TYPE_MAX] = {}; - PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); - VkResult result; - - assert(cmd_buffer->vk.state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE); list_for_each_entry_safe (struct pvr_sub_cmd, sub_cmd, @@ -1058,38 +703,19 @@ static VkResult pvr_process_cmd_buffer( */ result = pvr_process_event_cmd_barrier(device, - &frag_to_transfer_barrier, - barriers, - per_cmd_buffer_syncobjs, - per_submit_syncobjs, - queue_syncobjs, - previous_queue_syncobjs); + queue, + &frag_to_transfer_barrier); if (result != VK_SUCCESS) break; } - result = pvr_process_graphics_cmd(device, - queue, - cmd_buffer, - &sub_cmd->gfx, - barriers[PVR_JOB_TYPE_GEOM], - barriers[PVR_JOB_TYPE_FRAG], - waits, - wait_count, - stage_flags, - per_cmd_buffer_syncobjs); + result = + pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx); break; } case PVR_SUB_CMD_TYPE_COMPUTE: - result = pvr_process_compute_cmd(device, - queue, - &sub_cmd->compute, - barriers[PVR_JOB_TYPE_COMPUTE], - waits, - wait_count, - stage_flags, - per_cmd_buffer_syncobjs); + result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute); break; case PVR_SUB_CMD_TYPE_TRANSFER: { @@ -1105,24 +731,13 @@ static VkResult pvr_process_cmd_buffer( }; result = pvr_process_event_cmd_barrier(device, - &frag_to_transfer_barrier, - barriers, - per_cmd_buffer_syncobjs, - per_submit_syncobjs, - queue_syncobjs, - previous_queue_syncobjs); + queue, + &frag_to_transfer_barrier); if (result != VK_SUCCESS) break; } - result = pvr_process_transfer_cmds(device, - queue, - &sub_cmd->transfer, - barriers[PVR_JOB_TYPE_TRANSFER], - waits, - wait_count, - stage_flags, - per_cmd_buffer_syncobjs); + result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer); if (serialize_with_frag) { struct pvr_sub_cmd_event transfer_to_frag_barrier = { @@ -1137,37 +752,20 @@ static VkResult pvr_process_cmd_buffer( break; result = pvr_process_event_cmd_barrier(device, - &transfer_to_frag_barrier, - barriers, - per_cmd_buffer_syncobjs, - per_submit_syncobjs, - queue_syncobjs, - previous_queue_syncobjs); + queue, + &transfer_to_frag_barrier); } break; } case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: - result = pvr_process_occlusion_query_cmd( - device, - queue, - &sub_cmd->compute, - barriers[PVR_JOB_TYPE_OCCLUSION_QUERY], - waits, - wait_count, - stage_flags, - per_cmd_buffer_syncobjs); + result = + pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute); break; case PVR_SUB_CMD_TYPE_EVENT: - result = pvr_process_event_cmd(device, - &sub_cmd->event, - barriers, - per_cmd_buffer_syncobjs, - per_submit_syncobjs, - queue_syncobjs, - previous_queue_syncobjs); + result = pvr_process_event_cmd(device, queue, &sub_cmd->event); break; default: @@ -1181,144 +779,182 @@ static VkResult pvr_process_cmd_buffer( p_atomic_inc(&device->global_cmd_buffer_submit_count); } - pvr_update_syncobjs(device, per_cmd_buffer_syncobjs, per_submit_syncobjs); + return VK_SUCCESS; +} + +static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue) +{ + struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2]; + uint32_t wait_count = 0; + VkResult result; + + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + if (queue->next_job_wait_sync[i]) { + waits[wait_count++] = (struct vk_sync_wait){ + .sync = queue->next_job_wait_sync[i], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; + } + + if (queue->last_job_signal_sync[i]) { + waits[wait_count++] = (struct vk_sync_wait){ + .sync = queue->last_job_signal_sync[i], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; + } + } + + result = vk_sync_wait_many(&queue->device->vk, + wait_count, + waits, + VK_SYNC_WAIT_COMPLETE, + UINT64_MAX); + + if (result != VK_SUCCESS) + return vk_error(queue, result); + + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + if (queue->next_job_wait_sync[i]) { + vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]); + queue->next_job_wait_sync[i] = NULL; + } + + if (queue->last_job_signal_sync[i]) { + vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]); + queue->last_job_signal_sync[i] = NULL; + } + } return VK_SUCCESS; } -static VkResult -pvr_submit_null_job(struct pvr_device *device, - struct vk_sync **waits, - uint32_t wait_count, - uint32_t *stage_flags, - struct vk_sync *completions[static PVR_JOB_TYPE_MAX]) +static VkResult pvr_process_queue_signals(struct pvr_queue *queue, + struct vk_sync_signal *signals, + uint32_t signal_count) { + struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX]; + struct pvr_device *device = queue->device; VkResult result; - STATIC_ASSERT(PVR_JOB_TYPE_MAX >= PVR_NUM_SYNC_PIPELINE_STAGES); - for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) { - struct vk_sync *per_job_waits[wait_count]; - uint32_t per_job_waits_count = 0; + for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) { + struct vk_sync_signal *signal = &signals[signal_idx]; + const enum pvr_pipeline_stage_bits signal_stage_src = + pvr_stage_mask_src(signal->stage_mask); + uint32_t wait_count = 0; - /* Get the waits specific to the job type. */ - for (uint32_t j = 0U; j < wait_count; j++) { - if (stage_flags[j] & (1U << i)) { - per_job_waits[per_job_waits_count] = waits[j]; - per_job_waits_count++; - } + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + if (!(signal_stage_src & BITFIELD_BIT(i)) || + !queue->last_job_signal_sync[i]) + continue; + + signal_waits[wait_count++] = (struct vk_sync_wait){ + .sync = queue->last_job_signal_sync[i], + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = 0, + }; } - if (per_job_waits_count == 0U) - continue; + result = device->ws->ops->null_job_submit(device->ws, + signal_waits, + wait_count, + signal); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static VkResult pvr_process_queue_waits(struct pvr_queue *queue, + struct vk_sync_wait *waits, + uint32_t wait_count) +{ + struct pvr_device *device = queue->device; + VkResult result; + + STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count); + if (!stage_waits) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { + struct vk_sync_signal next_job_wait_signal_sync; + uint32_t stage_wait_count = 0; + + for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) { + if (!(pvr_stage_mask(waits[wait_idx].stage_mask) & BITFIELD_BIT(i))) + continue; + + stage_waits[stage_wait_count++] = (struct vk_sync_wait){ + .sync = waits[wait_idx].sync, + .stage_mask = ~(VkPipelineStageFlags2)0, + .wait_value = waits[wait_idx].wait_value, + }; + } result = vk_sync_create(&device->vk, &device->pdevice->ws->syncobj_type, 0U, 0UL, - &completions[i]); + &queue->next_job_wait_sync[i]); if (result != VK_SUCCESS) - goto err_destroy_completion_syncs; + goto err_free_waits; + + next_job_wait_signal_sync = (struct vk_sync_signal){ + .sync = queue->next_job_wait_sync[i], + .stage_mask = ~(VkPipelineStageFlags2)0, + .signal_value = 0, + }; result = device->ws->ops->null_job_submit(device->ws, - per_job_waits, - per_job_waits_count, - completions[i]); + stage_waits, + stage_wait_count, + &next_job_wait_signal_sync); if (result != VK_SUCCESS) - goto err_destroy_completion_syncs; + goto err_free_waits; } + STACK_ARRAY_FINISH(stage_waits); + return VK_SUCCESS; -err_destroy_completion_syncs: - for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) { - if (completions[i]) { - vk_sync_destroy(&device->vk, completions[i]); - completions[i] = NULL; - } - } +err_free_waits: + STACK_ARRAY_FINISH(stage_waits); return result; } -VkResult pvr_QueueSubmit(VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo *pSubmits, - VkFence fence) +static VkResult pvr_driver_queue_submit(struct vk_queue *queue, + struct vk_queue_submit *submit) { - PVR_FROM_HANDLE(pvr_queue, queue, _queue); - struct vk_sync *completion_syncobjs[PVR_JOB_TYPE_MAX] = {}; - struct pvr_device *device = queue->device; + struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk); + struct pvr_device *device = driver_queue->device; VkResult result; - for (uint32_t i = 0U; i < submitCount; i++) { - struct vk_sync *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {}; - const VkSubmitInfo *desc = &pSubmits[i]; - struct vk_sync *waits[desc->waitSemaphoreCount]; - uint32_t stage_flags[desc->waitSemaphoreCount]; - uint32_t wait_count = 0; + result = pvr_clear_last_submits_syncs(driver_queue); + if (result != VK_SUCCESS) + return result; - for (uint32_t j = 0U; j < desc->waitSemaphoreCount; j++) { - VK_FROM_HANDLE(vk_semaphore, semaphore, desc->pWaitSemaphores[j]); - struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore); + result = + pvr_process_queue_waits(driver_queue, submit->waits, submit->wait_count); + if (result != VK_SUCCESS) + return result; - if (sync->type == &vk_sync_dummy_type) - continue; - - /* We don't currently support timeline semaphores. */ - assert(!(sync->flags & VK_SYNC_IS_TIMELINE)); - - stage_flags[wait_count] = - pvr_stage_mask_dst(desc->pWaitDstStageMask[j]); - waits[wait_count] = vk_semaphore_get_active_sync(semaphore); - wait_count++; - } - - if (desc->commandBufferCount > 0U) { - for (uint32_t j = 0U; j < desc->commandBufferCount; j++) { - result = pvr_process_cmd_buffer(device, - queue, - desc->pCommandBuffers[j], - queue->job_dependancy, - waits, - wait_count, - stage_flags, - per_submit_completion_syncobjs, - completion_syncobjs, - queue->completion); - if (result != VK_SUCCESS) - return result; - } - } else { - result = pvr_submit_null_job(device, - waits, - wait_count, - stage_flags, - per_submit_completion_syncobjs); - if (result != VK_SUCCESS) - return result; - } - - if (desc->signalSemaphoreCount) { - result = pvr_set_semaphore_payloads(device, - per_submit_completion_syncobjs, - desc->pSignalSemaphores, - desc->signalSemaphoreCount); - if (result != VK_SUCCESS) - return result; - } - - pvr_update_syncobjs(device, - per_submit_completion_syncobjs, - completion_syncobjs); - } - - if (fence) { - result = pvr_set_fence_payload(device, completion_syncobjs, fence); + for (uint32_t i = 0U; i < submit->command_buffer_count; i++) { + result = pvr_process_cmd_buffer( + device, + driver_queue, + container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk)); if (result != VK_SUCCESS) return result; } - pvr_update_syncobjs(device, completion_syncobjs, queue->completion); + result = pvr_process_queue_signals(driver_queue, + submit->signals, + submit->signal_count); + if (result != VK_SUCCESS) + return result; return VK_SUCCESS; } diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h index 2db610ac976..973794f59a3 100644 --- a/src/imagination/vulkan/winsys/pvr_winsys.h +++ b/src/imagination/vulkan/winsys/pvr_winsys.h @@ -292,12 +292,7 @@ struct pvr_winsys_transfer_submit_info { uint32_t frame_num; uint32_t job_num; - struct vk_sync *barrier; - - /* waits and stage_flags are arrays of length wait_count. */ - struct vk_sync **waits; - uint32_t wait_count; - uint32_t *stage_flags; + struct vk_sync *wait; uint32_t cmd_count; struct pvr_winsys_transfer_cmd cmds[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT]; @@ -310,12 +305,7 @@ struct pvr_winsys_compute_submit_info { uint32_t frame_num; uint32_t job_num; - struct vk_sync *barrier; - - /* waits and stage_flags are arrays of length wait_count. */ - struct vk_sync **waits; - uint32_t wait_count; - uint32_t *stage_flags; + struct vk_sync *wait; /* Firmware stream buffer. This is the maximum possible size taking into * consideration all HW features. @@ -354,14 +344,6 @@ struct pvr_winsys_render_submit_info { /* FIXME: should this be flags instead? */ bool run_frag; - struct vk_sync *barrier_geom; - struct vk_sync *barrier_frag; - - /* waits and stage_flags are arrays of length wait_count. */ - struct vk_sync **waits; - uint32_t wait_count; - uint32_t *stage_flags; - struct pvr_winsys_geometry_state { /* Firmware stream buffer. This is the maximum possible size taking into * consideration all HW features. @@ -377,6 +359,8 @@ struct pvr_winsys_render_submit_info { /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */ uint32_t flags; + + struct vk_sync *wait; } geometry; struct pvr_winsys_fragment_state { @@ -394,6 +378,8 @@ struct pvr_winsys_render_submit_info { /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */ uint32_t flags; + + struct vk_sync *wait; } fragment; }; @@ -485,9 +471,9 @@ struct pvr_winsys_ops { struct vk_sync *signal_sync); VkResult (*null_job_submit)(struct pvr_winsys *ws, - struct vk_sync **waits, + struct vk_sync_wait *waits, uint32_t wait_count, - struct vk_sync *signal_sync); + struct vk_sync_signal *signal_sync); }; struct pvr_winsys { diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c index 562dbceb77f..56d3aa80faa 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c @@ -33,6 +33,7 @@ #include "pvr_srv.h" #include "pvr_srv_bo.h" #include "pvr_srv_bridge.h" +#include "pvr_srv_job_common.h" #include "pvr_srv_job_compute.h" #include "pvr_srv_job_render.h" #include "pvr_srv_job_transfer.h" @@ -46,6 +47,7 @@ #include "util/macros.h" #include "util/os_misc.h" #include "vk_log.h" +#include "vk_sync.h" /* Amount of space used to hold sync prim values (in bytes). */ #define PVR_SRV_SYNC_PRIM_VALUE_SIZE 4U @@ -392,6 +394,11 @@ static void pvr_srv_winsys_destroy(struct pvr_winsys *ws) struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws); int fd = srv_ws->render_fd; + if (srv_ws->presignaled_sync) { + vk_sync_destroy(&srv_ws->presignaled_sync_device->vk, + &srv_ws->presignaled_sync->base); + } + pvr_srv_sync_prim_block_finish(srv_ws); pvr_srv_memctx_finish(srv_ws); vk_free(srv_ws->alloc, srv_ws); @@ -741,3 +748,83 @@ void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim) vk_free(srv_ws->alloc, sync_prim); } } + +static VkResult pvr_srv_create_presignaled_sync(struct pvr_device *device, + struct pvr_srv_sync **out_sync) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(device->ws); + struct vk_sync *sync; + + int timeline_fd; + int sync_fd; + + VkResult result; + + result = pvr_srv_create_timeline(srv_ws->render_fd, &timeline_fd); + if (result != VK_SUCCESS) + return result; + + result = pvr_srv_set_timeline_sw_only(timeline_fd); + if (result != VK_SUCCESS) + goto err_close_timeline; + + result = pvr_srv_create_sw_fence(timeline_fd, &sync_fd, NULL); + if (result != VK_SUCCESS) + goto err_close_timeline; + + result = pvr_srv_sw_sync_timeline_increment(timeline_fd, NULL); + if (result != VK_SUCCESS) + goto err_close_sw_fence; + + result = vk_sync_create(&device->vk, + &device->pdevice->ws->syncobj_type, + 0U, + 0UL, + &sync); + if (result != VK_SUCCESS) + goto err_close_sw_fence; + + result = vk_sync_import_sync_file(&device->vk, sync, sync_fd); + if (result != VK_SUCCESS) + goto err_destroy_sync; + + *out_sync = to_srv_sync(sync); + (*out_sync)->signaled = true; + + close(timeline_fd); + + return VK_SUCCESS; + +err_destroy_sync: + vk_sync_destroy(&device->vk, sync); + +err_close_sw_fence: + close(sync_fd); + +err_close_timeline: + close(timeline_fd); + + return result; +} + +VkResult pvr_srv_sync_get_presignaled_sync(struct pvr_device *device, + struct pvr_srv_sync **out_sync) +{ + struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(device->ws); + VkResult result; + + if (!srv_ws->presignaled_sync) { + result = + pvr_srv_create_presignaled_sync(device, &srv_ws->presignaled_sync); + if (result != VK_SUCCESS) + return result; + + srv_ws->presignaled_sync_device = device; + } + + assert(device == srv_ws->presignaled_sync_device); + + *out_sync = srv_ws->presignaled_sync; + + return VK_SUCCESS; +} diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h index 2f9aae8ff95..f6127d6bdbb 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h @@ -28,6 +28,7 @@ #include #include +#include "pvr_srv_sync.h" #include "pvr_winsys.h" #include "util/macros.h" #include "util/vma.h" @@ -71,6 +72,9 @@ struct pvr_srv_winsys { int master_fd; int render_fd; + struct pvr_device *presignaled_sync_device; + struct pvr_srv_sync *presignaled_sync; + const VkAllocationCallbacks *alloc; /* Packed bvnc */ @@ -130,4 +134,7 @@ pvr_srv_sync_prim_get_fw_addr(const struct pvr_srv_sync_prim *const sync_prim) return sync_prim->srv_ws->sync_block_fw_addr + sync_prim->offset; } +VkResult pvr_srv_sync_get_presignaled_sync(struct pvr_device *device, + struct pvr_srv_sync **out_sync); + #endif /* PVR_SRV_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c index 3b0e48ae4b4..9419ebeb85b 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c @@ -87,6 +87,76 @@ VkResult pvr_srv_init_module(int fd, enum pvr_srvkm_module_type module) return VK_SUCCESS; } +VkResult pvr_srv_set_timeline_sw_only(int sw_timeline_fd) +{ + int ret; + + assert(sw_timeline_fd >= 0); + + ret = drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD, NULL); + + if (unlikely(ret < 0)) { + return vk_errorf( + NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD failed, Errno: %s", + strerror(errno)); + } + + return VK_SUCCESS; +} + +VkResult pvr_srv_create_sw_fence(int sw_timeline_fd, + int *new_fence_fd, + uint64_t *sync_pt_idx) +{ + struct drm_srvkm_sw_sync_create_fence_data data = { .name[0] = '\0' }; + int ret; + + assert(sw_timeline_fd >= 0); + assert(new_fence_fd != NULL); + + ret = + drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD, &data); + + if (unlikely(ret < 0)) { + return vk_errorf( + NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD failed, Errno: %s", + strerror(errno)); + } + + *new_fence_fd = data.fence; + if (sync_pt_idx) + *sync_pt_idx = data.sync_pt_idx; + + return VK_SUCCESS; +} + +VkResult pvr_srv_sw_sync_timeline_increment(int sw_timeline_fd, + uint64_t *sync_pt_idx) +{ + struct drm_srvkm_sw_timeline_advance_data data = { 0 }; + int ret; + + assert(sw_timeline_fd >= 0); + + ret = drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD, &data); + + if (unlikely(ret < 0)) { + return vk_errorf(NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD failed, Errno: %s", + strerror(errno)); + } + + if (sync_pt_idx) + *sync_pt_idx = data.sync_pt_idx; + + return VK_SUCCESS; +} + VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out) { struct pvr_srv_bridge_connect_cmd cmd = { diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h index 78c360648dc..1254fb7811e 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h @@ -98,11 +98,28 @@ * These defines must be prefixed with "DRM_". */ #define DRM_SRVKM_CMD 0U /* PVR Services command. */ + +/* PVR Sync commands */ +#define DRM_SRVKM_SYNC_FORCE_SW_ONLY_CMD 2U + +/* PVR Software Sync commands */ +#define DRM_SRVKM_SW_SYNC_CREATE_FENCE_CMD 3U +#define DRM_SRVKM_SW_SYNC_INC_CMD 4U + +/* PVR Services Render Device Init command */ #define DRM_SRVKM_INIT 5U /* PVR Services Render Device Init command. */ /* These defines must be prefixed with "DRM_IOCTL_". */ #define DRM_IOCTL_SRVKM_CMD \ DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_CMD, struct drm_srvkm_cmd) +#define DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD \ + DRM_IO(DRM_COMMAND_BASE + DRM_SRVKM_SYNC_FORCE_SW_ONLY_CMD) +#define DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_SW_SYNC_CREATE_FENCE_CMD, \ + struct drm_srvkm_sw_sync_create_fence_data) +#define DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD \ + DRM_IOR(DRM_COMMAND_BASE + DRM_SRVKM_SW_SYNC_INC_CMD, \ + struct drm_srvkm_sw_timeline_advance_data) #define DRM_IOCTL_SRVKM_INIT \ DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_INIT, struct drm_srvkm_init_data) @@ -815,6 +832,17 @@ struct drm_srvkm_init_data { uint32_t init_module; }; +struct drm_srvkm_sw_sync_create_fence_data { + char name[32]; + __s32 fence; + __u32 pad; + __u64 sync_pt_idx; +}; + +struct drm_srvkm_sw_timeline_advance_data { + __u64 sync_pt_idx; +}; + /****************************************************************************** DRM helper enum ******************************************************************************/ @@ -830,6 +858,15 @@ enum pvr_srvkm_module_type { VkResult pvr_srv_init_module(int fd, enum pvr_srvkm_module_type module); +VkResult pvr_srv_set_timeline_sw_only(int sw_timeline_fd); + +VkResult pvr_srv_create_sw_fence(int sw_timeline_fd, + int *new_fence_fd, + uint64_t *sync_pt_idx); + +VkResult pvr_srv_sw_sync_timeline_increment(int sw_timeline_fd, + uint64_t *sync_pt_idx); + /****************************************************************************** Bridge function prototypes ******************************************************************************/ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c index 5208cd83446..e90fc8c98dd 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c @@ -21,10 +21,12 @@ * SOFTWARE. */ +#include #include #include #include #include +#include #include #include @@ -38,7 +40,6 @@ #include "pvr_srv_job_compute.h" #include "pvr_srv_sync.h" #include "pvr_winsys.h" -#include "util/libsync.h" #include "util/macros.h" #include "vk_alloc.h" #include "vk_log.h" @@ -242,34 +243,16 @@ VkResult pvr_srv_winsys_compute_submit( pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info); - for (uint32_t i = 0U; i < submit_info->wait_count; i++) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]); - int ret; - - if (!submit_info->waits[i] || srv_wait_sync->fd < 0) - continue; - - if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_COMPUTE_BIT) { - ret = sync_accumulate("", &in_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fd; - } - - submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_COMPUTE_BIT; - } - } - - if (submit_info->barrier) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->barrier); + if (submit_info->wait) { + struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->wait); if (srv_wait_sync->fd >= 0) { - int ret; - - ret = sync_accumulate("", &in_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fd; + in_fd = dup(srv_wait_sync->fd); + if (in_fd == -1) { + return vk_errorf(NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "dup called on wait sync failed, Errno: %s", + strerror(errno)); } } } diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.c index d6d040d24f6..cbd079747bb 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.c @@ -35,22 +35,29 @@ #include "vk_sync.h" VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws, - struct vk_sync **waits, + struct vk_sync_wait *waits, uint32_t wait_count, - struct vk_sync *signal_sync) + struct vk_sync_signal *signal) { - struct pvr_srv_sync *srv_signal_sync = to_srv_sync(signal_sync); int fd = -1; - assert(signal_sync); + /* Services doesn't support timeline syncs. + * Timeline syncs should be emulated by the Vulkan runtime and converted + * to binary syncs before this point. + */ + assert((signal->signal_value == 0) && + !(signal->sync->flags & VK_SYNC_IS_TIMELINE)); - for (uint32_t i = 0; i < wait_count; i++) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(waits[i]); + for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) { + struct pvr_srv_sync *srv_wait_sync = to_srv_sync(waits[wait_idx].sync); int ret; - if (!waits[i] || srv_wait_sync->fd < 0) + if (srv_wait_sync->fd < 0) continue; + assert((waits[wait_idx].wait_value == 0) && + !(waits[wait_idx].sync->flags & VK_SYNC_IS_TIMELINE)); + ret = sync_accumulate("", &fd, srv_wait_sync->fd); if (ret) { if (fd >= 0) @@ -60,7 +67,7 @@ VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws, } } - pvr_srv_set_sync_payload(srv_signal_sync, fd); + pvr_srv_set_sync_payload(to_srv_sync(signal->sync), fd); return VK_SUCCESS; } diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.h index 294de7de60e..2f0c41e8592 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.h +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.h @@ -28,11 +28,12 @@ #include struct pvr_winsys; -struct vk_sync; +struct vk_sync_wait; +struct vk_sync_signal; VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws, - struct vk_sync **waits, + struct vk_sync_wait *waits, uint32_t wait_count, - struct vk_sync *signal_sync); + struct vk_sync_signal *signal_sync); #endif /* PVR_SRV_JOB_NULL_H */ diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c index 9191da1f61c..95b1e72939c 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -42,7 +43,6 @@ #include "pvr_srv_sync.h" #include "pvr_types.h" #include "pvr_winsys.h" -#include "util/libsync.h" #include "util/log.h" #include "util/macros.h" #include "vk_alloc.h" @@ -700,60 +700,32 @@ VkResult pvr_srv_winsys_render_submit( pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info); pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info); - for (uint32_t i = 0U; i < submit_info->wait_count; i++) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]); - int ret; - - if (!submit_info->waits[i] || srv_wait_sync->fd < 0) - continue; - - if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_GEOM_BIT) { - ret = sync_accumulate("", &in_geom_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fds; - } - - submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_GEOM_BIT; - } - - if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_FRAG_BIT) { - ret = sync_accumulate("", &in_frag_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fds; - } - - submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_FRAG_BIT; - } - } - - if (submit_info->barrier_geom) { + if (submit_info->geometry.wait) { struct pvr_srv_sync *srv_wait_sync = - to_srv_sync(submit_info->barrier_geom); + to_srv_sync(submit_info->geometry.wait); if (srv_wait_sync->fd >= 0) { - int ret; - - ret = sync_accumulate("", &in_geom_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fds; + in_geom_fd = dup(srv_wait_sync->fd); + if (in_geom_fd == -1) { + return vk_errorf(NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "dup called on wait sync failed, Errno: %s", + strerror(errno)); } } } - if (submit_info->barrier_frag) { + if (submit_info->fragment.wait) { struct pvr_srv_sync *srv_wait_sync = - to_srv_sync(submit_info->barrier_frag); + to_srv_sync(submit_info->fragment.wait); if (srv_wait_sync->fd >= 0) { - int ret; - - ret = sync_accumulate("", &in_frag_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fds; + in_frag_fd = dup(srv_wait_sync->fd); + if (in_frag_fd == -1) { + return vk_errorf(NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "dup called on wait sync failed, Errno: %s", + strerror(errno)); } } } diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_transfer.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_transfer.c index b1e80a0a1ca..3e925f30270 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_transfer.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_transfer.c @@ -21,9 +21,11 @@ * SOFTWARE. */ +#include #include #include #include +#include #include #include @@ -37,7 +39,6 @@ #include "pvr_srv_job_transfer.h" #include "pvr_srv_sync.h" #include "pvr_winsys.h" -#include "util/libsync.h" #include "util/macros.h" #include "vk_alloc.h" #include "vk_log.h" @@ -271,34 +272,16 @@ VkResult pvr_srv_winsys_transfer_submit( cmds_ptr_arr[i] = &transfer_cmds[i]; } - for (uint32_t i = 0U; i < submit_info->wait_count; i++) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]); - int ret; - - if (!submit_info->waits[i] || srv_wait_sync->fd < 0) - continue; - - if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) { - ret = sync_accumulate("", &in_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fd; - } - - submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT; - } - } - - if (submit_info->barrier) { - struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->barrier); + if (submit_info->wait) { + struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->wait); if (srv_wait_sync->fd >= 0) { - int ret; - - ret = sync_accumulate("", &in_fd, srv_wait_sync->fd); - if (ret) { - result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - goto end_close_in_fd; + in_fd = dup(srv_wait_sync->fd); + if (in_fd == -1) { + return vk_errorf(NULL, + VK_ERROR_OUT_OF_HOST_MEMORY, + "dup called on wait sync failed, Errno: %s", + strerror(errno)); } } } diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_sync.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_sync.c index b33c0eb78f4..c012d91240c 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_sync.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_sync.c @@ -287,13 +287,20 @@ static VkResult pvr_srv_sync_export_sync_file(struct vk_device *device, int *sync_file) { struct pvr_srv_sync *srv_sync = to_srv_sync(sync); + VkResult result; int fd; if (srv_sync->fd < 0) { - *sync_file = -1; - return VK_SUCCESS; + struct pvr_device *driver_device = + container_of(device, struct pvr_device, vk); + + result = pvr_srv_sync_get_presignaled_sync(driver_device, &srv_sync); + if (result != VK_SUCCESS) + return result; } + assert(srv_sync->fd >= 0); + fd = dup(srv_sync->fd); if (fd < 0) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);