diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index f5e7ba45bd8..2a3012ae217 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -86,6 +86,9 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) struct zink_screen *screen = zink_screen(ctx->base.screen); VkResult result = VKSCR(ResetCommandPool)(screen->dev, bs->cmdpool, 0); + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result)); + result = VKSCR(ResetCommandPool)(screen->dev, bs->unsynchronized_cmdpool, 0); if (result != VK_SUCCESS) mesa_loge("ZINK: vkResetCommandPool failed (%s)", vk_Result_to_str(result)); @@ -187,6 +190,7 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) */ bs->fence.submitted = false; bs->has_barriers = false; + bs->has_unsync = false; if (bs->fence.batch_id) zink_screen_update_last_finished(screen, bs->fence.batch_id); bs->fence.batch_id = 0; @@ -289,6 +293,10 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs VKSCR(FreeCommandBuffers)(screen->dev, bs->cmdpool, 1, &bs->reordered_cmdbuf); if (bs->cmdpool) VKSCR(DestroyCommandPool)(screen->dev, bs->cmdpool, NULL); + if (bs->unsynchronized_cmdbuf) + VKSCR(FreeCommandBuffers)(screen->dev, bs->unsynchronized_cmdpool, 1, &bs->unsynchronized_cmdbuf); + if (bs->unsynchronized_cmdpool) + VKSCR(DestroyCommandPool)(screen->dev, bs->unsynchronized_cmdpool, NULL); free(bs->real_objs.objs); free(bs->slab_objs.objs); free(bs->sparse_objs.objs); @@ -330,12 +338,17 @@ create_batch_state(struct zink_context *ctx) mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); goto fail; } + result = VKSCR(CreateCommandPool)(screen->dev, &cpci, NULL, &bs->unsynchronized_cmdpool); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateCommandPool failed (%s)", vk_Result_to_str(result)); + goto fail; + } 
VkCommandBuffer cmdbufs[2]; VkCommandBufferAllocateInfo cbai = {0}; cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cbai.commandPool = bs->cmdpool; cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cbai.commandPool = bs->cmdpool; cbai.commandBufferCount = 2; result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, cmdbufs); @@ -346,6 +359,14 @@ create_batch_state(struct zink_context *ctx) bs->cmdbuf = cmdbufs[0]; bs->reordered_cmdbuf = cmdbufs[1]; + cbai.commandPool = bs->unsynchronized_cmdpool; + cbai.commandBufferCount = 1; + result = VKSCR(AllocateCommandBuffers)(screen->dev, &cbai, &bs->unsynchronized_cmdbuf); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkAllocateCommandBuffers failed (%s)", vk_Result_to_str(result)); + goto fail; + } + #define SET_CREATE_OR_FAIL(ptr) \ if (!_mesa_set_init(ptr, bs, _mesa_hash_pointer, _mesa_key_pointer_equal)) \ goto fail @@ -376,6 +397,7 @@ create_batch_state(struct zink_context *ctx) cnd_init(&bs->usage.flush); mtx_init(&bs->usage.mtx, mtx_plain); + simple_mtx_init(&bs->exportable_lock, mtx_plain); memset(&bs->buffer_indices_hashlist, -1, sizeof(bs->buffer_indices_hashlist)); if (!zink_batch_descriptor_init(screen, bs)) @@ -497,6 +519,10 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) if (result != VK_SUCCESS) mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + result = VKCTX(BeginCommandBuffer)(batch->state->unsynchronized_cmdbuf, &cbbi); + if (result != VK_SUCCESS) + mesa_loge("ZINK: vkBeginCommandBuffer failed (%s)", vk_Result_to_str(result)); + batch->state->fence.completed = false; if (ctx->last_fence) { struct zink_batch_state *last_state = zink_batch_state(ctx->last_fence); @@ -511,6 +537,7 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) capture_label.pNext = NULL; capture_label.pLabelName = "vr-marker,frame_end,type,application"; memset(capture_label.color, 0, sizeof(capture_label.color)); + 
VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->unsynchronized_cmdbuf, &capture_label); VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->reordered_cmdbuf, &capture_label); VKCTX(CmdInsertDebugUtilsLabelEXT)(batch->state->cmdbuf, &capture_label); } @@ -530,6 +557,7 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) if (screen->info.have_EXT_attachment_feedback_loop_dynamic_state) { VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->cmdbuf, 0); VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->reordered_cmdbuf, 0); + VKCTX(CmdSetAttachmentFeedbackLoopEnableEXT)(ctx->batch.state->unsynchronized_cmdbuf, 0); } } @@ -612,8 +640,10 @@ submit_queue(void *data, void *gdata, int thread_index) si[ZINK_SUBMIT_CMDBUF].waitSemaphoreCount = util_dynarray_num_elements(&bs->wait_semaphores, VkSemaphore); si[ZINK_SUBMIT_CMDBUF].pWaitSemaphores = bs->wait_semaphores.data; si[ZINK_SUBMIT_CMDBUF].pWaitDstStageMask = bs->wait_semaphore_stages.data; - VkCommandBuffer cmdbufs[2]; + VkCommandBuffer cmdbufs[3]; unsigned c = 0; + if (bs->has_unsync) + cmdbufs[c++] = bs->unsynchronized_cmdbuf; if (bs->has_barriers) cmdbufs[c++] = bs->reordered_cmdbuf; cmdbufs[c++] = bs->cmdbuf; @@ -666,6 +696,14 @@ submit_queue(void *data, void *gdata, int thread_index) goto end; } } + if (bs->has_unsync) { + result = VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf); + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + } if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) num_si--; diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index d06afa922d3..1b500b21df7 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -3314,6 +3314,9 @@ flush_batch(struct zink_context *ctx, bool sync) /* start rp to do all the clears */ zink_batch_rp(ctx); zink_batch_no_rp_safe(ctx); + + 
util_queue_fence_wait(&ctx->unsync_fence); + util_queue_fence_reset(&ctx->flush_fence); zink_end_batch(ctx, batch); ctx->deferred_fence = NULL; @@ -3351,6 +3354,7 @@ flush_batch(struct zink_context *ctx, bool sync) tc_renderpass_info_reset(&ctx->dynamic_fb.tc_info); ctx->rp_tc_info_updated = true; } + util_queue_fence_signal(&ctx->flush_fence); } void @@ -4456,6 +4460,11 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru bool needs_present_readback = false; bool buf2img = buf == src; + bool unsync = !!(map_flags & PIPE_MAP_UNSYNCHRONIZED); + if (unsync) { + util_queue_fence_wait(&ctx->flush_fence); + util_queue_fence_reset(&ctx->unsync_fence); + } if (buf2img) { if (zink_is_swapchain(img)) { @@ -4466,9 +4475,11 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru box.x = dstx; box.y = dsty; box.z = dstz; - zink_resource_image_transfer_dst_barrier(ctx, img, dst_level, &box); - zink_screen(ctx->base.screen)->buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + zink_resource_image_transfer_dst_barrier(ctx, img, dst_level, &box, unsync); + if (!unsync) + zink_screen(ctx->base.screen)->buffer_barrier(ctx, buf, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { + assert(!(map_flags & PIPE_MAP_UNSYNCHRONIZED)); if (zink_is_swapchain(img)) needs_present_readback = zink_kopper_acquire_readback(ctx, img, &use_img); zink_screen(ctx->base.screen)->image_barrier(ctx, use_img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); @@ -4514,12 +4525,17 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru region.imageExtent.width = src_box->width; region.imageExtent.height = src_box->height; - /* never promote to unordered if swapchain was acquired */ - VkCommandBuffer cmdbuf = needs_present_readback ? + VkCommandBuffer cmdbuf = unsync ? 
+ ctx->batch.state->unsynchronized_cmdbuf : + /* never promote to unordered if swapchain was acquired */ + needs_present_readback ? ctx->batch.state->cmdbuf : buf2img ? zink_get_cmdbuf(ctx, buf, use_img) : zink_get_cmdbuf(ctx, use_img, buf); zink_batch_reference_resource_rw(batch, use_img, buf2img); zink_batch_reference_resource_rw(batch, buf, !buf2img); + if (unsync) { + ctx->batch.state->has_unsync = true; + } /* we're using u_transfer_helper_deinterleave, which means we'll be getting PIPE_MAP_* usage * to indicate whether to copy either the depth or stencil aspects @@ -4578,7 +4594,10 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_resource *dst, stru } zink_cmd_debug_marker_end(ctx, cmdbuf, marker); } + if (unsync) + util_queue_fence_signal(&ctx->unsync_fence); if (needs_present_readback) { + assert(!unsync); if (buf2img) { img->obj->unordered_write = false; buf->obj->unordered_read = false; @@ -5286,6 +5305,9 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) zink_context_resource_init(&ctx->base); zink_context_query_init(&ctx->base); + util_queue_fence_init(&ctx->flush_fence); + util_queue_fence_init(&ctx->unsync_fence); + list_inithead(&ctx->query_pools); _mesa_set_init(&ctx->update_barriers[0][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); _mesa_set_init(&ctx->update_barriers[1][0], ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index 2eacd4b10a9..09348d95825 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -134,7 +134,7 @@ zink_check_unordered_transfer_access(struct zink_resource *res, unsigned level, bool zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size); void -zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct 
pipe_box *box); +zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync); bool zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned offset, unsigned size); void diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index b07bf408b55..d0b478f3ee6 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -2307,7 +2307,6 @@ zink_image_map(struct pipe_context *pctx, zink_fb_clears_apply_region(ctx, pres, zink_rect_from_box(box)); } if (!res->linear || !res->obj->host_visible) { - assert(!(usage & PIPE_MAP_UNSYNCHRONIZED)); enum pipe_format format = pres->format; if (usage & PIPE_MAP_DEPTH_ONLY) format = util_format_get_depth_only(pres->format); @@ -2337,6 +2336,7 @@ zink_image_map(struct pipe_context *pctx, struct zink_resource *staging_res = zink_resource(trans->staging_res); if (usage & PIPE_MAP_READ) { + assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); /* force multi-context sync */ if (zink_resource_usage_is_unflushed_write(res)) zink_resource_usage_wait(ctx, res, ZINK_RESOURCE_ACCESS_WRITE); diff --git a/src/gallium/drivers/zink/zink_synchronization.cpp b/src/gallium/drivers/zink/zink_synchronization.cpp index fdaa0f618fb..05a904f3231 100644 --- a/src/gallium/drivers/zink/zink_synchronization.cpp +++ b/src/gallium/drivers/zink/zink_synchronization.cpp @@ -320,7 +320,7 @@ resource_check_defer_image_barrier(struct zink_context *ctx, struct zink_resourc _mesa_set_add(ctx->need_barriers[is_compute], res); } -template <bool HAS_SYNC2> +template <bool HAS_SYNC2, bool UNSYNCHRONIZED> void zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) { @@ -341,11 +341,18 @@ zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, res->obj->unordered_write = true; if
(is_write || zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, ZINK_RESOURCE_ACCESS_RW)) res->obj->unordered_read = true; + } else { + assert(!UNSYNCHRONIZED); } - /* if current batch usage exists with ordered non-transfer access, never promote - * this avoids layout dsync - */ - if (zink_resource_usage_matches(res, ctx->batch.state) && !ctx->unordered_blitting && + if (UNSYNCHRONIZED) { + cmdbuf = ctx->batch.state->unsynchronized_cmdbuf; + res->obj->unordered_write = true; + res->obj->unordered_read = true; + ctx->batch.state->has_unsync = true; + } else if (zink_resource_usage_matches(res, ctx->batch.state) && !ctx->unordered_blitting && + /* if current batch usage exists with ordered non-transfer access, never promote + * this avoids layout dsync + */ (!res->obj->unordered_read || !res->obj->unordered_write)) { cmdbuf = ctx->batch.state->cmdbuf; res->obj->unordered_write = false; @@ -417,7 +424,8 @@ zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, } zink_cmd_debug_marker_end(ctx, cmdbuf, marker); - resource_check_defer_image_barrier(ctx, res, new_layout, pipeline); + if (!UNSYNCHRONIZED) + resource_check_defer_image_barrier(ctx, res, new_layout, pipeline); if (is_write) res->obj->last_write = flags; @@ -425,6 +433,8 @@ zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, res->obj->access = flags; res->obj->access_stage = pipeline; res->layout = new_layout; + if (res->obj->exportable) + simple_mtx_lock(&ctx->batch.state->exportable_lock); if (res->obj->dt) { struct kopper_displaytarget *cdt = res->obj->dt; if (cdt->swapchain->num_acquires && res->obj->dt_idx != UINT32_MAX) { @@ -441,12 +451,14 @@ zink_resource_image_barrier(struct zink_context *ctx, struct zink_resource *res, if (new_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) zink_resource_copies_reset(res); if (res->obj->exportable && queue_import) { - for (; res; res = zink_resource(res->base.b.next)) { - VkSemaphore 
sem = zink_screen_export_dmabuf_semaphore(zink_screen(ctx->base.screen), res); + for (struct zink_resource *r = res; r; r = zink_resource(r->base.b.next)) { + VkSemaphore sem = zink_screen_export_dmabuf_semaphore(zink_screen(ctx->base.screen), r); if (sem) util_dynarray_append(&ctx->batch.state->fd_wait_semaphores, VkSemaphore, sem); } } + if (res->obj->exportable) + simple_mtx_unlock(&ctx->batch.state->exportable_lock); } bool @@ -466,7 +478,7 @@ zink_check_valid_buffer_src_access(struct zink_context *ctx, struct zink_resourc } void -zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box) +zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_resource *res, unsigned level, const struct pipe_box *box, bool unsync) { if (res->obj->copies_need_reset) zink_resource_copies_reset(res); @@ -474,7 +486,10 @@ zink_resource_image_transfer_dst_barrier(struct zink_context *ctx, struct zink_r if (res->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || zink_check_unordered_transfer_access(res, level, box)) { - zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + if (unsync) + zink_screen(ctx->base.screen)->image_barrier_unsync(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + else + zink_screen(ctx->base.screen)->image_barrier(ctx, res, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT; res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -699,9 +714,11 @@ zink_synchronization_init(struct zink_screen *screen) { if (screen->info.have_vulkan13 || screen->info.have_KHR_synchronization2) { screen->buffer_barrier = 
zink_resource_buffer_barrier<true>; - screen->image_barrier = zink_resource_image_barrier<true>; + screen->image_barrier = zink_resource_image_barrier<true, false>; + screen->image_barrier_unsync = zink_resource_image_barrier<true, true>; } else { screen->buffer_barrier = zink_resource_buffer_barrier<false>; - screen->image_barrier = zink_resource_image_barrier<false>; + screen->image_barrier = zink_resource_image_barrier<false, false>; + screen->image_barrier_unsync = zink_resource_image_barrier<false, true>; } } diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 95867de8639..98d3337a9bd 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -599,6 +599,8 @@ struct zink_batch_state { VkCommandPool cmdpool; VkCommandBuffer cmdbuf; VkCommandBuffer reordered_cmdbuf; + VkCommandPool unsynchronized_cmdpool; + VkCommandBuffer unsynchronized_cmdbuf; VkSemaphore signal_semaphore; //external signal semaphore struct util_dynarray signal_semaphores; //external signal semaphores struct util_dynarray wait_semaphores; //external wait semaphores @@ -620,6 +622,8 @@ struct zink_batch_state { VkAccessFlags unordered_write_access; VkPipelineStageFlags unordered_write_stages; + simple_mtx_t exportable_lock; + struct util_queue_fence flush_completed; struct set programs; @@ -654,6 +658,7 @@ struct zink_batch_state { bool is_device_lost; bool has_barriers; + bool has_unsync; }; static inline struct zink_batch_state * @@ -1502,6 +1507,7 @@ struct zink_screen { void (*buffer_barrier)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); void (*image_barrier)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); + void (*image_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); bool compact_descriptors; /**< toggled if descriptor set ids are compacted */
uint8_t desc_set_id[ZINK_MAX_DESCRIPTOR_SETS]; /**< converts enum zink_descriptor_type -> the actual set id */ @@ -1781,6 +1787,9 @@ struct zink_context { struct pipe_device_reset_callback reset; + struct util_queue_fence unsync_fence; //unsigned during unsync recording (blocks flush ops) + struct util_queue_fence flush_fence; //unsigned during flush (blocks unsync ops) + struct zink_fence *deferred_fence; struct zink_fence *last_fence; //the last command buffer submitted struct zink_batch_state *batch_states; //list of submitted batch states: ordered by increasing timeline id