diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 1e80f46c3ad..486313a5403 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -4771,8 +4771,13 @@ end: void zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, - unsigned dst_offset, unsigned src_offset, unsigned size) + unsigned dst_offset, unsigned src_offset, unsigned size, bool unsync) { + if (unsync) { + util_queue_fence_wait(&ctx->flush_fence); + util_queue_fence_reset(&ctx->unsync_fence); + } + VkBufferCopy region; region.srcOffset = src_offset; region.dstOffset = dst_offset; @@ -4783,11 +4788,15 @@ zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zin /* must barrier if something wrote the valid buffer range */ bool valid_write = zink_check_valid_buffer_src_access(ctx, src, src_offset, size); bool unordered_src = !valid_write && !zink_check_unordered_transfer_access(src, 0, &box); - zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0); + /* unsync should only occur during subdata with staging resource */ + if (!unsync) + zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0); bool unordered_dst = zink_resource_buffer_transfer_dst_barrier(ctx, dst, dst_offset, size); bool can_unorder = unordered_dst && unordered_src && !ctx->no_reorder; - VkCommandBuffer cmdbuf = can_unorder ? ctx->bs->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst); + VkCommandBuffer cmdbuf = unsync ? ctx->bs->unsynchronized_cmdbuf : + can_unorder ? 
ctx->bs->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst); ctx->bs->has_reordered_work |= can_unorder; + ctx->bs->has_unsync |= unsync; zink_batch_reference_resource_rw(ctx, src, false); zink_batch_reference_resource_rw(ctx, dst, true); if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { @@ -4804,6 +4813,9 @@ zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zin bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer(%d)", size); VKCTX(CmdCopyBuffer)(cmdbuf, src->obj->buffer, dst->obj->buffer, 1, &region); zink_cmd_debug_marker_end(ctx, cmdbuf, marker); + + if (unsync) + util_queue_fence_signal(&ctx->unsync_fence); } void @@ -5108,7 +5120,7 @@ zink_resource_copy_region(struct pipe_context *pctx, zink_cmd_debug_marker_end(ctx, cmdbuf, marker); } else if (dst->base.b.target == PIPE_BUFFER && src->base.b.target == PIPE_BUFFER) { - zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width); + zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width, false); } else zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0); if (ctx->oom_flush && !ctx->in_rp && !ctx->unordered_blitting) diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index 5ac504f07cf..aebf64099ea 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -202,7 +202,7 @@ void zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf,bool emitted); void zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, - unsigned dst_offset, unsigned src_offset, unsigned size); + unsigned dst_offset, unsigned src_offset, unsigned size, bool unsync); #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c index 5ee1ff24c9c..00c54da8f62 100644 --- a/src/gallium/drivers/zink/zink_query.c +++ b/src/gallium/drivers/zink/zink_query.c @@
-1449,7 +1449,7 @@ zink_get_query_result_resource(struct pipe_context *pctx, } struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size); copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag); - zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size); + zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size, false); pipe_resource_reference(&staging, NULL); return; } @@ -1474,7 +1474,7 @@ zink_get_query_result_resource(struct pipe_context *pctx, /* internal qbo always writes 64bit value so we can just direct copy */ zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset, get_buffer_offset(query), - result_size); + result_size, false); } else /* have to do a new copy for 32bit */ copy_results_to_buffer(ctx, query, res, offset, 1, size_flags); diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index be0984254a9..80a8dd7b898 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -2344,9 +2344,8 @@ zink_buffer_map(struct pipe_context *pctx, bool is_cached_mem = (screen->info.mem_props.memoryTypes[res->obj->bo->base.base.placement].propertyFlags & VK_STAGING_RAM) == VK_STAGING_RAM; /* but this is only viable with a certain amount of vram since it may fully duplicate lots of large buffers */ bool host_mem_type_check = screen->always_cached_upload ? 
is_cached_mem : res->obj->host_visible; - if (usage & PIPE_MAP_DISCARD_RANGE && - ((!res->obj->host_visible || !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT))) || - (!host_mem_type_check && !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT))))) { + if (usage & PIPE_MAP_DISCARD_RANGE && !(usage & PIPE_MAP_PERSISTENT) && + (!host_mem_type_check || !(usage & (PIPE_MAP_UNSYNCHRONIZED)))) { /* Check if mapping this buffer would cause waiting for the GPU. */ @@ -2356,20 +2355,26 @@ zink_buffer_map(struct pipe_context *pctx, /* Do a wait-free write-only transfer using a temporary buffer. */ unsigned offset; - /* If we are not called from the driver thread, we have - * to use the uploader from u_threaded_context, which is - * local to the calling thread. - */ - struct u_upload_mgr *mgr; - if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) - mgr = ctx->tc->base.stream_uploader; - else - mgr = ctx->base.stream_uploader; - u_upload_alloc(mgr, 0, box->width, - screen->info.props.limits.minMemoryMapAlignment, &offset, - (struct pipe_resource **)&trans->staging_res, (void **)&ptr); + if (usage & PIPE_MAP_UNSYNCHRONIZED) { + trans->offset = box->x % MAX2(screen->info.props.limits.minMemoryMapAlignment, 1 << MIN_SLAB_ORDER); + trans->staging_res = pipe_buffer_create(&screen->base, PIPE_BIND_LINEAR, PIPE_USAGE_STAGING, box->width + trans->offset); + trans->unsync_upload = true; + } else { + /* If we are not called from the driver thread, we have + * to use the uploader from u_threaded_context, which is + * local to the calling thread. 
+ */ + struct u_upload_mgr *mgr; + if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) + mgr = ctx->tc->base.stream_uploader; + else + mgr = ctx->base.stream_uploader; + u_upload_alloc(mgr, 0, box->width, + screen->info.props.limits.minMemoryMapAlignment, &offset, + (struct pipe_resource **)&trans->staging_res, (void **)&ptr); + trans->offset = offset; + } res = zink_resource(trans->staging_res); - trans->offset = offset; usage |= PIPE_MAP_UNSYNCHRONIZED; ptr = ((uint8_t *)ptr); } else { @@ -2401,7 +2406,7 @@ overwrite: ctx = screen->copy_context; } if (usage & PIPE_MAP_READ) - zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width); + zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width, false); res = staging_res; usage &= ~PIPE_MAP_UNSYNCHRONIZED; map_offset = trans->offset; @@ -2752,7 +2757,7 @@ zink_transfer_flush_region(struct pipe_context *pctx, struct zink_resource *staging_res = zink_resource(trans->staging_res); if (ptrans->resource->target == PIPE_BUFFER) - zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size); + zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size, trans->unsync_upload); else zink_transfer_copy_bufimage(ctx, res, staging_res, trans); } diff --git a/src/gallium/drivers/zink/zink_synchronization.cpp b/src/gallium/drivers/zink/zink_synchronization.cpp index a91d80e7a10..07068e4dff5 100644 --- a/src/gallium/drivers/zink/zink_synchronization.cpp +++ b/src/gallium/drivers/zink/zink_synchronization.cpp @@ -689,7 +689,7 @@ buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineS -template <bool HAS_SYNC2> +template <bool HAS_SYNC2, bool UNSYNCHRONIZED> void zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline) { @@ -708,6 +708,7 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res } bool unordered_usage_matches = res->obj->unordered_access && usage_matches; bool unordered = unordered_res_exec(ctx, res, 
is_write); + assert(!UNSYNCHRONIZED || !usage_matches); if (!buffer_needs_barrier(res, flags, pipeline, unordered)) return; if (completed) { @@ -734,18 +735,20 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res * - there is no current-batch unordered access AND previous batch usage is not write access * - there is current-batch unordered access AND the unordered access is not write access */ - bool can_skip_unordered = !unordered ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access); + bool can_skip_unordered = !unordered || UNSYNCHRONIZED ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access); /* ordered barriers can be skipped if both: * - there is no current access * - there is no current-batch unordered access */ - bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches); + bool can_skip_ordered = unordered || UNSYNCHRONIZED ? false : (!res->obj->access && !unordered_usage_matches); if (ctx->no_reorder) can_skip_unordered = can_skip_ordered = false; if (!can_skip_unordered && !can_skip_ordered) { - VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL); + VkCommandBuffer cmdbuf = UNSYNCHRONIZED ? ctx->bs->unsynchronized_cmdbuf : + is_write ? 
zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL); bool marker = false; + ctx->bs->has_unsync |= UNSYNCHRONIZED; if (unlikely(zink_tracing)) { char buf[4096]; zink_string_vkflags_unroll(buf, sizeof(buf), flags, (zink_vkflags_func)vk_AccessFlagBits_to_str); @@ -758,7 +761,8 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res zink_cmd_debug_marker_end(ctx, cmdbuf, marker); } - resource_check_defer_buffer_barrier(ctx, res, pipeline); + if (!UNSYNCHRONIZED) + resource_check_defer_buffer_barrier(ctx, res, pipeline); if (is_write) res->obj->last_write = flags; @@ -784,11 +788,13 @@ void zink_synchronization_init(struct zink_screen *screen) { if (screen->info.have_vulkan13 || screen->info.have_KHR_synchronization2) { - screen->buffer_barrier = zink_resource_buffer_barrier<true>; + screen->buffer_barrier = zink_resource_buffer_barrier<true, false>; + screen->buffer_barrier_unsync = zink_resource_buffer_barrier<true, true>; screen->image_barrier = zink_resource_image_barrier<true, false>; screen->image_barrier_unsync = zink_resource_image_barrier<true, true>; } else { - screen->buffer_barrier = zink_resource_buffer_barrier<false>; + screen->buffer_barrier = zink_resource_buffer_barrier<false, false>; + screen->buffer_barrier_unsync = zink_resource_buffer_barrier<false, true>; screen->image_barrier = zink_resource_image_barrier<false, false>; screen->image_barrier_unsync = zink_resource_image_barrier<false, true>; } diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index a226bcaab87..5430197f099 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -1371,6 +1371,7 @@ struct zink_transfer { struct pipe_resource *staging_res; unsigned offset; unsigned depthPitch; + bool unsync_upload; }; @@ -1515,6 +1516,7 @@ struct zink_screen { struct vk_uncompacted_dispatch_table vk; void (*buffer_barrier)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); + void (*buffer_barrier_unsync)(struct zink_context *ctx, struct 
zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline); void (*image_barrier)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline); void (*image_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);