zink: implement unsynchronized staging uploads for buffers

similar to images, this comes from unsynchronized texture_subdata
serialization in tc

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34186>
Mike Blumenkrantz, 2025-03-25 11:42:36 -04:00 (committed by Marge Bot)
parent c0b1a23e35
commit 5d46e2bf3c
6 changed files with 57 additions and 32 deletions
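
The heart of the change is a fence handshake that lets a copy recorded from tc's unsynchronized texture_subdata path serialize against context flushes. Below is a minimal standalone sketch of that handshake built on Mesa's util/u_queue.h fence API; the struct and helper are hypothetical, and only the wait/reset/signal ordering mirrors the diff:

/* Hypothetical wrapper: only the wait/reset/signal ordering mirrors the
 * diff; fences must be set up with util_queue_fence_init() beforehand. */
#include "util/u_queue.h"

struct unsync_fences {
   struct util_queue_fence flush_fence;  /* signaled while no flush is in flight */
   struct util_queue_fence unsync_fence; /* signaled while no unsync recording is in flight */
};

static void
record_unsync_copy(struct unsync_fences *f)
{
   /* a flush that is already running must finish first */
   util_queue_fence_wait(&f->flush_fence);
   /* mark unsync recording as active so the next flush waits for it */
   util_queue_fence_reset(&f->unsync_fence);

   /* ... record into bs->unsynchronized_cmdbuf and set bs->has_unsync ... */

   /* recording done: unblock any flush waiting on unsync work */
   util_queue_fence_signal(&f->unsync_fence);
}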


@@ -4771,8 +4771,13 @@ end:
 void
 zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
-                 unsigned dst_offset, unsigned src_offset, unsigned size)
+                 unsigned dst_offset, unsigned src_offset, unsigned size, bool unsync)
 {
+   if (unsync) {
+      util_queue_fence_wait(&ctx->flush_fence);
+      util_queue_fence_reset(&ctx->unsync_fence);
+   }
    VkBufferCopy region;
    region.srcOffset = src_offset;
    region.dstOffset = dst_offset;
@@ -4783,11 +4788,15 @@ zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zin
    /* must barrier if something wrote the valid buffer range */
    bool valid_write = zink_check_valid_buffer_src_access(ctx, src, src_offset, size);
    bool unordered_src = !valid_write && !zink_check_unordered_transfer_access(src, 0, &box);
-   zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0);
+   /* unsync should only occur during subdata with staging resource */
+   if (!unsync)
+      zink_screen(ctx->base.screen)->buffer_barrier(ctx, src, VK_ACCESS_TRANSFER_READ_BIT, 0);
    bool unordered_dst = zink_resource_buffer_transfer_dst_barrier(ctx, dst, dst_offset, size);
    bool can_unorder = unordered_dst && unordered_src && !ctx->no_reorder;
-   VkCommandBuffer cmdbuf = can_unorder ? ctx->bs->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst);
+   VkCommandBuffer cmdbuf = unsync ? ctx->bs->unsynchronized_cmdbuf :
+                            can_unorder ? ctx->bs->reordered_cmdbuf : zink_get_cmdbuf(ctx, src, dst);
    ctx->bs->has_reordered_work |= can_unorder;
+   ctx->bs->has_unsync |= unsync;
    zink_batch_reference_resource_rw(ctx, src, false);
    zink_batch_reference_resource_rw(ctx, dst, true);
    if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
@@ -4804,6 +4813,9 @@ zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zin
    bool marker = zink_cmd_debug_marker_begin(ctx, cmdbuf, "copy_buffer(%d)", size);
    VKCTX(CmdCopyBuffer)(cmdbuf, src->obj->buffer, dst->obj->buffer, 1, &region);
    zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
+   if (unsync)
+      util_queue_fence_signal(&ctx->unsync_fence);
 }

 void
@@ -5108,7 +5120,7 @@ zink_resource_copy_region(struct pipe_context *pctx,
       zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
    } else if (dst->base.b.target == PIPE_BUFFER &&
               src->base.b.target == PIPE_BUFFER) {
-      zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width);
+      zink_copy_buffer(ctx, dst, src, dstx, src_box->x, src_box->width, false);
    } else
       zink_copy_image_buffer(ctx, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0);
    if (ctx->oom_flush && !ctx->in_rp && !ctx->unordered_blitting)
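
The command-buffer selection in zink_copy_buffer() now has a fixed priority: unsynchronized beats reordered beats ordered. Distilled into a standalone ternary (the enum and function are illustrative, not zink API):

enum cmdbuf_kind { CMDBUF_ORDERED, CMDBUF_REORDERED, CMDBUF_UNSYNC };

static enum cmdbuf_kind
pick_cmdbuf(bool unsync, bool can_unorder)
{
   /* mirrors: unsync ? unsynchronized_cmdbuf
    *        : can_unorder ? reordered_cmdbuf : zink_get_cmdbuf(...) */
   return unsync ? CMDBUF_UNSYNC :
          can_unorder ? CMDBUF_REORDERED : CMDBUF_ORDERED;
}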


@@ -202,7 +202,7 @@ void
 zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf,bool emitted);
 void
 zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src,
-                 unsigned dst_offset, unsigned src_offset, unsigned size);
+                 unsigned dst_offset, unsigned src_offset, unsigned size, bool unsync);
 #ifdef __cplusplus
 }
 #endif


@@ -1449,7 +1449,7 @@ zink_get_query_result_resource(struct pipe_context *pctx,
    }
    struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
    copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
-   zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
+   zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size, false);
    pipe_resource_reference(&staging, NULL);
    return;
 }
@@ -1474,7 +1474,7 @@ zink_get_query_result_resource(struct pipe_context *pctx,
       /* internal qbo always writes 64bit value so we can just direct copy */
       zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                        get_buffer_offset(query),
-                       result_size);
+                       result_size, false);
    } else
       /* have to do a new copy for 32bit */
       copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);


@@ -2344,9 +2344,8 @@ zink_buffer_map(struct pipe_context *pctx,
    bool is_cached_mem = (screen->info.mem_props.memoryTypes[res->obj->bo->base.base.placement].propertyFlags & VK_STAGING_RAM) == VK_STAGING_RAM;
    /* but this is only viable with a certain amount of vram since it may fully duplicate lots of large buffers */
    bool host_mem_type_check = screen->always_cached_upload ? is_cached_mem : res->obj->host_visible;
-   if (usage & PIPE_MAP_DISCARD_RANGE &&
-       ((!res->obj->host_visible || !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT))) ||
-        (!host_mem_type_check && !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT))))) {
+   if (usage & PIPE_MAP_DISCARD_RANGE && !(usage & PIPE_MAP_PERSISTENT) &&
+       (!host_mem_type_check || !(usage & (PIPE_MAP_UNSYNCHRONIZED)))) {

       /* Check if mapping this buffer would cause waiting for the GPU.
        */
@@ -2356,20 +2355,26 @@ zink_buffer_map(struct pipe_context *pctx,
          /* Do a wait-free write-only transfer using a temporary buffer. */
          unsigned offset;
-         /* If we are not called from the driver thread, we have
-          * to use the uploader from u_threaded_context, which is
-          * local to the calling thread.
-          */
-         struct u_upload_mgr *mgr;
-         if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
-            mgr = ctx->tc->base.stream_uploader;
-         else
-            mgr = ctx->base.stream_uploader;
-         u_upload_alloc(mgr, 0, box->width,
-                        screen->info.props.limits.minMemoryMapAlignment, &offset,
-                        (struct pipe_resource **)&trans->staging_res, (void **)&ptr);
+         if (usage & PIPE_MAP_UNSYNCHRONIZED) {
+            trans->offset = box->x % MAX2(screen->info.props.limits.minMemoryMapAlignment, 1 << MIN_SLAB_ORDER);
+            trans->staging_res = pipe_buffer_create(&screen->base, PIPE_BIND_LINEAR, PIPE_USAGE_STAGING, box->width + trans->offset);
+            trans->unsync_upload = true;
+         } else {
+            /* If we are not called from the driver thread, we have
+             * to use the uploader from u_threaded_context, which is
+             * local to the calling thread.
+             */
+            struct u_upload_mgr *mgr;
+            if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
+               mgr = ctx->tc->base.stream_uploader;
+            else
+               mgr = ctx->base.stream_uploader;
+            u_upload_alloc(mgr, 0, box->width,
+                           screen->info.props.limits.minMemoryMapAlignment, &offset,
+                           (struct pipe_resource **)&trans->staging_res, (void **)&ptr);
+            trans->offset = offset;
+         }
          res = zink_resource(trans->staging_res);
-         trans->offset = offset;
          usage |= PIPE_MAP_UNSYNCHRONIZED;
          ptr = ((uint8_t *)ptr);
       } else {
@@ -2401,7 +2406,7 @@ overwrite:
          ctx = screen->copy_context;
       }
       if (usage & PIPE_MAP_READ)
-         zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width);
+         zink_copy_buffer(ctx, staging_res, res, trans->offset, box->x, box->width, false);
       res = staging_res;
       usage &= ~PIPE_MAP_UNSYNCHRONIZED;
       map_offset = trans->offset;
@@ -2752,7 +2757,7 @@ zink_transfer_flush_region(struct pipe_context *pctx,
       struct zink_resource *staging_res = zink_resource(trans->staging_res);

       if (ptrans->resource->target == PIPE_BUFFER)
-         zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size);
+         zink_copy_buffer(ctx, res, staging_res, dst_offset, src_offset, size, trans->unsync_upload);
       else
          zink_transfer_copy_bufimage(ctx, res, staging_res, trans);
    }
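
The unsynchronized map path sizes its staging buffer as box->width plus box->x modulo the map alignment, so the deferred zink_copy_buffer() above reads the staging data at the same alignment phase it writes to the destination. A worked example with made-up constants (neither value is the driver's real one):

#include <stdio.h>

int main(void)
{
   unsigned min_map_align = 64;     /* stand-in for minMemoryMapAlignment */
   unsigned slab_align = 1u << 9;   /* stand-in for 1 << MIN_SLAB_ORDER */
   unsigned align = min_map_align > slab_align ? min_map_align : slab_align; /* MAX2() */
   unsigned box_x = 4100;           /* example destination byte offset */
   unsigned offset = box_x % align; /* 4100 % 512 == 4 */

   /* staging is created with size box->width + offset; the flush copies
    * staging[offset..] to dst[box_x..], so both ends of the copy share
    * the same phase modulo the alignment */
   printf("staging offset = %u\n", offset);
   return 0;
}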


@@ -689,7 +689,7 @@ buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineS
-template <barrier_type BARRIER_API>
+template <barrier_type BARRIER_API, bool UNSYNCHRONIZED>
 void
 zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
 {
@@ -708,6 +708,7 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
    }
    bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
    bool unordered = unordered_res_exec(ctx, res, is_write);
+   assert(!UNSYNCHRONIZED || !usage_matches);
    if (!buffer_needs_barrier(res, flags, pipeline, unordered))
       return;
    if (completed) {
@@ -734,18 +735,20 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
     * - there is no current-batch unordered access AND previous batch usage is not write access
     * - there is current-batch unordered access AND the unordered access is not write access
     */
-   bool can_skip_unordered = !unordered ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access);
+   bool can_skip_unordered = !unordered || UNSYNCHRONIZED ? false : !zink_resource_access_is_write(!unordered_usage_matches ? res->obj->access : res->obj->unordered_access);
    /* ordered barriers can be skipped if both:
     * - there is no current access
     * - there is no current-batch unordered access
     */
-   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+   bool can_skip_ordered = unordered || UNSYNCHRONIZED ? false : (!res->obj->access && !unordered_usage_matches);
    if (ctx->no_reorder)
       can_skip_unordered = can_skip_ordered = false;

    if (!can_skip_unordered && !can_skip_ordered) {
-      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+      VkCommandBuffer cmdbuf = UNSYNCHRONIZED ? ctx->bs->unsynchronized_cmdbuf :
+                               is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
       bool marker = false;
+      ctx->bs->has_unsync |= UNSYNCHRONIZED;
       if (unlikely(zink_tracing)) {
          char buf[4096];
          zink_string_vkflags_unroll(buf, sizeof(buf), flags, (zink_vkflags_func)vk_AccessFlagBits_to_str);
@@ -758,7 +761,8 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
       zink_cmd_debug_marker_end(ctx, cmdbuf, marker);
    }

-   resource_check_defer_buffer_barrier(ctx, res, pipeline);
+   if (!UNSYNCHRONIZED)
+      resource_check_defer_buffer_barrier(ctx, res, pipeline);

    if (is_write)
       res->obj->last_write = flags;
@@ -784,11 +788,13 @@ void
 zink_synchronization_init(struct zink_screen *screen)
 {
    if (screen->info.have_vulkan13 || screen->info.have_KHR_synchronization2) {
-      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_KHR_synchronzation2>;
+      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_KHR_synchronzation2, false>;
+      screen->buffer_barrier_unsync = zink_resource_buffer_barrier<barrier_KHR_synchronzation2, true>;
       screen->image_barrier = zink_resource_image_barrier<barrier_KHR_synchronzation2, false>;
       screen->image_barrier_unsync = zink_resource_image_barrier<barrier_KHR_synchronzation2, true>;
    } else {
-      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_default>;
+      screen->buffer_barrier = zink_resource_buffer_barrier<barrier_default, false>;
+      screen->buffer_barrier_unsync = zink_resource_buffer_barrier<barrier_default, true>;
       screen->image_barrier = zink_resource_image_barrier<barrier_default, false>;
       screen->image_barrier_unsync = zink_resource_image_barrier<barrier_default, true>;
    }
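
zink_synchronization_init() above instantiates the barrier template once per (barrier API, UNSYNCHRONIZED) combination and stores each specialization behind a plain function pointer, so the new flag costs nothing per call and the unsync-only branches fold away at compile time. A generic standalone illustration of the pattern (names are not zink's):

#include <stdio.h>

template <bool UNSYNCHRONIZED>
static void
barrier_impl(int res)
{
   if (!UNSYNCHRONIZED) {
      /* synchronized-only bookkeeping, folded away in the <true> variant */
      printf("defer bookkeeping for res %d\n", res);
   }
   printf("emit barrier for res %d (unsync=%d)\n", res, UNSYNCHRONIZED);
}

/* one-time dispatch, mirroring screen->buffer_barrier{,_unsync} */
static void (*const barrier)(int) = barrier_impl<false>;
static void (*const barrier_unsync)(int) = barrier_impl<true>;

int main()
{
   barrier(1);
   barrier_unsync(2);
   return 0;
}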


@@ -1371,6 +1371,7 @@ struct zink_transfer {
    struct pipe_resource *staging_res;
    unsigned offset;
    unsigned depthPitch;
+   bool unsync_upload;
 };
@@ -1515,6 +1516,7 @@ struct zink_screen {
    struct vk_uncompacted_dispatch_table vk;

    void (*buffer_barrier)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
+   void (*buffer_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline);
    void (*image_barrier)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);
    void (*image_barrier_unsync)(struct zink_context *ctx, struct zink_resource *res, VkImageLayout new_layout, VkAccessFlags flags, VkPipelineStageFlags pipeline);