zink: rework handling of unordered->ordered write buffer barriers

this improves handling of barriers that originate from a write in the
unordered cmdbuf: resources now track their unordered-cmdbuf access
separately, and that tracking is used to emit a single split memory
barrier at the end of the unordered cmdbuf covering all the buffers
written on that cmdbuf

the next step will be to also merge read access down onto the
end-of-cmdbuf barrier so that all stream-upload-type functionality
becomes a single barrier
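
for illustration, a minimal self-contained sketch of the
accumulate-then-flush idea follows; the struct and helper names here are
made up for the example (the real state lives in zink_batch_state and
the real flush is the submit_queue hunk below), while the
VkMemoryBarrier parameters mirror what the patch emits:

   #include <vulkan/vulkan.h>

   /* stand-in for the two fields this patch adds to zink_batch_state */
   struct unordered_write_state {
      VkAccessFlags access;        /* union of every unordered write's access */
      VkPipelineStageFlags stages; /* union of every unordered write's stages */
   };

   /* while recording: each unordered buffer write only accumulates state
    * instead of emitting its own barrier
    */
   static void
   note_unordered_write(struct unordered_write_state *s,
                        VkAccessFlags access, VkPipelineStageFlags stages)
   {
      s->access |= access;
      s->stages |= stages;
   }

   /* at submit: one merged barrier makes all accumulated writes available;
    * dstAccessMask/dstStageMask are zero here to mirror the patch, since the
    * second half of the split dependency is emitted later, per-resource
    */
   static void
   flush_unordered_writes(VkCommandBuffer unordered_cmdbuf,
                          const struct unordered_write_state *s)
   {
      if (!s->access)
         return;
      VkMemoryBarrier mb = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .pNext = NULL,
         .srcAccessMask = s->access,
         .dstAccessMask = 0,
      };
      vkCmdPipelineBarrier(unordered_cmdbuf, s->stages, 0, 0,
                           1, &mb, 0, NULL, 0, NULL);
   }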

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22027>
Mike Blumenkrantz, 2023-03-20 10:49:55 -04:00, committed by Marge Bot
commit 46212427f9, parent e83513832b
3 changed files with 164 additions and 34 deletions


@@ -31,8 +31,10 @@ reset_obj(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_r
    obj->unordered_read = true;
    obj->unordered_write = true;
    obj->access = 0;
+   obj->unordered_access = 0;
    obj->last_write = 0;
    obj->access_stage = 0;
+   obj->unordered_access_stage = 0;
    obj->copies_need_reset = true;
    /* also prune dead view objects */
    simple_mtx_lock(&obj->view_lock);
@@ -151,6 +153,9 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
    }
    bs->swapchain = NULL;
+   bs->unordered_write_access = 0;
+   bs->unordered_write_stages = 0;
    /* only reset submitted here so that tc fence desync can pick up the 'completed' flag
     * before the state is reused
     */
@@ -573,6 +578,16 @@ submit_queue(void *data, void *gdata, int thread_index)
       goto end;
    }
    if (bs->has_barriers) {
+      if (bs->unordered_write_access) {
+         VkMemoryBarrier mb;
+         mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+         mb.pNext = NULL;
+         mb.srcAccessMask = bs->unordered_write_access;
+         mb.dstAccessMask = 0;
+         VKSCR(CmdPipelineBarrier)(bs->barrier_cmdbuf,
+                                   bs->unordered_write_stages, 0,
+                                   0, 1, &mb, 0, NULL, 0, NULL);
+      }
       result = VKSCR(EndCommandBuffer)(bs->barrier_cmdbuf);
       if (result != VK_SUCCESS) {
          mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));


@@ -3959,16 +3959,25 @@ zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_
    zink_resource_copies_reset(res);
    bool unordered = true;
    struct pipe_box box = {offset, 0, 0, size, 0, 0};
+   bool can_unordered_write = unordered_res_exec(ctx, res, true);
    /* must barrier if something read the valid buffer range */
-   bool valid_read = res->obj->access && util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !unordered_res_exec(ctx, res, true);
-   if (zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics ||
-       zink_check_unordered_transfer_access(res, 0, &box) || valid_read) {
+   bool valid_read = (res->obj->access || res->obj->unordered_access) &&
+                     util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !can_unordered_write;
+   if (valid_read || zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || zink_check_unordered_transfer_access(res, 0, &box)) {
       zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
       unordered = res->obj->unordered_write;
    } else {
-      res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+      res->obj->unordered_access = VK_ACCESS_TRANSFER_WRITE_BIT;
       res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT;
-      res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+      res->obj->unordered_access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+      ctx->batch.state->unordered_write_access |= VK_ACCESS_TRANSFER_WRITE_BIT;
+      ctx->batch.state->unordered_write_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+      if (!zink_resource_usage_matches(res, ctx->batch.state)) {
+         res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+         res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+         res->obj->ordered_access_is_copied = true;
+      }
    }
    zink_resource_copy_box_add(res, 0, &box);
    /* this return value implies that the caller could do an unordered op on this resource */
@@ -4015,16 +4024,12 @@ pipeline_access_stage(VkAccessFlags flags)
 }
 ALWAYS_INLINE static bool
-zink_resource_buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline, bool unordered)
 {
-   if (!res->obj->access || !res->obj->access_stage)
-      return true;
-   if (!pipeline)
-      pipeline = pipeline_access_stage(flags);
-   return zink_resource_access_is_write(res->obj->access) ||
+   return zink_resource_access_is_write(unordered ? res->obj->unordered_access : res->obj->access) ||
           zink_resource_access_is_write(flags) ||
-          (res->obj->access_stage & pipeline) != pipeline ||
-          (res->obj->access & flags) != flags;
+          ((unordered ? res->obj->unordered_access_stage : res->obj->access_stage) & pipeline) != pipeline ||
+          ((unordered ? res->obj->unordered_access : res->obj->access) & flags) != flags;
 }
 void
@@ -4032,20 +4037,59 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
 {
    if (!pipeline)
       pipeline = pipeline_access_stage(flags);
-   if (!zink_resource_buffer_needs_barrier(res, flags, pipeline))
-      return;
    bool is_write = zink_resource_access_is_write(flags);
+   bool unordered = unordered_res_exec(ctx, res, is_write);
+   if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+      return;
    enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
-   if (res->obj->access && !zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw)) {
+   bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+   bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+   bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+   if (completed) {
+      /* reset access on complete */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+      res->obj->last_write = 0;
+   } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+      /* always reset propagated access to avoid weirdness */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+   } else if (!unordered && !unordered_usage_matches) {
+      /* reset unordered access on first ordered barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+   }
+   if (!usage_matches) {
+      /* reset unordered on first new cmdbuf barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+      res->obj->ordered_access_is_copied = false;
+   }
+   /* unordered barriers can be skipped if either:
+    * - there is no current-batch unordered access
+    * - the unordered access is not write access
+    */
+   bool can_skip_unordered = !unordered ? false : (!unordered_usage_matches || !zink_resource_access_is_write(res->obj->unordered_access));
+   /* ordered barriers can be skipped if both:
+    * - there is no current access
+    * - there is no current-batch unordered access
+    */
+   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+   if (!can_skip_unordered && !can_skip_ordered) {
       VkMemoryBarrier bmb;
       bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
       bmb.pNext = NULL;
-      bmb.srcAccessMask = res->obj->access;
       bmb.dstAccessMask = flags;
-      assert(res->obj->access_stage);
-      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+      VkPipelineStageFlagBits stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
+      if (unordered) {
+         stages = usage_matches ? res->obj->unordered_access_stage : stages;
+         bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+      } else {
+         bmb.srcAccessMask = res->obj->access;
+      }
+      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
       bool marker = false;
       if (unlikely(zink_tracing)) {
         char buf[4096];
@@ -4061,7 +4105,7 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
      }
      VKCTX(CmdPipelineBarrier)(
         cmdbuf,
-        res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access),
+        stages,
         pipeline,
         0,
         1, &bmb,
@@ -4075,9 +4119,20 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
    if (is_write)
       res->obj->last_write = flags;
-   res->obj->access = flags;
-   res->obj->access_stage = pipeline;
+   if (unordered) {
+      /* these should get automatically emitted during submission */
+      res->obj->unordered_access = flags;
+      res->obj->unordered_access_stage = pipeline;
+      if (is_write) {
+         ctx->batch.state->unordered_write_access |= flags;
+         ctx->batch.state->unordered_write_stages |= pipeline;
+      }
+   }
+   if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+      res->obj->access = flags;
+      res->obj->access_stage = pipeline;
+      res->obj->ordered_access_is_copied = unordered;
+   }
    if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
       zink_resource_copies_reset(res);
 }
@@ -4087,21 +4142,60 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
 {
    if (!pipeline)
       pipeline = pipeline_access_stage(flags);
-   if (!zink_resource_buffer_needs_barrier(res, flags, pipeline))
-      return;
    bool is_write = zink_resource_access_is_write(flags);
+   bool unordered = unordered_res_exec(ctx, res, is_write);
+   if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+      return;
    enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
-   if (res->obj->access && !zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw)) {
+   bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+   bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+   bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+   if (completed) {
+      /* reset access on complete */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+      res->obj->last_write = 0;
+   } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+      /* always reset propagated access to avoid weirdness */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+   } else if (!unordered && !unordered_usage_matches) {
+      /* reset unordered access on first ordered barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+   }
+   if (!usage_matches) {
+      /* reset unordered on first new cmdbuf barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+      res->obj->ordered_access_is_copied = false;
+   }
+   /* unordered barriers can be skipped if either:
+    * - there is no current-batch unordered access
+    * - the unordered access is not write access
+    */
+   bool can_skip_unordered = !unordered ? false : (!unordered_usage_matches || !zink_resource_access_is_write(res->obj->unordered_access));
+   /* ordered barriers can be skipped if both:
+    * - there is no current access
+    * - there is no current-batch unordered access
+    */
+   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+   if (!can_skip_unordered && !can_skip_ordered) {
      VkMemoryBarrier2 bmb;
      bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
      bmb.pNext = NULL;
-     bmb.srcStageMask = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
-     bmb.srcAccessMask = res->obj->access;
+     VkPipelineStageFlagBits stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
+     if (unordered) {
+        bmb.srcStageMask = usage_matches ? res->obj->unordered_access_stage : stages;
+        bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+     } else {
+        bmb.srcStageMask = stages;
+        bmb.srcAccessMask = res->obj->access;
+     }
      bmb.dstStageMask = pipeline;
      bmb.dstAccessMask = flags;
-     assert(res->obj->access_stage);
-     VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
      VkDependencyInfo dep = {
        VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        NULL,
@@ -4113,6 +4207,7 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
        0,
        NULL
      };
+     VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
      bool marker = false;
      if (unlikely(zink_tracing)) {
        char buf[4096];
@@ -4134,9 +4229,20 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
    if (is_write)
       res->obj->last_write = flags;
-   res->obj->access = flags;
-   res->obj->access_stage = pipeline;
+   if (unordered) {
+      /* these should get automatically emitted during submission */
+      res->obj->unordered_access = flags;
+      res->obj->unordered_access_stage = pipeline;
+      if (is_write) {
+         ctx->batch.state->unordered_write_access |= flags;
+         ctx->batch.state->unordered_write_stages |= pipeline;
+      }
+   }
+   if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+      res->obj->access = flags;
+      res->obj->access_stage = pipeline;
+      res->obj->ordered_access_is_copied = unordered;
+   }
    if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
       zink_resource_copies_reset(res);
 }


@@ -559,6 +559,9 @@ struct zink_batch_state {
    struct util_dynarray acquires;
    struct util_dynarray acquire_flags;
+   VkAccessFlagBits unordered_write_access;
+   VkPipelineStageFlagBits unordered_write_stages;
    struct util_queue_fence flush_completed;
    struct set programs;
@@ -1117,8 +1120,14 @@ struct zink_resource_object {
    VkPipelineStageFlagBits access_stage;
    VkAccessFlagBits access;
+   VkPipelineStageFlagBits unordered_access_stage;
+   VkAccessFlagBits unordered_access;
    VkAccessFlagBits last_write;
+   /* 'access' is propagated from unordered_access to handle ops occurring
+    * in the ordered cmdbuf which can promote barriers to unordered
+    */
+   bool ordered_access_is_copied;
    bool unordered_read;
    bool unordered_write;
    bool copies_valid;