From d179c5d28e6ae86dd95d906fff8197e35270dad8 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Mon, 22 Mar 2021 11:01:30 -0400 Subject: [PATCH] zink: implement threaded context this includes: * async buffer mapping/replacement * async queue submission * async (threaded) gallium flush handling the main churn here is from handling async gallium flushes, which involves creating multiple gallium fences (zink_tc_fence) for each zink fence (zink_fence). a tc fence may begin waiting for completion at any time, even before the zink_fence has had its cmdbuf(s) submitted, so handling this type of desync ends up needing almost a complete rewrite of the existing queue architecture Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_batch.c | 40 ++- src/gallium/drivers/zink/zink_batch.h | 21 ++ src/gallium/drivers/zink/zink_blit.c | 8 +- src/gallium/drivers/zink/zink_clear.c | 4 +- src/gallium/drivers/zink/zink_context.c | 231 +++++++++++----- src/gallium/drivers/zink/zink_context.h | 3 + src/gallium/drivers/zink/zink_descriptors.c | 4 +- src/gallium/drivers/zink/zink_fence.c | 123 +++++++-- src/gallium/drivers/zink/zink_fence.h | 32 ++- src/gallium/drivers/zink/zink_query.c | 10 +- src/gallium/drivers/zink/zink_resource.c | 285 ++++++++++++++------ src/gallium/drivers/zink/zink_resource.h | 7 +- src/gallium/drivers/zink/zink_screen.c | 4 +- src/gallium/drivers/zink/zink_screen.h | 3 + src/gallium/drivers/zink/zink_surface.c | 4 +- 15 files changed, 574 insertions(+), 205 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index 33bbae2def3..22d79871ede 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -16,6 +16,12 @@ #include "wsi_common.h" +void +debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr) +{ + sprintf(buf, "zink_batch_state"); +} + void zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) { @@ 
-82,12 +88,14 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) } bs->flush_res = NULL; - bs->fence.deferred_ctx = NULL; bs->descs_used = 0; ctx->resource_size -= bs->resource_size; bs->resource_size = 0; + /* only reset submitted here so that tc fence desync can pick up the 'completed' flag + * before the state is reused + */ bs->fence.submitted = false; bs->fence.batch_id = 0; } @@ -95,6 +103,7 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) void zink_clear_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) { + bs->fence.completed = true; zink_reset_batch_state(ctx, bs); } @@ -104,6 +113,7 @@ zink_batch_reset_all(struct zink_context *ctx) simple_mtx_lock(&ctx->batch_mtx); hash_table_foreach(&ctx->batch_states, entry) { struct zink_batch_state *bs = entry->data; + bs->fence.completed = true; zink_reset_batch_state(ctx, bs); _mesa_hash_table_remove(&ctx->batch_states, entry); util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs); @@ -117,6 +127,11 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs if (!bs) return; + util_queue_fence_destroy(&bs->flush_completed); + + if (bs->fence.fence) + vkDestroyFence(screen->dev, bs->fence.fence, NULL); + if (bs->cmdbuf) vkFreeCommandBuffers(screen->dev, bs->cmdpool, 1, &bs->cmdbuf); if (bs->cmdpool) @@ -160,6 +175,7 @@ create_batch_state(struct zink_context *ctx) goto fail bs->ctx = ctx; + pipe_reference_init(&bs->reference, 1); SET_CREATE_OR_FAIL(bs->fbs); SET_CREATE_OR_FAIL(bs->fence.resources); @@ -176,9 +192,10 @@ create_batch_state(struct zink_context *ctx) if (vkCreateFence(screen->dev, &fci, NULL, &bs->fence.fence) != VK_SUCCESS) goto fail; - pipe_reference_init(&bs->fence.reference, 1); simple_mtx_init(&bs->fence.resource_mtx, mtx_plain); + util_queue_fence_init(&bs->flush_completed); + return bs; fail: zink_batch_state_destroy(screen, bs); @@ -190,8 +207,9 @@ find_unused_state(struct 
hash_entry *entry) { struct zink_fence *fence = entry->data; /* we can't reset these from fence_finish because threads */ + bool completed = p_atomic_read(&fence->completed); bool submitted = p_atomic_read(&fence->submitted); - return !submitted; + return submitted && completed; } static struct zink_batch_state * @@ -253,9 +271,13 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) debug_printf("vkBeginCommandBuffer failed\n"); batch->state->fence.batch_id = ctx->curr_batch; + batch->state->fence.completed = false; if (ctx->last_fence) { struct zink_batch_state *last_state = zink_batch_state(ctx->last_fence); batch->last_batch_id = last_state->fence.batch_id; + } else { + if (zink_screen(ctx->base.screen)->threaded) + util_queue_init(&batch->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL); } if (!ctx->queries_disabled) zink_resume_queries(ctx, batch); @@ -332,9 +354,15 @@ zink_end_batch(struct zink_context *ctx, struct zink_batch *batch) simple_mtx_unlock(&ctx->batch_mtx); ctx->resource_size += batch->state->resource_size; - batch->state->queue = batch->queue; - submit_queue(batch->state, 0); - post_submit(batch->state, 0); + if (util_queue_is_initialized(&batch->flush_queue)) { + batch->state->queue = batch->thread_queue; + util_queue_add_job(&batch->flush_queue, batch->state, &batch->state->flush_completed, + submit_queue, post_submit, 0); + } else { + batch->state->queue = batch->queue; + submit_queue(batch->state, 0); + post_submit(batch->state, 0); + } } void diff --git a/src/gallium/drivers/zink/zink_batch.h b/src/gallium/drivers/zink/zink_batch.h index 6490a38f223..fdc67016f8c 100644 --- a/src/gallium/drivers/zink/zink_batch.h +++ b/src/gallium/drivers/zink/zink_batch.h @@ -51,11 +51,14 @@ struct zink_batch_usage { struct zink_batch_state { struct zink_fence fence; + struct pipe_reference reference; struct zink_context *ctx; VkCommandPool cmdpool; VkCommandBuffer cmdbuf; VkQueue queue; //duplicated from batch for threading + 
struct util_queue_fence flush_completed; + struct zink_resource *flush_res; unsigned short descs_used; //number of descriptors currently allocated @@ -82,6 +85,8 @@ struct zink_batch { uint32_t last_batch_id; VkQueue queue; //gfx+compute + VkQueue thread_queue; //gfx+compute + struct util_queue flush_queue; //TODO: move to wsi bool has_work; bool in_rp; //renderpass is currently active @@ -142,6 +147,22 @@ zink_batch_reference_bufferview(struct zink_batch *batch, struct zink_buffer_vie void zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface); +void +debug_describe_zink_batch_state(char *buf, const struct zink_batch_state *ptr); + +static inline void +zink_batch_state_reference(struct zink_screen *screen, + struct zink_batch_state **dst, + struct zink_batch_state *src) +{ + struct zink_batch_state *old_dst = dst ? *dst : NULL; + + if (pipe_reference_described(old_dst ? &old_dst->reference : NULL, src ? &src->reference : NULL, + (debug_reference_descriptor)debug_describe_zink_batch_state)) + zink_batch_state_destroy(screen, old_dst); + if (dst) *dst = src; +} + bool zink_batch_add_desc_set(struct zink_batch *batch, struct zink_descriptor_set *zds); diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c index 829fbd357b0..3a567baba5f 100644 --- a/src/gallium/drivers/zink/zink_blit.c +++ b/src/gallium/drivers/zink/zink_blit.c @@ -54,7 +54,7 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) region.srcOffset.x = info->src.box.x; region.srcOffset.y = info->src.box.y; - if (src->base.array_size > 1) { + if (src->base.b.array_size > 1) { region.srcOffset.z = 0; region.srcSubresource.baseArrayLayer = info->src.box.z; region.srcSubresource.layerCount = info->src.box.depth; @@ -70,7 +70,7 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info) region.dstOffset.x = info->dst.box.x; region.dstOffset.y = info->dst.box.y; - if (dst->base.array_size > 1) { + if 
(dst->base.b.array_size > 1) { region.dstOffset.z = 0; region.dstSubresource.baseArrayLayer = info->dst.box.z; region.dstSubresource.layerCount = info->dst.box.depth; @@ -138,7 +138,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) region.srcOffsets[1].x = info->src.box.x + info->src.box.width; region.srcOffsets[1].y = info->src.box.y + info->src.box.height; - if (src->base.array_size > 1) { + if (src->base.b.array_size > 1) { region.srcOffsets[0].z = 0; region.srcOffsets[1].z = 1; region.srcSubresource.baseArrayLayer = info->src.box.z; @@ -157,7 +157,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info) region.dstOffsets[1].x = info->dst.box.x + info->dst.box.width; region.dstOffsets[1].y = info->dst.box.y + info->dst.box.height; - if (dst->base.array_size > 1) { + if (dst->base.b.array_size > 1) { region.dstOffsets[0].z = 0; region.dstOffsets[1].z = 1; region.dstSubresource.baseArrayLayer = info->dst.box.z; diff --git a/src/gallium/drivers/zink/zink_clear.c b/src/gallium/drivers/zink/zink_clear.c index e1a6dd31d2b..5c5dcd9dfc2 100644 --- a/src/gallium/drivers/zink/zink_clear.c +++ b/src/gallium/drivers/zink/zink_clear.c @@ -380,8 +380,8 @@ zink_clear_texture(struct pipe_context *pctx, zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS); util_clear_render_target(pctx, surf, &color, box->x, box->y, box->width, box->height); } - if (res->base.target == PIPE_BUFFER) - util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width); + if (res->base.b.target == PIPE_BUFFER) + util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width); } else { float depth = 0.0; uint8_t stencil = 0; diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 681cc2d9764..cb784bbfa40 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -57,9 +57,10 @@ static void incr_curr_batch(struct 
zink_context *ctx) { - ctx->curr_batch++; - if (!ctx->curr_batch) - ctx->curr_batch = 1; + struct zink_screen *screen = zink_screen(ctx->base.screen); + ctx->curr_batch = p_atomic_inc_return(&screen->curr_batch); + if (!ctx->curr_batch) //never use batchid 0 + incr_curr_batch(ctx); } static struct zink_resource * @@ -299,22 +300,23 @@ zink_context_destroy(struct pipe_context *pctx) pipe_resource_reference(&ctx->dummy_vertex_buffer, NULL); pipe_resource_reference(&ctx->dummy_xfb_buffer, NULL); + if (ctx->tc) + util_queue_destroy(&ctx->batch.flush_queue); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->null_buffers); i++) pipe_resource_reference(&ctx->null_buffers[i], NULL); simple_mtx_destroy(&ctx->batch_mtx); - struct zink_fence *fence = zink_fence(&ctx->batch.state); zink_clear_batch_state(ctx, ctx->batch.state); - zink_fence_reference(screen, &fence, NULL); + zink_batch_state_reference(screen, &ctx->batch.state, NULL); hash_table_foreach(&ctx->batch_states, entry) { - fence = entry->data; - zink_clear_batch_state(ctx, entry->data); - zink_fence_reference(screen, &fence, NULL); + struct zink_batch_state *bs = entry->data; + zink_clear_batch_state(ctx, bs); + zink_batch_state_reference(screen, &bs, NULL); } util_dynarray_foreach(&ctx->free_batch_states, struct zink_batch_state*, bs) { - fence = zink_fence(*bs); zink_clear_batch_state(ctx, *bs); - zink_fence_reference(screen, &fence, NULL); + zink_batch_state_reference(screen, bs, NULL); } if (ctx->framebuffer) { @@ -335,6 +337,7 @@ zink_context_destroy(struct pipe_context *pctx) _mesa_hash_table_destroy(ctx->program_cache, NULL); _mesa_hash_table_destroy(ctx->compute_program_cache, NULL); _mesa_hash_table_destroy(ctx->render_pass_cache, NULL); + slab_destroy_child(&ctx->transfer_pool_unsync); zink_descriptor_pool_deinit(ctx); @@ -964,10 +967,10 @@ zink_set_shader_buffers(struct pipe_context *pctx, struct zink_resource *res = zink_resource(buffers[i].buffer); res->bind_history |= BITFIELD_BIT(ZINK_DESCRIPTOR_TYPE_SSBO); 
res->bind_stages |= 1 << p_stage; - pipe_resource_reference(&ssbo->buffer, &res->base); + pipe_resource_reference(&ssbo->buffer, &res->base.b); ssbo->buffer_offset = buffers[i].buffer_offset; ssbo->buffer_size = MIN2(buffers[i].buffer_size, res->obj->size - ssbo->buffer_offset); - util_range_add(&res->base, &res->valid_buffer_range, ssbo->buffer_offset, + util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset, ssbo->buffer_offset + ssbo->buffer_size); update = true; } else { @@ -1023,7 +1026,7 @@ zink_set_shader_images(struct pipe_context *pctx, if (images[i].resource->target == PIPE_BUFFER) { image_view->buffer_view = get_buffer_view(ctx, res, images[i].format, images[i].u.buf.offset, images[i].u.buf.size); assert(image_view->buffer_view); - util_range_add(&res->base, &res->valid_buffer_range, images[i].u.buf.offset, + util_range_add(&res->base.b, &res->valid_buffer_range, images[i].u.buf.offset, images[i].u.buf.offset + images[i].u.buf.size); } else { struct pipe_surface tmpl = {}; @@ -1032,7 +1035,7 @@ zink_set_shader_images(struct pipe_context *pctx, tmpl.u.tex.level = images[i].u.tex.level; tmpl.u.tex.first_layer = images[i].u.tex.first_layer; tmpl.u.tex.last_layer = images[i].u.tex.last_layer; - image_view->surface = zink_surface(pctx->create_surface(pctx, &res->base, &tmpl)); + image_view->surface = zink_surface(pctx->create_surface(pctx, &res->base.b, &tmpl)); assert(image_view->surface); } update = true; @@ -1075,7 +1078,7 @@ zink_set_sampler_views(struct pipe_context *pctx, struct zink_sampler_view *b = zink_sampler_view(pview); if (b && b->base.texture) { struct zink_resource *res = zink_resource(b->base.texture); - if (res->base.target == PIPE_BUFFER && + if (res->base.b.target == PIPE_BUFFER && res->bind_history & BITFIELD64_BIT(ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW)) { /* if this resource has been rebound while it wasn't set here, * its backing resource will have changed and thus we need to update @@ -1183,7 +1186,7 @@ 
get_render_pass(struct zink_context *ctx) struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture); struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; state.rts[fb->nr_cbufs].format = zsbuf->format; - state.rts[fb->nr_cbufs].samples = zsbuf->base.nr_samples > 0 ? zsbuf->base.nr_samples : VK_SAMPLE_COUNT_1_BIT; + state.rts[fb->nr_cbufs].samples = zsbuf->base.b.nr_samples > 0 ? zsbuf->base.b.nr_samples : VK_SAMPLE_COUNT_1_BIT; state.rts[fb->nr_cbufs].clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && !zink_fb_clear_first_needs_explicit(fb_clear) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); @@ -1375,12 +1378,22 @@ zink_end_render_pass(struct zink_context *ctx, struct zink_batch *batch) } static void -flush_batch(struct zink_context *ctx) +sync_flush(struct zink_context *ctx, struct zink_batch_state *bs) +{ + if (util_queue_is_initialized(&ctx->batch.flush_queue)) + util_queue_fence_wait(&bs->flush_completed); +} + +static void +flush_batch(struct zink_context *ctx, bool sync) { struct zink_batch *batch = &ctx->batch; zink_end_render_pass(ctx, batch); zink_end_batch(ctx, batch); + if (sync) + sync_flush(ctx, ctx->batch.state); + if (ctx->batch.state->is_device_lost && ctx->reset.reset) { ctx->is_device_lost = true; ctx->reset.reset(ctx->reset.data, PIPE_GUILTY_CONTEXT_RESET); @@ -1416,7 +1429,7 @@ zink_batch_no_rp(struct zink_context *ctx) void zink_flush_queue(struct zink_context *ctx) { - flush_batch(ctx); + flush_batch(ctx, true); } static bool @@ -1778,7 +1791,7 @@ zink_resource_buffer_barrier_init(VkBufferMemoryBarrier *bmb, struct zink_resour VK_QUEUE_FAMILY_IGNORED, res->obj->buffer, res->obj->offset, - res->base.width0 + res->base.b.width0 }; return zink_resource_buffer_needs_barrier(res, flags, pipeline); } @@ -1810,7 +1823,7 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_batch *batch, bool zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout 
layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) { - if (res->base.target == PIPE_BUFFER) + if (res->base.b.target == PIPE_BUFFER) return zink_resource_buffer_needs_barrier(res, flags, pipeline); return zink_resource_image_needs_barrier(res, layout, flags, pipeline); } @@ -1818,7 +1831,7 @@ zink_resource_needs_barrier(struct zink_resource *res, VkImageLayout layout, VkA void zink_resource_barrier(struct zink_context *ctx, struct zink_batch *batch, struct zink_resource *res, VkImageLayout layout, VkAccessFlags flags, VkPipelineStageFlags pipeline) { - if (res->base.target == PIPE_BUFFER) + if (res->base.b.target == PIPE_BUFFER) zink_resource_buffer_barrier(ctx, batch, res, flags, pipeline); else zink_resource_image_barrier(ctx, batch, res, layout, flags, pipeline); @@ -1857,8 +1870,9 @@ zink_flush(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); bool deferred = flags & PIPE_FLUSH_DEFERRED; + bool deferred_fence = false; struct zink_batch *batch = &ctx->batch; - struct zink_fence *fence = &batch->state->fence; + struct zink_fence *fence = NULL; struct zink_screen *screen = zink_screen(ctx->base.screen); if (!deferred && ctx->clears_enabled) { @@ -1866,31 +1880,72 @@ zink_flush(struct pipe_context *pctx, zink_begin_render_pass(ctx, batch); } - if (deferred) - batch->state->fence.deferred_ctx = pctx; - else if (batch->has_work) { - if (flags & PIPE_FLUSH_END_OF_FRAME) { - if (ctx->fb_state.nr_cbufs) - zink_end_render_pass(ctx, batch); - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) - zink_resource_image_barrier(ctx, batch, - ctx->fb_state.cbufs[i] ? 
zink_resource(ctx->fb_state.cbufs[i]->texture) : NULL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, 0); - if (screen->needs_mesa_flush_wsi && ctx->fb_state.cbufs[0]) - batch->state->flush_res = zink_resource(ctx->fb_state.cbufs[0]->texture); - } - flush_batch(ctx); + if (flags & PIPE_FLUSH_END_OF_FRAME && ctx->fb_state.nr_cbufs) { + zink_end_render_pass(ctx, batch); + for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) + zink_resource_image_barrier(ctx, batch, + ctx->fb_state.cbufs[i] ? zink_resource(ctx->fb_state.cbufs[i]->texture) : NULL, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, 0); + if (zink_screen(pctx->screen)->needs_mesa_flush_wsi && ctx->fb_state.cbufs[0]) + batch->state->flush_res = zink_resource(ctx->fb_state.cbufs[0]->texture); } - if (!pfence) - return; - if (deferred && !batch->has_work) { - fence = ctx->last_fence; + if (!batch->has_work) { + if (pfence) { + /* reuse last fence */ + fence = ctx->last_fence; + } + if (!deferred) { + struct zink_batch_state *last = zink_batch_state(ctx->last_fence); + if (last) { + sync_flush(ctx, last); + if (last->is_device_lost && ctx->reset.reset) { + ctx->is_device_lost = true; + ctx->reset.reset(ctx->reset.data, PIPE_GUILTY_CONTEXT_RESET); + } + } + } + } else { + fence = &batch->state->fence; + if (deferred && !(flags & PIPE_FLUSH_FENCE_FD) && pfence) + deferred_fence = true; + else + flush_batch(ctx, true); } - zink_fence_reference(screen, - (struct zink_fence **)pfence, - fence); - if (flags & PIPE_FLUSH_END_OF_FRAME) { + + if (pfence) { + struct zink_tc_fence *mfence; + + if (flags & TC_FLUSH_ASYNC) { + mfence = zink_tc_fence(*pfence); + assert(mfence); + } else { + mfence = zink_create_tc_fence(); + + screen->base.fence_reference(&screen->base, pfence, NULL); + *pfence = (struct pipe_fence_handle *)mfence; + } + + zink_batch_state_reference(screen, NULL, zink_batch_state(fence)); + mfence->fence = fence; + if (fence) + mfence->batch_id = fence->batch_id; + + if (deferred_fence) { + assert(fence); + mfence->deferred_ctx = 
pctx; + mfence->deferred_id = fence->batch_id; + } + + if (!fence || flags & TC_FLUSH_ASYNC) { + if (!util_queue_fence_is_signalled(&mfence->ready)) + util_queue_fence_signal(&mfence->ready); + } + } + if (fence && !(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) + sync_flush(ctx, zink_batch_state(fence)); + + if (flags & PIPE_FLUSH_END_OF_FRAME && !(flags & TC_FLUSH_ASYNC) && !deferred) { /* if the first frame has not yet occurred, we need an explicit fence here * in some cases in order to correctly draw the first frame, though it's * unknown at this time why this is the case @@ -1907,10 +1962,11 @@ zink_maybe_flush_or_stall(struct zink_context *ctx) struct zink_screen *screen = zink_screen(ctx->base.screen); /* flush anytime our total batch memory usage is potentially >= 1/10 of total system memory */ if (ctx->batch.state->resource_size >= screen->total_mem / 10) - flush_batch(ctx); + flush_batch(ctx, true); if (ctx->resource_size >= screen->total_mem / 10 || _mesa_hash_table_num_entries(&ctx->batch_states) > 10) { - zink_vkfence_wait(zink_screen(ctx->base.screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE); + sync_flush(ctx, zink_batch_state(ctx->last_fence)); + zink_vkfence_wait(screen, ctx->last_fence, PIPE_TIMEOUT_INFINITE); zink_batch_reset_all(ctx); } } @@ -1923,7 +1979,8 @@ zink_fence_wait(struct pipe_context *pctx) if (ctx->batch.has_work) pctx->flush(pctx, NULL, PIPE_FLUSH_HINT_FINISH); if (ctx->last_fence) { - zink_vkfence_wait(zink_screen(pctx->screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE); + sync_flush(ctx, zink_batch_state(ctx->last_fence)); + zink_vkfence_wait(zink_screen(ctx->base.screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE); zink_batch_reset_all(ctx); } } @@ -1935,8 +1992,7 @@ zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id) assert(bs); if (!batch_id || bs->fence.batch_id == batch_id) /* not submitted yet */ - flush_batch(ctx); - + flush_batch(ctx, true); simple_mtx_lock(&ctx->batch_mtx); struct zink_fence *fence; @@ 
-1945,16 +2001,17 @@ zink_wait_on_batch(struct zink_context *ctx, uint32_t batch_id) fence = ctx->last_fence; else { struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&ctx->batch_states, batch_id, (void*)(uintptr_t)batch_id); + /* if we can't find it, it must have finished already */ if (!he) { - /* batch has already completed */ - simple_mtx_unlock(&ctx->batch_mtx); - return; + simple_mtx_unlock(&ctx->batch_mtx); + return; } fence = he->data; } - assert(fence); - ctx->base.screen->fence_finish(ctx->base.screen, &ctx->base, (struct pipe_fence_handle*)fence, PIPE_TIMEOUT_INFINITE); simple_mtx_unlock(&ctx->batch_mtx); + assert(fence); + sync_flush(ctx, zink_batch_state(fence)); + zink_vkfence_wait(zink_screen(ctx->base.screen), fence, PIPE_TIMEOUT_INFINITE); } bool @@ -1984,6 +2041,9 @@ zink_check_batch_completion(struct zink_context *ctx, uint32_t batch_id) } simple_mtx_unlock(&ctx->batch_mtx); assert(fence); + if (util_queue_is_initialized(&ctx->batch.flush_queue) && + !util_queue_fence_is_signalled(&zink_batch_state(fence)->flush_completed)) + return false; return zink_vkfence_wait(zink_screen(ctx->base.screen), fence, 0); } @@ -2136,7 +2196,7 @@ zink_copy_buffer(struct zink_context *ctx, struct zink_batch *batch, struct zink assert(!batch->in_rp); zink_batch_reference_resource_rw(batch, src, false); zink_batch_reference_resource_rw(batch, dst, true); - util_range_add(&dst->base, &dst->valid_buffer_range, dst_offset, dst_offset + size); + util_range_add(&dst->base.b, &dst->valid_buffer_range, dst_offset, dst_offset + size); zink_resource_buffer_barrier(ctx, batch, src, VK_ACCESS_TRANSFER_READ_BIT, 0); zink_resource_buffer_barrier(ctx, batch, dst, VK_ACCESS_TRANSFER_WRITE_BIT, 0); vkCmdCopyBuffer(batch->state->cmdbuf, src->obj->buffer, dst->obj->buffer, 1, &region); @@ -2147,8 +2207,8 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_batch *batch, struc unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, unsigned src_level, const 
struct pipe_box *src_box, enum pipe_map_flags map_flags) { - struct zink_resource *img = dst->base.target == PIPE_BUFFER ? src : dst; - struct zink_resource *buf = dst->base.target == PIPE_BUFFER ? dst : src; + struct zink_resource *img = dst->base.b.target == PIPE_BUFFER ? src : dst; + struct zink_resource *buf = dst->base.b.target == PIPE_BUFFER ? dst : src; if (!batch) batch = zink_batch_no_rp(ctx); @@ -2161,7 +2221,7 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_batch *batch, struc } else { zink_resource_image_barrier(ctx, batch, img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); zink_resource_buffer_barrier(ctx, batch, buf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + src_box->width); + util_range_add(&dst->base.b, &dst->valid_buffer_range, dstx, dstx + src_box->width); } VkBufferImageCopy region = {}; @@ -2169,7 +2229,7 @@ zink_copy_image_buffer(struct zink_context *ctx, struct zink_batch *batch, struc region.bufferRowLength = 0; region.bufferImageHeight = 0; region.imageSubresource.mipLevel = buf2img ? 
dst_level : src_level; - switch (img->base.target) { + switch (img->base.b.target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_2D_ARRAY: @@ -2246,10 +2306,10 @@ zink_resource_copy_region(struct pipe_context *pctx, struct zink_resource *dst = zink_resource(pdst); struct zink_resource *src = zink_resource(psrc); struct zink_context *ctx = zink_context(pctx); - if (dst->base.target != PIPE_BUFFER && src->base.target != PIPE_BUFFER) { + if (dst->base.b.target != PIPE_BUFFER && src->base.b.target != PIPE_BUFFER) { VkImageCopy region = {}; - if (util_format_get_num_planes(src->base.format) == 1 && - util_format_get_num_planes(dst->base.format) == 1) { + if (util_format_get_num_planes(src->base.b.format) == 1 && + util_format_get_num_planes(dst->base.b.format) == 1) { /* If neither the calling command’s srcImage nor the calling command’s dstImage * has a multi-planar image format then the aspectMask member of srcSubresource * and dstSubresource must match @@ -2265,7 +2325,7 @@ zink_resource_copy_region(struct pipe_context *pctx, region.srcSubresource.aspectMask = src->aspect; region.srcSubresource.mipLevel = src_level; - switch (src->base.target) { + switch (src->base.b.target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_2D_ARRAY: @@ -2296,7 +2356,7 @@ zink_resource_copy_region(struct pipe_context *pctx, region.dstSubresource.aspectMask = dst->aspect; region.dstSubresource.mipLevel = dst_level; - switch (dst->base.target) { + switch (dst->base.b.target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: case PIPE_TEXTURE_2D_ARRAY: @@ -2332,8 +2392,8 @@ zink_resource_copy_region(struct pipe_context *pctx, vkCmdCopyImage(batch->state->cmdbuf, src->obj->image, src->layout, dst->obj->image, dst->layout, 1, &region); - } else if (dst->base.target == PIPE_BUFFER && - src->base.target == PIPE_BUFFER) { + } else if (dst->base.b.target == PIPE_BUFFER && + src->base.b.target == PIPE_BUFFER) { zink_copy_buffer(ctx, NULL, 
dst, src, dstx, src_box->x, src_box->width); } else zink_copy_image_buffer(ctx, NULL, dst, src, dst_level, dstx, dsty, dstz, src_level, src_box, 0); @@ -2437,7 +2497,7 @@ zink_rebind_framebuffer(struct zink_context *ctx, struct zink_resource *res) void zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res) { - assert(res->base.target == PIPE_BUFFER); + assert(res->base.b.target == PIPE_BUFFER); if (res->bind_history & ZINK_RESOURCE_USAGE_STREAMOUT) ctx->dirty_so_targets = true; @@ -2461,7 +2521,7 @@ zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res) switch (type) { case ZINK_DESCRIPTOR_TYPE_SSBO: { struct pipe_shader_buffer *ssbo = &ctx->ssbos[shader][i]; - util_range_add(&res->base, &res->valid_buffer_range, ssbo->buffer_offset, + util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset, ssbo->buffer_offset + ssbo->buffer_size); break; } @@ -2483,7 +2543,7 @@ zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res) image_view->buffer_view = get_buffer_view(ctx, res, image_view->base.format, image_view->base.u.buf.offset, image_view->base.u.buf.size); assert(image_view->buffer_view); - util_range_add(&res->base, &res->valid_buffer_range, image_view->base.u.buf.offset, + util_range_add(&res->base.b, &res->valid_buffer_range, image_view->base.u.buf.offset, image_view->base.u.buf.offset + image_view->base.u.buf.size); break; } @@ -2497,6 +2557,19 @@ zink_resource_rebind(struct zink_context *ctx, struct zink_resource *res) } } +static void +zink_context_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *dst, struct pipe_resource *src) +{ + struct zink_resource *d = zink_resource(dst); + struct zink_resource *s = zink_resource(src); + + assert(d->internal_format == s->internal_format); + zink_resource_object_reference(zink_screen(pctx->screen), &d->obj, s->obj); + d->access = s->access; + d->access_stage = s->access_stage; + zink_resource_rebind(zink_context(pctx), d); +} + 
struct pipe_context * zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { @@ -2570,6 +2643,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->gfx_pipeline_state.have_EXT_extended_dynamic_state = screen->info.have_EXT_extended_dynamic_state; slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); + slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool); ctx->base.stream_uploader = u_upload_create_default(&ctx->base); ctx->base.const_uploader = u_upload_create_default(&ctx->base); @@ -2593,12 +2667,15 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) goto fail; vkGetDeviceQueue(screen->dev, screen->gfx_queue, 0, &ctx->batch.queue); + if (screen->threaded && screen->max_queues > 1) + vkGetDeviceQueue(screen->dev, screen->gfx_queue, 1, &ctx->batch.thread_queue); + else + ctx->batch.thread_queue = ctx->batch.queue; incr_curr_batch(ctx); zink_start_batch(ctx, &ctx->batch); if (!ctx->batch.state) goto fail; - simple_mtx_init(&ctx->batch_mtx, mtx_plain); ctx->program_cache = _mesa_hash_table_create(NULL, @@ -2626,7 +2703,19 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) if (!zink_descriptor_pool_init(ctx)) goto fail; - return &ctx->base; + if (!(flags & PIPE_CONTEXT_PREFER_THREADED) || flags & PIPE_CONTEXT_COMPUTE_ONLY) { + return &ctx->base; + } + + struct threaded_context *tc = (struct threaded_context*)threaded_context_create(&ctx->base, &screen->transfer_pool, + zink_context_replace_buffer_storage, + zink_create_tc_fence_for_tc, &ctx->tc); + + if (tc && (struct zink_context*)tc != ctx) { + tc->bytes_mapped_limit = screen->total_mem / 4; + } + + return (struct pipe_context*)tc; fail: if (ctx) diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index fe9f73fcc3e..1abbb900be4 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -38,6 
+38,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_rect.h" +#include "util/u_threaded_context.h" #include "util/slab.h" #include "util/list.h" @@ -125,7 +126,9 @@ struct zink_viewport_state { struct zink_context { struct pipe_context base; + struct threaded_context *tc; struct slab_child_pool transfer_pool; + struct slab_child_pool transfer_pool_unsync; struct blitter_context *blitter; struct pipe_device_reset_callback reset; diff --git a/src/gallium/drivers/zink/zink_descriptors.c b/src/gallium/drivers/zink/zink_descriptors.c index 3a1a1865b88..0f871c2ee4e 100644 --- a/src/gallium/drivers/zink/zink_descriptors.c +++ b/src/gallium/drivers/zink/zink_descriptors.c @@ -997,7 +997,7 @@ handle_image_descriptor(struct zink_screen *screen, struct zink_resource *res, e default: unreachable("unknown descriptor type"); } - } else if (res->base.target != PIPE_BUFFER) { + } else if (res->base.b.target != PIPE_BUFFER) { assert(layout != VK_IMAGE_LAYOUT_UNDEFINED); image_info->imageLayout = layout; image_info->imageView = imageview; @@ -1062,7 +1062,7 @@ update_sampler_descriptors(struct zink_context *ctx, struct zink_descriptor_set struct pipe_sampler_view *psampler_view = ctx->sampler_views[stage][index + k]; struct zink_sampler_view *sampler_view = zink_sampler_view(psampler_view); res = psampler_view ? 
zink_resource(psampler_view->texture) : NULL; - if (res && res->base.target == PIPE_BUFFER) { + if (res && res->base.b.target == PIPE_BUFFER) { bufferview = sampler_view->buffer_view->buffer_view; } else if (res) { imageview = sampler_view->image_view->image_view; diff --git a/src/gallium/drivers/zink/zink_fence.c b/src/gallium/drivers/zink/zink_fence.c index 97e7c2d84d2..4f5a097f486 100644 --- a/src/gallium/drivers/zink/zink_fence.c +++ b/src/gallium/drivers/zink/zink_fence.c @@ -48,22 +48,46 @@ zink_fence_clear_resources(struct zink_screen *screen, struct zink_fence *fence) } static void -destroy_fence(struct zink_screen *screen, struct zink_fence *fence) +destroy_fence(struct zink_screen *screen, struct zink_tc_fence *mfence) { - if (fence->fence) - vkDestroyFence(screen->dev, fence->fence, NULL); - zink_batch_state_destroy(screen, zink_batch_state(fence)); + struct zink_batch_state *bs = zink_batch_state(mfence->fence); + mfence->fence = NULL; + zink_batch_state_reference(screen, &bs, NULL); + tc_unflushed_batch_token_reference(&mfence->tc_token, NULL); + FREE(mfence); +} + +struct zink_tc_fence * +zink_create_tc_fence(void) +{ + struct zink_tc_fence *mfence = CALLOC_STRUCT(zink_tc_fence); + if (!mfence) + return NULL; + pipe_reference_init(&mfence->reference, 1); + util_queue_fence_init(&mfence->ready); + return mfence; +} + +struct pipe_fence_handle * +zink_create_tc_fence_for_tc(struct pipe_context *pctx, struct tc_unflushed_batch_token *tc_token) +{ + struct zink_tc_fence *mfence = zink_create_tc_fence(); + if (!mfence) + return NULL; + util_queue_fence_reset(&mfence->ready); + tc_unflushed_batch_token_reference(&mfence->tc_token, tc_token); + return (struct pipe_fence_handle*)mfence; } void zink_fence_reference(struct zink_screen *screen, - struct zink_fence **ptr, - struct zink_fence *fence) + struct zink_tc_fence **ptr, + struct zink_tc_fence *mfence) { - if (pipe_reference(&(*ptr)->reference, &fence->reference)) + if (pipe_reference(&(*ptr)->reference, 
&mfence->reference)) destroy_fence(screen, *ptr); - *ptr = fence; + *ptr = mfence; } static void @@ -71,16 +95,57 @@ fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **pptr, struct pipe_fence_handle *pfence) { - zink_fence_reference(zink_screen(pscreen), (struct zink_fence **)pptr, - zink_fence(pfence)); + zink_fence_reference(zink_screen(pscreen), (struct zink_tc_fence **)pptr, + zink_tc_fence(pfence)); +} + +static bool +tc_fence_finish(struct zink_context *ctx, struct zink_tc_fence *mfence, uint64_t *timeout_ns) +{ + if (!util_queue_fence_is_signalled(&mfence->ready)) { + int64_t abs_timeout = os_time_get_absolute_timeout(*timeout_ns); + if (mfence->tc_token) { + /* Ensure that zink_flush will be called for + * this mfence, but only if we're in the API thread + * where the context is current. + * + * Note that the batch containing the flush may already + * be in flight in the driver thread, so the mfence + * may not be ready yet when this call returns. + */ + threaded_context_flush(&ctx->base, mfence->tc_token, *timeout_ns == 0); + } + + if (!timeout_ns) + return false; + + /* this is a tc mfence, so we're just waiting on the queue mfence to complete + * after being signaled by the real mfence + */ + if (*timeout_ns == PIPE_TIMEOUT_INFINITE) { + util_queue_fence_wait(&mfence->ready); + } else { + if (!util_queue_fence_wait_timeout(&mfence->ready, abs_timeout)) + return false; + } + if (*timeout_ns && *timeout_ns != PIPE_TIMEOUT_INFINITE) { + int64_t time_ns = os_time_get_nano(); + *timeout_ns = abs_timeout > time_ns ? 
abs_timeout - time_ns : 0; + } + } + + return true; } bool zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t timeout_ns) { - if (!fence->submitted) + if (p_atomic_read(&fence->completed)) return true; + assert(fence->batch_id); + assert(fence->submitted); + bool success; if (timeout_ns) @@ -89,22 +154,40 @@ zink_vkfence_wait(struct zink_screen *screen, struct zink_fence *fence, uint64_t success = vkGetFenceStatus(screen->dev, fence->fence) == VK_SUCCESS; if (success) { + p_atomic_set(&fence->completed, true); zink_fence_clear_resources(screen, fence); - p_atomic_set(&fence->submitted, false); } return success; } static bool -zink_fence_finish(struct zink_screen *screen, struct pipe_context *pctx, struct zink_fence *fence, +zink_fence_finish(struct zink_screen *screen, struct pipe_context *pctx, struct zink_tc_fence *mfence, uint64_t timeout_ns) { - if (pctx && fence->deferred_ctx == pctx) { + pctx = threaded_context_unwrap_sync(pctx); + struct zink_context *ctx = zink_context(pctx); + + if (pctx && mfence->deferred_ctx == pctx && mfence->deferred_id == ctx->curr_batch) { zink_context(pctx)->batch.has_work = true; /* this must be the current batch */ - pctx->flush(pctx, NULL, 0); + pctx->flush(pctx, NULL, !timeout_ns ? PIPE_FLUSH_ASYNC : 0); + if (!timeout_ns) + return false; } + /* need to ensure the tc mfence has been flushed before we wait */ + bool tc_finish = tc_fence_finish(ctx, mfence, &timeout_ns); + struct zink_fence *fence = mfence->fence; + if (!tc_finish || (fence && !fence->submitted)) + return fence ? 
p_atomic_read(&fence->completed) : false; + + /* this was an invalid flush, just return completed */ + if (!mfence->fence) + return true; + /* if the zink fence has a different batch id then it must have completed and been recycled already */ + if (mfence->fence->batch_id != mfence->batch_id) + return true; + return zink_vkfence_wait(screen, fence, timeout_ns); } @@ -112,24 +195,24 @@ static bool fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx, struct pipe_fence_handle *pfence, uint64_t timeout_ns) { - return zink_fence_finish(zink_screen(pscreen), pctx, zink_fence(pfence), + return zink_fence_finish(zink_screen(pscreen), pctx, zink_tc_fence(pfence), timeout_ns); } void zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence) { - struct zink_fence *fence = zink_fence(pfence); + struct zink_tc_fence *mfence = zink_tc_fence(pfence); - if (pctx && fence->deferred_ctx == pctx) + if (pctx && mfence->deferred_ctx == pctx) return; - if (fence->deferred_ctx) { + if (mfence->deferred_ctx) { zink_context(pctx)->batch.has_work = true; /* this must be the current batch */ pctx->flush(pctx, NULL, 0); } - zink_fence_finish(zink_screen(pctx->screen), pctx, fence, PIPE_TIMEOUT_INFINITE); + zink_fence_finish(zink_screen(pctx->screen), pctx, mfence, PIPE_TIMEOUT_INFINITE); } void diff --git a/src/gallium/drivers/zink/zink_fence.h b/src/gallium/drivers/zink/zink_fence.h index 59a4a65f86e..00c4a2f2fca 100644 --- a/src/gallium/drivers/zink/zink_fence.h +++ b/src/gallium/drivers/zink/zink_fence.h @@ -26,6 +26,7 @@ #include "util/simple_mtx.h" #include "util/u_inlines.h" +#include "util/u_queue.h" #include @@ -36,14 +37,25 @@ struct zink_batch_state; struct zink_context; struct zink_screen; -struct zink_fence { +struct tc_unflushed_batch_token; + +struct zink_tc_fence { struct pipe_reference reference; - VkFence fence; + struct tc_unflushed_batch_token *tc_token; + struct util_queue_fence ready; struct pipe_context *deferred_ctx; + uint32_t 
deferred_id; + struct zink_fence *fence; + uint32_t batch_id; +}; + +struct zink_fence { + VkFence fence; uint32_t batch_id; simple_mtx_t resource_mtx; struct set *resources; /* resources need access removed asap, so they're on the fence */ bool submitted; + bool completed; }; static inline struct zink_fence * @@ -52,10 +64,22 @@ zink_fence(void *pfence) return (struct zink_fence *)pfence; } +static inline struct zink_tc_fence * +zink_tc_fence(void *pfence) +{ + return (struct zink_tc_fence *)pfence; +} + +struct zink_tc_fence * +zink_create_tc_fence(void); + +struct pipe_fence_handle * +zink_create_tc_fence_for_tc(struct pipe_context *pctx, struct tc_unflushed_batch_token *tc_token); + void zink_fence_reference(struct zink_screen *screen, - struct zink_fence **ptr, - struct zink_fence *fence); + struct zink_tc_fence **ptr, + struct zink_tc_fence *fence); void zink_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *pfence); diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c index 26b60ec3e5f..79b2076412d 100644 --- a/src/gallium/drivers/zink/zink_query.c +++ b/src/gallium/drivers/zink/zink_query.c @@ -21,6 +21,7 @@ struct zink_query_buffer { }; struct zink_query { + struct threaded_query base; enum pipe_query_type type; VkQueryPool query_pool; @@ -526,7 +527,7 @@ copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, /* if it's a single query that doesn't need special handling, we can copy it and be done */ zink_batch_reference_resource_rw(batch, res, true); zink_resource_buffer_barrier(ctx, batch, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0); - util_range_add(&res->base, &res->valid_buffer_range, offset, offset + result_size); + util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size); assert(query_id < NUM_QUERIES); vkCmdCopyQueryPoolResults(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer, offset, 0, flags); @@ -733,6 +734,9 @@ 
zink_end_query(struct pipe_context *pctx, struct zink_query *query = (struct zink_query *)q; struct zink_batch *batch = &ctx->batch; + /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */ + threaded_context_unwrap_sync(pctx); + if (needs_stats_list(query)) list_delinit(&query->stats_list); if (query->active) @@ -753,7 +757,7 @@ zink_get_query_result(struct pipe_context *pctx, if (query->needs_update) update_qbo(ctx, query); - if (query->batch_id.usage == ctx->curr_batch) + if (!threaded_query(q)->flushed && query->batch_id.usage == ctx->curr_batch) pctx->flush(pctx, NULL, 0); return get_query_result(pctx, q, wait, result); @@ -882,7 +886,7 @@ zink_render_condition(struct pipe_context *pctx, copy_results_to_buffer(ctx, query, res, 0, num_results, flags); } else { /* these need special handling */ - force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base, 0); + force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0); } query->predicate_dirty = false; } diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index 1926e40d952..a660eee55e4 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -25,6 +25,7 @@ #include "zink_batch.h" #include "zink_context.h" +#include "zink_fence.h" #include "zink_program.h" #include "zink_screen.h" @@ -48,6 +49,21 @@ #include "drm-uapi/drm_fourcc.h" #endif +static void +zink_transfer_flush_region(struct pipe_context *pctx, + struct pipe_transfer *ptrans, + const struct pipe_box *box); +static void * +zink_transfer_map(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer); +static void +zink_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans); + void debug_describe_zink_resource_object(char *buf, const struct zink_resource_object *ptr) { @@ -147,6 +163,7 @@ 
zink_resource_destroy(struct pipe_screen *pscreen, util_range_destroy(&res->valid_buffer_range); zink_resource_object_reference(screen, &res->obj, NULL); + threaded_resource_deinit(pres); FREE(res); } @@ -494,6 +511,14 @@ fail1: return NULL; } +static const struct u_resource_vtbl zink_resource_vtbl = { + NULL, + zink_resource_destroy, + zink_transfer_map, + zink_transfer_flush_region, + zink_transfer_unmap, +}; + static struct pipe_resource * resource_create(struct pipe_screen *pscreen, const struct pipe_resource *templ, @@ -503,10 +528,12 @@ resource_create(struct pipe_screen *pscreen, struct zink_screen *screen = zink_screen(pscreen); struct zink_resource *res = CALLOC_STRUCT(zink_resource); - res->base = *templ; + res->base.b = *templ; - pipe_reference_init(&res->base.reference, 1); - res->base.screen = pscreen; + res->base.vtbl = &zink_resource_vtbl; + threaded_resource_init(&res->base.b); + pipe_reference_init(&res->base.b.reference, 1); + res->base.b.screen = pscreen; bool optimal_tiling = false; res->obj = resource_object_create(screen, templ, whandle, &optimal_tiling); @@ -528,15 +555,15 @@ resource_create(struct pipe_screen *pscreen, if (screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) { struct sw_winsys *winsys = screen->winsys; res->dt = winsys->displaytarget_create(screen->winsys, - res->base.bind, - res->base.format, + res->base.b.bind, + res->base.b.format, templ->width0, templ->height0, 64, NULL, &res->dt_stride); } - return &res->base; + return &res->base.b; } static struct pipe_resource * @@ -556,7 +583,7 @@ zink_resource_get_handle(struct pipe_screen *pscreen, struct zink_resource *res = zink_resource(tex); struct zink_screen *screen = zink_screen(pscreen); - if (res->base.target != PIPE_BUFFER) { + if (res->base.b.target != PIPE_BUFFER) { VkImageSubresource sub_res = {}; VkSubresourceLayout sub_res_layout = {}; @@ -602,18 +629,15 @@ zink_resource_from_handle(struct pipe_screen *pscreen, #endif } -static void 
-zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres) +static bool +invalidate_buffer(struct zink_context *ctx, struct zink_resource *res) { - struct zink_context *ctx = zink_context(pctx); - struct zink_resource *res = zink_resource(pres); - struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_screen *screen = zink_screen(ctx->base.screen); - if (pres->target != PIPE_BUFFER) - return; + assert(res->base.b.target == PIPE_BUFFER); if (res->valid_buffer_range.start > res->valid_buffer_range.end) - return; + return false; if (res->bind_history & ZINK_RESOURCE_USAGE_STREAMOUT) ctx->dirty_so_targets = true; @@ -622,13 +646,13 @@ zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres) util_range_set_empty(&res->valid_buffer_range); if (!get_resource_usage(res)) - return; + return false; struct zink_resource_object *old_obj = res->obj; - struct zink_resource_object *new_obj = resource_object_create(screen, pres, NULL, NULL); + struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL); if (!new_obj) { debug_printf("new backing resource alloc failed!"); - return; + return false; } res->obj = new_obj; res->access_stage = 0; @@ -636,6 +660,15 @@ zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres) zink_resource_rebind(ctx, res); zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj); zink_resource_object_reference(screen, &old_obj, NULL); + return true; +} + + +static void +zink_resource_invalidate(struct pipe_context *pctx, struct pipe_resource *pres) +{ + if (pres->target == PIPE_BUFFER) + invalidate_buffer(zink_context(pctx), zink_resource(pres)); } static void @@ -644,18 +677,18 @@ zink_transfer_copy_bufimage(struct zink_context *ctx, struct zink_resource *src, struct zink_transfer *trans) { - assert((trans->base.usage & (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)) != + assert((trans->base.b.usage & (PIPE_MAP_DEPTH_ONLY | 
PIPE_MAP_STENCIL_ONLY)) != (PIPE_MAP_DEPTH_ONLY | PIPE_MAP_STENCIL_ONLY)); - bool buf2img = src->base.target == PIPE_BUFFER; + bool buf2img = src->base.b.target == PIPE_BUFFER; - struct pipe_box box = trans->base.box; + struct pipe_box box = trans->base.b.box; int x = box.x; if (buf2img) box.x = src->obj->offset + trans->offset; - zink_copy_image_buffer(ctx, NULL, dst, src, trans->base.level, buf2img ? x : dst->obj->offset, - box.y, box.z, trans->base.level, &box, trans->base.usage); + zink_copy_image_buffer(ctx, NULL, dst, src, trans->base.b.level, buf2img ? x : dst->obj->offset, + box.y, box.z, trans->base.b.level, &box, trans->base.b.usage); } bool @@ -690,6 +723,34 @@ init_mem_range(struct zink_screen *screen, struct zink_resource *res, VkDeviceSi return range; } +bool +zink_resource_has_curr_read_usage(struct zink_context *ctx, struct zink_resource *res) +{ + return zink_batch_usage_matches(&res->obj->reads, ctx->curr_batch); +} + +static uint32_t +get_most_recent_access(struct zink_resource *res, enum zink_resource_access flags) +{ + uint32_t usage[3]; // read, write, failure + uint32_t latest = ARRAY_SIZE(usage) - 1; + usage[latest] = 0; + + if (flags & ZINK_RESOURCE_ACCESS_READ) { + usage[0] = p_atomic_read(&res->obj->reads.usage); + if (usage[0] > usage[latest]) { + latest = 0; + } + } + if (flags & ZINK_RESOURCE_ACCESS_WRITE) { + usage[1] = p_atomic_read(&res->obj->writes.usage); + if (usage[1] > usage[latest]) { + latest = 1; + } + } + return usage[latest]; +} + static void * map_resource(struct zink_screen *screen, struct zink_resource *res) { @@ -722,40 +783,74 @@ buffer_transfer_map(struct zink_context *ctx, struct zink_resource *res, unsigne struct zink_screen *screen = zink_screen(ctx->base.screen); void *ptr = NULL; - if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { - if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { - /* Replace the backing storage with a fresh buffer for non-async maps */ - //if (!(usage & TC_TRANSFER_MAP_NO_INVALIDATE)) - 
zink_resource_invalidate(&ctx->base, &res->base); + /* See if the buffer range being mapped has never been initialized, + * in which case it can be mapped unsynchronized. */ + if (!(usage & (PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && + usage & PIPE_MAP_WRITE && !res->base.is_shared && + !util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width)) { + usage |= PIPE_MAP_UNSYNCHRONIZED; + } - /* If we can discard the whole resource, we can discard the range. */ + /* If discarding the entire range, discard the whole resource instead. */ + if (usage & PIPE_MAP_DISCARD_RANGE && box->x == 0 && box->width == res->base.b.width0) { + usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; + } + + if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE && + !(usage & (PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INVALIDATE))) { + assert(usage & PIPE_MAP_WRITE); + + if (invalidate_buffer(ctx, res)) { + /* At this point, the buffer is always idle. */ + usage |= PIPE_MAP_UNSYNCHRONIZED; + } else { + /* Fall back to a temporary buffer. 
*/ usage |= PIPE_MAP_DISCARD_RANGE; } - if (util_ranges_intersect(&res->valid_buffer_range, box->x, box->x + box->width)) { - /* special case compute reads since they aren't handled by zink_fence_wait() */ - if (usage & PIPE_MAP_WRITE && zink_resource_has_usage(res, ZINK_RESOURCE_ACCESS_READ)) - resource_sync_reads(ctx, res); - if (usage & PIPE_MAP_READ && zink_resource_has_usage(res, ZINK_RESOURCE_ACCESS_WRITE)) - resource_sync_writes_from_batch_usage(ctx, res); - else if (usage & PIPE_MAP_WRITE && zink_resource_has_usage(res, ZINK_RESOURCE_ACCESS_RW)) { - /* need to wait for all rendering to finish - * TODO: optimize/fix this to be much less obtrusive - * mesa/mesa#2966 - */ + } - struct u_upload_mgr *mgr = ctx->base.stream_uploader; - u_upload_alloc(mgr, 0, box->width + box->x, - screen->info.props.limits.minMemoryMapAlignment, &trans->offset, - (struct pipe_resource **)&trans->staging_res, (void **)&ptr); + if ((usage & PIPE_MAP_WRITE) && + (usage & PIPE_MAP_DISCARD_RANGE || + (!(usage & PIPE_MAP_READ) && zink_resource_has_usage(res, ZINK_RESOURCE_ACCESS_RW))) && + !(usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_PERSISTENT))) { - res = zink_resource(trans->staging_res); - /* replacing existing map, still need to increment refcount for tracking since - * unmaps will still occur - */ - p_atomic_inc(&res->obj->map_count); - res->obj->map = ptr; - } + /* Check if mapping this buffer would cause waiting for the GPU. + */ + + uint32_t latest_access = get_most_recent_access(res, ZINK_RESOURCE_ACCESS_RW); + if (zink_resource_has_curr_read_usage(ctx, res) || + (latest_access && !zink_check_batch_completion(ctx, latest_access))) { + /* Do a wait-free write-only transfer using a temporary buffer. */ + unsigned offset; + + /* If we are not called from the driver thread, we have + * to use the uploader from u_threaded_context, which is + * local to the calling thread. 
+ */ + struct u_upload_mgr *mgr; + if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) + mgr = ctx->tc->base.stream_uploader; + else + mgr = ctx->base.stream_uploader; + u_upload_alloc(mgr, 0, box->width + box->x, + screen->info.props.limits.minMemoryMapAlignment, &offset, + (struct pipe_resource **)&trans->staging_res, (void **)&ptr); + res = zink_resource(trans->staging_res); + trans->offset = offset; + /* replacing existing map, still need to increment refcount for tracking since + * unmaps will still occur + */ + p_atomic_inc(&res->obj->map_count); + res->obj->map = ptr; + } else { + /* At this point, the buffer is always idle (we checked it above). */ + usage |= PIPE_MAP_UNSYNCHRONIZED; } + } else if ((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT)) { + assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); + uint32_t latest_write = get_most_recent_access(res, ZINK_RESOURCE_ACCESS_WRITE); + if (latest_write) + zink_wait_on_batch(ctx, latest_write); } if (!ptr) { @@ -782,9 +877,9 @@ buffer_transfer_map(struct zink_context *ctx, struct zink_resource *res, unsigne return NULL; } } - trans->base.usage = usage; + trans->base.b.usage = usage; if (usage & PIPE_MAP_WRITE) - util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width); + util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width); return ptr; } @@ -800,17 +895,24 @@ zink_transfer_map(struct pipe_context *pctx, struct zink_screen *screen = zink_screen(pctx->screen); struct zink_resource *res = zink_resource(pres); - struct zink_transfer *trans = slab_alloc(&ctx->transfer_pool); + struct zink_transfer *trans; + + if (usage & PIPE_MAP_THREAD_SAFE) + trans = malloc(sizeof(*trans)); + else if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) + trans = slab_alloc(&ctx->transfer_pool_unsync); + else + trans = slab_alloc(&ctx->transfer_pool); if (!trans) return NULL; memset(trans, 0, sizeof(*trans)); - pipe_resource_reference(&trans->base.resource, 
pres); + pipe_resource_reference(&trans->base.b.resource, pres); - trans->base.resource = pres; - trans->base.level = level; - trans->base.usage = usage; - trans->base.box = *box; + trans->base.b.resource = pres; + trans->base.b.level = level; + trans->base.b.usage = usage; + trans->base.b.box = *box; void *ptr, *base; if (pres->target == PIPE_BUFFER) { @@ -829,9 +931,9 @@ zink_transfer_map(struct pipe_context *pctx, format = util_format_get_depth_only(pres->format); else if (usage & PIPE_MAP_STENCIL_ONLY) format = PIPE_FORMAT_S8_UINT; - trans->base.stride = util_format_get_stride(format, box->width); - trans->base.layer_stride = util_format_get_2d_size(format, - trans->base.stride, + trans->base.b.stride = util_format_get_stride(format, box->width); + trans->base.b.layer_stride = util_format_get_2d_size(format, + trans->base.b.stride, box->height); struct pipe_resource templ = *pres; @@ -839,7 +941,7 @@ zink_transfer_map(struct pipe_context *pctx, templ.usage = PIPE_USAGE_STAGING; templ.target = PIPE_BUFFER; templ.bind = 0; - templ.width0 = trans->base.layer_stride * box->depth; + templ.width0 = trans->base.b.layer_stride * box->depth; templ.height0 = templ.depth0 = 0; templ.last_level = 0; templ.array_size = 1; @@ -881,11 +983,11 @@ zink_transfer_map(struct pipe_context *pctx, }; VkSubresourceLayout srl; vkGetImageSubresourceLayout(screen->dev, res->obj->image, &isr, &srl); - trans->base.stride = srl.rowPitch; - trans->base.layer_stride = srl.arrayPitch; + trans->base.b.stride = srl.rowPitch; + trans->base.b.layer_stride = srl.arrayPitch; trans->offset = srl.offset; trans->depthPitch = srl.depthPitch; - const struct util_format_description *desc = util_format_description(res->base.format); + const struct util_format_description *desc = util_format_description(res->base.b.format); unsigned offset = srl.offset + box->z * srl.depthPitch + (box->y / desc->block.height) * srl.rowPitch + @@ -901,7 +1003,7 @@ zink_transfer_map(struct pipe_context *pctx, if ((usage & 
PIPE_MAP_PERSISTENT) && !(usage & PIPE_MAP_COHERENT)) res->obj->persistent_maps++; - *transfer = &trans->base; + *transfer = &trans->base.b; return ptr; } @@ -914,7 +1016,7 @@ zink_transfer_flush_region(struct pipe_context *pctx, struct zink_resource *res = zink_resource(ptrans->resource); struct zink_transfer *trans = (struct zink_transfer *)ptrans; - if (trans->base.usage & PIPE_MAP_WRITE) { + if (trans->base.b.usage & PIPE_MAP_WRITE) { struct zink_screen *screen = zink_screen(pctx->screen); struct zink_resource *m = trans->staging_res ? zink_resource(trans->staging_res) : res; @@ -923,11 +1025,11 @@ zink_transfer_flush_region(struct pipe_context *pctx, size = box->width; offset = trans->offset + box->x; } else { - size = box->width * box->height * util_format_get_blocksize(m->base.format); + size = box->width * box->height * util_format_get_blocksize(m->base.b.format); offset = trans->offset + box->z * trans->depthPitch + - util_format_get_2d_size(m->base.format, trans->base.stride, box->y) + - util_format_get_stride(m->base.format, box->x); + util_format_get_2d_size(m->base.b.format, trans->base.b.stride, box->y) + + util_format_get_stride(m->base.b.format, box->x); assert(offset + size <= res->obj->size); } if (!m->obj->coherent) { @@ -954,7 +1056,7 @@ zink_transfer_unmap(struct pipe_context *pctx, struct zink_resource *res = zink_resource(ptrans->resource); struct zink_transfer *trans = (struct zink_transfer *)ptrans; - if (!(trans->base.usage & (PIPE_MAP_FLUSH_EXPLICIT | PIPE_MAP_COHERENT))) { + if (!(trans->base.b.usage & (PIPE_MAP_FLUSH_EXPLICIT | PIPE_MAP_COHERENT))) { zink_transfer_flush_region(pctx, ptrans, &ptrans->box); } @@ -962,13 +1064,21 @@ zink_transfer_unmap(struct pipe_context *pctx, unmap_resource(screen, zink_resource(trans->staging_res)); } else unmap_resource(screen, res); - if ((trans->base.usage & PIPE_MAP_PERSISTENT) && !(trans->base.usage & PIPE_MAP_COHERENT)) + if ((trans->base.b.usage & PIPE_MAP_PERSISTENT) && !(trans->base.b.usage & 
PIPE_MAP_COHERENT)) res->obj->persistent_maps--; if (trans->staging_res) pipe_resource_reference(&trans->staging_res, NULL); - pipe_resource_reference(&trans->base.resource, NULL); - slab_free(&ctx->transfer_pool, ptrans); + pipe_resource_reference(&trans->base.b.resource, NULL); + + if (trans->base.b.usage & PIPE_MAP_THREAD_SAFE) { + free(trans); + } else { + /* Don't use transfer_pool_unsync. We are always in the driver + * thread. Freeing an object into a different pool is allowed. + */ + slab_free(&ctx->transfer_pool, ptrans); + } } static void @@ -1011,13 +1121,14 @@ zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource { struct zink_screen *screen = zink_screen(ctx->base.screen); /* base resource already has the cap */ - if (res->base.bind & PIPE_BIND_SHADER_IMAGE) + if (res->base.b.bind & PIPE_BIND_SHADER_IMAGE) return true; if (res->obj->is_buffer) { if (res->obj->sbuffer) return true; - VkBufferCreateInfo bci = create_bci(screen, &res->base, res->base.bind | PIPE_BIND_SHADER_IMAGE); + VkBufferCreateInfo bci = create_bci(screen, &res->base.b, res->base.b.bind | PIPE_BIND_SHADER_IMAGE); bci.size = res->obj->size; + VkBuffer buffer; if (vkCreateBuffer(screen->dev, &bci, NULL, &buffer) != VK_SUCCESS) return false; @@ -1025,26 +1136,26 @@ zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource res->obj->sbuffer = res->obj->buffer; res->obj->buffer = buffer; } else { - zink_fb_clears_apply_region(ctx, &res->base, (struct u_rect){0, res->base.width0, 0, res->base.height0}); + zink_fb_clears_apply_region(ctx, &res->base.b, (struct u_rect){0, res->base.b.width0, 0, res->base.b.height0}); zink_resource_image_barrier(ctx, NULL, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 0); - res->base.bind |= PIPE_BIND_SHADER_IMAGE; + res->base.b.bind |= PIPE_BIND_SHADER_IMAGE; struct zink_resource_object *old_obj = res->obj; - struct zink_resource_object *new_obj = resource_object_create(screen, &res->base, NULL, 
&res->optimal_tiling); + struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->optimal_tiling); if (!new_obj) { debug_printf("new backing resource alloc failed!"); - res->base.bind &= ~PIPE_BIND_SHADER_IMAGE; + res->base.b.bind &= ~PIPE_BIND_SHADER_IMAGE; return false; } struct zink_resource staging = *res; staging.obj = old_obj; res->obj = new_obj; zink_descriptor_set_refs_clear(&old_obj->desc_set_refs, old_obj); - for (unsigned i = 0; i <= res->base.last_level; i++) { + for (unsigned i = 0; i <= res->base.b.last_level; i++) { struct pipe_box box = {0, 0, 0, - u_minify(res->base.width0, i), - u_minify(res->base.height0, i), res->base.array_size}; - box.depth = util_num_layers(&res->base, i); - ctx->base.resource_copy_region(&ctx->base, &res->base, i, 0, 0, 0, &staging.base, i, &box); + u_minify(res->base.b.width0, i), + u_minify(res->base.b.height0, i), res->base.b.array_size}; + box.depth = util_num_layers(&res->base.b, i); + ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box); } zink_resource_object_reference(screen, &old_obj, NULL); } @@ -1068,7 +1179,7 @@ zink_resource_object_init_storage(struct zink_context *ctx, struct zink_resource for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) { for (unsigned j = 0; j < ctx->num_sampler_views[i]; j++) { struct zink_sampler_view *sv = zink_sampler_view(ctx->sampler_views[i][j]); - if (sv && sv->base.texture == &res->base) { + if (sv && sv->base.texture == &res->base.b) { struct pipe_surface *psurf = &sv->image_view->base; zink_rebind_surface(ctx, &psurf); sv->image_view = zink_surface(psurf); diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h index 86baae9dcba..0602ba1b22d 100644 --- a/src/gallium/drivers/zink/zink_resource.h +++ b/src/gallium/drivers/zink/zink_resource.h @@ -35,6 +35,7 @@ struct zink_context; #include "util/u_transfer.h" #include "util/u_range.h" #include "util/u_dynarray.h" 
+#include "util/u_threaded_context.h" #include "zink_batch.h" #include "zink_descriptors.h" @@ -81,7 +82,7 @@ struct zink_resource_object { }; struct zink_resource { - struct pipe_resource base; + struct threaded_resource base; enum pipe_format internal_format:16; @@ -107,7 +108,7 @@ struct zink_resource { }; struct zink_transfer { - struct pipe_transfer base; + struct threaded_transfer base; struct pipe_resource *staging_res; unsigned offset; unsigned depthPitch; @@ -137,7 +138,7 @@ bool zink_resource_has_usage(struct zink_resource *res, enum zink_resource_access usage); bool -zink_resource_has_usage_for_id(struct zink_resource *res, uint32_t id); +zink_resource_has_curr_read_usage(struct zink_context *ctx, struct zink_resource *res); void zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx); diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 6cc8d6226cf..000dd0e2d61 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -1013,6 +1013,7 @@ update_queue_props(struct zink_screen *screen) for (uint32_t i = 0; i < num_queues; i++) { if (props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { screen->gfx_queue = i; + screen->max_queues = props[i].queueCount; screen->timestamp_valid_bits = props[i].timestampValidBits; break; } @@ -1336,7 +1337,7 @@ zink_create_logical_device(struct zink_screen *screen) float dummy = 0.0f; qci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; qci.queueFamilyIndex = screen->gfx_queue; - qci.queueCount = 1; + qci.queueCount = screen->threaded && screen->max_queues > 1 ? 
2 : 1; qci.pQueuePriorities = &dummy; VkDeviceCreateInfo dci = {}; @@ -1392,6 +1393,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config) return NULL; util_cpu_detect(); + screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1); zink_debug = debug_get_option_zink_debug(); diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 10e83040491..d8677404428 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -51,6 +51,8 @@ struct hash_table; struct zink_screen { struct pipe_screen base; + bool threaded; + uint32_t curr_batch; //the current batch id struct sw_winsys *winsys; @@ -88,6 +90,7 @@ struct zink_screen { bool have_triangle_fans; uint32_t gfx_queue; + uint32_t max_queues; uint32_t timestamp_valid_bits; VkDevice dev; VkDebugUtilsMessengerEXT debugUtilsCallbackHandle; diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c index 798911ea437..568645f4508 100644 --- a/src/gallium/drivers/zink/zink_surface.c +++ b/src/gallium/drivers/zink/zink_surface.c @@ -40,7 +40,7 @@ create_ivci(struct zink_screen *screen, ivci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; ivci.image = res->obj->image; - switch (res->base.target) { + switch (res->base.b.target) { case PIPE_TEXTURE_1D: ivci.viewType = VK_IMAGE_VIEW_TYPE_1D; break; @@ -88,7 +88,7 @@ create_ivci(struct zink_screen *screen, ivci.subresourceRange.levelCount = 1; ivci.subresourceRange.baseArrayLayer = templ->u.tex.first_layer; ivci.subresourceRange.layerCount = 1 + templ->u.tex.last_layer - templ->u.tex.first_layer; - ivci.viewType = zink_surface_clamp_viewtype(ivci.viewType, templ->u.tex.first_layer, templ->u.tex.last_layer, res->base.array_size); + ivci.viewType = zink_surface_clamp_viewtype(ivci.viewType, templ->u.tex.first_layer, templ->u.tex.last_layer, res->base.b.array_size); return ivci; }