From 4e87d81d20a084f550c601e9e4dc4e44fbf9a750 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 31 May 2023 11:20:17 -0400 Subject: [PATCH] zink: add a dgc debug mode for testing. This is useful for drivers trying to implement DGC, since there is no CTS coverage for it. Do not use it: it will not make anything faster. Part-of: --- src/gallium/drivers/zink/zink_batch.c | 11 + src/gallium/drivers/zink/zink_blit.c | 5 + src/gallium/drivers/zink/zink_clear.c | 4 + src/gallium/drivers/zink/zink_clear.h | 1 + src/gallium/drivers/zink/zink_context.c | 221 +++++++++++++++++ src/gallium/drivers/zink/zink_context.h | 14 ++ src/gallium/drivers/zink/zink_descriptors.c | 8 + src/gallium/drivers/zink/zink_device_info.py | 1 + src/gallium/drivers/zink/zink_draw.cpp | 234 ++++++++++++++++-- src/gallium/drivers/zink/zink_pipeline.c | 24 +- src/gallium/drivers/zink/zink_pipeline.h | 3 +- src/gallium/drivers/zink/zink_program.c | 9 +- .../drivers/zink/zink_program_state.hpp | 4 +- src/gallium/drivers/zink/zink_query.c | 6 + src/gallium/drivers/zink/zink_resource.c | 12 +- src/gallium/drivers/zink/zink_screen.c | 20 +- src/gallium/drivers/zink/zink_screen.h | 3 - src/gallium/drivers/zink/zink_state.c | 4 + src/gallium/drivers/zink/zink_types.h | 40 +++ 19 files changed, 591 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index e7c0ee7d124..fcfe301603e 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -117,6 +117,13 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) VKSCR(DestroyQueryPool)(screen->dev, *pool, NULL); util_dynarray_clear(&bs->dead_querypools); + util_dynarray_foreach(&bs->dgc.pipelines, VkPipeline, pipeline) + VKSCR(DestroyPipeline)(screen->dev, *pipeline, NULL); + util_dynarray_clear(&bs->dgc.pipelines); + util_dynarray_foreach(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout) + 
VKSCR(DestroyIndirectCommandsLayoutNV)(screen->dev, *iclayout, NULL); + util_dynarray_clear(&bs->dgc.layouts); + /* framebuffers are appended to the batch state in which they are destroyed * to ensure deferred deletion without destroying in-use objects */ @@ -272,6 +279,8 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs free(bs->slab_objs.objs); free(bs->sparse_objs.objs); util_dynarray_fini(&bs->dead_querypools); + util_dynarray_fini(&bs->dgc.pipelines); + util_dynarray_fini(&bs->dgc.layouts); util_dynarray_fini(&bs->swapchain_obj); util_dynarray_fini(&bs->zombie_samplers); util_dynarray_fini(&bs->dead_framebuffers); @@ -333,6 +342,8 @@ create_batch_state(struct zink_context *ctx) SET_CREATE_OR_FAIL(&bs->active_queries); util_dynarray_init(&bs->wait_semaphores, NULL); util_dynarray_init(&bs->dead_querypools, NULL); + util_dynarray_init(&bs->dgc.pipelines, NULL); + util_dynarray_init(&bs->dgc.layouts, NULL); util_dynarray_init(&bs->wait_semaphore_stages, NULL); util_dynarray_init(&bs->zombie_samplers, NULL); util_dynarray_init(&bs->dead_framebuffers, NULL); diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c index 15e6be51411..5f39a9b1f8b 100644 --- a/src/gallium/drivers/zink/zink_blit.c +++ b/src/gallium/drivers/zink/zink_blit.c @@ -74,6 +74,8 @@ blit_resolve(struct zink_context *ctx, const struct pipe_blit_info *info, bool * VkCommandBuffer cmdbuf = *needs_present_readback ? ctx->batch.state->cmdbuf : zink_get_cmdbuf(ctx, src, dst); + if (cmdbuf == ctx->batch.state->cmdbuf) + zink_flush_dgc_if_enabled(ctx); zink_batch_reference_resource_rw(batch, src, false); zink_batch_reference_resource_rw(batch, dst, true); @@ -266,6 +268,8 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *n VkCommandBuffer cmdbuf = *needs_present_readback ? 
ctx->batch.state->cmdbuf : zink_get_cmdbuf(ctx, src, dst); + if (cmdbuf == ctx->batch.state->cmdbuf) + zink_flush_dgc_if_enabled(ctx); zink_batch_reference_resource_rw(batch, src, false); zink_batch_reference_resource_rw(batch, dst, true); @@ -393,6 +397,7 @@ zink_blit(struct pipe_context *pctx, if (whole) pctx->invalidate_resource(pctx, info->dst.resource); + zink_flush_dgc_if_enabled(ctx); ctx->unordered_blitting = !(info->render_condition_enable && ctx->render_condition_active) && zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering && !needs_present_readback && diff --git a/src/gallium/drivers/zink/zink_clear.c b/src/gallium/drivers/zink/zink_clear.c index a8756ae272f..b8591811359 100644 --- a/src/gallium/drivers/zink/zink_clear.c +++ b/src/gallium/drivers/zink/zink_clear.c @@ -53,6 +53,8 @@ clear_in_rp(struct pipe_context *pctx, struct zink_context *ctx = zink_context(pctx); struct pipe_framebuffer_state *fb = &ctx->fb_state; + zink_flush_dgc_if_enabled(ctx); + VkClearAttachment attachments[1 + PIPE_MAX_COLOR_BUFS]; int num_attachments = 0; @@ -645,6 +647,7 @@ zink_clear_render_target(struct pipe_context *pctx, struct pipe_surface *dst, bool render_condition_enabled) { struct zink_context *ctx = zink_context(pctx); + zink_flush_dgc_if_enabled(ctx); bool render_condition_active = ctx->render_condition_active; if (!render_condition_enabled && render_condition_active) { zink_stop_conditional_render(ctx); @@ -670,6 +673,7 @@ zink_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *dst, bool render_condition_enabled) { struct zink_context *ctx = zink_context(pctx); + zink_flush_dgc_if_enabled(ctx); bool render_condition_active = ctx->render_condition_active; if (!render_condition_enabled && render_condition_active) { zink_stop_conditional_render(ctx); diff --git a/src/gallium/drivers/zink/zink_clear.h b/src/gallium/drivers/zink/zink_clear.h index eb17f15f544..8df6c70aa42 100644 --- a/src/gallium/drivers/zink/zink_clear.h +++ 
b/src/gallium/drivers/zink/zink_clear.h @@ -26,6 +26,7 @@ #include "util/u_rect.h" #include "zink_types.h" +#include "zink_screen.h" void zink_clear(struct pipe_context *pctx, diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 74905004f4a..34c4caf0195 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -30,6 +30,7 @@ #include "zink_helpers.h" #include "zink_inlines.h" #include "zink_kopper.h" +#include "zink_pipeline.h" #include "zink_program.h" #include "zink_query.h" #include "zink_render_pass.h" @@ -206,6 +207,18 @@ zink_context_destroy(struct pipe_context *pctx) _mesa_hash_table_destroy(ctx->render_pass_cache, NULL); slab_destroy_child(&ctx->transfer_pool_unsync); + if (zink_debug & ZINK_DEBUG_DGC) { + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++) + u_upload_destroy(ctx->dgc.upload[i]); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.buffers); i++) { + if (!ctx->dgc.buffers[i]) + continue; + struct pipe_resource *pres = &ctx->dgc.buffers[i]->base.b; + pipe_resource_reference(&pres, NULL); + } + util_dynarray_fini(&ctx->dgc.pipelines); + } + zink_descriptors_deinit(ctx); if (!(ctx->flags & ZINK_CONTEXT_COPY_ONLY)) @@ -1333,6 +1346,7 @@ zink_set_viewport_states(struct pipe_context *pctx, ctx->vp_state.viewport_states[start_slot + i] = state[i]; ctx->vp_state_changed = true; + zink_flush_dgc_if_enabled(ctx); } static void @@ -1345,6 +1359,7 @@ zink_set_scissor_states(struct pipe_context *pctx, for (unsigned i = 0; i < num_scissors; i++) ctx->vp_state.scissor_states[start_slot + i] = states[i]; ctx->scissor_changed = true; + zink_flush_dgc_if_enabled(ctx); } static void @@ -2470,6 +2485,7 @@ zink_set_patch_vertices(struct pipe_context *pctx, uint8_t patch_vertices) VKCTX(CmdSetPatchControlPointsEXT)(ctx->batch.state->cmdbuf, patch_vertices); else ctx->gfx_pipeline_state.dirty = true; + zink_flush_dgc_if_enabled(ctx); } } @@ -2858,6 +2874,7 @@ 
zink_batch_no_rp_safe(struct zink_context *ctx) { if (!ctx->batch.in_rp) return; + zink_flush_dgc_if_enabled(ctx); if (ctx->render_condition.query) zink_stop_conditional_render(ctx); /* suspend all queries that were started in a renderpass @@ -3494,6 +3511,7 @@ zink_set_framebuffer_state(struct pipe_context *pctx, zink_update_fs_key_samples(ctx); if (ctx->gfx_pipeline_state.rast_samples != rast_samples) { ctx->sample_locations_changed |= ctx->gfx_pipeline_state.sample_locations_enabled; + zink_flush_dgc_if_enabled(ctx); if (screen->have_full_ds3) ctx->sample_mask_changed = true; else @@ -3516,6 +3534,7 @@ zink_set_blend_color(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); memcpy(ctx->blend_constants, color->color, sizeof(float) * 4); + zink_flush_dgc_if_enabled(ctx); } static void @@ -3523,6 +3542,7 @@ zink_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) { struct zink_context *ctx = zink_context(pctx); ctx->gfx_pipeline_state.sample_mask = sample_mask; + zink_flush_dgc_if_enabled(ctx); if (zink_screen(pctx->screen)->have_full_ds3) ctx->sample_mask_changed = true; else @@ -3535,6 +3555,7 @@ zink_set_min_samples(struct pipe_context *pctx, unsigned min_samples) struct zink_context *ctx = zink_context(pctx); ctx->gfx_pipeline_state.min_samples = min_samples - 1; ctx->gfx_pipeline_state.dirty = true; + zink_flush_dgc_if_enabled(ctx); } static void @@ -3549,6 +3570,7 @@ zink_set_sample_locations(struct pipe_context *pctx, size_t size, const uint8_t if (locations) memcpy(ctx->sample_locations, locations, size); + zink_flush_dgc_if_enabled(ctx); } static void @@ -3975,6 +3997,7 @@ zink_set_stream_output_targets(struct pipe_context *pctx, /* TODO: possibly avoid rebinding on resume if resuming from same buffers? 
*/ ctx->dirty_so_targets = true; } + zink_flush_dgc_if_enabled(ctx); } void @@ -4739,6 +4762,197 @@ zink_emit_string_marker(struct pipe_context *pctx, free(temp); } +VkIndirectCommandsLayoutTokenNV * +zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem) +{ + size_t size = 0; + struct zink_screen *screen = zink_screen(ctx->base.screen); + VkIndirectCommandsLayoutTokenNV *ret = util_dynarray_grow(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV, 1); + ret->sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV; + ret->pNext = NULL; + ret->tokenType = type; + ret->vertexDynamicStride = ctx->gfx_pipeline_state.uses_dynamic_stride; + ret->indirectStateFlags = 0; + ret->indexTypeCount = 0; + switch (type) { + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: + ret->stream = ZINK_DGC_VBO; + size = sizeof(VkBindVertexBufferIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: + ret->stream = ZINK_DGC_IB; + size = sizeof(VkBindIndexBufferIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV: + ret->stream = ZINK_DGC_PSO; + size = sizeof(VkBindShaderGroupIndirectCommandNV); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: + ret->stream = ZINK_DGC_PUSH; + ret->pushconstantPipelineLayout = ctx->dgc.last_prog->base.layout; + ret->pushconstantShaderStageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + size = sizeof(float) * 6; //size for full tess level upload every time + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: + ret->stream = ZINK_DGC_DRAW; + size = sizeof(VkDrawIndirectCommand); + break; + case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: + ret->stream = ZINK_DGC_DRAW; + size = sizeof(VkDrawIndexedIndirectCommand); + break; + default: + unreachable("ack"); + } + struct zink_resource *old = NULL; + unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= ZINK_DGC_MAX ? 
ZINK_DGC_MAX : 1; + if (stream_count == 1) + ret->stream = 0; + unsigned stream = ret->stream; + bool max_exceeded = !ctx->dgc.max_size[stream]; + ret->offset = ctx->dgc.cur_offsets[stream]; + if (ctx->dgc.buffers[stream]) { + /* detect end of buffer */ + if (ctx->dgc.bind_offsets[stream] + ctx->dgc.cur_offsets[stream] + size > ctx->dgc.buffers[stream]->base.b.width0) { + old = ctx->dgc.buffers[stream]; + ctx->dgc.buffers[stream] = NULL; + max_exceeded = true; + } + } + if (!ctx->dgc.buffers[stream]) { + if (max_exceeded) + ctx->dgc.max_size[stream] += size * 5; + uint8_t *ptr; + unsigned offset; + u_upload_alloc(ctx->dgc.upload[stream], 0, ctx->dgc.max_size[stream], + screen->info.props.limits.minMemoryMapAlignment, &offset, + (struct pipe_resource **)&ctx->dgc.buffers[stream], (void **)&ptr); + size_t cur_size = old ? (ctx->dgc.cur_offsets[stream] - ctx->dgc.bind_offsets[stream]) : 0; + if (old) { + struct pipe_resource *pold = &old->base.b; + /* copy and delete old buffer */ + zink_batch_reference_resource_rw(&ctx->batch, old, true); + memcpy(ptr + offset, ctx->dgc.maps[stream] + ctx->dgc.bind_offsets[stream], cur_size); + pipe_resource_reference(&pold, NULL); + } + ctx->dgc.maps[stream] = ptr; + ctx->dgc.bind_offsets[stream] = offset; + ctx->dgc.cur_offsets[stream] = cur_size; + } + *mem = ctx->dgc.maps[stream] + ctx->dgc.cur_offsets[stream]; + ctx->dgc.cur_offsets[stream] += size; + return ret; +} + +void +zink_flush_dgc(struct zink_context *ctx) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct zink_batch_state *bs = ctx->batch.state; + if (!ctx->dgc.valid) + return; + + /* tokens should be created as they are used */ + unsigned num_cmds = util_dynarray_num_elements(&ctx->dgc.tokens, VkIndirectCommandsLayoutTokenNV); + assert(num_cmds); + VkIndirectCommandsLayoutTokenNV *cmds = ctx->dgc.tokens.data; + uint32_t strides[ZINK_DGC_MAX] = {0}; + + unsigned stream_count = screen->info.nv_dgc_props.maxIndirectCommandsStreamCount >= 
ZINK_DGC_MAX ? ZINK_DGC_MAX : 1; + VkIndirectCommandsStreamNV streams[ZINK_DGC_MAX]; + for (unsigned i = 0; i < stream_count; i++) { + if (ctx->dgc.buffers[i]) { + streams[i].buffer = ctx->dgc.buffers[i]->obj->buffer; + streams[i].offset = ctx->dgc.bind_offsets[i]; + } else { + streams[i].buffer = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; + streams[i].offset = 0; + } + } + /* this is a stupid pipeline that will never actually be used as anything but a container */ + VkPipeline pipeline = VK_NULL_HANDLE; + if (screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1) { + /* RADV doesn't support shader pipeline binds, so use this hacky path */ + pipeline = ctx->gfx_pipeline_state.pipeline; + } else { + VkPrimitiveTopology vkmode = zink_primitive_topology(ctx->gfx_pipeline_state.gfx_prim_mode); + pipeline = zink_create_gfx_pipeline(screen, ctx->dgc.last_prog, ctx->dgc.last_prog->objs, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, vkmode, false, &ctx->dgc.pipelines); + assert(pipeline); + util_dynarray_append(&bs->dgc.pipelines, VkPipeline, pipeline); + VKCTX(CmdBindPipelineShaderGroupNV)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline, 0); + } + unsigned remaining = num_cmds; + for (unsigned i = 0; i < num_cmds; i += screen->info.nv_dgc_props.maxIndirectCommandsTokenCount, remaining -= screen->info.nv_dgc_props.maxIndirectCommandsTokenCount) { + VkIndirectCommandsLayoutCreateInfoNV lci = { + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV, + NULL, + 0, + VK_PIPELINE_BIND_POINT_GRAPHICS, + MIN2(remaining, screen->info.nv_dgc_props.maxIndirectCommandsTokenCount), + cmds + i, + stream_count, + strides + }; + VkIndirectCommandsLayoutNV iclayout; + VkResult res = VKSCR(CreateIndirectCommandsLayoutNV)(screen->dev, &lci, NULL, &iclayout); + assert(res == VK_SUCCESS); + util_dynarray_append(&bs->dgc.layouts, VkIndirectCommandsLayoutNV, iclayout); + + /* a lot of hacks to set up a preprocess buffer */ + 
VkGeneratedCommandsMemoryRequirementsInfoNV info = { + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV, + NULL, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline, + iclayout, + 1 + }; + VkMemoryRequirements2 reqs = { + VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2 + }; + VKSCR(GetGeneratedCommandsMemoryRequirementsNV)(screen->dev, &info, &reqs); + struct pipe_resource templ = {0}; + templ.target = PIPE_BUFFER; + templ.format = PIPE_FORMAT_R8_UNORM; + templ.bind = 0; + templ.usage = PIPE_USAGE_IMMUTABLE; + templ.flags = 0; + templ.width0 = reqs.memoryRequirements.size; + templ.height0 = 1; + templ.depth0 = 1; + templ.array_size = 1; + uint64_t params[] = {reqs.memoryRequirements.size, reqs.memoryRequirements.alignment, reqs.memoryRequirements.memoryTypeBits}; + struct pipe_resource *pres = screen->base.resource_create_with_modifiers(&screen->base, &templ, params, 3); + assert(pres); + zink_batch_reference_resource_rw(&ctx->batch, zink_resource(pres), true); + + VkGeneratedCommandsInfoNV gen = { + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV, + NULL, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline, + iclayout, + stream_count, + streams, + 1, + zink_resource(pres)->obj->buffer, + 0, + pres->width0, + VK_NULL_HANDLE, + 0, + VK_NULL_HANDLE, + 0 + }; + VKCTX(CmdExecuteGeneratedCommandsNV)(ctx->batch.state->cmdbuf, VK_FALSE, &gen); + + pipe_resource_reference(&pres, NULL); + } + util_dynarray_clear(&ctx->dgc.pipelines); + util_dynarray_clear(&ctx->dgc.tokens); + ctx->dgc.valid = false; + ctx->pipeline_changed[0] = true; + zink_select_draw_vbo(ctx); +} struct pipe_surface * zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index) @@ -4907,6 +5121,13 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) for (int i = 0; i < ARRAY_SIZE(ctx->fb_clears); i++) util_dynarray_init(&ctx->fb_clears[i].clears, ctx); + if (zink_debug & ZINK_DEBUG_DGC) { + util_dynarray_init(&ctx->dgc.pipelines, ctx); + util_dynarray_init(&ctx->dgc.tokens, 
ctx); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->dgc.upload); i++) + ctx->dgc.upload[i] = u_upload_create_default(&ctx->base); + } + if (!is_copy_only) { ctx->blitter = util_blitter_create(&ctx->base); if (!ctx->blitter) diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index c961af077dd..604d4414273 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -54,6 +54,7 @@ struct zink_vertex_elements_state; util_debug_message(&ctx->dbg, PERF_INFO, __VA_ARGS__); \ } while(0) + static inline struct zink_resource * zink_descriptor_surface_resource(struct zink_descriptor_surface *ds) { @@ -202,6 +203,19 @@ zink_cmd_debug_marker_end(struct zink_context *ctx, VkCommandBuffer cmdbuf,bool void zink_copy_buffer(struct zink_context *ctx, struct zink_resource *dst, struct zink_resource *src, unsigned dst_offset, unsigned src_offset, unsigned size); + +VkIndirectCommandsLayoutTokenNV * +zink_dgc_add_token(struct zink_context *ctx, VkIndirectCommandsTokenTypeNV type, void **mem); +void +zink_flush_dgc(struct zink_context *ctx); + +static ALWAYS_INLINE void +zink_flush_dgc_if_enabled(struct zink_context *ctx) +{ + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) + zink_flush_dgc(ctx); +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_descriptors.c b/src/gallium/drivers/zink/zink_descriptors.c index 696b2a37f0b..12da7df99de 100644 --- a/src/gallium/drivers/zink/zink_descriptors.c +++ b/src/gallium/drivers/zink/zink_descriptors.c @@ -1193,6 +1193,7 @@ zink_descriptors_update_masked_buffer(struct zink_context *ctx, bool is_compute, bs->dd.cur_db_offset[type] = bs->dd.db_offset; bs->dd.db_offset += pg->dd.db_size[type]; } + zink_flush_dgc_if_enabled(ctx); /* templates are indexed by the set id, so increment type by 1 * (this is effectively an optimization of indirecting through screen->desc_set_id) */ @@ -1229,6 +1230,7 @@ zink_descriptors_update_masked(struct zink_context 
*ctx, bool is_compute, uint8_ u_foreach_bit(type, changed_sets) { assert(type + 1 < pg->num_dsl); if (pg->dd.pool_key[type]) { + zink_flush_dgc_if_enabled(ctx); /* templates are indexed by the set id, so increment type by 1 * (this is effectively an optimization of indirecting through screen->desc_set_id) */ @@ -1249,6 +1251,7 @@ zink_descriptors_update_masked(struct zink_context *ctx, bool is_compute, uint8_ continue; /* same set indexing as above */ assert(bs->dd.sets[is_compute][type + 1]); + zink_flush_dgc_if_enabled(ctx); VKSCR(CmdBindDescriptorSets)(bs->cmdbuf, is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, /* same set indexing as above */ @@ -1343,6 +1346,7 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute) enlarge_db(ctx); changed_sets = pg->dd.binding_usage; ctx->dd.push_state_changed[is_compute] = true; + zink_flush_dgc_if_enabled(ctx); } if (!bs->dd.db_bound) @@ -1386,6 +1390,7 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute) bs->dd.cur_db_offset[ZINK_DESCRIPTOR_TYPE_UNIFORMS] = bs->dd.db_offset; bs->dd.db_offset += ctx->dd.db_size[is_compute]; } + zink_flush_dgc_if_enabled(ctx); VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, @@ -1393,6 +1398,9 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute) &index, &offset); } else { + if (ctx->dd.push_state_changed[0]) { + zink_flush_dgc_if_enabled(ctx); + } if (have_KHR_push_descriptor) { if (ctx->dd.push_state_changed[is_compute]) VKCTX(CmdPushDescriptorSetWithTemplateKHR)(bs->cmdbuf, pg->dd.templates[0], diff --git a/src/gallium/drivers/zink/zink_device_info.py b/src/gallium/drivers/zink/zink_device_info.py index 21f2fa480f1..b4722cdc83e 100644 --- a/src/gallium/drivers/zink/zink_device_info.py +++ b/src/gallium/drivers/zink/zink_device_info.py @@ -133,6 +133,7 @@ EXTENSIONS = [ alias="feedback_loop", features=True), Extension("VK_EXT_attachment_feedback_loop_dynamic_state", alias="feedback_dyn", features=True), + Extension("VK_NV_device_generated_commands", alias="nv_dgc", features=True, properties=True), Extension("VK_EXT_fragment_shader_interlock", alias="interlock", features=True, diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index 3ddb32bb73a..6ee08a48e6c 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -113,6 +113,34 @@ barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinf } } +static void +bind_vertex_buffers_dgc(struct zink_context *ctx) +{ + struct zink_vertex_elements_state *elems = ctx->element_state; + + ctx->vertex_buffers_dirty = false; + if (!elems->hw_state.num_bindings) + return; + for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { + struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->hw_state.binding_map[i]; + assert(vb); + VkBindVertexBufferIndirectCommandNV *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr); + token->vertexBindingUnit = ctx->element_state->hw_state.binding_map[i]; + if 
(vb->buffer.resource) { + struct zink_resource *res = zink_resource(vb->buffer.resource); + assert(res->obj->bda); + ptr->bufferAddress = res->obj->bda + vb->buffer_offset; + ptr->size = res->base.b.width0; + ptr->stride = vb->stride; + } else { + ptr->bufferAddress = 0; + ptr->size = 0; + ptr->stride = 0; + } + } +} + template static void zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) @@ -171,6 +199,47 @@ update_drawid(struct zink_context *ctx, unsigned draw_id) &draw_id); } +static void +update_drawid_dgc(struct zink_context *ctx, unsigned draw_id) +{ + uint32_t *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_id); + token->pushconstantSize = sizeof(unsigned); + *ptr = draw_id; +} + +ALWAYS_INLINE static void +draw_indexed_dgc_need_index_buffer_unref(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) +{ + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + } + } +} + ALWAYS_INLINE static void draw_indexed_need_index_buffer_unref(struct zink_context *ctx, const struct 
pipe_draw_info *dinfo, @@ -199,6 +268,37 @@ draw_indexed_need_index_buffer_unref(struct zink_context *ctx, } } +ALWAYS_INLINE static void +draw_indexed_dgc(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) +{ + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndexedIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr); + *ptr = cmd; + } + } +} + template ALWAYS_INLINE static void draw_indexed(struct zink_context *ctx, @@ -234,6 +334,37 @@ draw_indexed(struct zink_context *ctx, } } +ALWAYS_INLINE static void +draw_dgc(struct zink_context *ctx, + const struct pipe_draw_info *dinfo, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws, + unsigned draw_id, + bool needs_drawid) +{ + if (dinfo->increment_draw_id && needs_drawid) { + for (unsigned i = 0; i < num_draws; i++) { + update_drawid_dgc(ctx, draw_id); + VkDrawIndirectCommand *ptr, cmd = { + draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr); + *ptr = cmd; + draw_id++; + } + } else { + if (needs_drawid) + update_drawid_dgc(ctx, draw_id); + for (unsigned i = 0; i < num_draws; i++) { + VkDrawIndirectCommand *ptr, cmd = { + 
draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance + }; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr); + *ptr = cmd; + } + } +} + template ALWAYS_INLINE static void draw(struct zink_context *ctx, @@ -267,7 +398,7 @@ draw(struct zink_context *ctx, template static bool -update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode) +update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode, bool can_dgc) { VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline; const struct zink_screen *screen = zink_screen(ctx->base.screen); @@ -286,8 +417,18 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum } if (pipeline) { pipeline_changed = prev_pipeline != pipeline; - if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) - VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) { + ctx->dgc.last_prog = ctx->curr_program; + if (unlikely(can_dgc && screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1)) { + VkBindShaderGroupIndirectCommandNV *ptr; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV, (void**)&ptr); + util_dynarray_append(&ctx->dgc.pipelines, VkPipeline, pipeline); + /* zero-indexed -> base + group + num_pipelines-1 = base + num_pipelines */ + ptr->groupIndex = util_dynarray_num_elements(&ctx->dgc.pipelines, VkPipeline) + 1; + } else { + VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + } ctx->shobj_draw = false; } else { if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) { @@ -383,6 +524,9 @@ zink_draw(struct pipe_context *pctx, zink_rebind_all_images(ctx); } + if (mode_changed) + zink_flush_dgc_if_enabled(ctx); + unsigned index_offset = 0; unsigned index_size = dinfo->index_size; struct pipe_resource *index_buffer = NULL; @@ -433,6 +577,10 @@ 
zink_draw(struct pipe_context *pctx, if (!ctx->blitting) zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL); + bool can_dgc = false; + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) + can_dgc = !so_target && !ctx->num_so_targets && (!dindirect || !dindirect->buffer); + /* ensure synchronization between doing streamout with counter buffer * and using counter buffer for indirect draw */ @@ -519,7 +667,15 @@ zink_draw(struct pipe_context *pctx, VK_INDEX_TYPE_UINT32, }; struct zink_resource *res = zink_resource(index_buffer); - VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]); + if (unlikely(can_dgc)) { + VkBindIndexBufferIndirectCommandNV *ptr; + zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV, (void**)&ptr); + ptr->bufferAddress = res->obj->bda + index_offset; + ptr->size = res->base.b.width0; + ptr->indexType = index_type[index_size >> 1]; + } else { + VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]); + } } if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) { if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart) @@ -530,7 +686,7 @@ zink_draw(struct pipe_context *pctx, if (have_streamout && ctx->dirty_so_targets) zink_emit_stream_output_targets(pctx); - bool pipeline_changed = update_gfx_pipeline(ctx, batch->state, mode); + bool pipeline_changed = update_gfx_pipeline(ctx, batch->state, mode, can_dgc); if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) { VkViewport viewports[PIPE_MAX_VIEWPORTS]; @@ -717,7 +873,9 @@ zink_draw(struct pipe_context *pctx, if (!DRAW_STATE) { if (BATCH_CHANGED || ctx->vertex_buffers_dirty) { - if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride) + if (unlikely(can_dgc)) + bind_vertex_buffers_dgc(ctx); 
+ else if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride) zink_bind_vertex_buffers(batch, ctx); else zink_bind_vertex_buffers(batch, ctx); @@ -729,9 +887,8 @@ zink_draw(struct pipe_context *pctx, zink_select_draw_vbo(ctx); } - if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed)) { + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed)) VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode)); - } if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) { VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart); @@ -754,15 +911,31 @@ zink_draw(struct pipe_context *pctx, if (reads_basevertex) { unsigned draw_mode_is_indexed = index_size > 0; - VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, - offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned), - &draw_mode_is_indexed); + if (unlikely(can_dgc)) { + uint32_t *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); + token->pushconstantSize = sizeof(unsigned); + *ptr = draw_mode_is_indexed; + } else { + VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned), + &draw_mode_is_indexed); + } } if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] && ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) { - VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, - offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6, - 
&ctx->tess_levels[0]); + if (unlikely(can_dgc)) { + float *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr); + token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, default_inner_level); + token->pushconstantSize = sizeof(float) * 6; + memcpy(ptr, &ctx->tess_levels[0], sizeof(float) * 6); + } else { + VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS, + offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6, + &ctx->tess_levels[0]); + } } if (!screen->optimal_keys) { @@ -870,10 +1043,16 @@ zink_draw(struct pipe_context *pctx, } else VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); } else { - if (need_index_buffer_unref) + if (unlikely(can_dgc)) { + if (need_index_buffer_unref) + draw_indexed_dgc_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + else + draw_indexed_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + } else if (need_index_buffer_unref) { draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); - else + } else { draw_indexed(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + } } } else { if (so_target && screen->info.tf_props.transformFeedbackDraw) { @@ -905,13 +1084,17 @@ zink_draw(struct pipe_context *pctx, } else VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride); } else { - draw(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + if (unlikely(can_dgc)) + draw_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); + else + draw(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid); } } if (unlikely(zink_tracing)) zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker); + 
ctx->dgc.valid = can_dgc; if (have_streamout) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); @@ -923,6 +1106,7 @@ zink_draw(struct pipe_context *pctx, } VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets); } + batch->has_work = true; batch->last_was_compute = false; ctx->batch.work_count = work_count; @@ -985,9 +1169,19 @@ zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstat struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource); zink_batch_resource_usage_set(&ctx->batch, res, false, true); VkDeviceSize offset = vstate->input.vbuffer.buffer_offset; - VKCTX(CmdBindVertexBuffers)(cmdbuf, 0, - zstate->velems.hw_state.num_bindings, - &res->obj->buffer, &offset); + if (unlikely(zink_debug & ZINK_DEBUG_DGC)) { + VkBindVertexBufferIndirectCommandNV *ptr; + VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr); + token->vertexBindingUnit = 0; + token->vertexDynamicStride = VK_FALSE; + ptr->bufferAddress = res->obj->bda + offset; + ptr->size = res->base.b.width0; + ptr->stride = 0; + } else { + VKCTX(CmdBindVertexBuffers)(cmdbuf, 0, + zstate->velems.hw_state.num_bindings, + &res->obj->buffer, &offset); + } } template diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index bc3a667569d..72bf0a5ae5b 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -43,7 +43,8 @@ zink_create_gfx_pipeline(struct zink_screen *screen, struct zink_gfx_pipeline_state *state, const uint8_t *binding_map, VkPrimitiveTopology primitive_topology, - bool optimize) + bool optimize, + struct util_dynarray *dgc) { struct zink_rasterizer_hw_state *hw_rast_state = (void*)&state->dyn_state3; VkPipelineVertexInputStateCreateInfo vertex_input_state; 
@@ -405,6 +406,27 @@ zink_create_gfx_pipeline(struct zink_screen *screen, pci.pStages = shader_stages; pci.stageCount = num_stages; + VkGraphicsShaderGroupCreateInfoNV gci = { + VK_STRUCTURE_TYPE_GRAPHICS_SHADER_GROUP_CREATE_INFO_NV, + NULL, + pci.stageCount, + pci.pStages, + pci.pVertexInputState, + pci.pTessellationState + }; + VkGraphicsPipelineShaderGroupsCreateInfoNV dgci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV, + pci.pNext, + 1, + &gci, + dgc ? util_dynarray_num_elements(dgc, VkPipeline) : 0, + dgc ? dgc->data : NULL + }; + if (zink_debug & ZINK_DEBUG_DGC) { + pci.flags |= VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV; + pci.pNext = &dgci; + } + VkPipeline pipeline; VkResult result = VKSCR(CreateGraphicsPipelines)(screen->dev, prog->base.pipeline_cache, 1, &pci, NULL, &pipeline); diff --git a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h index 90585f22b5c..ba62986e536 100644 --- a/src/gallium/drivers/zink/zink_pipeline.h +++ b/src/gallium/drivers/zink/zink_pipeline.h @@ -47,7 +47,8 @@ zink_create_gfx_pipeline(struct zink_screen *screen, struct zink_gfx_pipeline_state *state, const uint8_t *binding_map, VkPrimitiveTopology primitive_topology, - bool optimize); + bool optimize, + struct util_dynarray *dgc); VkPipeline zink_create_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, struct zink_compute_pipeline_state *state); diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index c7f6b97c30d..cd2677488d4 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -789,7 +789,7 @@ optimized_compile_job(void *data, void *gdata, int thread_index) if (pc_entry->gpl.gkey) pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true); else - pipeline = zink_create_gfx_pipeline(screen, 
pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true); + pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL); if (pipeline) { pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline; pc_entry->pipeline = pipeline; @@ -807,7 +807,7 @@ optimized_shobj_compile_job(void *data, void *gdata, int thread_index) objs[i].mod = VK_NULL_HANDLE; objs[i].spirv = pc_entry->shobjs[i].spirv; } - pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true); + pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL); /* no unoptimized_pipeline dance */ } @@ -1640,6 +1640,9 @@ zink_get_compute_pipeline(struct zink_screen *screen, static void bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader) { + /* RADV doesn't support binding pipelines in DGC */ + if (zink_screen(ctx->base.screen)->info.nv_dgc_props.maxGraphicsShaderGroupCount == 0) + zink_flush_dgc_if_enabled(ctx); if (shader && shader->info.num_inlinable_uniforms) ctx->shader_has_inlinable_uniforms_mask |= 1 << stage; else @@ -2138,7 +2141,7 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state); VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, - shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true); + shaders[MESA_SHADER_TESS_EVAL] ? 
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true, NULL); print_pipeline_stats(screen, pipeline); } else { if (zink_screen(pctx->screen)->info.have_EXT_shader_object) diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index 18fd3fe3ca3..3cb2f4b2da9 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -216,9 +216,9 @@ zink_get_gfx_pipeline(struct zink_context *ctx, } else { /* optimize by default only when expecting precompiles in order to reduce stuttering */ if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) - pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB); + pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB, NULL); else - pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB); + pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB, NULL); } if (pipeline == VK_NULL_HANDLE) return VK_NULL_HANDLE; diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c index 640ea6c4063..c7cb97c3919 100644 --- a/src/gallium/drivers/zink/zink_query.c +++ b/src/gallium/drivers/zink/zink_query.c @@ -887,6 +887,8 @@ begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_quer return; } + zink_flush_dgc_if_enabled(ctx); + update_query_id(ctx, q); q->predicate_dirty = true; if (q->needs_reset) @@ -995,6 +997,8 @@ end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT) return; + zink_flush_dgc_if_enabled(ctx); + ASSERTED struct zink_query_buffer *qbo = q->curr_qbo; assert(qbo); assert(!is_time_query(q)); @@ -1282,6 +1286,7 @@ 
zink_start_conditional_render(struct zink_context *ctx) void zink_stop_conditional_render(struct zink_context *ctx) { + zink_flush_dgc_if_enabled(ctx); struct zink_batch *batch = &ctx->batch; zink_clear_apply_conditionals(ctx); if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active) @@ -1301,6 +1306,7 @@ zink_render_condition(struct pipe_context *pctx, zink_batch_no_rp(ctx); VkQueryResultFlagBits flags = 0; + zink_flush_dgc_if_enabled(ctx); if (query == NULL) { /* force conditional clears if they exist */ if (ctx->clears_enabled && !ctx->batch.in_rp) diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index d6851178c2e..d35f517bccb 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -695,7 +695,15 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t } } - VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs); + if (modifiers_count) { + assert(modifiers_count == 3); + /* this is the DGC path because there's no other way to pass mem bits and I don't wanna copy/paste everything around */ + reqs.size = modifiers[0]; + reqs.alignment = modifiers[1]; + reqs.memoryTypeBits = modifiers[2]; + } else { + VKSCR(GetBufferMemoryRequirements)(screen->dev, obj->buffer, &reqs); + } if (templ->usage == PIPE_USAGE_STAGING) flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; else if (templ->usage == PIPE_USAGE_STREAM) @@ -1266,7 +1274,7 @@ resource_create(struct pipe_screen *pscreen, */ res->base.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY; } - if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB || zink_debug & ZINK_DEBUG_DGC) zink_resource_get_address(screen, res); } else { if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) diff --git a/src/gallium/drivers/zink/zink_screen.c 
b/src/gallium/drivers/zink/zink_screen.c index aab92605bf0..b4e51a3d7fe 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -96,6 +96,7 @@ zink_debug_options[] = { { "optimal_keys", ZINK_DEBUG_OPTIMAL_KEYS, "Debug/use optimal_keys" }, { "noopt", ZINK_DEBUG_NOOPT, "Disable async optimized pipeline compiles" }, { "nobgc", ZINK_DEBUG_NOBGC, "Disable all async pipeline compiles" }, + { "dgc", ZINK_DEBUG_DGC, "Use DGC (driver testing only)" }, DEBUG_NAMED_VALUE_END }; @@ -2665,6 +2666,17 @@ init_optimal_keys(struct zink_screen *screen) screen->info.have_EXT_shader_object = false; if (screen->info.have_EXT_shader_object) screen->have_full_ds3 = true; + if (zink_debug & ZINK_DEBUG_DGC) { + if (!screen->optimal_keys) { + mesa_loge("zink: can't DGC without optimal_keys!"); + zink_debug &= ~ZINK_DEBUG_DGC; + } else { + screen->info.have_EXT_multi_draw = false; + screen->info.have_EXT_shader_object = false; + screen->info.have_EXT_graphics_pipeline_library = false; + screen->info.have_EXT_vertex_input_dynamic_state = false; + } + } } static struct disk_cache * @@ -2877,6 +2889,12 @@ zink_internal_create_screen(const struct pipe_screen_config *config) mesa_loge("zink: KHR_timeline_semaphore is required"); goto fail; } + if (zink_debug & ZINK_DEBUG_DGC) { + if (!screen->info.have_NV_device_generated_commands) { + mesa_loge("zink: can't use DGC without NV_device_generated_commands"); + goto fail; + } + } init_driver_workarounds(screen); @@ -3004,7 +3022,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config) slab_create_parent(&screen->transfer_pool, sizeof(struct zink_transfer), 16); - screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", screen->is_cpu); + screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", screen->is_cpu) && !(zink_debug & ZINK_DEBUG_DGC); screen->total_video_mem = get_video_mem(screen); screen->clamp_video_mem = screen->total_video_mem * 
0.8; diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 15daafa238b..40c94fb71b4 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -31,9 +31,6 @@ extern "C" { #endif -extern uint32_t zink_debug; -extern bool zink_tracing; - struct util_dl_library; void diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index 5bffbd05530..404d2660b0c 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -179,6 +179,7 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx, { struct zink_context *ctx = zink_context(pctx); struct zink_gfx_pipeline_state *state = &ctx->gfx_pipeline_state; + zink_flush_dgc_if_enabled(ctx); ctx->element_state = cso; if (cso) { if (state->element_state != &ctx->element_state->hw_state) { @@ -403,6 +404,7 @@ zink_bind_blend_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); struct zink_gfx_pipeline_state* state = &zink_context(pctx)->gfx_pipeline_state; + zink_flush_dgc_if_enabled(ctx); struct zink_blend_state *blend = cso; if (state->blend_state != cso) { @@ -513,6 +515,7 @@ zink_bind_depth_stencil_alpha_state(struct pipe_context *pctx, void *cso) struct zink_context *ctx = zink_context(pctx); bool prev_zswrite = ctx->dsa_state ? ctx->dsa_state->hw_state.depth_write || ctx->dsa_state->hw_state.stencil_test : false; + zink_flush_dgc_if_enabled(ctx); ctx->dsa_state = cso; if (cso) { @@ -658,6 +661,7 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso) bool rasterizer_discard = ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false; bool half_pixel_center = ctx->rast_state ? ctx->rast_state->base.half_pixel_center : true; float line_width = ctx->rast_state ? 
ctx->rast_state->base.line_width : 1.0; + zink_flush_dgc_if_enabled(ctx); ctx->rast_state = cso; if (ctx->rast_state) { diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 5d38097821a..cbb1d3517da 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -97,6 +97,18 @@ #define VKCTX(fn) zink_screen(ctx->base.screen)->vk.fn #define VKSCR(fn) screen->vk.fn +#ifdef __cplusplus +extern "C" { +#endif + +extern uint32_t zink_debug; +extern bool zink_tracing; + +#ifdef __cplusplus +} +#endif + + /** enums */ /* features for draw/program templates */ @@ -225,6 +237,7 @@ enum zink_debug { ZINK_DEBUG_OPTIMAL_KEYS = (1<<14), ZINK_DEBUG_NOOPT = (1<<15), ZINK_DEBUG_NOBGC = (1<<16), + ZINK_DEBUG_DGC = (1<<17), }; enum zink_pv_emulation_primitive { @@ -235,6 +248,15 @@ enum zink_pv_emulation_primitive { ZINK_PVE_PRIMITIVE_FAN = 3, }; +enum zink_dgc_buffer { + ZINK_DGC_VBO, + ZINK_DGC_IB, + ZINK_DGC_PSO, + ZINK_DGC_PUSH, + ZINK_DGC_DRAW, + ZINK_DGC_MAX, +}; + /** fence types */ struct tc_unflushed_batch_token; @@ -578,6 +600,11 @@ struct zink_batch_state { struct util_dynarray acquires; struct util_dynarray acquire_flags; + struct { + struct util_dynarray pipelines; + struct util_dynarray layouts; + } dgc; + VkAccessFlags unordered_write_access; VkPipelineStageFlags unordered_write_stages; @@ -1832,6 +1859,19 @@ struct zink_context { bool active; //this is the internal vk state } render_condition; + struct { + bool valid; + struct u_upload_mgr *upload[ZINK_DGC_MAX]; + struct zink_resource *buffers[ZINK_DGC_MAX]; + struct zink_gfx_program *last_prog; + uint8_t *maps[ZINK_DGC_MAX]; + size_t bind_offsets[ZINK_DGC_MAX]; + size_t cur_offsets[ZINK_DGC_MAX]; + size_t max_size[ZINK_DGC_MAX]; + struct util_dynarray pipelines; + struct util_dynarray tokens; + } dgc; + struct pipe_resource *dummy_vertex_buffer; struct pipe_resource *dummy_xfb_buffer; struct pipe_surface *dummy_surface[7];