diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt index 7de99c7aede..c2ff26b8a10 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-navi10-flakes.txt @@ -74,3 +74,10 @@ KHR-GL46.packed_pixels.varied_rectangle.rgb4_format_green spec@!opengl 1.1@copypixels-sync spec@amd_pinned_memory@map-buffer offset=0 spec@glsl-1.30@execution@texelfetch fs sampler2darray 98x1x9-98x129x9 + + +# mesh shaders somehow? +glx@glx-visuals-depth +glx@glx-visuals-depth -pixmap +glx@glx-visuals-stencil +glx@glx-visuals-stencil -pixmap diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt index 0f9b590e500..ecaeec7b33a 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-flakes.txt @@ -221,3 +221,7 @@ spec@glsl-1.50@gs-max-output spec@glsl-4.30@execution@built-in-functions@cs-clamp-ivec4-ivec4-ivec4 spec@oes_texture_view@rendering-formats spec@ovr_multiview@triangle_compare_max_basemax_layersmax + +# mesh shaders somehow? +glx@glx-visuals-depth +glx@glx-visuals-stencil diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt index 3cf52a1fd34..72a7912c4db 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-flakes.txt @@ -105,3 +105,11 @@ spec@ext_texture_lod_bias@lodbias spec@glsl-1.30@execution@texelfetch fs sampler1darray 1x71-501x71 spec@glsl-1.30@execution@texelfetch fs sampler2d 1x71-501x71 spec@glsl-1.30@execution@texelfetch fs sampler2darray 1x129x9-98x129x9 + + +# mesh shaders somehow? +glx@glx-visuals-depth +glx@glx-visuals-depth -pixmap +glx@glx-visuals-stencil +glx@glx-visuals-stencil -pixmap + diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt index 71c8851a82c..cc86e0ed15f 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-flakes.txt @@ -421,3 +421,9 @@ KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-s dEQP-GLES31.functional.copy_image.mixed.viewclass_64_bits_mixed.rg32f_r11_eac.texture3d_to_cubemap shaders@glsl-fs-loop spec@arb_gpu_shader5@texturegather@vs-rgba-3-unorm-2d + + + +# mesh shaders somehow? +glx@glx-visuals-stencil +glx@glx-visuals-stencil -pixmap diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index af16080535e..524d8902537 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -3320,7 +3320,7 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st sci.pSetLayouts = pg->dsl; } else { sci.setLayoutCount = zs->info.stage == MESA_SHADER_COMPUTE ? 
1 : ZINK_GFX_SHADER_COUNT; - dsl[zs->info.stage] = zs->precompile.dsl;; + dsl[zink_descriptor_stage_idx(zs->info.stage)] = zs->precompile.dsl;; sci.pSetLayouts = dsl; } VkPushConstantRange pcr; @@ -4495,10 +4495,7 @@ zink_binding(mesa_shader_stage stage, VkDescriptorType type, int index, bool com if (stage == MESA_SHADER_NONE) { UNREACHABLE("not supported"); } else { - unsigned base = stage; - /* clamp compute bindings for better driver efficiency */ - if (mesa_shader_stage_is_compute(stage)) - base = 0; + unsigned base = zink_descriptor_stage_idx(stage); switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -6291,7 +6288,8 @@ zink_shader_init(struct zink_screen *screen, struct zink_shader *zs) ztype = ZINK_DESCRIPTOR_TYPE_UBO; /* buffer 0 is a push descriptor */ var->data.descriptor_set = !!var->data.driver_location; - var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) : + unsigned clamped_stage = mesa_shader_stage_is_compute(nir->info.stage) ? MESA_SHADER_COMPUTE : mesa_shader_stage_is_mesh(nir->info.stage) ? nir->info.stage - MESA_SHADER_TASK : nir->info.stage; + var->data.binding = !var->data.driver_location ? clamped_stage : zink_binding(nir->info.stage, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, var->data.driver_location, @@ -6436,25 +6434,26 @@ gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader) if (!prog) return false; mesa_shader_stage stage = shader->info.stage; - assert(stage < ZINK_GFX_SHADER_COUNT); + assert(!mesa_shader_stage_is_compute(stage)); util_queue_fence_wait(&prog->base.cache_fence); unsigned stages_present = prog->stages_present; + bool is_mesh = (stages_present & BITFIELD_BIT(MESA_SHADER_MESH)) > 0; unsigned stages_remaining = prog->stages_remaining; - if (prog->shaders[MESA_SHADER_TESS_CTRL] && - prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) { + if (!is_mesh && prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) { stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); stages_remaining &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); } - unsigned idx = zink_program_cache_stages(stages_present); + unsigned idx = is_mesh ? zink_mesh_cache_stages(stages_present) : zink_program_cache_stages(stages_present); if (!prog->base.removed && stages_present == stages_remaining && (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) { - struct hash_table *ht = &prog->base.ctx->program_cache[idx]; - simple_mtx_lock(&prog->base.ctx->program_lock[idx]); + struct hash_table *ht = is_mesh ? &prog->base.ctx->mesh_cache[idx] : &prog->base.ctx->program_cache[idx]; + simple_mtx_t *lock = is_mesh ? 
&prog->base.ctx->mesh_lock[idx] : &prog->base.ctx->program_lock[idx]; + simple_mtx_lock(lock); struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders); assert(he && he->data == prog); _mesa_hash_table_remove(ht, he); prog->base.removed = true; - simple_mtx_unlock(&prog->base.ctx->program_lock[idx]); + simple_mtx_unlock(lock); for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { hash_table_foreach(&prog->pipelines[i], table_entry) { @@ -6473,8 +6472,7 @@ gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader) prog->shaders[MESA_SHADER_TESS_CTRL] = NULL; if (stage != MESA_SHADER_FRAGMENT && prog->shaders[MESA_SHADER_GEOMETRY] && - prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent == - shader) { + prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent == shader) { prog->shaders[MESA_SHADER_GEOMETRY] = NULL; } zink_gfx_program_reference(screen, &prog, NULL); @@ -6494,10 +6492,17 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader) struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*); if (!libs->removed) { libs->removed = true; - unsigned idx = zink_program_cache_stages(libs->stages_present); - simple_mtx_lock(&screen->pipeline_libs_lock[idx]); - _mesa_set_remove_key(&screen->pipeline_libs[idx], libs); - simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + if (libs->stages_present & BITFIELD_BIT(MESA_SHADER_MESH)) { + unsigned idx = (libs->stages_present & BITFIELD_BIT(MESA_SHADER_TASK)) > 0; + simple_mtx_lock(&screen->mesh_pipeline_libs_lock[idx]); + _mesa_set_remove_key(&screen->mesh_pipeline_libs[idx], libs); + simple_mtx_unlock(&screen->mesh_pipeline_libs_lock[idx]); + } else { + unsigned idx = zink_program_cache_stages(libs->stages_present); + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + _mesa_set_remove_key(&screen->pipeline_libs[idx], libs); + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + } } zink_gfx_lib_cache_unref(screen, libs); } @@ -6507,7 +6512,7 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader) zink_gfx_shader_free(screen, shader->non_fs.generated_tcs); shader->non_fs.generated_tcs = NULL; } - if (shader->info.stage != MESA_SHADER_FRAGMENT) { + if (shader->info.stage < MESA_SHADER_FRAGMENT) { for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) { for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) { if (shader->non_fs.generated_gs[i][j]) { diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index a215afee2f8..72386f417ae 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -152,6 +152,16 @@ zink_context_destroy(struct pipe_context *pctx) simple_mtx_unlock((&ctx->program_lock[i])); } + for (unsigned i = 0; i < ARRAY_SIZE(ctx->mesh_cache); i++) { + simple_mtx_lock((&ctx->mesh_lock[i])); + hash_table_foreach(&ctx->mesh_cache[i], entry) { + struct zink_program *pg = entry->data; + zink_program_finish(ctx, pg); + pg->removed = true; + } + simple_mtx_unlock((&ctx->mesh_lock[i])); + } + if (ctx->blitter) util_blitter_destroy(ctx->blitter); @@ -245,6 +255,10 @@ zink_context_destroy(struct pipe_context *pctx) _mesa_hash_table_clear(&ctx->program_cache[i], NULL); for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++) simple_mtx_destroy(&ctx->program_lock[i]); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->mesh_cache); i++) + _mesa_hash_table_clear(&ctx->mesh_cache[i], NULL); + for (unsigned i = 0; i < 
ARRAY_SIZE(ctx->mesh_lock); i++) + simple_mtx_destroy(&ctx->mesh_lock[i]); slab_destroy_child(&ctx->transfer_pool_unsync); zink_descriptors_deinit(ctx); @@ -2246,11 +2260,11 @@ update_feedback_loop_state(struct zink_context *ctx, unsigned idx, unsigned feed if (feedback_loops != ctx->feedback_loops) { if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) { if (ctx->gfx_pipeline_state.feedback_loop_zs) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.feedback_loop_zs = false; } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) { if (ctx->gfx_pipeline_state.feedback_loop) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.feedback_loop = false; } update_feedback_loop_dynamic_state(ctx); @@ -3800,6 +3814,7 @@ flush_batch(struct zink_context *ctx, bool sync) ctx->dirty_so_targets = true; memset(ctx->pipeline_changed, 1, sizeof(ctx->pipeline_changed)); zink_select_draw_vbo(ctx); + zink_select_draw_mesh_tasks(ctx); zink_select_launch_grid(ctx); if (ctx->oom_stall) @@ -3875,11 +3890,11 @@ unbind_fb_surface(struct zink_context *ctx, const struct pipe_surface *surf, uns if (feedback_loops != ctx->feedback_loops) { if (idx == PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop_zs) { if (ctx->gfx_pipeline_state.feedback_loop_zs) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.feedback_loop_zs = false; } else if (idx < PIPE_MAX_COLOR_BUFS && !zink_screen(ctx->base.screen)->driver_workarounds.always_feedback_loop) { if (ctx->gfx_pipeline_state.feedback_loop) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.feedback_loop = false; } if (zink_screen(ctx->base.screen)->info.have_KHR_unified_image_layouts && zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout) { @@ -4113,7 +4128,7 @@ zink_set_framebuffer_state(struct pipe_context *pctx, if (screen->have_full_ds3) ctx->sample_mask_changed = true; else - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; } ctx->gfx_pipeline_state.rast_samples = rast_samples; @@ -4146,7 +4161,7 @@ zink_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) if (zink_screen(pctx->screen)->have_full_ds3) ctx->sample_mask_changed = true; else - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; } static void @@ -4154,7 +4169,7 @@ zink_set_min_samples(struct pipe_context *pctx, unsigned min_samples) { struct zink_context *ctx = zink_context(pctx); ctx->gfx_pipeline_state.min_samples = min_samples - 1; - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; } static void @@ -5520,7 +5535,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->flags = flags; memset(ctx->pipeline_changed, 1, sizeof(ctx->pipeline_changed)); - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch = 1; 
ctx->gfx_pipeline_state.uses_dynamic_stride = screen->info.have_EXT_extended_dynamic_state || screen->info.have_EXT_vertex_input_dynamic_state; @@ -5773,7 +5788,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) } } if (!is_copy_only) { - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { /* need to update these based on screen config for null descriptors */ for (unsigned j = 0; j < ARRAY_SIZE(ctx->di.t.ubos[i]); j++) { if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { @@ -5807,6 +5822,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) } zink_select_draw_vbo(ctx); + zink_select_draw_mesh_tasks(ctx); zink_select_launch_grid(ctx); if (!is_copy_only && zink_debug & ZINK_DEBUG_SHADERDB) { diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index 1ca29b56c7b..dbd30e19fb7 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -62,7 +62,7 @@ zink_fb_clear_enabled(const struct zink_context *ctx, unsigned idx) return ctx->clears_enabled & (PIPE_CLEAR_COLOR0 << idx); } -static inline uint32_t +static ALWAYS_INLINE uint32_t zink_program_cache_stages(uint32_t stages_present) { return (stages_present & ((1 << MESA_SHADER_TESS_CTRL) | @@ -70,6 +70,12 @@ zink_program_cache_stages(uint32_t stages_present) (1 << MESA_SHADER_GEOMETRY))) >> 1; } +static ALWAYS_INLINE uint32_t +zink_mesh_cache_stages(uint32_t stages_present) +{ + return !!(stages_present & BITFIELD_BIT(MESA_SHADER_TASK)); +} + static ALWAYS_INLINE bool zink_is_zsbuf_used(const struct zink_context *ctx) { @@ -118,7 +124,8 @@ zink_fence_wait(struct pipe_context *ctx); static ALWAYS_INLINE void zink_update_dirty_gfx_stages(struct zink_context *ctx, uint32_t pstages) { - ctx->dirty_gfx_stages |= pstages; + ctx->dirty_gfx_stages |= (pstages & BITFIELD_MASK(MESA_SHADER_COMPUTE)); + ctx->dirty_mesh_stages |= (pstages & (BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_MESH) | BITFIELD_BIT(MESA_SHADER_TASK))); } void @@ -195,6 +202,10 @@ zink_pipeline_flags_from_pipe_stage(mesa_shader_stage pstage) return VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; case MESA_SHADER_COMPUTE: return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + case MESA_SHADER_TASK: + return VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT; + case MESA_SHADER_MESH: + return VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT; default: UNREACHABLE("unknown shader stage"); } diff --git a/src/gallium/drivers/zink/zink_descriptors.c b/src/gallium/drivers/zink/zink_descriptors.c index b87b8a5adaa..3e222366b9a 100644 --- a/src/gallium/drivers/zink/zink_descriptors.c +++ b/src/gallium/drivers/zink/zink_descriptors.c @@ -249,6 +249,8 @@ create_gfx_layout(struct zink_context *ctx, struct zink_descriptor_layout_key ** VkDescriptorType vktype = get_push_types(screen, &dsl_type); for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { unsigned stage_bits = BITFIELD_BIT(i); + if (i < MESA_SHADER_TESS_EVAL && screen->info.have_EXT_mesh_shader) + stage_bits |= BITFIELD_BIT(i + MESA_SHADER_TASK); init_push_binding(&bindings[i], i, stage_bits, vktype); } if (fbfetch) { @@ -657,10 +659,11 @@ zink_descriptor_program_init(struct zink_context *ctx, struct zink_program *pg) for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) wd_count[i + 1] = pg->dd.pool_key[i] ? pg->dd.pool_key[i]->layout->num_bindings : 0; - enum zink_pipeline_idx pidx = pg->is_compute ? 
ZINK_PIPELINE_COMPUTE : ZINK_PIPELINE_GFX; + enum zink_pipeline_idx pidx = pg->is_compute ? ZINK_PIPELINE_COMPUTE : stages[MESA_SHADER_VERTEX]? ZINK_PIPELINE_GFX : ZINK_PIPELINE_MESH; VkDescriptorUpdateTemplateEntry *push_entries[] = { ctx->dd.push_entries, &ctx->dd.compute_push_entry, + ctx->dd.mesh_push_entries, }; for (unsigned i = 0; i < pg->num_dsl; i++) { bool is_push = i == 0; @@ -1173,7 +1176,7 @@ zink_descriptors_update_masked_buffer(struct zink_context *ctx, enum zink_pipeli struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_batch_state *bs = ctx->bs; bool is_compute = pidx == ZINK_PIPELINE_COMPUTE; - struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; + struct zink_program *pg = is_compute ? &ctx->curr_compute->base : pidx == ZINK_PIPELINE_GFX ? &ctx->curr_program->base : &ctx->mesh_program->base; /* skip if no descriptors are updated */ if (!pg->dd.binding_usage || (!changed_sets && !bind_sets)) @@ -1249,7 +1252,7 @@ zink_descriptors_update_masked(struct zink_context *ctx, enum zink_pipeline_idx struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_batch_state *bs = ctx->bs; bool is_compute = pidx == ZINK_PIPELINE_COMPUTE; - struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; + struct zink_program *pg = is_compute ? &ctx->curr_compute->base : pidx == ZINK_PIPELINE_GFX ? &ctx->curr_program->base : &ctx->mesh_program->base; VkDescriptorSet desc_sets[ZINK_DESCRIPTOR_BASE_TYPES]; /* skip if no descriptors are updated */ @@ -1321,7 +1324,8 @@ zink_descriptors_update(struct zink_context *ctx, enum zink_pipeline_idx pidx) { struct zink_batch_state *bs = ctx->bs; bool is_compute = pidx == ZINK_PIPELINE_COMPUTE; - struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; + bool is_mesh = pidx == ZINK_PIPELINE_MESH; + struct zink_program *pg = is_compute ? &ctx->curr_compute->base : pidx == ZINK_PIPELINE_GFX ? &ctx->curr_program->base : &ctx->mesh_program->base; struct zink_screen *screen = zink_screen(ctx->base.screen); bool have_KHR_push_descriptor = screen->info.have_KHR_push_descriptor; @@ -1401,7 +1405,9 @@ zink_descriptors_update(struct zink_context *ctx, enum zink_pipeline_idx pidx) info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; info.pNext = NULL; info.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - info.data.pUniformBuffer = &ctx->di.db.ubos[is_compute ? MESA_SHADER_COMPUTE : i][0]; + /* mesh push consts pack as VS/TCS */ + enum mesa_shader_stage stage = is_compute ? MESA_SHADER_COMPUTE : is_mesh && i <= MESA_SHADER_TESS_CTRL ? i + MESA_SHADER_TASK : i; + info.data.pUniformBuffer = &ctx->di.db.ubos[stage][0]; uint64_t stage_offset = offset + (is_compute ? 
0 : ctx->dd.db_offset[i]); VKSCR(GetDescriptorEXT)(screen->dev, &info, screen->info.db_props.robustUniformBufferDescriptorSize, bs->dd.db_map + stage_offset); @@ -1484,15 +1490,20 @@ zink_context_invalidate_descriptor_state(struct zink_context *ctx, mesa_shader_s ctx->dd.push_state_changed[ZINK_PIPELINE_COMPUTE] = true; else if (shader < MESA_SHADER_FRAGMENT) ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = true; + else if (shader > MESA_SHADER_FRAGMENT) + ctx->dd.push_state_changed[ZINK_PIPELINE_MESH] = true; else - ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = true; + ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = ctx->dd.push_state_changed[ZINK_PIPELINE_MESH] = true; } else { if (shader == MESA_SHADER_COMPUTE) ctx->dd.state_changed[ZINK_PIPELINE_COMPUTE] |= BITFIELD_BIT(type); else if (shader < MESA_SHADER_FRAGMENT) ctx->dd.state_changed[ZINK_PIPELINE_GFX] |= BITFIELD_BIT(type); + else if (shader > MESA_SHADER_FRAGMENT) + ctx->dd.state_changed[ZINK_PIPELINE_MESH] |= BITFIELD_BIT(type); else { ctx->dd.state_changed[ZINK_PIPELINE_GFX] |= BITFIELD_BIT(type); + ctx->dd.state_changed[ZINK_PIPELINE_MESH] |= BITFIELD_BIT(type); } } } @@ -1504,8 +1515,10 @@ zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, mesa_ ctx->dd.push_state_changed[ZINK_PIPELINE_COMPUTE] = true; else if (shader < MESA_SHADER_FRAGMENT) ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = true; + else if (shader > MESA_SHADER_FRAGMENT) + ctx->dd.push_state_changed[ZINK_PIPELINE_MESH] = true; else - ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = true; + ctx->dd.push_state_changed[ZINK_PIPELINE_GFX] = ctx->dd.push_state_changed[ZINK_PIPELINE_MESH] = true; else { if (type > ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW) type -= ZINK_DESCRIPTOR_COMPACT; @@ -1513,8 +1526,11 @@ zink_context_invalidate_descriptor_state_compact(struct zink_context *ctx, mesa_ ctx->dd.state_changed[ZINK_PIPELINE_COMPUTE] |= BITFIELD_BIT(type); else if (shader < MESA_SHADER_FRAGMENT) ctx->dd.state_changed[ZINK_PIPELINE_GFX] |= BITFIELD_BIT(type); + else if (shader > MESA_SHADER_FRAGMENT) + ctx->dd.state_changed[ZINK_PIPELINE_MESH] |= BITFIELD_BIT(type); else { ctx->dd.state_changed[ZINK_PIPELINE_GFX] |= BITFIELD_BIT(type); + ctx->dd.state_changed[ZINK_PIPELINE_MESH] |= BITFIELD_BIT(type); } } } @@ -1659,6 +1675,13 @@ zink_descriptors_init(struct zink_context *ctx) VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[i]; init_push_template_entry(entry, i, i); } + /* task+mesh occupy vs+tcs */ + init_push_template_entry(&ctx->dd.mesh_push_entries[0], zink_descriptor_stage_idx(MESA_SHADER_TASK), MESA_SHADER_TASK); + init_push_template_entry(&ctx->dd.mesh_push_entries[1], zink_descriptor_stage_idx(MESA_SHADER_MESH), MESA_SHADER_MESH); + for (unsigned i = MESA_SHADER_TESS_EVAL; i < ZINK_GFX_SHADER_COUNT; i++) { + VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.mesh_push_entries[i]; + init_push_template_entry(entry, i, i); + } init_push_template_entry(&ctx->dd.compute_push_entry, MESA_SHADER_COMPUTE, MESA_SHADER_COMPUTE); VkDescriptorUpdateTemplateEntry *entry = &ctx->dd.push_entries[ZINK_GFX_SHADER_COUNT]; //fbfetch entry->dstBinding = ZINK_FBFETCH_BINDING; diff --git a/src/gallium/drivers/zink/zink_descriptors.h b/src/gallium/drivers/zink/zink_descriptors.h index 30527124e05..62cb5dd295e 100644 --- a/src/gallium/drivers/zink/zink_descriptors.h +++ b/src/gallium/drivers/zink/zink_descriptors.h @@ -135,6 +135,17 @@ zink_descriptor_type_from_bindless_index(unsigned idx) } } +ALWAYS_INLINE static unsigned 
+zink_descriptor_stage_idx(enum mesa_shader_stage stage) +{ + if (stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH) + return stage - MESA_SHADER_TASK; + /* clamp compute bindings for better driver efficiency */ + if (mesa_shader_stage_is_compute(stage)) + return 0; + return stage; +} + bool zink_descriptor_layouts_init(struct zink_screen *screen); diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index e41d37d9dfb..69fda1fd42d 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -267,9 +267,9 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum VkPipeline pipeline = VK_NULL_HANDLE; if (!ctx->curr_program->base.uses_shobj) { if (screen->info.have_EXT_graphics_pipeline_library) - pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); + pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); else - pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); + pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); } if (pipeline) { pipeline_changed = prev_pipeline != pipeline; @@ -961,6 +961,180 @@ zink_draw(struct pipe_context *pctx, ctx->last_work_was_compute = false; ctx->work_count = work_count; + if (!ctx->pipeline_changed[ZINK_PIPELINE_MESH]) { + ctx->pipeline_changed[ZINK_PIPELINE_MESH] = true; + zink_select_draw_mesh_tasks(ctx); + } + /* flush if there's >100k draws */ + if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush)) + pctx->flush(pctx, NULL, 0); +} + + +template +static bool +update_mesh_pipeline(struct zink_context *ctx, struct zink_batch_state *bs) +{ + VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline; + const struct zink_screen *screen = zink_screen(ctx->base.screen); + bool shaders_changed = ctx->mesh_dirty || ctx->dirty_mesh_stages; + zink_mesh_program_update_optimal(ctx); + bool pipeline_changed = false; + VkPipeline pipeline = VK_NULL_HANDLE; + if (!ctx->mesh_program->base.uses_shobj) { + if (screen->info.have_EXT_graphics_pipeline_library) + pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); + else + pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); + } + if (pipeline) { + pipeline_changed = prev_pipeline != pipeline; + if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) + VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + ctx->shobj_draw = false; + } else { + if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) { + VkShaderStageFlagBits stages[] = { + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, + VK_SHADER_STAGE_TASK_BIT_EXT, + VK_SHADER_STAGE_MESH_BIT_EXT, + }; + /* always rebind all stages */ + VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->mesh_program->objects); + VKCTX(CmdBindShadersEXT)(bs->cmdbuf, 2, &stages[MESA_SHADER_TASK], &ctx->mesh_program->objects[MESA_SHADER_TASK]); + VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled); + VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE); + pipeline_changed = true; + } + ctx->shobj_draw = true; + } + return pipeline_changed; +} + +template 
+void +zink_draw_mesh_tasks(struct pipe_context *pctx, const struct pipe_grid_info *info) +{ + MESA_TRACE_FUNC(); + + struct zink_context *ctx = zink_context(pctx); + struct zink_screen *screen = zink_screen(pctx->screen); + struct zink_batch_state *bs = ctx->bs; + unsigned work_count = ctx->work_count; + + if (ctx->memory_barrier && !ctx->blitting) + zink_flush_memory_barrier(ctx, false); + + if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) { + ctx->buffer_rebind_counter = screen->buffer_rebind_counter; + zink_rebind_all_buffers(ctx); + } + + if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) { + ctx->image_rebind_counter = screen->image_rebind_counter; + zink_rebind_all_images(ctx); + } + + if (info->indirect) { + check_buffer_barrier(ctx, info->indirect, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); + if (info->indirect_draw_count) + check_buffer_barrier(ctx, info->indirect_draw_count, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT); + } + zink_update_barriers(ctx, false, NULL, info->indirect, info->indirect_draw_count); + + if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) { + zink_batch_no_rp(ctx); + VkMemoryBarrier mb; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mb.pNext = NULL; + mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + VKSCR(CmdPipelineBarrier)(ctx->bs->cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 1, &mb, 0, NULL, 0, NULL); + } + + bool need_rp_update = !ctx->in_rp || ctx->rp_changed; + zink_batch_rp(ctx); + /* check dead swapchain */ + if (unlikely(!ctx->in_rp)) + return; + + if (BATCH_CHANGED) + zink_update_descriptor_refs(ctx, false); + + bool pipeline_changed = need_rp_update || ctx->mesh_dirty || ctx->dirty_mesh_stages || BATCH_CHANGED ? + update_mesh_pipeline(ctx, bs) : + false; + + emit_dynamic_state(ctx, pipeline_changed, ctx->vp_state.mesh_num_viewports); + + struct zink_rasterizer_state *rast_state = ctx->rast_state; + bool using_depth_bias = !!rast_state->offset_fill; + if (BATCH_CHANGED || using_depth_bias != ctx->was_using_depth_bias || ctx->depth_bias_changed) { + if (using_depth_bias) { + VKCTX(CmdSetDepthBias)(bs->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale); + } else { + VKCTX(CmdSetDepthBias)(bs->cmdbuf, 0.0f, 0.0f, 0.0f); + } + } + ctx->was_using_depth_bias = using_depth_bias; + ctx->depth_bias_changed = false; + + if (zink_program_has_descriptors(&ctx->mesh_program->base) && (BATCH_CHANGED || ctx->dd.push_state_changed[ZINK_PIPELINE_MESH] || ctx->dd.state_changed[ZINK_PIPELINE_MESH] || pipeline_changed)) + zink_descriptors_update(ctx, ZINK_PIPELINE_MESH); + + if (ctx->di.any_bindless_dirty && + /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */ + zink_program_has_descriptors(&ctx->mesh_program->base) && + ctx->mesh_program->base.dd.bindless) + zink_descriptors_update_bindless(ctx); + + if (BATCH_CHANGED) { + ctx->pipeline_changed[ZINK_PIPELINE_MESH] = false; + zink_select_draw_mesh_tasks(ctx); + } + + bool marker = false; + if (unlikely(zink_tracing)) { + marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw_mesh(%u cbufs|%s, %dx%d)", + ctx->fb_state.nr_cbufs, + ctx->fb_state.zsbuf.texture ? 
"zsbuf" : "", + lround(ctx->fb_state.width), lround(ctx->fb_state.height)); + } + + if (info->indirect) { + struct zink_resource *indirect = zink_resource(info->indirect); + zink_batch_reference_resource_rw(ctx, indirect, false); + if (info->indirect_draw_count) { + struct zink_resource *indirect_draw_count = zink_resource(info->indirect_draw_count); + zink_batch_reference_resource_rw(ctx, indirect_draw_count, false); + VKCTX(CmdDrawMeshTasksIndirectCountEXT)(bs->cmdbuf, indirect->obj->buffer, info->indirect_offset, + indirect_draw_count->obj->buffer, info->indirect_draw_count_offset, + info->draw_count, info->indirect_stride); + } else + VKCTX(CmdDrawMeshTasksIndirectEXT)(bs->cmdbuf, indirect->obj->buffer, info->indirect_offset, info->draw_count, info->indirect_stride); + } else { + VKCTX(CmdDrawMeshTasksEXT)(bs->cmdbuf, info->grid[0], info->grid[1], info->grid[2]); + } + + if (unlikely(zink_tracing)) + zink_cmd_debug_marker_end(ctx, bs->cmdbuf, marker); + + ctx->last_work_was_compute = false; + ctx->work_count++; + if (!ctx->pipeline_changed[ZINK_PIPELINE_GFX]) { + ctx->pipeline_changed[ZINK_PIPELINE_GFX] = true; + zink_select_draw_vbo(ctx); + } /* flush if there's >100k draws */ if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush)) pctx->flush(pctx, NULL, 0); @@ -1222,6 +1396,12 @@ zink_invalid_draw_vbo(struct pipe_context *pipe, UNREACHABLE("vertex shader not bound"); } +static void +zink_invalid_draw_mesh_tasks(struct pipe_context *pctx, const struct pipe_grid_info *dinfo) +{ + UNREACHABLE("mesh shader not bound"); +} + static void zink_invalid_draw_vertex_state(struct pipe_context *pipe, struct pipe_vertex_state *vstate, @@ -1303,6 +1483,35 @@ equals_gfx_program(const void *a, const void *b) return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT); } +template +static uint32_t +hash_mesh_program(const void *key) +{ + const struct zink_shader **shaders = (const struct zink_shader**)key; + uint32_t base_hash = shaders[MESA_SHADER_MESH]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash; + if (HAS_TASK == 0) //MS+FS + return base_hash; + //TS+MS+FS + return base_hash ^ shaders[MESA_SHADER_TASK]->hash; +} + +template +static bool +equals_mesh_program(const void *a, const void *b) +{ + const void **sa = (const void**)a; + const void **sb = (const void**)b; + STATIC_ASSERT(MESA_SHADER_TASK == 6); + STATIC_ASSERT(MESA_SHADER_MESH == 7); + STATIC_ASSERT(MESA_SHADER_FRAGMENT == 4); + if (HAS_TASK == 0) //MS+FS + return sa[MESA_SHADER_MESH] == sb[MESA_SHADER_MESH] && + sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT]; + //TS+MS+FS + return sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT] && + !memcmp(&sa[MESA_SHADER_TASK], &sb[MESA_SHADER_TASK], sizeof(void*) * 2); +} + extern "C" void zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen) @@ -1355,6 +1564,15 @@ zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen) _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>); for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++) simple_mtx_init(&ctx->program_lock[i], mtx_plain); + + ctx->base.draw_mesh_tasks = zink_draw_mesh_tasks; + _mesa_hash_table_init(&ctx->mesh_cache[0], ctx, hash_mesh_program<0>, equals_mesh_program<0>); + _mesa_hash_table_init(&ctx->mesh_cache[1], ctx, hash_mesh_program<1>, equals_mesh_program<1>); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->mesh_lock); i++) + simple_mtx_init(&ctx->mesh_lock[i], mtx_plain); + ctx->draw_mesh_tasks[0] = 
zink_draw_mesh_tasks; + ctx->draw_mesh_tasks[1] = zink_draw_mesh_tasks; + ctx->base.draw_mesh_tasks = zink_invalid_draw_mesh_tasks; } void @@ -1380,4 +1598,9 @@ zink_init_screen_pipeline_libs(struct zink_screen *screen) _mesa_set_init(&screen->pipeline_libs[7], NULL, hash_gfx_program<7>, equals_gfx_program<7>); for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain); + + _mesa_set_init(&screen->mesh_pipeline_libs[0], NULL, hash_mesh_program<0>, equals_mesh_program<0>); + _mesa_set_init(&screen->mesh_pipeline_libs[1], NULL, hash_mesh_program<1>, equals_mesh_program<1>); + for (unsigned i = 0; i < ARRAY_SIZE(screen->mesh_pipeline_libs_lock); i++) + simple_mtx_init(&screen->mesh_pipeline_libs_lock[i], mtx_plain); } diff --git a/src/gallium/drivers/zink/zink_inlines.h b/src/gallium/drivers/zink/zink_inlines.h index 44d4474d99a..67d04d7b9d1 100644 --- a/src/gallium/drivers/zink/zink_inlines.h +++ b/src/gallium/drivers/zink/zink_inlines.h @@ -5,16 +5,23 @@ static inline void zink_select_draw_vbo(struct zink_context *ctx) { - ctx->base.draw_vbo = ctx->draw_vbo[ctx->pipeline_changed[0]]; - ctx->base.draw_vertex_state = ctx->draw_state[ctx->pipeline_changed[0]]; + ctx->base.draw_vbo = ctx->draw_vbo[ctx->pipeline_changed[ZINK_PIPELINE_GFX]]; + ctx->base.draw_vertex_state = ctx->draw_state[ctx->pipeline_changed[ZINK_PIPELINE_GFX]]; assert(ctx->base.draw_vbo); assert(ctx->base.draw_vertex_state); } +static inline void +zink_select_draw_mesh_tasks(struct zink_context *ctx) +{ + ctx->base.draw_mesh_tasks = ctx->draw_mesh_tasks[ctx->pipeline_changed[ZINK_PIPELINE_MESH]]; + assert(ctx->base.draw_mesh_tasks); +} + static inline void zink_select_launch_grid(struct zink_context *ctx) { - ctx->base.launch_grid = ctx->launch_grid[ctx->pipeline_changed[1]]; + ctx->base.launch_grid = ctx->launch_grid[ctx->pipeline_changed[ZINK_PIPELINE_COMPUTE]]; assert(ctx->base.launch_grid); } diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index 199e3c7d67f..ce943a47e88 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -36,6 +36,8 @@ #include "util/u_debug.h" #include "util/u_prim.h" +#define GFX_INPUT_MESH 32 + VkPipeline zink_create_gfx_pipeline(struct zink_screen *screen, struct zink_gfx_program *prog, @@ -47,7 +49,8 @@ zink_create_gfx_pipeline(struct zink_screen *screen, { struct zink_rasterizer_hw_state *hw_rast_state = (void*)&state->dyn_state3; VkPipelineVertexInputStateCreateInfo vertex_input_state; - bool needs_vi = !screen->info.have_EXT_vertex_input_dynamic_state; + bool is_mesh = !prog->shaders[MESA_SHADER_VERTEX]; + bool needs_vi = !screen->info.have_EXT_vertex_input_dynamic_state && !is_mesh; if (needs_vi) { memset(&vertex_input_state, 0, sizeof(vertex_input_state)); vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; @@ -76,7 +79,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen, VkPipelineInputAssemblyStateCreateInfo primitive_state = {0}; primitive_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; primitive_state.topology = primitive_topology; - if (!screen->info.have_EXT_extended_dynamic_state2) { + if (!screen->info.have_EXT_extended_dynamic_state2 && !is_mesh) { switch (primitive_topology) { case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: @@ -226,15 +229,18 @@ zink_create_gfx_pipeline(struct zink_screen *screen, 
dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VIEWPORT; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_SCISSOR; } - if (screen->info.have_EXT_vertex_input_dynamic_state) - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT; - else if (screen->info.have_EXT_extended_dynamic_state && state->uses_dynamic_stride && state->element_state->num_attribs) - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE; - if (screen->info.have_EXT_extended_dynamic_state2) { - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE; + if (screen->info.have_EXT_extended_dynamic_state2) dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE; - if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) - dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT; + if (!is_mesh) { + if (screen->info.have_EXT_vertex_input_dynamic_state) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_EXT; + else if (screen->info.have_EXT_extended_dynamic_state && state->uses_dynamic_stride && state->element_state->num_attribs) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE; + if (screen->info.have_EXT_extended_dynamic_state2) { + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE; + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT; + } } if (screen->info.have_EXT_extended_dynamic_state3) { dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT; @@ -270,10 +276,10 @@ zink_create_gfx_pipeline(struct zink_screen *screen, if (screen->info.have_EXT_color_write_enable) dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT; - assert(state->rast_prim != MESA_PRIM_COUNT || zink_debug & ZINK_DEBUG_SHADERDB); + assert(state->rast_prim != MESA_PRIM_COUNT || zink_debug & ZINK_DEBUG_SHADERDB || is_mesh); VkPipelineRasterizationLineStateCreateInfoEXT rast_line_state; - if (screen->info.have_EXT_line_rasterization && + if (screen->info.have_EXT_line_rasterization && !is_mesh && !state->shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) { rast_line_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT; rast_line_state.pNext = rast_state.pNext; @@ -354,9 +360,11 @@ zink_create_gfx_pipeline(struct zink_screen *screen, pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; pci.layout = prog->base.layout; pci.pNext = &state->rendering_info; - if (needs_vi) - pci.pVertexInputState = &vertex_input_state; - pci.pInputAssemblyState = &primitive_state; + if (!is_mesh) { + if (needs_vi) + pci.pVertexInputState = &vertex_input_state; + pci.pInputAssemblyState = &primitive_state; + } pci.pRasterizationState = &rast_state; pci.pColorBlendState = &blend_state; pci.pMultisampleState = &ms_state; @@ -377,10 +385,10 @@ zink_create_gfx_pipeline(struct zink_screen *screen, tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; } - VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT]; - VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0}; + VkPipelineShaderStageCreateInfo shader_stages[MESA_SHADER_MESH_STAGES]; + VkShaderModuleCreateInfo smci[MESA_SHADER_MESH_STAGES] = {0}; uint32_t num_stages = 0; - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if 
(!(prog->stages_present & BITFIELD_BIT(i))) continue; @@ -691,6 +699,35 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen, return pipeline; } +static VkPipeline +zink_create_mesh_pipeline_input(struct zink_screen *screen) +{ + VkGraphicsPipelineLibraryCreateInfoEXT gplci = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, + NULL, + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT + }; + + VkGraphicsPipelineCreateInfo pci = {0}; + pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pci.pNext = &gplci; + pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + pci.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + + VkPipeline pipeline; + VkResult result; + VRAM_ALLOC_LOOP(result, + VKSCR(CreateGraphicsPipelines)(screen->dev, VK_NULL_HANDLE, 1, &pci, NULL, &pipeline), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkCreateGraphicsPipelines failed (%s)", vk_Result_to_str(result)); + return VK_NULL_HANDLE; + } + ); + + return pipeline; +} + static VkPipeline create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_object *objs, unsigned stage_mask, VkPipelineLayout layout, VkPipelineCache pipeline_cache) { @@ -704,7 +741,7 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec &rendering_info, 0 }; - if (stage_mask & BITFIELD_BIT(MESA_SHADER_VERTEX)) + if (stage_mask & (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_MESH))) gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; if (stage_mask & BITFIELD_BIT(MESA_SHADER_FRAGMENT)) gplci.flags |= VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; @@ -743,7 +780,7 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_FRONT_FACE; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_CULL_MODE; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE; - if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints) + if (screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints && !(stage_mask & BITFIELD_BIT(MESA_SHADER_MESH))) dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT; dynamicStateEnables[state_count++] = VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT; @@ -794,9 +831,9 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec tdci.domainOrigin = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; } - VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT]; + VkPipelineShaderStageCreateInfo shader_stages[MESA_SHADER_MESH_STAGES]; uint32_t num_stages = 0; - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (!(stage_mask & BITFIELD_BIT(i))) continue; @@ -892,6 +929,21 @@ zink_create_gfx_pipeline_combined(struct zink_screen *screen, struct zink_gfx_pr } +/* vertex input pipeline library states with mesh: nothing matters */ +struct zink_gfx_input_key * +zink_find_or_create_input_mesh(struct zink_context *ctx) +{ + uint32_t hash = GFX_INPUT_MESH; + struct set_entry *he = _mesa_set_search_pre_hashed(&ctx->gfx_inputs, hash, &hash); + if (!he) { + struct zink_gfx_input_key *ikey = rzalloc(ctx, struct zink_gfx_input_key); + ikey->idx = hash; + ikey->pipeline = zink_create_mesh_pipeline_input(zink_screen(ctx->base.screen)); + he = 
_mesa_set_add_pre_hashed(&ctx->gfx_inputs, hash, ikey); + } + return (struct zink_gfx_input_key *)he->key; +} + /* vertex input pipeline library states with dynamic vertex input: only the topology matters */ struct zink_gfx_input_key * zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode) diff --git a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h index 7b050f15efb..b45c769dee4 100644 --- a/src/gallium/drivers/zink/zink_pipeline.h +++ b/src/gallium/drivers/zink/zink_pipeline.h @@ -39,6 +39,8 @@ struct zink_gfx_input_key * zink_find_or_create_input(struct zink_context *ctx, VkPrimitiveTopology vkmode); struct zink_gfx_input_key * zink_find_or_create_input_dynamic(struct zink_context *ctx, VkPrimitiveTopology vkmode); +struct zink_gfx_input_key * +zink_find_or_create_input_mesh(struct zink_context *ctx); VkPipeline zink_create_gfx_pipeline(struct zink_screen *screen, diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index e8bf1e193e7..316f36e1071 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -49,7 +49,7 @@ static void gfx_program_precompile_job(void *data, void *gdata, int thread_index); struct zink_gfx_program * -create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch); +create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, bool is_mesh); void debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr) @@ -423,6 +423,7 @@ static void generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { assert(!prog->objs[MESA_SHADER_VERTEX].mod); + assert(!(prog->stages_present & BITFIELD_BIT(MESA_SHADER_MESH))); uint32_t variant_hash = 0; bool default_variants = true; for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { @@ -458,8 +459,8 @@ generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *scree static void generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { - assert(!prog->objs[MESA_SHADER_VERTEX].mod); - for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + assert(!prog->objs[MESA_SHADER_VERTEX].mod && !prog->objs[MESA_SHADER_MESH].mod); + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { if (!(prog->stages_present & BITFIELD_BIT(i))) continue; @@ -470,8 +471,11 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree prog->objects[i] = zm->obj.obj; } - state->modules_changed = true; - prog->last_variant_hash = state->optimal_key; + if (prog->shaders[MESA_SHADER_VERTEX]) + state->modules_changed = true; + else + state->mesh_modules_changed = true; + prog->last_variant_hash = prog->shaders[MESA_SHADER_MESH] ? 
state->mesh_optimal_key : state->optimal_key; } static uint32_t @@ -488,6 +492,21 @@ equals_pipeline_lib_generated_tcs(const void *a, const void *b) return !memcmp(a, b, sizeof(uint32_t)); } +static uint32_t +hash_pipeline_lib_mesh(const void *key) +{ + const struct zink_gfx_library_key *gkey = key; + return gkey->optimal_key; +} + +static bool +equals_pipeline_lib_mesh(const void *a, const void *b) +{ + const struct zink_gfx_library_key *ak = a; + const struct zink_gfx_library_key *bk = b; + return ak->optimal_key == bk->optimal_key; +} + static uint32_t hash_pipeline_lib(const void *key) { @@ -631,7 +650,7 @@ zink_gfx_program_update(struct zink_context *ctx) update_gfx_program(ctx, prog); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; - prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash); + prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash, false); zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; @@ -702,6 +721,26 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; } +static void +update_mesh_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog) +{ + const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.mesh_optimal_key; + const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash; + const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT)); + if (key->fs_bits != last_prog_key->fs_bits || + /* always recheck shadow swizzles since they aren't directly part of the key */ + unlikely(shadow_needs_shader_swizzle)) { + assert(!prog->is_separable); + bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); + ctx->gfx_pipeline_state.modules_changed |= changed; + if (unlikely(shadow_needs_shader_swizzle)) { + struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; + ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t); + } + } + prog->last_variant_hash = ctx->gfx_pipeline_state.mesh_optimal_key; +} + static struct zink_gfx_program * replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog) { @@ -709,7 +748,7 @@ replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struc struct zink_gfx_program *real = prog->full_prog ? 
prog->full_prog : /* this will be NULL with ZINK_DEBUG_NOOPT */ - zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash); + zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash, false); entry->data = real; entry->key = real->shaders; real->base.removed = false; @@ -749,7 +788,7 @@ zink_gfx_program_update_optimal(struct zink_context *ctx) } } else if (must_replace) { /* this is a non-separable, incompatible prog which needs replacement */ - struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash); + struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash, false); generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state); entry->data = real; entry->key = real->shaders; @@ -762,7 +801,7 @@ zink_gfx_program_update_optimal(struct zink_context *ctx) update_gfx_program_optimal(ctx, prog); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; - prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch); + prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, false); prog->base.removed = false; _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); if (!prog->is_separable) { @@ -804,6 +843,90 @@ zink_gfx_program_update_optimal(struct zink_context *ctx) ctx->last_vertex_stage_dirty = false; } +void +zink_mesh_program_update_optimal(struct zink_context *ctx) +{ + MESA_TRACE_FUNC(); + struct zink_screen *screen = zink_screen(ctx->base.screen); + struct hash_table *ht = &ctx->mesh_cache[zink_mesh_cache_stages(ctx->shader_stages)]; + const uint32_t hash = ctx->mesh_hash; + simple_mtx_t *lock = &ctx->mesh_lock[zink_mesh_cache_stages(ctx->shader_stages)]; + assert(ctx->gfx_stages[MESA_SHADER_MESH]); + if (ctx->mesh_dirty) { + struct zink_gfx_program *prog = NULL; + ctx->gfx_pipeline_state.mesh_optimal_key = zink_sanitize_optimal_key_mesh(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + simple_mtx_lock(lock); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + + if (ctx->mesh_program) + ctx->gfx_pipeline_state.mesh_final_hash ^= ctx->mesh_program->last_variant_hash; + if (entry) { + prog = (struct zink_gfx_program*)entry->data; + bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects_mesh(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs_mesh(ctx)); + if (prog->is_separable) { + /* shader variants can't be handled by separable programs: sync and compile */ + if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(ctx->gfx_pipeline_state.mesh_optimal_key) || must_replace) + util_queue_fence_wait(&prog->base.cache_fence); + /* If the optimized linked pipeline is done compiling, swap it into place. 
*/ + if (util_queue_fence_is_signalled(&prog->base.cache_fence) && + /* but only if needed for ZINK_DEBUG=noopt */ + (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(ctx->gfx_pipeline_state.mesh_optimal_key) || must_replace)) { + prog = replace_separable_prog(ctx, entry, prog); + } + } else if (must_replace) { + /* this is a non-separable, incompatible prog which needs replacement */ + struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, 0, ctx->mesh_hash, true); + generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state); + entry->data = real; + entry->key = real->shaders; + real->base.removed = false; + prog->base.removed = true; + prog = real; + } else if (!prog->base.precompile_done) { + util_queue_fence_wait(&prog->base.cache_fence); + } + update_mesh_program_optimal(ctx, prog); + } else { + ctx->dirty_gfx_stages |= ctx->shader_stages; + prog = create_gfx_program_separable(ctx, ctx->gfx_stages, 0, true); + prog->base.removed = false; + _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); + if (!prog->is_separable) { + zink_screen_get_pipeline_cache(screen, &prog->base, false); + perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n"); + generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + } + } + simple_mtx_unlock(lock); + if (prog && prog != ctx->mesh_program) + zink_batch_reference_program(ctx, &prog->base); + ctx->mesh_program = prog; + ctx->gfx_pipeline_state.mesh_final_hash ^= ctx->mesh_program->last_variant_hash; + } else if (ctx->dirty_mesh_stages) { + /* remove old hash */ + ctx->gfx_pipeline_state.mesh_optimal_key = zink_sanitize_optimal_key_mesh(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + ctx->gfx_pipeline_state.mesh_final_hash ^= ctx->mesh_program->last_variant_hash; + + bool must_replace = ctx->mesh_program->base.uses_shobj ? 
!zink_can_use_shader_objects(ctx) : (ctx->mesh_program->is_separable && !zink_can_use_pipeline_libs(ctx)); + if (must_replace || (ctx->mesh_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(ctx->gfx_pipeline_state.mesh_optimal_key))) { + struct zink_gfx_program *prog = ctx->mesh_program; + + util_queue_fence_wait(&prog->base.cache_fence); + /* shader variants can't be handled by separable programs: sync and compile */ + perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n"); + simple_mtx_lock(lock); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + ctx->mesh_program = replace_separable_prog(ctx, entry, prog); + simple_mtx_unlock(lock); + } + update_mesh_program_optimal(ctx, ctx->mesh_program); + /* apply new hash */ + ctx->gfx_pipeline_state.mesh_final_hash ^= ctx->mesh_program->last_variant_hash; + } + ctx->dirty_mesh_stages = 0; + ctx->mesh_dirty = false; +} + static void optimized_compile_job(void *data, void *gdata, int thread_index) { @@ -826,8 +949,8 @@ optimized_shobj_compile_job(void *data, void *gdata, int thread_index) struct zink_gfx_pipeline_cache_entry *pc_entry = data; struct zink_screen *screen = gdata; - struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT]; - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + struct zink_shader_object objs[MESA_SHADER_MESH_STAGES]; + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { objs[i].mod = VK_NULL_HANDLE; objs[i].spirv = pc_entry->shobjs[i].spirv; } @@ -1014,7 +1137,7 @@ create_program(struct zink_context *ctx, bool is_compute) static void assign_io(struct zink_screen *screen, - nir_shader *shaders[ZINK_GFX_SHADER_COUNT]) + nir_shader *shaders[MESA_SHADER_MESH_STAGES]) { for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) { nir_shader *producer = shaders[i]; @@ -1029,6 +1152,19 @@ assign_io(struct zink_screen *screen, } } +static void +assign_io_mesh(struct zink_screen *screen, + nir_shader *shaders[MESA_SHADER_MESH_STAGES]) +{ + nir_shader *producer = shaders[MESA_SHADER_TASK]; + nir_shader *consumer = shaders[MESA_SHADER_MESH]; + if (producer) + zink_compiler_assign_io(screen, producer, consumer); + producer = shaders[MESA_SHADER_MESH]; + consumer = shaders[MESA_SHADER_FRAGMENT]; + zink_compiler_assign_io(screen, producer, consumer); +} + void zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs) { @@ -1096,11 +1232,54 @@ find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *pr return libs; } +static struct zink_gfx_lib_cache * +create_lib_cache_mesh(struct zink_gfx_program *prog) +{ + struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache); + libs->stages_present = prog->stages_present; + simple_mtx_init(&libs->lock, mtx_plain); + _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_mesh, equals_pipeline_lib_mesh); + return libs; +} + +static struct zink_gfx_lib_cache * +find_or_create_lib_cache_mesh(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + unsigned idx = !!prog->shaders[MESA_SHADER_TASK]; + struct set *ht = &screen->mesh_pipeline_libs[idx]; + const uint32_t hash = prog->gfx_hash; + + simple_mtx_lock(&screen->mesh_pipeline_libs_lock[idx]); + bool found = false; + struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found); + struct zink_gfx_lib_cache *libs; + if (found) { + libs = (void*)entry->key; + } else { + libs = create_lib_cache_mesh(prog); + memcpy(libs->shaders, 
prog->shaders, sizeof(prog->shaders)); + entry->key = libs; + unsigned refs = 0; + for (unsigned i = MESA_SHADER_FRAGMENT; i < MESA_SHADER_MESH_STAGES; i++) { + if (prog->shaders[i]) { + simple_mtx_lock(&prog->shaders[i]->lock); + util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs); + simple_mtx_unlock(&prog->shaders[i]->lock); + refs++; + } + } + p_atomic_set(&libs->refcount, refs); + } + simple_mtx_unlock(&screen->mesh_pipeline_libs_lock[idx]); + return libs; +} + static struct zink_gfx_program * gfx_program_create(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, - uint32_t gfx_hash) + uint32_t gfx_hash, + bool is_mesh) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_gfx_program *prog = create_program(ctx, false); @@ -1111,14 +1290,14 @@ gfx_program_create(struct zink_context *ctx, prog->base.removed = true; prog->optimal_keys = screen->optimal_keys; - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = is_mesh ? MESA_SHADER_FRAGMENT : 0; i < (is_mesh ? MESA_SHADER_MESH_STAGES : MESA_SHADER_STAGES); ++i) { util_dynarray_init(&prog->shader_cache[i][0][0], prog->base.ralloc_ctx); util_dynarray_init(&prog->shader_cache[i][0][1], prog->base.ralloc_ctx); util_dynarray_init(&prog->shader_cache[i][1][0], prog->base.ralloc_ctx); util_dynarray_init(&prog->shader_cache[i][1][1], prog->base.ralloc_ctx); if (stages[i]) { prog->shaders[i] = stages[i]; - prog->stages_present |= BITFIELD_BIT(i); + prog->stages_present |= BITFIELD_BIT(stages[i]->info.stage); if (i != MESA_SHADER_FRAGMENT) prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated; prog->needs_inlining |= prog->shaders[i]->needs_inlining; @@ -1132,7 +1311,7 @@ gfx_program_create(struct zink_context *ctx, prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); } prog->stages_remaining = prog->stages_present; - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (prog->shaders[i]) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); @@ -1142,7 +1321,9 @@ gfx_program_create(struct zink_context *ctx, } p_atomic_dec(&prog->base.reference.count); - if (stages[MESA_SHADER_GEOMETRY]) + if (is_mesh) + prog->last_vertex_stage = stages[MESA_SHADER_MESH]; + else if (stages[MESA_SHADER_GEOMETRY]) prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY]; else if (stages[MESA_SHADER_TESS_EVAL]) prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL]; @@ -1167,14 +1348,15 @@ static struct zink_gfx_program * gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog) { struct zink_screen *screen = zink_screen(ctx->base.screen); - nir_shader *nir[ZINK_GFX_SHADER_COUNT]; + nir_shader *nir[MESA_SHADER_MESH_STAGES]; + bool is_mesh = !!prog->shaders[MESA_SHADER_MESH]; /* iterate in reverse order to create TES before generated TCS */ - for (int i = MESA_SHADER_FRAGMENT; i >= MESA_SHADER_VERTEX; i--) { + for (int i = MESA_SHADER_MESH; i >= MESA_SHADER_VERTEX; i--) { if (prog->shaders[i]) { util_queue_fence_wait(&prog->shaders[i]->precompile.fence); /* this may have already been precompiled for separate shader */ - if (i == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir) + if (prog->shaders[i]->info.stage == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir) zink_shader_tcs_init(screen, prog->shaders[MESA_SHADER_TESS_CTRL], 
nir[MESA_SHADER_TESS_EVAL], &nir[i]); else nir[i] = zink_shader_deserialize(screen, prog->shaders[i]); @@ -1182,21 +1364,29 @@ gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog) nir[i] = NULL; } } - assign_io(screen, nir); - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + + if (is_mesh) + assign_io_mesh(screen, nir); + else + assign_io(screen, nir); + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { if (nir[i]) zink_shader_serialize_blob(nir[i], &prog->blobs[i]); ralloc_free(nir[i]); } - if (screen->optimal_keys) - prog->libs = find_or_create_lib_cache(screen, prog); + if (screen->optimal_keys) { + if (is_mesh) + prog->libs = find_or_create_lib_cache_mesh(screen, prog); + else + prog->libs = find_or_create_lib_cache(screen, prog); + } if (prog->libs) p_atomic_inc(&prog->libs->refcount); struct mesa_blake3 sctx; _mesa_blake3_init(&sctx); - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (prog->shaders[i]) _mesa_blake3_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1)); } @@ -1217,9 +1407,10 @@ struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, - uint32_t gfx_hash) + uint32_t gfx_hash, + bool is_mesh) { - struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash); + struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash, is_mesh); if (prog) prog = gfx_program_init(ctx, prog); return prog; @@ -1235,7 +1426,7 @@ create_linked_separable_job(void *data, void *gdata, int thread_index) /* this is a dead program */ if (prog->base.removed) return; - prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash); + prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash, !!prog->shaders[MESA_SHADER_MESH]); /* block gfx_shader_prune in the main thread */ util_queue_fence_reset(&prog->full_prog->base.cache_fence); /* add an ownership ref */ @@ -1247,24 +1438,28 @@ create_linked_separable_job(void *data, void *gdata, int thread_index) } struct zink_gfx_program * -create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch) +create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, bool is_mesh) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool is_separate = true; - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) is_separate &= !stages[i] || stages[i]->info.separate_shader; /* filter cases that need real pipelines */ + bool is_default = is_mesh ? ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(ctx->gfx_pipeline_state.optimal_key) : + ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key); + bool can_gpl = is_mesh ? zink_can_use_pipeline_libs_mesh(ctx) : zink_can_use_pipeline_libs(ctx); + uint32_t hash = is_mesh ? ctx->mesh_hash : ctx->gfx_hash; if (!is_separate || /* TODO: maybe try variants? 
grimace */ - !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || - !zink_can_use_pipeline_libs(ctx)) - return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + !is_default || + !can_gpl) + return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh); + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { /* ensure async shader creation is done */ if (stages[i]) { util_queue_fence_wait(&stages[i]->precompile.fence); if (!stages[i]->precompile.obj.mod) - return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); + return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh); } } @@ -1273,8 +1468,10 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag goto fail; prog->is_separable = true; - prog->gfx_hash = ctx->gfx_hash; - prog->base.uses_shobj = screen->info.have_EXT_shader_object && !stages[MESA_SHADER_VERTEX]->info.view_mask && !BITSET_TEST(stages[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); + prog->gfx_hash = hash; + prog->base.uses_shobj = screen->info.have_EXT_shader_object && + (is_mesh || !stages[MESA_SHADER_VERTEX]->info.view_mask) && + !BITSET_TEST(stages[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); prog->stages_remaining = prog->stages_present = ctx->shader_stages; memcpy(prog->shaders, stages, sizeof(prog->shaders)); @@ -1292,7 +1489,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag } unsigned refs = 0; - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (prog->shaders[i]) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); @@ -1313,7 +1510,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag _mesa_hash_table_init(&prog->pipelines[i], prog->base.ralloc_ctx, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); } - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl) continue; int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1; @@ -1330,7 +1527,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag } prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); - prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; + prog->last_variant_hash = is_mesh ? 
ctx->gfx_pipeline_state.mesh_optimal_key : ctx->gfx_pipeline_state.optimal_key; if (!screen->info.have_EXT_shader_object) { VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; @@ -1608,7 +1805,7 @@ zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descrip return get_num_bindings(comp->shader, type); } struct zink_gfx_program *prog = (void*)pg; - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { if (prog->shaders[i]) num_bindings += get_num_bindings(prog->shaders[i], type); } @@ -1656,7 +1853,7 @@ zink_destroy_gfx_program(struct zink_screen *screen, deinit_program(screen, &prog->base); - for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { + for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { if (prog->shaders[i]) { _mesa_set_remove_key(prog->shaders[i]->programs, prog); prog->shaders[i] = NULL; @@ -1789,8 +1986,12 @@ bind_gfx_stage(struct zink_context *ctx, mesa_shader_stage stage, struct zink_sh else ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage); - if (ctx->gfx_stages[stage]) - ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; + if (ctx->gfx_stages[stage]) { + if (stage < MESA_SHADER_COMPUTE) + ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; + if (stage == MESA_SHADER_FRAGMENT || stage > MESA_SHADER_COMPUTE) + ctx->mesh_hash ^= ctx->gfx_stages[stage]->hash; + } if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) { ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY); @@ -1798,17 +1999,33 @@ bind_gfx_stage(struct zink_context *ctx, mesa_shader_stage stage, struct zink_sh } ctx->gfx_stages[stage] = shader; - ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX]; - ctx->gfx_pipeline_state.modules_changed = true; + if (stage < MESA_SHADER_COMPUTE) + ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX]; + if (stage == MESA_SHADER_FRAGMENT || stage > MESA_SHADER_COMPUTE) + ctx->mesh_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_MESH]; + if (stage <= MESA_SHADER_FRAGMENT) + ctx->gfx_pipeline_state.modules_changed = true; + if (stage >= MESA_SHADER_FRAGMENT) + ctx->gfx_pipeline_state.mesh_modules_changed = true; if (shader) { ctx->shader_stages |= BITFIELD_BIT(stage); - ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; + if (stage < MESA_SHADER_COMPUTE) + ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; + if (stage == MESA_SHADER_FRAGMENT || stage > MESA_SHADER_COMPUTE) + ctx->mesh_hash ^= ctx->gfx_stages[stage]->hash; if (shader->info.uses_bindless) zink_descriptors_init_bindless(ctx); } else { - if (ctx->curr_program) - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; - ctx->curr_program = NULL; + if (stage < MESA_SHADER_COMPUTE) { + if (ctx->curr_program) + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + ctx->curr_program = NULL; + } + if (stage == MESA_SHADER_FRAGMENT || stage > MESA_SHADER_COMPUTE) { + if (ctx->mesh_program) + ctx->gfx_pipeline_state.mesh_final_hash ^= ctx->mesh_program->last_variant_hash; + ctx->mesh_program = NULL; + } ctx->shader_stages &= ~BITFIELD_BIT(stage); } } @@ -2005,7 +2222,7 @@ zink_bind_fs_state(struct pipe_context *pctx, zink_update_fs_key_samples(ctx); if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) { if 
(ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output; } zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false); @@ -2018,6 +2235,34 @@ zink_bind_fs_state(struct pipe_context *pctx, zink_update_fbfetch(ctx); } +static void +zink_bind_ts_state(struct pipe_context *pctx, + void *cso) +{ + struct zink_context *ctx = zink_context(pctx); + if (!cso && !ctx->gfx_stages[MESA_SHADER_TASK]) + return; + bind_gfx_stage(ctx, MESA_SHADER_TASK, cso); +} + +static void +zink_bind_ms_state(struct pipe_context *pctx, + void *cso) +{ + struct zink_context *ctx = zink_context(pctx); + if (!cso && !ctx->gfx_stages[MESA_SHADER_MESH]) + return; + bind_gfx_stage(ctx, MESA_SHADER_MESH, cso); + + if (!cso) + return; + + if (ctx->gfx_stages[MESA_SHADER_MESH]->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK)) + ctx->vp_state.mesh_num_viewports = MIN2(zink_screen(pctx->screen)->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS); + else + ctx->vp_state.mesh_num_viewports = 1; +} + static void zink_bind_gs_state(struct pipe_context *pctx, void *cso) @@ -2120,14 +2365,15 @@ struct zink_gfx_library_key * zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); + bool is_mesh = !prog->shaders[MESA_SHADER_VERTEX]; if (!gkey) { mesa_loge("ZINK: failed to allocate gkey!"); return NULL; } - gkey->optimal_key = state->optimal_key; - assert(gkey->optimal_key); - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) + gkey->optimal_key = !is_mesh ? state->optimal_key : state->mesh_optimal_key; + assert(is_mesh || gkey->optimal_key); + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) gkey->modules[i] = prog->objs[i].mod; gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog); _mesa_set_add(&prog->libs->libs, gkey); @@ -2193,13 +2439,13 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading) return; /* can't precompile fixedfunc */ - if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) { + if ((!shaders[MESA_SHADER_VERTEX] && !shaders[MESA_SHADER_MESH]) || !shaders[MESA_SHADER_FRAGMENT]) { /* handled directly from shader create */ return; } unsigned hash = 0; unsigned shader_stages = 0; - for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) { if (zshaders[i]) { hash ^= zshaders[i]->hash; shader_stages |= BITFIELD_BIT(i); @@ -2210,19 +2456,21 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) /* can't do fixedfunc tes either */ if (tess && !shaders[MESA_SHADER_TESS_EVAL]) return; - struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)]; - simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + bool is_mesh = (shader_stages & BITFIELD_BIT(MESA_SHADER_MESH)) > 0; + struct hash_table *ht = is_mesh ? &ctx->mesh_cache[zink_mesh_cache_stages(shader_stages)] : &ctx->program_cache[zink_program_cache_stages(shader_stages)]; + simple_mtx_t *lock = is_mesh ? 
&ctx->mesh_lock[zink_mesh_cache_stages(shader_stages)] : &ctx->program_lock[zink_program_cache_stages(shader_stages)]; + simple_mtx_lock(lock); /* link can be called repeatedly with the same shaders: ignore */ if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) { - simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + simple_mtx_unlock(lock); return; } - struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash); + struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash, is_mesh); u_foreach_bit(i, shader_stages) assert(prog->shaders[i]); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; - simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + simple_mtx_unlock(lock); if (zink_debug & ZINK_DEBUG_SHADERDB) { struct zink_screen *screen = zink_screen(pctx->screen); gfx_program_init(ctx, prog); @@ -2237,7 +2485,8 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL); } else { if (zink_screen(pctx->screen)->info.have_EXT_shader_object) - prog->base.uses_shobj = !zshaders[MESA_SHADER_VERTEX]->info.view_mask && !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); + prog->base.uses_shobj = (!zshaders[MESA_SHADER_VERTEX] || !zshaders[MESA_SHADER_VERTEX]->info.view_mask) && + !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); if (zink_debug & ZINK_DEBUG_NOBGC) { gfx_program_precompile_job(prog, pctx->screen, 0); } else { @@ -2258,7 +2507,7 @@ precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen) { zs->precompile.obj = zink_shader_compile_separate(screen, zs); if (!screen->info.have_EXT_shader_object) { - struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0}; + struct zink_shader_object objs[MESA_SHADER_MESH_STAGES] = {0}; objs[zs->info.stage].mod = zs->precompile.obj.mod; zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage); } @@ -2279,7 +2528,7 @@ gfx_shader_init_job(void *data, void *gdata, int thread_index) } if (zs->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && (screen->info.have_EXT_shader_object || - (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX)))) { + (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX || zs->info.stage == MESA_SHADER_MESH)))) { /* sample shading can't precompile */ if (zs->info.stage != MESA_SHADER_FRAGMENT || !zs->info.fs.uses_sample_shading) precompile_separate_shader(zs, screen); @@ -2349,6 +2598,14 @@ zink_program_init(struct zink_context *ctx) ctx->base.bind_tes_state = zink_bind_tes_state; ctx->base.delete_tes_state = zink_delete_cached_shader_state; + ctx->base.create_ts_state = zink_create_cached_shader_state; + ctx->base.bind_ts_state = zink_bind_ts_state; + ctx->base.delete_ts_state = zink_delete_cached_shader_state; + + ctx->base.create_ms_state = zink_create_cached_shader_state; + ctx->base.bind_ms_state = zink_bind_ms_state; + ctx->base.delete_ms_state = zink_delete_cached_shader_state; + ctx->base.create_compute_state = zink_create_cs_state; ctx->base.bind_compute_state = zink_bind_cs_state; ctx->base.get_compute_state_info = zink_get_compute_state_info; diff --git 
a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index 8714ca0fc51..654e127fe2c 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -123,6 +123,8 @@ void zink_gfx_program_update(struct zink_context *ctx); void zink_gfx_program_update_optimal(struct zink_context *ctx); +void +zink_mesh_program_update_optimal(struct zink_context *ctx); struct zink_gfx_library_key * @@ -156,7 +158,8 @@ struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, - uint32_t gfx_hash); + uint32_t gfx_hash, + bool is_mesh); void zink_destroy_gfx_program(struct zink_screen *screen, @@ -432,6 +435,31 @@ zink_can_use_shader_objects(const struct zink_context *ctx) !ctx->is_generated_gs_bound; } +ALWAYS_INLINE static bool +zink_can_use_pipeline_libs_mesh(const struct zink_context *ctx) +{ + return + /* this is just terrible */ + !zink_get_fs_base_key(ctx)->shadow_needs_shader_swizzle && + /* TODO: is sample shading even possible to handle with GPL? */ + !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading && + !zink_get_fs_base_key(ctx)->fbfetch_ms && + !ctx->gfx_pipeline_state.force_persample_interp && + !ctx->gfx_pipeline_state.min_samples; +} + +/* stricter requirements */ +ALWAYS_INLINE static bool +zink_can_use_shader_objects_mesh(const struct zink_context *ctx) +{ + return + ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(ctx->gfx_pipeline_state.optimal_key) && + /* TODO: is sample shading even possible to handle with GPL? */ + !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading && + !ctx->gfx_pipeline_state.force_persample_interp && + !ctx->gfx_pipeline_state.min_samples; +} + bool zink_set_rasterizer_discard(struct zink_context *ctx, bool disable); void @@ -465,6 +493,18 @@ zink_sanitize_optimal_key(struct zink_shader **shaders, uint32_t val) k.fs.force_dual_color_blend = false; return k.val; } + +static inline uint32_t +zink_sanitize_optimal_key_mesh(struct zink_shader **shaders, uint32_t val) +{ + union zink_shader_key_optimal k; + k.val = zink_shader_key_optimal_mesh(val); + if (!zink_shader_uses_samples(shaders[MESA_SHADER_FRAGMENT])) + k.fs.samples = false; + if (!(shaders[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1))) + k.fs.force_dual_color_blend = false; + return k.val; +} #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index c45ddf2d706..c23237ad98b 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -98,7 +98,7 @@ check_vertex_strides(struct zink_context *ctx) * in theory, zink supports many feature levels, * but it's important to provide a more optimized codepath for drivers that support all the best features */ -template +template VkPipeline zink_get_gfx_pipeline(struct zink_context *ctx, struct zink_gfx_program *prog, @@ -109,78 +109,114 @@ zink_get_gfx_pipeline(struct zink_context *ctx, struct zink_screen *screen = zink_screen(ctx->base.screen); bool uses_dynamic_stride = state->uses_dynamic_stride; - VkPrimitiveTopology vkmode = zink_primitive_topology(mode); - const unsigned idx = screen->info.dynamic_state3_props.dynamicPrimitiveTopologyUnrestricted ? + VkPrimitiveTopology vkmode = IS_MESH ? 
VK_PRIMITIVE_TOPOLOGY_MAX_ENUM : zink_primitive_topology(mode); + const unsigned idx = IS_MESH || screen->info.dynamic_state3_props.dynamicPrimitiveTopologyUnrestricted ? 0 : get_pipeline_idx<DYNAMIC_STATE >= ZINK_DYNAMIC_STATE>(mode, vkmode); assert(idx <= ARRAY_SIZE(prog->pipelines)); - if (!state->dirty && !state->modules_changed && - ((DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) && !ctx->vertex_state_changed) && - idx == state->idx) - return state->pipeline; + if (IS_MESH) { + if (!state->mesh_dirty && !state->mesh_modules_changed) + return state->mesh_pipeline; + } else { + if (!state->dirty && !state->modules_changed && idx == state->idx && !ctx->vertex_state_changed && + ((DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2))) + return state->pipeline; + } struct hash_entry *entry = NULL; /* recalc the base pipeline state hash */ - if (state->dirty) { - if (state->pipeline) //avoid on first hash + if (IS_MESH) { + if (state->mesh_dirty) { + if (state->mesh_pipeline) //avoid on first hash + state->mesh_final_hash ^= state->mesh_hash; + state->mesh_hash = hash_gfx_pipeline_state(state, screen); + state->mesh_final_hash ^= state->mesh_hash; + state->mesh_dirty = false; + } + } else { + if (state->dirty) { + if (state->pipeline) //avoid on first hash + state->final_hash ^= state->hash; + state->hash = hash_gfx_pipeline_state(state, screen); state->final_hash ^= state->hash; - state->hash = hash_gfx_pipeline_state(state, screen); - state->final_hash ^= state->hash; - state->dirty = false; + state->dirty = false; + } } /* extra safety asserts for optimal path to catch refactoring bugs */ if (prog->optimal_keys) { ASSERTED const union zink_shader_key_optimal *opt = (union zink_shader_key_optimal*)&prog->last_variant_hash; ASSERTED union zink_shader_key_optimal sanitized = {}; - sanitized.val = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + if (IS_MESH) { + sanitized.val = zink_sanitize_optimal_key_mesh(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + assert(state->mesh_optimal_key == sanitized.val); + } else { + sanitized.val = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + assert(state->optimal_key == sanitized.val); + } assert(opt->val == sanitized.val); - assert(state->optimal_key == sanitized.val); } - /* recalc vertex state if missing optimal extensions */ - if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && ctx->vertex_state_changed) { - if (state->pipeline) - state->final_hash ^= state->vertex_hash; - /* even if dynamic stride is available, it may not be usable with the current pipeline */ - if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) + + if (IS_MESH) { + state->mesh_modules_changed = false; + + if (prog->last_finalized_hash[idx] == state->mesh_final_hash && + !prog->inline_variants && likely(prog->last_pipeline[idx]) && + /* this data is too big to compare in the fast-path */ + likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { + state->mesh_pipeline = prog->last_pipeline[idx]->pipeline; + return state->mesh_pipeline; + } + } else { + if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && ctx->vertex_state_changed) { + /* recalc vertex state if missing optimal extensions */ + if (state->pipeline) + state->final_hash ^= state->vertex_hash; + /* even if dynamic stride is available, it 
may not be usable with the current pipeline */ + if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) #if defined(MVK_VERSION) - if (screen->have_dynamic_state_vertex_input_binding_stride) + if (screen->have_dynamic_state_vertex_input_binding_stride) #endif - uses_dynamic_stride = check_vertex_strides(ctx); - if (!uses_dynamic_stride) { - uint32_t hash = 0; - /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */ - uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask; - hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash); + uses_dynamic_stride = check_vertex_strides(ctx); + if (!uses_dynamic_stride) { + uint32_t hash = 0; + /* if we don't have dynamic states, we have to hash the enabled vertex buffer bindings */ + uint32_t vertex_buffers_enabled_mask = state->vertex_buffers_enabled_mask; + hash = XXH32(&vertex_buffers_enabled_mask, sizeof(uint32_t), hash); - for (unsigned i = 0; i < state->element_state->num_bindings; i++) { - const unsigned buffer_id = ctx->element_state->hw_state.binding_map[i]; - struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id; - state->vertex_strides[buffer_id] = vb->buffer.resource ? state->element_state->b.strides[i] : 0; - hash = XXH32(&state->vertex_strides[buffer_id], sizeof(uint32_t), hash); + for (unsigned i = 0; i < state->element_state->num_bindings; i++) { + const unsigned buffer_id = ctx->element_state->hw_state.binding_map[i]; + struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id; + state->vertex_strides[buffer_id] = vb->buffer.resource ? state->element_state->b.strides[i] : 0; + hash = XXH32(&state->vertex_strides[buffer_id], sizeof(uint32_t), hash); + } + state->vertex_hash = hash ^ state->element_state->hash; + } else + state->vertex_hash = state->element_state->hash; + state->final_hash ^= state->vertex_hash; + } + + state->modules_changed = false; + state->uses_dynamic_stride = uses_dynamic_stride; + state->idx = idx; + + /* shortcut for reusing previous pipeline across program changes */ + if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) { + if (prog->last_finalized_hash[idx] == state->final_hash && + !prog->inline_variants && likely(prog->last_pipeline[idx]) && + /* this data is too big to compare in the fast-path */ + likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { + state->pipeline = prog->last_pipeline[idx]->pipeline; + return state->pipeline; } - state->vertex_hash = hash ^ state->element_state->hash; - } else - state->vertex_hash = state->element_state->hash; - state->final_hash ^= state->vertex_hash; - } - state->modules_changed = false; - state->uses_dynamic_stride = uses_dynamic_stride; - state->idx = idx; - - /* shortcut for reusing previous pipeline across program changes */ - if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) { - if (prog->last_finalized_hash[idx] == state->final_hash && - !prog->inline_variants && likely(prog->last_pipeline[idx]) && - /* this data is too big to compare in the fast-path */ - likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { - state->pipeline = prog->last_pipeline[idx]->pipeline; - return state->pipeline; } } - entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[idx], state->final_hash, state); + + unsigned final_hash = IS_MESH ? 
state->mesh_final_hash : state->final_hash; + entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[idx], final_hash, state); if (!entry) { + bool can_gpl = IS_MESH ? zink_can_use_pipeline_libs_mesh(ctx) : zink_can_use_pipeline_libs(ctx); /* always wait on async precompile/cache fence */ util_queue_fence_wait(&prog->base.cache_fence); struct zink_gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(zink_gfx_pipeline_cache_entry); @@ -194,14 +230,15 @@ zink_get_gfx_pipeline(struct zink_context *ctx, pc_entry->prog = prog; /* init the optimized background compile fence */ util_queue_fence_init(&pc_entry->fence); - entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[idx], state->final_hash, pc_entry, pc_entry); + entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[idx], final_hash, pc_entry, pc_entry); if (prog->base.uses_shobj && !prog->is_separable) { memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs)); zink_gfx_program_compile_queue(ctx, pc_entry); - } else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) { + } else if (HAVE_LIB && can_gpl) { + uint32_t optimal_key = IS_MESH ? ctx->gfx_pipeline_state.mesh_optimal_key : ctx->gfx_pipeline_state.optimal_key; /* this is the graphics pipeline library path: find/construct all partial pipelines */ simple_mtx_lock(&prog->libs->lock); - struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key); + struct set_entry *he = _mesa_set_search(&prog->libs->libs, &optimal_key); struct zink_gfx_library_key *gkey; if (he) { gkey = (struct zink_gfx_library_key *)he->key; @@ -210,7 +247,8 @@ zink_get_gfx_pipeline(struct zink_context *ctx, gkey = zink_create_pipeline_lib(screen, prog, &ctx->gfx_pipeline_state); } simple_mtx_unlock(&prog->libs->lock); - struct zink_gfx_input_key *ikey = DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ? + struct zink_gfx_input_key *ikey = IS_MESH ? zink_find_or_create_input_mesh(ctx) : + DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ? zink_find_or_create_input_dynamic(ctx, vkmode) : zink_find_or_create_input(ctx, vkmode); struct zink_gfx_output_key *okey = DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && screen->have_full_ds3 ? 
@@ -232,7 +270,7 @@ zink_get_gfx_pipeline(struct zink_context *ctx, } } else { /* optimize by default only when expecting precompiles in order to reduce stuttering */ - if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) + if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && !IS_MESH) pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB); else pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB); @@ -250,7 +288,7 @@ zink_get_gfx_pipeline(struct zink_context *ctx, state->pipeline = cache_entry->pipeline; /* update states for fastpath */ if (DYNAMIC_STATE >= ZINK_DYNAMIC_VERTEX_INPUT) { - prog->last_finalized_hash[idx] = state->final_hash; + prog->last_finalized_hash[idx] = final_hash; prog->last_pipeline[idx] = cache_entry; } return state->pipeline; @@ -396,6 +434,8 @@ get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys) equals_gfx_pipeline_state_func zink_get_gfx_pipeline_eq_func(struct zink_screen *screen, struct zink_gfx_program *prog) { + if (prog->shaders[MESA_SHADER_MESH]) + return get_gfx_pipeline_stage_eq_func(prog, screen->optimal_keys); if (screen->info.have_EXT_extended_dynamic_state) { if (screen->info.have_EXT_extended_dynamic_state2) { if (screen->info.have_EXT_extended_dynamic_state3) { diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 893cf406aba..a233e0a2fc5 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -1493,6 +1493,8 @@ zink_destroy_screen(struct pipe_screen *pscreen) for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs); i++) _mesa_set_fini(&screen->pipeline_libs[i], NULL); + for (unsigned i = 0; i < ARRAY_SIZE(screen->mesh_pipeline_libs); i++) + _mesa_set_fini(&screen->mesh_pipeline_libs[i], NULL); u_transfer_helper_destroy(pscreen->transfer_helper); if (util_queue_is_initialized(&screen->cache_get_thread)) { @@ -3528,10 +3530,6 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev goto fail; } - zink_init_shader_caps(screen); - zink_init_compute_caps(screen); - zink_init_screen_caps(screen); - if (screen->info.have_EXT_sample_locations) { VkMultisamplePropertiesEXT prop; prop.sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT; @@ -3675,6 +3673,10 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev init_optimal_keys(screen); + zink_init_shader_caps(screen); + zink_init_compute_caps(screen); + zink_init_screen_caps(screen); + screen->screen_id = p_atomic_inc_return(&num_screens); zink_tracing = screen->instance_info->have_EXT_debug_utils && (u_trace_is_enabled(U_TRACE_TYPE_PERFETTO) || u_trace_is_enabled(U_TRACE_TYPE_MARKERS)); diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index 1dab2447fd8..ea883007387 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -155,6 +155,8 @@ union zink_shader_key_optimal { /* the default key has only last_vertex_stage set*/ #define ZINK_SHADER_KEY_OPTIMAL_DEFAULT (1<<0) +/* mesh default key has no bits set */ +#define ZINK_SHADER_KEY_OPTIMAL_DEFAULT_MESH (0) /* Ignore patch_vertices bits that would only be used if we had to generate the missing TCS */ static inline uint32_t zink_shader_key_optimal_no_tcs(uint32_t key) 
@@ -164,7 +166,17 @@ zink_shader_key_optimal_no_tcs(uint32_t key) k.tcs_bits = 0; return k.val; } +static inline uint32_t +zink_shader_key_optimal_mesh(uint32_t key) +{ + union zink_shader_key_optimal k; + k.val = key; + k.vs_bits = 0; + k.tcs_bits = 0; + return k.val; +} #define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key) (zink_shader_key_optimal_no_tcs(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT) +#define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT_MESH(key) (zink_shader_key_optimal_mesh(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT_MESH) static inline const struct zink_fs_key_base * zink_fs_key_base(const struct zink_shader_key *key) diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index 402ac5034ca..24175aceeed 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -640,7 +640,7 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso) if (clip_halfz != ctx->rast_state->base.clip_halfz) { if (screen->info.have_EXT_depth_clip_control) - ctx->gfx_pipeline_state.dirty = true; + ctx->gfx_pipeline_state.dirty = ctx->gfx_pipeline_state.mesh_dirty = true; else zink_set_last_vertex_key(ctx)->clip_halfz = ctx->rast_state->base.clip_halfz; ctx->vp_state_changed = true; diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 7f1fef53811..ebde7b45080 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -103,6 +103,7 @@ enum zink_pipeline_idx { ZINK_PIPELINE_GFX, ZINK_PIPELINE_COMPUTE, + ZINK_PIPELINE_MESH, ZINK_PIPELINE_MAX, }; @@ -431,8 +432,8 @@ struct zink_descriptor_data { bool bindless_bound; bool bindless_init; bool has_fbfetch; - bool push_state_changed[ZINK_PIPELINE_MAX]; //gfx, compute - uint8_t state_changed[ZINK_PIPELINE_MAX]; //gfx, compute + bool push_state_changed[ZINK_PIPELINE_MAX]; //gfx, compute, mesh + uint8_t state_changed[ZINK_PIPELINE_MAX]; //gfx, compute, mesh struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute struct zink_descriptor_layout *push_dsl[2]; //gfx, compute VkDescriptorUpdateTemplate push_template[2]; //gfx, compute @@ -454,13 +455,14 @@ struct zink_descriptor_data { } db; }; - struct zink_program *pg[ZINK_PIPELINE_MAX]; //gfx, compute + struct zink_program *pg[ZINK_PIPELINE_MAX]; //gfx, compute, mesh VkDescriptorUpdateTemplateEntry push_entries[MESA_SHADER_STAGES]; //gfx+fbfetch VkDescriptorUpdateTemplateEntry compute_push_entry; + VkDescriptorUpdateTemplateEntry mesh_push_entries[MESA_SHADER_STAGES]; //mesh+fbfetch /* push descriptor layout size and binding offsets */ - uint32_t db_size[ZINK_PIPELINE_MAX]; //gfx, compute + uint32_t db_size[ZINK_PIPELINE_MAX]; //gfx, compute, mesh uint32_t db_offset[ZINK_GFX_SHADER_COUNT + 1]; //gfx + fbfetch /* compute offset is always 0 */ }; @@ -520,20 +522,20 @@ struct zink_batch_descriptor_data { unsigned pool_size[ZINK_DESCRIPTOR_BASE_TYPES]; /* this array is sized based on the max zink_descriptor_pool_key::id used by the batch; members may be NULL */ struct util_dynarray pools[ZINK_DESCRIPTOR_BASE_TYPES]; - struct zink_descriptor_pool_multi push_pool[2]; //gfx, compute + struct zink_descriptor_pool_multi push_pool[2]; //gfx, compute, mesh /* the current program (for descriptor updating) */ - struct zink_program *pg[ZINK_PIPELINE_MAX]; //gfx, compute + struct zink_program *pg[ZINK_PIPELINE_MAX]; //gfx, compute, mesh /* the current pipeline compatibility id (for pipeline compatibility rules) */ - uint32_t compat_id[ZINK_PIPELINE_MAX]; //gfx, compute + 
uint32_t compat_id[ZINK_PIPELINE_MAX]; //gfx, compute, mesh /* the current set layout */ - VkDescriptorSetLayout dsl[ZINK_PIPELINE_MAX][ZINK_DESCRIPTOR_BASE_TYPES]; //gfx, compute + VkDescriptorSetLayout dsl[ZINK_PIPELINE_MAX][ZINK_DESCRIPTOR_BASE_TYPES]; //gfx, compute, mesh union { /* the current set for a given type; used for rebinding if pipeline compat id changes and current set must be rebound */ - VkDescriptorSet sets[ZINK_PIPELINE_MAX][ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute + VkDescriptorSet sets[ZINK_PIPELINE_MAX][ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //gfx, compute, mesh uint64_t cur_db_offset[ZINK_DESCRIPTOR_NON_BINDLESS_TYPES]; //the current offset of a descriptor buffer for rebinds }; /* mask of push descriptor usage */ - unsigned push_usage[ZINK_PIPELINE_MAX]; //gfx, compute + unsigned push_usage[ZINK_PIPELINE_MAX]; //gfx, compute, mesh struct zink_resource *db; //the descriptor buffer for a given type uint8_t *db_map; //the host map for the buffer @@ -891,7 +893,9 @@ struct zink_gfx_pipeline_state { /* Pre-hashed value for table lookup, invalid when zero. * Members after this point are not included in pipeline state hash key */ uint32_t hash; + uint32_t mesh_hash; bool dirty; + bool mesh_dirty; struct zink_pipeline_dynamic_state1 dyn_state1; @@ -899,14 +903,18 @@ struct zink_gfx_pipeline_state { struct zink_pipeline_dynamic_state3 dyn_state3; union { - VkShaderModule modules[MESA_SHADER_STAGES - 1]; + VkShaderModule modules[MESA_SHADER_MESH_STAGES - 1]; uint32_t optimal_key; }; + uint32_t mesh_optimal_key; + bool modules_changed; + bool mesh_modules_changed; uint32_t vertex_hash; uint32_t final_hash; + uint32_t mesh_final_hash; uint32_t _pad2; /* order matches zink_gfx_input_key */ @@ -936,6 +944,7 @@ struct zink_gfx_pipeline_state { VkFormat rendering_formats[PIPE_MAX_COLOR_BUFS]; VkPipelineRenderingCreateInfo rendering_info; VkPipeline pipeline; + VkPipeline mesh_pipeline; enum mesa_prim gfx_prim_mode; //pending mode }; @@ -1034,7 +1043,7 @@ typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b); struct zink_gfx_library_key { uint32_t optimal_key; //equals_pipeline_lib_optimal - VkShaderModule modules[ZINK_GFX_SHADER_COUNT]; + VkShaderModule modules[MESA_SHADER_MESH_STAGES]; VkPipeline pipeline; }; @@ -1086,13 +1095,13 @@ struct zink_gfx_pipeline_cache_entry { struct zink_gfx_output_key *okey; VkPipeline unoptimized_pipeline; } gpl; - struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT]; + struct zink_shader_object shobjs[MESA_SHADER_MESH_STAGES]; }; }; struct zink_gfx_lib_cache { /* for hashing */ - struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; + struct zink_shader *shaders[MESA_SHADER_MESH_STAGES]; unsigned refcount; bool removed; //once removed from cache uint8_t stages_present; @@ -1110,16 +1119,16 @@ struct zink_gfx_program { uint32_t stages_remaining; //mask of zink_shader remaining in this program uint32_t gfx_hash; //from ctx->gfx_hash - struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; + struct zink_shader *shaders[MESA_SHADER_MESH_STAGES]; struct zink_shader *last_vertex_stage; - struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT]; + struct zink_shader_object objs[MESA_SHADER_MESH_STAGES]; /* full */ - VkShaderEXT objects[ZINK_GFX_SHADER_COUNT]; - uint32_t module_hash[ZINK_GFX_SHADER_COUNT]; - struct blob blobs[ZINK_GFX_SHADER_COUNT]; - struct util_dynarray shader_cache[ZINK_GFX_SHADER_COUNT][2][2]; //normal, nonseamless cubes, inline uniforms - unsigned inlined_variant_count[ZINK_GFX_SHADER_COUNT]; + VkShaderEXT 
objects[MESA_SHADER_MESH_STAGES]; + uint32_t module_hash[MESA_SHADER_MESH_STAGES]; + struct blob blobs[MESA_SHADER_MESH_STAGES]; + struct util_dynarray shader_cache[MESA_SHADER_MESH_STAGES][2][2]; //normal, nonseamless cubes, inline uniforms + unsigned inlined_variant_count[MESA_SHADER_MESH_STAGES]; uint32_t default_variant_hash; uint8_t inline_variants; //which stages are using inlined uniforms bool needs_inlining; // whether this program requires some uniforms to be inlined @@ -1253,6 +1262,7 @@ struct zink_resource_object { bool is_aux; }; +/* "gfx" includes mesh here */ struct zink_resource { struct threaded_resource base; @@ -1271,8 +1281,8 @@ struct zink_resource { uint8_t vbo_bind_count; uint8_t so_bind_count; //not counted in all_binds bool so_valid; - uint32_t ubo_bind_mask[MESA_SHADER_STAGES]; - uint32_t ssbo_bind_mask[MESA_SHADER_STAGES]; + uint32_t ubo_bind_mask[MESA_SHADER_MESH_STAGES]; + uint32_t ssbo_bind_mask[MESA_SHADER_MESH_STAGES]; }; struct { bool linear; @@ -1286,8 +1296,8 @@ struct zink_resource { VkImageAspectFlags aspect; }; }; - uint32_t sampler_binds[MESA_SHADER_STAGES]; - uint32_t image_binds[MESA_SHADER_STAGES]; + uint32_t sampler_binds[MESA_SHADER_MESH_STAGES]; + uint32_t image_binds[MESA_SHADER_MESH_STAGES]; uint16_t sampler_bind_count[2]; //gfx, compute uint16_t image_bind_count[2]; //gfx, compute uint16_t write_bind_count[2]; //gfx, compute @@ -1404,6 +1414,10 @@ struct zink_screen { struct set pipeline_libs[8]; simple_mtx_t pipeline_libs_lock[8]; + /* there is only [task] and [notask]*/ + struct set mesh_pipeline_libs[2]; + simple_mtx_t mesh_pipeline_libs_lock[2]; + simple_mtx_t desc_set_layouts_lock; struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_BASE_TYPES]; simple_mtx_t desc_pool_keys_lock; @@ -1638,6 +1652,7 @@ struct zink_viewport_state { struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS]; struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS]; uint8_t num_viewports; + uint8_t mesh_num_viewports; }; struct zink_descriptor_db_info { @@ -1679,6 +1694,7 @@ typedef void (*pipe_draw_vertex_state_func)(struct pipe_context *ctx, unsigned num_draws); typedef void (*pipe_launch_grid_func)(struct pipe_context *pipe, const struct pipe_grid_info *info); +typedef void (*pipe_draw_mesh_tasks_func)(struct pipe_context *pipe, const struct pipe_grid_info *info); enum zink_ds3_state { ZINK_DS3_RAST_STIPPLE, @@ -1711,6 +1727,7 @@ struct zink_context { pipe_draw_func draw_vbo[2]; //batch changed pipe_draw_vertex_state_func draw_state[2]; //batch changed pipe_launch_grid_func launch_grid[2]; //batch changed + pipe_draw_mesh_tasks_func draw_mesh_tasks[2]; //batch changed struct pipe_device_reset_callback reset; @@ -1732,10 +1749,10 @@ struct zink_context { unsigned shader_has_inlinable_uniforms_mask; unsigned inlinable_uniforms_valid_mask; - struct pipe_constant_buffer ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; - struct pipe_shader_buffer ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; - uint32_t writable_ssbos[MESA_SHADER_STAGES]; - struct zink_image_view image_views[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + struct pipe_constant_buffer ubos[MESA_SHADER_MESH_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + struct pipe_shader_buffer ssbos[MESA_SHADER_MESH_STAGES][PIPE_MAX_SHADER_BUFFERS]; + uint32_t writable_ssbos[MESA_SHADER_MESH_STAGES]; + struct zink_image_view image_views[MESA_SHADER_MESH_STAGES][ZINK_MAX_SHADER_IMAGES]; uint32_t transient_attachments; struct pipe_framebuffer_state fb_state; @@ -1747,9 +1764,9 @@ struct zink_context { struct 
zink_depth_stencil_alpha_state *dsa_state; bool has_swapchain; - bool pipeline_changed[ZINK_PIPELINE_MAX]; //gfx, compute + bool pipeline_changed[ZINK_PIPELINE_MAX]; //gfx, compute, mesh - struct zink_shader *gfx_stages[ZINK_GFX_SHADER_COUNT]; + struct zink_shader *gfx_stages[MESA_SHADER_MESH_STAGES]; struct zink_shader *last_vertex_stage; bool shader_reads_drawid; bool shader_reads_basevertex; @@ -1764,13 +1781,20 @@ struct zink_context { struct set gfx_inputs; struct set gfx_outputs; + /* the only possibilities are [no-task][task] */ + struct hash_table mesh_cache[2]; + simple_mtx_t mesh_lock[2]; + uint32_t mesh_hash; + struct zink_gfx_program *mesh_program; + struct zink_descriptor_data dd; struct zink_compute_pipeline_state compute_pipeline_state; struct zink_compute_program *curr_compute; - unsigned shader_stages : ZINK_GFX_SHADER_COUNT; /* mask of bound gfx shader stages */ + uint8_t shader_stages; /* mask of bound gfx shader stages */ uint8_t dirty_gfx_stages; /* mask of changed gfx shader stages */ + uint8_t dirty_mesh_stages; /* mask of changed mesh shader stages */ bool last_vertex_stage_dirty; bool compute_dirty; bool is_generated_gs_bound; @@ -1804,8 +1828,8 @@ struct zink_context { struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; bool vertex_buffers_dirty; - struct zink_sampler_state *sampler_states[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + struct zink_sampler_state *sampler_states[MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; struct zink_viewport_state vp_state; bool vp_state_changed; @@ -1861,32 +1885,32 @@ struct zink_context { struct { /* descriptor info */ - uint8_t num_ubos[MESA_SHADER_STAGES]; + uint8_t num_ubos[MESA_SHADER_MESH_STAGES]; - uint8_t num_ssbos[MESA_SHADER_STAGES]; + uint8_t num_ssbos[MESA_SHADER_MESH_STAGES]; struct util_dynarray global_bindings; - VkDescriptorImageInfo textures[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - uint32_t emulate_nonseamless[MESA_SHADER_STAGES]; - uint32_t cubes[MESA_SHADER_STAGES]; - uint8_t num_samplers[MESA_SHADER_STAGES]; - uint8_t num_sampler_views[MESA_SHADER_STAGES]; + VkDescriptorImageInfo textures[MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; + uint32_t emulate_nonseamless[MESA_SHADER_MESH_STAGES]; + uint32_t cubes[MESA_SHADER_MESH_STAGES]; + uint8_t num_samplers[MESA_SHADER_MESH_STAGES]; + uint8_t num_sampler_views[MESA_SHADER_MESH_STAGES]; - VkDescriptorImageInfo images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; - uint8_t num_images[MESA_SHADER_STAGES]; + VkDescriptorImageInfo images[MESA_SHADER_MESH_STAGES][ZINK_MAX_SHADER_IMAGES]; + uint8_t num_images[MESA_SHADER_MESH_STAGES]; union { struct { - VkDescriptorBufferInfo ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; - VkDescriptorBufferInfo ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; - VkBufferView tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - VkBufferView texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + VkDescriptorBufferInfo ubos[MESA_SHADER_MESH_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + VkDescriptorBufferInfo ssbos[MESA_SHADER_MESH_STAGES][PIPE_MAX_SHADER_BUFFERS]; + VkBufferView tbos[MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; + VkBufferView texel_images[MESA_SHADER_MESH_STAGES][ZINK_MAX_SHADER_IMAGES]; } t; struct { - VkDescriptorAddressInfoEXT ubos[MESA_SHADER_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; - VkDescriptorAddressInfoEXT 
ssbos[MESA_SHADER_STAGES][PIPE_MAX_SHADER_BUFFERS]; - VkDescriptorAddressInfoEXT tbos[MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - VkDescriptorAddressInfoEXT texel_images[MESA_SHADER_STAGES][ZINK_MAX_SHADER_IMAGES]; + VkDescriptorAddressInfoEXT ubos[MESA_SHADER_MESH_STAGES][PIPE_MAX_CONSTANT_BUFFERS]; + VkDescriptorAddressInfoEXT ssbos[MESA_SHADER_MESH_STAGES][PIPE_MAX_SHADER_BUFFERS]; + VkDescriptorAddressInfoEXT tbos[MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; + VkDescriptorAddressInfoEXT texel_images[MESA_SHADER_MESH_STAGES][ZINK_MAX_SHADER_IMAGES]; } db; }; @@ -1894,9 +1918,9 @@ struct zink_context { uint8_t fbfetch_db[ZINK_FBFETCH_DESCRIPTOR_SIZE]; /* the current state of the zs swizzle data */ - struct zink_zs_swizzle_key zs_swizzle[MESA_SHADER_STAGES]; + struct zink_zs_swizzle_key zs_swizzle[MESA_SHADER_MESH_STAGES]; - struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + struct zink_resource *descriptor_res[ZINK_DESCRIPTOR_BASE_TYPES][MESA_SHADER_MESH_STAGES][PIPE_MAX_SAMPLERS]; struct { struct util_idalloc tex_slots; //img, buffer @@ -1923,6 +1947,7 @@ struct zink_context { bool null_fbfetch_init; } di; void (*invalidate_descriptor_state)(struct zink_context *ctx, mesa_shader_stage shader, enum zink_descriptor_type type, unsigned, unsigned); + /* "gfx" includes mesh */ struct set *need_barriers[2]; //gfx, compute struct set update_barriers[2][2]; //[gfx, compute][current, next] uint8_t barrier_set_idx[2]; @@ -1939,6 +1964,7 @@ struct zink_context { bool dirty_so_targets; bool gfx_dirty; + bool mesh_dirty; bool shobj_draw : 1; //using shader objects for draw bool is_device_lost;