diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 50414a51037..75a16f41545 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -3879,9 +3879,25 @@ remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, return true; } +static void +zink_optimized_st_emulation_passes(nir_shader *nir, struct zink_shader *zs, + const struct zink_st_variant_key *key) +{ + if (!nir->info.io_lowered) + return; +} + +static void +zink_emulation_passes(nir_shader *nir, struct zink_shader *zs) +{ + if (!nir->info.io_lowered) + return; +} + struct zink_shader_object -zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, - nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg) +zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, + const struct zink_shader_key *key, const struct zink_st_variant_key *st_key, + bool compile_uber, const void *extra_data, struct zink_program *pg) { bool need_optimize = true; bool inlined_uniforms = false; @@ -3891,8 +3907,18 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad NIR_PASS(_, nir, nir_lower_sample_shading); } + if (compile_uber) { + zink_emulation_passes(nir, zs); + need_optimize = true; + } + else if (st_key) { + zink_optimized_st_emulation_passes(nir, zs, st_key); + need_optimize = true; + } + NIR_PASS(_, nir, add_derefs); NIR_PASS(_, nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 
1 : 8); + if (key) { if (key->inline_uniforms) { NIR_PASS(_, nir, nir_inline_uniforms, @@ -4077,7 +4103,7 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad } struct zink_shader_object -zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) +zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs, bool compile_uber) { nir_shader *nir = zs->nir; /* TODO: maybe compile multiple variants for different set counts for compact mode? */ @@ -4107,6 +4133,10 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) default: break; } } + + if (compile_uber) + zink_emulation_passes(nir, zs); + NIR_PASS(_, nir, add_derefs); NIR_PASS(_, nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8); if (screen->driconf.inline_uniforms) { @@ -4114,6 +4144,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) NIR_PASS(_, nir, rewrite_bo_access, screen); NIR_PASS(_, nir, remove_bo_access, zs); } + optimize_nir(nir, zs, true); zink_descriptor_shader_init(screen, zs); nir_shader *nir_clone = NULL; @@ -4128,7 +4159,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, 32); zink_shader_tcs_init(screen, zs->non_fs.generated_tcs, nir_clone, &nir_tcs); nir_tcs->info.separate_shader = true; - zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs); + zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs, compile_uber); ralloc_free(nir_tcs); zs->non_fs.generated_tcs->nir = NULL; } @@ -6448,11 +6479,13 @@ gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader) prog->base.removed = true; simple_mtx_unlock(lock); - for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { - hash_table_foreach(&prog->pipelines[i], table_entry) { - struct 
zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data; + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], table_entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data; - util_queue_fence_wait(&pc_entry->fence); + util_queue_fence_wait(&pc_entry->fence); + } } } } @@ -6468,7 +6501,10 @@ gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader) prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent == shader) { prog->shaders[MESA_SHADER_GEOMETRY] = NULL; } - zink_gfx_program_reference(screen, &prog, NULL); + + /* variant programs are owned and destroyed by their parent */ + if (!prog->is_variant_program) + zink_gfx_program_reference(screen, &prog, NULL); return true; } diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index bec8fae913e..0c33a493b38 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -63,9 +63,11 @@ void zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer); /* pass very large shader key data with extra_data */ struct zink_shader_object -zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg); +zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, + const struct zink_shader_key *key, const struct zink_st_variant_key *st_key, + bool compile_uber, const void *extra_data, struct zink_program *pg); struct zink_shader_object -zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs); +zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs, bool compile_uber); struct zink_shader * zink_shader_create(struct zink_screen *screen, struct 
nir_shader *nir); void diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 889699d80f2..413d46de30a 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -3721,8 +3721,8 @@ zink_update_descriptor_refs(struct zink_context *ctx, bool compute) res->obj->unordered_read = false; } } - if (ctx->curr_program) - zink_batch_reference_program(ctx, &ctx->curr_program->base); + if (ctx->curr_program_uber || ctx->curr_program) + zink_batch_reference_program(ctx, &ctx->curr_program_uber->base); } if (ctx->di.bindless_refs_dirty) { ctx->di.bindless_refs_dirty = false; diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index 7476d19edbb..a28024f4d67 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -265,11 +265,11 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum zink_gfx_program_update(ctx); bool pipeline_changed = false; VkPipeline pipeline = VK_NULL_HANDLE; - if (!ctx->curr_program->base.uses_shobj) { + if (!(ctx->gfx_pipeline_state.uber_required ? 
ctx->curr_program_uber : ctx->curr_program)->base.uses_shobj) { if (screen->info.have_EXT_graphics_pipeline_library) - pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); + pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program_uber, ctx->curr_program, &ctx->gfx_pipeline_state, mode); else - pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode); + pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, ctx->curr_program, &ctx->gfx_pipeline_state, mode); assert(pipeline); pipeline_changed = prev_pipeline != pipeline || ctx->shobj_draw; if (BATCH_CHANGED || pipeline_changed) @@ -285,7 +285,8 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum VK_SHADER_STAGE_FRAGMENT_BIT, }; /* always rebind all stages */ - VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects); + VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, + ctx->gfx_pipeline_state.uber_required ? 
ctx->curr_program_uber->objects : ctx->curr_program->objects); if (screen->info.have_EXT_mesh_shader) { /* must always unbind mesh stages */ VkShaderStageFlagBits mesh_stages[] = { @@ -994,9 +995,9 @@ update_mesh_pipeline(struct zink_context *ctx, struct zink_batch_state *bs) VkPipeline pipeline = VK_NULL_HANDLE; if (!ctx->mesh_program->base.uses_shobj) { if (screen->info.have_EXT_graphics_pipeline_library) - pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); + pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); else - pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); + pipeline = zink_get_gfx_pipeline(ctx, ctx->mesh_program, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT); assert(pipeline); pipeline_changed = prev_pipeline != pipeline || ctx->shobj_draw; if (BATCH_CHANGED || pipeline_changed) diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index a5363a98481..1114bf1186e 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -884,10 +884,10 @@ create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_objec } VkPipeline -zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog) +zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_object *objs, struct zink_gfx_program *prog) { u_rwlock_wrlock(&prog->base.pipeline_cache_lock); - VkPipeline pipeline = create_gfx_pipeline_library(screen, prog->objs, prog->stages_present, prog->base.layout, prog->base.pipeline_cache); + VkPipeline pipeline = create_gfx_pipeline_library(screen, objs, prog->stages_present, prog->base.layout, prog->base.pipeline_cache); u_rwlock_wrunlock(&prog->base.pipeline_cache_lock); return pipeline; } diff --git 
a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h index 7b050f15efb..aa66cf0dc21 100644 --- a/src/gallium/drivers/zink/zink_pipeline.h +++ b/src/gallium/drivers/zink/zink_pipeline.h @@ -58,7 +58,7 @@ zink_create_gfx_pipeline_input(struct zink_screen *screen, const uint8_t *binding_map, VkPrimitiveTopology primitive_topology); VkPipeline -zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_gfx_program *prog); +zink_create_gfx_pipeline_library(struct zink_screen *screen, struct zink_shader_object *objs, struct zink_gfx_program *prog); VkPipeline zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipeline_state *state); VkPipeline diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index 00b1ee702a5..e63f4735a73 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -41,6 +41,7 @@ #include "nir_serialize.h" #include "nir.h" #include "nir/nir_draw_helpers.h" +#include "util/u_queue.h" /* for pipeline cache */ #define XXH_INLINE_ALL @@ -48,9 +49,34 @@ static void gfx_program_precompile_job(void *data, void *gdata, int thread_index); +static void +precompile_variant_job(void *data, void *gdata, int thread_index); +static void +precompile_variant_separate_shader_job(void *data, void *gdata, int thread_index); struct zink_gfx_program * create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, bool is_mesh); +struct precompile_variant_data { + struct zink_gfx_program *prog; + struct zink_gfx_pipeline_state state; +}; + +struct precompile_separate_variant_data { + struct zink_program *prog; + struct zink_shader_module *zm; + struct zink_shader *zs; + struct blob *blob; + bool uses_shobj; + struct zink_shader_key key; + struct zink_st_variant_key st_key; + bool has_key; +}; + +struct program_variant_key { + uint32_t key, st_key; + struct 
zink_gfx_program *prog; +}; + void debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr) { @@ -69,9 +95,9 @@ shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const s if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask || zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) return false; - const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0; - return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) && - (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size, + const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(union zink_st_small_key) : 0; + return (!nonseamless_size || !memcmp(zm->key + zm->key_size + sizeof(union zink_st_small_key), &key->base.nonseamless_cube_mask, nonseamless_size)) && + (!num_uniforms || !memcmp(zm->key + zm->key_size + sizeof(union zink_st_small_key) + nonseamless_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))); } @@ -84,13 +110,13 @@ shader_key_matches(const struct zink_shader_module *zm, if (has_inline) { if (zm->num_uniforms != num_uniforms || (num_uniforms && - memcmp(zm->key + zm->key_size + nonseamless_size, + memcmp(zm->key + zm->key_size + sizeof(union zink_st_small_key) + nonseamless_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)))) return false; } if (!has_nonseamless) { if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask || - (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size))) + (nonseamless_size && memcmp(zm->key + zm->key_size + sizeof(union zink_st_small_key), &key->base.nonseamless_cube_mask, nonseamless_size))) return false; } if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) @@ -142,18 +168,19 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr 
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle || (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle); - zm = malloc(sizeof(struct zink_shader_module) + key->size + + zm = malloc(sizeof(struct zink_shader_module) + sizeof(union zink_st_small_key) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) + (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0)); if (!zm) { return NULL; } + util_queue_fence_init(&zm->fence); unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices; if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { assert(ctx); //TODO async zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); } else { - zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base); + zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &state->st_key, false, &ctx->di.zs_swizzle[stage], &prog->base); } if (!zm->obj.mod) { FREE(zm); @@ -168,20 +195,22 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr zm->key_size = 0; memset(zm->key, 0, key->size); } + uint16_t st_val = state->st_key.small_key.val; + memcpy(zm->key + key->size, &st_val, sizeof(st_val)); if (!has_nonseamless && nonseamless_size) { /* nonseamless mask gets added to base key if it exists */ - memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size); + memcpy(zm->key + key->size + sizeof(st_val), &key->base.nonseamless_cube_mask, nonseamless_size); } zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle; zm->has_nonseamless = has_nonseamless ? 
0 : !!nonseamless_size; if (inline_size) - memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); + memcpy(zm->key + key->size + sizeof(st_val) + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) zm->hash = patch_vertices; else zm->hash = shader_module_hash(zm); if (unlikely(shadow_needs_shader_swizzle)) { - memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); + memcpy(zm->key + key->size + sizeof(st_val) + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); } zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); @@ -219,9 +248,12 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen continue; if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless)) continue; + uint16_t st_val = state->st_key.small_key.val; + if (memcmp(iter->key + iter->key_size, &st_val, sizeof(st_val))) + continue; if (unlikely(shadow_needs_shader_swizzle)) { /* shadow swizzle data needs a manual compare since it's so fat */ - if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t), + if (memcmp(iter->key + iter->key_size + sizeof(st_val) + nonseamless_size + iter->num_uniforms * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) continue; } @@ -241,7 +273,8 @@ ALWAYS_INLINE static struct zink_shader_module * create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, mesa_shader_stage stage, - struct 
zink_gfx_pipeline_state *state) + struct zink_gfx_pipeline_state *state, + bool unpopulated, bool compile_uber) { struct zink_shader_module *zm; uint16_t *key; @@ -258,23 +291,30 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr key = NULL; } size_t key_size = sizeof(uint16_t); - zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0)); + zm = calloc(1, sizeof(struct zink_shader_module) + + sizeof(union zink_st_small_key) + + (key ? key_size : 0) + + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0)); if (!zm) { return NULL; } - if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { - assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints); - unsigned patch_vertices = 3; - if (ctx) { - struct zink_tcs_key *tcs = (struct zink_tcs_key*)key; - patch_vertices = tcs->patch_vertices; - } - zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); - } else { - zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), - (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? 
&ctx->di.zs_swizzle[stage] : NULL, &prog->base); + util_queue_fence_init(&zm->fence); + if (!unpopulated) { + if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { + assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints); + unsigned patch_vertices = 3; + if (ctx) { + struct zink_tcs_key *tcs = (struct zink_tcs_key*)key; + patch_vertices = tcs->patch_vertices; + } + zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); + } else { + zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), + (struct zink_shader_key*)key, &state->st_key, compile_uber, + shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base); + } } - if (!zm->obj.mod) { + if (!zm->obj.mod && !unpopulated) { FREE(zm); return NULL; } @@ -288,9 +328,17 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr *data = (*key) & mask; if (unlikely(shadow_needs_shader_swizzle)) memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); + uint16_t st_val = state->st_key.small_key.val; + uint8_t *p = (uint8_t*)&data[1]; + if (unlikely(shadow_needs_shader_swizzle)) + p += sizeof(struct zink_zs_swizzle_key); + memcpy(p, &st_val, sizeof(st_val)); } zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); - util_dynarray_append(&prog->shader_cache[stage][0][0], zm); + if (!compile_uber) + util_dynarray_append(&prog->shader_cache[stage][0][0], zm); + else + util_dynarray_append(&prog->uber_modules, zm); return zm; } @@ -298,7 +346,7 @@ ALWAYS_INLINE static struct zink_shader_module * get_shader_module_for_stage_optimal_key(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, mesa_shader_stage stage, - struct zink_gfx_pipeline_state *state, uint16_t *key) + struct zink_gfx_pipeline_state *state, 
uint16_t *key, uint16_t *st_key) { /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; @@ -324,6 +372,12 @@ get_shader_module_for_stage_optimal_key(struct zink_context *ctx, struct zink_sc if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) continue; } + uint16_t st_val = *st_key; + uint8_t *p = iter->key + sizeof(union zink_st_small_key); + if (unlikely(shadow_needs_shader_swizzle)) + p += sizeof(struct zink_zs_swizzle_key); + if (memcmp(p, &st_val, sizeof(st_val))) + continue; } if (i > 0) { struct zink_shader_module *zero = pzm[0]; @@ -360,16 +414,18 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen mesa_shader_stage stage, struct zink_gfx_pipeline_state *state) { - uint16_t *key; + uint16_t *key, st_key; key = get_shader_module_optimal_key(ctx, prog, zs, stage); + st_key = state->st_key.small_key.val; - return get_shader_module_for_stage_optimal_key(ctx, screen, zs, prog, stage, state, key); + return get_shader_module_for_stage_optimal_key(ctx, screen, zs, prog, stage, state, key, &st_key); } static void zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm) { + util_queue_fence_wait(&zm->fence); if (zm->shobj) VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL); else @@ -480,7 +536,7 @@ generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *scree } static void -generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) +generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state, bool compile_uber) { assert(!prog->objs[MESA_SHADER_VERTEX].mod && !prog->objs[MESA_SHADER_MESH].mod); for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; 
i++) { @@ -489,7 +545,7 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree assert(prog->shaders[i]); - struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state); + struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state, false, compile_uber); prog->objs[i] = zm->obj; prog->objects[i] = zm->obj.obj; } @@ -498,21 +554,11 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree state->modules_changed = true; else state->mesh_modules_changed = true; - prog->last_variant_hash = prog->shaders[MESA_SHADER_MESH] ? state->mesh_optimal_key : state->optimal_key; -} -static uint32_t -hash_pipeline_lib_generated_tcs(const void *key) -{ - const struct zink_gfx_library_key *gkey = key; - return gkey->optimal_key; -} - - -static bool -equals_pipeline_lib_generated_tcs(const void *a, const void *b) -{ - return !memcmp(a, b, sizeof(uint32_t)); + if (!compile_uber) { + prog->last_variant_hash = prog->shaders[MESA_SHADER_MESH] ? 
state->mesh_optimal_key : state->optimal_key; + prog->st_key = state->st_key.small_key.val; + } } static uint32_t @@ -530,25 +576,6 @@ equals_pipeline_lib_mesh(const void *a, const void *b) return ak->optimal_key == bk->optimal_key; } -static uint32_t -hash_pipeline_lib(const void *key) -{ - const struct zink_gfx_library_key *gkey = key; - /* remove generated tcs bits */ - return zink_shader_key_optimal_no_tcs(gkey->optimal_key); -} - -static bool -equals_pipeline_lib(const void *a, const void *b) -{ - const struct zink_gfx_library_key *ak = a; - const struct zink_gfx_library_key *bk = b; - /* remove generated tcs bits */ - uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key); - uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key); - return val_a == val_b; -} - uint32_t hash_gfx_input_dynamic(const void *key) { @@ -673,7 +700,7 @@ zink_gfx_program_update(struct zink_context *ctx) update_gfx_program(ctx, prog); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; - prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash, false); + prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash, false, false); zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; @@ -682,7 +709,8 @@ zink_gfx_program_update(struct zink_context *ctx) simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); if (prog && prog != ctx->curr_program) zink_batch_reference_program(ctx, &prog->base); - ctx->curr_program = prog; + ctx->curr_program_uber = ctx->curr_program = prog; + ctx->gfx_pipeline_state.uber_required = false; ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; ctx->gfx_dirty = false; } else if (ctx->dirty_gfx_stages) { @@ -695,41 +723,72 @@ zink_gfx_program_update(struct 
zink_context *ctx) ctx->dirty_gfx_stages = 0; } -ALWAYS_INLINE static bool -update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, mesa_shader_stage pstage) +ALWAYS_INLINE static void +gfx_program_cache_populate_queue(struct zink_context *ctx, struct zink_gfx_program *prog, mesa_shader_stage pstage, struct zink_shader_module *zm) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_graphics_pipeline_library) + util_queue_fence_wait(&prog->base.cache_fence); + struct precompile_separate_variant_data *data = CALLOC_STRUCT(precompile_separate_variant_data); + data->prog = &prog->base; + data->zs = prog->shaders[pstage]; + data->blob = &prog->blobs[pstage]; + data->uses_shobj = prog->base.uses_shobj; + data->zm = zm; + struct zink_shader_key* keyp = (struct zink_shader_key*)get_shader_module_optimal_key(ctx, prog, data->zs, pstage); + if (keyp) + data->key = *keyp; + data->has_key = !!keyp; + data->st_key = ctx->gfx_pipeline_state.st_key; + if (zink_debug & ZINK_DEBUG_NOBGC) { + precompile_variant_separate_shader_job(data, screen, 0); + } else { + util_queue_add_job(&screen->cache_get_thread, data, &zm->fence, precompile_variant_separate_shader_job, NULL, 0); + } +} + +ALWAYS_INLINE static struct zink_shader_module * +update_or_queue_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, mesa_shader_stage pstage, bool async) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->info.have_EXT_graphics_pipeline_library) util_queue_fence_wait(&prog->base.cache_fence); struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); + bool entry_found = !!zm; + bool async_done = zm && util_queue_fence_is_signalled(&zm->fence); if (!zm) { - zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); + zm = 
create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state, async, false); perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage)); } - - bool changed = prog->objs[pstage].mod != zm->obj.mod; - prog->objs[pstage] = zm->obj; - prog->objects[pstage] = zm->obj.obj; - return changed; + if (!async || async_done) { + return zm; + } else { + if (!entry_found) + gfx_program_cache_populate_queue(ctx, prog, pstage, zm); + } + return NULL; } -static void -update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog) +static bool +update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, struct zink_gfx_program *variant_prog, bool async) { + bool async_done = true; + struct zink_shader_module *zms[3] = {0}; const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key; const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash; - if (key->vs_bits != last_prog_key->vs_bits) { - assert(!prog->is_separable); - bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage); - ctx->gfx_pipeline_state.modules_changed |= changed; + bool st_key_diff = ctx->gfx_pipeline_state.st_key.small_key.val != prog->st_key; + if (st_key_diff || key->vs_bits != last_prog_key->vs_bits) { + assert(!variant_prog->is_separable); + zms[0] = update_or_queue_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage, async); + async_done &= !!zms[0]; } const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT)); - if (key->fs_bits != last_prog_key->fs_bits || + if (st_key_diff || key->fs_bits != last_prog_key->fs_bits || /* always recheck shadow swizzles since they aren't directly part of the key */ 
unlikely(shadow_needs_shader_swizzle)) { - assert(!prog->is_separable); - bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); - ctx->gfx_pipeline_state.modules_changed |= changed; + assert(!variant_prog->is_separable); + zms[1] = update_or_queue_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT, async); + async_done &= !!zms[1]; if (unlikely(shadow_needs_shader_swizzle)) { struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t); @@ -737,11 +796,77 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr } if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated && key->tcs_bits != last_prog_key->tcs_bits) { - assert(!prog->is_separable); - bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL); - ctx->gfx_pipeline_state.modules_changed |= changed; + assert(!variant_prog->is_separable); + zms[2] = update_or_queue_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL, async); + async_done &= !!zms[2]; } - prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; + mesa_shader_stage stages[] = {ctx->last_vertex_stage->info.stage, MESA_SHADER_FRAGMENT, MESA_SHADER_TESS_CTRL}; + if (async_done) { + for (int i = 0;i < 3; i++) { + if (!zms[i]) + continue; + variant_prog->objs[stages[i]] = zms[i]->obj; + variant_prog->objects[stages[i]] = zms[i]->obj.obj; + } + variant_prog->last_variant_hash = prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; + variant_prog->st_key = prog->st_key = ctx->gfx_pipeline_state.st_key.small_key.val; + } + return async_done; +} + +static bool +update_gfx_program_missing_shaders(struct zink_context *ctx, struct zink_gfx_program *prog, + struct zink_gfx_program *variant_prog, bool async) +{ + bool async_done = true; + for (int rstage = 0; rstage < MESA_SHADER_COMPUTE; 
rstage++) { + assert(!!variant_prog->shaders[rstage] == !!prog->shaders[rstage]); + if (variant_prog->shaders[rstage] && !variant_prog->objs[rstage].mod) { + assert(!variant_prog->is_separable); + struct zink_shader_module *mod = update_or_queue_gfx_shader_module_optimal(ctx, prog, rstage, async); + async_done &= !!mod; + if (mod) { + bool changed = variant_prog->objs[rstage].mod != mod->obj.mod; + variant_prog->objs[rstage] = mod->obj; + variant_prog->objects[rstage] = mod->obj.obj; + ctx->gfx_pipeline_state.modules_changed |= changed; + } + } + } + return async_done; +} + +static void +copy_gfx_program_missing_shaders(struct zink_context *ctx, struct zink_gfx_program *base_prog, + struct zink_gfx_program *variant_prog) +{ + for (int rstage = 0; rstage < MESA_SHADER_COMPUTE; rstage++) { + assert(!!variant_prog->shaders[rstage] == !!base_prog->shaders[rstage]); + if (variant_prog->shaders[rstage] && !variant_prog->objs[rstage].mod) { + bool changed = variant_prog->objs[rstage].mod != base_prog->objs[rstage].mod; + variant_prog->objs[rstage] = base_prog->objs[rstage]; + variant_prog->objects[rstage] = base_prog->objects[rstage]; + ctx->gfx_pipeline_state.modules_changed |= changed; + } + } +} + +ALWAYS_INLINE static bool +update_gfx_shader_module_mesh(struct zink_context *ctx, struct zink_gfx_program *prog, mesa_shader_stage pstage) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_graphics_pipeline_library) + util_queue_fence_wait(&prog->base.cache_fence); + struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); + if (!zm) { + zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state, false, false); + perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage)); + } + + bool changed = prog->objs[pstage].mod != zm->obj.mod; + 
prog->objs[pstage] = zm->obj; + prog->objects[pstage] = zm->obj.obj; + return changed; } static void @@ -754,7 +879,7 @@ update_mesh_program_optimal(struct zink_context *ctx, struct zink_gfx_program *p /* always recheck shadow swizzles since they aren't directly part of the key */ unlikely(shadow_needs_shader_swizzle)) { assert(!prog->is_separable); - bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); + bool changed = update_gfx_shader_module_mesh(ctx, prog, MESA_SHADER_FRAGMENT); ctx->gfx_pipeline_state.modules_changed |= changed; if (unlikely(shadow_needs_shader_swizzle)) { struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; @@ -771,7 +896,7 @@ replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struc struct zink_gfx_program *real = prog->full_prog ? prog->full_prog : /* this will be NULL with ZINK_DEBUG_NOOPT */ - zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash, false); + zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash, false, false); entry->data = real; entry->key = real->shaders; real->base.removed = false; @@ -780,12 +905,116 @@ replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struc return real; } +static uint32_t +hash_gfx_program(const void *key) +{ + const uint32_t *k = key; + + return XXH32(k, sizeof(uint32_t[2]), 0); +} + +static bool +equals_program_variant(const void *a, const void *b) +{ + const struct program_variant_key *ak = a; + const struct program_variant_key *bk = b; + uint32_t val_a = ak->key; + uint32_t val_b = bk->key; + uint32_t val_a_st = ak->st_key; + uint32_t val_b_st = bk->st_key; + return val_a == val_b && val_a_st == val_b_st; +} + +#define CURR_KEY_PROGRAM(ctx) (ctx->gfx_pipeline_state.uber_required ? 
ctx->curr_program_uber: ctx->curr_program) + +static void +async_variant_program_update(struct zink_context *ctx, bool can_use_uber, bool needs_emulation) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + bool needs_uber = false; + if (!ctx->curr_program_uber->is_separable && (!ctx->curr_program_uber->base_variant || needs_emulation)) { + struct program_variant_key prog_variant_key = {0}; + prog_variant_key.key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;//ctx->gfx_pipeline_state.optimal_key; + prog_variant_key.st_key = ctx->gfx_pipeline_state.st_key.small_key.val; + struct set_entry * variant_entry = _mesa_set_search(&ctx->curr_program_uber->variants, &prog_variant_key); + struct zink_gfx_program *variant; + if (!variant_entry) { + variant = zink_create_gfx_program(ctx, ctx->gfx_stages, 0, ctx->curr_program_uber->gfx_hash, false, true); + variant->base.uses_shobj = ctx->curr_program_uber->base.uses_shobj; + util_queue_fence_init(&variant->base.cache_fence); + struct program_variant_key *prog_variant_key_p = MALLOC(sizeof(struct program_variant_key)); + memcpy(prog_variant_key_p, &prog_variant_key, sizeof(struct program_variant_key)); + prog_variant_key_p->prog = variant; + variant->uber_variant = ctx->curr_program_uber; + _mesa_set_add(&ctx->curr_program_uber->variants, prog_variant_key_p); + needs_uber = true; + } else + variant = ((struct program_variant_key *)variant_entry->key)->prog; + /* fetches shader modules from cache and starts async compilation on a miss */ + bool async_done = update_gfx_program_optimal(ctx, ctx->curr_program_uber, variant, can_use_uber); + assert(can_use_uber || async_done); + if (async_done) { + if (ctx->curr_program_uber->base_variant) + copy_gfx_program_missing_shaders(ctx, ctx->curr_program_uber->base_variant, variant); + else + async_done = update_gfx_program_missing_shaders(ctx, ctx->curr_program_uber, variant, can_use_uber); + } + assert(can_use_uber || async_done); + needs_uber &= !async_done; + + if 
(async_done && !variant->started_compiling) { + /* Modules are ready but the program isn't. Start a job for it. */ + struct precompile_variant_data *data = CALLOC_STRUCT(precompile_variant_data); + data->prog = variant; + data->state = ctx->gfx_pipeline_state; + if (can_use_uber && !(zink_debug & ZINK_DEBUG_NOBGC)) + util_queue_add_job(&screen->cache_get_thread, data, &variant->base.cache_fence, precompile_variant_job, NULL, 0); + else + precompile_variant_job(data, screen, 0); + variant->started_compiling = true; + } + if (!can_use_uber) + util_queue_fence_wait(&variant->base.cache_fence); + bool variant_prog_ready = variant->started_compiling && + (!can_use_uber || util_queue_fence_is_signalled(&variant->base.cache_fence)); + assert(can_use_uber || variant_prog_ready); + if(variant_prog_ready) { + /* variant prog is ready, use it */ + if (ctx->curr_program != variant) { + ctx->gfx_pipeline_state.modules_changed = true; + ctx->curr_program = variant; + } + assert(async_done); + if (!needs_emulation) + ctx->curr_program_uber->base_variant = variant; + } + needs_uber |= !async_done || !variant_prog_ready; + } else if (ctx->curr_program_uber->base_variant && !needs_emulation) { + ctx->curr_program = ctx->curr_program_uber->base_variant; + ctx->curr_program_uber->last_variant_hash = ctx->curr_program->last_variant_hash; + ctx->curr_program_uber->st_key = ctx->curr_program->st_key; + needs_uber = false; + } else if (ctx->curr_program_uber->is_separable) { + assert(can_use_uber); + ctx->curr_program = ctx->curr_program_uber; + needs_uber = true; + } + if (ctx->gfx_pipeline_state.uber_required != needs_uber) { + ctx->gfx_pipeline_state.modules_changed = true; + ctx->gfx_pipeline_state.uber_required = needs_uber; + } + + if (needs_uber || !ctx->curr_program_uber) + ctx->curr_program = ctx->curr_program_uber; +} + void zink_gfx_program_update_optimal(struct zink_context *ctx) { MESA_TRACE_FUNC(); struct zink_screen *screen = zink_screen(ctx->base.screen); 
assert(!ctx->gfx_stages[MESA_SHADER_TESS_CTRL] || !ctx->gfx_stages[MESA_SHADER_TESS_CTRL]->non_fs.is_generated); + struct zink_gfx_program *old_prog = ctx->curr_program_uber; if (ctx->gfx_dirty) { struct zink_gfx_program *prog = NULL; ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); @@ -794,72 +1023,93 @@ zink_gfx_program_update_optimal(struct zink_context *ctx) simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); - if (ctx->curr_program) - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + if (CURR_KEY_PROGRAM(ctx)) { + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->st_key; + } + bool needs_emulation = needs_st_emulation(ctx) || (ctx->gfx_pipeline_state.optimal_key != ZINK_SHADER_KEY_OPTIMAL_DEFAULT); + bool can_use_uber = zink_can_use_uber(ctx); if (entry) { prog = (struct zink_gfx_program*)entry->data; - bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx)); - if (prog->is_separable) { - /* shader variants can't be handled by separable programs: sync and compile */ - if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace) + if (prog->is_separable && !(zink_debug & ZINK_DEBUG_NOOPT)) { + /* if uber cannot be used we need to compile the variant synchrously, + * so we need the full prog: sync and compile */ + if (!can_use_uber) util_queue_fence_wait(&prog->base.cache_fence); /* If the optimized linked pipeline is done compiling, swap it into place. 
*/ - if (util_queue_fence_is_signalled(&prog->base.cache_fence) && - /* but only if needed for ZINK_DEBUG=noopt */ - (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) { + if (util_queue_fence_is_signalled(&prog->base.cache_fence)) { prog = replace_separable_prog(ctx, entry, prog); } - } else if (must_replace) { - /* this is a non-separable, incompatible prog which needs replacement */ - struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash, false); - generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state); - entry->data = real; - entry->key = real->shaders; - real->base.removed = false; - prog->base.removed = true; - prog = real; - } else if (!prog->base.precompile_done) { - util_queue_fence_wait(&prog->base.cache_fence); } - update_gfx_program_optimal(ctx, prog); + ctx->curr_program_uber = prog; + async_variant_program_update(ctx, can_use_uber, needs_emulation); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, false); + ctx->gfx_pipeline_state.uber_required = true; prog->base.removed = false; _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); if (!prog->is_separable) { - zink_screen_get_pipeline_cache(screen, &prog->base, false); perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n"); - generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + prog->is_uber_program = true; + { + struct zink_gfx_pipeline_state state = {0}; + state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; + state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard + state.optimal_key = state.shader_keys_optimal.key.val; + 
generate_gfx_program_modules_optimal(NULL, screen, prog, &state, prog->is_uber_program); + zink_screen_get_pipeline_cache(screen, &prog->base, true); + if (!prog->base.uses_shobj) { + simple_mtx_lock(&prog->libs->lock); + zink_create_pipeline_lib(screen, prog, &state, prog->is_uber_program); + simple_mtx_unlock(&prog->libs->lock); + } + zink_screen_update_pipeline_cache(screen, &prog->base, true); + } + if (needs_emulation && !can_use_uber) { + ctx->curr_program_uber = prog; + async_variant_program_update(ctx, can_use_uber, needs_emulation); + } } } simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); - if (prog && prog != ctx->curr_program) - zink_batch_reference_program(ctx, &prog->base); - ctx->curr_program = prog; - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + ctx->curr_program_uber = prog; + if (ctx->gfx_pipeline_state.uber_required) + ctx->curr_program = prog; + if (ctx->curr_program_uber && ctx->curr_program_uber != old_prog) + { + assert(!ctx->curr_program_uber->is_variant_program); + zink_batch_reference_program(ctx, &ctx->curr_program_uber->base); + } + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->st_key; } else if (ctx->dirty_gfx_stages) { /* remove old hash */ ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; - - bool must_replace = ctx->curr_program->base.uses_shobj ? 
!zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx)); - if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) { - struct zink_gfx_program *prog = ctx->curr_program; - - util_queue_fence_wait(&prog->base.cache_fence); - /* shader variants can't be handled by separable programs: sync and compile */ - perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n"); - struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; - const uint32_t hash = ctx->gfx_hash; - simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); - struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); - ctx->curr_program = replace_separable_prog(ctx, entry, prog); - simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->st_key; + bool needs_emulation = needs_st_emulation(ctx) || (ctx->gfx_pipeline_state.optimal_key != ZINK_SHADER_KEY_OPTIMAL_DEFAULT); + bool can_use_uber = zink_can_use_uber(ctx); + if (ctx->curr_program->is_separable && !(zink_debug & ZINK_DEBUG_NOOPT)) { + struct zink_gfx_program *prog = ctx->curr_program_uber; + if (needs_emulation || ctx->curr_program_uber->is_separable) { + if (!can_use_uber) + util_queue_fence_wait(&prog->base.cache_fence); + perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n"); + if (util_queue_fence_is_signalled(&prog->base.cache_fence)) { + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; + const uint32_t hash = ctx->gfx_hash; + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); 
+ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); + ctx->curr_program_uber = replace_separable_prog(ctx, entry, prog); + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); + } + } } - update_gfx_program_optimal(ctx, ctx->curr_program); + async_variant_program_update(ctx, can_use_uber, needs_emulation); /* apply new hash */ - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->st_key; } ctx->dirty_gfx_stages = 0; ctx->gfx_dirty = false; @@ -898,8 +1148,8 @@ zink_mesh_program_update_optimal(struct zink_context *ctx) } } else if (must_replace) { /* this is a non-separable, incompatible prog which needs replacement */ - struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, 0, ctx->mesh_hash, true); - generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state); + struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, 0, ctx->mesh_hash, true, false); + generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state, false); entry->data = real; entry->key = real->shaders; real->base.removed = false; @@ -917,7 +1167,7 @@ zink_mesh_program_update_optimal(struct zink_context *ctx) if (!prog->is_separable) { zink_screen_get_pipeline_cache(screen, &prog->base, false); perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n"); - generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state, false); } } simple_mtx_unlock(lock); @@ -960,8 +1210,10 @@ optimized_compile_job(void *data, void *gdata, int thread_index) VkPrimitiveTopology vkmode = is_mesh ? 
VK_PRIMITIVE_TOPOLOGY_MAX_ENUM : zink_primitive_topology(pc_entry->state.gfx_prim_mode); if (pc_entry->gpl.gkey) pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey ? pc_entry->gpl.ikey->pipeline : VK_NULL_HANDLE, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false); - else - pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, vkmode, true); + else { + struct zink_shader_object *objs = pc_entry->prog->objs; + pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, pc_entry->state.element_state->binding_map, vkmode, true); + } if (pipeline) { pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline; pc_entry->pipeline = pipeline; @@ -1009,10 +1261,12 @@ zink_program_finish(struct zink_context *ctx, struct zink_program *pg) if (pg->is_compute) return; struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; - for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { - hash_table_foreach(&prog->pipelines[i], entry) { - struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; - util_queue_fence_wait(&pc_entry->fence); + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; + util_queue_fence_wait(&pc_entry->fence); + } } } } @@ -1073,7 +1327,7 @@ update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *c return; } zm->shobj = false; - zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base); + zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, NULL, false, zs_swizzle_size ? 
&ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base); if (!zm->obj.spirv) { FREE(zm); return; @@ -1198,7 +1452,11 @@ zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache * { if (!p_atomic_dec_zero(&libs->refcount)) return; - + if (libs->lib) { + struct zink_gfx_library_key *gkey = libs->lib; + VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL); + FREE(gkey); + } simple_mtx_destroy(&libs->lock); set_foreach_remove(&libs->libs, he) { struct zink_gfx_library_key *gkey = (void*)he->key; @@ -1217,10 +1475,6 @@ create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs) if (generated_tcs) libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); simple_mtx_init(&libs->lock, mtx_plain); - if (generated_tcs) - _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs); - else - _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib); return libs; } @@ -1229,6 +1483,8 @@ find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *pr { unsigned stages_present = prog->stages_present; bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated; + if (prog->is_variant_program) + return create_lib_cache(prog, generated_tcs); if (generated_tcs) stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); unsigned idx = zink_program_cache_stages(stages_present); @@ -1307,7 +1563,7 @@ gfx_program_create(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, uint32_t gfx_hash, - bool is_mesh) + bool is_mesh, bool variant) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_gfx_program *prog = create_program(ctx, false); @@ -1317,6 +1573,7 @@ gfx_program_create(struct zink_context *ctx, prog->gfx_hash = gfx_hash; prog->base.removed = true; prog->optimal_keys = screen->optimal_keys; + prog->is_variant_program = variant; for (int i = is_mesh ? 
MESA_SHADER_FRAGMENT : 0; i < (is_mesh ? MESA_SHADER_MESH_STAGES : MESA_SHADER_STAGES); ++i) { util_dynarray_init(&prog->shader_cache[i][0][0], prog->base.ralloc_ctx); @@ -1331,6 +1588,7 @@ gfx_program_create(struct zink_context *ctx, prog->needs_inlining |= prog->shaders[i]->needs_inlining; } } + util_dynarray_init(&prog->uber_modules, prog->base.ralloc_ctx); if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { util_queue_fence_wait(&stages[MESA_SHADER_TESS_EVAL]->precompile.fence); if (!prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs) @@ -1340,13 +1598,17 @@ gfx_program_create(struct zink_context *ctx, } prog->stages_remaining = prog->stages_present; for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) { - if (prog->shaders[i]) { + if (prog->shaders[i] && !variant) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); simple_mtx_unlock(&prog->shaders[i]->lock); zink_gfx_program_reference(screen, NULL, prog); } } + + if (variant) + zink_gfx_program_reference(screen, NULL, prog); + p_atomic_dec(&prog->base.reference.count); if (is_mesh) @@ -1360,8 +1622,12 @@ gfx_program_create(struct zink_context *ctx, prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] && prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags; - for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { - _mesa_hash_table_init(&prog->pipelines[i], prog->base.ralloc_ctx, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); + _mesa_set_init(&prog->variants, prog->base.ralloc_ctx, hash_gfx_program, equals_program_variant); + + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + _mesa_hash_table_init(&prog->pipelines[r][i], prog->base.ralloc_ctx, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); + } } return prog; @@ -1436,9 +1702,9 @@ zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, uint32_t gfx_hash, - bool 
is_mesh) + bool is_mesh, bool variant) { - struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash, is_mesh); + struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash, is_mesh, variant); if (prog) prog = gfx_program_init(ctx, prog); return prog; @@ -1454,7 +1720,8 @@ create_linked_separable_job(void *data, void *gdata, int thread_index) /* this is a dead program */ if (prog->base.removed) return; - prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash, !!prog->shaders[MESA_SHADER_MESH]); + prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash, !!prog->shaders[MESA_SHADER_MESH], false); + prog->full_prog->is_uber_program = prog->is_uber_program; /* block gfx_shader_prune in the main thread */ util_queue_fence_reset(&prog->full_prog->base.cache_fence); /* add an ownership ref */ @@ -1479,15 +1746,16 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag uint32_t hash = is_mesh ? ctx->mesh_hash : ctx->gfx_hash; if (!is_separate || /* TODO: maybe try variants? 
grimace */
+       /* TODO allow if uber is usable */
       !is_default || !can_gpl)
-      return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh);
+      return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh, false);
 
    for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) {
       /* ensure async shader creation is done */
       if (stages[i]) {
         util_queue_fence_wait(&stages[i]->precompile.fence);
-         if (!stages[i]->precompile.obj.mod)
-            return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh);
+         if (!stages[i]->precompile.obj.mod && !stages[i]->precompile.obj.obj)
+            return zink_create_gfx_program(ctx, stages, vertices_per_patch, hash, is_mesh, false);
       }
    }
 
@@ -1496,6 +1764,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
       goto fail;
 
    prog->is_separable = true;
+   prog->is_uber_program = true;
    prog->gfx_hash = hash;
    prog->base.uses_shobj = screen->info.have_EXT_shader_object &&
                            ((stages[MESA_SHADER_VERTEX] && !stages[MESA_SHADER_VERTEX]->info.view_mask) ||
@@ -1535,8 +1804,12 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
     */
    p_atomic_add(&prog->base.reference.count, refs - 1);
 
-   for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) {
-      _mesa_hash_table_init(&prog->pipelines[i], prog->base.ralloc_ctx, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
+   _mesa_set_init(&prog->variants, prog->base.ralloc_ctx, hash_gfx_program, equals_program_variant);
+
+   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
+      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+         _mesa_hash_table_init(&prog->pipelines[r][i], prog->base.ralloc_ctx, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
+      }
    }
 
    for (int i = 0; i < MESA_SHADER_MESH_STAGES; ++i) {
@@ -1557,18 +1830,25 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
    prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false,
VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); prog->last_variant_hash = is_mesh ? ctx->gfx_pipeline_state.mesh_optimal_key : ctx->gfx_pipeline_state.optimal_key; + prog->st_key = ctx->gfx_pipeline_state.st_key.small_key.val; if (!prog->base.uses_shobj) { - VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; - struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); - if (!gkey) { - mesa_loge("ZINK: failed to allocate gkey!"); - goto fail; + if (!is_mesh) { + VkPipeline uber_libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; + prog->libs->lib = CALLOC_STRUCT(zink_gfx_library_key); + prog->libs->lib->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, uber_libs, 2, VK_NULL_HANDLE, false, false); + } else { + VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; + struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); + if (!gkey) { + mesa_loge("ZINK: failed to allocate gkey!"); + goto fail; + } + gkey->optimal_key = prog->last_variant_hash; + assert(gkey->optimal_key); + gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false); + _mesa_set_add(&prog->libs->libs, gkey); } - gkey->optimal_key = prog->last_variant_hash; - assert(gkey->optimal_key); - gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false); - _mesa_set_add(&prog->libs->libs, gkey); } if (!(zink_debug & ZINK_DEBUG_NOOPT)) @@ -1722,7 +2002,7 @@ precompile_compute_job(void *data, void *gdata, int thread_index) comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module); assert(comp->module); comp->module->shobj = false; - comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base); + comp->module->obj = 
zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, false, NULL, &comp->base); /* comp->nir will be freed by zink_shader_compile */ comp->nir = NULL; assert(comp->module->obj.spirv); @@ -1869,21 +2149,41 @@ zink_destroy_gfx_program(struct zink_screen *screen, { if (prog->is_separable) zink_gfx_program_reference(screen, &prog->full_prog, NULL); - for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) { - hash_table_foreach(&prog->pipelines[i], entry) { - struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; + for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; - util_queue_fence_wait(&pc_entry->fence); - VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); - VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL); - free(pc_entry); + util_queue_fence_wait(&pc_entry->fence); + VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); + VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL); + free(pc_entry); + } } } + /* wait for all async compilation jobs */ + for (unsigned stage = 0; stage < ZINK_GFX_SHADER_COUNT; stage++) { + struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0]; + unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); + struct zink_shader_module **pzm = shader_cache->data; + for (unsigned i = 0; i < count; i++) { + struct zink_shader_module *iter = pzm[i]; + util_queue_fence_wait(&iter->fence); + } + } + + set_foreach(&prog->variants, entry) { + struct program_variant_key *prog_variant_key = (void*)entry->key; + assert(prog_variant_key->prog->is_variant_program); + zink_destroy_gfx_program(screen, prog_variant_key->prog); + FREE(prog_variant_key); + } + deinit_program(screen, &prog->base); for (int i = 0; i < MESA_SHADER_MESH_STAGES; 
++i) { - if (prog->shaders[i]) { + if (prog->shaders[i] && !prog->is_variant_program) { _mesa_set_remove_key(prog->shaders[i]->programs, prog); prog->shaders[i] = NULL; } @@ -1895,6 +2195,10 @@ zink_destroy_gfx_program(struct zink_screen *screen, blob_finish(&prog->blobs[i]); } } + while (util_dynarray_contains(&prog->uber_modules, void*)) { + struct zink_shader_module *zm = util_dynarray_pop(&prog->uber_modules, struct zink_shader_module*); + zink_destroy_shader_module(screen, zm); + } if (prog->libs) zink_gfx_lib_cache_unref(screen, prog->libs); @@ -2046,8 +2350,11 @@ bind_gfx_stage(struct zink_context *ctx, mesa_shader_stage stage, struct zink_sh zink_descriptors_init_bindless(ctx); } else { if (stage < MESA_SHADER_COMPUTE) { - if (ctx->curr_program) - ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; + if (ctx->curr_program_uber) { + ctx->gfx_pipeline_state.final_hash ^= CURR_KEY_PROGRAM(ctx)->last_variant_hash; + ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program_uber->st_key; + } + ctx->curr_program_uber = NULL; ctx->curr_program = NULL; } if (stage == MESA_SHADER_FRAGMENT || stage > MESA_SHADER_COMPUTE) { @@ -2391,7 +2698,7 @@ zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso) /* caller must lock prog->libs->lock */ struct zink_gfx_library_key * -zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) +zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state, bool compile_uber) { struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); bool is_mesh = !prog->shaders[MESA_SHADER_VERTEX]; @@ -2401,11 +2708,15 @@ zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *pr } gkey->optimal_key = !is_mesh ? 
state->optimal_key : state->mesh_optimal_key; + gkey->st_key = state->st_key.small_key.val; assert(is_mesh || gkey->optimal_key); for (unsigned i = 0; i < MESA_SHADER_MESH_STAGES; i++) gkey->modules[i] = prog->objs[i].mod; - gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog); - _mesa_set_add(&prog->libs->libs, gkey); + gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog->objs, prog); + if (is_mesh) + _mesa_set_add(&prog->libs->libs, gkey); + else + prog->libs->lib = gkey; return gkey; } @@ -2433,6 +2744,26 @@ print_exe_stages(VkShaderStageFlags stages) UNREACHABLE("unhandled combination of stages!"); } +static void +precompile_variant_job(void *data, void *gdata, int thread_index) +{ + struct zink_screen *screen = gdata; + struct precompile_variant_data *precompile_data = data; + struct zink_gfx_program *prog = precompile_data->prog; + struct zink_gfx_pipeline_state *state = &precompile_data->state; + + //generate_gfx_program_modules_optimal(NULL, screen, prog, state); + zink_screen_get_pipeline_cache(screen, &prog->base, true); + if (!screen->info.have_EXT_shader_object) { + simple_mtx_lock(&prog->libs->lock); + zink_create_pipeline_lib(screen, prog, state, false); + simple_mtx_unlock(&prog->libs->lock); + } + zink_screen_update_pipeline_cache(screen, &prog->base, true); + + FREE(data); +} + static void gfx_program_precompile_job(void *data, void *gdata, int thread_index) { @@ -2446,11 +2777,11 @@ gfx_program_precompile_job(void *data, void *gdata, int thread_index) state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard state.optimal_key = state.shader_keys_optimal.key.val; - generate_gfx_program_modules_optimal(NULL, screen, prog, &state); + generate_gfx_program_modules_optimal(NULL, screen, prog, &state, prog->is_uber_program); zink_screen_get_pipeline_cache(screen, &prog->base, true); if (!prog->base.uses_shobj) { 
simple_mtx_lock(&prog->libs->lock); - zink_create_pipeline_lib(screen, prog, &state); + zink_create_pipeline_lib(screen, prog, &state, prog->is_uber_program); simple_mtx_unlock(&prog->libs->lock); } prog->base.precompile_done = true; @@ -2494,17 +2825,18 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) simple_mtx_unlock(lock); return; } - struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash, is_mesh); + struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash, is_mesh, false); u_foreach_bit(i, shader_stages) assert(prog->shaders[i]); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; + prog->is_uber_program = true; simple_mtx_unlock(lock); if (zink_debug & ZINK_DEBUG_SHADERDB) { struct zink_screen *screen = zink_screen(pctx->screen); gfx_program_init(ctx, prog); if (screen->optimal_keys) - generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); + generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state, false); else generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state); VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state, @@ -2535,7 +2867,7 @@ zink_delete_shader_state(struct pipe_context *pctx, void *cso) static void precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen) { - zs->precompile.obj = zink_shader_compile_separate(screen, zs); + zs->precompile.obj = zink_shader_compile_separate(screen, zs, zs->is_uber); if (!screen->info.have_EXT_shader_object) { struct zink_shader_object objs[MESA_SHADER_MESH_STAGES] = {0}; objs[zs->info.stage].mod = zs->precompile.obj.mod; @@ -2543,6 +2875,20 @@ precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen) } } +static void +precompile_variant_separate_shader_job(void *data, void *gdata, int thread_index) +{ + struct zink_screen *screen = gdata; + struct 
precompile_separate_variant_data *precompile_data = data; + + nir_shader *nir = zink_shader_blob_deserialize(screen, precompile_data->blob); + precompile_data->zm->obj = zink_shader_compile(screen, precompile_data->uses_shobj, precompile_data->zs, nir, + precompile_data->has_key ? &precompile_data->key: NULL, + &precompile_data->st_key, + false, NULL, precompile_data->prog); + FREE(data); +} + static void gfx_shader_init_job(void *data, void *gdata, int thread_index) { @@ -2581,6 +2927,7 @@ zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader zink_descriptor_util_init_fbfetch(zink_context(pctx)); struct zink_shader *zs = zink_shader_create(zink_screen(pctx->screen), nir); + zs->is_uber = true; if (zink_debug & ZINK_DEBUG_NOBGC) gfx_shader_init_job(zs, screen, 0); else @@ -2593,6 +2940,8 @@ static void zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso) { struct zink_screen *screen = zink_screen(pctx->screen); + // HACK this is oversyncing but we have no way of knowing which jobs use this zink_shader + util_queue_finish(&screen->cache_get_thread); util_shader_reference(pctx, &screen->shaders, &cso, NULL); } diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index 3d5ffc170c9..6e2b0a202a6 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -128,7 +128,7 @@ zink_mesh_program_update_optimal(struct zink_context *ctx); struct zink_gfx_library_key * -zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state); +zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state, bool compile_uber); uint32_t hash_gfx_output(const void *key); uint32_t hash_gfx_output_ds3(const void *key); uint32_t hash_gfx_input(const void *key); @@ -159,7 +159,7 @@ zink_create_gfx_program(struct zink_context *ctx, struct zink_shader
**stages, unsigned vertices_per_patch, uint32_t gfx_hash, - bool is_mesh); + bool is_mesh, bool variant); void zink_destroy_gfx_program(struct zink_screen *screen, @@ -405,6 +405,27 @@ zink_set_zs_needs_shader_swizzle_key(struct zink_context *ctx, mesa_shader_stage zink_set_shader_key_base(ctx, pstage)->needs_zs_shader_swizzle = enable; } +static inline const union zink_st_small_key * +zink_get_st_small_key(struct zink_context *ctx) +{ + assert(zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.st_key.small_key; +} + +static inline union zink_st_small_key * +zink_set_st_small_key(struct zink_context *ctx) +{ + ctx->dirty_gfx_stages |= ctx->shader_stages & (MESA_SHADER_VERTEX | MESA_SHADER_GEOMETRY | MESA_SHADER_FRAGMENT); + assert(zink_screen(ctx->base.screen)->optimal_keys); + return &ctx->gfx_pipeline_state.st_key.small_key; +} + +static inline bool +needs_st_emulation(struct zink_context *ctx) +{ + return ctx->gfx_pipeline_state.st_key.small_key.val != 0; +} + ALWAYS_INLINE static bool zink_can_use_pipeline_libs(const struct zink_context *ctx) { @@ -464,6 +485,14 @@ zink_can_use_shader_objects_mesh(const struct zink_context *ctx) !ctx->fb_state.viewmask; } +ALWAYS_INLINE static bool +zink_can_use_uber(struct zink_context *ctx) +{ + bool generated_tcs = ctx->gfx_stages[MESA_SHADER_TESS_EVAL] && !ctx->gfx_stages[MESA_SHADER_TESS_CTRL]; + return zink_shader_key_optimal_no_tcs(ctx->gfx_pipeline_state.optimal_key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT && + zink_can_use_pipeline_libs(ctx) && (!generated_tcs || ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch == 3); +} + bool zink_set_rasterizer_discard(struct zink_context *ctx, bool disable); void diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index a175072e45b..3f38271dab2 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -102,6 +102,7 @@ template 
VkPipeline zink_get_gfx_pipeline(struct zink_context *ctx, struct zink_gfx_program *prog, + struct zink_gfx_program *variant_prog, struct zink_gfx_pipeline_state *state, enum mesa_prim mode) { @@ -113,7 +114,7 @@ zink_get_gfx_pipeline(struct zink_context *ctx, const unsigned idx = IS_MESH || screen->info.dynamic_state3_props.dynamicPrimitiveTopologyUnrestricted ? 0 : get_pipeline_idx= ZINK_DYNAMIC_STATE>(mode, vkmode); - assert(idx <= ARRAY_SIZE(prog->pipelines)); + assert(idx <= ARRAY_SIZE(prog->pipelines[0])); if (IS_MESH) { if (!state->mesh_dirty && !state->mesh_modules_changed) return state->mesh_pipeline; @@ -144,27 +145,28 @@ zink_get_gfx_pipeline(struct zink_context *ctx, } } /* extra safety asserts for optimal path to catch refactoring bugs */ - if (prog->optimal_keys) { + if (variant_prog->optimal_keys) { ASSERTED const union zink_shader_key_optimal *opt = (union zink_shader_key_optimal*)&prog->last_variant_hash; ASSERTED union zink_shader_key_optimal sanitized = {}; if (IS_MESH) { sanitized.val = zink_sanitize_optimal_key_mesh(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + assert(opt->val == sanitized.val); assert(state->mesh_optimal_key == sanitized.val); - } else { + } else if (!state->uber_required) { sanitized.val = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); + assert(opt->val == sanitized.val); assert(state->optimal_key == sanitized.val); } - assert(opt->val == sanitized.val); } if (IS_MESH) { state->mesh_modules_changed = false; - if (prog->last_finalized_hash[idx] == state->mesh_final_hash && - !prog->inline_variants && likely(prog->last_pipeline[idx]) && + if (prog->last_finalized_hash[0][idx] == state->mesh_final_hash && + !prog->inline_variants && likely(prog->last_pipeline[0][idx]) && /* this data is too big to compare in the fast-path */ likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { - state->mesh_pipeline = 
prog->last_pipeline[idx]->pipeline; + state->mesh_pipeline = prog->last_pipeline[0][idx]->pipeline; return state->mesh_pipeline; } } else { @@ -202,23 +204,22 @@ zink_get_gfx_pipeline(struct zink_context *ctx, /* shortcut for reusing previous pipeline across program changes */ if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2) { - if (prog->last_finalized_hash[idx] == state->final_hash && - !prog->inline_variants && likely(prog->last_pipeline[idx]) && - /* this data is too big to compare in the fast-path */ - likely(!prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { - state->pipeline = prog->last_pipeline[idx]->pipeline; + if (variant_prog->last_finalized_hash[state->uber_required][idx] == state->final_hash && + !variant_prog->inline_variants && likely(variant_prog->last_pipeline[state->uber_required][idx]) && + /* this data is too big to compare in the fast-path */ + likely(!variant_prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)) { + state->pipeline = variant_prog->last_pipeline[state->uber_required][idx]->pipeline; return state->pipeline; } } } - unsigned final_hash = IS_MESH ? state->mesh_final_hash : state->final_hash; - entry = _mesa_hash_table_search_pre_hashed(&prog->pipelines[idx], final_hash, state); + entry = _mesa_hash_table_search_pre_hashed(&variant_prog->pipelines[state->uber_required][idx], final_hash, state); if (!entry) { bool can_gpl = IS_MESH ? 
zink_can_use_pipeline_libs_mesh(ctx) : zink_can_use_pipeline_libs(ctx); /* always wait on async precompile/cache fence */ - util_queue_fence_wait(&prog->base.cache_fence); + util_queue_fence_wait(&variant_prog->base.cache_fence); struct zink_gfx_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(zink_gfx_pipeline_cache_entry); if (!pc_entry) return VK_NULL_HANDLE; @@ -227,28 +228,47 @@ zink_get_gfx_pipeline(struct zink_context *ctx, */ memcpy(&pc_entry->state, state, sizeof(*state)); pc_entry->state.rendering_info.pColorAttachmentFormats = pc_entry->state.rendering_formats; - pc_entry->prog = prog; + pc_entry->prog = state->uber_required ? prog : variant_prog; /* init the optimized background compile fence */ util_queue_fence_init(&pc_entry->fence); - entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[idx], final_hash, pc_entry, pc_entry); - if (prog->base.uses_shobj && !prog->is_separable) { - memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs)); + entry = _mesa_hash_table_insert_pre_hashed(&variant_prog->pipelines[state->uber_required][idx], final_hash, pc_entry, pc_entry); + if (variant_prog->base.uses_shobj && !variant_prog->is_separable) { + memcpy(pc_entry->shobjs, variant_prog->objs, sizeof(variant_prog->objs)); zink_gfx_program_compile_queue(ctx, pc_entry); } else if (HAVE_LIB && can_gpl) { uint32_t optimal_key = IS_MESH ? 
ctx->gfx_pipeline_state.mesh_optimal_key : ctx->gfx_pipeline_state.optimal_key; /* this is the graphics pipeline library path: find/construct all partial pipelines */ - simple_mtx_lock(&prog->libs->lock); - struct set_entry *he = _mesa_set_search(&prog->libs->libs, &optimal_key); struct zink_gfx_library_key *gkey; - if (he) { - gkey = (struct zink_gfx_library_key *)he->key; + if (IS_MESH) { + simple_mtx_lock(&prog->libs->lock); + struct set_entry *he = _mesa_set_search(&prog->libs->libs, &optimal_key); + if (he) { + gkey = (struct zink_gfx_library_key *)he->key; + } else { + assert(!prog->is_separable); + gkey = zink_create_pipeline_lib(screen, prog, &ctx->gfx_pipeline_state, false); + } + simple_mtx_unlock(&prog->libs->lock); } else { - assert(!prog->is_separable); - gkey = zink_create_pipeline_lib(screen, prog, &ctx->gfx_pipeline_state); + if (state->uber_required) { + simple_mtx_lock(&prog->libs->lock); + assert(prog->libs->lib); + gkey = prog->libs->lib; + simple_mtx_unlock(&prog->libs->lock); + } else { + simple_mtx_lock(&variant_prog->libs->lock); + if (variant_prog->libs->lib) { + gkey = variant_prog->libs->lib; + assert(gkey->optimal_key == optimal_key); + assert(gkey->st_key == state->st_key.small_key.val); + } else { + assert(!variant_prog->is_separable); + gkey = zink_create_pipeline_lib(screen, variant_prog, &ctx->gfx_pipeline_state, false); + } + simple_mtx_unlock(&variant_prog->libs->lock); + } } - simple_mtx_unlock(&prog->libs->lock); - struct zink_gfx_input_key *ikey = IS_MESH ? NULL : - DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ? + struct zink_gfx_input_key *ikey = DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT ? zink_find_or_create_input_dynamic(ctx, vkmode) : zink_find_or_create_input(ctx, vkmode); struct zink_gfx_output_key *okey = DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && screen->have_full_ds3 ? 
@@ -259,29 +279,30 @@ zink_get_gfx_pipeline(struct zink_context *ctx, pc_entry->gpl.gkey = gkey; pc_entry->gpl.okey = okey; /* try to hit optimized compile cache first if possible */ - if (!prog->is_separable) - pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey ? ikey->pipeline : VK_NULL_HANDLE, &gkey->pipeline, 1, okey->pipeline, true, true); + if (!variant_prog->is_separable) + pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, variant_prog, ikey ? ikey->pipeline : VK_NULL_HANDLE, &gkey->pipeline, 1, okey->pipeline, true, true); if (!pc_entry->pipeline) { /* create the non-optimized pipeline first using fast-linking to avoid stuttering */ - pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey ? ikey->pipeline : VK_NULL_HANDLE, &gkey->pipeline, 1, okey->pipeline, false, false); - if (!prog->is_separable) + pc_entry->pipeline = zink_create_gfx_pipeline_combined(screen, variant_prog, ikey ? ikey->pipeline : VK_NULL_HANDLE, &gkey->pipeline, 1, okey->pipeline, false, false); + if (!variant_prog->is_separable) /* trigger async optimized pipeline compile if this was the fast-linked unoptimized pipeline */ zink_gfx_program_compile_queue(ctx, pc_entry); } } else { + struct zink_shader_object *objs = state->uber_required ? 
prog->objs : variant_prog->objs; /* optimize by default only when expecting precompiles in order to reduce stuttering */ if (DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT && !IS_MESH) - pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB); + pc_entry->pipeline = zink_create_gfx_pipeline(screen, variant_prog, objs, state, state->element_state->binding_map, vkmode, !HAVE_LIB); else - pc_entry->pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, state, NULL, vkmode, !HAVE_LIB); - if (HAVE_LIB && !prog->is_separable) + pc_entry->pipeline = zink_create_gfx_pipeline(screen, variant_prog, objs, state, NULL, vkmode, !HAVE_LIB); + if (HAVE_LIB && !variant_prog->is_separable) /* trigger async optimized pipeline compile if this was an unoptimized pipeline */ zink_gfx_program_compile_queue(ctx, pc_entry); } if (pc_entry->pipeline == VK_NULL_HANDLE) return VK_NULL_HANDLE; - zink_screen_update_pipeline_cache(screen, &prog->base, false); + zink_screen_update_pipeline_cache(screen, &variant_prog->base, false); } struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data; @@ -291,8 +312,8 @@ zink_get_gfx_pipeline(struct zink_context *ctx, state->pipeline = cache_entry->pipeline; /* update states for fastpath */ if (DYNAMIC_STATE >= ZINK_DYNAMIC_VERTEX_INPUT) { - prog->last_finalized_hash[idx] = final_hash; - prog->last_pipeline[idx] = cache_entry; + variant_prog->last_finalized_hash[state->uber_required][idx] = final_hash; + variant_prog->last_pipeline[state->uber_required][idx] = cache_entry; } return IS_MESH ? 
state->mesh_pipeline : state->pipeline; } @@ -355,6 +376,8 @@ equals_gfx_pipeline_state(const void *a, const void *b) if (STAGE_MASK & STAGE_MASK_OPTIMAL) { if (sa->optimal_key != sb->optimal_key) return false; + if (sa->st_key.small_key.val != sb->st_key.small_key.val) + return false; if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) { if (sa->shadow != sb->shadow) return false; diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index ea883007387..a76a6556cc4 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -28,6 +28,41 @@ #include "compiler/shader_info.h" +union zink_st_small_key { + struct { + /** for ARB_color_buffer_float */ + uint8_t clamp_color:1; + /* for user-defined clip-planes */ + uint8_t lower_ucp:1; + /* Whether st_variant::driver_shader is for the draw module, + * not for the driver. + */ + uint8_t is_draw_shader:1; + uint8_t lower_flatshade:1; + uint8_t lower_alpha_test:1; + uint16_t pad: 11; // from here not key + }; + uint16_t val; +}; + +struct zink_st_variant_key +{ + union zink_st_small_key small_key; + + uint8_t ucp_enables: 8; + + unsigned lower_alpha_func:3; + + + uint32_t pad2: 5; //next array aligned to uint32 for easy access + + /* bitmask of sampler units; PIPE_CAP_GL_CLAMP */ + uint32_t gl_clamp[3]; + + /* needs more than 128 bytes */ + struct pipe_clip_state ucp_state; +}; + struct zink_vs_key_base { bool last_vertex_stage : 1; bool clip_halfz : 1; diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index 24175aceeed..2415a65351d 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -722,6 +722,21 @@ zink_bind_rasterizer_state(struct pipe_context *pctx, void *cso) if (!screen->optimal_keys) zink_update_gs_key_rectangular_line(ctx); + + if (screen->optimal_keys) { + struct zink_st_variant_key *key = &ctx->gfx_pipeline_state.st_key; + + if 
((zink_get_st_small_key(ctx)->clamp_color) != ctx->rast_state->base.clamp_fragment_color) + zink_set_st_small_key(ctx)->clamp_color = ctx->rast_state->base.clamp_fragment_color; + + if ((zink_get_st_small_key(ctx)->lower_flatshade) != ctx->rast_state->base.flatshade) + zink_set_st_small_key(ctx)->lower_flatshade = ctx->rast_state->base.flatshade; + + key->ucp_enables = ctx->rast_state->base.clip_plane_enable; + + if ((zink_get_st_small_key(ctx)->lower_ucp) != !!key->ucp_enables) + zink_set_st_small_key(ctx)->lower_ucp = !!key->ucp_enables; + } } } diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index c2305ece7f7..bddbb9e87c4 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -795,6 +795,7 @@ struct zink_shader_object { VkShaderModule mod; }; struct spirv_shader *spirv; + VkPipeline gpl; }; struct zink_shader { @@ -824,6 +825,7 @@ struct zink_shader { bool has_uniforms; bool has_edgeflags; bool needs_inlining; + bool is_uber; struct spirv_shader *spirv; struct { @@ -932,6 +934,7 @@ struct zink_gfx_pipeline_state { uint32_t vertex_strides[PIPE_MAX_ATTRIBS]; struct zink_vertex_elements_hw_state *element_state; struct zink_zs_swizzle_key *shadow; + bool uber_required; // emulation needed && !async compilation done enum mesa_prim shader_rast_prim, rast_prim; /* reduced type or max for unknown */ union { struct { @@ -942,6 +945,7 @@ struct zink_gfx_pipeline_state { union zink_shader_key_optimal key; } shader_keys_optimal; }; + struct zink_st_variant_key st_key; struct zink_blend_state *blend_state; VkFormat rendering_formats[PIPE_MAX_COLOR_BUFS]; VkPipelineRenderingCreateInfo rendering_info; @@ -1008,6 +1012,7 @@ enum zink_gfx_push_constant_member { */ struct zink_shader_module { struct zink_shader_object obj; + struct util_queue_fence fence; uint32_t hash; bool shobj; bool default_variant; @@ -1049,6 +1054,7 @@ typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const 
void *b); struct zink_gfx_library_key { uint32_t optimal_key; //equals_pipeline_lib_optimal + uint32_t st_key; VkShaderModule modules[MESA_SHADER_MESH_STAGES]; VkPipeline pipeline; }; @@ -1115,6 +1121,13 @@ struct zink_gfx_lib_cache { simple_mtx_t lock; struct set libs; //zink_gfx_library_key -> VkPipeline + struct zink_gfx_library_key *lib; //zink_gfx_library_key -> VkPipeline +}; + +struct zink_gfx_program_variant_key { + uint32_t optimal_key; //equals_pipeline_lib_optimal + uint32_t st_key; + struct zink_gfx_program *prog; }; struct zink_gfx_program { @@ -1135,21 +1148,29 @@ struct zink_gfx_program { uint32_t module_hash[MESA_SHADER_MESH_STAGES]; struct blob blobs[MESA_SHADER_MESH_STAGES]; struct util_dynarray shader_cache[MESA_SHADER_MESH_STAGES][2][2]; //normal, nonseamless cubes, inline uniforms + struct util_dynarray uber_modules; + struct set variants; + struct zink_gfx_program *base_variant; //quick access to base variant (only !NULL when done compiling) + struct zink_gfx_program *uber_variant; unsigned inlined_variant_count[MESA_SHADER_MESH_STAGES]; uint32_t default_variant_hash; uint8_t inline_variants; //which stages are using inlined uniforms bool needs_inlining; // whether this program requires some uniforms to be inlined bool has_edgeflags; bool optimal_keys; + bool started_compiling; + bool is_uber_program; + bool is_variant_program; /* separable */ struct zink_gfx_program *full_prog; - struct hash_table pipelines[11]; // [number of draw modes we support] + struct hash_table pipelines[2][11]; // [uber_emulation][number of draw modes we support] uint32_t last_variant_hash; + uint32_t st_key; - uint32_t last_finalized_hash[4]; //[primtype idx] - struct zink_gfx_pipeline_cache_entry *last_pipeline[4]; //[primtype idx] + uint32_t last_finalized_hash[2][4]; //[uber_emulation][primtype idx] + struct zink_gfx_pipeline_cache_entry *last_pipeline[2][4]; //[uber_emulation][primtype idx] struct zink_gfx_lib_cache *libs; }; @@ -1787,6 +1808,7 @@ struct
zink_context { simple_mtx_t program_lock[8]; uint32_t gfx_hash; struct zink_gfx_program *curr_program; + struct zink_gfx_program *curr_program_uber; struct set gfx_inputs; struct set gfx_outputs;