/*
 * Patch summary: move zink's graphics pipeline-library caches
 * (struct zink_gfx_lib_cache) into per-screen sets so that a non-separable
 * gfx program looks its cache up on the screen instead of always creating
 * its own.
 *
 * zink_types.h
 *   - struct zink_shader gains a util_dynarray `pipeline_libs`.
 *   - struct zink_gfx_lib_cache gains a leading `shaders[]` array (marked
 *     "for hashing") and a `removed` flag.
 *   - struct zink_screen gains `pipeline_libs[8]` sets and a matching
 *     `pipeline_libs_lock[8]` mutex array.
 *
 * zink_draw.cpp / zink_screen.h / zink_screen.c
 *   - New zink_init_screen_pipeline_libs() initialises each of the eight sets
 *     with hash_gfx_program<N>/equals_gfx_program<N> and initialises the
 *     mutexes; it is called from zink_internal_create_screen().
 *   - zink_destroy_screen() clears the sets and destroys the mutexes.
 *
 * zink_program.c
 *   - New find_or_create_lib_cache(): picks the set index from the program's
 *     stages_present (with the TESS_CTRL bit masked out when the TCS is a
 *     generated one), then, under pipeline_libs_lock[idx], searches-or-adds
 *     using prog->gfx_hash with prog->shaders as the lookup key. On a miss it
 *     calls create_lib_cache(), copies prog->shaders into libs->shaders,
 *     stores libs as the entry key, appends libs to the `pipeline_libs`
 *     dynarray of every present shader (skipping a generated TCS) under that
 *     shader's lock, and sets libs->refcount to the number of shaders it was
 *     appended to.
 *   - zink_create_gfx_program() now calls find_or_create_lib_cache() in place
 *     of create_lib_cache() followed by p_atomic_set(&prog->libs, 1); its
 *     local `generated_tcs` is removed.
 *   - zink_destroy_gfx_program() now unrefs prog->libs only when
 *     prog->is_separable.
 *
 * zink_compiler.c
 *   - zink_shader_create() initialises ret->pipeline_libs.
 *   - zink_shader_free() pops every entry of shader->pipeline_libs; an entry
 *     not yet flagged `removed` is flagged, then removed from
 *     screen->pipeline_libs[idx] under pipeline_libs_lock[idx]; each popped
 *     entry is then passed to zink_gfx_lib_cache_unref().
 *
 * NOTE(review): in zink_shader_free() `libs->removed` is tested and set
 *   before pipeline_libs_lock[idx] is taken -- confirm that two shaders
 *   sharing one lib cache cannot be freed concurrently, or move the flag
 *   update under the lock.
 * NOTE(review): non-separable programs no longer unref prog->libs on destroy;
 *   presumably lifetime is now carried solely by the per-shader references
 *   counted in find_or_create_lib_cache() -- verify nothing dereferences
 *   prog->libs after the last shader is freed.
 * NOTE(review): the zink_draw.cpp hunk ends with "\ No newline at end of
 *   file"; consider terminating the file with a newline.
 */
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 2352fe17aa4..73b87f1f1f2 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -4231,6 +4231,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model; util_queue_fence_init(&ret->precompile.fence); + util_dynarray_init(&ret->pipeline_libs, ret); ret->hash = _mesa_hash_pointer(ret); ret->programs = _mesa_pointer_set_create(NULL); @@ -4500,6 +4501,17 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) prog->base.removed = true; simple_mtx_unlock(&prog->ctx->program_lock[idx]); util_queue_fence_wait(&prog->base.cache_fence); + + while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) { + struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*); + if (!libs->removed) { + libs->removed = true; + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + _mesa_set_remove_key(&screen->pipeline_libs[idx], libs); + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + } + zink_gfx_lib_cache_unref(screen, libs); + } } if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) { prog->shaders[stage] = NULL; diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index dd099db85b0..368ab25a900 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -1215,3 +1215,18 @@ zink_init_grid_functions(struct zink_context *ctx) */ ctx->base.launch_grid = zink_invalid_launch_grid; } + +void +zink_init_screen_pipeline_libs(struct zink_screen *screen) +{ + _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>); + _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>); + 
_mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>); + _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>); + _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>); + _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>); + _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>); + _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>); + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) + simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain); +} \ No newline at end of file diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index fd0dc0301eb..be7d841f2bc 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -957,6 +957,42 @@ create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs) return libs; } +static struct zink_gfx_lib_cache * +find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + unsigned stages_present = prog->stages_present; + bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated; + if (generated_tcs) + stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + unsigned idx = zink_program_cache_stages(stages_present); + struct set *ht = &screen->pipeline_libs[idx]; + const uint32_t hash = prog->gfx_hash; + + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + bool found = false; + struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found); + struct zink_gfx_lib_cache *libs; + if (found) { + libs = (void*)entry->key; + } else { + libs = create_lib_cache(prog, generated_tcs); + memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders)); + entry->key = libs; + 
unsigned refs = 0; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) { + simple_mtx_lock(&prog->shaders[i]->lock); + util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs); + simple_mtx_unlock(&prog->shaders[i]->lock); + refs++; + } + } + p_atomic_set(&libs->refcount, refs); + } + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + return libs; +} + struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, @@ -981,13 +1017,11 @@ zink_create_gfx_program(struct zink_context *ctx, prog->stages_present |= BITFIELD_BIT(i); } } - bool generated_tcs = false; if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] = zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch); prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); - generated_tcs = true; } prog->stages_remaining = prog->stages_present; @@ -1010,8 +1044,7 @@ zink_create_gfx_program(struct zink_context *ctx, } } - prog->libs = create_lib_cache(prog, generated_tcs); - p_atomic_set(&prog->libs, 1); + prog->libs = find_or_create_lib_cache(screen, prog); struct mesa_sha1 sctx; _mesa_sha1_init(&sctx); @@ -1077,6 +1110,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT]; prog->last_vertex_stage = stages[MESA_SHADER_VERTEX]; prog->libs = create_lib_cache(prog, false); + /* this libs cache is owned by the program */ p_atomic_set(&prog->libs->refcount, 1); unsigned refs = 0; @@ -1409,7 +1443,8 @@ zink_destroy_gfx_program(struct zink_screen *screen, ralloc_free(prog->nir[i]); } } - zink_gfx_lib_cache_unref(screen, prog->libs); + if (prog->is_separable) + zink_gfx_lib_cache_unref(screen, prog->libs); ralloc_free(prog); } diff 
--git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 9f7f4488c52..98abc2417e9 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -1435,6 +1435,12 @@ zink_destroy_screen(struct pipe_screen *pscreen) } #endif disk_cache_destroy(screen->disk_cache); + + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs); i++) + _mesa_set_clear(&screen->pipeline_libs[i], NULL); + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) + simple_mtx_destroy(&screen->pipeline_libs_lock[i]); + zink_bo_deinit(screen); util_live_shader_cache_deinit(&screen->shaders); @@ -2938,6 +2944,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config) screen->buffer_barrier = zink_resource_buffer_barrier; } + zink_init_screen_pipeline_libs(screen); if (!init_layouts(screen)) goto fail; diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 1eed13c197c..d65f50ac88d 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -34,6 +34,9 @@ extern "C" { extern uint32_t zink_debug; struct util_dl_library; +void +zink_init_screen_pipeline_libs(struct zink_screen *screen); + /* update last_finished to account for batch_id wrapping */ static inline void diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 35c2d78919f..0c81b9eee40 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -747,6 +747,7 @@ struct zink_shader { simple_mtx_t lock; struct set *programs; + struct util_dynarray pipeline_libs; union { struct { @@ -984,7 +985,10 @@ struct zink_gfx_pipeline_cache_entry { }; struct zink_gfx_lib_cache { + /* for hashing */ + struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; unsigned refcount; + bool removed; //once removed from cache simple_mtx_t lock; struct set libs; //zink_gfx_library_key -> VkPipeline @@ 
-1285,6 +1289,12 @@ struct zink_screen { struct util_queue cache_put_thread; struct util_queue cache_get_thread; + /* there are 5 gfx stages, but VS and FS are assumed to be always present, + * thus only 3 stages need to be considered, giving 2^3 = 8 program caches. + */ + struct set pipeline_libs[8]; + simple_mtx_t pipeline_libs_lock[8]; + simple_mtx_t desc_set_layouts_lock; struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_BASE_TYPES]; simple_mtx_t desc_pool_keys_lock;