diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index d9b5631777a..09a727c83c3 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -44,7 +44,6 @@ libtu_files = files( 'tu_nir_lower_multiview.c', 'tu_pass.c', 'tu_pipeline.c', - 'tu_pipeline_cache.c', 'tu_private.h', 'tu_query.c', 'tu_shader.c', diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index a14d2646fdf..441a673a197 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -230,6 +230,11 @@ get_device_extensions(const struct tu_physical_device *device, }; } +static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = { + &tu_shaders_ops, + NULL, +}; + VkResult tu_physical_device_init(struct tu_physical_device *device, struct tu_instance *instance) @@ -275,13 +280,6 @@ tu_physical_device_init(struct tu_physical_device *device, goto fail_free_name; } - /* The gpu id is already embedded in the uuid so we just pass "tu" - * when creating the cache. - */ - char buf[VK_UUID_SIZE * 2 + 1]; - disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); - device->disk_cache = disk_cache_create(device->name, buf, 0); - fd_get_driver_uuid(device->driver_uuid); fd_get_device_uuid(device->device_uuid, &device->dev_id); @@ -298,21 +296,28 @@ tu_physical_device_init(struct tu_physical_device *device, &supported_extensions, &dispatch_table); if (result != VK_SUCCESS) - goto fail_free_cache; + goto fail_free_name; #if TU_HAS_SURFACE result = tu_wsi_init(device); if (result != VK_SUCCESS) { vk_startup_errorf(instance, result, "WSI init failure"); vk_physical_device_finish(&device->vk); - goto fail_free_cache; + goto fail_free_name; } #endif + /* The gpu id is already embedded in the uuid so we just pass "tu" + * when creating the cache. 
+ */ + char buf[VK_UUID_SIZE * 2 + 1]; + disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); + device->vk.disk_cache = disk_cache_create(device->name, buf, 0); + + device->vk.pipeline_cache_import_ops = cache_import_ops; + return VK_SUCCESS; -fail_free_cache: - disk_cache_destroy(device->disk_cache); fail_free_name: vk_free(&instance->vk.alloc, (void *)device->name); return result; @@ -325,7 +330,6 @@ tu_physical_device_finish(struct tu_physical_device *device) tu_wsi_finish(device); #endif - disk_cache_destroy(device->disk_cache); close(device->local_fd); if (device->master_fd != -1) close(device->master_fd); @@ -1790,6 +1794,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, &(struct ir3_compiler_options) { .robust_ubo_access = robust_buffer_access2, .push_ubo_with_preamble = true, + .disable_cache = true, }); if (!device->compiler) { result = vk_startup_errorf(physical_device->instance, @@ -1851,16 +1856,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, /* initialize to ones so ffs can be used to find unused slots */ BITSET_ONES(device->custom_border_color); - VkPipelineCacheCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - ci.pNext = NULL; - ci.flags = 0; - ci.pInitialData = NULL; - ci.initialDataSize = 0; - VkPipelineCache pc; - result = - tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc); - if (result != VK_SUCCESS) { + struct vk_pipeline_cache_create_info pcc_info = { }; + device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info, + false); + if (!device->mem_cache) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; vk_startup_errorf(device->instance, result, "create pipeline cache failed"); goto fail_pipeline_cache; } @@ -1929,8 +1929,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, } pthread_condattr_destroy(&condattr); - device->mem_cache = tu_pipeline_cache_from_handle(pc); - result = tu_autotune_init(&device->autotune, device); if (result != VK_SUCCESS) { goto fail_timeline_cond; @@ -1959,7 +1957,7 @@ fail_prepare_perfcntrs_pass_cs: fail_perfcntrs_pass_entries_alloc: free(device->perfcntrs_pass_cs); fail_perfcntrs_pass_alloc: - tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL); + vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc); fail_pipeline_cache: tu_destroy_clear_blit_shaders(device); fail_global_bo_map: @@ -2009,8 +2007,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) ir3_compiler_destroy(device->compiler); - VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache); - tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL); + vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc); if (device->perfcntrs_pass_cs) { free(device->perfcntrs_pass_cs_entries); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index d97141348bf..17bd86fd0ac 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -250,13 +250,12 @@ struct tu_pipeline_builder { struct tu_device *device; void *mem_ctx; - struct tu_pipeline_cache *cache; + struct vk_pipeline_cache *cache; struct tu_pipeline_layout *layout; const VkAllocationCallbacks *alloc; const VkGraphicsPipelineCreateInfo *create_info; - struct tu_shader *shaders[MESA_SHADER_FRAGMENT + 1]; - struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1]; + struct tu_compiled_shaders *shaders; struct ir3_shader_variant *binning_variant; uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1]; uint64_t binning_vs_iova; @@ -660,7 
+659,7 @@ tu6_emit_xs(struct tu_cs *cs, } static void -tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, +tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v, const struct tu_pvtmem_config *pvtmem, uint64_t binary_iova) @@ -1686,8 +1685,8 @@ tu6_emit_program_config(struct tu_cs *cs, .gs_state = true, .fs_state = true, .gfx_ibo = true)); - for (; stage < ARRAY_SIZE(builder->shaders); stage++) { - tu6_emit_xs_config(cs, stage, builder->variants[stage]); + for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { + tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]); } } @@ -1697,16 +1696,16 @@ tu6_emit_program(struct tu_cs *cs, bool binning_pass, struct tu_pipeline *pipeline) { - const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; const struct ir3_shader_variant *bs = builder->binning_variant; - const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; - const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL]; - const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY]; - const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT]; + const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL]; + const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY]; + const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; gl_shader_stage stage = MESA_SHADER_VERTEX; uint32_t cps_per_patch = builder->create_info->pTessellationState ? builder->create_info->pTessellationState->patchControlPoints : 0; - bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output; + bool multi_pos_output = builder->shaders->multi_pos_output; /* Don't use the binning pass variant when GS is present because we don't * support compiling correct binning pass variants with GS. 
@@ -1717,8 +1716,8 @@ tu6_emit_program(struct tu_cs *cs, stage++; } - for (; stage < ARRAY_SIZE(builder->shaders); stage++) { - const struct ir3_shader_variant *xs = builder->variants[stage]; + for (; stage < ARRAY_SIZE(builder->shaders->variants); stage++) { + const struct ir3_shader_variant *xs = builder->shaders->variants[stage]; if (stage == MESA_SHADER_FRAGMENT && binning_pass) fs = xs = NULL; @@ -2255,24 +2254,23 @@ tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline *pipeline, struct tu_pipeline_layout *layout, struct tu_pipeline_builder *builder, - struct tu_pipeline_cache *cache, struct ir3_shader_variant *compute) { uint32_t size = 1024 + tu6_load_state_size(pipeline, layout, compute); /* graphics case: */ if (builder) { - for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) { - if (builder->variants[i]) { - size += builder->variants[i]->info.size / 4; + for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { + if (builder->shaders->variants[i]) { + size += builder->shaders->variants[i]->info.size / 4; } } size += builder->binning_variant->info.size / 4; builder->additional_cs_reserve_size = 0; - for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { - struct ir3_shader_variant *variant = builder->variants[i]; + for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { + struct ir3_shader_variant *variant = builder->shaders->variants[i]; if (variant) { builder->additional_cs_reserve_size += tu_xs_get_additional_cs_size_dwords(variant); @@ -2445,10 +2443,248 @@ tu_link_shaders(struct tu_pipeline_builder *builder, } } +static void +tu_shader_key_init(struct tu_shader_key *key, + const VkPipelineShaderStageCreateInfo *stage_info, + struct tu_device *dev) +{ + enum ir3_wavesize_option api_wavesize, real_wavesize; + + if (stage_info) { + if (stage_info->flags & + VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) { + api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE; + } else { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info = + vk_find_struct_const(stage_info->pNext, + PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT); + + if (size_info) { + if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) { + api_wavesize = IR3_SINGLE_ONLY; + } else { + assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2); + api_wavesize = IR3_DOUBLE_ONLY; + } + } else { + /* Match the exposed subgroupSize. 
*/ + api_wavesize = IR3_DOUBLE_ONLY; + } + + if (stage_info->flags & + VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) + real_wavesize = api_wavesize; + else if (api_wavesize == IR3_SINGLE_ONLY) + real_wavesize = IR3_SINGLE_ONLY; + else + real_wavesize = IR3_SINGLE_OR_DOUBLE; + } + } else { + api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE; + } + + key->api_wavesize = api_wavesize; + key->real_wavesize = real_wavesize; +} + +static void +tu_hash_stage(struct mesa_sha1 *ctx, + const VkPipelineShaderStageCreateInfo *stage, + const struct tu_shader_key *key) +{ + VK_FROM_HANDLE(vk_shader_module, module, stage->module); + const VkSpecializationInfo *spec_info = stage->pSpecializationInfo; + + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, stage->pName, strlen(stage->pName)); + if (spec_info && spec_info->mapEntryCount) { + _mesa_sha1_update(ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); + } + + _mesa_sha1_update(ctx, key, sizeof(*key)); +} + +/* Hash flags which can affect ir3 shader compilation which aren't known until + * logical device creation. + */ +static void +tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler) +{ + _mesa_sha1_update(ctx, &compiler->robust_ubo_access, + sizeof(compiler->robust_ubo_access)); + _mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug)); +} + +static void +tu_hash_shaders(unsigned char *hash, + const VkPipelineShaderStageCreateInfo **stages, + const struct tu_pipeline_layout *layout, + const struct tu_shader_key *keys, + const struct ir3_shader_key *ir3_key, + const struct ir3_compiler *compiler) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + + if (layout) + _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + + _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key)); + + for (int i = 0; i < MESA_SHADER_STAGES; ++i) { + if (stages[i]) { + tu_hash_stage(&ctx, stages[i], &keys[i]); + } + } + tu_hash_compiler(&ctx, compiler); + _mesa_sha1_final(&ctx, hash); +} + +static void +tu_hash_compute(unsigned char *hash, + const VkPipelineShaderStageCreateInfo *stage, + const struct tu_pipeline_layout *layout, + const struct tu_shader_key *key, + const struct ir3_compiler *compiler) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + + if (layout) + _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + + tu_hash_stage(&ctx, stage, key); + + tu_hash_compiler(&ctx, compiler); + _mesa_sha1_final(&ctx, hash); +} + +static bool +tu_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob); + +static struct vk_pipeline_cache_object * +tu_shaders_deserialize(struct vk_device *device, + const void *key_data, size_t key_size, + struct blob_reader *blob); + +static void +tu_shaders_destroy(struct vk_pipeline_cache_object *object) +{ + struct tu_compiled_shaders *shaders = + container_of(object, struct tu_compiled_shaders, base); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) + ralloc_free(shaders->variants[i]); + + vk_pipeline_cache_object_finish(&shaders->base); + vk_free(&object->device->alloc, shaders); +} + +const struct vk_pipeline_cache_object_ops tu_shaders_ops = { + .serialize = tu_shaders_serialize, + .deserialize = tu_shaders_deserialize, + .destroy = tu_shaders_destroy, +}; + +static struct tu_compiled_shaders * +tu_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size) +{ + 
VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct tu_compiled_shaders, shaders, 1); + VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size); + + if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + memcpy(obj_key_data, key_data, key_size); + vk_pipeline_cache_object_init(&dev->vk, &shaders->base, + &tu_shaders_ops, obj_key_data, key_size); + + return shaders; +} + +static bool +tu_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct tu_compiled_shaders *shaders = + container_of(object, struct tu_compiled_shaders, base); + + blob_write_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts)); + blob_write_uint8(blob, shaders->active_desc_sets); + blob_write_uint8(blob, shaders->multi_pos_output); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { + if (shaders->variants[i]) { + blob_write_uint8(blob, 1); + ir3_store_variant(blob, shaders->variants[i]); + } else { + blob_write_uint8(blob, 0); + } + } + + return true; +} + +static struct vk_pipeline_cache_object * +tu_shaders_deserialize(struct vk_device *_device, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct tu_device *dev = container_of(_device, struct tu_device, vk); + struct tu_compiled_shaders *shaders = + tu_shaders_init(dev, key_data, key_size); + + if (!shaders) + return NULL; + + blob_copy_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts)); + shaders->active_desc_sets = blob_read_uint8(blob); + shaders->multi_pos_output = blob_read_uint8(blob); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { + bool has_shader = blob_read_uint8(blob); + if (has_shader) { + shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); + } + } + + return &shaders->base; +} + +static struct tu_compiled_shaders * +tu_pipeline_cache_lookup(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &tu_shaders_ops, NULL); + if (object) + return container_of(object, struct tu_compiled_shaders, base); + else + return NULL; +} + +static struct tu_compiled_shaders * +tu_pipeline_cache_insert(struct vk_pipeline_cache *cache, + struct tu_compiled_shaders *shaders) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_add_object(cache, &shaders->base); + return container_of(object, struct tu_compiled_shaders, base); +} + static VkResult tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) { + VkResult result = VK_SUCCESS; const struct ir3_compiler *compiler = builder->device->compiler; const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { NULL @@ -2459,10 +2695,40 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, stage_infos[stage] = &builder->create_info->pStages[i]; } - struct ir3_shader_key key = {}; - tu_pipeline_shader_key_init(&key, pipeline, builder->create_info); + struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { }; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(keys); stage++) { + tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device); + } - nir_shader *nir[ARRAY_SIZE(builder->shaders)] = { NULL }; + struct ir3_shader_key ir3_key = {}; + tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info); + + keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; + 
keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; + + unsigned char pipeline_sha1[20]; + tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler); + + const bool executable_info = builder->create_info->flags & + VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; + + char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL }; + + struct tu_compiled_shaders *compiled_shaders; + + if (!executable_info) { + compiled_shaders = + tu_pipeline_cache_lookup(builder->cache, &pipeline_sha1, + sizeof(pipeline_sha1)); + + if (compiled_shaders) + goto done; + } + + nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL }; + + struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL }; for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir); stage++) { @@ -2471,8 +2737,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, continue; nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage); - if (!nir[stage]) - return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!nir[stage]) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } } if (!nir[MESA_SHADER_FRAGMENT]) { @@ -2484,11 +2752,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, nir[MESA_SHADER_FRAGMENT] = fs_b.shader; } - const bool executable_info = builder->create_info->flags & - VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; - - char *nir_initial_disasm[ARRAY_SIZE(builder->shaders)] = { NULL }; - if (executable_info) { for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir); stage++) { @@ -2509,26 +2772,27 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, continue; struct tu_shader *shader = - tu_shader_create(builder->device, nir[stage], stage_infos[stage], - builder->multiview_mask, builder->layout, - builder->alloc); - if (!shader) - return VK_ERROR_OUT_OF_HOST_MEMORY; + tu_shader_create(builder->device, nir[stage], &keys[stage], + builder->layout, builder->alloc); + if (!shader) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } /* In SPIR-V generated from GLSL, the primitive mode is specified in the * tessellation evaluation shader, but in SPIR-V generated from HLSL, * the mode is specified in the tessellation control shader. */ if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) && - key.tessellation == IR3_TESS_NONE) { - key.tessellation = tu6_get_tessmode(shader); + ir3_key.tessellation == IR3_TESS_NONE) { + ir3_key.tessellation = tu6_get_tessmode(shader); } if (stage > MESA_SHADER_TESS_CTRL) { if (stage == MESA_SHADER_FRAGMENT) { - key.tcs_store_primid = key.tcs_store_primid || + ir3_key.tcs_store_primid = ir3_key.tcs_store_primid || (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)); } else { - key.tcs_store_primid = key.tcs_store_primid || + ir3_key.tcs_store_primid = ir3_key.tcs_store_primid || BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); } } @@ -2537,85 +2801,121 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, * which is set in tu_lower_io. 
*/ desc_sets |= shader->active_desc_sets; - builder->shaders[stage] = shader; + shaders[stage] = shader; } - pipeline->active_desc_sets = desc_sets; - struct tu_shader *last_shader = builder->shaders[MESA_SHADER_GEOMETRY]; + struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY]; if (!last_shader) - last_shader = builder->shaders[MESA_SHADER_TESS_EVAL]; + last_shader = shaders[MESA_SHADER_TESS_EVAL]; if (!last_shader) - last_shader = builder->shaders[MESA_SHADER_VERTEX]; + last_shader = shaders[MESA_SHADER_VERTEX]; uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written; - key.layer_zero = !(outputs_written & VARYING_BIT_LAYER); - key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT); + ir3_key.layer_zero = !(outputs_written & VARYING_BIT_LAYER); + ir3_key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT); - pipeline->tess.patch_type = key.tessellation; + compiled_shaders = + tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1)); - for (gl_shader_stage stage = MESA_SHADER_VERTEX; - stage < ARRAY_SIZE(builder->shaders); stage++) { - if (!builder->shaders[stage]) - continue; - - bool created; - builder->variants[stage] = - ir3_shader_get_variant(builder->shaders[stage]->ir3_shader, - &key, false, executable_info, &created); - if (!builder->variants[stage]) - return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!compiled_shaders) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; } - uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler); - - key.safe_constlen = true; + compiled_shaders->active_desc_sets = desc_sets; + compiled_shaders->multi_pos_output = + shaders[MESA_SHADER_VERTEX]->multi_pos_output; for (gl_shader_stage stage = MESA_SHADER_VERTEX; - stage < ARRAY_SIZE(builder->shaders); stage++) { - if (!builder->shaders[stage]) + stage < ARRAY_SIZE(shaders); stage++) { + if (!shaders[stage]) + continue; + + compiled_shaders->variants[stage] = + ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key, + executable_info); + if (!compiled_shaders->variants[stage]) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + compiled_shaders->push_consts[stage] = shaders[stage]->push_consts; + } + + uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler); + + ir3_key.safe_constlen = true; + + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(shaders); stage++) { + if (!shaders[stage]) continue; if (safe_constlens & (1 << stage)) { - bool created; - builder->variants[stage] = - ir3_shader_get_variant(builder->shaders[stage]->ir3_shader, - &key, false, executable_info, &created); - if (!builder->variants[stage]) - return VK_ERROR_OUT_OF_HOST_MEMORY; + ralloc_free(compiled_shaders->variants[stage]); + compiled_shaders->variants[stage] = + ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key, + executable_info); + if (!compiled_shaders->variants[stage]) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } } } - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - struct ir3_shader_variant *variant; - - if (vs->ir3_shader->stream_output.num_outputs || - !ir3_has_binning_vs(&key)) { - variant = builder->variants[MESA_SHADER_VERTEX]; - } else { - bool created; - key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX)); - variant = ir3_shader_get_variant(vs->ir3_shader, &key, - true, executable_info, &created); - if (!variant) - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - builder->binning_variant = variant; - for (gl_shader_stage stage = MESA_SHADER_VERTEX; 
stage < ARRAY_SIZE(nir); stage++) { - if (builder->variants[stage]) { - tu_append_executable(pipeline, builder->variants[stage], + if (shaders[stage]) { + tu_shader_destroy(builder->device, shaders[stage], builder->alloc); + } + } + + compiled_shaders = + tu_pipeline_cache_insert(builder->cache, compiled_shaders); + +done: + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(nir); stage++) { + if (compiled_shaders->variants[stage]) { + tu_append_executable(pipeline, compiled_shaders->variants[stage], nir_initial_disasm[stage]); } } - if (builder->binning_variant != builder->variants[MESA_SHADER_VERTEX]) { - tu_append_executable(pipeline, builder->binning_variant, NULL); + struct ir3_shader_variant *vs = + compiled_shaders->variants[MESA_SHADER_VERTEX]; + + struct ir3_shader_variant *variant; + if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) { + tu_append_executable(pipeline, vs->binning, NULL); + variant = vs->binning; + } else { + variant = vs; + } + + builder->binning_variant = variant; + + builder->shaders = compiled_shaders; + + pipeline->active_desc_sets = compiled_shaders->active_desc_sets; + if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { + pipeline->tess.patch_type = + compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation; } return VK_SUCCESS; + +fail: + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(nir); stage++) { + if (shaders[stage]) { + tu_shader_destroy(builder->device, shaders[stage], builder->alloc); + } + } + + if (compiled_shaders) + vk_pipeline_cache_object_unref(&compiled_shaders->base); + + return result; } static void @@ -2722,12 +3022,12 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, static void tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, - struct tu_shader *shader, + struct tu_push_constant_range *push_consts, struct ir3_shader_variant *v) { link->const_state = *ir3_const_state(v); link->constlen = v->constlen; - link->push_consts = shader->push_consts; + link->push_consts = *push_consts; } static void @@ -2765,13 +3065,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, } pipeline->active_stages = stages; - for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders); i++) { - if (!builder->shaders[i]) + for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { + if (!builder->shaders->variants[i]) continue; tu_pipeline_set_linkage(&pipeline->program.link[i], - builder->shaders[i], - builder->variants[i]); + &builder->shaders->push_consts[i], + builder->shaders->variants[i]); } } @@ -2781,7 +3081,7 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, { const VkPipelineVertexInputStateCreateInfo *vi_info = builder->create_info->pVertexInputState; - const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; const struct ir3_shader_variant *bs = builder->binning_variant; /* Bindings may contain holes */ @@ -2847,7 +3147,7 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); pipeline->tess.upper_left_domain_origin = !domain_info || domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; - const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *hs = 
builder->shaders->variants[MESA_SHADER_TESS_CTRL]; pipeline->tess.param_stride = hs->output_size * 4; } @@ -3092,8 +3392,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, .bfref = ds_info->back.reference & 0xff)); } - if (builder->shaders[MESA_SHADER_FRAGMENT]) { - const struct ir3_shader_variant *fs = &builder->shaders[MESA_SHADER_FRAGMENT]->ir3_shader->variants[0]; + if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) { + const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; if (fs->has_kill || fs->no_earlyz || fs->writes_pos) { pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE; } @@ -3300,18 +3600,19 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder, result = tu_pipeline_builder_compile_shaders(builder, *pipeline); if (result != VK_SUCCESS) { vk_object_free(&builder->device->vk, builder->alloc, *pipeline); - return result; + return VK_ERROR_OUT_OF_HOST_MEMORY; } result = tu_pipeline_allocate_cs(builder->device, *pipeline, - builder->layout, builder, builder->cache, NULL); + builder->layout, builder, NULL); if (result != VK_SUCCESS) { vk_object_free(&builder->device->vk, builder->alloc, *pipeline); return result; } - for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) - builder->shader_iova[i] = tu_upload_variant(*pipeline, builder->variants[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++) + builder->shader_iova[i] = + tu_upload_variant(*pipeline, builder->shaders->variants[i]); builder->binning_vs_iova = tu_upload_variant(*pipeline, builder->binning_variant); @@ -3323,10 +3624,10 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder, uint32_t pvtmem_size = 0; bool per_wave = true; - for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) { - if (builder->variants[i]) { - pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size); - if (!builder->variants[i]->pvtmem_per_wave) + for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { + if (builder->shaders->variants[i]) { + pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size); + if (!builder->shaders->variants[i]->pvtmem_per_wave) per_wave = false; } } @@ -3362,11 +3663,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder, static void tu_pipeline_builder_finish(struct tu_pipeline_builder *builder) { - for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders); i++) { - if (!builder->shaders[i]) - continue; - tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc); - } + if (builder->shaders) + vk_pipeline_cache_object_unref(&builder->shaders->base); ralloc_free(builder->mem_ctx); } @@ -3374,7 +3672,7 @@ static void tu_pipeline_builder_init_graphics( struct tu_pipeline_builder *builder, struct tu_device *dev, - struct tu_pipeline_cache *cache, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *create_info, const VkAllocationCallbacks *alloc) { @@ -3461,7 +3759,9 @@ tu_graphics_pipeline_create(VkDevice device, VkPipeline *pPipeline) { TU_FROM_HANDLE(tu_device, dev, device); - TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache); + TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + + cache = cache ? 
cache : dev->mem_cache; struct tu_pipeline_builder builder; tu_pipeline_builder_init_graphics(&builder, dev, cache, @@ -3509,11 +3809,13 @@ tu_compute_pipeline_create(VkDevice device, VkPipeline *pPipeline) { TU_FROM_HANDLE(tu_device, dev, device); - TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache); + TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout); const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage; VkResult result; + cache = cache ? cache : dev->mem_cache; + struct tu_pipeline *pipeline; *pPipeline = VK_NULL_HANDLE; @@ -3526,38 +3828,73 @@ tu_compute_pipeline_create(VkDevice device, pipeline->executables_mem_ctx = ralloc_context(NULL); util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx); - struct ir3_shader_key key = {}; + struct tu_shader_key key; + tu_shader_key_init(&key, stage_info, dev); void *pipeline_mem_ctx = ralloc_context(NULL); - nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info, MESA_SHADER_COMPUTE); + + unsigned char pipeline_sha1[20]; + tu_hash_compute(pipeline_sha1, stage_info, layout, &key, dev->compiler); + + struct tu_compiled_shaders *compiled = NULL; const bool executable_info = pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; - char *nir_initial_disasm = executable_info ? - nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL; + if (!executable_info) + compiled = tu_pipeline_cache_lookup(cache, pipeline_sha1, sizeof(pipeline_sha1)); - struct tu_shader *shader = - tu_shader_create(dev, nir, stage_info, 0, layout, pAllocator); - if (!shader) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; + char *nir_initial_disasm = NULL; + + if (!compiled) { + struct ir3_shader_key ir3_key = {}; + + nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info, + MESA_SHADER_COMPUTE); + + nir_initial_disasm = executable_info ? 
+ nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL; + + struct tu_shader *shader = + tu_shader_create(dev, nir, &key, layout, pAllocator); + if (!shader) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + compiled = tu_shaders_init(dev, &pipeline_sha1, sizeof(pipeline_sha1)); + if (!compiled) { + tu_shader_destroy(dev, shader, pAllocator); + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + compiled->active_desc_sets = shader->active_desc_sets; + compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts; + + struct ir3_shader_variant *v = + ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info); + + tu_shader_destroy(dev, shader, pAllocator); + + if (!v) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + compiled->variants[MESA_SHADER_COMPUTE] = v; + + compiled = tu_pipeline_cache_insert(cache, compiled); } - pipeline->active_desc_sets = shader->active_desc_sets; + pipeline->active_desc_sets = compiled->active_desc_sets; - bool created; - struct ir3_shader_variant *v = - ir3_shader_get_variant(shader->ir3_shader, &key, false, executable_info, &created); - if (!v) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } + struct ir3_shader_variant *v = compiled->variants[MESA_SHADER_COMPUTE]; tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE], - shader, v); + &compiled->push_consts[MESA_SHADER_COMPUTE], v); - result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, cache, v); + result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, v); if (result != VK_SUCCESS) goto fail; @@ -3574,14 +3911,14 @@ tu_compute_pipeline_create(VkDevice device, struct tu_cs prog_cs; uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v); tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs); - tu6_emit_cs_config(&prog_cs, shader, v, &pvtmem, shader_iova); + tu6_emit_cs_config(&prog_cs, v, &pvtmem, shader_iova); pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); tu6_emit_load_state(pipeline, layout, true); tu_append_executable(pipeline, v, nir_initial_disasm); - tu_shader_destroy(dev, shader, pAllocator); + vk_pipeline_cache_object_unref(&compiled->base); ralloc_free(pipeline_mem_ctx); *pPipeline = tu_pipeline_to_handle(pipeline); @@ -3589,8 +3926,8 @@ tu_compute_pipeline_create(VkDevice device, return VK_SUCCESS; fail: - if (shader) - tu_shader_destroy(dev, shader, pAllocator); + if (compiled) + vk_pipeline_cache_object_unref(&compiled->base); ralloc_free(pipeline_mem_ctx); diff --git a/src/freedreno/vulkan/tu_pipeline_cache.c b/src/freedreno/vulkan/tu_pipeline_cache.c deleted file mode 100644 index 5cfc79fbfb0..00000000000 --- a/src/freedreno/vulkan/tu_pipeline_cache.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include "util/debug.h" -#include "util/disk_cache.h" -#include "util/mesa-sha1.h" -#include "util/u_atomic.h" -#include "vulkan/util/vk_util.h" - -struct cache_entry_variant_info -{ -}; - -struct cache_entry -{ - union { - unsigned char sha1[20]; - uint32_t sha1_dw[5]; - }; - uint32_t code_sizes[MESA_SHADER_STAGES]; - struct tu_shader_variant *variants[MESA_SHADER_STAGES]; - char code[0]; -}; - -static void -tu_pipeline_cache_init(struct tu_pipeline_cache *cache, - struct tu_device *device) -{ - cache->device = device; - pthread_mutex_init(&cache->mutex, NULL); - - cache->modified = false; - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); - cache->hash_table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. Disable caching when we want to keep shader debug info, since - * we don't get the debug info on cached shaders. */ - if (cache->hash_table == NULL) - cache->table_size = 0; - else - memset(cache->hash_table, 0, byte_size); -} - -static void -tu_pipeline_cache_finish(struct tu_pipeline_cache *cache) -{ - for (unsigned i = 0; i < cache->table_size; ++i) - if (cache->hash_table[i]) { - vk_free(&cache->alloc, cache->hash_table[i]); - } - pthread_mutex_destroy(&cache->mutex); - free(cache->hash_table); -} - -static uint32_t -entry_size(struct cache_entry *entry) -{ - size_t ret = sizeof(*entry); - for (int i = 0; i < MESA_SHADER_STAGES; ++i) - if (entry->code_sizes[i]) - ret += - sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; - return ret; -} - -static struct cache_entry * -tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache, - const unsigned char *sha1) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - if (cache->table_size == 0) - return NULL; - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - struct cache_entry *entry = cache->hash_table[index]; - - if (!entry) - return NULL; - - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - return entry; - } - } - - unreachable("hash table should never be full"); -} - -static struct cache_entry * -tu_pipeline_cache_search(struct tu_pipeline_cache *cache, - const unsigned char *sha1) -{ - struct cache_entry *entry; - - pthread_mutex_lock(&cache->mutex); - - entry = tu_pipeline_cache_search_unlocked(cache, sha1); - - pthread_mutex_unlock(&cache->mutex); - - return entry; -} - -static void -tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache, - struct cache_entry *entry) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = entry->sha1_dw[0]; - - /* We'll always be able to insert when we get here. 
*/ - assert(cache->kernel_count < cache->table_size / 2); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (!cache->hash_table[index]) { - cache->hash_table[index] = entry; - break; - } - } - - cache->total_size += entry_size(entry); - cache->kernel_count++; -} - -static VkResult -tu_pipeline_cache_grow(struct tu_pipeline_cache *cache) -{ - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->hash_table[0]); - struct cache_entry **table; - struct cache_entry **old_table = cache->hash_table; - - table = malloc(byte_size); - if (table == NULL) - return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY); - - cache->hash_table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - memset(cache->hash_table, 0, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - struct cache_entry *entry = old_table[i]; - if (!entry) - continue; - - tu_pipeline_cache_set_entry(cache, entry); - } - - free(old_table); - - return VK_SUCCESS; -} - -static void -tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache, - struct cache_entry *entry) -{ - if (cache->kernel_count == cache->table_size / 2) - tu_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - tu_pipeline_cache_set_entry(cache, entry); -} - -static void -tu_pipeline_cache_load(struct tu_pipeline_cache *cache, - const void *data, - size_t size) -{ - struct tu_device *device = cache->device; - struct vk_pipeline_cache_header header; - - if (size < sizeof(header)) - return; - memcpy(&header, data, sizeof(header)); - if (header.header_size < sizeof(header)) - return; - if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - return; - if (header.vendor_id != 0x5143) - return; - if (header.device_id != device->physical_device->dev_id.chip_id) - return; - if (memcmp(header.uuid, device->physical_device->cache_uuid, - VK_UUID_SIZE) != 0) - return; - - char *end = (void *) data + size; - char *p = (void *) data + header.header_size; - - while (end - p >= sizeof(struct cache_entry)) { - struct cache_entry *entry = (struct cache_entry *) p; - struct cache_entry *dest_entry; - size_t size = entry_size(entry); - if (end - p < size) - break; - - dest_entry = - vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); - if (dest_entry) { - memcpy(dest_entry, entry, size); - for (int i = 0; i < MESA_SHADER_STAGES; ++i) - dest_entry->variants[i] = NULL; - tu_pipeline_cache_add_entry(cache, dest_entry); - } - p += size; - } -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_CreatePipelineCache(VkDevice _device, - const VkPipelineCacheCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineCache *pPipelineCache) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_pipeline_cache *cache; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache), - VK_OBJECT_TYPE_PIPELINE_CACHE); - if (cache == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - if (pAllocator) - cache->alloc = *pAllocator; - else - cache->alloc = device->vk.alloc; - - tu_pipeline_cache_init(cache, device); - - if 
(pCreateInfo->initialDataSize > 0) { - tu_pipeline_cache_load(cache, pCreateInfo->pInitialData, - pCreateInfo->initialDataSize); - } - - *pPipelineCache = tu_pipeline_cache_to_handle(cache); - - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -tu_DestroyPipelineCache(VkDevice _device, - VkPipelineCache _cache, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); - - if (!cache) - return; - tu_pipeline_cache_finish(cache); - - vk_object_free(&device->vk, pAllocator, cache); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_GetPipelineCacheData(VkDevice _device, - VkPipelineCache _cache, - size_t *pDataSize, - void *pData) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); - struct vk_pipeline_cache_header *header; - VkResult result = VK_SUCCESS; - - pthread_mutex_lock(&cache->mutex); - - const size_t size = sizeof(*header) + cache->total_size; - if (pData == NULL) { - pthread_mutex_unlock(&cache->mutex); - *pDataSize = size; - return VK_SUCCESS; - } - if (*pDataSize < sizeof(*header)) { - pthread_mutex_unlock(&cache->mutex); - *pDataSize = 0; - return VK_INCOMPLETE; - } - void *p = pData, *end = pData + *pDataSize; - header = p; - header->header_size = sizeof(*header); - header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; - header->vendor_id = 0x5143; - header->device_id = device->physical_device->dev_id.chip_id; - memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE); - p += header->header_size; - - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (!cache->hash_table[i]) - continue; - entry = cache->hash_table[i]; - const uint32_t size = entry_size(entry); - if (end < p + size) { - result = VK_INCOMPLETE; - break; - } - - memcpy(p, entry, size); - for (int j = 0; j < MESA_SHADER_STAGES; ++j) - ((struct cache_entry *) p)->variants[j] = NULL; - p += size; - } - *pDataSize = p - pData; - - pthread_mutex_unlock(&cache->mutex); - return result; -} - -static void -tu_pipeline_cache_merge(struct tu_pipeline_cache *dst, - struct tu_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - struct cache_entry *entry = src->hash_table[i]; - if (!entry || tu_pipeline_cache_search(dst, entry->sha1)) - continue; - - tu_pipeline_cache_add_entry(dst, entry); - - src->hash_table[i] = NULL; - } -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_MergePipelineCaches(VkDevice _device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache *pSrcCaches) -{ - TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache); - - for (uint32_t i = 0; i < srcCacheCount; i++) { - TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]); - - tu_pipeline_cache_merge(dst, src); - } - - return VK_SUCCESS; -} diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index d9a7d826287..e87097728d2 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -65,6 +65,7 @@ #include "vk_log.h" #include "vk_physical_device.h" #include "vk_shader_module.h" +#include "vk_pipeline_cache.h" #include "wsi_common.h" #include "ir3/ir3_compiler.h" @@ -239,11 +240,6 @@ struct tu_physical_device /* Address space and global fault count for this local_fd with DRM backend */ uint64_t fault_count; - /* This is the drivers on-disk cache used as a fallback as opposed to - * the pipeline cache defined by apps. 
- */ - struct disk_cache *disk_cache; - struct tu_memory_heap heap; struct vk_sync_type syncobj_type; @@ -521,7 +517,7 @@ struct tu_device struct ir3_compiler *compiler; /* Backup in-memory cache to be used if the app doesn't provide one */ - struct tu_pipeline_cache *mem_cache; + struct vk_pipeline_cache *mem_cache; #define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */ @@ -1367,6 +1363,24 @@ struct tu_shader bool multi_pos_output; }; +struct tu_shader_key { + unsigned multiview_mask; + enum ir3_wavesize_option api_wavesize, real_wavesize; +}; + +struct tu_compiled_shaders +{ + struct vk_pipeline_cache_object base; + + struct tu_push_constant_range push_consts[MESA_SHADER_STAGES]; + uint8_t active_desc_sets; + bool multi_pos_output; + + struct ir3_shader_variant *variants[MESA_SHADER_STAGES]; +}; + +extern const struct vk_pipeline_cache_object_ops tu_shaders_ops; + bool tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output, struct tu_device *dev); @@ -1380,8 +1394,7 @@ tu_spirv_to_nir(struct tu_device *dev, struct tu_shader * tu_shader_create(struct tu_device *dev, nir_shader *nir, - const VkPipelineShaderStageCreateInfo *stage_info, - unsigned multiview_mask, + const struct tu_shader_key *key, struct tu_pipeline_layout *layout, const VkAllocationCallbacks *alloc); diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 1af0dbaa959..c977b1d8566 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -700,8 +700,7 @@ tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info) struct tu_shader * tu_shader_create(struct tu_device *dev, nir_shader *nir, - const VkPipelineShaderStageCreateInfo *stage_info, - unsigned multiview_mask, + const struct tu_shader_key *key, struct tu_pipeline_layout *layout, const VkAllocationCallbacks *alloc) { @@ -729,7 +728,7 @@ tu_shader_create(struct tu_device *dev, * sampling function. gl_Layer doesn't work when * multiview is enabled. */ - .use_view_id_for_layer = multiview_mask != 0, + .use_view_id_for_layer = key->multiview_mask != 0, }); } @@ -740,8 +739,8 @@ tu_shader_create(struct tu_device *dev, */ ir3_nir_lower_io_to_temporaries(nir); - if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) { - tu_nir_lower_multiview(nir, multiview_mask, + if (nir->info.stage == MESA_SHADER_VERTEX && key->multiview_mask) { + tu_nir_lower_multiview(nir, key->multiview_mask, &shader->multi_pos_output, dev); } @@ -801,46 +800,11 @@ tu_shader_create(struct tu_device *dev, ir3_finalize_nir(dev->compiler, nir); - enum ir3_wavesize_option api_wavesize, real_wavesize; - - if (stage_info) { - if (stage_info->flags & - VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) { - api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE; - } else { - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info = - vk_find_struct_const(stage_info->pNext, - PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT); - - if (size_info) { - if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) { - api_wavesize = IR3_SINGLE_ONLY; - } else { - assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2); - api_wavesize = IR3_DOUBLE_ONLY; - } - } else { - /* Match the exposed subgroupSize. 
*/ - api_wavesize = IR3_DOUBLE_ONLY; - } - - if (stage_info->flags & - VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) - real_wavesize = api_wavesize; - else if (api_wavesize == IR3_SINGLE_ONLY) - real_wavesize = IR3_SINGLE_ONLY; - else - real_wavesize = IR3_SINGLE_OR_DOUBLE; - } - } else { - api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE; - } - shader->ir3_shader = ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) { .reserved_user_consts = align(shader->push_consts.count, 4), - .api_wavesize = api_wavesize, - .real_wavesize = real_wavesize, + .api_wavesize = key->api_wavesize, + .real_wavesize = key->real_wavesize, }, &so_info); return shader;
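
The lookup-or-compile-then-insert flow that both the graphics and compute paths
above follow boils down to the sketch below. It is only an illustration of the
reference-counting rules of the common vk_pipeline_cache API as used here:
tu_shaders_ops and struct tu_compiled_shaders come from this series,
build_shaders_uncached() is a hypothetical stand-in for the SPIR-V -> ir3
compile path, and the declarations from vk_pipeline_cache.h and tu_private.h
are assumed to be in scope.

static struct tu_compiled_shaders *
lookup_or_build(struct vk_pipeline_cache *cache,
                const void *key, size_t key_size)
{
   /* On a hit this returns a new reference owned by the caller,
    * on a miss it returns NULL.
    */
   struct vk_pipeline_cache_object *obj =
      vk_pipeline_cache_lookup_object(cache, key, key_size,
                                      &tu_shaders_ops, NULL);
   if (obj)
      return container_of(obj, struct tu_compiled_shaders, base);

   /* Cache miss: compile and wrap the result in a cache object
    * (placeholder for the tu_spirv_to_nir/ir3 path above).
    */
   struct tu_compiled_shaders *shaders = build_shaders_uncached(key, key_size);
   if (!shaders)
      return NULL;

   /* The cache takes its own reference. If another thread inserted an
    * object with the same key in the meantime, our object is unref'd and
    * the existing one is returned, so always continue with the return
    * value rather than with `shaders`.
    */
   obj = vk_pipeline_cache_add_object(cache, &shaders->base);
   return container_of(obj, struct tu_compiled_shaders, base);
}

The caller keeps the returned reference for as long as it needs the variants
(the graphics builder holds it until tu_pipeline_builder_finish(), the compute
path until the end of tu_compute_pipeline_create()) and then drops it with
vk_pipeline_cache_object_unref(&shaders->base).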