mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-18 00:48:07 +02:00
We don't want to let the default pipeline cache grow without limit. We choose a maximum number of entries that should work for all real world applications. CTS will exceed that limit, but that is okay, as it will prevent us from running out of memory. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
711 lines
22 KiB
C
711 lines
22 KiB
C
/*
|
|
* Copyright © 2019 Raspberry Pi
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "v3dv_private.h"
|
|
#include "vulkan/util/vk_util.h"
|
|
#include "util/blob.h"
|
|
#include "nir/nir_serialize.h"
|
|
|
|
/* Compile-time debug switch: when true, cache searches, insertions, loads and
 * serializations in this file are logged to stderr.
 */
static const bool dump_stats = false;
/* Compile-time debug switch: cache_dump_stats() only prints the per-cache
 * entry/miss/hit counters when this is true.
 */
static const bool dump_stats_verbose = false;
|
|
|
|
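/* Maximum number of entries accepted by each cache table; the same limit is
 * applied separately to the NIR cache and to the variant cache.
 */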
|
|
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
|
|
|
|
/* Hash callback for the cache tables: hashes the 20 bytes that @sha1 points
 * to.
 */
static uint32_t
sha1_hash_func(const void *sha1)
{
   const uint32_t hash = _mesa_hash_data(sha1, 20);

   return hash;
}
|
|
|
|
/* Key-equality callback for the cache tables: true when the two 20-byte keys
 * are byte-for-byte identical.
 */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   const int order = memcmp(sha1_a, sha1_b, 20);

   return order == 0;
}
|
|
|
|
/* A serialized NIR shader as stored in the NIR cache table. The struct is
 * allocated with `size` extra bytes so the serialized shader follows the
 * header in the same allocation.
 */
struct serialized_nir {
   /* Key under which the entry is stored in the table. */
   unsigned char sha1_key[20];
   /* Number of bytes in data. */
   size_t size;
   /* Serialized shader bytes (C99 flexible array member). */
   char data[];
};
|
|
|
|
static void
|
|
cache_dump_stats(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
if (!dump_stats_verbose)
|
|
return;
|
|
|
|
fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
|
|
fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
|
|
fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);
|
|
|
|
fprintf(stderr, " variant cache entries: %d\n", cache->variant_stats.count);
|
|
fprintf(stderr, " variant cache miss count: %d\n", cache->variant_stats.miss);
|
|
fprintf(stderr, " variant cache hit count: %d\n", cache->variant_stats.hit);
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
nir_shader *nir,
|
|
unsigned char sha1_key[20])
|
|
{
|
|
if (!cache || !cache->nir_cache)
|
|
return;
|
|
|
|
if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
|
|
return;
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
if (entry)
|
|
return;
|
|
|
|
struct blob blob;
|
|
blob_init(&blob);
|
|
|
|
nir_serialize(&blob, nir, false);
|
|
if (blob.out_of_memory) {
|
|
blob_finish(&blob);
|
|
return;
|
|
}
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
/* Because ralloc isn't thread-safe, we have to do all this inside the
|
|
* lock. We could unlock for the big memcpy but it's probably not worth
|
|
* the hassle.
|
|
*/
|
|
entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
if (entry) {
|
|
blob_finish(&blob);
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return;
|
|
}
|
|
|
|
struct serialized_nir *snir =
|
|
ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
|
|
memcpy(snir->sha1_key, sha1_key, 20);
|
|
snir->size = blob.size;
|
|
memcpy(snir->data, blob.data, blob.size);
|
|
|
|
blob_finish(&blob);
|
|
|
|
cache->nir_stats.count++;
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, snir->sha1_key);
|
|
fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
|
|
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
}
|
|
|
|
nir_shader*
|
|
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
const nir_shader_compiler_options *nir_options,
|
|
unsigned char sha1_key[20])
|
|
{
|
|
if (!cache || !cache->nir_cache)
|
|
return NULL;
|
|
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
|
|
}
|
|
|
|
const struct serialized_nir *snir = NULL;
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
if (entry)
|
|
snir = entry->data;
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
|
|
if (snir) {
|
|
struct blob_reader blob;
|
|
blob_reader_init(&blob, snir->data, snir->size);
|
|
|
|
/* We use context NULL as we want the p_stage to keep the reference to
|
|
* nir, as we keep open the possibility of provide a shader variant
|
|
* after cache creation
|
|
*/
|
|
nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
|
|
if (blob.overrun) {
|
|
ralloc_free(nir);
|
|
} else {
|
|
cache->nir_stats.hit++;
|
|
cache_dump_stats(cache);
|
|
return nir;
|
|
}
|
|
}
|
|
|
|
cache->nir_stats.miss++;
|
|
cache_dump_stats(cache);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
|
|
struct v3dv_device *device,
|
|
bool cache_enabled)
|
|
{
|
|
cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
|
|
|
|
cache->device = device;
|
|
pthread_mutex_init(&cache->mutex, NULL);
|
|
|
|
if (cache_enabled) {
|
|
cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
|
|
sha1_compare_func);
|
|
cache->nir_stats.miss = 0;
|
|
cache->nir_stats.hit = 0;
|
|
cache->nir_stats.count = 0;
|
|
|
|
cache->variant_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
|
|
sha1_compare_func);
|
|
cache->variant_stats.miss = 0;
|
|
cache->variant_stats.hit = 0;
|
|
cache->variant_stats.count = 0;
|
|
} else {
|
|
cache->nir_cache = NULL;
|
|
cache->variant_cache = NULL;
|
|
}
|
|
|
|
}
|
|
|
|
struct v3dv_shader_variant*
|
|
v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
unsigned char sha1_key[20])
|
|
{
|
|
if (!cache || !cache->variant_cache)
|
|
return NULL;
|
|
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, search variant with key %s\n", cache, sha1buf);
|
|
}
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->variant_cache, sha1_key);
|
|
|
|
if (entry) {
|
|
struct v3dv_shader_variant *variant =
|
|
(struct v3dv_shader_variant *) entry->data;
|
|
|
|
cache->variant_stats.hit++;
|
|
if (unlikely(dump_stats)) {
|
|
fprintf(stderr, "\tcache hit: %p\n", variant);
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
if (variant)
|
|
v3dv_shader_variant_ref(variant);
|
|
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return variant;
|
|
}
|
|
|
|
cache->variant_stats.miss++;
|
|
if (unlikely(dump_stats)) {
|
|
fprintf(stderr, "\tcache miss\n");
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
struct v3dv_shader_variant *variant)
|
|
{
|
|
if (!cache || !cache->variant_cache)
|
|
return;
|
|
|
|
if (cache->variant_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
|
|
return;
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->variant_cache, variant->variant_sha1);
|
|
|
|
if (entry) {
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return;
|
|
}
|
|
|
|
v3dv_shader_variant_ref(variant);
|
|
_mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant);
|
|
cache->variant_stats.count++;
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, variant->variant_sha1);
|
|
|
|
fprintf(stderr, "pipeline cache %p, new variant entry with key %s\n\t%p\n",
|
|
cache, sha1buf, variant);
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
}
|
|
|
|
static struct serialized_nir*
|
|
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
|
|
struct blob_reader *blob)
|
|
{
|
|
const unsigned char *sha1_key = blob_read_bytes(blob, 20);
|
|
uint32_t snir_size = blob_read_uint32(blob);
|
|
const char* snir_data = blob_read_bytes(blob, snir_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
struct serialized_nir *snir =
|
|
ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
|
|
memcpy(snir->sha1_key, sha1_key, 20);
|
|
snir->size = snir_size;
|
|
memcpy(snir->data, snir_data, snir_size);
|
|
|
|
return snir;
|
|
}
|
|
|
|
static struct v3dv_shader_variant*
|
|
shader_variant_create_from_blob(struct v3dv_device *device,
|
|
struct blob_reader *blob)
|
|
{
|
|
VkResult result;
|
|
|
|
gl_shader_stage stage = blob_read_uint32(blob);
|
|
bool is_coord = blob_read_uint8(blob);
|
|
|
|
uint32_t v3d_key_size = blob_read_uint32(blob);
|
|
const struct v3d_key *v3d_key = blob_read_bytes(blob, v3d_key_size);
|
|
|
|
const unsigned char *variant_sha1 = blob_read_bytes(blob, 20);
|
|
|
|
uint32_t prog_data_size = blob_read_uint32(blob);
|
|
/* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
|
|
assert(prog_data_size == v3d_prog_data_size(stage));
|
|
|
|
const void *prog_data = blob_read_bytes(blob, prog_data_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint32_t ulist_count = blob_read_uint32(blob);
|
|
uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
|
|
const void *contents_data = blob_read_bytes(blob, contents_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint ulist_data_size = sizeof(uint32_t) * ulist_count;
|
|
const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint32_t qpu_insts_size = blob_read_uint32(blob);
|
|
const uint64_t *qpu_insts = blob_read_bytes(blob, qpu_insts_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
/* shader_variant_create expects a newly created prog_data for their own,
|
|
* as it is what the v3d compiler returns. So we are also allocating one
|
|
* (including the uniform list) and filled it up with the data that we read
|
|
* from the blob
|
|
*/
|
|
struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
|
|
memcpy(new_prog_data, prog_data, prog_data_size);
|
|
struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
|
|
ulist->count = ulist_count;
|
|
ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
|
|
memcpy(ulist->contents, contents_data, contents_size);
|
|
ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
|
|
memcpy(ulist->data, ulist_data_data, ulist_data_size);
|
|
|
|
return v3dv_shader_variant_create(device, stage, is_coord,
|
|
variant_sha1,
|
|
v3d_key, v3d_key_size,
|
|
new_prog_data, prog_data_size,
|
|
qpu_insts, qpu_insts_size,
|
|
&result);
|
|
}
|
|
|
|
static void
|
|
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
|
|
size_t size,
|
|
const void *data)
|
|
{
|
|
struct v3dv_device *device = cache->device;
|
|
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
|
|
struct vk_pipeline_cache_header header;
|
|
|
|
if (cache->variant_cache == NULL)
|
|
return;
|
|
|
|
struct blob_reader blob;
|
|
blob_reader_init(&blob, data, size);
|
|
|
|
blob_copy_bytes(&blob, &header, sizeof(header));
|
|
if (size < sizeof(header))
|
|
return;
|
|
memcpy(&header, data, sizeof(header));
|
|
if (header.header_size < sizeof(header))
|
|
return;
|
|
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
|
|
return;
|
|
if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
|
|
return;
|
|
if (header.device_id != v3dv_physical_device_device_id(pdevice))
|
|
return;
|
|
if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
|
|
return;
|
|
|
|
uint32_t nir_count = blob_read_uint32(&blob);
|
|
if (blob.overrun)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < nir_count; i++) {
|
|
struct serialized_nir *snir =
|
|
serialized_nir_create_from_blob(cache, &blob);
|
|
|
|
if (!snir)
|
|
break;
|
|
|
|
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
|
|
cache->nir_stats.count++;
|
|
}
|
|
|
|
uint32_t count = blob_read_uint32(&blob);
|
|
if (blob.overrun)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
struct v3dv_shader_variant *variant =
|
|
shader_variant_create_from_blob(device, &blob);
|
|
if (!variant)
|
|
break;
|
|
_mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant);
|
|
cache->variant_stats.count++;
|
|
}
|
|
|
|
if (unlikely(dump_stats)) {
|
|
fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
|
|
"%i variant entries\n", cache, nir_count, count);
|
|
cache_dump_stats(cache);
|
|
}
|
|
}
|
|
|
|
VkResult
|
|
v3dv_CreatePipelineCache(VkDevice _device,
|
|
const VkPipelineCacheCreateInfo *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkPipelineCache *pPipelineCache)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
struct v3dv_pipeline_cache *cache;
|
|
|
|
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
|
|
assert(pCreateInfo->flags == 0);
|
|
|
|
cache = vk_alloc2(&device->alloc, pAllocator,
|
|
sizeof(*cache), 8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if (cache == NULL)
|
|
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
v3dv_pipeline_cache_init(cache, device,
|
|
device->instance->pipeline_cache_enabled);
|
|
|
|
if (pCreateInfo->initialDataSize > 0) {
|
|
pipeline_cache_load(cache,
|
|
pCreateInfo->initialDataSize,
|
|
pCreateInfo->pInitialData);
|
|
}
|
|
|
|
*pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
pthread_mutex_destroy(&cache->mutex);
|
|
|
|
if (cache->nir_cache) {
|
|
hash_table_foreach(cache->nir_cache, entry)
|
|
ralloc_free(entry->data);
|
|
|
|
_mesa_hash_table_destroy(cache->nir_cache, NULL);
|
|
}
|
|
|
|
if (cache->variant_cache) {
|
|
hash_table_foreach(cache->variant_cache, entry) {
|
|
struct v3dv_shader_variant *variant = entry->data;
|
|
if (variant)
|
|
v3dv_shader_variant_unref(cache->device, variant);
|
|
}
|
|
|
|
_mesa_hash_table_destroy(cache->variant_cache, NULL);
|
|
|
|
}
|
|
}
|
|
|
|
void
|
|
v3dv_DestroyPipelineCache(VkDevice _device,
|
|
VkPipelineCache _cache,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
|
|
|
if (!cache)
|
|
return;
|
|
|
|
v3dv_pipeline_cache_finish(cache);
|
|
|
|
vk_free2(&device->alloc, pAllocator, cache);
|
|
}
|
|
|
|
VkResult
|
|
v3dv_MergePipelineCaches(VkDevice device,
|
|
VkPipelineCache dstCache,
|
|
uint32_t srcCacheCount,
|
|
const VkPipelineCache *pSrcCaches)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
|
|
|
|
if (!dst->variant_cache || !dst->nir_cache)
|
|
return VK_SUCCESS;
|
|
|
|
for (uint32_t i = 0; i < srcCacheCount; i++) {
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
|
|
if (!src->variant_cache || !src->nir_cache)
|
|
continue;
|
|
|
|
hash_table_foreach(src->nir_cache, entry) {
|
|
struct serialized_nir *src_snir = entry->data;
|
|
assert(src_snir);
|
|
|
|
if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
|
|
continue;
|
|
|
|
/* FIXME: we are using serialized nir shaders because they are
|
|
* convenient to create and store on the cache, but requires to do a
|
|
* copy here (and some other places) of the serialized NIR. Perhaps
|
|
* it would make sense to move to handle the NIR shaders with shared
|
|
* structures with ref counts, as the variants.
|
|
*/
|
|
struct serialized_nir *snir_dst =
|
|
ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
|
|
memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
|
|
snir_dst->size = src_snir->size;
|
|
memcpy(snir_dst->data, src_snir->data, src_snir->size);
|
|
|
|
_mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
|
|
dst->nir_stats.count++;
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, snir_dst->sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, added nir entry %s "
|
|
"from pipeline cache %p\n",
|
|
dst, sha1buf, src);
|
|
cache_dump_stats(dst);
|
|
}
|
|
}
|
|
|
|
hash_table_foreach(src->variant_cache, entry) {
|
|
struct v3dv_shader_variant *variant = entry->data;
|
|
assert(variant);
|
|
|
|
if (_mesa_hash_table_search(dst->variant_cache, variant->variant_sha1))
|
|
continue;
|
|
|
|
v3dv_shader_variant_ref(variant);
|
|
_mesa_hash_table_insert(dst->variant_cache, variant->variant_sha1, variant);
|
|
|
|
dst->variant_stats.count++;
|
|
if (unlikely(dump_stats)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, variant->variant_sha1);
|
|
|
|
fprintf(stderr, "pipeline cache %p, added variant entry %s "
|
|
"from pipeline cache %p\n",
|
|
dst, sha1buf, src);
|
|
cache_dump_stats(dst);
|
|
}
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static bool
|
|
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
|
|
struct blob *blob)
|
|
{
|
|
blob_write_uint32(blob, variant->stage);
|
|
blob_write_uint8(blob, variant->is_coord);
|
|
|
|
blob_write_uint32(blob, variant->v3d_key_size);
|
|
blob_write_bytes(blob, &variant->key, variant->v3d_key_size);
|
|
|
|
blob_write_bytes(blob, variant->variant_sha1, sizeof(variant->variant_sha1));
|
|
|
|
blob_write_uint32(blob, variant->prog_data_size);
|
|
blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
|
|
|
|
struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
|
|
blob_write_uint32(blob, ulist->count);
|
|
blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
|
|
blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
|
|
|
|
blob_write_uint32(blob, variant->qpu_insts_size);
|
|
assert(variant->assembly_bo->map);
|
|
blob_write_bytes(blob, variant->assembly_bo->map, variant->qpu_insts_size);
|
|
|
|
return !blob->out_of_memory;
|
|
}
|
|
|
|
VkResult
|
|
v3dv_GetPipelineCacheData(VkDevice _device,
|
|
VkPipelineCache _cache,
|
|
size_t *pDataSize,
|
|
void *pData)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
|
|
|
struct blob blob;
|
|
if (pData) {
|
|
blob_init_fixed(&blob, pData, *pDataSize);
|
|
} else {
|
|
blob_init_fixed(&blob, NULL, SIZE_MAX);
|
|
}
|
|
|
|
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
|
|
VkResult result = VK_SUCCESS;
|
|
|
|
pthread_mutex_lock(&cache->mutex);
|
|
|
|
struct vk_pipeline_cache_header header = {
|
|
.header_size = sizeof(struct vk_pipeline_cache_header),
|
|
.header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
|
|
.vendor_id = v3dv_physical_device_vendor_id(pdevice),
|
|
.device_id = v3dv_physical_device_device_id(pdevice),
|
|
};
|
|
memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
|
|
blob_write_bytes(&blob, &header, sizeof(header));
|
|
|
|
uint32_t nir_count = 0;
|
|
intptr_t nir_count_offset = blob_reserve_uint32(&blob);
|
|
if (nir_count_offset < 0) {
|
|
*pDataSize = 0;
|
|
blob_finish(&blob);
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return VK_INCOMPLETE;
|
|
}
|
|
|
|
if (cache->nir_cache) {
|
|
hash_table_foreach(cache->nir_cache, entry) {
|
|
const struct serialized_nir *snir = entry->data;
|
|
|
|
size_t save_size = blob.size;
|
|
|
|
blob_write_bytes(&blob, snir->sha1_key, 20);
|
|
blob_write_uint32(&blob, snir->size);
|
|
blob_write_bytes(&blob, snir->data, snir->size);
|
|
|
|
if (blob.out_of_memory) {
|
|
blob.size = save_size;
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
result = VK_INCOMPLETE;
|
|
break;
|
|
}
|
|
|
|
nir_count++;
|
|
}
|
|
}
|
|
blob_overwrite_uint32(&blob, nir_count_offset, nir_count);
|
|
|
|
uint32_t count = 0;
|
|
intptr_t count_offset = blob_reserve_uint32(&blob);
|
|
if (count_offset < 0) {
|
|
*pDataSize = 0;
|
|
blob_finish(&blob);
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
return VK_INCOMPLETE;
|
|
}
|
|
|
|
if (cache->variant_cache) {
|
|
hash_table_foreach(cache->variant_cache, entry) {
|
|
struct v3dv_shader_variant *variant = entry->data;
|
|
|
|
size_t save_size = blob.size;
|
|
if (!shader_variant_write_to_blob(variant, &blob)) {
|
|
/* If it fails reset to the previous size and bail */
|
|
blob.size = save_size;
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
result = VK_INCOMPLETE;
|
|
break;
|
|
}
|
|
|
|
count++;
|
|
}
|
|
}
|
|
|
|
blob_overwrite_uint32(&blob, count_offset, count);
|
|
|
|
*pDataSize = blob.size;
|
|
|
|
blob_finish(&blob);
|
|
|
|
if (unlikely(dump_stats)) {
|
|
assert(count <= cache->variant_stats.count);
|
|
fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
|
|
"%i nir shader entries "
|
|
"%i variant entries, %u DataSize\n",
|
|
cache, nir_count, count, (uint32_t) *pDataSize);
|
|
}
|
|
|
|
pthread_mutex_unlock(&cache->mutex);
|
|
|
|
return result;
|
|
}
|