mesa/src/intel/vulkan/anv_pipeline_cache.c
Jason Ekstrand dcb6a68bb4 anv: Move shader hashing to anv_pipeline
Shader hashing is very closely related to shader compilation.  Putting
them right next to each other in anv_pipeline makes it easier to verify
that we're actually hashing everything we need to be hashing.  The only
real change (other than the order of hashing) is that we now hash in the
shader stage.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
2017-05-03 11:25:46 -07:00

534 lines
17 KiB
C

/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
/* Number of bytes occupied by an anv_shader_bin plus the variable-length data
 * stored directly behind it.  The trailing pieces are, in order: the
 * prog_data blob, one pointer-sized slot per param, the key (a uint32_t size
 * followed by key_size bytes) and the surface + sampler binding entries.
 * Every piece is rounded up to a multiple of 8 bytes.
 */
static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   const uint32_t struct_bytes = align_u32(sizeof(struct anv_shader_bin), 8);
   const uint32_t prog_data_bytes = align_u32(prog_data_size, 8);
   const uint32_t param_bytes = align_u32(nr_params * sizeof(void *), 8);
   const uint32_t key_bytes = align_u32(sizeof(uint32_t) + key_size, 8);
   const uint32_t binding_bytes = align_u32(binding_data_size, 8);

   return struct_bytes + prog_data_bytes + param_bytes + key_bytes +
          binding_bytes;
}
/* Build a new shader binary object with an initial reference count of 1.
 *
 * A single allocation of anv_shader_bin_size() bytes holds the struct itself
 * followed by private copies of prog_data, the param array, the key and the
 * surface/sampler binding arrays; the pointers inside the returned object are
 * redirected to those copies.  The kernel itself is copied into memory taken
 * from the device's instruction state pool.
 *
 * Returns NULL if the host allocation fails.
 */
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t alloc_size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *bin =
      vk_alloc(&device->alloc, alloc_size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (bin == NULL)
      return NULL;

   bin->ref_cnt = 1;

   /* Upload the kernel into the instruction state pool. */
   bin->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(bin->kernel.map, kernel_data, kernel_size);
   bin->kernel_size = kernel_size;

   /* Shallow copy first; the two array pointers are repointed below. */
   bin->bind_map = *bind_map;
   bin->prog_data_size = prog_data_size;

   /* Walk a cursor through the tail of the allocation, carving out one
    * 8-byte-padded region per piece of trailing data.
    */
   void *cursor = bin;
   cursor += align_u32(sizeof(struct anv_shader_bin), 8);

   /* prog_data */
   bin->prog_data = cursor;
   struct brw_stage_prog_data *prog_data_copy = cursor;
   memcpy(cursor, prog_data, prog_data_size);
   cursor += align_u32(prog_data_size, 8);

   /* Only the param array is copied; pull/image params must be absent. */
   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   prog_data_copy->param = cursor;
   uint32_t param_bytes = prog_data->nr_params * sizeof(void *);
   memcpy(cursor, prog_data_param, param_bytes);
   cursor += align_u32(param_bytes, 8);

   /* key: size word followed by the raw key bytes */
   bin->key = cursor;
   struct anv_shader_bin_key *key_copy = cursor;
   key_copy->size = key_size;
   memcpy(key_copy->data, key_data, key_size);
   cursor += align_u32(sizeof(*key_copy) + key_size, 8);

   /* surface bindings, immediately followed by sampler bindings */
   const size_t surface_bytes =
      bind_map->surface_count * sizeof(struct anv_pipeline_binding);
   bin->bind_map.surface_to_descriptor = cursor;
   memcpy(cursor, bind_map->surface_to_descriptor, surface_bytes);
   cursor += surface_bytes;

   const size_t sampler_bytes =
      bind_map->sampler_count * sizeof(struct anv_pipeline_binding);
   bin->bind_map.sampler_to_descriptor = cursor;
   memcpy(cursor, bind_map->sampler_to_descriptor, sampler_bytes);

   return bin;
}
/* Release a shader binary whose reference count has already reached zero:
 * the kernel goes back to the device's instruction state pool and the host
 * allocation (struct plus trailing data) is freed.
 */
void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   /* Callers must have dropped the last reference before getting here. */
   assert(shader->ref_cnt == 0);

   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);

   vk_free(&device->alloc, shader);
}
/* Bytes needed to serialize `shader`: the struct with its trailing data, as
 * computed by anv_shader_bin_size(), followed by the kernel padded to a
 * multiple of 8 bytes.
 */
static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   const size_t struct_and_tail =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   return struct_and_tail + align_u32(shader->kernel_size, 8);
}
/* Serialize `shader` into `data`: first a raw copy of the struct and its
 * trailing data, then the kernel bytes.  The destination must provide at
 * least anv_shader_bin_data_size(shader) bytes.
 */
static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   const size_t blob_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);
   void *kernel_dst = data + blob_size;

   memcpy(data, shader, blob_size);
   memcpy(kernel_dst, shader->kernel.map, shader->kernel_size);
}
/* Remaining work:
*
* - Compact binding table layout so it's tight and not dependent on
* descriptor set layout.
*
* - Review prog_data struct for size and cacheability: struct
* brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
* bit quantities etc; param, pull_param, and image_params are pointers, we
* just need the compaction map. Use bit fields for all bools, e.g.
* dual_src_blend.
*/
/* Hash callback for the cache's hash table: hashes the key's data bytes. */
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *bin_key = void_key;
   const uint32_t hash = _mesa_hash_data(bin_key->data, bin_key->size);

   return hash;
}
/* Equality callback for the cache's hash table: two keys match when they
 * have the same size and identical data bytes.
 */
static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *lhs = void_a;
   const struct anv_shader_bin_key *rhs = void_b;

   return lhs->size == rhs->size &&
          memcmp(lhs->data, rhs->data, lhs->size) == 0;
}
/* Initialize a pipeline cache bound to `device`.  The hash table is only
 * created when `cache_enabled` is true; otherwise cache->cache stays NULL,
 * which the rest of this file treats as "caching disabled".
 */
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->cache = cache_enabled
      ? _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                shader_bin_key_compare_func)
      : NULL;
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
pthread_mutex_destroy(&cache->mutex);
if (cache->cache) {
/* This is a bit unfortunate. In order to keep things from randomly
* going away, the shader cache has to hold a reference to all shader
* binaries it contains. We unref them when we destroy the cache.
*/
struct hash_entry *entry;
hash_table_foreach(cache->cache, entry)
anv_shader_bin_unref(cache->device, entry->data);
_mesa_hash_table_destroy(cache->cache, NULL);
}
}
/* Look up a shader by raw key bytes.  The name indicates the caller is
 * expected to hold cache->mutex; no locking is done here.  Returns the
 * cached shader without touching its reference count, or NULL if absent.
 */
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   /* Stack storage for a temporary anv_shader_bin_key: one word for the size
    * field plus enough words to hold key_size bytes.
    */
   uint32_t storage[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *lookup_key = (void *)storage;

   lookup_key->size = key_size;
   memcpy(lookup_key->data, key_data, key_size);

   struct hash_entry *hit = _mesa_hash_table_search(cache->cache, lookup_key);

   return hit ? hit->data : NULL;
}
/* Thread-safe cache lookup.  On a hit the shader is returned with an extra
 * reference taken for the caller; returns NULL on a miss or when the cache
 * has no hash table.
 */
struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (cache->cache == NULL)
      return NULL;

   pthread_mutex_lock(&cache->mutex);
   struct anv_shader_bin *found =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   pthread_mutex_unlock(&cache->mutex);

   if (found == NULL)
      return NULL;

   /* The caller gets its own reference. */
   anv_shader_bin_ref(found);
   return found;
}
/* Return the shader cached under the given key, creating and inserting it
 * first if it is not there yet.  No locking is performed here.  The returned
 * pointer carries no extra reference for the caller: a newly created shader's
 * initial reference belongs to the cache.  Returns NULL if creation fails.
 */
static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *existing =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (existing != NULL)
      return existing;

   struct anv_shader_bin *created =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (created == NULL)
      return NULL;

   /* The table is keyed on the key copy stored inside the shader itself. */
   _mesa_hash_table_insert(cache->cache, created->key, created);

   return created;
}
/* Turn a freshly compiled kernel into an anv_shader_bin.
 *
 * With a hash table present, the shader is added to the cache (or the
 * existing entry is reused) under the mutex and returned with an extra
 * reference for the caller.  Without one, a standalone shader is created and
 * the caller is its sole owner.  Returns NULL on allocation failure.
 */
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache == NULL) {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }

   pthread_mutex_lock(&cache->mutex);
   struct anv_shader_bin *shader =
      anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                    kernel_data, kernel_size,
                                    prog_data, prog_data_size,
                                    prog_data->param, bind_map);
   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader != NULL)
      anv_shader_bin_ref(shader);

   return shader;
}
/* Header placed at the start of the pipeline cache data blob.  It is written
 * by anv_GetPipelineCacheData() and validated field by field in
 * anv_pipeline_cache_load().  The field order and types define the
 * serialized layout, so they must not be rearranged.
 */
struct cache_header {
/* Size of the header in bytes; the writer stores sizeof(struct cache_header)
 * and the loader rejects anything smaller.
 */
uint32_t header_size;
/* Written and checked as VK_PIPELINE_CACHE_HEADER_VERSION_ONE. */
uint32_t header_version;
/* Written and checked as 0x8086. */
uint32_t vendor_id;
/* Written from, and compared against, device->chipset_id. */
uint32_t device_id;
/* Written from, and compared against, the physical device's
 * pipeline_cache_uuid.
 */
uint8_t uuid[VK_UUID_SIZE];
};
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
const void *data, size_t size)
{
struct anv_device *device = cache->device;
struct anv_physical_device *pdevice = &device->instance->physicalDevice;
struct cache_header header;
if (cache->cache == NULL)
return;
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x8086)
return;
if (header.device_id != device->chipset_id)
return;
if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
return;
const void *end = data + size;
const void *p = data + header.header_size;
/* Count is the total number of valid entries */
uint32_t count;
if (p + sizeof(count) >= end)
return;
memcpy(&count, p, sizeof(count));
p += align_u32(sizeof(count), 8);
for (uint32_t i = 0; i < count; i++) {
struct anv_shader_bin bin;
if (p + sizeof(bin) > end)
break;
memcpy(&bin, p, sizeof(bin));
p += align_u32(sizeof(struct anv_shader_bin), 8);
const struct brw_stage_prog_data *prog_data = p;
p += align_u32(bin.prog_data_size, 8);
if (p > end)
break;
uint32_t param_size = prog_data->nr_params * sizeof(void *);
const void *prog_data_param = p;
p += align_u32(param_size, 8);
struct anv_shader_bin_key key;
if (p + sizeof(key) > end)
break;
memcpy(&key, p, sizeof(key));
const void *key_data = p + sizeof(key);
p += align_u32(sizeof(key) + key.size, 8);
/* We're going to memcpy this so getting rid of const is fine */
struct anv_pipeline_binding *bindings = (void *)p;
p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
sizeof(struct anv_pipeline_binding), 8);
bin.bind_map.surface_to_descriptor = bindings;
bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
const void *kernel_data = p;
p += align_u32(bin.kernel_size, 8);
if (p > end)
break;
anv_pipeline_cache_add_shader(cache, key_data, key.size,
kernel_data, bin.kernel_size,
prog_data, bin.prog_data_size,
prog_data_param, &bin.bind_map);
}
}
/* Whether pipeline caching is enabled.  The ANV_ENABLE_PIPELINE_CACHE
 * environment variable is consulted on the first call only; the answer is
 * remembered in a function-local static for all later calls.  The `true`
 * passed to env_var_as_boolean() is presumably the default used when the
 * variable is unset -- confirm against util/debug.h.
 */
static bool
pipeline_cache_enabled(void)
{
   /* -1 means "not looked up yet". */
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}
/* vkCreatePipelineCache entry point: allocate and initialize a pipeline
 * cache, optionally pre-populating it from the application's initial data.
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the cache object cannot be
 * allocated.
 */
VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   struct anv_pipeline_cache *cache =
      vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cache)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   /* Initial data is optional; a blob that does not validate is ignored by
    * the loader rather than reported as an error.
    */
   const size_t initial_size = pCreateInfo->initialDataSize;
   if (initial_size > 0)
      anv_pipeline_cache_load(cache, pCreateInfo->pInitialData, initial_size);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
/* vkDestroyPipelineCache entry point: finish the cache and free its storage.
 * A null cache handle is accepted and ignored.
 */
void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (cache != NULL) {
      anv_pipeline_cache_finish(cache);
      vk_free2(&device->alloc, pAllocator, cache);
   }
}
/* vkGetPipelineCacheData entry point.
 *
 * With pData == NULL, stores in *pDataSize the number of bytes needed to
 * serialize the whole cache and returns VK_SUCCESS.
 *
 * Otherwise writes the cache header, an entry count and as many whole shader
 * entries as fit into the *pDataSize bytes at pData, then stores the number
 * of bytes written in *pDataSize.  Returns VK_INCOMPLETE if at least one
 * entry had to be left out, or if the buffer cannot even hold the header and
 * the entry count (in which case nothing is written and *pDataSize is 0).
 *
 * NOTE(review): the hash table is walked here without taking cache->mutex;
 * confirm that no other thread can insert into this cache concurrently.
 */
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header *header;

   /* Bytes written unconditionally: the header and the entry count, each
    * padded to 8 bytes.
    */
   const size_t preamble_size = align_u32(sizeof(*header), 8) +
                                align_u32(sizeof(uint32_t), 8);

   if (pData == NULL) {
      size_t size = preamble_size;

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   /* The entry count is stored right after the header, so both must fit.
    * Checking only sizeof(*header) would let the count be written past the
    * end of the caller's buffer and report more bytes than were supplied.
    */
   if (*pDataSize < preamble_size) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   memcpy(header->uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);

         /* Compare against the bytes still available instead of forming
          * p + data_size, which could point past the end of the buffer.
          */
         if (data_size > (size_t)(end - p)) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}
/* vkMergePipelineCaches entry point: every shader found in a source cache
 * whose key is not yet present in the destination is inserted there, with an
 * additional reference taken on behalf of the destination.  Caches without a
 * hash table are skipped.  Always returns VK_SUCCESS.
 */
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (dst->cache == NULL)
      return VK_SUCCESS;

   for (uint32_t s = 0; s < srcCacheCount; s++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[s]);
      if (src->cache == NULL)
         continue;

      struct hash_entry *src_entry;
      hash_table_foreach(src->cache, src_entry) {
         struct anv_shader_bin *bin = src_entry->data;
         assert(bin);

         const bool already_present =
            _mesa_hash_table_search(dst->cache, bin->key) != NULL;
         if (!already_present) {
            /* The destination holds its own reference on shared shaders. */
            anv_shader_bin_ref(bin);
            _mesa_hash_table_insert(dst->cache, bin->key, bin);
         }
      }
   }

   return VK_SUCCESS;
}