mesa/src/gallium/drivers/iris/iris_program_cache.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

500 lines
16 KiB
C
Raw Normal View History

2018-01-20 02:47:04 -08:00
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
2018-01-20 02:47:04 -08:00
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
2018-01-20 02:47:04 -08:00
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
2018-01-20 02:47:04 -08:00
*/
/**
* @file iris_program_cache.c
*
* The in-memory program cache. This is basically a hash table mapping
* API-specified shaders and a state key to a compiled variant. It also
* takes care of uploading shader assembly into a BO for use on the GPU.
*/
2018-01-20 02:47:04 -08:00
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
2018-01-20 02:47:04 -08:00
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/compiler/elk/elk_nir.h"
2018-01-20 02:47:04 -08:00
#include "iris_context.h"
2018-04-21 23:27:15 -07:00
#include "iris_resource.h"
2018-01-20 02:47:04 -08:00
/* Hash table key for the program cache: a cache id plus the raw,
 * cache-specific key bytes.  data[] must immediately follow cache_id,
 * since keybox_hash() hashes both as one contiguous region.
 */
struct keybox {
   uint16_t size;                       /* length of data[] in bytes */
   enum iris_program_cache_id cache_id; /* which cache this key belongs to */
   uint8_t data[0];                     /* trailing variable-length key payload */
};
2018-04-21 23:27:15 -07:00
static struct keybox *
make_keybox(void *mem_ctx,
enum iris_program_cache_id cache_id,
const void *key,
uint32_t key_size)
{
2018-01-20 02:47:04 -08:00
struct keybox *keybox =
2018-04-21 23:27:15 -07:00
ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
2018-01-20 02:47:04 -08:00
keybox->cache_id = cache_id;
2018-04-21 23:27:15 -07:00
keybox->size = key_size;
memcpy(keybox->data, key, key_size);
2018-01-20 02:47:04 -08:00
return keybox;
}
/**
 * Hash callback for the program cache table.
 *
 * Hashes cache_id and the trailing key payload as one contiguous span;
 * this relies on data[] directly following cache_id in struct keybox.
 */
static uint32_t
keybox_hash(const void *void_key)
{
   const struct keybox *kb = void_key;
   const uint32_t span = sizeof(kb->cache_id) + kb->size;

   return _mesa_hash_data(&kb->cache_id, span);
}
/**
 * Equality callback for the program cache table.
 *
 * Keys match when they belong to the same cache and carry identical
 * payload bytes.  The cache_id comparison matters: keybox_hash() mixes
 * cache_id into the hash, but without checking it here a hash collision
 * between two different caches with byte-identical key data would
 * silently return the wrong shader.
 */
static bool
keybox_equals(const void *void_a, const void *void_b)
{
   const struct keybox *a = void_a, *b = void_b;
   if (a->cache_id != b->cache_id)
      return false;
   if (a->size != b->size)
      return false;
   return memcmp(a->data, b->data, a->size) == 0;
}
struct iris_compiled_shader *
2018-04-21 23:27:15 -07:00
iris_find_cached_shader(struct iris_context *ice,
enum iris_program_cache_id cache_id,
uint32_t key_size,
const void *key)
2018-04-21 23:27:15 -07:00
{
struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
2018-04-21 23:27:15 -07:00
struct hash_entry *entry =
_mesa_hash_table_search(ice->shaders.cache, keybox);
ralloc_free(keybox);
2018-04-21 23:27:15 -07:00
return entry ? entry->data : NULL;
}
/**
 * Destroy a compiled shader variant.
 *
 * Per the variant refcounting scheme, this runs once the variant is no
 * longer bound in any context, so no stale references can remain.
 */
void
iris_delete_shader_variant(struct iris_compiled_shader *shader)
{
   /* Drop our reference to the BO holding the uploaded assembly. */
   pipe_resource_reference(&shader->assembly.res, NULL);
   /* Tear down the "ready" fence used by threaded shader compilation. */
   util_queue_fence_destroy(&shader->ready);
   ralloc_free(shader);
}
/**
 * Allocate and minimally initialize a new shader variant.
 *
 * The variant is zero-filled, gets one reference, and its "ready" fence
 * is left unsignaled — it signals once iris_upload_shader() finishes.
 * Extra space for the backend's derived program state is allocated
 * inline after the struct.
 */
struct iris_compiled_shader *
iris_create_shader_variant(const struct iris_screen *screen,
                           void *mem_ctx,
                           gl_shader_stage stage,
                           enum iris_program_cache_id cache_id,
                           uint32_t key_size,
                           const void *key)
{
#ifndef NDEBUG
   switch (cache_id) {
   case IRIS_CACHE_BLORP:
      /* Blorp shaders must have a mem_ctx. */
      assert(mem_ctx != NULL);
      break;
   case IRIS_CACHE_TCS:
      /* Pass-through tessellation control shaders (generated by the
       * driver) have a mem_ctx; other TCS variants do not, so either
       * is acceptable here.
       */
      break;
   default:
      /* Everything else must not have a mem_ctx. */
      assert(mem_ctx == NULL);
      break;
   }
#endif

   struct iris_compiled_shader *variant =
      rzalloc_size(mem_ctx, sizeof(*variant) +
                   screen->vtbl.derived_program_state_size(cache_id));

   pipe_reference_init(&variant->ref, 1);
   util_queue_fence_init(&variant->ready);
   /* Leave the fence unsignaled until the assembly is uploaded. */
   util_queue_fence_reset(&variant->ready);

   if (cache_id != IRIS_CACHE_BLORP) {
      assert(key_size <= sizeof(union iris_any_prog_key));
      memcpy(&variant->key, key, key_size);
   }

   variant->stage = stage;

   return variant;
}
iris: Add the variant to the list as early as possible I tried to find a way to break this into some smaller commits, but everything is very intertwined. :( When searching the variants list in the iris_uncompiled_shader, add the new variant if it is not found. This will be necessary for threaded shader compilation. This conceptually simple change had a bunch of fallout. Much of this was at least conceptually borrowed from radeonsi. - Other threads might find a variant in the list before the variant has been compiled. To accomdate this, add a fence. Each thread will wait on the fence in the variant when searching the list. - A variant in the list may fail compilation. To accomodate this, add a flag. All paths will examine iris_compiled_shader::compilation_failed before trying to use the variant. - The race condition between multiple threads trying to create the same variant at the same time is handled *before* both thread spend the effort to compile the shader. The means that iris_upload_shader cannot change shaders on the caller, so it does not need to return anything. v2: Change "found" parameter of find_or_add_variant to "added." This inverts the values returned, and it probably makes uses of the returned value more easily understood. Always set the value in the called function. Suggested by Ken. v3: Move shader->compilation_failed check to avoid shader != NULL test. Rearrange some logic and add a comment in iris_update_compiled_tcs. Suggested by Ken. Don't call find_or_add_variant in iris_create_shader_state. See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229#note_1000843 for more details. Noticed by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>
2021-06-02 14:30:08 -07:00
void
iris_upload_shader(struct iris_screen *screen,
iris: Store a list of shader variants in the shader itself We've traditionally stored shader variants in a per-context hash table, based on a key with many per-stage fields. On older hardware supported by i965, there were potentially quite a few variants, as many features had to be emulated in shaders, including things like texture swizzling. However, on the modern hardware targeted by iris, our NOS dependencies are much smaller. We almost always guess the correct state when doing the initial precompile, and so we have maybe 1-3 variants. iris NOS keys are also dramatically smaller (4 to 24 bytes) than i965's. Unlike the classic world, Gallium also provides a single kind of object for API shaders---pipe_shader_state aka iris_uncompiled_shader. We can simply store a list of shader variants there. This makes it possible to access shader variants across contexts, rather than compiling them separately for each context, which better matches how the APIs work. To look up variants, we simply walk the list and memcmp the keys. Since the list is almost always singular (and rarely ever long), and the keys are tiny, this should be quite low overhead. We continue storing internally generated shaders for BLORP and passthrough TCS in the per-context hash table, as they don't have an associated pipe_shader_state / iris_uncompiled_shader object. (There can also be many BLORP shaders, and the blit keys are large, so having a hash table rather than a list makes sense there.) Because iris_uncompiled_shaders are shared across multiple contexts, we do require locking when accessing this list. Fortunately, this is a per-shader lock, rather than a global one. Additionally, since we only append variants to the list, and generate the first one at precompile time (while only one context has the uncompiled shader), we can assume that it is safe to access that first entry without locking the list. 
This means that we only have to lock when we have multiple variants, which is relatively uncommon. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2020-11-16 13:17:08 -08:00
struct iris_uncompiled_shader *ish,
struct iris_compiled_shader *shader,
struct hash_table *driver_shaders,
struct u_upload_mgr *uploader,
2018-04-21 23:27:15 -07:00
enum iris_program_cache_id cache_id,
uint32_t key_size,
const void *key,
const void *assembly)
2018-01-20 02:47:04 -08:00
{
const struct intel_device_info *devinfo = screen->devinfo;
u_upload_alloc(uploader, 0, shader->program_size, 64,
&shader->assembly.offset, &shader->assembly.res,
&shader->map);
memcpy(shader->map, assembly, shader->program_size);
struct iris_resource *res = (void *) shader->assembly.res;
uint64_t shader_data_addr = res->bo->address +
shader->assembly.offset +
shader->const_data_offset;
if (screen->brw) {
struct brw_shader_reloc_value reloc_values[] = {
{
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
},
{
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = shader_data_addr >> 32,
},
};
brw_write_shader_relocs(&screen->brw->isa, shader->map,
shader->brw_prog_data, reloc_values,
ARRAY_SIZE(reloc_values));
} else {
struct elk_shader_reloc_value reloc_values[] = {
{
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
},
{
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = shader_data_addr >> 32,
},
};
elk_write_shader_relocs(&screen->elk->isa, shader->map,
shader->elk_prog_data, reloc_values,
ARRAY_SIZE(reloc_values));
}
2018-04-22 21:25:51 -07:00
2018-04-21 23:27:15 -07:00
/* Store the 3DSTATE shader packets and other derived state. */
screen->vtbl.store_derived_program_state(devinfo, cache_id, shader);
2018-04-21 23:27:15 -07:00
iris: Add the variant to the list as early as possible I tried to find a way to break this into some smaller commits, but everything is very intertwined. :( When searching the variants list in the iris_uncompiled_shader, add the new variant if it is not found. This will be necessary for threaded shader compilation. This conceptually simple change had a bunch of fallout. Much of this was at least conceptually borrowed from radeonsi. - Other threads might find a variant in the list before the variant has been compiled. To accomdate this, add a fence. Each thread will wait on the fence in the variant when searching the list. - A variant in the list may fail compilation. To accomodate this, add a flag. All paths will examine iris_compiled_shader::compilation_failed before trying to use the variant. - The race condition between multiple threads trying to create the same variant at the same time is handled *before* both thread spend the effort to compile the shader. The means that iris_upload_shader cannot change shaders on the caller, so it does not need to return anything. v2: Change "found" parameter of find_or_add_variant to "added." This inverts the values returned, and it probably makes uses of the returned value more easily understood. Always set the value in the called function. Suggested by Ken. v3: Move shader->compilation_failed check to avoid shader != NULL test. Rearrange some logic and add a comment in iris_update_compiled_tcs. Suggested by Ken. Don't call find_or_add_variant in iris_create_shader_state. See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229#note_1000843 for more details. Noticed by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>
2021-06-02 14:30:08 -07:00
util_queue_fence_signal(&shader->ready);
iris: Store a list of shader variants in the shader itself We've traditionally stored shader variants in a per-context hash table, based on a key with many per-stage fields. On older hardware supported by i965, there were potentially quite a few variants, as many features had to be emulated in shaders, including things like texture swizzling. However, on the modern hardware targeted by iris, our NOS dependencies are much smaller. We almost always guess the correct state when doing the initial precompile, and so we have maybe 1-3 variants. iris NOS keys are also dramatically smaller (4 to 24 bytes) than i965's. Unlike the classic world, Gallium also provides a single kind of object for API shaders---pipe_shader_state aka iris_uncompiled_shader. We can simply store a list of shader variants there. This makes it possible to access shader variants across contexts, rather than compiling them separately for each context, which better matches how the APIs work. To look up variants, we simply walk the list and memcmp the keys. Since the list is almost always singular (and rarely ever long), and the keys are tiny, this should be quite low overhead. We continue storing internally generated shaders for BLORP and passthrough TCS in the per-context hash table, as they don't have an associated pipe_shader_state / iris_uncompiled_shader object. (There can also be many BLORP shaders, and the blit keys are large, so having a hash table rather than a list makes sense there.) Because iris_uncompiled_shaders are shared across multiple contexts, we do require locking when accessing this list. Fortunately, this is a per-shader lock, rather than a global one. Additionally, since we only append variants to the list, and generate the first one at precompile time (while only one context has the uncompiled shader), we can assume that it is safe to access that first entry without locking the list. 
This means that we only have to lock when we have multiple variants, which is relatively uncommon. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2020-11-16 13:17:08 -08:00
iris: Add the variant to the list as early as possible I tried to find a way to break this into some smaller commits, but everything is very intertwined. :( When searching the variants list in the iris_uncompiled_shader, add the new variant if it is not found. This will be necessary for threaded shader compilation. This conceptually simple change had a bunch of fallout. Much of this was at least conceptually borrowed from radeonsi. - Other threads might find a variant in the list before the variant has been compiled. To accomdate this, add a fence. Each thread will wait on the fence in the variant when searching the list. - A variant in the list may fail compilation. To accomodate this, add a flag. All paths will examine iris_compiled_shader::compilation_failed before trying to use the variant. - The race condition between multiple threads trying to create the same variant at the same time is handled *before* both thread spend the effort to compile the shader. The means that iris_upload_shader cannot change shaders on the caller, so it does not need to return anything. v2: Change "found" parameter of find_or_add_variant to "added." This inverts the values returned, and it probably makes uses of the returned value more easily understood. Always set the value in the called function. Suggested by Ken. v3: Move shader->compilation_failed check to avoid shader != NULL test. Rearrange some logic and add a comment in iris_update_compiled_tcs. Suggested by Ken. Don't call find_or_add_variant in iris_create_shader_state. See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229#note_1000843 for more details. Noticed by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>
2021-06-02 14:30:08 -07:00
if (!ish) {
iris: Store a list of shader variants in the shader itself We've traditionally stored shader variants in a per-context hash table, based on a key with many per-stage fields. On older hardware supported by i965, there were potentially quite a few variants, as many features had to be emulated in shaders, including things like texture swizzling. However, on the modern hardware targeted by iris, our NOS dependencies are much smaller. We almost always guess the correct state when doing the initial precompile, and so we have maybe 1-3 variants. iris NOS keys are also dramatically smaller (4 to 24 bytes) than i965's. Unlike the classic world, Gallium also provides a single kind of object for API shaders---pipe_shader_state aka iris_uncompiled_shader. We can simply store a list of shader variants there. This makes it possible to access shader variants across contexts, rather than compiling them separately for each context, which better matches how the APIs work. To look up variants, we simply walk the list and memcmp the keys. Since the list is almost always singular (and rarely ever long), and the keys are tiny, this should be quite low overhead. We continue storing internally generated shaders for BLORP and passthrough TCS in the per-context hash table, as they don't have an associated pipe_shader_state / iris_uncompiled_shader object. (There can also be many BLORP shaders, and the blit keys are large, so having a hash table rather than a list makes sense there.) Because iris_uncompiled_shaders are shared across multiple contexts, we do require locking when accessing this list. Fortunately, this is a per-shader lock, rather than a global one. Additionally, since we only append variants to the list, and generate the first one at precompile time (while only one context has the uncompiled shader), we can assume that it is safe to access that first entry without locking the list. 
This means that we only have to lock when we have multiple variants, which is relatively uncommon. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2020-11-16 13:17:08 -08:00
struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
_mesa_hash_table_insert(driver_shaders, keybox, shader);
iris: Store a list of shader variants in the shader itself We've traditionally stored shader variants in a per-context hash table, based on a key with many per-stage fields. On older hardware supported by i965, there were potentially quite a few variants, as many features had to be emulated in shaders, including things like texture swizzling. However, on the modern hardware targeted by iris, our NOS dependencies are much smaller. We almost always guess the correct state when doing the initial precompile, and so we have maybe 1-3 variants. iris NOS keys are also dramatically smaller (4 to 24 bytes) than i965's. Unlike the classic world, Gallium also provides a single kind of object for API shaders---pipe_shader_state aka iris_uncompiled_shader. We can simply store a list of shader variants there. This makes it possible to access shader variants across contexts, rather than compiling them separately for each context, which better matches how the APIs work. To look up variants, we simply walk the list and memcmp the keys. Since the list is almost always singular (and rarely ever long), and the keys are tiny, this should be quite low overhead. We continue storing internally generated shaders for BLORP and passthrough TCS in the per-context hash table, as they don't have an associated pipe_shader_state / iris_uncompiled_shader object. (There can also be many BLORP shaders, and the blit keys are large, so having a hash table rather than a list makes sense there.) Because iris_uncompiled_shaders are shared across multiple contexts, we do require locking when accessing this list. Fortunately, this is a per-shader lock, rather than a global one. Additionally, since we only append variants to the list, and generate the first one at precompile time (while only one context has the uncompiled shader), we can assume that it is safe to access that first entry without locking the list. 
This means that we only have to lock when we have multiple variants, which is relatively uncommon. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2020-11-16 13:17:08 -08:00
}
2018-04-21 23:27:15 -07:00
}
bool
iris_blorp_lookup_shader(struct blorp_batch *blorp_batch,
const void *key, uint32_t key_size,
uint32_t *kernel_out, void *prog_data_out)
{
struct blorp_context *blorp = blorp_batch->blorp;
struct iris_context *ice = blorp->driver_ctx;
struct iris_batch *batch = blorp_batch->driver_batch;
struct iris_screen *screen = batch->screen;
2018-04-21 23:27:15 -07:00
struct iris_compiled_shader *shader =
iris_find_cached_shader(ice, IRIS_CACHE_BLORP, key_size, key);
2018-01-20 02:47:04 -08:00
2018-04-21 23:27:15 -07:00
if (!shader)
return false;
2018-06-28 00:57:49 -07:00
struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
*kernel_out =
iris_bo_offset_from_base_address(bo) + shader->assembly.offset;
*((void **) prog_data_out) = screen->brw ? (void *)shader->brw_prog_data
: (void *)shader->elk_prog_data;
2018-04-21 23:27:15 -07:00
iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
2018-04-21 23:27:15 -07:00
return true;
}
/**
 * blorp_context::upload_shader callback.
 *
 * Creates a new BLORP shader variant, copies in the prog_data template,
 * finalizes and uploads the assembly (which also inserts it into the
 * program cache), then returns the kernel offset and prog_data exactly
 * like iris_blorp_lookup_shader() would on a hit.
 */
bool
iris_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
                         const void *key, uint32_t key_size,
                         const void *kernel, UNUSED uint32_t kernel_size,
                         const void *prog_data_templ,
                         UNUSED uint32_t prog_data_size,
                         uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = blorp_batch->blorp;
   struct iris_context *ice = blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;
   struct iris_screen *screen = batch->screen;

   /* BLORP shaders reference no application resources, so an all-zero
    * binding table suffices. */
   struct iris_binding_table bt;
   memset(&bt, 0, sizeof(bt));

   struct iris_compiled_shader *variant =
      iris_create_shader_variant(screen, ice->shaders.cache, stage,
                                 IRIS_CACHE_BLORP, key_size, key);

   /* Copy the caller's prog_data template into storage the variant owns. */
   void *prog_data = ralloc_size(NULL, prog_data_size);
   memcpy(prog_data, prog_data_templ, prog_data_size);

   if (screen->brw) {
      iris_apply_brw_prog_data(variant, prog_data);
   } else {
      assert(screen->elk);
      iris_apply_elk_prog_data(variant, prog_data);
   }

   iris_finalize_program(variant, NULL, NULL, 0, 0, 0, &bt);

   iris_upload_shader(screen, NULL, variant, ice->shaders.cache,
                      ice->shaders.uploader_driver,
                      IRIS_CACHE_BLORP, key_size, key, kernel);

   struct iris_bo *assembly_bo = iris_resource_bo(variant->assembly.res);

   *kernel_out = iris_bo_offset_from_base_address(assembly_bo) +
                 variant->assembly.offset;
   *((void **) prog_data_out) = screen->brw ? (void *) variant->brw_prog_data
                                            : (void *) variant->elk_prog_data;

   iris_use_pinned_bo(batch, assembly_bo, false, IRIS_DOMAIN_NONE);

   return true;
}
/**
 * Create the per-context program cache: the hash table for internally
 * generated shaders (BLORP, passthrough TCS) and the two shader-assembly
 * uploaders.
 */
void
iris_init_program_cache(struct iris_context *ice)
{
   ice->shaders.cache =
      _mesa_hash_table_create(ice, keybox_hash, keybox_equals);

   /* Both uploaders place shader assembly in the shader memory zone on
    * device memory; they only differ in how they're synchronized.
    */
   const unsigned buffer_size = 64 * 1024;
   const unsigned flags = IRIS_RESOURCE_FLAG_SHADER_MEMZONE |
                          IRIS_RESOURCE_FLAG_DEVICE_MEM;

   ice->shaders.uploader_driver =
      u_upload_create(&ice->ctx, buffer_size, PIPE_BIND_CUSTOM,
                      PIPE_USAGE_IMMUTABLE, flags);
   ice->shaders.uploader_unsync =
      u_upload_create(&ice->ctx, buffer_size, PIPE_BIND_CUSTOM,
                      PIPE_USAGE_IMMUTABLE, flags);
}
void
iris_destroy_program_cache(struct iris_context *ice)
{
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
iris: Refcount shader variants There is a small gap of time where the currently bound uncompiled shaders, and compiled shader variant, are out of sync. Specifically, between pipe->bind_*_state() and the next draw. Currently, shaders variants live entirely within a single context, and when deleting an iris_uncompiled_shader, we check if any of its variants are currently bound, and defer deleting those until the next iris_update_compiled_shaders() hook runs and binds new shaders to replace them. (This is due to the time gap between binding new uncompiled shaders, and updating variants at draw time when we have the required NOS in place.) This works pretty well in a single context world. But as we move to share compiled shader variants across multiple contexts, it breaks down. When deleting a shader, we can't look at all contexts to see if its variants are bound anywhere. We can't even quantify whether those contexts will run a future draw any time soon, to update and unbind. One fairly crazy solution would be to delete the variants anyway, and leave the stale pointers to dead variants in place. This requires removing any code that compares old and new variants. Today, we do that sometimes for seeing if the old/new shaders toggled some feature. Worse than that, though, we don't just have to avoid dereferences, we'd have to avoid pointer comparisons. If we free a variant, and quickly allocate a new variant, malloc may return the same pointer. If it's for the same shader stage, we may get a new different program that has the same pointer as a previously bound stale one, causing us to think nothing had changed when we really needed to do updates. Again, this is doable, but leaves the code fragile - we'd have to guard against future patches adding such checks back in. So, don't do that. Instead, do basic reference counting. When a variant is bound in a context, up the reference. When it's unbound, decrement it. 
When it hits zero, we know it's not bound anywhere and is safe to delete, with no stale references. This ends up being reasonably cheap anyway, since the atomic is usually uncontested. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2021-01-27 13:46:26 -08:00
iris_shader_variant_reference(&ice->shaders.prog[i], NULL);
2018-01-20 02:47:04 -08:00
}
iris_shader_variant_reference(&ice->shaders.last_vue_shader, NULL);
2018-06-16 09:56:59 -07:00
hash_table_foreach(ice->shaders.cache, entry) {
struct iris_compiled_shader *shader = entry->data;
iris: Refcount shader variants There is a small gap of time where the currently bound uncompiled shaders, and compiled shader variant, are out of sync. Specifically, between pipe->bind_*_state() and the next draw. Currently, shaders variants live entirely within a single context, and when deleting an iris_uncompiled_shader, we check if any of its variants are currently bound, and defer deleting those until the next iris_update_compiled_shaders() hook runs and binds new shaders to replace them. (This is due to the time gap between binding new uncompiled shaders, and updating variants at draw time when we have the required NOS in place.) This works pretty well in a single context world. But as we move to share compiled shader variants across multiple contexts, it breaks down. When deleting a shader, we can't look at all contexts to see if its variants are bound anywhere. We can't even quantify whether those contexts will run a future draw any time soon, to update and unbind. One fairly crazy solution would be to delete the variants anyway, and leave the stale pointers to dead variants in place. This requires removing any code that compares old and new variants. Today, we do that sometimes for seeing if the old/new shaders toggled some feature. Worse than that, though, we don't just have to avoid dereferences, we'd have to avoid pointer comparisons. If we free a variant, and quickly allocate a new variant, malloc may return the same pointer. If it's for the same shader stage, we may get a new different program that has the same pointer as a previously bound stale one, causing us to think nothing had changed when we really needed to do updates. Again, this is doable, but leaves the code fragile - we'd have to guard against future patches adding such checks back in. So, don't do that. Instead, do basic reference counting. When a variant is bound in a context, up the reference. When it's unbound, decrement it. 
When it hits zero, we know it's not bound anywhere and is safe to delete, with no stale references. This ends up being reasonably cheap anyway, since the atomic is usually uncontested. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7668>
2021-01-27 13:46:26 -08:00
iris_delete_shader_variant(shader);
2018-06-16 09:56:59 -07:00
}
u_upload_destroy(ice->shaders.uploader_driver);
u_upload_destroy(ice->shaders.uploader_unsync);
ralloc_free(ice->shaders.cache);
2018-01-20 02:47:04 -08:00
}
/* Link the Intel internal shader library into `nir`: resolve calls to
 * library functions, inline them, and lower the result so only the
 * entrypoint remains, with explicit 62-bit generic addressing for all
 * temp/shared/global memory accesses.
 *
 * The pass order matters: inlining must precede entrypoint pruning, and
 * explicit types/derefs must be resolved before explicit-I/O lowering.
 */
static void
link_libintel_shaders(nir_shader *nir, const nir_shader *libintel)
{
   nir_link_shader_functions(nir, libintel);
   /* Inline library calls so the helpers can be discarded below. */
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_remove_non_entrypoints);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
              nir_var_mem_global,
              nir_address_format_62bit_generic);
}
/**
 * Ensure this context's indirect-draw "generation" shader exists.
 *
 * Builds (once per context) an internal shader used to generate indirect
 * draw commands.  The result is cached in the BLORP program cache under a
 * fixed name key and stored in ice->draw.generation.shader.  Subsequent
 * calls return immediately.
 */
void
iris_ensure_indirect_generation_shader(struct iris_batch *batch)
{
   struct iris_context *ice = batch->ice;
   if (ice->draw.generation.shader)
      return;

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   /* Fixed-name key: there is only ever one generation shader variant,
    * so a constant string is enough to identify it in the cache.
    */
   const struct {
      char name[40];
   } key = {
      .name = "iris-generation-shader",
   };
   ice->draw.generation.shader =
      iris_find_cached_shader(ice, IRIS_CACHE_BLORP, sizeof(key), &key);
   if (ice->draw.generation.shader != NULL)
      return;

   /* NOTE(review): compiler options are taken from the COMPUTE slot even
    * though the builder below creates a FRAGMENT shader (it is compiled
    * with brw/elk_compile_fs) — presumably intentional; confirm.
    */
   const nir_shader_compiler_options *nir_options =
      screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
                  : screen->elk->nir_options[MESA_SHADER_COMPUTE];
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                  nir_options,
                                                  "iris-indirect-generate");

   /* The vtbl hook emits the generation logic into the builder and
    * reports how many bytes of uniforms it needs.
    */
   uint32_t uniform_size =
      screen->vtbl.call_generation_shader(screen, &b);

   nir_shader *nir = b.shader;

   void *mem_ctx = ralloc_context(NULL);
   /* Pull in and inline helpers from the Intel shader library. */
   link_libintel_shaders(nir, screen->vtbl.load_shader_lib(screen, mem_ctx));

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   /* Backend-specific NIR preprocessing (brw for Gfx9+, elk for older). */
   if (screen->brw) {
      struct brw_nir_compiler_opts opts = {};
      brw_preprocess_nir(screen->brw, nir, &opts);
   } else {
      assert(screen->elk);
      struct elk_nir_compiler_opts opts = {};
      elk_preprocess_nir(screen->elk, nir, &opts);
   }

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_input_attachments,
              &(nir_input_attachment_options) {
                 .use_fragcoord_sysval = true,
                 .use_layer_id_sysval = true,
              });

   /* Reset sizes before gathering information */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_opt_dce);

   /* Do vectorizing here. For some reason when trying to do it in the back
    * this just isn't working.
    */
   nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = brw_nir_should_vectorize_mem,
      .robust_modes = (nir_variable_mode)0,
   };
   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);

   nir->num_uniforms = uniform_size;

   struct iris_compiled_shader *shader =
      iris_create_shader_variant(screen, ice->shaders.cache,
                                 MESA_SHADER_FRAGMENT,
                                 IRIS_CACHE_BLORP,
                                 sizeof(key), &key);

   /* Compile with whichever backend this screen uses; both paths fill in
    * `program` (assembly) and apply the resulting prog_data to `shader`.
    */
   const unsigned *program;
   if (screen->brw) {
      union brw_any_prog_key prog_key;
      memset(&prog_key, 0, sizeof(prog_key));

      struct brw_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
      memset(prog_data, 0, sizeof(*prog_data));
      /* Uniforms are counted in 32-bit slots. */
      prog_data->base.nr_params = nir->num_uniforms / 4;

      brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);

      struct brw_compile_stats stats[3];
      struct brw_compile_fs_params params = {
         .base = {
            .nir = nir,
            .log_data = &ice->dbg,
            .debug_flag = DEBUG_WM,
            .stats = stats,
            .mem_ctx = mem_ctx,
         },
         .key = &prog_key.wm,
         .prog_data = prog_data,
      };
      program = brw_compile_fs(screen->brw, &params);
      assert(program);
      iris_apply_brw_prog_data(shader, &prog_data->base);
   } else {
      union elk_any_prog_key prog_key;
      memset(&prog_key, 0, sizeof(prog_key));

      struct elk_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
      memset(prog_data, 0, sizeof(*prog_data));
      /* Uniforms are counted in 32-bit slots. */
      prog_data->base.nr_params = nir->num_uniforms / 4;

      elk_nir_analyze_ubo_ranges(screen->elk, nir, prog_data->base.ubo_ranges);

      struct elk_compile_stats stats[3];
      struct elk_compile_fs_params params = {
         .base = {
            .nir = nir,
            .log_data = &ice->dbg,
            .debug_flag = DEBUG_WM,
            .stats = stats,
            .mem_ctx = mem_ctx,
         },
         .key = &prog_key.wm,
         .prog_data = prog_data,
      };
      program = elk_compile_fs(screen->elk, &params);
      assert(program);
      iris_apply_elk_prog_data(shader, &prog_data->base);
   }

   /* Internal shader: no API binding table. */
   struct iris_binding_table bt;
   memset(&bt, 0, sizeof(bt));

   iris_finalize_program(shader, NULL, NULL, 0, 0, 0, &bt);

   iris_upload_shader(screen, NULL, shader, ice->shaders.cache,
                      ice->shaders.uploader_driver,
                      IRIS_CACHE_BLORP, sizeof(key), &key, program);
   ralloc_free(mem_ctx);

   struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   ice->draw.generation.shader = shader;
}