mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
radeonsi: move shader cache code to new file
Reviewed-by: David Rosca <david.rosca@amd.com> Reviewed-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41133>
This commit is contained in:
parent
68a383531d
commit
a325be9548
7 changed files with 433 additions and 388 deletions
|
|
@ -7,4 +7,35 @@
|
|||
#ifndef SI_GFX_H
|
||||
#define SI_GFX_H
|
||||
|
||||
#include "util/mesa-blake3.h"
|
||||
#include "util/u_stub_gfx_compute.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct si_screen;
|
||||
struct si_shader;
|
||||
struct si_shader_selector;
|
||||
|
||||
/* si_shader_cache.c */
|
||||
MESAPROC void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
||||
unsigned wave_size, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN]) TAILV;
|
||||
|
||||
MESAPROC bool si_init_shader_cache(struct si_screen *sscreen) TAILB;
|
||||
|
||||
MESAPROC void si_init_screen_live_shader_cache(struct si_screen *sscreen) TAILV;
|
||||
|
||||
MESAPROC void si_destroy_shader_cache(struct si_screen *sscreen) TAILV;
|
||||
|
||||
MESAPROC bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader) TAILB;
|
||||
|
||||
MESAPROC void si_shader_cache_insert_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader, bool insert_into_disk_cache) TAILV;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* SI_GFX_H */
|
||||
|
|
|
|||
391
src/gallium/drivers/radeonsi/gfx/si_shader_cache.c
Normal file
391
src/gallium/drivers/radeonsi/gfx/si_shader_cache.c
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
/*
|
||||
* Copyright 2026 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "si_gfx.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader.h"
|
||||
|
||||
#include "util/blob.h"
|
||||
#include "util/crc32.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "nir.h"
|
||||
#include "nir_serialize.h"
|
||||
|
||||
/**
|
||||
* Return the IR key for the shader cache.
|
||||
*/
|
||||
void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
||||
unsigned wave_size, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN])
|
||||
{
|
||||
struct blob blob = {};
|
||||
unsigned ir_size;
|
||||
void *ir_binary;
|
||||
|
||||
if (sel->nir_binary) {
|
||||
ir_binary = sel->nir_binary;
|
||||
ir_size = sel->nir_size;
|
||||
} else {
|
||||
assert(sel->nir);
|
||||
|
||||
blob_init(&blob);
|
||||
/* Keep debug info if NIR debug prints are in use. */
|
||||
nir_serialize(&blob, sel->nir, NIR_DEBUG(PRINT) == 0);
|
||||
ir_binary = blob.data;
|
||||
ir_size = blob.size;
|
||||
}
|
||||
|
||||
/* These settings affect the compilation, but they are not derived
|
||||
* from the input shader IR.
|
||||
*/
|
||||
unsigned shader_variant_flags = 0;
|
||||
|
||||
if (ngg)
|
||||
shader_variant_flags |= 1 << 0;
|
||||
/* bit gap */
|
||||
if (wave_size == 32)
|
||||
shader_variant_flags |= 1 << 2;
|
||||
/* bit gap */
|
||||
/* use_ngg_culling disables NGG passthrough for non-culling shaders to reduce context
|
||||
* rolls, which can be changed with AMD_DEBUG=nonggc or AMD_DEBUG=nggc.
|
||||
*/
|
||||
if (sel->screen->use_ngg_culling)
|
||||
shader_variant_flags |= 1 << 4;
|
||||
if (sel->screen->record_llvm_ir)
|
||||
shader_variant_flags |= 1 << 5;
|
||||
if (sel->screen->info.has_image_opcodes)
|
||||
shader_variant_flags |= 1 << 6;
|
||||
if (sel->screen->options.no_infinite_interp)
|
||||
shader_variant_flags |= 1 << 7;
|
||||
if (sel->screen->options.clamp_div_by_zero)
|
||||
shader_variant_flags |= 1 << 8;
|
||||
if ((sel->stage == MESA_SHADER_VERTEX ||
|
||||
sel->stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->stage == MESA_SHADER_GEOMETRY) &&
|
||||
!es &&
|
||||
sel->screen->options.vrs2x2)
|
||||
shader_variant_flags |= 1 << 10;
|
||||
if (sel->screen->options.inline_uniforms)
|
||||
shader_variant_flags |= 1 << 11;
|
||||
if (sel->screen->options.clear_lds)
|
||||
shader_variant_flags |= 1 << 12;
|
||||
|
||||
blake3_hasher ctx;
|
||||
_mesa_blake3_init(&ctx);
|
||||
_mesa_blake3_update(&ctx, &shader_variant_flags, 4);
|
||||
_mesa_blake3_update(&ctx, ir_binary, ir_size);
|
||||
_mesa_blake3_final(&ctx, ir_blake3_cache_key);
|
||||
|
||||
if (ir_binary == blob.data)
|
||||
blob_finish(&blob);
|
||||
}
|
||||
|
||||
/**
 * Copy "size" bytes from "data" to "ptr".
 *
 * Returns "ptr" advanced by "size" rounded up to whole dwords, i.e. the next
 * dword following the copied data.
 */
static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
{
   /* A zero-sized write may come with data == NULL, so memcpy is skipped. */
   if (size != 0)
      memcpy(ptr, data, size);

   return ptr + DIV_ROUND_UP(size, 4);
}
|
||||
|
||||
/**
 * Copy "size" bytes from "ptr" into "data".
 *
 * Returns "ptr" advanced by "size" rounded up to whole dwords, i.e. the next
 * dword following the data that was read.
 */
static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
{
   uint32_t *next = ptr + DIV_ROUND_UP(size, 4);

   memcpy(data, ptr, size);
   return next;
}
|
||||
|
||||
/**
 * Write a chunk: one dword holding "size", followed by the data itself.
 *
 * Returns the next dword following the copied data.
 */
static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
{
   ptr[0] = size;
   return write_data(ptr + 1, data, size);
}
|
||||
|
||||
/**
 * Read a chunk: one dword holding the size, followed by the data itself.
 *
 * The size is returned in "*size". If it is non-zero, a buffer of that size
 * is malloc'ed, filled with the data and returned in "*data"; otherwise
 * "*data" is left untouched. "*data" is expected to be NULL on entry.
 *
 * Returns the next dword following the data.
 *
 * NOTE(review): the malloc result is not checked before it is written to.
 */
static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
{
   const unsigned num_bytes = ptr[0];
   uint32_t *payload = ptr + 1;

   *size = num_bytes;
   assert(*data == NULL);

   if (num_bytes == 0)
      return payload;

   *data = malloc(num_bytes);
   return read_data(payload, *data, num_bytes);
}
|
||||
|
||||
/* Header placed at the start of every serialized shader binary. */
struct si_shader_blob_head {
   uint32_t size;  /* size in bytes of the whole blob, including this header */
   uint32_t type;  /* copy of si_shader::binary.type */
   uint32_t crc32; /* CRC32 of everything that follows this header */
};
|
||||
|
||||
/**
|
||||
* Return the shader binary in a buffer.
|
||||
*/
|
||||
static uint32_t *si_get_shader_binary(struct si_shader *shader)
|
||||
{
|
||||
/* There is always a size of data followed by the data itself. */
|
||||
unsigned llvm_ir_size =
|
||||
shader->binary.llvm_ir_string ? strlen(shader->binary.llvm_ir_string) + 1 : 0;
|
||||
|
||||
/* Refuse to allocate overly large buffers and guard against integer
|
||||
* overflow. */
|
||||
if (shader->binary.code_size > UINT_MAX / 4 || llvm_ir_size > UINT_MAX / 4 ||
|
||||
shader->binary.num_symbols > UINT_MAX / 32)
|
||||
return NULL;
|
||||
|
||||
unsigned size = sizeof(struct si_shader_blob_head) +
|
||||
align(sizeof(shader->config), 4) +
|
||||
align(sizeof(shader->info), 4) +
|
||||
4 + 4 + align(shader->binary.code_size, 4) +
|
||||
4 + shader->binary.num_symbols * 8 +
|
||||
4 + align(llvm_ir_size, 4) +
|
||||
4 + align(shader->binary.disasm_size, 4);
|
||||
uint32_t *buffer = (uint32_t*)CALLOC(1, size);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
|
||||
struct si_shader_blob_head *head = (struct si_shader_blob_head *)buffer;
|
||||
head->type = shader->binary.type;
|
||||
head->size = size;
|
||||
|
||||
uint32_t *data = buffer + sizeof(*head) / 4;
|
||||
uint32_t *ptr = data;
|
||||
|
||||
ptr = write_data(ptr, &shader->config, sizeof(shader->config));
|
||||
ptr = write_data(ptr, &shader->info, sizeof(shader->info));
|
||||
ptr = write_data(ptr, &shader->binary.exec_size, 4);
|
||||
ptr = write_chunk(ptr, shader->binary.code_buffer, shader->binary.code_size);
|
||||
ptr = write_chunk(ptr, shader->binary.symbols, shader->binary.num_symbols * 8);
|
||||
ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
|
||||
ptr = write_chunk(ptr, shader->binary.disasm_string, shader->binary.disasm_size);
|
||||
assert((char *)ptr - (char *)buffer == (ptrdiff_t)size);
|
||||
|
||||
/* Compute CRC32. */
|
||||
head->crc32 = util_hash_crc32(data, size - sizeof(*head));
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static bool si_load_shader_binary(struct si_shader *shader, void *binary)
|
||||
{
|
||||
struct si_shader_blob_head *head = (struct si_shader_blob_head *)binary;
|
||||
unsigned chunk_size;
|
||||
unsigned code_size;
|
||||
|
||||
uint32_t *ptr = (uint32_t *)binary + sizeof(*head) / 4;
|
||||
if (util_hash_crc32(ptr, head->size - sizeof(*head)) != head->crc32) {
|
||||
mesa_loge("binary shader has invalid CRC32");
|
||||
return false;
|
||||
}
|
||||
|
||||
shader->binary.type = (enum si_shader_binary_type)head->type;
|
||||
ptr = read_data(ptr, &shader->config, sizeof(shader->config));
|
||||
ptr = read_data(ptr, &shader->info, sizeof(shader->info));
|
||||
ptr = read_data(ptr, &shader->binary.exec_size, 4);
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.code_buffer, &code_size);
|
||||
shader->binary.code_size = code_size;
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.symbols, &chunk_size);
|
||||
shader->binary.num_symbols = chunk_size / 8;
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.llvm_ir_string, &chunk_size);
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.disasm_string, &chunk_size);
|
||||
shader->binary.disasm_size = chunk_size;
|
||||
|
||||
if (!shader->is_gs_copy_shader &&
|
||||
shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
|
||||
if (!shader->gs_copy_shader)
|
||||
return false;
|
||||
|
||||
shader->gs_copy_shader->is_gs_copy_shader = true;
|
||||
|
||||
if (!si_load_shader_binary(shader->gs_copy_shader, (uint8_t*)binary + head->size)) {
|
||||
FREE(shader->gs_copy_shader);
|
||||
shader->gs_copy_shader = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
util_queue_fence_init(&shader->gs_copy_shader->ready);
|
||||
shader->gs_copy_shader->selector = shader->selector;
|
||||
shader->gs_copy_shader->is_gs_copy_shader = true;
|
||||
shader->gs_copy_shader->wave_size =
|
||||
si_determine_wave_size(shader->selector->screen, shader->gs_copy_shader);
|
||||
|
||||
si_shader_binary_upload(shader->selector->screen, shader->gs_copy_shader, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a shader into the cache. It's assumed the shader is not in the cache.
|
||||
* Use si_shader_cache_load_shader before calling this.
|
||||
*/
|
||||
void si_shader_cache_insert_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader, bool insert_into_disk_cache)
|
||||
{
|
||||
uint32_t *hw_binary;
|
||||
struct hash_entry *entry;
|
||||
uint8_t key[CACHE_KEY_SIZE];
|
||||
bool memory_cache_full = sscreen->shader_cache_size >= sscreen->shader_cache_max_size;
|
||||
|
||||
if (!insert_into_disk_cache && memory_cache_full)
|
||||
return;
|
||||
|
||||
entry = _mesa_hash_table_search(sscreen->shader_cache, ir_blake3_cache_key);
|
||||
if (entry)
|
||||
return; /* already added */
|
||||
|
||||
hw_binary = si_get_shader_binary(shader);
|
||||
if (!hw_binary)
|
||||
return;
|
||||
|
||||
unsigned size = *hw_binary;
|
||||
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
uint32_t *gs_copy_binary = si_get_shader_binary(shader->gs_copy_shader);
|
||||
if (!gs_copy_binary) {
|
||||
FREE(hw_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Combine both binaries. */
|
||||
size += *gs_copy_binary;
|
||||
uint32_t *combined_binary = (uint32_t*)MALLOC(size);
|
||||
if (!combined_binary) {
|
||||
FREE(hw_binary);
|
||||
FREE(gs_copy_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(combined_binary, hw_binary, *hw_binary);
|
||||
memcpy(combined_binary + *hw_binary / 4, gs_copy_binary, *gs_copy_binary);
|
||||
FREE(hw_binary);
|
||||
FREE(gs_copy_binary);
|
||||
hw_binary = combined_binary;
|
||||
}
|
||||
|
||||
if (!memory_cache_full) {
|
||||
if (_mesa_hash_table_insert(sscreen->shader_cache,
|
||||
mem_dup(ir_blake3_cache_key, 20),
|
||||
hw_binary) == NULL) {
|
||||
FREE(hw_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
sscreen->shader_cache_size += size;
|
||||
}
|
||||
|
||||
if (sscreen->disk_shader_cache && insert_into_disk_cache) {
|
||||
disk_cache_compute_key(sscreen->disk_shader_cache, ir_blake3_cache_key, 20, key);
|
||||
disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, size, NULL);
|
||||
}
|
||||
|
||||
if (memory_cache_full)
|
||||
FREE(hw_binary);
|
||||
}
|
||||
|
||||
bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader)
|
||||
{
|
||||
struct hash_entry *entry = _mesa_hash_table_search(sscreen->shader_cache, ir_blake3_cache_key);
|
||||
|
||||
if (entry) {
|
||||
if (si_load_shader_binary(shader, entry->data)) {
|
||||
p_atomic_inc(&sscreen->num_memory_shader_cache_hits);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
p_atomic_inc(&sscreen->num_memory_shader_cache_misses);
|
||||
|
||||
if (!sscreen->disk_shader_cache)
|
||||
return false;
|
||||
|
||||
unsigned char blake3[CACHE_KEY_SIZE];
|
||||
disk_cache_compute_key(sscreen->disk_shader_cache, ir_blake3_cache_key, 20, blake3);
|
||||
|
||||
size_t total_size;
|
||||
uint32_t *buffer = (uint32_t*)disk_cache_get(sscreen->disk_shader_cache, blake3, &total_size);
|
||||
if (buffer) {
|
||||
unsigned size = *buffer;
|
||||
unsigned gs_copy_binary_size = 0;
|
||||
|
||||
/* The GS copy shader binary is after the GS binary. */
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
|
||||
gs_copy_binary_size = buffer[size / 4];
|
||||
|
||||
if (total_size >= sizeof(uint32_t) && size + gs_copy_binary_size == total_size) {
|
||||
if (si_load_shader_binary(shader, buffer)) {
|
||||
free(buffer);
|
||||
si_shader_cache_insert_shader(sscreen, ir_blake3_cache_key, shader, false);
|
||||
p_atomic_inc(&sscreen->num_disk_shader_cache_hits);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
/* Something has gone wrong discard the item from the cache and
|
||||
* rebuild/link from source.
|
||||
*/
|
||||
assert(!"Invalid radeonsi shader disk cache item!");
|
||||
disk_cache_remove(sscreen->disk_shader_cache, blake3);
|
||||
}
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
p_atomic_inc(&sscreen->num_disk_shader_cache_misses);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Hash function for the in-memory shader cache.
 *
 * Returns the first dword of the key in native byte order.
 */
static uint32_t si_shader_cache_key_hash(const void *key)
{
   /* Take the first dword of BLAKE3. The key is a byte array, so copy the
    * bytes instead of dereferencing it as uint32_t: that doesn't rely on the
    * alignment of the key and doesn't cast away const.
    */
   uint32_t first_dword;

   memcpy(&first_dword, key, sizeof(first_dword));
   return first_dword;
}
|
||||
|
||||
/**
 * Key comparison function for the in-memory shader cache.
 *
 * Two keys are equal if their first 20 bytes are identical.
 */
static bool si_shader_cache_key_equals(const void *a, const void *b)
{
   /* Compare BLAKE3s. */
   const int difference = memcmp(a, b, 20);

   return !difference;
}
|
||||
|
||||
static void si_destroy_shader_cache_entry(struct hash_entry *entry)
|
||||
{
|
||||
FREE((void *)entry->key);
|
||||
FREE(entry->data);
|
||||
}
|
||||
|
||||
bool si_init_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
(void)simple_mtx_init(&sscreen->shader_cache_mutex, mtx_plain);
|
||||
sscreen->shader_cache =
|
||||
_mesa_hash_table_create(NULL, si_shader_cache_key_hash, si_shader_cache_key_equals);
|
||||
sscreen->shader_cache_size = 0;
|
||||
/* Maximum size: 64MB on 32 bits, 1GB else */
|
||||
sscreen->shader_cache_max_size = ((sizeof(void *) == 4) ? 64 : 1024) * 1024 * 1024;
|
||||
|
||||
return sscreen->shader_cache != NULL;
|
||||
}
|
||||
|
||||
void si_destroy_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
if (sscreen->shader_cache)
|
||||
_mesa_hash_table_destroy(sscreen->shader_cache, si_destroy_shader_cache_entry);
|
||||
simple_mtx_destroy(&sscreen->shader_cache_mutex);
|
||||
}
|
||||
|
||||
void si_init_screen_live_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,
|
||||
si_destroy_shader_selector);
|
||||
}
|
||||
|
|
@ -124,6 +124,7 @@ if with_gfx_compute
|
|||
'si_compute_blit.c',
|
||||
'si_compute.c',
|
||||
'si_debug_gfx_compute.c',
|
||||
'gfx/si_shader_cache.c',
|
||||
'si_nir_mediump.c',
|
||||
'si_mesh_shader.c',
|
||||
'si_shader_aco.c',
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "ac_shader_util.h"
|
||||
#include "si_build_pm4.h"
|
||||
#include "si_shader_internal.h"
|
||||
#include "gfx/si_gfx.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "si_tracepoints.h"
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
*/
|
||||
|
||||
#include "si_pipe.h"
|
||||
#include "gfx/si_gfx.h"
|
||||
#include "mm/si_mm.h"
|
||||
|
||||
#include "driver_ddebug/dd_util.h"
|
||||
|
|
|
|||
|
|
@ -474,17 +474,8 @@ void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st);
|
|||
void si_emit_dpbb_state(struct si_context *sctx, unsigned index);
|
||||
|
||||
/* si_state_shaders.cpp */
|
||||
void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
||||
unsigned wave_size, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN]);
|
||||
bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader);
|
||||
void si_shader_cache_insert_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader, bool insert_into_disk_cache);
|
||||
bool si_shader_mem_ordered(struct si_shader *shader);
|
||||
MESAPROC void si_init_screen_live_shader_cache(struct si_screen *sscreen) TAILV;
|
||||
void si_init_shader_functions(struct si_context *sctx);
|
||||
MESAPROC bool si_init_shader_cache(struct si_screen *sscreen) TAILBT;
|
||||
MESAPROC void si_destroy_shader_cache(struct si_screen *sscreen) TAILV;
|
||||
void si_schedule_initial_compile(struct si_context *sctx, mesa_shader_stage stage,
|
||||
struct util_queue_fence *ready_fence,
|
||||
struct si_compiler_ctx_state *compiler_ctx_state, void *job,
|
||||
|
|
@ -510,6 +501,9 @@ bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx);
|
|||
void si_update_tess_io_layout_state(struct si_context *sctx);
|
||||
void si_update_common_shader_state(struct si_context *sctx, struct si_shader_selector *sel,
|
||||
mesa_shader_stage type);
|
||||
void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
const struct pipe_shader_state *state);
|
||||
void si_destroy_shader_selector(struct pipe_context *ctx, void *cso);
|
||||
|
||||
/* si_state_draw.cpp */
|
||||
void si_cp_dma_prefetch(struct radeon_cmdbuf *cs,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "gfx/si_gfx.h"
|
||||
#include "ac_cmdbuf_cp.h"
|
||||
#include "ac_nir.h"
|
||||
#include "ac_shader_util.h"
|
||||
|
|
@ -13,8 +14,7 @@
|
|||
#include "nir/tgsi_to_nir.h"
|
||||
#include "si_build_pm4.h"
|
||||
#include "sid.h"
|
||||
#include "util/crc32.h"
|
||||
#include "util/disk_cache.h"
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/mesa-blake3.h"
|
||||
#include "util/u_async_debug.h"
|
||||
|
|
@ -125,374 +125,6 @@ static bool si_shader_uses_bindless_images(struct si_shader_selector *selector)
|
|||
|
||||
/* SHADER_CACHE */
|
||||
|
||||
/**
|
||||
* Return the IR key for the shader cache.
|
||||
*/
|
||||
void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
||||
unsigned wave_size, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN])
|
||||
{
|
||||
struct blob blob = {};
|
||||
unsigned ir_size;
|
||||
void *ir_binary;
|
||||
|
||||
if (sel->nir_binary) {
|
||||
ir_binary = sel->nir_binary;
|
||||
ir_size = sel->nir_size;
|
||||
} else {
|
||||
assert(sel->nir);
|
||||
|
||||
blob_init(&blob);
|
||||
/* Keep debug info if NIR debug prints are in use. */
|
||||
nir_serialize(&blob, sel->nir, NIR_DEBUG(PRINT) == 0);
|
||||
ir_binary = blob.data;
|
||||
ir_size = blob.size;
|
||||
}
|
||||
|
||||
/* These settings affect the compilation, but they are not derived
|
||||
* from the input shader IR.
|
||||
*/
|
||||
unsigned shader_variant_flags = 0;
|
||||
|
||||
if (ngg)
|
||||
shader_variant_flags |= 1 << 0;
|
||||
/* bit gap */
|
||||
if (wave_size == 32)
|
||||
shader_variant_flags |= 1 << 2;
|
||||
/* bit gap */
|
||||
/* use_ngg_culling disables NGG passthrough for non-culling shaders to reduce context
|
||||
* rolls, which can be changed with AMD_DEBUG=nonggc or AMD_DEBUG=nggc.
|
||||
*/
|
||||
if (sel->screen->use_ngg_culling)
|
||||
shader_variant_flags |= 1 << 4;
|
||||
if (sel->screen->record_llvm_ir)
|
||||
shader_variant_flags |= 1 << 5;
|
||||
if (sel->screen->info.has_image_opcodes)
|
||||
shader_variant_flags |= 1 << 6;
|
||||
if (sel->screen->options.no_infinite_interp)
|
||||
shader_variant_flags |= 1 << 7;
|
||||
if (sel->screen->options.clamp_div_by_zero)
|
||||
shader_variant_flags |= 1 << 8;
|
||||
if ((sel->stage == MESA_SHADER_VERTEX ||
|
||||
sel->stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->stage == MESA_SHADER_GEOMETRY) &&
|
||||
!es &&
|
||||
sel->screen->options.vrs2x2)
|
||||
shader_variant_flags |= 1 << 10;
|
||||
if (sel->screen->options.inline_uniforms)
|
||||
shader_variant_flags |= 1 << 11;
|
||||
if (sel->screen->options.clear_lds)
|
||||
shader_variant_flags |= 1 << 12;
|
||||
|
||||
blake3_hasher ctx;
|
||||
_mesa_blake3_init(&ctx);
|
||||
_mesa_blake3_update(&ctx, &shader_variant_flags, 4);
|
||||
_mesa_blake3_update(&ctx, ir_binary, ir_size);
|
||||
_mesa_blake3_final(&ctx, ir_blake3_cache_key);
|
||||
|
||||
if (ir_binary == blob.data)
|
||||
blob_finish(&blob);
|
||||
}
|
||||
|
||||
/** Copy "data" to "ptr" and return the next dword following copied data. */
|
||||
static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
|
||||
{
|
||||
/* data may be NULL if size == 0 */
|
||||
if (size)
|
||||
memcpy(ptr, data, size);
|
||||
ptr += DIV_ROUND_UP(size, 4);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/** Read data from "ptr". Return the next dword following the data. */
|
||||
static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
|
||||
{
|
||||
memcpy(data, ptr, size);
|
||||
ptr += DIV_ROUND_UP(size, 4);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the size as uint followed by the data. Return the next dword
|
||||
* following the copied data.
|
||||
*/
|
||||
static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
|
||||
{
|
||||
*ptr++ = size;
|
||||
return write_data(ptr, data, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the size as uint followed by the data. Return both via parameters.
|
||||
* Return the next dword following the data.
|
||||
*/
|
||||
static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
|
||||
{
|
||||
*size = *ptr++;
|
||||
assert(*data == NULL);
|
||||
if (!*size)
|
||||
return ptr;
|
||||
*data = malloc(*size);
|
||||
return read_data(ptr, *data, *size);
|
||||
}
|
||||
|
||||
struct si_shader_blob_head {
|
||||
uint32_t size;
|
||||
uint32_t type;
|
||||
uint32_t crc32;
|
||||
};
|
||||
|
||||
/**
|
||||
* Return the shader binary in a buffer.
|
||||
*/
|
||||
static uint32_t *si_get_shader_binary(struct si_shader *shader)
|
||||
{
|
||||
/* There is always a size of data followed by the data itself. */
|
||||
unsigned llvm_ir_size =
|
||||
shader->binary.llvm_ir_string ? strlen(shader->binary.llvm_ir_string) + 1 : 0;
|
||||
|
||||
/* Refuse to allocate overly large buffers and guard against integer
|
||||
* overflow. */
|
||||
if (shader->binary.code_size > UINT_MAX / 4 || llvm_ir_size > UINT_MAX / 4 ||
|
||||
shader->binary.num_symbols > UINT_MAX / 32)
|
||||
return NULL;
|
||||
|
||||
unsigned size = sizeof(struct si_shader_blob_head) +
|
||||
align(sizeof(shader->config), 4) +
|
||||
align(sizeof(shader->info), 4) +
|
||||
4 + 4 + align(shader->binary.code_size, 4) +
|
||||
4 + shader->binary.num_symbols * 8 +
|
||||
4 + align(llvm_ir_size, 4) +
|
||||
4 + align(shader->binary.disasm_size, 4);
|
||||
uint32_t *buffer = (uint32_t*)CALLOC(1, size);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
|
||||
struct si_shader_blob_head *head = (struct si_shader_blob_head *)buffer;
|
||||
head->type = shader->binary.type;
|
||||
head->size = size;
|
||||
|
||||
uint32_t *data = buffer + sizeof(*head) / 4;
|
||||
uint32_t *ptr = data;
|
||||
|
||||
ptr = write_data(ptr, &shader->config, sizeof(shader->config));
|
||||
ptr = write_data(ptr, &shader->info, sizeof(shader->info));
|
||||
ptr = write_data(ptr, &shader->binary.exec_size, 4);
|
||||
ptr = write_chunk(ptr, shader->binary.code_buffer, shader->binary.code_size);
|
||||
ptr = write_chunk(ptr, shader->binary.symbols, shader->binary.num_symbols * 8);
|
||||
ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
|
||||
ptr = write_chunk(ptr, shader->binary.disasm_string, shader->binary.disasm_size);
|
||||
assert((char *)ptr - (char *)buffer == (ptrdiff_t)size);
|
||||
|
||||
/* Compute CRC32. */
|
||||
head->crc32 = util_hash_crc32(data, size - sizeof(*head));
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static bool si_load_shader_binary(struct si_shader *shader, void *binary)
|
||||
{
|
||||
struct si_shader_blob_head *head = (struct si_shader_blob_head *)binary;
|
||||
unsigned chunk_size;
|
||||
unsigned code_size;
|
||||
|
||||
uint32_t *ptr = (uint32_t *)binary + sizeof(*head) / 4;
|
||||
if (util_hash_crc32(ptr, head->size - sizeof(*head)) != head->crc32) {
|
||||
mesa_loge("binary shader has invalid CRC32");
|
||||
return false;
|
||||
}
|
||||
|
||||
shader->binary.type = (enum si_shader_binary_type)head->type;
|
||||
ptr = read_data(ptr, &shader->config, sizeof(shader->config));
|
||||
ptr = read_data(ptr, &shader->info, sizeof(shader->info));
|
||||
ptr = read_data(ptr, &shader->binary.exec_size, 4);
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.code_buffer, &code_size);
|
||||
shader->binary.code_size = code_size;
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.symbols, &chunk_size);
|
||||
shader->binary.num_symbols = chunk_size / 8;
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.llvm_ir_string, &chunk_size);
|
||||
ptr = read_chunk(ptr, (void **)&shader->binary.disasm_string, &chunk_size);
|
||||
shader->binary.disasm_size = chunk_size;
|
||||
|
||||
if (!shader->is_gs_copy_shader &&
|
||||
shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
|
||||
if (!shader->gs_copy_shader)
|
||||
return false;
|
||||
|
||||
shader->gs_copy_shader->is_gs_copy_shader = true;
|
||||
|
||||
if (!si_load_shader_binary(shader->gs_copy_shader, (uint8_t*)binary + head->size)) {
|
||||
FREE(shader->gs_copy_shader);
|
||||
shader->gs_copy_shader = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
util_queue_fence_init(&shader->gs_copy_shader->ready);
|
||||
shader->gs_copy_shader->selector = shader->selector;
|
||||
shader->gs_copy_shader->is_gs_copy_shader = true;
|
||||
shader->gs_copy_shader->wave_size =
|
||||
si_determine_wave_size(shader->selector->screen, shader->gs_copy_shader);
|
||||
|
||||
si_shader_binary_upload(shader->selector->screen, shader->gs_copy_shader, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a shader into the cache. It's assumed the shader is not in the cache.
|
||||
* Use si_shader_cache_load_shader before calling this.
|
||||
*/
|
||||
void si_shader_cache_insert_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader, bool insert_into_disk_cache)
|
||||
{
|
||||
uint32_t *hw_binary;
|
||||
struct hash_entry *entry;
|
||||
uint8_t key[CACHE_KEY_SIZE];
|
||||
bool memory_cache_full = sscreen->shader_cache_size >= sscreen->shader_cache_max_size;
|
||||
|
||||
if (!insert_into_disk_cache && memory_cache_full)
|
||||
return;
|
||||
|
||||
entry = _mesa_hash_table_search(sscreen->shader_cache, ir_blake3_cache_key);
|
||||
if (entry)
|
||||
return; /* already added */
|
||||
|
||||
hw_binary = si_get_shader_binary(shader);
|
||||
if (!hw_binary)
|
||||
return;
|
||||
|
||||
unsigned size = *hw_binary;
|
||||
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
uint32_t *gs_copy_binary = si_get_shader_binary(shader->gs_copy_shader);
|
||||
if (!gs_copy_binary) {
|
||||
FREE(hw_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Combine both binaries. */
|
||||
size += *gs_copy_binary;
|
||||
uint32_t *combined_binary = (uint32_t*)MALLOC(size);
|
||||
if (!combined_binary) {
|
||||
FREE(hw_binary);
|
||||
FREE(gs_copy_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(combined_binary, hw_binary, *hw_binary);
|
||||
memcpy(combined_binary + *hw_binary / 4, gs_copy_binary, *gs_copy_binary);
|
||||
FREE(hw_binary);
|
||||
FREE(gs_copy_binary);
|
||||
hw_binary = combined_binary;
|
||||
}
|
||||
|
||||
if (!memory_cache_full) {
|
||||
if (_mesa_hash_table_insert(sscreen->shader_cache,
|
||||
mem_dup(ir_blake3_cache_key, 20),
|
||||
hw_binary) == NULL) {
|
||||
FREE(hw_binary);
|
||||
return;
|
||||
}
|
||||
|
||||
sscreen->shader_cache_size += size;
|
||||
}
|
||||
|
||||
if (sscreen->disk_shader_cache && insert_into_disk_cache) {
|
||||
disk_cache_compute_key(sscreen->disk_shader_cache, ir_blake3_cache_key, 20, key);
|
||||
disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, size, NULL);
|
||||
}
|
||||
|
||||
if (memory_cache_full)
|
||||
FREE(hw_binary);
|
||||
}
|
||||
|
||||
bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_blake3_cache_key[BLAKE3_KEY_LEN],
|
||||
struct si_shader *shader)
|
||||
{
|
||||
struct hash_entry *entry = _mesa_hash_table_search(sscreen->shader_cache, ir_blake3_cache_key);
|
||||
|
||||
if (entry) {
|
||||
if (si_load_shader_binary(shader, entry->data)) {
|
||||
p_atomic_inc(&sscreen->num_memory_shader_cache_hits);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
p_atomic_inc(&sscreen->num_memory_shader_cache_misses);
|
||||
|
||||
if (!sscreen->disk_shader_cache)
|
||||
return false;
|
||||
|
||||
unsigned char blake3[CACHE_KEY_SIZE];
|
||||
disk_cache_compute_key(sscreen->disk_shader_cache, ir_blake3_cache_key, 20, blake3);
|
||||
|
||||
size_t total_size;
|
||||
uint32_t *buffer = (uint32_t*)disk_cache_get(sscreen->disk_shader_cache, blake3, &total_size);
|
||||
if (buffer) {
|
||||
unsigned size = *buffer;
|
||||
unsigned gs_copy_binary_size = 0;
|
||||
|
||||
/* The GS copy shader binary is after the GS binary. */
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
|
||||
gs_copy_binary_size = buffer[size / 4];
|
||||
|
||||
if (total_size >= sizeof(uint32_t) && size + gs_copy_binary_size == total_size) {
|
||||
if (si_load_shader_binary(shader, buffer)) {
|
||||
free(buffer);
|
||||
si_shader_cache_insert_shader(sscreen, ir_blake3_cache_key, shader, false);
|
||||
p_atomic_inc(&sscreen->num_disk_shader_cache_hits);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
/* Something has gone wrong discard the item from the cache and
|
||||
* rebuild/link from source.
|
||||
*/
|
||||
assert(!"Invalid radeonsi shader disk cache item!");
|
||||
disk_cache_remove(sscreen->disk_shader_cache, blake3);
|
||||
}
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
p_atomic_inc(&sscreen->num_disk_shader_cache_misses);
|
||||
return false;
|
||||
}
|
||||
|
||||
static uint32_t si_shader_cache_key_hash(const void *key)
|
||||
{
|
||||
/* Take the first dword of BLAKE3. */
|
||||
return *(uint32_t *)key;
|
||||
}
|
||||
|
||||
static bool si_shader_cache_key_equals(const void *a, const void *b)
|
||||
{
|
||||
/* Compare BLAKE3s. */
|
||||
return memcmp(a, b, 20) == 0;
|
||||
}
|
||||
|
||||
static void si_destroy_shader_cache_entry(struct hash_entry *entry)
|
||||
{
|
||||
FREE((void *)entry->key);
|
||||
FREE(entry->data);
|
||||
}
|
||||
|
||||
bool si_init_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
(void)simple_mtx_init(&sscreen->shader_cache_mutex, mtx_plain);
|
||||
sscreen->shader_cache =
|
||||
_mesa_hash_table_create(NULL, si_shader_cache_key_hash, si_shader_cache_key_equals);
|
||||
sscreen->shader_cache_size = 0;
|
||||
/* Maximum size: 64MB on 32 bits, 1GB else */
|
||||
sscreen->shader_cache_max_size = ((sizeof(void *) == 4) ? 64 : 1024) * 1024 * 1024;
|
||||
|
||||
return sscreen->shader_cache != NULL;
|
||||
}
|
||||
|
||||
void si_destroy_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
if (sscreen->shader_cache)
|
||||
_mesa_hash_table_destroy(sscreen->shader_cache, si_destroy_shader_cache_entry);
|
||||
simple_mtx_destroy(&sscreen->shader_cache_mutex);
|
||||
}
|
||||
|
||||
/* SHADER STATES */
|
||||
|
||||
|
|
@ -3454,8 +3086,8 @@ void si_schedule_initial_compile(struct si_context *sctx, mesa_shader_stage stag
|
|||
util_queue_fence_wait(ready_fence);
|
||||
}
|
||||
|
||||
static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
const struct pipe_shader_state *state)
|
||||
void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
const struct pipe_shader_state *state)
|
||||
{
|
||||
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
|
@ -3976,7 +3608,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
|
|||
si_pm4_free_state(sctx, &shader->pm4, state_index);
|
||||
}
|
||||
|
||||
static void si_destroy_shader_selector(struct pipe_context *ctx, void *cso)
|
||||
void si_destroy_shader_selector(struct pipe_context *ctx, void *cso)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = (struct si_shader_selector *)cso;
|
||||
|
|
@ -4845,12 +4477,6 @@ static void gfx12_emit_tess_io_layout_state(struct si_context *sctx, unsigned in
|
|||
radeon_end(); /* don't track context rolls on GFX12 */
|
||||
}
|
||||
|
||||
void si_init_screen_live_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,
|
||||
si_destroy_shader_selector);
|
||||
}
|
||||
|
||||
template<int NUM_INTERP>
|
||||
static void si_emit_spi_map(struct si_context *sctx, unsigned index)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue