mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 15:40:11 +01:00
radeonsi/sqtt: re-export shaders in a single bo
RGP expects all of a pipeline's shaders to be stored sequentially, e.g. [vs][ps][gs]. As such, it assumes that a single bo is dumped to the .rgp file, with the following info:
* the va of the bo
* the offset of each shader inside the bo

For radeonsi, the shaders are stored individually, so there may be a big gap between the shaders forming a pipeline. This can produce very large files, because the layout in the file must match the one in memory (see the warning in ac_rgp_file_write_elf_text).

This commit implements a workaround: gfx shaders are re-exported as a pipeline, i.e. re-uploaded back to back into a single bo. To update the shader addresses, a new state is created (sqtt_pipeline), which overwrites the needed _PGM_LO_* registers.

This reduces Deus Ex rgp captures from 150GB+ to less than 100MB.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18865>
This commit is contained in:
parent
5a9a43c8f9
commit
8034a71430
10 changed files with 151 additions and 23 deletions
|
|
@ -53,6 +53,8 @@ struct ac_thread_trace_data {
|
||||||
struct rgp_queue_event rgp_queue_event;
|
struct rgp_queue_event rgp_queue_event;
|
||||||
|
|
||||||
struct rgp_clock_calibration rgp_clock_calibration;
|
struct rgp_clock_calibration rgp_clock_calibration;
|
||||||
|
|
||||||
|
struct hash_table_u64 *pipeline_bos;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SQTT_BUFFER_ALIGN_SHIFT 12
|
#define SQTT_BUFFER_ALIGN_SHIFT 12
|
||||||
|
|
|
||||||
|
|
@ -320,11 +320,18 @@ static void si_bind_compute_state(struct pipe_context *ctx, void *state)
|
||||||
program->shader.binary.elf_buffer,
|
program->shader.binary.elf_buffer,
|
||||||
program->shader.binary.elf_size,
|
program->shader.binary.elf_size,
|
||||||
0);
|
0);
|
||||||
uint64_t base_address = program->shader.bo->gpu_address;
|
|
||||||
|
|
||||||
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
||||||
if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
|
if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
|
||||||
si_sqtt_register_pipeline(sctx, pipeline_code_hash, base_address, true);
|
/* Short lived fake pipeline: we don't need to reupload the compute shaders,
|
||||||
|
* as we do for the gfx ones so just create a temp pipeline to be able to
|
||||||
|
* call si_sqtt_register_pipeline, and then drop it.
|
||||||
|
*/
|
||||||
|
struct si_sqtt_fake_pipeline pipeline = { 0 };
|
||||||
|
pipeline.code_hash = pipeline_code_hash;
|
||||||
|
pipeline.bo = program->shader.bo;
|
||||||
|
|
||||||
|
si_sqtt_register_pipeline(sctx, &pipeline, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
si_sqtt_describe_pipeline_bind(sctx, pipeline_code_hash, 1);
|
si_sqtt_describe_pipeline_bind(sctx, pipeline_code_hash, 1);
|
||||||
|
|
|
||||||
|
|
@ -912,6 +912,13 @@ struct si_saved_cs {
|
||||||
int64_t time_flush;
|
int64_t time_flush;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct si_sqtt_fake_pipeline {
|
||||||
|
struct si_pm4_state pm4; /* base class */
|
||||||
|
uint64_t code_hash;
|
||||||
|
struct si_resource *bo;
|
||||||
|
uint32_t offset[SI_NUM_GRAPHICS_SHADERS];
|
||||||
|
};
|
||||||
|
|
||||||
struct si_small_prim_cull_info {
|
struct si_small_prim_cull_info {
|
||||||
float scale[2], translate[2];
|
float scale[2], translate[2];
|
||||||
float scale_no_aa[2], translate_no_aa[2];
|
float scale_no_aa[2], translate_no_aa[2];
|
||||||
|
|
@ -1646,7 +1653,7 @@ void si_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *r
|
||||||
uint32_t vertex_offset_user_data,
|
uint32_t vertex_offset_user_data,
|
||||||
uint32_t instance_offset_user_data,
|
uint32_t instance_offset_user_data,
|
||||||
uint32_t draw_index_user_data);
|
uint32_t draw_index_user_data);
|
||||||
bool si_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute);
|
bool si_sqtt_register_pipeline(struct si_context* sctx, struct si_sqtt_fake_pipeline *pipeline, bool is_compute);
|
||||||
bool si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
|
bool si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
|
||||||
uint64_t pipeline_hash);
|
uint64_t pipeline_hash);
|
||||||
void si_sqtt_describe_pipeline_bind(struct si_context* sctx, uint64_t pipeline_hash, int bind_point);
|
void si_sqtt_describe_pipeline_bind(struct si_context* sctx, uint64_t pipeline_hash, int bind_point);
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,9 @@ struct si_pm4_state {
|
||||||
/* commands for the DE */
|
/* commands for the DE */
|
||||||
uint16_t max_dw;
|
uint16_t max_dw;
|
||||||
|
|
||||||
|
/* Used by SQTT to override the shader address */
|
||||||
|
uint16_t reg_va_low_idx;
|
||||||
|
|
||||||
/* This must be the last field because the array can continue after the structure. */
|
/* This must be the last field because the array can continue after the structure. */
|
||||||
uint32_t pm4[64];
|
uint32_t pm4[64];
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -817,8 +817,8 @@ static unsigned get_lds_granularity(struct si_screen *screen, gl_shader_stage st
|
||||||
screen->info.gfx_level >= GFX7 ? 512 : 256;
|
screen->info.gfx_level >= GFX7 ? 512 : 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
|
bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
|
||||||
struct ac_rtld_binary *rtld)
|
struct ac_rtld_binary *rtld)
|
||||||
{
|
{
|
||||||
const struct si_shader_selector *sel = shader->selector;
|
const struct si_shader_selector *sel = shader->selector;
|
||||||
const char *part_elfs[5];
|
const char *part_elfs[5];
|
||||||
|
|
@ -889,8 +889,8 @@ static unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_sh
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool si_get_external_symbol(enum amd_gfx_level gfx_level, void *data, const char *name,
|
bool si_get_external_symbol(enum amd_gfx_level gfx_level, void *data, const char *name,
|
||||||
uint64_t *value)
|
uint64_t *value)
|
||||||
{
|
{
|
||||||
uint64_t *scratch_va = data;
|
uint64_t *scratch_va = data;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -960,6 +960,8 @@ struct si_shader_part {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* si_shader.c */
|
/* si_shader.c */
|
||||||
|
struct ac_rtld_binary;
|
||||||
|
|
||||||
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir);
|
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir);
|
||||||
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
||||||
struct si_shader *shader, struct util_debug_callback *debug);
|
struct si_shader *shader, struct util_debug_callback *debug);
|
||||||
|
|
@ -979,6 +981,10 @@ const char *si_get_shader_name(const struct si_shader *shader);
|
||||||
void si_shader_binary_clean(struct si_shader_binary *binary);
|
void si_shader_binary_clean(struct si_shader_binary *binary);
|
||||||
struct nir_shader *si_deserialize_shader(struct si_shader_selector *sel);
|
struct nir_shader *si_deserialize_shader(struct si_shader_selector *sel);
|
||||||
unsigned si_get_ps_num_interp(struct si_shader *ps);
|
unsigned si_get_ps_num_interp(struct si_shader *ps);
|
||||||
|
bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
|
||||||
|
struct ac_rtld_binary *rtld);
|
||||||
|
bool si_get_external_symbol(enum amd_gfx_level gfx_level, void *data, const char *name,
|
||||||
|
uint64_t *value);
|
||||||
|
|
||||||
/* si_shader_info.c */
|
/* si_shader_info.c */
|
||||||
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
|
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "hash_table.h"
|
||||||
#include "si_pipe.h"
|
#include "si_pipe.h"
|
||||||
#include "si_build_pm4.h"
|
#include "si_build_pm4.h"
|
||||||
#include "si_compute.h"
|
#include "si_compute.h"
|
||||||
|
|
@ -55,6 +56,8 @@ si_thread_trace_init_bo(struct si_context *sctx)
|
||||||
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
||||||
size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
|
size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
|
||||||
|
|
||||||
|
sctx->thread_trace->pipeline_bos = _mesa_hash_table_u64_create(NULL);
|
||||||
|
|
||||||
sctx->thread_trace->bo =
|
sctx->thread_trace->bo =
|
||||||
ws->buffer_create(ws, size, 4096,
|
ws->buffer_create(ws, size, 4096,
|
||||||
RADEON_DOMAIN_VRAM,
|
RADEON_DOMAIN_VRAM,
|
||||||
|
|
@ -697,6 +700,12 @@ si_destroy_thread_trace(struct si_context *sctx)
|
||||||
}
|
}
|
||||||
simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);
|
simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);
|
||||||
|
|
||||||
|
hash_table_foreach(sctx->thread_trace->pipeline_bos->table, entry) {
|
||||||
|
struct si_sqtt_fake_pipeline *pipeline = (struct si_sqtt_fake_pipeline *)entry->data;
|
||||||
|
si_resource_reference(&pipeline->bo, NULL);
|
||||||
|
FREE(pipeline);
|
||||||
|
}
|
||||||
|
|
||||||
free(sctx->thread_trace);
|
free(sctx->thread_trace);
|
||||||
sctx->thread_trace = NULL;
|
sctx->thread_trace = NULL;
|
||||||
|
|
||||||
|
|
@ -1010,7 +1019,7 @@ si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
si_sqtt_add_code_object(struct si_context* sctx,
|
si_sqtt_add_code_object(struct si_context* sctx,
|
||||||
uint64_t pipeline_hash,
|
struct si_sqtt_fake_pipeline *pipeline,
|
||||||
bool is_compute)
|
bool is_compute)
|
||||||
{
|
{
|
||||||
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
||||||
|
|
@ -1023,8 +1032,8 @@ si_sqtt_add_code_object(struct si_context* sctx,
|
||||||
|
|
||||||
record->shader_stages_mask = 0;
|
record->shader_stages_mask = 0;
|
||||||
record->num_shaders_combined = 0;
|
record->num_shaders_combined = 0;
|
||||||
record->pipeline_hash[0] = pipeline_hash;
|
record->pipeline_hash[0] = pipeline->code_hash;
|
||||||
record->pipeline_hash[1] = pipeline_hash;
|
record->pipeline_hash[1] = pipeline->code_hash;
|
||||||
|
|
||||||
for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
|
for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
|
||||||
struct si_shader *shader;
|
struct si_shader *shader;
|
||||||
|
|
@ -1051,7 +1060,7 @@ si_sqtt_add_code_object(struct si_context* sctx,
|
||||||
}
|
}
|
||||||
memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size);
|
memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size);
|
||||||
|
|
||||||
uint64_t va = shader->bo->gpu_address;
|
uint64_t va = pipeline->bo->gpu_address + pipeline->offset[i];
|
||||||
unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i);
|
unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i);
|
||||||
record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size);
|
record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size);
|
||||||
record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0];
|
record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0];
|
||||||
|
|
@ -1079,21 +1088,21 @@ si_sqtt_add_code_object(struct si_context* sctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
si_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute)
|
si_sqtt_register_pipeline(struct si_context* sctx, struct si_sqtt_fake_pipeline *pipeline, bool is_compute)
|
||||||
{
|
{
|
||||||
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
||||||
|
|
||||||
assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_hash));
|
assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline->code_hash));
|
||||||
|
|
||||||
bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline_hash);
|
bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline->code_hash);
|
||||||
if (!result)
|
if (!result)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline_hash, base_address);
|
result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline->code_hash, pipeline->bo->gpu_address);
|
||||||
if (!result)
|
if (!result)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute);
|
return si_sqtt_add_code_object(sctx, pipeline, is_compute);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -187,6 +187,7 @@ union si_state {
|
||||||
struct si_pm4_state *vgt_shader_config;
|
struct si_pm4_state *vgt_shader_config;
|
||||||
struct si_shader *vs;
|
struct si_shader *vs;
|
||||||
struct si_shader *ps;
|
struct si_shader *ps;
|
||||||
|
struct si_sqtt_fake_pipeline *sqtt_pipeline;
|
||||||
} named;
|
} named;
|
||||||
struct si_pm4_state *array[sizeof(struct si_state_named) / sizeof(struct si_pm4_state *)];
|
struct si_pm4_state *array[sizeof(struct si_state_named) / sizeof(struct si_pm4_state *)];
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,8 @@
|
||||||
#include "util/u_index_modify.h"
|
#include "util/u_index_modify.h"
|
||||||
#include "util/u_prim.h"
|
#include "util/u_prim.h"
|
||||||
#include "util/u_upload_mgr.h"
|
#include "util/u_upload_mgr.h"
|
||||||
|
#include "ac_rtld.h"
|
||||||
|
#include "si_build_pm4.h"
|
||||||
|
|
||||||
#if (GFX_VER == 6)
|
#if (GFX_VER == 6)
|
||||||
#define GFX(name) name##GFX6
|
#define GFX(name) name##GFX6
|
||||||
|
|
@ -303,28 +305,107 @@ static bool si_update_shaders(struct si_context *sctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(sctx->screen->debug_flags & DBG(SQTT) && sctx->thread_trace)) {
|
if (unlikely(sctx->screen->debug_flags & DBG(SQTT) && sctx->thread_trace)) {
|
||||||
/* Pretend the bound shaders form a vk pipeline */
|
/* Pretend the bound shaders form a vk pipeline. Include the scratch size in
|
||||||
uint32_t pipeline_code_hash = 0;
|
* the hash calculation to force re-emitting the pipeline if the scratch bo
|
||||||
uint64_t base_address = ~0;
|
* changes.
|
||||||
|
*/
|
||||||
|
uint64_t scratch_bo_size = sctx->scratch_buffer ? sctx->scratch_buffer->bo_size : 0;
|
||||||
|
uint64_t pipeline_code_hash = scratch_bo_size;
|
||||||
|
uint32_t total_size = 0;
|
||||||
|
|
||||||
|
/* Compute pipeline code hash. */
|
||||||
for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
|
for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
|
||||||
struct si_shader *shader = sctx->shaders[i].current;
|
struct si_shader *shader = sctx->shaders[i].current;
|
||||||
if (sctx->shaders[i].cso && shader) {
|
if (sctx->shaders[i].cso && shader) {
|
||||||
pipeline_code_hash = _mesa_hash_data_with_seed(
|
pipeline_code_hash = XXH64(
|
||||||
shader->binary.elf_buffer,
|
shader->binary.elf_buffer,
|
||||||
shader->binary.elf_size,
|
shader->binary.elf_size,
|
||||||
pipeline_code_hash);
|
pipeline_code_hash);
|
||||||
base_address = MIN2(base_address,
|
|
||||||
shader->bo->gpu_address);
|
total_size += ALIGN(shader->binary.uploaded_code_size, 256);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct si_sqtt_fake_pipeline *pipeline = NULL;
|
||||||
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
|
||||||
if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
|
if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
|
||||||
si_sqtt_register_pipeline(sctx, pipeline_code_hash, base_address, false);
|
/* This is a new pipeline. Allocate a new bo to hold all the shaders. Without
|
||||||
|
* this, shader code export process creates huge rgp files because RGP assumes
|
||||||
|
* the shaders live sequentially in memory (shader N address = shader 0 + offset N)
|
||||||
|
*/
|
||||||
|
struct si_resource *bo = si_aligned_buffer_create(
|
||||||
|
&sctx->screen->b,
|
||||||
|
(sctx->screen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY) |
|
||||||
|
SI_RESOURCE_FLAG_DRIVER_INTERNAL | SI_RESOURCE_FLAG_32BIT,
|
||||||
|
PIPE_USAGE_IMMUTABLE, align(total_size, SI_CPDMA_ALIGNMENT), 256);
|
||||||
|
|
||||||
|
char *ptr = (char *) (bo ? sctx->screen->ws->buffer_map(sctx->screen->ws,
|
||||||
|
bo->buf, NULL,
|
||||||
|
(enum pipe_map_flags)(PIPE_MAP_READ_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY)) :
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
uint32_t offset = 0;
|
||||||
|
uint64_t scratch_va = sctx->scratch_buffer ? sctx->scratch_buffer->gpu_address : 0;
|
||||||
|
|
||||||
|
if (ptr) {
|
||||||
|
pipeline = (struct si_sqtt_fake_pipeline *)
|
||||||
|
CALLOC(1, sizeof(struct si_sqtt_fake_pipeline));
|
||||||
|
pipeline->code_hash = pipeline_code_hash;
|
||||||
|
si_resource_reference(&pipeline->bo, bo);
|
||||||
|
|
||||||
|
/* Re-upload all gfx shaders and init PM4. */
|
||||||
|
si_pm4_clear_state(&pipeline->pm4);
|
||||||
|
|
||||||
|
for (int i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
|
||||||
|
struct si_shader *shader = sctx->shaders[i].current;
|
||||||
|
if (sctx->shaders[i].cso && shader) {
|
||||||
|
struct ac_rtld_binary binary;
|
||||||
|
si_shader_binary_open(sctx->screen, shader, &binary);
|
||||||
|
|
||||||
|
struct ac_rtld_upload_info u = {};
|
||||||
|
u.binary = &binary;
|
||||||
|
u.get_external_symbol = si_get_external_symbol;
|
||||||
|
u.cb_data = &scratch_va;
|
||||||
|
u.rx_va = bo->gpu_address + offset;
|
||||||
|
u.rx_ptr = ptr + offset;
|
||||||
|
|
||||||
|
int size = ac_rtld_upload(&u);
|
||||||
|
ac_rtld_close(&binary);
|
||||||
|
|
||||||
|
pipeline->offset[i] = offset;
|
||||||
|
|
||||||
|
offset += align(size, 256);
|
||||||
|
|
||||||
|
struct si_pm4_state *pm4 = &shader->pm4;
|
||||||
|
|
||||||
|
uint32_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8;
|
||||||
|
assert(PKT3_IT_OPCODE_G(pm4->pm4[pm4->reg_va_low_idx - 2]) == PKT3_SET_SH_REG);
|
||||||
|
uint32_t reg = (pm4->pm4[pm4->reg_va_low_idx - 1] << 2) + SI_SH_REG_OFFSET;
|
||||||
|
si_pm4_set_reg(&pipeline->pm4, reg, va_low);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf);
|
||||||
|
|
||||||
|
_mesa_hash_table_u64_insert(sctx->thread_trace->pipeline_bos,
|
||||||
|
pipeline_code_hash, pipeline);
|
||||||
|
|
||||||
|
si_sqtt_register_pipeline(sctx, pipeline, false);
|
||||||
|
} else {
|
||||||
|
if (bo)
|
||||||
|
si_resource_reference(&bo, NULL);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pipeline = (struct si_sqtt_fake_pipeline *)
|
||||||
|
_mesa_hash_table_u64_search(sctx->thread_trace->pipeline_bos, pipeline_code_hash);
|
||||||
}
|
}
|
||||||
|
assert(pipeline);
|
||||||
|
|
||||||
|
pipeline->code_hash = pipeline_code_hash;
|
||||||
|
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, pipeline->bo,
|
||||||
|
RADEON_USAGE_READ | RADEON_PRIO_SHADER_BINARY);
|
||||||
|
|
||||||
si_sqtt_describe_pipeline_bind(sctx, pipeline_code_hash, 0);
|
si_sqtt_describe_pipeline_bind(sctx, pipeline_code_hash, 0);
|
||||||
|
si_pm4_bind_state(sctx, sqtt_pipeline, pipeline);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((GFX_VERSION <= GFX8 &&
|
if ((GFX_VERSION <= GFX8 &&
|
||||||
|
|
|
||||||
|
|
@ -695,6 +695,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
|
||||||
va = shader->bo->gpu_address;
|
va = shader->bo->gpu_address;
|
||||||
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
|
|
||||||
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||||
S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||||
|
|
@ -729,6 +730,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
} else {
|
} else {
|
||||||
si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||||
}
|
}
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
|
|
||||||
unsigned num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_TCS_NUM_USER_SGPR);
|
unsigned num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_TCS_NUM_USER_SGPR);
|
||||||
|
|
||||||
|
|
@ -741,6 +743,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
shader->config.rsrc2 |= S_00B42C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
|
shader->config.rsrc2 |= S_00B42C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
|
||||||
} else {
|
} else {
|
||||||
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
|
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
|
||||||
S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
|
S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||||
|
|
||||||
|
|
@ -816,6 +819,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
|
oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
|
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
|
||||||
S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
|
S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||||
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
|
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
|
||||||
|
|
@ -1104,6 +1108,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
} else {
|
} else {
|
||||||
si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||||
}
|
}
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
|
|
||||||
uint32_t rsrc1 = S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_DX10_CLAMP(1) |
|
uint32_t rsrc1 = S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_DX10_CLAMP(1) |
|
||||||
S_00B228_MEM_ORDERED(si_shader_mem_ordered(shader)) |
|
S_00B228_MEM_ORDERED(si_shader_mem_ordered(shader)) |
|
||||||
|
|
@ -1149,6 +1154,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
S_00B21C_WAVE_LIMIT(0x3F);
|
S_00B21C_WAVE_LIMIT(0x3F);
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
|
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
|
||||||
S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
|
S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||||
|
|
||||||
|
|
@ -1421,6 +1427,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||||
&late_alloc_wave64, &cu_mask);
|
&late_alloc_wave64, &cu_mask);
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(
|
si_pm4_set_reg(
|
||||||
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
|
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
|
||||||
|
|
@ -1733,6 +1740,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
||||||
}
|
}
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
|
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
|
||||||
S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
|
S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||||
|
|
||||||
|
|
@ -2008,6 +2016,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
|
||||||
va = shader->bo->gpu_address;
|
va = shader->bo->gpu_address;
|
||||||
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
|
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
|
||||||
|
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||||
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
|
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
|
||||||
S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
|
S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||||
|
|
||||||
|
|
@ -2078,6 +2087,9 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (unlikely(sscreen->debug_flags & DBG(SQTT)))
|
||||||
|
assert(shader->pm4.reg_va_low_idx != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue