Merge branch 'radv_rgp_26_support_gfx10_11' into 'main'

radv: add support for new performance counters in RGP 2.6 on GFX10-11

See merge request mesa/mesa!39013
This commit is contained in:
Samuel Pitoiset 2025-12-20 01:48:01 +01:00
commit d409992c6c
6 changed files with 1395 additions and 38 deletions

View file

@ -932,6 +932,28 @@ static struct ac_pc_block_base gfx10_UTCL1 = {
.num_spm_counters = 0,
};
/* gfx10_GCEA */
/* Primary select register for the single GCEA counter described below.
 * Only the PERFCOUNTER2 register set is listed here.
 * NOTE(review): presumably PERFCOUNTER2 is the SPM-capable counter of this
 * block - confirm against the register spec. */
static unsigned gfx10_GCEA_select0[] = {
R_036800_GCEA_PERFCOUNTER2_SELECT,
};
/* Secondary select register paired with gfx10_GCEA_select0[0]. */
static unsigned gfx10_GCEA_select1[] = {
R_036804_GCEA_PERFCOUNTER2_SELECT1,
};
/* Perf counter block descriptor for GCEA. It exposes one counter, which is
 * also the only SPM counter, and is referenced by the GFX10, GFX10.3 and
 * GFX11 block group tables. */
static struct ac_pc_block_base gfx10_GCEA = {
.gpu_block = GCEA,
.name = "GCEA",
.num_counters = 1,
.select0 = gfx10_GCEA_select0,
.select1 = gfx10_GCEA_select1,
.counter0_lo = R_034980_GCEA_PERFCOUNTER2_LO,
.num_spm_counters = 1,
.num_spm_wires = 2,
.spm_block_select = AC_SPM_GLOBAL_BLOCK_GCEA,
};
/* gfx11_SQ_WGP */
static struct ac_pc_block_base gfx11_SQ_WGP = {
.gpu_block = SQ_WGP,
@ -1027,6 +1049,40 @@ static struct ac_pc_block_gfxdescr groups_gfx10[] = {
{&gfx10_TCP, 77},
{&cik_TD, 61},
{&gfx10_UTCL1, 15},
{&gfx10_GCEA, 88},
};
/* Perf counter block table selected by ac_init_perfcounters() for GFX10_3.
 *
 * Each entry is { block descriptor, N [, M] }.
 * NOTE(review): N is presumably the number of selectable events for the block
 * and the optional M the number of instances - confirm against the definition
 * of struct ac_pc_block_gfxdescr. */
static struct ac_pc_block_gfxdescr groups_gfx103[] = {
{&cik_CB, 461},
{&gfx10_CHA, 45},
{&gfx10_CHCG, 35},
{&gfx10_CHC, 35},
{&cik_CPC, 47},
{&cik_CPF, 40},
{&cik_CPG, 82},
{&gfx10_DB, 370},
{&gfx10_GCR, 94},
{&cik_GDS, 123},
{&gfx10_GE, 315},
{&gfx10_GL1A, 36},
{&gfx10_GL1C, 64, 4},
{&gfx10_GL2A, 91},
{&gfx10_GL2C, 235},
{&cik_GRBM, 47},
{&cik_GRBMSE, 19},
{&gfx10_PA_PH, 960},
{&cik_PA_SC, 552},
{&gfx10_PA_SU, 266},
{&gfx10_RLC, 7},
{&gfx10_RMI, 258},
{&cik_SPI, 329},
{&gfx10_SQ, 509},
{&cik_SX, 225},
{&cik_TA, 226},
{&gfx10_TCP, 77},
{&cik_TD, 192},
{&gfx10_UTCL1, 15},
{&gfx10_GCEA, 89},
};
static struct ac_pc_block_gfxdescr groups_gfx11[] = {
@ -1060,6 +1116,7 @@ static struct ac_pc_block_gfxdescr groups_gfx11[] = {
{&cik_TD, 196},
{&gfx10_UTCL1, 65},
{&gfx11_SQ_WGP, 511, 4},
{&gfx10_GCEA, 86},
};
static struct ac_pc_block_gfxdescr groups_gfx12[] = {
@ -1237,10 +1294,13 @@ bool ac_init_perfcounters(const struct radeon_info *info,
num_blocks = ARRAY_SIZE(groups_gfx9);
break;
case GFX10:
case GFX10_3:
blocks = groups_gfx10;
num_blocks = ARRAY_SIZE(groups_gfx10);
break;
case GFX10_3:
blocks = groups_gfx103;
num_blocks = ARRAY_SIZE(groups_gfx103);
break;
case GFX11:
blocks = groups_gfx11;
num_blocks = ARRAY_SIZE(groups_gfx11);
@ -1290,8 +1350,14 @@ bool ac_init_perfcounters(const struct radeon_info *info,
} else if (!strcmp(block->b->b->name, "GL1C") ||
!strcmp(block->b->b->name, "SQ_WGP")) {
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
} else if (!strcmp(block->b->b->name, "GL2C")) {
} else if (!strcmp(block->b->b->name, "GL2C") ||
!strcmp(block->b->b->name, "GCEA")) {
block->num_instances = block->num_global_instances = info->num_tcc_blocks;
} else if (!strcmp(block->b->b->name, "CPF")) {
block->num_instances = block->num_global_instances = 1;
} else if (!strcmp(block->b->b->name, "TA") ||
!strcmp(block->b->b->name, "TD")) {
block->num_global_instances = block->num_instances;
}
}

View file

@ -67,7 +67,7 @@ enum ac_pc_gpu_block {
ATC = 0x1A,
ATCL2 = 0x1B,
MCVML2 = 0x1C,
EA = 0x1D,
GCEA = 0x1D,
RPB = 0x1E,
RMI = 0x1F,
UMCCH = 0x20,

View file

@ -58,6 +58,10 @@ enum sqtt_file_chunk_type
SQTT_FILE_CHUNK_TYPE_CODE_OBJECT_LOADER_EVENTS,
SQTT_FILE_CHUNK_TYPE_PSO_CORRELATION,
SQTT_FILE_CHUNK_TYPE_INSTRUMENTATION_TABLE,
SQTT_FILE_CHUNK_TYPE_FIRST_TOOLS_TYPE = 128,
SQTT_FILE_CHUNK_TYPE_DERIVED_SPM_DB = SQTT_FILE_CHUNK_TYPE_FIRST_TOOLS_TYPE,
SQTT_FILE_CHUNK_TYPE_COUNT
};
@ -992,10 +996,203 @@ static void ac_sqtt_dump_spm(const struct ac_spm_trace *spm_trace,
fseek(output, file_offset, SEEK_SET);
}
/**
* SQTT Derived SPM DB info.
*/
/* Per-group record of the derived SPM DB chunk.
 *
 * As written by ac_sqtt_dump_derived_spm(), each record is immediately
 * followed in the file by the group name (group_name_length bytes, no NUL
 * terminator) and then num_counters uint32_t counter IDs. */
struct sqtt_derived_spm_group_info {
/* Size of this struct plus the name bytes plus the counter ID array. */
uint32_t size_in_bytes;
/* Set to sizeof(this struct) by the writer. */
uint32_t offset;
uint32_t group_name_length;
/* Left at zero by the writer (no group description is emitted). */
uint32_t group_description_length;
uint32_t num_counters;
};
/* Per-counter record of the derived SPM DB chunk.
 *
 * As written by ac_sqtt_dump_derived_spm(), each record is immediately
 * followed by the counter name, the counter description (neither is NUL
 * terminated) and then num_components uint32_t component IDs. */
struct sqtt_derived_spm_counter_info {
/* Size of this struct plus name, description and component ID array. */
uint32_t size_in_bytes;
/* Set to sizeof(this struct) by the writer. */
uint32_t offset;
uint32_t counter_name_length;
uint32_t counter_description_length;
uint32_t num_components;
/* Filled from the counter descriptor's usage (enum ac_spm_usage_type).
 * NOTE(review): a trailing uint8_t means sizeof() includes compiler-added
 * tail padding, and that padded size is what gets written - confirm this
 * matches the RGP spec. */
uint8_t usage_type;
};
/* Per-component record of the derived SPM DB chunk.
 *
 * As written by ac_sqtt_dump_derived_spm(), each record is immediately
 * followed by the component name (component_name_length bytes, no NUL
 * terminator). */
struct sqtt_derived_spm_component_info {
/* Size of this struct plus the name bytes. */
uint32_t size_in_bytes;
/* Set to sizeof(this struct) by the writer. */
uint32_t offset;
uint32_t component_name_length;
/* Left at zero by the writer (no component description is emitted). */
uint32_t component_description_length;
/* Filled from the component descriptor's usage (enum ac_spm_usage_type). */
uint32_t usage_type;
};
/* Fixed-size header of the derived SPM DB chunk.
 *
 * ac_sqtt_dump_derived_spm() writes the payload after it in this order:
 * timestamps, group records, counter records, component records, counter
 * values, component values. */
struct sqtt_file_chunk_derived_spm_db {
struct sqtt_file_chunk_header header;
/* Set to sizeof(this struct) by ac_sqtt_fill_derived_spm_db(). */
uint32_t offset;
/* Always written as zero. */
uint32_t flags;
uint32_t num_timestamps;
uint32_t num_groups;
uint32_t num_counters;
uint32_t num_components;
uint32_t sampling_interval;
};
/* The struct is written to the capture file verbatim, so its size is pinned. */
static_assert(sizeof(struct sqtt_file_chunk_derived_spm_db) == 44,
"sqtt_file_chunk_derived_spm_db doesn't match RGP spec");
/**
 * Populate the fixed-size header of the derived SPM DB chunk.
 *
 * \param spm_derived_trace  Source of the timestamp/group/counter/component
 *                           counts and of the sampling interval.
 * \param chunk              Header to fill; only the fields assigned below
 *                           are touched.
 * \param file_offset        Not used by this function.
 * \param chunk_size         Total chunk size in bytes, stored in the header.
 */
static void ac_sqtt_fill_derived_spm_db(const struct ac_spm_derived_trace *spm_derived_trace,
                                        struct sqtt_file_chunk_derived_spm_db *chunk,
                                        size_t file_offset,
                                        uint32_t chunk_size)
{
   const struct ac_spm_derived_trace *trace = spm_derived_trace;

   /* Payload description, copied straight from the derived trace. */
   chunk->sampling_interval = trace->sample_interval;
   chunk->num_components = trace->num_components;
   chunk->num_counters = trace->num_counters;
   chunk->num_groups = trace->num_groups;
   chunk->num_timestamps = trace->num_timestamps;

   /* The payload starts right after this header; no flags are set. */
   chunk->flags = 0;
   chunk->offset = sizeof(*chunk);

   /* Generic chunk header: type, index 0, version 0.0 and total size. */
   chunk->header.size_in_bytes = chunk_size;
   chunk->header.minor_version = 0;
   chunk->header.major_version = 0;
   chunk->header.chunk_id.index = 0;
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_DERIVED_SPM_DB;
}
static void ac_sqtt_dump_derived_spm(const struct ac_spm_derived_trace *spm_derived_trace,
size_t file_offset,
FILE *output)
{
struct sqtt_file_chunk_derived_spm_db derived_spm_db;
size_t file_derived_spm_db_offset = file_offset;
fseek(output, sizeof(struct sqtt_file_chunk_derived_spm_db), SEEK_CUR);
file_offset += sizeof(struct sqtt_file_chunk_derived_spm_db);
/* Dump timestamps. */
for (uint32_t i = 0; i < spm_derived_trace->num_timestamps; i++) {
uint64_t timestamp = spm_derived_trace->timestamps[i];
file_offset += sizeof(timestamp);
fwrite(&timestamp, sizeof(timestamp), 1, output);
}
/* Dump SPM groups. */
for (uint32_t i = 0; i < spm_derived_trace->num_groups; i++) {
const struct ac_spm_derived_group *group = &spm_derived_trace->groups[i];
const struct ac_spm_derived_group_descr *group_descr = group->descr;
struct sqtt_derived_spm_group_info group_info = {0};
const uint32_t num_counters = group_descr->num_counters;
const uint32_t name_length = strlen(group_descr->name);
group_info.size_in_bytes = sizeof(group_info) + name_length +
num_counters * sizeof(uint32_t);
group_info.offset = sizeof(group_info);
group_info.group_name_length = name_length;
group_info.num_counters = num_counters;
file_offset += sizeof(group_info) + group_info.group_name_length;
fwrite(&group_info, sizeof(group_info), 1, output);
fwrite(group_descr->name, group_info.group_name_length, 1, output);
for (uint32_t j = 0; j < group_descr->num_counters; j++) {
const struct ac_spm_derived_counter_descr *counter_descr = group_descr->counters[j];
uint32_t counter_id = counter_descr->id;
file_offset += sizeof(uint32_t);
fwrite(&counter_id, sizeof(uint32_t), 1, output);
}
}
/* Dump SPM counters. */
for (uint32_t i = 0; i < spm_derived_trace->num_counters; i++) {
const struct ac_spm_derived_counter *counter = &spm_derived_trace->counters[i];
const struct ac_spm_derived_counter_descr *counter_descr = counter->descr;
struct sqtt_derived_spm_counter_info counter_info = {0};
const uint32_t num_components = counter_descr->num_components;
const uint32_t name_length = strlen(counter_descr->name);
const uint32_t description_length = strlen(counter_descr->desc);
counter_info.size_in_bytes = sizeof(counter_info) + name_length +
description_length + num_components * sizeof(uint32_t);
counter_info.offset = sizeof(counter_info);
counter_info.counter_name_length = name_length;
counter_info.counter_description_length = description_length;
counter_info.num_components = num_components;
counter_info.usage_type = counter_descr->usage;
file_offset += sizeof(counter_info) + counter_info.counter_name_length +
counter_info.counter_description_length;
fwrite(&counter_info, sizeof(counter_info), 1, output);
fwrite(counter_descr->name, counter_info.counter_name_length, 1, output);
fwrite(counter_descr->desc, counter_info.counter_description_length, 1, output);
for (uint32_t j = 0; j < counter_descr->num_components; j++) {
const struct ac_spm_derived_component_descr *component_descr = counter_descr->components[j];
uint32_t component_id = component_descr->id;
file_offset += sizeof(uint32_t);
fwrite(&component_id, sizeof(uint32_t), 1, output);
}
}
/* Dump SPM components. */
for (uint32_t i = 0; i < spm_derived_trace->num_components; i++) {
const struct ac_spm_derived_component *component = &spm_derived_trace->components[i];
const struct ac_spm_derived_component_descr *component_descr = component->descr;
struct sqtt_derived_spm_component_info component_info = {0};
const uint32_t name_length = strlen(component_descr->name);
component_info.size_in_bytes = sizeof(component_info) + name_length;
component_info.offset = sizeof(component_info);
component_info.component_name_length = name_length;
component_info.usage_type = component_descr->usage;
file_offset += sizeof(component_info) + component_info.component_name_length +
component_info.component_description_length;
fwrite(&component_info, sizeof(component_info), 1, output);
fwrite(component_descr->name, component_info.component_name_length, 1, output);
}
/* Dump counter values. */
for (uint32_t i = 0; i < spm_derived_trace->num_counters; i++) {
const struct ac_spm_derived_counter *counter = &spm_derived_trace->counters[i];
assert(util_dynarray_num_elements(&counter->values, double) == spm_derived_trace->num_timestamps);
util_dynarray_foreach(&counter->values, double, value) {
file_offset += sizeof(double);
fwrite(value, sizeof(double), 1, output);
}
}
/* Dump component values. */
for (uint32_t i = 0; i < spm_derived_trace->num_components; i++) {
const struct ac_spm_derived_component *component = &spm_derived_trace->components[i];
assert(util_dynarray_num_elements(&component->values, double) == spm_derived_trace->num_timestamps);
util_dynarray_foreach(&component->values, double, value) {
file_offset += sizeof(double);
fwrite(value, sizeof(double), 1, output);
}
}
/* SQTT Derived SPM chunk. */
ac_sqtt_fill_derived_spm_db(spm_derived_trace, &derived_spm_db,
file_derived_spm_db_offset,
file_offset - file_derived_spm_db_offset);
fseek(output, file_derived_spm_db_offset, SEEK_SET);
fwrite(&derived_spm_db, sizeof(struct sqtt_file_chunk_derived_spm_db), 1, output);
fseek(output, file_offset, SEEK_SET);
}
#if defined(USE_LIBELF)
static void
ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt_trace,
const struct ac_spm_trace *spm_trace, FILE *output)
const struct ac_spm_trace *spm_trace,
const struct ac_spm_derived_trace *spm_derived_trace,
FILE *output)
{
struct sqtt_file_chunk_asic_info asic_info = {0};
struct sqtt_file_chunk_cpu_info cpu_info = {0};
@ -1193,12 +1390,26 @@ ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt
}
}
if (spm_trace) {
if (spm_derived_trace) {
ac_sqtt_dump_derived_spm(spm_derived_trace, file_offset, output);
} else if (spm_trace) {
ac_sqtt_dump_spm(spm_trace, file_offset, output);
}
}
#endif
/**
 * Decide whether the RGP capture should contain the derived SPM chunk
 * instead of the raw SPM chunk.
 *
 * \param info       GPU info; only gfx_level is consulted.
 * \param spm_trace  Raw SPM trace, may be NULL.
 * \return true when an SPM trace is present and the GPU is GFX10 up to, but
 *         not including, GFX12.
 */
static bool
ac_use_derived_spm_trace(const struct radeon_info *info,
                         const struct ac_spm_trace *spm_trace)
{
   if (!spm_trace)
      return false;

   /* TODO: Enable for GFX12. */
   return info->gfx_level >= GFX10 && info->gfx_level < GFX12;
}
int
ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
const struct ac_spm_trace *spm_trace)
@ -1223,7 +1434,13 @@ ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_t
if (!f)
return -1;
ac_sqtt_dump_data(info, sqtt_trace, spm_trace, f);
struct ac_spm_derived_trace *spm_derived_trace =
ac_use_derived_spm_trace(info, spm_trace) ? ac_spm_get_derived_trace(info, spm_trace) : NULL;
ac_sqtt_dump_data(info, sqtt_trace, spm_trace, spm_derived_trace, f);
if (spm_derived_trace)
ac_spm_destroy_derived_trace(spm_derived_trace);
fprintf(stderr, "RGP capture saved to '%s'\n", filename);

File diff suppressed because it is too large Load diff

View file

@ -11,6 +11,8 @@
#include "ac_perfcounter.h"
#include "util/u_dynarray.h"
struct ac_cmdbuf;
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
@ -35,7 +37,7 @@ enum ac_spm_global_block {
AC_SPM_GLOBAL_BLOCK_GL2C,
AC_SPM_GLOBAL_BLOCK_SDMA,
AC_SPM_GLOBAL_BLOCK_GUS,
AC_SPM_GLOBAL_BLOCK_EA,
AC_SPM_GLOBAL_BLOCK_GCEA,
AC_SPM_GLOBAL_BLOCK_CHA,
AC_SPM_GLOBAL_BLOCK_CHC,
AC_SPM_GLOBAL_BLOCK_CHCG,
@ -86,7 +88,44 @@ enum ac_spm_segment_type {
AC_SPM_SEGMENT_TYPE_COUNT,
};
/* Identifiers of the raw hardware counters, named <BLOCK>_PERF_SEL_<EVENT>.
 * They are stored in ac_spm_counter_descr::id and ac_spm_counter_info::id.
 * AC_SPM_RAW_COUNTER_ID_COUNT is the number of identifiers and must stay
 * last. */
enum ac_spm_raw_counter_id {
AC_SPM_TCP_PERF_SEL_REQ = 0,
AC_SPM_TCP_PERF_SEL_REQ_MISS,
AC_SPM_SQC_PERF_SEL_DCACHE_HITS,
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES,
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE,
AC_SPM_SQC_PERF_SEL_ICACHE_HITS,
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES,
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE,
AC_SPM_GL1C_PERF_SEL_REQ,
AC_SPM_GL1C_PERF_SEL_REQ_MISS,
AC_SPM_GL2C_PERF_SEL_REQ,
AC_SPM_GL2C_PERF_SEL_MISS,
AC_SPM_CPF_PERF_SEL_STAT_BUSY,
AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B,
AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS,
AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS,
AC_SPM_TA_PERF_SEL_TA_BUSY,
AC_SPM_TCP_PERF_SEL_TCP_TA_REQ_STALL,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_TRI_NODE,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP16_BOX_NODE,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP32_BOX_NODE,
AC_SPM_RAW_COUNTER_ID_COUNT,
};
/* Operation applied to raw counter values.
 * NOTE(review): presumably selects how samples from several instances of the
 * same raw counter are combined (summed vs. maximum) - confirm against the
 * users in ac_spm.c. */
enum ac_spm_raw_counter_op {
AC_SPM_RAW_COUNTER_OP_SUM = 0,
AC_SPM_RAW_COUNTER_OP_MAX,
};
/* Static description of one raw SPM counter: its identifier, the GPU block
 * it belongs to and the event ID within that block. */
struct ac_spm_counter_descr {
enum ac_spm_raw_counter_id id;
enum ac_pc_gpu_block gpu_block;
uint32_t event_id;
};
@ -119,6 +158,7 @@ struct ac_spm_muxsel_line {
struct ac_spm_counter_info {
/* General info. */
enum ac_spm_raw_counter_id id;
enum ac_pc_gpu_block gpu_block;
uint32_t instance;
uint32_t event_id;
@ -194,6 +234,122 @@ struct ac_spm_trace {
uint32_t num_samples;
};
/* Identifiers of the derived counter groups. AC_SPM_GROUP_COUNT sizes
 * ac_spm_derived_trace::groups and must stay last. */
enum ac_spm_group_id {
AC_SPM_GROUP_CACHE,
AC_SPM_GROUP_LDS,
AC_SPM_GROUP_MEMORY_BYTES,
AC_SPM_GROUP_MEMORY_PERCENTAGE,
AC_SPM_GROUP_RT,
AC_SPM_GROUP_COUNT,
};
/* Identifiers of the derived counters. These values are written to the RGP
 * capture as the per-group counter ID list. AC_SPM_COUNTER_COUNT sizes
 * ac_spm_derived_trace::counters and must stay last. */
enum ac_spm_counter_id {
AC_SPM_COUNTER_INST_CACHE_HIT,
AC_SPM_COUNTER_SCALAR_CACHE_HIT,
AC_SPM_COUNTER_L0_CACHE_HIT,
AC_SPM_COUNTER_L1_CACHE_HIT, /* < GFX12 */
AC_SPM_COUNTER_L2_CACHE_HIT,
AC_SPM_COUNTER_CS_LDS_BANK_CONFLICT,
AC_SPM_COUNTER_FETCH_SIZE,
AC_SPM_COUNTER_WRITE_SIZE,
AC_SPM_COUNTER_LOCAL_VID_MEM_BYTES,
AC_SPM_COUNTER_PCIE_BYTES,
AC_SPM_COUNTER_MEM_UNIT_BUSY,
AC_SPM_COUNTER_MEM_UNIT_STALLED,
AC_SPM_COUNTER_RAY_BOX_TESTS,
AC_SPM_COUNTER_RAY_TRI_TESTS,
AC_SPM_COUNTER_COUNT,
};
/* Identifiers of the components that derived counters are built from. These
 * values are written to the RGP capture as the per-counter component ID
 * list. AC_SPM_COMPONENT_COUNT sizes ac_spm_derived_trace::components and
 * must stay last. */
enum ac_spm_component_id {
AC_SPM_COMPONENT_INST_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_INST_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_INST_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_L0_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_L0_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_L0_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_L1_CACHE_REQUEST_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L1_CACHE_HIT_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L1_CACHE_MISS_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L2_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_L2_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_L2_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_GPU_BUSY_CYCLES,
AC_SPM_COMPONENT_CS_LDS_BANK_CONFLICT_CYCLES,
AC_SPM_COMPONENT_MEM_UNIT_BUSY_CYCLES,
AC_SPM_COMPONENT_MEM_UNIT_STALLED_CYCLES,
AC_SPM_COMPONENT_COUNT,
};
/* Unit of a derived counter or component value. The numeric values are
 * copied verbatim into the usage_type fields of the RGP capture, so they
 * must not be renumbered.
 * NOTE(review): presumably these match RGP's usage-type encoding, which
 * would explain the gap at 3 - confirm against the RGP spec. */
enum ac_spm_usage_type {
AC_SPM_USAGE_PERCENTAGE = 1,
AC_SPM_USAGE_CYCLES = 2,
AC_SPM_USAGE_BYTES = 4,
AC_SPM_USAGE_ITEMS = 5,
};
/* Capacity of ac_spm_derived_counter_descr::components. */
#define AC_SPM_MAX_COMPONENTS_PER_COUNTER 3
/* Capacity of ac_spm_derived_group_descr::counters. */
#define AC_SPM_MAX_COUNTERS_PER_GROUP 5
/* Static description of a derived component: its ID, a counter ID, a
 * display name and the unit of its values.
 * NOTE(review): counter_id presumably names the derived counter that owns
 * this component - confirm against the descriptor tables. */
struct ac_spm_derived_component_descr {
enum ac_spm_component_id id;
enum ac_spm_counter_id counter_id;
const char *name;
enum ac_spm_usage_type usage;
};
/* Static description of a derived counter: its ID, a group ID, display name
 * and description, the unit of its values and the components it references.
 * The RGP writer calls strlen() on both name and desc, so neither may be
 * NULL. */
struct ac_spm_derived_counter_descr {
enum ac_spm_counter_id id;
enum ac_spm_group_id group_id;
const char *name;
const char *desc;
enum ac_spm_usage_type usage;
/* Number of valid entries in components[]. */
uint32_t num_components;
struct ac_spm_derived_component_descr *components[AC_SPM_MAX_COMPONENTS_PER_COUNTER];
};
/* Static description of a derived counter group: its ID, display name and
 * the counters it contains. The RGP writer calls strlen() on name, so it may
 * not be NULL. */
struct ac_spm_derived_group_descr {
enum ac_spm_group_id id;
const char *name;
/* Number of valid entries in counters[]. */
uint32_t num_counters;
struct ac_spm_derived_counter_descr *counters[AC_SPM_MAX_COUNTERS_PER_GROUP];
};
/* A group instance in a derived trace; it only references its descriptor. */
struct ac_spm_derived_group {
const struct ac_spm_derived_group_descr *descr;
};
/* A derived counter instance: its descriptor plus the computed values. */
struct ac_spm_derived_counter {
const struct ac_spm_derived_counter_descr *descr;
/* Dynarray of double; the RGP writer asserts it holds exactly
 * ac_spm_derived_trace::num_timestamps elements. */
struct util_dynarray values;
};
/* A derived component instance: its descriptor plus the computed values. */
struct ac_spm_derived_component {
const struct ac_spm_derived_component_descr *descr;
/* Dynarray of double; the RGP writer asserts it holds exactly
 * ac_spm_derived_trace::num_timestamps elements. */
struct util_dynarray values;
};
/* Derived SPM trace, serialized into the RGP capture by
 * ac_sqtt_dump_derived_spm(). Each num_* field gives the number of valid
 * entries in the array that follows it. */
struct ac_spm_derived_trace {
uint32_t num_timestamps;
uint64_t *timestamps;
uint32_t num_groups;
struct ac_spm_derived_group groups[AC_SPM_GROUP_COUNT];
uint32_t num_counters;
struct ac_spm_derived_counter counters[AC_SPM_COUNTER_COUNT];
uint32_t num_components;
struct ac_spm_derived_component components[AC_SPM_COMPONENT_COUNT];
/* Written to the chunk header as sampling_interval. */
uint32_t sample_interval;
};
bool ac_init_spm(const struct radeon_info *info,
const struct ac_perfcounters *pc,
struct ac_spm *spm);
@ -201,6 +357,13 @@ void ac_destroy_spm(struct ac_spm *spm);
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
/* Build a derived SPM trace from a raw SPM trace.
 *
 * The RGP dump path checks the result against NULL and releases a non-NULL
 * result with ac_spm_destroy_derived_trace().
 * NOTE(review): NULL presumably signals failure (e.g. allocation) - confirm
 * against the implementation. */
struct ac_spm_derived_trace *
ac_spm_get_derived_trace(const struct radeon_info *info,
const struct ac_spm_trace *spm_trace);
/* Release a trace returned by ac_spm_get_derived_trace(). The RGP dump path
 * only calls this with a non-NULL pointer. */
void
ac_spm_destroy_derived_trace(struct ac_spm_derived_trace *spm_derived_trace);
void
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type, const struct ac_spm *spm,

View file

@ -3981,6 +3981,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (cmd_buffer->state.emitted_graphics_pipeline == pipeline)
return;
@ -3990,6 +3991,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
if (pipeline->sqtt_shaders_reloc) {
/* Emit shaders relocation because RGP requires them to be contiguous in memory. */
radv_sqtt_emit_relocated_shaders(cmd_buffer, pipeline);
radv_cs_add_buffer(device->ws, cs->b, pipeline->sqtt_shaders_reloc->bo);
}
if (radv_device_fault_detection_enabled(device))