mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
Merge branch 'radv_rgp_26_support_gfx10_11' into 'main'
radv: add support for new performance counters in RGP 2.6 on GFX10-11 See merge request mesa/mesa!39013
This commit is contained in:
commit
d409992c6c
6 changed files with 1395 additions and 38 deletions
|
|
@ -932,6 +932,28 @@ static struct ac_pc_block_base gfx10_UTCL1 = {
|
|||
.num_spm_counters = 0,
|
||||
};
|
||||
|
||||
/* gfx10_GCEA */
|
||||
/* Select register list used as gfx10_GCEA.select0; only PERFCOUNTER2 is listed. */
static unsigned gfx10_GCEA_select0[] = {
|
||||
R_036800_GCEA_PERFCOUNTER2_SELECT,
|
||||
};
|
||||
|
||||
/* Secondary select register list used as gfx10_GCEA.select1 (PERFCOUNTER2_SELECT1). */
static unsigned gfx10_GCEA_select1[] = {
|
||||
R_036804_GCEA_PERFCOUNTER2_SELECT1,
|
||||
};
|
||||
/* Perf-counter block description for GCEA.
 *
 * A single counter is described and it is wired to the PERFCOUNTER2
 * select/SELECT1/LO registers. The "GCEA" name is matched with strcmp() in
 * ac_init_perfcounters() to derive the instance count, so it must stay in
 * sync with that check.
 */
static struct ac_pc_block_base gfx10_GCEA = {
|
||||
.gpu_block = GCEA,
|
||||
.name = "GCEA",
|
||||
.num_counters = 1,
|
||||
|
||||
.select0 = gfx10_GCEA_select0,
|
||||
.select1 = gfx10_GCEA_select1,
|
||||
.counter0_lo = R_034980_GCEA_PERFCOUNTER2_LO,
|
||||
|
||||
/* SPM (streaming) side of the block. */
.num_spm_counters = 1,
|
||||
.num_spm_wires = 2,
|
||||
.spm_block_select = AC_SPM_GLOBAL_BLOCK_GCEA,
|
||||
};
|
||||
|
||||
/* gfx11_SQ_WGP */
|
||||
static struct ac_pc_block_base gfx11_SQ_WGP = {
|
||||
.gpu_block = SQ_WGP,
|
||||
|
|
@ -1027,6 +1049,40 @@ static struct ac_pc_block_gfxdescr groups_gfx10[] = {
|
|||
{&gfx10_TCP, 77},
|
||||
{&cik_TD, 61},
|
||||
{&gfx10_UTCL1, 15},
|
||||
{&gfx10_GCEA, 88},
|
||||
};
|
||||
|
||||
/* Block table selected by ac_init_perfcounters() for GFX10_3.
 *
 * Each entry pairs a block description with one or two integers.
 * NOTE(review): the first integer is presumably the number of selectable
 * events and the optional second one the per-unit instance count -- confirm
 * against the definition of struct ac_pc_block_gfxdescr, which is not visible
 * in this excerpt.
 */
static struct ac_pc_block_gfxdescr groups_gfx103[] = {
|
||||
{&cik_CB, 461},
|
||||
{&gfx10_CHA, 45},
|
||||
{&gfx10_CHCG, 35},
|
||||
{&gfx10_CHC, 35},
|
||||
{&cik_CPC, 47},
|
||||
{&cik_CPF, 40},
|
||||
{&cik_CPG, 82},
|
||||
{&gfx10_DB, 370},
|
||||
{&gfx10_GCR, 94},
|
||||
{&cik_GDS, 123},
|
||||
{&gfx10_GE, 315},
|
||||
{&gfx10_GL1A, 36},
|
||||
{&gfx10_GL1C, 64, 4},
|
||||
{&gfx10_GL2A, 91},
|
||||
{&gfx10_GL2C, 235},
|
||||
{&cik_GRBM, 47},
|
||||
{&cik_GRBMSE, 19},
|
||||
{&gfx10_PA_PH, 960},
|
||||
{&cik_PA_SC, 552},
|
||||
{&gfx10_PA_SU, 266},
|
||||
{&gfx10_RLC, 7},
|
||||
{&gfx10_RMI, 258},
|
||||
{&cik_SPI, 329},
|
||||
{&gfx10_SQ, 509},
|
||||
{&cik_SX, 225},
|
||||
{&cik_TA, 226},
|
||||
{&gfx10_TCP, 77},
|
||||
{&cik_TD, 192},
|
||||
{&gfx10_UTCL1, 15},
|
||||
{&gfx10_GCEA, 89},
|
||||
};
|
||||
|
||||
static struct ac_pc_block_gfxdescr groups_gfx11[] = {
|
||||
|
|
@ -1060,6 +1116,7 @@ static struct ac_pc_block_gfxdescr groups_gfx11[] = {
|
|||
{&cik_TD, 196},
|
||||
{&gfx10_UTCL1, 65},
|
||||
{&gfx11_SQ_WGP, 511, 4},
|
||||
{&gfx10_GCEA, 86},
|
||||
};
|
||||
|
||||
static struct ac_pc_block_gfxdescr groups_gfx12[] = {
|
||||
|
|
@ -1237,10 +1294,13 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
num_blocks = ARRAY_SIZE(groups_gfx9);
|
||||
break;
|
||||
case GFX10:
|
||||
case GFX10_3:
|
||||
blocks = groups_gfx10;
|
||||
num_blocks = ARRAY_SIZE(groups_gfx10);
|
||||
break;
|
||||
case GFX10_3:
|
||||
blocks = groups_gfx103;
|
||||
num_blocks = ARRAY_SIZE(groups_gfx103);
|
||||
break;
|
||||
case GFX11:
|
||||
blocks = groups_gfx11;
|
||||
num_blocks = ARRAY_SIZE(groups_gfx11);
|
||||
|
|
@ -1290,8 +1350,14 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
} else if (!strcmp(block->b->b->name, "GL1C") ||
|
||||
!strcmp(block->b->b->name, "SQ_WGP")) {
|
||||
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
|
||||
} else if (!strcmp(block->b->b->name, "GL2C")) {
|
||||
} else if (!strcmp(block->b->b->name, "GL2C") ||
|
||||
!strcmp(block->b->b->name, "GCEA")) {
|
||||
block->num_instances = block->num_global_instances = info->num_tcc_blocks;
|
||||
} else if (!strcmp(block->b->b->name, "CPF")) {
|
||||
block->num_instances = block->num_global_instances = 1;
|
||||
} else if (!strcmp(block->b->b->name, "TA") ||
|
||||
!strcmp(block->b->b->name, "TD")) {
|
||||
block->num_global_instances = block->num_instances;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ enum ac_pc_gpu_block {
|
|||
ATC = 0x1A,
|
||||
ATCL2 = 0x1B,
|
||||
MCVML2 = 0x1C,
|
||||
EA = 0x1D,
|
||||
GCEA = 0x1D,
|
||||
RPB = 0x1E,
|
||||
RMI = 0x1F,
|
||||
UMCCH = 0x20,
|
||||
|
|
|
|||
|
|
@ -58,6 +58,10 @@ enum sqtt_file_chunk_type
|
|||
SQTT_FILE_CHUNK_TYPE_CODE_OBJECT_LOADER_EVENTS,
|
||||
SQTT_FILE_CHUNK_TYPE_PSO_CORRELATION,
|
||||
SQTT_FILE_CHUNK_TYPE_INSTRUMENTATION_TABLE,
|
||||
|
||||
SQTT_FILE_CHUNK_TYPE_FIRST_TOOLS_TYPE = 128,
|
||||
SQTT_FILE_CHUNK_TYPE_DERIVED_SPM_DB = SQTT_FILE_CHUNK_TYPE_FIRST_TOOLS_TYPE,
|
||||
|
||||
SQTT_FILE_CHUNK_TYPE_COUNT
|
||||
};
|
||||
|
||||
|
|
@ -992,10 +996,203 @@ static void ac_sqtt_dump_spm(const struct ac_spm_trace *spm_trace,
|
|||
fseek(output, file_offset, SEEK_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* SQTT Derived SPM DB info.
|
||||
*/
|
||||
/* Fixed-size record written in front of every group by
 * ac_sqtt_dump_derived_spm(). It is followed in the file by the group name
 * and then by one uint32_t counter id per counter of the group.
 */
struct sqtt_derived_spm_group_info {
|
||||
uint32_t size_in_bytes; /* this struct + name bytes + num_counters * sizeof(uint32_t) */
|
||||
uint32_t offset; /* set to sizeof(this struct) by the writer */
|
||||
uint32_t group_name_length; /* strlen() of the name; no NUL terminator is written */
|
||||
uint32_t group_description_length; /* left at 0 by the writer; no description is emitted */
|
||||
uint32_t num_counters; /* number of counter ids that follow the name */
|
||||
};
|
||||
|
||||
/* Fixed-size record written in front of every derived counter by
 * ac_sqtt_dump_derived_spm(). It is followed in the file by the counter name,
 * its description and then one uint32_t component id per component.
 *
 * NOTE(review): usage_type is uint8_t here but uint32_t in
 * sqtt_derived_spm_component_info, and the writer emits sizeof(this struct)
 * bytes, i.e. including any trailing padding -- confirm this layout against
 * the RGP file format.
 */
struct sqtt_derived_spm_counter_info {
|
||||
uint32_t size_in_bytes; /* this struct + name + description + num_components * sizeof(uint32_t) */
|
||||
uint32_t offset; /* set to sizeof(this struct) by the writer */
|
||||
uint32_t counter_name_length; /* strlen() of the name; no NUL terminator is written */
|
||||
uint32_t counter_description_length; /* strlen() of the description; no NUL terminator */
|
||||
uint32_t num_components; /* number of component ids that follow the description */
|
||||
uint8_t usage_type; /* copied from ac_spm_derived_counter_descr.usage */
|
||||
};
|
||||
|
||||
/* Fixed-size record written in front of every derived component by
 * ac_sqtt_dump_derived_spm(). It is followed in the file by the component
 * name only.
 */
struct sqtt_derived_spm_component_info {
|
||||
uint32_t size_in_bytes; /* this struct + name bytes */
|
||||
uint32_t offset; /* set to sizeof(this struct) by the writer */
|
||||
uint32_t component_name_length; /* strlen() of the name; no NUL terminator is written */
|
||||
uint32_t component_description_length; /* left at 0 by the writer; no description is emitted */
|
||||
uint32_t usage_type; /* copied from ac_spm_derived_component_descr.usage */
|
||||
};
|
||||
|
||||
/* Header of the derived SPM DB chunk; filled by ac_sqtt_fill_derived_spm_db()
 * and written at the start of the chunk once the payload size is known.
 */
struct sqtt_file_chunk_derived_spm_db {
|
||||
struct sqtt_file_chunk_header header; /* generic chunk header (type, version, size) */
|
||||
uint32_t offset; /* set to sizeof(this struct) */
|
||||
uint32_t flags; /* always written as 0 */
|
||||
uint32_t num_timestamps; /* number of uint64_t timestamps after the header */
|
||||
uint32_t num_groups; /* number of group records */
|
||||
uint32_t num_counters; /* number of counter records */
|
||||
uint32_t num_components; /* number of component records */
|
||||
uint32_t sampling_interval; /* copied from ac_spm_derived_trace.sample_interval */
|
||||
};
|
||||
|
||||
/* Guard against accidental layout changes: the chunk header must stay 44 bytes. */
static_assert(sizeof(struct sqtt_file_chunk_derived_spm_db) == 44,
|
||||
"sqtt_file_chunk_derived_spm_db doesn't match RGP spec");
|
||||
|
||||
static void ac_sqtt_fill_derived_spm_db(const struct ac_spm_derived_trace *spm_derived_trace,
|
||||
struct sqtt_file_chunk_derived_spm_db *chunk,
|
||||
size_t file_offset,
|
||||
uint32_t chunk_size)
|
||||
{
|
||||
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_DERIVED_SPM_DB;
|
||||
chunk->header.chunk_id.index = 0;
|
||||
chunk->header.major_version = 0;
|
||||
chunk->header.minor_version = 0;
|
||||
chunk->header.size_in_bytes = chunk_size;
|
||||
|
||||
chunk->offset = sizeof(*chunk);
|
||||
chunk->flags = 0;
|
||||
chunk->num_timestamps = spm_derived_trace->num_timestamps;
|
||||
chunk->num_groups = spm_derived_trace->num_groups;
|
||||
chunk->num_counters = spm_derived_trace->num_counters;
|
||||
chunk->num_components = spm_derived_trace->num_components;
|
||||
chunk->sampling_interval = spm_derived_trace->sample_interval;
|
||||
}
|
||||
|
||||
static void ac_sqtt_dump_derived_spm(const struct ac_spm_derived_trace *spm_derived_trace,
|
||||
size_t file_offset,
|
||||
FILE *output)
|
||||
{
|
||||
struct sqtt_file_chunk_derived_spm_db derived_spm_db;
|
||||
size_t file_derived_spm_db_offset = file_offset;
|
||||
|
||||
fseek(output, sizeof(struct sqtt_file_chunk_derived_spm_db), SEEK_CUR);
|
||||
file_offset += sizeof(struct sqtt_file_chunk_derived_spm_db);
|
||||
|
||||
/* Dump timestamps. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_timestamps; i++) {
|
||||
uint64_t timestamp = spm_derived_trace->timestamps[i];
|
||||
|
||||
file_offset += sizeof(timestamp);
|
||||
fwrite(×tamp, sizeof(timestamp), 1, output);
|
||||
}
|
||||
|
||||
/* Dump SPM groups. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_groups; i++) {
|
||||
const struct ac_spm_derived_group *group = &spm_derived_trace->groups[i];
|
||||
const struct ac_spm_derived_group_descr *group_descr = group->descr;
|
||||
struct sqtt_derived_spm_group_info group_info = {0};
|
||||
|
||||
const uint32_t num_counters = group_descr->num_counters;
|
||||
const uint32_t name_length = strlen(group_descr->name);
|
||||
|
||||
group_info.size_in_bytes = sizeof(group_info) + name_length +
|
||||
num_counters * sizeof(uint32_t);
|
||||
group_info.offset = sizeof(group_info);
|
||||
group_info.group_name_length = name_length;
|
||||
group_info.num_counters = num_counters;
|
||||
|
||||
file_offset += sizeof(group_info) + group_info.group_name_length;
|
||||
fwrite(&group_info, sizeof(group_info), 1, output);
|
||||
fwrite(group_descr->name, group_info.group_name_length, 1, output);
|
||||
|
||||
for (uint32_t j = 0; j < group_descr->num_counters; j++) {
|
||||
const struct ac_spm_derived_counter_descr *counter_descr = group_descr->counters[j];
|
||||
uint32_t counter_id = counter_descr->id;
|
||||
|
||||
file_offset += sizeof(uint32_t);
|
||||
fwrite(&counter_id, sizeof(uint32_t), 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump SPM counters. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_counters; i++) {
|
||||
const struct ac_spm_derived_counter *counter = &spm_derived_trace->counters[i];
|
||||
const struct ac_spm_derived_counter_descr *counter_descr = counter->descr;
|
||||
struct sqtt_derived_spm_counter_info counter_info = {0};
|
||||
|
||||
const uint32_t num_components = counter_descr->num_components;
|
||||
const uint32_t name_length = strlen(counter_descr->name);
|
||||
const uint32_t description_length = strlen(counter_descr->desc);
|
||||
|
||||
counter_info.size_in_bytes = sizeof(counter_info) + name_length +
|
||||
description_length + num_components * sizeof(uint32_t);
|
||||
counter_info.offset = sizeof(counter_info);
|
||||
counter_info.counter_name_length = name_length;
|
||||
counter_info.counter_description_length = description_length;
|
||||
counter_info.num_components = num_components;
|
||||
counter_info.usage_type = counter_descr->usage;
|
||||
|
||||
file_offset += sizeof(counter_info) + counter_info.counter_name_length +
|
||||
counter_info.counter_description_length;
|
||||
fwrite(&counter_info, sizeof(counter_info), 1, output);
|
||||
fwrite(counter_descr->name, counter_info.counter_name_length, 1, output);
|
||||
fwrite(counter_descr->desc, counter_info.counter_description_length, 1, output);
|
||||
|
||||
for (uint32_t j = 0; j < counter_descr->num_components; j++) {
|
||||
const struct ac_spm_derived_component_descr *component_descr = counter_descr->components[j];
|
||||
uint32_t component_id = component_descr->id;
|
||||
|
||||
file_offset += sizeof(uint32_t);
|
||||
fwrite(&component_id, sizeof(uint32_t), 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump SPM components. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_components; i++) {
|
||||
const struct ac_spm_derived_component *component = &spm_derived_trace->components[i];
|
||||
const struct ac_spm_derived_component_descr *component_descr = component->descr;
|
||||
struct sqtt_derived_spm_component_info component_info = {0};
|
||||
|
||||
const uint32_t name_length = strlen(component_descr->name);
|
||||
|
||||
component_info.size_in_bytes = sizeof(component_info) + name_length;
|
||||
component_info.offset = sizeof(component_info);
|
||||
component_info.component_name_length = name_length;
|
||||
component_info.usage_type = component_descr->usage;
|
||||
|
||||
file_offset += sizeof(component_info) + component_info.component_name_length +
|
||||
component_info.component_description_length;
|
||||
fwrite(&component_info, sizeof(component_info), 1, output);
|
||||
fwrite(component_descr->name, component_info.component_name_length, 1, output);
|
||||
}
|
||||
|
||||
/* Dump counter values. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_counters; i++) {
|
||||
const struct ac_spm_derived_counter *counter = &spm_derived_trace->counters[i];
|
||||
|
||||
assert(util_dynarray_num_elements(&counter->values, double) == spm_derived_trace->num_timestamps);
|
||||
util_dynarray_foreach(&counter->values, double, value) {
|
||||
file_offset += sizeof(double);
|
||||
fwrite(value, sizeof(double), 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump component values. */
|
||||
for (uint32_t i = 0; i < spm_derived_trace->num_components; i++) {
|
||||
const struct ac_spm_derived_component *component = &spm_derived_trace->components[i];
|
||||
|
||||
assert(util_dynarray_num_elements(&component->values, double) == spm_derived_trace->num_timestamps);
|
||||
util_dynarray_foreach(&component->values, double, value) {
|
||||
file_offset += sizeof(double);
|
||||
fwrite(value, sizeof(double), 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* SQTT Derived SPM chunk. */
|
||||
ac_sqtt_fill_derived_spm_db(spm_derived_trace, &derived_spm_db,
|
||||
file_derived_spm_db_offset,
|
||||
file_offset - file_derived_spm_db_offset);
|
||||
fseek(output, file_derived_spm_db_offset, SEEK_SET);
|
||||
fwrite(&derived_spm_db, sizeof(struct sqtt_file_chunk_derived_spm_db), 1, output);
|
||||
fseek(output, file_offset, SEEK_SET);
|
||||
}
|
||||
|
||||
#if defined(USE_LIBELF)
|
||||
static void
|
||||
ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt_trace,
|
||||
const struct ac_spm_trace *spm_trace, FILE *output)
|
||||
const struct ac_spm_trace *spm_trace,
|
||||
const struct ac_spm_derived_trace *spm_derived_trace,
|
||||
FILE *output)
|
||||
{
|
||||
struct sqtt_file_chunk_asic_info asic_info = {0};
|
||||
struct sqtt_file_chunk_cpu_info cpu_info = {0};
|
||||
|
|
@ -1193,12 +1390,26 @@ ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt
|
|||
}
|
||||
}
|
||||
|
||||
if (spm_trace) {
|
||||
if (spm_derived_trace) {
|
||||
ac_sqtt_dump_derived_spm(spm_derived_trace, file_offset, output);
|
||||
} else if (spm_trace) {
|
||||
ac_sqtt_dump_spm(spm_trace, file_offset, output);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool
|
||||
ac_use_derived_spm_trace(const struct radeon_info *info,
|
||||
const struct ac_spm_trace *spm_trace)
|
||||
{
|
||||
if (!spm_trace)
|
||||
return false;
|
||||
|
||||
/* TODO: Enable for GFX12. */
|
||||
return info->gfx_level >= GFX10 && info->gfx_level < GFX12;
|
||||
return false;
|
||||
}
|
||||
|
||||
int
|
||||
ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
|
||||
const struct ac_spm_trace *spm_trace)
|
||||
|
|
@ -1223,7 +1434,13 @@ ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_t
|
|||
if (!f)
|
||||
return -1;
|
||||
|
||||
ac_sqtt_dump_data(info, sqtt_trace, spm_trace, f);
|
||||
struct ac_spm_derived_trace *spm_derived_trace =
|
||||
ac_use_derived_spm_trace(info, spm_trace) ? ac_spm_get_derived_trace(info, spm_trace) : NULL;
|
||||
|
||||
ac_sqtt_dump_data(info, sqtt_trace, spm_trace, spm_derived_trace, f);
|
||||
|
||||
if (spm_derived_trace)
|
||||
ac_spm_destroy_derived_trace(spm_derived_trace);
|
||||
|
||||
fprintf(stderr, "RGP capture saved to '%s'\n", filename);
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -11,6 +11,8 @@
|
|||
|
||||
#include "ac_perfcounter.h"
|
||||
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
struct ac_cmdbuf;
|
||||
|
||||
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
|
||||
|
|
@ -35,7 +37,7 @@ enum ac_spm_global_block {
|
|||
AC_SPM_GLOBAL_BLOCK_GL2C,
|
||||
AC_SPM_GLOBAL_BLOCK_SDMA,
|
||||
AC_SPM_GLOBAL_BLOCK_GUS,
|
||||
AC_SPM_GLOBAL_BLOCK_EA,
|
||||
AC_SPM_GLOBAL_BLOCK_GCEA,
|
||||
AC_SPM_GLOBAL_BLOCK_CHA,
|
||||
AC_SPM_GLOBAL_BLOCK_CHC,
|
||||
AC_SPM_GLOBAL_BLOCK_CHCG,
|
||||
|
|
@ -86,7 +88,44 @@ enum ac_spm_segment_type {
|
|||
AC_SPM_SEGMENT_TYPE_COUNT,
|
||||
};
|
||||
|
||||
/* Identifiers of the raw SPM counters. The value is stored in the id field of
 * struct ac_spm_counter_descr and struct ac_spm_counter_info. Each name
 * carries the hardware block prefix (TCP, SQC, GL1C, GL2C, CPF, GCEA, TA, TD).
 * AC_SPM_RAW_COUNTER_ID_COUNT is last and therefore equals the number of ids.
 */
enum ac_spm_raw_counter_id {
|
||||
AC_SPM_TCP_PERF_SEL_REQ = 0,
|
||||
AC_SPM_TCP_PERF_SEL_REQ_MISS,
|
||||
AC_SPM_SQC_PERF_SEL_DCACHE_HITS,
|
||||
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES,
|
||||
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE,
|
||||
AC_SPM_SQC_PERF_SEL_ICACHE_HITS,
|
||||
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES,
|
||||
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE,
|
||||
AC_SPM_GL1C_PERF_SEL_REQ,
|
||||
AC_SPM_GL1C_PERF_SEL_REQ_MISS,
|
||||
AC_SPM_GL2C_PERF_SEL_REQ,
|
||||
AC_SPM_GL2C_PERF_SEL_MISS,
|
||||
AC_SPM_CPF_PERF_SEL_STAT_BUSY,
|
||||
AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_WRREQ,
|
||||
AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B,
|
||||
AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS,
|
||||
AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS,
|
||||
AC_SPM_TA_PERF_SEL_TA_BUSY,
|
||||
AC_SPM_TCP_PERF_SEL_TCP_TA_REQ_STALL,
|
||||
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_TRI_NODE,
|
||||
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP16_BOX_NODE,
|
||||
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP32_BOX_NODE,
|
||||
AC_SPM_RAW_COUNTER_ID_COUNT,
|
||||
};
|
||||
|
||||
/* Operation applied to raw counter values.
 * NOTE(review): presumably selects how samples from several block instances
 * are combined (sum vs. maximum) -- no user is visible in this excerpt, confirm
 * against the implementation.
 */
enum ac_spm_raw_counter_op {
|
||||
AC_SPM_RAW_COUNTER_OP_SUM = 0,
|
||||
AC_SPM_RAW_COUNTER_OP_MAX,
|
||||
};
|
||||
|
||||
/* Static description of one raw SPM counter. */
struct ac_spm_counter_descr {
|
||||
enum ac_spm_raw_counter_id id; /* identifier of this raw counter */
|
||||
enum ac_pc_gpu_block gpu_block; /* GPU block the counter belongs to */
|
||||
uint32_t event_id; /* NOTE(review): presumably the block's event select value -- confirm */
|
||||
};
|
||||
|
|
@ -119,6 +158,7 @@ struct ac_spm_muxsel_line {
|
|||
|
||||
struct ac_spm_counter_info {
|
||||
/* General info. */
|
||||
enum ac_spm_raw_counter_id id;
|
||||
enum ac_pc_gpu_block gpu_block;
|
||||
uint32_t instance;
|
||||
uint32_t event_id;
|
||||
|
|
@ -194,6 +234,122 @@ struct ac_spm_trace {
|
|||
uint32_t num_samples;
|
||||
};
|
||||
|
||||
/* Identifiers of the derived counter groups. AC_SPM_GROUP_COUNT is last and
 * sizes the groups[] array of struct ac_spm_derived_trace.
 */
enum ac_spm_group_id {
|
||||
AC_SPM_GROUP_CACHE,
|
||||
AC_SPM_GROUP_LDS,
|
||||
AC_SPM_GROUP_MEMORY_BYTES,
|
||||
AC_SPM_GROUP_MEMORY_PERCENTAGE,
|
||||
AC_SPM_GROUP_RT,
|
||||
AC_SPM_GROUP_COUNT,
|
||||
};
|
||||
|
||||
/* Identifiers of the derived counters. The numeric value is written to the
 * RGP capture as the counter id inside group records, so the order matters.
 * AC_SPM_COUNTER_COUNT is last and sizes the counters[] array of
 * struct ac_spm_derived_trace.
 */
enum ac_spm_counter_id {
|
||||
AC_SPM_COUNTER_INST_CACHE_HIT,
|
||||
AC_SPM_COUNTER_SCALAR_CACHE_HIT,
|
||||
AC_SPM_COUNTER_L0_CACHE_HIT,
|
||||
AC_SPM_COUNTER_L1_CACHE_HIT, /* < GFX12 */
|
||||
AC_SPM_COUNTER_L2_CACHE_HIT,
|
||||
AC_SPM_COUNTER_CS_LDS_BANK_CONFLICT,
|
||||
AC_SPM_COUNTER_FETCH_SIZE,
|
||||
AC_SPM_COUNTER_WRITE_SIZE,
|
||||
AC_SPM_COUNTER_LOCAL_VID_MEM_BYTES,
|
||||
AC_SPM_COUNTER_PCIE_BYTES,
|
||||
AC_SPM_COUNTER_MEM_UNIT_BUSY,
|
||||
AC_SPM_COUNTER_MEM_UNIT_STALLED,
|
||||
AC_SPM_COUNTER_RAY_BOX_TESTS,
|
||||
AC_SPM_COUNTER_RAY_TRI_TESTS,
|
||||
AC_SPM_COUNTER_COUNT,
|
||||
};
|
||||
|
||||
/* Identifiers of the derived components. The numeric value is written to the
 * RGP capture as the component id inside counter records, so the order
 * matters. AC_SPM_COMPONENT_COUNT is last and sizes the components[] array of
 * struct ac_spm_derived_trace.
 */
enum ac_spm_component_id {
|
||||
AC_SPM_COMPONENT_INST_CACHE_REQUEST_COUNT,
|
||||
AC_SPM_COMPONENT_INST_CACHE_HIT_COUNT,
|
||||
AC_SPM_COMPONENT_INST_CACHE_MISS_COUNT,
|
||||
AC_SPM_COMPONENT_SCALAR_CACHE_REQUEST_COUNT,
|
||||
AC_SPM_COMPONENT_SCALAR_CACHE_HIT_COUNT,
|
||||
AC_SPM_COMPONENT_SCALAR_CACHE_MISS_COUNT,
|
||||
AC_SPM_COMPONENT_L0_CACHE_REQUEST_COUNT,
|
||||
AC_SPM_COMPONENT_L0_CACHE_HIT_COUNT,
|
||||
AC_SPM_COMPONENT_L0_CACHE_MISS_COUNT,
|
||||
AC_SPM_COMPONENT_L1_CACHE_REQUEST_COUNT, /* < GFX12 */
|
||||
AC_SPM_COMPONENT_L1_CACHE_HIT_COUNT, /* < GFX12 */
|
||||
AC_SPM_COMPONENT_L1_CACHE_MISS_COUNT, /* < GFX12 */
|
||||
AC_SPM_COMPONENT_L2_CACHE_REQUEST_COUNT,
|
||||
AC_SPM_COMPONENT_L2_CACHE_HIT_COUNT,
|
||||
AC_SPM_COMPONENT_L2_CACHE_MISS_COUNT,
|
||||
AC_SPM_COMPONENT_GPU_BUSY_CYCLES,
|
||||
AC_SPM_COMPONENT_CS_LDS_BANK_CONFLICT_CYCLES,
|
||||
AC_SPM_COMPONENT_MEM_UNIT_BUSY_CYCLES,
|
||||
AC_SPM_COMPONENT_MEM_UNIT_STALLED_CYCLES,
|
||||
AC_SPM_COMPONENT_COUNT,
|
||||
};
|
||||
|
||||
/* Unit of a derived counter or component. The value is copied verbatim into
 * the usage_type field of the RGP counter/component records, which is why the
 * constants are explicit and non-contiguous (0 and 3 are not used here).
 * NOTE(review): the numbering is presumably dictated by the RGP file format --
 * confirm before adding new values.
 */
enum ac_spm_usage_type {
|
||||
AC_SPM_USAGE_PERCENTAGE = 1,
|
||||
AC_SPM_USAGE_CYCLES = 2,
|
||||
AC_SPM_USAGE_BYTES = 4,
|
||||
AC_SPM_USAGE_ITEMS = 5,
|
||||
};
|
||||
|
||||
/* Capacity of ac_spm_derived_counter_descr.components[]. */
#define AC_SPM_MAX_COMPONENTS_PER_COUNTER 3
|
||||
/* Capacity of ac_spm_derived_group_descr.counters[]. */
#define AC_SPM_MAX_COUNTERS_PER_GROUP 5
|
||||
|
||||
/* Static description of one derived component. */
struct ac_spm_derived_component_descr {
|
||||
enum ac_spm_component_id id; /* written to the capture as the component id */
|
||||
enum ac_spm_counter_id counter_id; /* derived counter this component is associated with */
|
||||
const char *name; /* NUL-terminated; the RGP writer takes strlen() of it */
|
||||
enum ac_spm_usage_type usage; /* unit of the component values */
|
||||
};
|
||||
|
||||
/* Static description of one derived counter and of the components it uses. */
struct ac_spm_derived_counter_descr {
|
||||
enum ac_spm_counter_id id; /* written to the capture as the counter id */
|
||||
enum ac_spm_group_id group_id; /* group this counter is associated with */
|
||||
const char *name; /* NUL-terminated; the RGP writer takes strlen() of it */
|
||||
const char *desc; /* NUL-terminated description; must not be NULL for the RGP writer */
|
||||
enum ac_spm_usage_type usage; /* unit of the counter values */
|
||||
uint32_t num_components; /* number of valid entries in components[] */
|
||||
struct ac_spm_derived_component_descr *components[AC_SPM_MAX_COMPONENTS_PER_COUNTER];
|
||||
};
|
||||
|
||||
/* Static description of one group of derived counters. */
struct ac_spm_derived_group_descr {
|
||||
enum ac_spm_group_id id; /* identifier of the group */
|
||||
const char *name; /* NUL-terminated; the RGP writer takes strlen() of it */
|
||||
uint32_t num_counters; /* number of valid entries in counters[] */
|
||||
struct ac_spm_derived_counter_descr *counters[AC_SPM_MAX_COUNTERS_PER_GROUP];
|
||||
};
|
||||
|
||||
/* A group present in a derived trace; it only references its description. */
struct ac_spm_derived_group {
|
||||
const struct ac_spm_derived_group_descr *descr;
|
||||
};
|
||||
|
||||
/* A derived counter of a trace: its description plus its sampled values. */
struct ac_spm_derived_counter {
|
||||
const struct ac_spm_derived_counter_descr *descr;
|
||||
|
||||
/* Array of double; the RGP writer asserts it holds one value per timestamp. */
struct util_dynarray values;
|
||||
};
|
||||
|
||||
/* A derived component of a trace: its description plus its sampled values. */
struct ac_spm_derived_component {
|
||||
const struct ac_spm_derived_component_descr *descr;
|
||||
|
||||
/* Array of double; the RGP writer asserts it holds one value per timestamp. */
struct util_dynarray values;
|
||||
};
|
||||
|
||||
/* Derived SPM trace, as serialized into the RGP derived SPM DB chunk.
 *
 * The fixed-size arrays are sized for every known id; the RGP writer only
 * reads the first num_groups / num_counters / num_components entries.
 */
struct ac_spm_derived_trace {
|
||||
uint32_t num_timestamps; /* number of entries in timestamps[] */
|
||||
uint64_t *timestamps;
|
||||
|
||||
uint32_t num_groups; /* number of valid entries in groups[] */
|
||||
struct ac_spm_derived_group groups[AC_SPM_GROUP_COUNT];
|
||||
|
||||
uint32_t num_counters; /* number of valid entries in counters[] */
|
||||
struct ac_spm_derived_counter counters[AC_SPM_COUNTER_COUNT];
|
||||
|
||||
uint32_t num_components; /* number of valid entries in components[] */
|
||||
struct ac_spm_derived_component components[AC_SPM_COMPONENT_COUNT];
|
||||
|
||||
uint32_t sample_interval; /* written to the chunk header as sampling_interval */
|
||||
};
|
||||
|
||||
bool ac_init_spm(const struct radeon_info *info,
|
||||
const struct ac_perfcounters *pc,
|
||||
struct ac_spm *spm);
|
||||
|
|
@ -201,6 +357,13 @@ void ac_destroy_spm(struct ac_spm *spm);
|
|||
|
||||
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
|
||||
|
||||
/* Build a derived trace from a raw SPM trace.
 *
 * The result is released with ac_spm_destroy_derived_trace().
 * NOTE(review): ac_dump_rgp_capture() null-checks the result before using it,
 * so this presumably returns NULL on failure -- the implementation is not
 * visible in this excerpt, confirm.
 */
struct ac_spm_derived_trace *
|
||||
ac_spm_get_derived_trace(const struct radeon_info *info,
|
||||
const struct ac_spm_trace *spm_trace);
|
||||
|
||||
/* Release a derived trace obtained from ac_spm_get_derived_trace().
 * NOTE(review): the only visible caller skips the call for NULL; whether NULL
 * is accepted here cannot be told from this excerpt.
 */
void
|
||||
ac_spm_destroy_derived_trace(struct ac_spm_derived_trace *spm_derived_trace);
|
||||
|
||||
void
|
||||
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
enum amd_ip_type ip_type, const struct ac_spm *spm,
|
||||
|
|
|
|||
|
|
@ -3981,6 +3981,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
|||
{
|
||||
struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (cmd_buffer->state.emitted_graphics_pipeline == pipeline)
|
||||
return;
|
||||
|
|
@ -3990,6 +3991,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (pipeline->sqtt_shaders_reloc) {
|
||||
/* Emit shaders relocation because RGP requires them to be contiguous in memory. */
|
||||
radv_sqtt_emit_relocated_shaders(cmd_buffer, pipeline);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs->b, pipeline->sqtt_shaders_reloc->bo);
|
||||
}
|
||||
|
||||
if (radv_device_fault_detection_enabled(device))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue