radv,aco: switch to shader statistics framework

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12756
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35583>
This commit is contained in:
Rhys Perry 2025-06-17 15:07:04 +01:00 committed by Marge Bot
parent 2382d657ec
commit 325dfd809a
11 changed files with 110 additions and 138 deletions

View file

@ -20,36 +20,6 @@ using namespace aco;
namespace {
static const std::array<aco_compiler_statistic_info, aco_num_statistics> statistic_infos = []()
{
std::array<aco_compiler_statistic_info, aco_num_statistics> ret{};
ret[aco_statistic_hash] =
aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
ret[aco_statistic_instructions] =
aco_compiler_statistic_info{"Instructions", "Instruction count"};
ret[aco_statistic_copies] =
aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
ret[aco_statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"};
ret[aco_statistic_latency] =
aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
ret[aco_statistic_inv_throughput] = aco_compiler_statistic_info{
"Inverse Throughput", "Estimated busy cycles to execute one wave"};
ret[aco_statistic_vmem_clauses] = aco_compiler_statistic_info{
"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
ret[aco_statistic_smem_clauses] = aco_compiler_statistic_info{
"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
ret[aco_statistic_sgpr_presched] =
aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
ret[aco_statistic_vgpr_presched] =
aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
ret[aco_statistic_valu] = aco_compiler_statistic_info{"VALU", "Number of VALU instructions"};
ret[aco_statistic_salu] = aco_compiler_statistic_info{"SALU", "Number of SALU instructions"};
ret[aco_statistic_vmem] = aco_compiler_statistic_info{"VMEM", "Number of VMEM instructions"};
ret[aco_statistic_smem] = aco_compiler_statistic_info{"SMEM", "Number of SMEM instructions"};
ret[aco_statistic_vopd] = aco_compiler_statistic_info{"VOPD", "Number of VOPD instructions"};
return ret;
}();
static void
validate(Program* program)
{
@ -222,8 +192,7 @@ aco_compile_shader_part(const struct aco_compiler_options* options,
std::unique_ptr<Program> program{new Program};
program->collect_statistics = options->record_stats;
if (program->collect_statistics)
memset(program->statistics, 0, sizeof(program->statistics));
memset(&program->statistics, 0, sizeof(program->statistics));
program->debug.func = options->debug.func;
program->debug.private_data = options->debug.private_data;
@ -262,8 +231,7 @@ aco_compile_shader(const struct aco_compiler_options* options, const struct aco_
std::unique_ptr<Program> program{new Program};
program->collect_statistics = options->record_stats;
if (program->collect_statistics)
memset(program->statistics, 0, sizeof(program->statistics));
memset(&program->statistics, 0, sizeof(program->statistics));
program->debug.func = options->debug.func;
program->debug.private_data = options->debug.private_data;
@ -289,14 +257,9 @@ aco_compile_shader(const struct aco_compiler_options* options, const struct aco_
if (options->record_asm)
disasm = get_disasm_string(program.get(), code, exec_size);
size_t stats_size = 0;
if (program->collect_statistics)
stats_size = aco_num_statistics * sizeof(uint32_t);
(*build_binary)(binary, &config, llvm_ir.c_str(), llvm_ir.size(), disasm.c_str(), disasm.size(),
program->statistics, stats_size, exec_size, code.data(), code.size(),
symbols.data(), symbols.size(), program->debug_info.data(),
program->debug_info.size());
&program->statistics, exec_size, code.data(), code.size(), symbols.data(),
symbols.size(), program->debug_info.data(), program->debug_info.size());
}
void
@ -337,8 +300,8 @@ aco_compile_rt_prolog(const struct aco_compiler_options* options,
if (options->record_asm)
disasm = get_disasm_string(program.get(), code, exec_size);
(*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
(*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), NULL, exec_size,
code.data(), code.size(), NULL, 0, NULL, 0);
}
void
@ -437,8 +400,8 @@ aco_compile_trap_handler(const struct aco_compiler_options* options,
if (options->record_asm)
disasm = get_disasm_string(program.get(), code, exec_size);
(*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
(*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), NULL, exec_size,
code.data(), code.size(), NULL, 0, NULL, 0);
}
uint64_t
@ -514,8 +477,6 @@ aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
}
}
const aco_compiler_statistic_info* aco_statistic_infos = statistic_infos.data();
void
aco_print_asm(const struct radeon_info *info, unsigned wave_size,
uint32_t *binary, unsigned num_dw)

View file

@ -8,10 +8,11 @@
#define ACO_INTERFACE_H
#include "aco_shader_info.h"
#include "ac_shader_debug_info.h"
#include "nir_defines.h"
#include "util/shader_stats.h"
#include "ac_shader_debug_info.h"
#include "amd_family.h"
#ifdef __cplusplus
extern "C" {
@ -26,24 +27,17 @@ struct aco_vs_prolog_info;
struct aco_ps_epilog_info;
struct radeon_info;
struct aco_compiler_statistic_info {
char name[32];
char desc[64];
};
typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* config,
const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
const struct aco_symbol* symbols, unsigned num_symbols,
const struct ac_shader_debug_info* debug_info, unsigned debug_info_count);
unsigned disasm_size, struct amd_stats* stats, uint32_t exec_size,
const uint32_t* code, uint32_t code_dw, const struct aco_symbol* symbols,
unsigned num_symbols, const struct ac_shader_debug_info* debug_info,
unsigned debug_info_count);
typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
const uint32_t* code, uint32_t code_size,
const char* disasm_str, uint32_t disasm_size);
extern const struct aco_compiler_statistic_info* aco_statistic_infos;
void aco_compile_shader(const struct aco_compiler_options* options,
const struct aco_shader_info* info, unsigned shader_count,
struct nir_shader* const* shaders, const struct ac_shader_args* args,

View file

@ -12,6 +12,7 @@
#include "aco_util.h"
#include "util/compiler.h"
#include "util/shader_stats.h"
#include "ac_binary.h"
#include "ac_hw_stage.h"
@ -2162,7 +2163,7 @@ public:
CompilationProgress progress;
bool collect_statistics = false;
uint32_t statistics[aco_num_statistics];
amd_stats statistics;
float_mode next_fp_mode;
unsigned next_loop_depth = 0;

View file

@ -1806,8 +1806,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
bool skip_partial_copies = true;
for (auto it = copy_map.begin();;) {
if (copy_map.empty()) {
ctx->program->statistics[aco_statistic_copies] +=
ctx->instructions.size() - num_instructions_before;
ctx->program->statistics.copies += ctx->instructions.size() - num_instructions_before;
return;
}
if (it == copy_map.end()) {
@ -2085,8 +2084,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
break;
}
}
ctx->program->statistics[aco_statistic_copies] +=
ctx->instructions.size() - num_instructions_before;
ctx->program->statistics.copies += ctx->instructions.size() - num_instructions_before;
}
void
@ -2122,7 +2120,7 @@ handle_operands_linear_vgpr(std::map<PhysReg, copy_operation>& copy_map, lower_c
pi->scratch_sgpr = scratch_sgpr;
}
ctx->program->statistics[aco_statistic_copies] += scratch_sgpr == scc ? 2 : 4;
ctx->program->statistics.copies += scratch_sgpr == scc ? 2 : 4;
}
void

View file

@ -470,8 +470,8 @@ collect_presched_stats(Program* program)
RegisterDemand presched_demand;
for (Block& block : program->blocks)
presched_demand.update(block.register_demand);
program->statistics[aco_statistic_sgpr_presched] = presched_demand.sgpr;
program->statistics[aco_statistic_vgpr_presched] = presched_demand.vgpr;
program->statistics.presgprs = presched_demand.sgpr;
program->statistics.prevgprs = presched_demand.vgpr;
}
/* instructions/branches/vmem_clauses/smem_clauses/cycles */
@ -482,31 +482,31 @@ collect_preasm_stats(Program* program)
std::set<Instruction*> vmem_clause;
std::set<Instruction*> smem_clause;
program->statistics[aco_statistic_instructions] += block.instructions.size();
program->statistics.instrs += block.instructions.size();
for (aco_ptr<Instruction>& instr : block.instructions) {
const bool is_branch =
instr->isSOPP() && instr_info.classes[(int)instr->opcode] == instr_class::branch;
if (is_branch)
program->statistics[aco_statistic_branches]++;
program->statistics.branches++;
if (instr->isVALU() || instr->isVINTRP())
program->statistics[aco_statistic_valu]++;
program->statistics.valu++;
if (instr->isSALU() && !instr->isSOPP() &&
instr_info.classes[(int)instr->opcode] != instr_class::waitcnt)
program->statistics[aco_statistic_salu]++;
program->statistics.salu++;
if (instr->isVOPD())
program->statistics[aco_statistic_vopd]++;
program->statistics.vopd++;
if ((instr->isVMEM() || instr->isScratch() || instr->isGlobal()) &&
!instr->operands.empty()) {
if (std::none_of(vmem_clause.begin(), vmem_clause.end(),
[&](Instruction* other)
{ return should_form_clause(instr.get(), other); }))
program->statistics[aco_statistic_vmem_clauses]++;
program->statistics.vclause++;
vmem_clause.insert(instr.get());
program->statistics[aco_statistic_vmem]++;
program->statistics.vmem++;
} else {
vmem_clause.clear();
}
@ -515,10 +515,10 @@ collect_preasm_stats(Program* program)
if (std::none_of(smem_clause.begin(), smem_clause.end(),
[&](Instruction* other)
{ return should_form_clause(instr.get(), other); }))
program->statistics[aco_statistic_smem_clauses]++;
program->statistics.sclause++;
smem_clause.insert(instr.get());
program->statistics[aco_statistic_smem]++;
program->statistics.smem++;
} else {
smem_clause.clear();
}
@ -598,8 +598,8 @@ collect_preasm_stats(Program* program)
program->workgroup_size / (double)align(program->workgroup_size, program->wave_size);
wave64_per_cycle *= max_utilization;
program->statistics[aco_statistic_latency] = round(latency);
program->statistics[aco_statistic_inv_throughput] = round(1.0 / wave64_per_cycle);
program->statistics.latency = round(latency);
program->statistics.invthroughput = round(1.0 / wave64_per_cycle);
if (debug_flags & DEBUG_PERF_INFO) {
aco_print_program(program, stderr, print_no_ssa | print_perf_info);
@ -624,7 +624,7 @@ collect_preasm_stats(Program* program)
void
collect_postasm_stats(Program* program, const std::vector<uint32_t>& code)
{
program->statistics[aco_statistic_hash] = util_hash_crc32(code.data(), code.size() * 4);
program->statistics.hash = util_hash_crc32(code.data(), code.size() * 4);
}
Instruction_cycle_info

View file

@ -814,44 +814,41 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, pStatistics, pStatisticCount);
vk_add_exec_statistic_u64(out, "Driver pipeline hash", "Driver pipeline hash used by RGP", pipeline->pipeline_hash);
vk_add_exec_statistic_u64(out, "SGPRs", "Number of SGPR registers allocated per subgroup", shader->config.num_sgprs);
vk_add_exec_statistic_u64(out, "VGPRs", "Number of VGPR registers allocated per subgroup", shader->config.num_vgprs);
vk_add_exec_statistic_u64(out, "Spilled SGPRs", "Number of SGPR registers spilled per subgroup",
shader->config.spilled_sgprs);
vk_add_exec_statistic_u64(out, "Spilled VGPRs", "Number of VGPR registers spilled per subgroup",
shader->config.spilled_vgprs);
vk_add_exec_statistic_u64(out, "Code size", "Code size in bytes", shader->exec_size);
vk_add_exec_statistic_u64(out, "LDS size", "LDS size in bytes per workgroup",
shader->config.lds_size * lds_increment);
vk_add_exec_statistic_u64(out, "Scratch size", "Private memory in bytes per subgroup",
shader->config.scratch_bytes_per_wave);
vk_add_exec_statistic_u64(out, "Subgroups per SIMD", "The maximum number of subgroups in flight on a SIMD unit",
shader->max_waves);
struct amd_stats stats = {};
if (shader->statistics)
stats = *shader->statistics;
stats.driverhash = pipeline->pipeline_hash;
stats.sgprs = shader->config.num_sgprs;
stats.vgprs = shader->config.num_vgprs;
stats.spillsgprs = shader->config.spilled_sgprs;
stats.spillvgprs = shader->config.spilled_vgprs;
stats.codesize = shader->exec_size;
stats.lds = shader->config.lds_size * lds_increment;
stats.scratch = shader->config.scratch_bytes_per_wave;
stats.maxwaves = shader->max_waves;
uint64_t inputs = 0;
switch (stage) {
case MESA_SHADER_VERTEX:
if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
/* VS inputs when VS is a separate stage */
inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
}
break;
case MESA_SHADER_TESS_CTRL:
if (gfx_level >= GFX9) {
/* VS inputs when pipeline has tess */
inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
}
/* VS -> TCS inputs */
inputs += shader->info.tcs.num_linked_inputs;
stats.inputs += shader->info.tcs.num_linked_inputs;
break;
case MESA_SHADER_TESS_EVAL:
if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
/* TCS -> TES inputs when TES is a separate stage */
inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
}
break;
@ -863,60 +860,57 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
if (gfx_level >= GFX9) {
if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
/* VS inputs when pipeline has GS but no tess */
inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
} else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
/* TCS -> TES inputs when pipeline has GS */
inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
}
}
/* VS -> GS or TES -> GS inputs */
inputs += shader->info.gs.num_linked_inputs;
stats.inputs += shader->info.gs.num_linked_inputs;
break;
case MESA_SHADER_FRAGMENT:
inputs += shader->info.ps.num_inputs;
stats.inputs += shader->info.ps.num_inputs;
break;
default:
/* Other stages don't have IO or we are not interested in them. */
break;
}
vk_add_exec_statistic_u64(out, "Combined inputs",
"Number of input slots reserved for the shader (including merged stages)", inputs);
uint64_t outputs = 0;
switch (stage) {
case MESA_SHADER_VERTEX:
if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
/* VS -> FS outputs. */
outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
} else if (gfx_level <= GFX8) {
/* VS -> TCS, VS -> GS outputs on GFX6-8 */
outputs += shader->info.vs.num_linked_outputs;
stats.outputs += shader->info.vs.num_linked_outputs;
}
break;
case MESA_SHADER_TESS_CTRL:
if (gfx_level >= GFX9) {
/* VS -> TCS outputs on GFX9+ */
outputs += shader->info.vs.num_linked_outputs;
stats.outputs += shader->info.vs.num_linked_outputs;
}
/* TCS -> TES outputs */
outputs += shader->info.tcs.io_info.highest_remapped_vram_output +
shader->info.tcs.io_info.highest_remapped_vram_patch_output;
stats.outputs += shader->info.tcs.io_info.highest_remapped_vram_output +
shader->info.tcs.io_info.highest_remapped_vram_patch_output;
break;
case MESA_SHADER_TESS_EVAL:
if (!shader->info.tes.as_es) {
/* TES -> FS outputs */
outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
} else if (gfx_level <= GFX8) {
/* TES -> GS outputs on GFX6-8 */
outputs += shader->info.tes.num_linked_outputs;
stats.outputs += shader->info.tes.num_linked_outputs;
}
break;
@ -928,48 +922,41 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
if (gfx_level >= GFX9) {
if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
/* VS -> GS outputs on GFX9+ */
outputs += shader->info.vs.num_linked_outputs;
stats.outputs += shader->info.vs.num_linked_outputs;
} else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
/* TES -> GS outputs on GFX9+ */
outputs += shader->info.tes.num_linked_outputs;
stats.outputs += shader->info.tes.num_linked_outputs;
}
}
if (shader->info.is_ngg) {
/* GS -> FS outputs (GFX10+ NGG) */
outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
} else {
/* GS -> FS outputs (GFX6-10.3 legacy) */
outputs += shader->info.gs.gsvs_vertex_size / 16;
stats.outputs += shader->info.gs.gsvs_vertex_size / 16;
}
break;
case MESA_SHADER_MESH:
/* MS -> FS outputs */
outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
shader->info.outinfo.prim_param_exports;
break;
case MESA_SHADER_FRAGMENT:
outputs += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
!!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
!!shader->info.ps.writes_mrt0_alpha;
stats.outputs += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
!!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
!!shader->info.ps.writes_mrt0_alpha;
break;
default:
/* Other stages don't have IO or we are not interested in them. */
break;
}
vk_add_exec_statistic_u64(out, "Combined outputs",
"Number of output slots reserved for the shader (including merged stages)", outputs);
if (shader->statistics) {
for (unsigned i = 0; i < aco_num_statistics; i++) {
const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
vk_add_exec_statistic_u64(out, info->name, info->desc, shader->statistics[i]);
}
}
vk_add_amd_stats(out, &stats);
return vk_outarray_status(&out);
}

View file

@ -95,7 +95,7 @@ radv_shader_cache_deserialize(struct vk_pipeline_cache *cache, const void *key_d
void
radv_shader_serialize(struct radv_shader *shader, struct blob *blob)
{
size_t stats_size = shader->statistics ? aco_num_statistics * sizeof(uint32_t) : 0;
size_t stats_size = shader->statistics ? sizeof(struct amd_stats) : 0;
size_t code_size = shader->code_size;
uint32_t total_size = sizeof(struct radv_shader_binary_legacy) + code_size + stats_size;

View file

@ -2984,14 +2984,15 @@ radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *c
static void
radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, const char *llvm_ir_str,
unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size, uint32_t *statistics,
uint32_t stats_size, uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size,
struct amd_stats *statistics, uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
const struct aco_symbol *symbols, unsigned num_symbols,
const struct ac_shader_debug_info *debug_info, unsigned debug_info_count)
{
struct radv_shader_binary **binary = (struct radv_shader_binary **)bin;
uint32_t debug_info_size = debug_info_count * sizeof(struct ac_shader_debug_info);
uint32_t stats_size = statistics ? sizeof(struct amd_stats) : 0;
size_t size = llvm_ir_size;
@ -3019,7 +3020,7 @@ radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config,
struct radv_shader_binary_layout layout = radv_shader_binary_get_layout(legacy_binary);
if (stats_size)
memcpy(layout.stats, statistics, stats_size);
amd_stats_serialize(layout.stats, statistics);
memcpy(layout.code, code, code_dw * sizeof(uint32_t));

View file

@ -12,6 +12,7 @@
#define RADV_SHADER_H
#include "util/mesa-blake3.h"
#include "util/shader_stats.h"
#include "util/u_math.h"
#include "vulkan/vulkan.h"
#include "ac_binary.h"
@ -460,7 +461,7 @@ struct radv_shader {
char *nir_string;
char *disasm_string;
char *ir_string;
uint32_t *statistics;
struct amd_stats *statistics;
struct ac_shader_debug_info *debug_info;
uint32_t debug_info_count;
};

View file

@ -105,7 +105,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
static void
si_aco_build_shader_binary(void **data, const struct ac_shader_config *config,
const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str,
unsigned disasm_size, uint32_t *statistics, uint32_t stats_size,
unsigned disasm_size, struct amd_stats *statistics,
uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
const struct aco_symbol *symbols, unsigned num_symbols,
const struct ac_shader_debug_info *debug_info, unsigned debug_info_count)

View file

@ -90,4 +90,33 @@
<stat name="TMU Fills" display="Fills">Number of times a register was filled from memory</stat>
<stat name="QPU Read Stalls" display="Read Stalls">Number of cycles the QPU stalls for a register read dependency</stat>
</isa>
<isa name="AMD">
<stat name="Driver pipeline hash" display="DriverHash" hash="true" type="u64">Driver pipeline hash used by RGP</stat>
<stat name="SGPRs">Number of SGPR registers allocated per subgroup</stat>
<stat name="VGPRs">Number of VGPR registers allocated per subgroup</stat>
<stat name="Spilled SGPRs" display="SpillSGPRs">Number of SGPR registers spilled per subgroup</stat>
<stat name="Spilled VGPRs" display="SpillVGPRs">Number of VGPR registers spilled per subgroup</stat>
<stat name="Code size" display="CodeSize">Code size in bytes</stat>
<stat name="LDS size" display="LDS">LDS size in bytes per workgroup</stat>
<stat name="Scratch size" display="Scratch">Private memory in bytes per subgroup</stat>
<stat name="Subgroups per SIMD" display="MaxWaves" more="better">The maximum number of subgroups in flight on a SIMD unit</stat>
<stat name="Combined inputs" display="Inputs">Number of input slots reserved for the shader (including merged stages)</stat>
<stat name="Combined outputs" display="Outputs">Number of output slots reserved for the shader (including merged stages)</stat>
<stat name="Hash" hash="true">CRC32 hash of code and constant data</stat>
<stat name="Instructions" display="Instrs">Instruction count</stat>
<stat name="Copies">Copy instructions created for pseudo-instructions</stat>
<stat name="Branches">Branch instructions</stat>
<stat name="Latency">Issue cycles plus stall cycles</stat>
<stat name="Inverse Throughput" display="InvThroughput">Estimated busy cycles to execute one wave</stat>
<stat name="VMEM Clause" display="VClause">Number of VMEM clauses (includes 1-sized clauses)</stat>
<stat name="SMEM Clause" display="SClause">Number of SMEM clauses (includes 1-sized clauses)</stat>
<stat name="Pre-Sched SGPRs" display="PreSGPRs">SGPR usage before scheduling</stat>
<stat name="Pre-Sched VGPRs" display="PreVGPRs">VGPR usage before scheduling</stat>
<stat name="VALU">Number of VALU instructions</stat>
<stat name="SALU">Number of SALU instructions</stat>
<stat name="VMEM">Number of VMEM instructions</stat>
<stat name="SMEM">Number of SMEM instructions</stat>
<stat name="VOPD" more="better">Number of VOPD instructions</stat>
</isa>
</shaderdb>