mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
aco: add ACO_DEBUG=perfinfo
This prints the program with each instruction's contribution to its latency and various factors for the calculation of the Inverse Throughput statistic. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>
This commit is contained in:
parent
5d6a1095bf
commit
a0243f5c47
5 changed files with 29 additions and 2 deletions
|
|
@ -167,7 +167,7 @@ void aco_compile_shader(unsigned shader_count,
|
|||
if (program->chip_class >= GFX10)
|
||||
aco::form_hard_clauses(program.get());
|
||||
|
||||
if (program->collect_statistics)
|
||||
if (program->collect_statistics || (aco::debug_flags & aco::DEBUG_PERF_INFO))
|
||||
aco::collect_preasm_stats(program.get());
|
||||
|
||||
/* Assembly */
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ static const struct debug_control aco_debug_options[] = {
|
|||
{"novn", DEBUG_NO_VN},
|
||||
{"noopt", DEBUG_NO_OPT},
|
||||
{"nosched", DEBUG_NO_SCHED},
|
||||
{"perfinfo", DEBUG_PERF_INFO},
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ enum {
|
|||
DEBUG_NO_VN = 0x10,
|
||||
DEBUG_NO_OPT = 0x20,
|
||||
DEBUG_NO_SCHED = 0x40,
|
||||
DEBUG_PERF_INFO = 0x80,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -2012,6 +2013,7 @@ void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
|
|||
|
||||
/* Bit flags OR-ed together and passed as the `flags` argument of the
 * aco_print_* functions (e.g. aco_print_operand, aco_print_block,
 * aco_print_program). Each enumerator occupies its own bit. */
enum print_flags {
|
||||
print_no_ssa = 0x1, /* NOTE(review): presumably omits SSA ids from the printed IR - confirm in the printer */
|
||||
print_perf_info = 0x2, /* aco_print_block prefixes each instruction with "(N clk)" read from instr->pass_flags */
|
||||
};
|
||||
|
||||
void aco_print_operand(const Operand *operand, FILE *output, unsigned flags=0);
|
||||
|
|
|
|||
|
|
@ -875,6 +875,9 @@ void aco_print_block(const Block* block, FILE *output, unsigned flags)
|
|||
fprintf(output, "*/\n");
|
||||
for (auto const& instr : block->instructions) {
|
||||
fprintf(output, "\t");
|
||||
if (flags & print_perf_info)
|
||||
fprintf(output, "(%3u clk) ", instr->pass_flags);
|
||||
|
||||
aco_print_instr(instr.get(), output, flags);
|
||||
fprintf(output, "\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -488,8 +488,11 @@ void collect_preasm_stats(Program *program)
|
|||
for (unsigned pred : block.linear_preds)
|
||||
block_est.join(blocks[pred]);
|
||||
|
||||
for (aco_ptr<Instruction>& instr : block.instructions)
|
||||
for (aco_ptr<Instruction>& instr : block.instructions) {
|
||||
unsigned before = block_est.cur_cycle;
|
||||
block_est.add(instr);
|
||||
instr->pass_flags = block_est.cur_cycle - before;
|
||||
}
|
||||
|
||||
/* TODO: it would be nice to be able to consider estimated loop trip
|
||||
* counts used for loop unrolling.
|
||||
|
|
@ -541,6 +544,24 @@ void collect_preasm_stats(Program *program)
|
|||
|
||||
program->statistics[statistic_latency] = round(latency);
|
||||
program->statistics[statistic_inv_throughput] = round(1.0 / wave64_per_cycle);
|
||||
|
||||
if (debug_flags & DEBUG_PERF_INFO) {
|
||||
aco_print_program(program, stderr, print_no_ssa | print_perf_info);
|
||||
|
||||
fprintf(stderr, "num_waves: %u\n", program->num_waves);
|
||||
fprintf(stderr, "salu_smem_usage: %f\n", usage[(int)BlockCycleEstimator::scalar]);
|
||||
fprintf(stderr, "branch_sendmsg_usage: %f\n", usage[(int)BlockCycleEstimator::branch_sendmsg]);
|
||||
fprintf(stderr, "valu_usage: %f\n", usage[(int)BlockCycleEstimator::valu]);
|
||||
fprintf(stderr, "valu_complex_usage: %f\n", usage[(int)BlockCycleEstimator::valu_complex]);
|
||||
fprintf(stderr, "lds_usage: %f\n", usage[(int)BlockCycleEstimator::lds]);
|
||||
fprintf(stderr, "export_gds_usage: %f\n", usage[(int)BlockCycleEstimator::export_gds]);
|
||||
fprintf(stderr, "vmem_usage: %f\n", usage[(int)BlockCycleEstimator::vmem]);
|
||||
fprintf(stderr, "latency: %f\n", latency);
|
||||
fprintf(stderr, "parallelism: %f\n", parallelism);
|
||||
fprintf(stderr, "max_utilization: %f\n", max_utilization);
|
||||
fprintf(stderr, "wave64_per_cycle: %f\n", wave64_per_cycle);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue