aco/optimizer_postRA: Use unique_ptr + array for instruction indices.

According to perf, this roughly halves the impact of the post-RA
optimizer on ACO's compile times.

The measurement was taken on a debugoptimized build with
NIR_DEBUG=novalidate RADV_DEBUG=nocache, replaying the Fossilize DB
of the Doom Eternal shaders.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18103>
This commit is contained in:
Timur Kristóf 2022-08-17 08:12:51 +02:00 committed by Marge Bot
parent 6f598fe4e3
commit b542ab0243

View file

@@ -63,11 +63,18 @@ Idx const_or_undef{UINT32_MAX, 2};
Idx overwritten_untrackable{UINT32_MAX, 3};
struct pr_opt_ctx {
using Idx_array = std::array<Idx, max_reg_cnt>;
Program* program;
Block* current_block;
uint32_t current_instr_idx;
std::vector<uint16_t> uses;
std::vector<std::array<Idx, max_reg_cnt>> instr_idx_by_regs;
std::unique_ptr<Idx_array[]> instr_idx_by_regs;
pr_opt_ctx(Program* p)
: program(p), current_block(nullptr), current_instr_idx(0), uses(dead_code_analysis(p)),
instr_idx_by_regs(std::unique_ptr<Idx_array[]>{new Idx_array[p->blocks.size()]})
{}
void reset_block(Block* block)
{
@@ -557,10 +564,7 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
void
optimize_postRA(Program* program)
{
pr_opt_ctx ctx;
ctx.program = program;
ctx.uses = dead_code_analysis(program);
ctx.instr_idx_by_regs.resize(program->blocks.size());
pr_opt_ctx ctx(program);
/* Forward pass
* Goes through each instruction exactly once, and can transform