aco: add and use Program::progress

This is used when printing the program and to avoid updating register
demand during post-RA liveness analysis.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10315>
This commit is contained in:
Rhys Perry 2021-04-20 17:35:41 +01:00 committed by Marge Bot
parent 2d36232e62
commit 776ba40115
9 changed files with 47 additions and 27 deletions

View file

@ -82,10 +82,8 @@ void aco_compile_shader(unsigned shader_count,
aco::select_trap_handler_shader(program.get(), shaders[0], &config, args);
else
aco::select_program(program.get(), shader_count, shaders, &config, args);
if (args->options->dump_preoptir) {
std::cerr << "After Instruction Selection:\n";
if (args->options->dump_preoptir)
aco_print_program(program.get(), stderr);
}
aco::live live_vars;
if (!args->is_trap_handler_shader) {
@ -142,15 +140,12 @@ void aco_compile_shader(unsigned shader_count,
/* Register Allocation */
aco::register_allocation(program.get(), live_vars.live_out);
if (args->options->dump_shader) {
std::cerr << "After RA:\n";
aco_print_program(program.get(), stderr);
}
if (aco::validate_ra(program.get())) {
std::cerr << "Program after RA validation failure:\n";
aco_print_program(program.get(), stderr);
abort();
} else if (args->options->dump_shader) {
aco_print_program(program.get(), stderr);
}
validate(program.get());

View file

@ -160,6 +160,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->wgp_mode = wgp_mode;
program->progress = CompilationProgress::after_isel;
program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
program->next_fp_mode.must_flush_denorms32 = false;

View file

@ -1859,6 +1859,12 @@ struct DeviceInfo {
bool sram_ecc_enabled = false;
};
/* Records how far a Program has advanced through the compilation pipeline.
 *
 * The enumerators are listed in pipeline order and the numeric order is
 * relied upon: liveness analysis compares the current value against
 * after_ra with operator< to decide whether register demand may still be
 * updated. Keep new stages inserted in chronological position.
 */
enum class CompilationProgress {
   /* Initial state assigned by init_program(). */
   after_isel = 0,
   /* Assigned by spill(), before its early return for low register pressure. */
   after_spilling = 1,
   /* Assigned by register_allocation() once the GPR counts are configured. */
   after_ra = 2,
};
class Program final {
public:
std::vector<Block> blocks;
@ -1889,6 +1895,8 @@ public:
bool needs_vcc = false;
bool needs_flat_scr = false;
CompilationProgress progress;
bool collect_statistics = false;
uint32_t statistics[num_statistics];
@ -1983,7 +1991,7 @@ void select_trap_handler_shader(Program *program, struct nir_shader *shader,
void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, bool update_register_demand=true);
live live_var_analysis(Program* program);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);

View file

@ -82,8 +82,7 @@ RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& in
namespace {
void process_live_temps_per_block(Program *program, live& lives, Block* block,
std::set<unsigned>& worklist, std::vector<uint16_t>& phi_sgpr_ops,
bool update_register_demand)
std::set<unsigned>& worklist, std::vector<uint16_t>& phi_sgpr_ops)
{
std::vector<RegisterDemand>& register_demand = lives.register_demand[block->index];
RegisterDemand new_demand;
@ -165,7 +164,7 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
/* update block's register demand for a last time */
block_register_demand.update(new_demand);
if (update_register_demand)
if (program->progress < CompilationProgress::after_ra)
block->register_demand = block_register_demand;
/* handle phi definitions */
@ -365,7 +364,7 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
}
}
live live_var_analysis(Program* program, bool update_register_demand)
live live_var_analysis(Program* program)
{
live result;
result.live_out.resize(program->blocks.size());
@ -383,13 +382,12 @@ live live_var_analysis(Program* program, bool update_register_demand)
std::set<unsigned>::reverse_iterator b_it = worklist.rbegin();
unsigned block_idx = *b_it;
worklist.erase(block_idx);
process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist,
phi_sgpr_ops, update_register_demand);
process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, phi_sgpr_ops);
new_demand.update(program->blocks[block_idx].register_demand);
}
/* calculate the program's register demand and number of waves */
if (update_register_demand)
if (program->progress < CompilationProgress::after_ra)
update_vgpr_sgpr_demand(program, new_demand);
return result;

View file

@ -906,6 +906,19 @@ void aco_print_block(const Block* block, FILE *output, unsigned flags, const liv
void aco_print_program(const Program *program, FILE *output, const live& live_vars, unsigned flags)
{
switch (program->progress) {
case CompilationProgress::after_isel:
fprintf(output, "After Instruction Selection:\n");
break;
case CompilationProgress::after_spilling:
fprintf(output, "After Spilling:\n");
flags |= print_kill;
break;
case CompilationProgress::after_ra:
fprintf(output, "After RA:\n");
break;
}
print_stage(program->stage, output);
for (Block const& block : program->blocks)

View file

@ -2625,6 +2625,8 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
/* num_gpr = rnd_up(max_used_gpr + 1) */
program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1);
program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1);
program->progress = CompilationProgress::after_ra;
}
}

View file

@ -1704,6 +1704,8 @@ void spill(Program* program, live& live_vars)
program->config->spilled_vgprs = 0;
program->config->spilled_sgprs = 0;
program->progress = CompilationProgress::after_spilling;
/* no spilling when register pressure is low enough */
if (program->num_waves > 0)
return;

View file

@ -681,7 +681,7 @@ bool validate_ra(Program *program) {
return false;
bool err = false;
aco::live live_vars = aco::live_var_analysis(program, false);
aco::live live_vars = aco::live_var_analysis(program);
std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);

View file

@ -41,14 +41,14 @@ BEGIN_TEST(isel.interp.simple)
layout(location = 0) out vec4 out_color;
void main() {
//>> v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w
//! v1: %a = v_interp_p2_f32 %by, %pm:m0, %a_tmp attr0.w
//! v1: %a = v_interp_p2_f32 %by, %pm:m0, (kill)%a_tmp attr0.w
//! v1: %b_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.z
//! v1: %b = v_interp_p2_f32 %by, %pm:m0, %b_tmp attr0.z
//! v1: %b = v_interp_p2_f32 %by, %pm:m0, (kill)%b_tmp attr0.z
//! v1: %g_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.y
//! v1: %g = v_interp_p2_f32 %by, %pm:m0, %g_tmp attr0.y
//! v1: %r_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.x
//! v1: %r = v_interp_p2_f32 %by, %pm:m0, %r_tmp attr0.x
//! exp %r, %g, %b, %a mrt0
//! v1: %g = v_interp_p2_f32 %by, %pm:m0, (kill)%g_tmp attr0.y
//! v1: %r_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.x
//! v1: %r = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%r_tmp attr0.x
//! exp (kill)%r, (kill)%g, (kill)%b, (kill)%a mrt0
out_color = in_color;
}
);
@ -158,13 +158,13 @@ BEGIN_TEST(isel.sparse.clause)
//; funcs['sample_res'] = lambda _: 'v#_'
//; funcs['sample_coords'] = lambda _: '[v#_, v#_, v#_, v#_]'
//>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %zero0, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %zero1, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %zero2, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %zero3, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> s_clause 0x3
//! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe