diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index e3f1986d9f1..20cb04c82a0 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -82,10 +82,8 @@ void aco_compile_shader(unsigned shader_count, aco::select_trap_handler_shader(program.get(), shaders[0], &config, args); else aco::select_program(program.get(), shader_count, shaders, &config, args); - if (args->options->dump_preoptir) { - std::cerr << "After Instruction Selection:\n"; + if (args->options->dump_preoptir) aco_print_program(program.get(), stderr); - } aco::live live_vars; if (!args->is_trap_handler_shader) { @@ -142,15 +140,12 @@ void aco_compile_shader(unsigned shader_count, /* Register Allocation */ aco::register_allocation(program.get(), live_vars.live_out); - if (args->options->dump_shader) { - std::cerr << "After RA:\n"; - aco_print_program(program.get(), stderr); - } if (aco::validate_ra(program.get())) { - std::cerr << "Program after RA validation failure:\n"; aco_print_program(program.get(), stderr); abort(); + } else if (args->options->dump_shader) { + aco_print_program(program.get(), stderr); } validate(program.get()); diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 01de6998ce4..36855a84c97 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -160,6 +160,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info, program->wgp_mode = wgp_mode; + program->progress = CompilationProgress::after_isel; + program->next_fp_mode.preserve_signed_zero_inf_nan32 = false; program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false; program->next_fp_mode.must_flush_denorms32 = false; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 89ff0b96273..2013567f993 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1859,6 +1859,12 @@ struct DeviceInfo { bool sram_ecc_enabled = false; }; +enum class CompilationProgress { + after_isel, + after_spilling, + after_ra, +}; + class Program final { public: std::vector blocks; @@ -1889,6 +1895,8 @@ public: bool needs_vcc = false; bool needs_flat_scr = false; + CompilationProgress progress; + bool collect_statistics = false; uint32_t statistics[num_statistics]; @@ -1983,7 +1991,7 @@ void select_trap_handler_shader(Program *program, struct nir_shader *shader, void lower_phis(Program* program); void calc_min_waves(Program* program); void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand); -live live_var_analysis(Program* program, bool update_register_demand=true); +live live_var_analysis(Program* program); std::vector dead_code_analysis(Program *program); void dominator_tree(Program* program); void insert_exec_mask(Program *program); diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 86a49ecf6b0..e9d1567e599 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -82,8 +82,7 @@ RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr& in namespace { void process_live_temps_per_block(Program *program, live& lives, Block* block, - std::set& worklist, std::vector& phi_sgpr_ops, - bool update_register_demand) + std::set& worklist, std::vector& phi_sgpr_ops) { std::vector& register_demand = lives.register_demand[block->index]; RegisterDemand new_demand; @@ -165,7 +164,7 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, /* update block's register demand for a last time */ block_register_demand.update(new_demand); - if (update_register_demand) + if (program->progress < CompilationProgress::after_ra) block->register_demand = block_register_demand; /* handle phi definitions */ @@ -365,7 +364,7 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand) } } -live live_var_analysis(Program* program, bool update_register_demand) +live live_var_analysis(Program* program) { live result; result.live_out.resize(program->blocks.size()); @@ -383,13 +382,12 @@ live live_var_analysis(Program* program, bool update_register_demand) std::set::reverse_iterator b_it = worklist.rbegin(); unsigned block_idx = *b_it; worklist.erase(block_idx); - process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, - phi_sgpr_ops, update_register_demand); + process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, phi_sgpr_ops); new_demand.update(program->blocks[block_idx].register_demand); } /* calculate the program's register demand and number of waves */ - if (update_register_demand) + if (program->progress < CompilationProgress::after_ra) update_vgpr_sgpr_demand(program, new_demand); return result; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index ba2925e97ef..47a3e98e9c2 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -906,6 +906,19 @@ void aco_print_block(const Block* block, FILE *output, unsigned flags, const liv void aco_print_program(const Program *program, FILE *output, const live& live_vars, unsigned flags) { + switch (program->progress) { + case CompilationProgress::after_isel: + fprintf(output, "After Instruction Selection:\n"); + break; + case CompilationProgress::after_spilling: + fprintf(output, "After Spilling:\n"); + flags |= print_kill; + break; + case CompilationProgress::after_ra: + fprintf(output, "After RA:\n"); + break; + } + print_stage(program->stage, output); for (Block const& block : program->blocks) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 413d3f06172..d4b5cc90fef 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2625,6 +2625,8 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc /* num_gpr = rnd_up(max_used_gpr + 1) */ program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1); program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1); + + program->progress = CompilationProgress::after_ra; } } diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index af21128d3cb..5df0fbc63a4 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1704,6 +1704,8 @@ void spill(Program* program, live& live_vars) program->config->spilled_vgprs = 0; program->config->spilled_sgprs = 0; + program->progress = CompilationProgress::after_spilling; + /* no spilling when register pressure is low enough */ if (program->num_waves > 0) return; diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index e5c44d6dabd..ee6e4c7a2e6 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -681,7 +681,7 @@ bool validate_ra(Program *program) { return false; bool err = false; - aco::live live_vars = aco::live_var_analysis(program, false); + aco::live live_vars = aco::live_var_analysis(program); std::vector> phi_sgpr_ops(program->blocks.size()); uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves); diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 15ef8c81400..9c11a07e7f9 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -41,14 +41,14 @@ BEGIN_TEST(isel.interp.simple) layout(location = 0) out vec4 out_color; void main() { //>> v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w - //! v1: %a = v_interp_p2_f32 %by, %pm:m0, %a_tmp attr0.w + //! v1: %a = v_interp_p2_f32 %by, %pm:m0, (kill)%a_tmp attr0.w //! v1: %b_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.z - //! v1: %b = v_interp_p2_f32 %by, %pm:m0, %b_tmp attr0.z + //! v1: %b = v_interp_p2_f32 %by, %pm:m0, (kill)%b_tmp attr0.z //! v1: %g_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.y - //! v1: %g = v_interp_p2_f32 %by, %pm:m0, %g_tmp attr0.y - //! v1: %r_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.x - //! v1: %r = v_interp_p2_f32 %by, %pm:m0, %r_tmp attr0.x - //! exp %r, %g, %b, %a mrt0 + //! v1: %g = v_interp_p2_f32 %by, %pm:m0, (kill)%g_tmp attr0.y + //! v1: %r_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.x + //! v1: %r = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%r_tmp attr0.x + //! exp (kill)%r, (kill)%g, (kill)%b, (kill)%a mrt0 out_color = in_color; } ); @@ -158,13 +158,13 @@ BEGIN_TEST(isel.sparse.clause) //; funcs['sample_res'] = lambda _: 'v#_' //; funcs['sample_coords'] = lambda _: '[v#_, v#_, v#_, v#_]' //>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, %zero0, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation //>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, %zero1, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation //>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, %zero2, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation //>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, %zero3, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe storage: semantics: scope:invocation //>> s_clause 0x3 //! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe //! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe