From 6ecbda83f898d983abcd461d6ea79fee04f04bbd Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 20 Apr 2024 09:24:29 +0200 Subject: [PATCH] aco/ra: remove gfx6/7 subdword paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Daniel Schürmann Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 23 ++++-------- src/amd/compiler/tests/helpers.cpp | 7 +--- src/amd/compiler/tests/helpers.h | 2 +- src/amd/compiler/tests/test_regalloc.cpp | 37 -------------------- 4 files changed, 9 insertions(+), 60 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 47fefded1e5..6de9761e008 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -498,14 +498,13 @@ unsigned get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, unsigned idx, RegClass rc) { + assert(gfx_level >= GFX8); if (instr->isPseudo()) { /* v_readfirstlane_b32 cannot use SDWA */ if (instr->opcode == aco_opcode::p_as_uniform) return 4; - else if (gfx_level >= GFX8) - return rc.bytes() % 2 == 0 ? 2 : 1; else - return 4; + return rc.bytes() % 2 == 0 ? 2 : 1; } assert(rc.bytes() <= 2); @@ -608,13 +607,13 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr { amd_gfx_level gfx_level = program->gfx_level; + assert(gfx_level >= GFX8); + if (instr->isPseudo()) { if (instr->opcode == aco_opcode::p_interp_gfx11) return std::make_pair(4u, 4u); - else if (gfx_level >= GFX8) - return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes()); else - return std::make_pair(4, rc.size() * 4u); + return std::make_pair(rc.bytes() % 2 == 0 ? 
2 : 1, rc.bytes()); } if (instr->isVALU()) { @@ -2050,16 +2049,12 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr) } /* if all operands are constant, no need to care either */ bool reads_linear = false; - bool reads_subdword = false; for (Operand& op : instr->operands) { if (op.isTemp() && op.getTemp().regClass().is_linear()) reads_linear = true; - if (op.isTemp() && op.regClass().is_subdword()) - reads_subdword = true; } - bool needs_scratch_reg = (writes_linear && reads_linear && reg_file[scc]) || - (ctx.program->gfx_level <= GFX7 && reads_subdword); - if (!needs_scratch_reg) + + if (!writes_linear || !reads_linear || !reg_file[scc]) return; instr->pseudo().needs_scratch_reg = true; @@ -2072,10 +2067,6 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr) reg = ctx.max_used_sgpr + 1; for (; reg < ctx.program->max_reg_demand.sgpr && reg_file[PhysReg{(unsigned)reg}]; reg++) ; - if (reg == ctx.program->max_reg_demand.sgpr) { - assert(reads_subdword && reg_file[m0] == 0); - reg = m0; - } } adjust_max_used_regs(ctx, s1, reg); diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index bada6795963..5a2b1b2999b 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -238,7 +238,7 @@ finish_lower_subdword_test() } void -finish_ra_test(ra_test_policy policy, bool lower) +finish_ra_test(ra_test_policy policy) { finish_program(program.get()); if (!aco::validate_ir(program.get())) { @@ -255,11 +255,6 @@ finish_ra_test(ra_test_policy policy, bool lower) return; } - if (lower) { - aco::ssa_elimination(program.get()); - aco::lower_to_hw_instr(program.get()); - } - aco_print_program(program.get(), output); } diff --git a/src/amd/compiler/tests/helpers.h b/src/amd/compiler/tests/helpers.h index b287c0858a6..7e27cd53231 100644 --- a/src/amd/compiler/tests/helpers.h +++ b/src/amd/compiler/tests/helpers.h @@ -71,7 +71,7 @@ void finish_validator_test(); void 
finish_opt_test(); void finish_setup_reduce_temp_test(); void finish_lower_subdword_test(); -void finish_ra_test(aco::ra_test_policy, bool lower = false); +void finish_ra_test(aco::ra_test_policy); void finish_optimizer_postRA_test(); void finish_to_hw_instr_test(); void finish_schedule_vopd_test(); diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index cef6ab71ef3..16747c0e575 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -172,43 +172,6 @@ BEGIN_TEST(regalloc.precolor.different_regs) finish_ra_test(ra_test_policy()); END_TEST -BEGIN_TEST(regalloc.scratch_sgpr.create_vector) - if (!setup_cs("v1 s1", GFX7)) - return; - - Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero()); - - //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] - //! s1: %0:s[1] = s_mov_b32 0x1000001 - //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8] - bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp)); - - //! p_unit_test %_:s[0] - //! s_endpgm - bld.pseudo(aco_opcode::p_unit_test, inputs[1]); - - finish_ra_test(ra_test_policy(), true); -END_TEST - -BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand) - if (!setup_cs("v2 s1", GFX7)) - return; - - Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u)); - - //>> v1: %0:v[0] = v_mov_b32 %_:s[0] - //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24] - //! s1: %0:s[1] = s_mov_b32 0x1000001 - //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8] - bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp)); - - //! p_unit_test %_:s[0] - //! s_endpgm - bld.pseudo(aco_opcode::p_unit_test, inputs[1]); - - finish_ra_test(ra_test_policy(), true); -END_TEST - BEGIN_TEST(regalloc.branch_def_phis_at_merge_block) //>> p_startpgm if (!setup_cs("", GFX10))