aco: remove Pseudo_instruction::tmp_in_scc

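This information is now redundant: whether SCC has to be preserved can be derived from needs_scratch_reg together with scratch_sgpr != scc.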

No fossil-db changes.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32217>
This commit is contained in:
Daniel Schürmann 2024-11-19 10:26:26 +01:00 committed by Marge Bot
parent a1a4a6061c
commit 1ff9a0fe80
7 changed files with 24 additions and 27 deletions

View file

@ -1712,8 +1712,7 @@ struct Export_instruction : public Instruction {
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
/**
 * Instruction format for pseudo instructions.
 *
 * On top of the base Instruction it carries the scratch-register bookkeeping
 * that the lowering code reads when expanding the instruction (see the
 * handle_operands() hunks in this change).
 *
 * This change drops the separate tmp_in_scc flag. After it, the lowering code
 * decides whether SCC must be preserved from
 * `needs_scratch_reg && scratch_sgpr != scc` (and the copies not already
 * writing SCC); scratch_sgpr == scc is used when SCC does not need saving.
 */
struct Pseudo_instruction : public Instruction {
PhysReg scratch_sgpr; /* might not be valid if it's not needed */
bool tmp_in_scc;
PhysReg scratch_sgpr; /* might not be valid if it's not needed */
bool needs_scratch_reg; /* if scratch_sgpr/scc can be written, initialized by RA. */
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

View file

@ -1711,7 +1711,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
if (it->second.def.physReg() == scc)
writes_scc = true;
assert(!pi->tmp_in_scc || !(it->second.def.physReg() == pi->scratch_sgpr));
assert(!pi->needs_scratch_reg || it->second.def.physReg() != pi->scratch_sgpr);
/* if src and dst reg are the same, remove operation */
if (it->first == it->second.op.physReg()) {
@ -1753,7 +1753,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
}
/* first, handle paths in the location transfer graph */
bool preserve_scc = pi->tmp_in_scc && !writes_scc;
bool preserve_scc = pi->needs_scratch_reg && pi->scratch_sgpr != scc && !writes_scc;
bool skip_partial_copies = true;
for (auto it = copy_map.begin();;) {
if (copy_map.empty()) {
@ -2056,23 +2056,24 @@ handle_operands_linear_vgpr(std::map<PhysReg, copy_operation>& copy_map, lower_c
std::map<PhysReg, copy_operation> second_map(copy_map);
handle_operands(second_map, ctx, gfx_level, pi);
bool tmp_in_scc = pi->tmp_in_scc;
if (tmp_in_scc) {
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
pi->tmp_in_scc = false;
assert(pi->needs_scratch_reg);
PhysReg scratch_sgpr = pi->scratch_sgpr;
if (scratch_sgpr != scc) {
bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1), Operand(scc, s1));
pi->scratch_sgpr = scc;
}
bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1), Operand(exec, bld.lm));
handle_operands(copy_map, ctx, gfx_level, pi);
bld.sop1(Builder::s_not, Definition(exec, bld.lm), Definition(scc, s1), Operand(exec, bld.lm));
if (tmp_in_scc) {
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(pi->scratch_sgpr, s1),
if (scratch_sgpr != scc) {
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(scratch_sgpr, s1),
Operand::zero());
pi->tmp_in_scc = true;
pi->scratch_sgpr = scratch_sgpr;
}
ctx->program->statistics[aco_statistic_copies] += tmp_in_scc ? 4 : 2;
ctx->program->statistics[aco_statistic_copies] += scratch_sgpr == scc ? 2 : 4;
}
void

View file

@ -139,8 +139,6 @@ save_reg_writes(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.instr_idx_by_regs[ctx.current_block->index].begin() + r + dw_size, idx);
}
if (instr->isPseudo() && instr->pseudo().needs_scratch_reg) {
if (!instr->pseudo().tmp_in_scc)
ctx.instr_idx_by_regs[ctx.current_block->index][scc] = overwritten_unknown_instr;
ctx.instr_idx_by_regs[ctx.current_block->index][instr->pseudo().scratch_sgpr] =
overwritten_unknown_instr;
}

View file

@ -2043,7 +2043,6 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
return;
instr->pseudo().needs_scratch_reg = true;
instr->pseudo().tmp_in_scc = reg_file[scc];
if (!reg_file[scc]) {
instr->pseudo().scratch_sgpr = scc;
@ -2322,7 +2321,6 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file,
}
for (aco_ptr<Instruction>& phi : instructions) {
phi->pseudo().tmp_in_scc = register_file[scc];
phi->pseudo().scratch_sgpr = scratch_reg;
phi->pseudo().needs_scratch_reg = true;
}
@ -3000,7 +2998,6 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
handle_pseudo(ctx, tmp_file, pc.get());
} else {
pc->pseudo().needs_scratch_reg = may_swap_sgprs || linear_vgpr;
pc->pseudo().tmp_in_scc = false;
pc->pseudo().scratch_sgpr = scc;
}

View file

@ -81,8 +81,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
pc->operands[i] = phi_info.op;
i++;
}
/* this shouldn't be needed since we're only copying vgprs */
pc->pseudo().tmp_in_scc = false;
pc->pseudo().needs_scratch_reg = false;
block.instructions.insert(it, std::move(pc));
}
@ -102,7 +101,6 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
pc->operands[i] = phi_info.op;
i++;
}
pc->pseudo().tmp_in_scc = succ.instructions[0]->pseudo().tmp_in_scc;
pc->pseudo().scratch_sgpr = succ.instructions[0]->pseudo().scratch_sgpr;
pc->pseudo().needs_scratch_reg = succ.instructions[0]->pseudo().needs_scratch_reg;
auto it = std::prev(block.instructions.end());

View file

@ -554,12 +554,13 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
finish_ra_test(ra_test_policy());
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:1 scratch:s1
//~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:0 scratch:s253
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s1
//~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s253
aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
aco_print_instr(program->gfx_level, parallelcopy.get(), output);
if (parallelcopy->isPseudo()) {
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy->pseudo().tmp_in_scc,
fprintf(output, " needs_scratch:%d scratch:s%u\n",
parallelcopy->pseudo().needs_scratch_reg,
parallelcopy->pseudo().scratch_sgpr.reg());
} else {
fprintf(output, "\n");

View file

@ -637,7 +637,7 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1.as_linear()),
Operand(v1_lo, v1.as_linear()));
instr->pseudo().scratch_sgpr = m0;
instr->pseudo().tmp_in_scc = true;
instr->pseudo().needs_scratch_reg = true;
finish_to_hw_instr_test();
END_TEST
@ -660,7 +660,8 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
Operand(reg_v0, v1_linear));
instr->pseudo().scratch_sgpr = m0;
instr->pseudo().scratch_sgpr = scc;
instr->pseudo().needs_scratch_reg = true;
finish_to_hw_instr_test();
END_TEST
@ -684,7 +685,8 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3)
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear),
Operand(reg_v4, v3_linear));
instr->pseudo().scratch_sgpr = m0;
instr->pseudo().scratch_sgpr = scc;
instr->pseudo().needs_scratch_reg = true;
finish_to_hw_instr_test();
END_TEST
@ -709,7 +711,8 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce)
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear),
Operand(reg_v5, v1_linear));
instr->pseudo().scratch_sgpr = m0;
instr->pseudo().scratch_sgpr = scc;
instr->pseudo().needs_scratch_reg = true;
finish_to_hw_instr_test();
END_TEST