aco: only set latekill in live_var_analysis

Cleaner to have this all in one place, in my opinion.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30368>
Georg Lehmann, 2024-07-25 17:15:15 +02:00 (committed by Marge Bot)
parent 510f5e55be
commit dedfff9dbf
8 changed files with 87 additions and 109 deletions
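
The pattern in a nutshell: instead of every emit site calling Operand::setLateKill() on its own operands, live_var_analysis now derives the flag from the opcode in one pass. The standalone sketch below illustrates that idea only; the types and the opcode list are simplified stand-ins modeled on the identifiers in this diff, not the real ACO definitions.

// Simplified model of the "set latekill in one place" pattern.
// The Operand/Instruction types here are stand-ins, not the ACO ones.
#include <cstdio>
#include <vector>

enum class opcode { p_bpermute_readlane, p_init_scratch, v_mov_b32 };

struct Operand {
   bool late_kill = false;
   void setLateKill(bool v) { late_kill = v; }
};

struct Instruction {
   opcode op;
   std::vector<Operand> operands;
};

/* One central place (the analogue of process_live_temps_per_block) decides
 * which operands are late-kill, based only on the opcode. */
void set_late_kill(Instruction& insn)
{
   switch (insn.op) {
   case opcode::p_bpermute_readlane:
      /* whole-instruction rule: every operand is late-kill */
      for (Operand& o : insn.operands)
         o.setLateKill(true);
      break;
   case opcode::p_init_scratch:
      /* per-operand rule: only the last operand is late-kill */
      insn.operands.back().setLateKill(true);
      break;
   default:
      break;
   }
}

int main()
{
   Instruction insn{opcode::p_bpermute_readlane, {Operand{}, Operand{}}};
   set_late_kill(insn);
   std::printf("latekill: %d %d\n", insn.operands[0].late_kill, insn.operands[1].late_kill);
}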

@ -180,13 +180,8 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
if (ctx->options->gfx_level <= GFX7 || avoid_shared_vgprs) {
/* GFX6-7: there is no bpermute instruction */
Operand index_op(index);
Operand input_data(data);
index_op.setLateKill(true);
input_data.setLateKill(true);
return bld.pseudo(aco_opcode::p_bpermute_readlane, bld.def(v1), bld.def(bld.lm),
bld.def(bld.lm, vcc), index_op, input_data);
bld.def(bld.lm, vcc), index, data);
} else if (ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64) {
/* GFX10 wave64 mode: emulate full-wave bpermute */
@ -199,11 +194,6 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
index_is_lo_split.def(0).getTemp(), index_is_lo_n1);
Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
Operand input_data(data);
index_x4.setLateKill(true);
input_data.setLateKill(true);
same_half.setLateKill(true);
if (ctx->options->gfx_level <= GFX10_3) {
/* We need one pair of shared VGPRs:
@ -212,11 +202,10 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
return bld.pseudo(aco_opcode::p_bpermute_shared_vgpr, bld.def(v1), bld.def(s2),
bld.def(s1, scc), index_x4, input_data, same_half);
bld.def(s1, scc), index_x4, data, same_half);
} else {
return bld.pseudo(aco_opcode::p_bpermute_permlane, bld.def(v1), bld.def(s2),
bld.def(s1, scc), Operand(v1.as_linear()), index_x4, input_data,
same_half);
bld.def(s1, scc), Operand(v1.as_linear()), index_x4, data, same_half);
}
} else {
/* GFX8-9 or GFX10 wave32: bpermute works normally */
@ -3610,11 +3599,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
Temp ref = get_alu_src(ctx, instr->src[0]);
Temp src = get_alu_src(ctx, instr->src[1], 2);
Temp accum = get_alu_src(ctx, instr->src[2], 4);
Builder::Result res = bld.vop3(aco_opcode::v_mqsad_u32_u8, Definition(dst), as_vgpr(ctx, src),
as_vgpr(ctx, ref), as_vgpr(ctx, accum));
res.instr->operands[0].setLateKill(true);
res.instr->operands[1].setLateKill(true);
res.instr->operands[2].setLateKill(true);
bld.vop3(aco_opcode::v_mqsad_u32_u8, Definition(dst), as_vgpr(ctx, src), as_vgpr(ctx, ref),
as_vgpr(ctx, accum));
emit_split_vector(ctx, dst, 4);
break;
}
@ -5613,13 +5599,9 @@ emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Tem
Builder bld(ctx->program, ctx->block);
if (in_exec_divergent_or_in_loop(ctx)) {
Operand prim_mask_op = bld.m0(prim_mask);
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
Operand coord2_op(coord2);
coord2_op.setLateKill(true); /* we re-use the destination reg in the middle */
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), Operand(v1.as_linear()),
Operand::c32(idx), Operand::c32(component), Operand::c32(high_16bits), coord1,
coord2_op, prim_mask_op);
coord2, bld.m0(prim_mask));
return;
}
@ -5676,11 +5658,8 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src,
}
} else {
assert(!high_16bits);
Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
bld.m0(prim_mask), idx, component);
if (ctx->program->dev.has_16bank_lds)
interp_p1->operands[0].setLateKill(true);
Temp interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
bld.m0(prim_mask), idx, component);
bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1,
idx, component);
@ -5696,11 +5675,9 @@ emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component, unsig
if (ctx->options->gfx_level >= GFX11) {
uint16_t dpp_ctrl = dpp_quad_perm(vertex_id, vertex_id, vertex_id, vertex_id);
if (in_exec_divergent_or_in_loop(ctx)) {
Operand prim_mask_op = bld.m0(prim_mask);
prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
bld.pseudo(aco_opcode::p_interp_gfx11, Definition(tmp), Operand(v1.as_linear()),
Operand::c32(idx), Operand::c32(component), Operand::c32(dpp_ctrl),
prim_mask_op);
bld.m0(prim_mask));
} else {
Temp p =
bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
@ -6189,11 +6166,8 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
mimg->operands[0] = Operand(rsrc);
mimg->operands[1] = samp;
mimg->operands[2] = vdata;
for (unsigned i = 0; i < coords.size(); i++) {
for (unsigned i = 0; i < coords.size(); i++)
mimg->operands[3 + i] = Operand(coords[i]);
if (coords[i].regClass().is_linear_vgpr())
mimg->operands[3 + i].setLateKill(true);
}
mimg->mimg().strict_wqm = strict_wqm;
return &bld.insert(std::move(mimg))->mimg();
@ -8219,9 +8193,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
create_instruction(aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
for (unsigned i = 0; i < 4; i++) {
exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
exp->operands[i].setLateKill(true);
exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
exp->operands[i + 4].setLateKill(true);
}
RegClass type = RegClass(RegType::vgpr, util_bitcount(mrt0->enabled_channels));
@ -8267,9 +8239,6 @@ visit_cmat_muladd(isel_context* ctx, nir_intrinsic_instr* instr)
Operand B(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)));
Operand C(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
A.setLateKill(true);
B.setLateKill(true);
VALU_instruction& vop3p = bld.vop3p(opcode, Definition(dst), A, B, C, 0, 0)->valu();
vop3p.neg_lo[0] = (signed_mask & 0x1) != 0;
vop3p.neg_lo[1] = (signed_mask & 0x2) != 0;
@ -10501,9 +10470,7 @@ visit_block(isel_context* ctx, nir_block* block)
if (ctx->block->kind & block_kind_top_level) {
Builder bld(ctx->program, ctx->block);
for (Temp tmp : ctx->unended_linear_vgprs) {
Operand op(tmp);
op.setLateKill(true);
bld.pseudo(aco_opcode::p_end_linear_vgpr, op);
bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
}
ctx->unended_linear_vgprs.clear();
}
@ -11462,16 +11429,13 @@ add_startpgm(struct isel_context* ctx)
} else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
/* Manually initialize scratch. For RT stages scratch initialization is done in the prolog.
*/
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
scratch_offset.setLateKill(true);
Operand scratch_addr = ctx->args->ring_offsets.used
? Operand(get_arg(ctx, ctx->args->ring_offsets))
: Operand(s2);
Builder bld(ctx->program, ctx->block);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
scratch_offset);
get_arg(ctx, ctx->args->scratch_offset));
}
}

@ -179,8 +179,13 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
ctx.program->needs_vcc |= instr_needs_vcc(insn);
insn->register_demand = RegisterDemand(new_demand.vgpr, new_demand.sgpr);
bool has_vgpr_def = false;
/* KILL */
for (Definition& definition : insn->definitions) {
has_vgpr_def |= definition.regClass().type() == RegType::vgpr &&
!definition.regClass().is_linear_vgpr();
if (!definition.isTemp()) {
continue;
}
@ -212,13 +217,39 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
if (insn->operands[op_idx].isOfType(RegType::sgpr))
insn->operands[op_idx].setLateKill(true);
}
} else if (insn->opcode == aco_opcode::p_bpermute_readlane ||
insn->opcode == aco_opcode::p_bpermute_permlane ||
insn->opcode == aco_opcode::p_bpermute_shared_vgpr ||
insn->opcode == aco_opcode::p_dual_src_export_gfx11 ||
insn->opcode == aco_opcode::v_mqsad_u32_u8) {
for (Operand& op : insn->operands)
op.setLateKill(true);
} else if (insn->opcode == aco_opcode::p_interp_gfx11) {
insn->operands.back().setLateKill(true); /* we don't want the bld.lm def to use m0 */
if (insn->operands.size() == 7)
insn->operands[5].setLateKill(true); /* we re-use the destination reg in the middle */
} else if (insn->opcode == aco_opcode::v_interp_p1_f32 && ctx.program->dev.has_16bank_lds) {
insn->operands[0].setLateKill(true);
} else if (insn->opcode == aco_opcode::p_init_scratch) {
insn->operands.back().setLateKill(true);
} else if (instr_info.classes[(int)insn->opcode] == instr_class::wmma) {
insn->operands[0].setLateKill(true);
insn->operands[1].setLateKill(true);
}
/* we need to do this in a separate loop because the next one can
* setKill() for several operands at once and we don't want to
* overwrite that in a later iteration */
for (Operand& op : insn->operands)
for (Operand& op : insn->operands) {
op.setKill(false);
/* Linear vgprs must be late kill: this is to ensure linear VGPR operands and
* normal VGPR definitions don't try to use the same register, which is problematic
* because of assignment restrictions.
*/
if (op.hasRegClass() && op.regClass().is_linear_vgpr() && !op.isUndefined() &&
has_vgpr_def)
op.setLateKill(true);
}
/* GEN */
for (unsigned i = 0; i < insn->operands.size(); ++i) {

@ -64,11 +64,9 @@ setup_reduce_temp(Program* program)
aco_ptr<Instruction> end{create_instruction(
aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_inserted_at >= 0 ? 2 : 1, 0)};
end->operands[0] = Operand(reduceTmp);
end->operands[0].setLateKill(true);
if (vtmp_inserted_at >= 0) {
if (vtmp_inserted_at >= 0)
end->operands[1] = Operand(vtmp);
end->operands[1].setLateKill(true);
}
/* insert after the phis of the block */
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
@ -164,16 +162,12 @@ setup_reduce_temp(Program* program)
if (instr->isReduction()) {
instr->operands[1] = Operand(reduceTmp);
instr->operands[1].setLateKill(true);
if (need_vtmp) {
if (need_vtmp)
instr->operands[2] = Operand(vtmp);
instr->operands[2].setLateKill(true);
}
} else {
assert(instr->opcode == aco_opcode::p_interp_gfx11 ||
instr->opcode == aco_opcode::p_bpermute_permlane);
instr->operands[0] = Operand(reduceTmp);
instr->operands[0].setLateKill(true);
}
}
}

@ -1419,10 +1419,8 @@ end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spi
aco_ptr<Instruction> destr{
create_instruction(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
for (unsigned i = 0; i < temps.size(); i++) {
for (unsigned i = 0; i < temps.size(); i++)
destr->operands[i] = Operand(temps[i]);
destr->operands[i].setLateKill(true);
}
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
while (is_phi(*it))
@ -1540,7 +1538,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
/* spill sgpr: just add the vgpr temp to operands */
Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
spill->operands[0].setLateKill(true);
spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
spill->operands[2] = (*it)->operands[0];
instructions.emplace_back(aco_ptr<Instruction>(spill));
@ -1586,7 +1583,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
/* reload sgpr: just add the vgpr temp to operands */
Instruction* reload = create_instruction(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
reload->operands[0].setLateKill(true);
reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
reload->definitions[0] = (*it)->definitions[0];
instructions.emplace_back(aco_ptr<Instruction>(reload));

@ -372,11 +372,6 @@ validate_ir(Program* program)
op.isUndefined())
continue;
/* Check that linear vgprs are late kill: this is to ensure linear VGPR operands and
* normal VGPR definitions don't try to use the same register, which is problematic
* because of assignment restrictions. */
check(op.isLateKill(), "Linear VGPR operands must be late kill", instr.get());
/* Only kill linear VGPRs in top-level blocks. Otherwise, we might have to move linear
* VGPRs to make space for normal ones and that isn't possible inside control flow. */
if (op.isKill()) {

@ -39,7 +39,7 @@ BEGIN_TEST(d3d11_derivs.simple)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@ -80,7 +80,7 @@ BEGIN_TEST(d3d11_derivs.constant)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@ -151,7 +151,7 @@ BEGIN_TEST(d3d11_derivs.bias)
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@ -194,7 +194,7 @@ BEGIN_TEST(d3d11_derivs.offset)
//>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%offset 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@ -239,7 +239,7 @@ BEGIN_TEST(d3d11_derivs.array)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
@ -286,7 +286,7 @@ BEGIN_TEST(d3d11_derivs.bias_array)
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
@ -331,7 +331,7 @@ BEGIN_TEST(d3d11_derivs._1d_gfx9)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@ -373,7 +373,7 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
@ -420,7 +420,7 @@ BEGIN_TEST(d3d11_derivs.cube)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
@ -467,7 +467,7 @@ BEGIN_TEST(d3d11_derivs.cube_array)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
@ -548,7 +548,7 @@ BEGIN_TEST(d3d11_derivs.bc_optimize)
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST
@ -589,7 +589,7 @@ BEGIN_TEST(d3d11_derivs.get_lod)
//>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (latekill)(kill)%wqm
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
END_TEST

@ -33,7 +33,7 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
program.get(), bld, Operand(inputs[0]),
[&]() -> void
{
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, (latekill)%lv, lv1: undef op:umin32 cluster_size:64
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
Instruction* reduce =
bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
@ -45,7 +45,7 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
});
bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());
//>> /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
//! p_end_linear_vgpr (latekill)%lv
//! p_end_linear_vgpr %lv
finish_setup_reduce_temp_test();
END_TEST

@ -297,9 +297,7 @@ END_TEST
static void
end_linear_vgpr(Temp tmp)
{
Operand op(tmp);
op.setLateKill(true);
bld.pseudo(aco_opcode::p_end_linear_vgpr, op);
bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
}
BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
@ -308,10 +306,10 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
//>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[30] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
//! lv1: %ltmp2:v[31] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp1:v[30]
//! p_end_linear_vgpr (latekill)%ltmp2:v[31]
//! p_end_linear_vgpr %ltmp1:v[30]
//! p_end_linear_vgpr %ltmp2:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
end_linear_vgpr(ltmp0);
@ -331,7 +329,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
//! lv1: %ltmp0:v[31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[30] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
end_linear_vgpr(ltmp0);
@ -347,8 +345,8 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
//! lv2: %ltmp2:v[29-30] = p_start_linear_vgpr
Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
//! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
//! p_end_linear_vgpr (latekill)%ltmp2:v[29-30]
//! p_end_linear_vgpr %ltmp1_2:v[31]
//! p_end_linear_vgpr %ltmp2:v[29-30]
end_linear_vgpr(ltmp1);
end_linear_vgpr(ltmp2);
@ -371,9 +369,9 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
//! lv1: %ltmp2:v[29] = p_start_linear_vgpr
//! lv1: %ltmp3:v[28] = p_start_linear_vgpr
//! lv1: %ltmp4:v[27] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr (latekill)%ltmp2:v[29]
//! p_end_linear_vgpr (latekill)%ltmp4:v[27]
//! p_end_linear_vgpr %ltmp0:v[31]
//! p_end_linear_vgpr %ltmp2:v[29]
//! p_end_linear_vgpr %ltmp4:v[27]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@ -392,9 +390,9 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
//! v28: %_:v[0-27] = p_unit_test
bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 28 * 4)));
//! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
//! p_end_linear_vgpr (latekill)%ltmp3_2:v[30]
//! p_end_linear_vgpr (latekill)%ltmp5:v[28-29]
//! p_end_linear_vgpr %ltmp1_2:v[31]
//! p_end_linear_vgpr %ltmp3_2:v[30]
//! p_end_linear_vgpr %ltmp5:v[28-29]
end_linear_vgpr(ltmp1);
end_linear_vgpr(ltmp3);
end_linear_vgpr(ltmp5);
@ -412,7 +410,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
//! lv1: %ltmp0:v[31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[30] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
end_linear_vgpr(ltmp0);
@ -421,7 +419,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
//! v31: %_:v[0-30] = p_unit_test
bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));
//! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
//! p_end_linear_vgpr %ltmp1_2:v[31]
end_linear_vgpr(ltmp1);
finish_ra_test(ra_test_policy{pessimistic});
@ -437,7 +435,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
//! lv1: %ltmp0:v[31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[30] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
end_linear_vgpr(ltmp0);
@ -447,7 +445,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
RegClass v31 = RegClass::get(RegType::vgpr, 31 * 4);
bld.pseudo(aco_opcode::p_create_vector, bld.def(v31), Operand(v31));
//! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
//! p_end_linear_vgpr %ltmp1_2:v[31]
end_linear_vgpr(ltmp1);
finish_ra_test(ra_test_policy{pessimistic});
@ -467,7 +465,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.killed_op)
Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));
//! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1:v[31]
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
end_linear_vgpr(ltmp0);
@ -494,7 +492,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.move_killed_op)
//~gfx8_optimistic! v1: %tmp1_2:v[31], v1: %tmp2_2:v[30] = p_parallelcopy %tmp1:v[30], %tmp2:v[31]
//~gfx8_pessimistic! v1: %tmp2_2:v[30], v1: %tmp1_2:v[31] = p_parallelcopy %tmp2:v[31], %tmp1:v[30]
//! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1_2:v[31]
//! p_end_linear_vgpr (latekill)%ltmp0:v[31]
//! p_end_linear_vgpr %ltmp0:v[31]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
end_linear_vgpr(ltmp0);
@ -514,7 +512,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
//>> lv2: %ltmp0:v[30-31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[29] = p_start_linear_vgpr
//! lv1: %ltmp2:v[28] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp1:v[29]
//! p_end_linear_vgpr %ltmp1:v[29]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@ -549,8 +547,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
program->blocks[2].logical_preds.push_back(1);
program->blocks[2].kind |= block_kind_top_level;
//! p_end_linear_vgpr (latekill)%ltmp0_2:v[30-31]
//! p_end_linear_vgpr (latekill)%ltmp2_2:v[29]
//! p_end_linear_vgpr %ltmp0_2:v[30-31]
//! p_end_linear_vgpr %ltmp2_2:v[29]
end_linear_vgpr(ltmp0);
end_linear_vgpr(ltmp2);
@ -578,7 +576,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
//>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
//! lv1: %ltmp1:v[30] = p_start_linear_vgpr
//! lv1: %ltmp2:v[29] = p_start_linear_vgpr
//! p_end_linear_vgpr (latekill)%ltmp1:v[30]
//! p_end_linear_vgpr %ltmp1:v[30]
Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@ -614,8 +612,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
Temp tmp = bld.pseudo(aco_opcode::p_phi, bld.def(v30), Operand(v30));
bld.pseudo(aco_opcode::p_unit_test, tmp);
//! p_end_linear_vgpr (latekill)%ltmp0_2:v[31]
//! p_end_linear_vgpr (latekill)%ltmp2_2:v[30]
//! p_end_linear_vgpr %ltmp0_2:v[31]
//! p_end_linear_vgpr %ltmp2_2:v[30]
end_linear_vgpr(ltmp0);
end_linear_vgpr(ltmp2);