Mirror of https://gitlab.freedesktop.org/mesa/mesa.git
aco: only set latekill in live_var_analysis
Cleaner to have this all in one place, in my opinion.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30368>
parent 510f5e55be
commit dedfff9dbf

8 changed files with 87 additions and 109 deletions
(file names below are inferred from the hunk headers)
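Background, for readers who do not live in ACO: Operand::setLateKill(true) marks an operand whose register must stay reserved until the instruction's definitions have been assigned, so a dying operand and a definition can never share a register. The following toy model (my own simplified types, not ACO's real classes) shows the effect on peak register demand:

// Toy model of kill vs. late-kill operands -- illustration only; the real
// accounting lives in src/amd/compiler/aco_live_var_analysis.cpp.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Op {
   unsigned size;  // registers occupied by this operand
   bool kill;      // this instruction is the last use
   bool lateKill;  // if killed, free the registers only after the defs
};

unsigned peak_demand(unsigned live_before, const std::vector<Op>& ops, unsigned def_size)
{
   unsigned early_killed = 0;
   for (const Op& op : ops)
      if (op.kill && !op.lateKill) // late-killed operands stay live across the defs
         early_killed += op.size;
   return std::max(live_before, live_before - early_killed + def_size);
}

int main()
{
   std::vector<Op> early = {{1, true, false}, {1, true, false}};
   std::vector<Op> late = {{1, true, true}, {1, true, true}};
   printf("early kill: %u\n", peak_demand(10, early, 2)); // 10: defs reuse the freed regs
   printf("late kill:  %u\n", peak_demand(10, late, 2));  // 12: operands and defs coexist
}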
src/amd/compiler/aco_instruction_selection.cpp

@@ -180,13 +180,8 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
 
    if (ctx->options->gfx_level <= GFX7 || avoid_shared_vgprs) {
       /* GFX6-7: there is no bpermute instruction */
-      Operand index_op(index);
-      Operand input_data(data);
-      index_op.setLateKill(true);
-      input_data.setLateKill(true);
-
       return bld.pseudo(aco_opcode::p_bpermute_readlane, bld.def(v1), bld.def(bld.lm),
-                        bld.def(bld.lm, vcc), index_op, input_data);
+                        bld.def(bld.lm, vcc), index, data);
    } else if (ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64) {
       /* GFX10 wave64 mode: emulate full-wave bpermute */
@@ -199,11 +194,6 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
       Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
                                      index_is_lo_split.def(0).getTemp(), index_is_lo_n1);
       Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
-      Operand input_data(data);
-
-      index_x4.setLateKill(true);
-      input_data.setLateKill(true);
-      same_half.setLateKill(true);
 
       if (ctx->options->gfx_level <= GFX10_3) {
          /* We need one pair of shared VGPRs:
@@ -212,11 +202,10 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
          ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
 
          return bld.pseudo(aco_opcode::p_bpermute_shared_vgpr, bld.def(v1), bld.def(s2),
-                           bld.def(s1, scc), index_x4, input_data, same_half);
+                           bld.def(s1, scc), index_x4, data, same_half);
       } else {
          return bld.pseudo(aco_opcode::p_bpermute_permlane, bld.def(v1), bld.def(s2),
-                           bld.def(s1, scc), Operand(v1.as_linear()), index_x4, input_data,
-                           same_half);
+                           bld.def(s1, scc), Operand(v1.as_linear()), index_x4, data, same_half);
       }
    } else {
       /* GFX8-9 or GFX10 wave32: bpermute works normally */
@@ -3610,11 +3599,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       Temp ref = get_alu_src(ctx, instr->src[0]);
       Temp src = get_alu_src(ctx, instr->src[1], 2);
       Temp accum = get_alu_src(ctx, instr->src[2], 4);
-      Builder::Result res = bld.vop3(aco_opcode::v_mqsad_u32_u8, Definition(dst), as_vgpr(ctx, src),
-                                     as_vgpr(ctx, ref), as_vgpr(ctx, accum));
-      res.instr->operands[0].setLateKill(true);
-      res.instr->operands[1].setLateKill(true);
-      res.instr->operands[2].setLateKill(true);
+      bld.vop3(aco_opcode::v_mqsad_u32_u8, Definition(dst), as_vgpr(ctx, src), as_vgpr(ctx, ref),
+               as_vgpr(ctx, accum));
       emit_split_vector(ctx, dst, 4);
       break;
    }
@@ -5613,13 +5599,9 @@ emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Tem
    Builder bld(ctx->program, ctx->block);
 
    if (in_exec_divergent_or_in_loop(ctx)) {
-      Operand prim_mask_op = bld.m0(prim_mask);
-      prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
-      Operand coord2_op(coord2);
-      coord2_op.setLateKill(true); /* we re-use the destination reg in the middle */
       bld.pseudo(aco_opcode::p_interp_gfx11, Definition(dst), Operand(v1.as_linear()),
                  Operand::c32(idx), Operand::c32(component), Operand::c32(high_16bits), coord1,
-                 coord2_op, prim_mask_op);
+                 coord2, bld.m0(prim_mask));
       return;
    }
 
@@ -5676,11 +5658,8 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src,
       }
    } else {
       assert(!high_16bits);
-      Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
-                                             bld.m0(prim_mask), idx, component);
-
-      if (ctx->program->dev.has_16bank_lds)
-         interp_p1->operands[0].setLateKill(true);
+      Temp interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
+                                  bld.m0(prim_mask), idx, component);
 
       bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1,
                  idx, component);
@@ -5696,11 +5675,9 @@ emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component, unsig
    if (ctx->options->gfx_level >= GFX11) {
       uint16_t dpp_ctrl = dpp_quad_perm(vertex_id, vertex_id, vertex_id, vertex_id);
       if (in_exec_divergent_or_in_loop(ctx)) {
-         Operand prim_mask_op = bld.m0(prim_mask);
-         prim_mask_op.setLateKill(true); /* we don't want the bld.lm definition to use m0 */
         bld.pseudo(aco_opcode::p_interp_gfx11, Definition(tmp), Operand(v1.as_linear()),
                    Operand::c32(idx), Operand::c32(component), Operand::c32(dpp_ctrl),
-                   prim_mask_op);
+                   bld.m0(prim_mask));
      } else {
         Temp p =
            bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
@@ -6189,11 +6166,8 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
    mimg->operands[0] = Operand(rsrc);
    mimg->operands[1] = samp;
    mimg->operands[2] = vdata;
-   for (unsigned i = 0; i < coords.size(); i++) {
+   for (unsigned i = 0; i < coords.size(); i++)
       mimg->operands[3 + i] = Operand(coords[i]);
-      if (coords[i].regClass().is_linear_vgpr())
-         mimg->operands[3 + i].setLateKill(true);
-   }
    mimg->mimg().strict_wqm = strict_wqm;
 
    return &bld.insert(std::move(mimg))->mimg();
@@ -8219,9 +8193,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
       create_instruction(aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
    for (unsigned i = 0; i < 4; i++) {
       exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
-      exp->operands[i].setLateKill(true);
       exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
-      exp->operands[i + 4].setLateKill(true);
    }
 
    RegClass type = RegClass(RegType::vgpr, util_bitcount(mrt0->enabled_channels));
@@ -8267,9 +8239,6 @@ visit_cmat_muladd(isel_context* ctx, nir_intrinsic_instr* instr)
    Operand B(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)));
    Operand C(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
 
-   A.setLateKill(true);
-   B.setLateKill(true);
-
    VALU_instruction& vop3p = bld.vop3p(opcode, Definition(dst), A, B, C, 0, 0)->valu();
    vop3p.neg_lo[0] = (signed_mask & 0x1) != 0;
    vop3p.neg_lo[1] = (signed_mask & 0x2) != 0;
@@ -10501,9 +10470,7 @@ visit_block(isel_context* ctx, nir_block* block)
    if (ctx->block->kind & block_kind_top_level) {
       Builder bld(ctx->program, ctx->block);
       for (Temp tmp : ctx->unended_linear_vgprs) {
-         Operand op(tmp);
-         op.setLateKill(true);
-         bld.pseudo(aco_opcode::p_end_linear_vgpr, op);
+         bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
       }
       ctx->unended_linear_vgprs.clear();
    }
@@ -11462,16 +11429,13 @@ add_startpgm(struct isel_context* ctx)
    } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
       /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog.
        */
-      Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
-      scratch_offset.setLateKill(true);
-
       Operand scratch_addr = ctx->args->ring_offsets.used
                                 ? Operand(get_arg(ctx, ctx->args->ring_offsets))
                                 : Operand(s2);
 
       Builder bld(ctx->program, ctx->block);
       bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
-                 scratch_offset);
+                 get_arg(ctx, ctx->args->scratch_offset));
    }
 }
src/amd/compiler/aco_live_var_analysis.cpp
@@ -179,8 +179,13 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
       ctx.program->needs_vcc |= instr_needs_vcc(insn);
       insn->register_demand = RegisterDemand(new_demand.vgpr, new_demand.sgpr);
 
+      bool has_vgpr_def = false;
+
       /* KILL */
       for (Definition& definition : insn->definitions) {
+         has_vgpr_def |= definition.regClass().type() == RegType::vgpr &&
+                         !definition.regClass().is_linear_vgpr();
+
         if (!definition.isTemp()) {
            continue;
         }
@@ -212,13 +217,39 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
            if (insn->operands[op_idx].isOfType(RegType::sgpr))
               insn->operands[op_idx].setLateKill(true);
         }
+      } else if (insn->opcode == aco_opcode::p_bpermute_readlane ||
+                 insn->opcode == aco_opcode::p_bpermute_permlane ||
+                 insn->opcode == aco_opcode::p_bpermute_shared_vgpr ||
+                 insn->opcode == aco_opcode::p_dual_src_export_gfx11 ||
+                 insn->opcode == aco_opcode::v_mqsad_u32_u8) {
+         for (Operand& op : insn->operands)
+            op.setLateKill(true);
+      } else if (insn->opcode == aco_opcode::p_interp_gfx11) {
+         insn->operands.back().setLateKill(true); /* we don't want the bld.lm def to use m0 */
+         if (insn->operands.size() == 7)
+            insn->operands[5].setLateKill(true); /* we re-use the destination reg in the middle */
+      } else if (insn->opcode == aco_opcode::v_interp_p1_f32 && ctx.program->dev.has_16bank_lds) {
+         insn->operands[0].setLateKill(true);
+      } else if (insn->opcode == aco_opcode::p_init_scratch) {
+         insn->operands.back().setLateKill(true);
+      } else if (instr_info.classes[(int)insn->opcode] == instr_class::wmma) {
+         insn->operands[0].setLateKill(true);
+         insn->operands[1].setLateKill(true);
       }
 
       /* we need to do this in a separate loop because the next one can
        * setKill() for several operands at once and we don't want to
        * overwrite that in a later iteration */
-      for (Operand& op : insn->operands)
+      for (Operand& op : insn->operands) {
          op.setKill(false);
+         /* Linear vgprs must be late kill: this is to ensure linear VGPR operands and
+          * normal VGPR definitions don't try to use the same register, which is problematic
+          * because of assignment restrictions.
+          */
+         if (op.hasRegClass() && op.regClass().is_linear_vgpr() && !op.isUndefined() &&
+             has_vgpr_def)
+            op.setLateKill(true);
+      }
 
       /* GEN */
       for (unsigned i = 0; i < insn->operands.size(); ++i) {
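Reviewer-style note: the else-if chain above is the heart of the commit -- the per-opcode late-kill rules that were previously scattered across instruction selection, setup_reduce_temp and the spiller now live in this single pass. A standalone paraphrase of that dispatch, using a hypothetical toy enum in place of aco_opcode (the bitmask helper is my own illustration, not an ACO API):

// Condensed paraphrase of the late-kill policy above; toy enum, not aco_opcode.
#include <cstdio>

enum class Opcode {
   p_bpermute_readlane, p_bpermute_permlane, p_bpermute_shared_vgpr,
   p_dual_src_export_gfx11, v_mqsad_u32_u8,
   p_interp_gfx11, v_interp_p1_f32, p_init_scratch, wmma_like, other
};

// Returns a bitmask of operand indices that must be late-kill (~0u = all).
unsigned latekill_mask(Opcode op, unsigned num_operands, bool has_16bank_lds)
{
   switch (op) {
   case Opcode::p_bpermute_readlane:
   case Opcode::p_bpermute_permlane:
   case Opcode::p_bpermute_shared_vgpr:
   case Opcode::p_dual_src_export_gfx11:
   case Opcode::v_mqsad_u32_u8: return ~0u; // every operand
   case Opcode::p_interp_gfx11: {
      unsigned mask = 1u << (num_operands - 1); // prim_mask: keep the lm def off m0
      if (num_operands == 7)
         mask |= 1u << 5; // coord2: the dst register is reused mid-sequence
      return mask;
   }
   case Opcode::v_interp_p1_f32: return has_16bank_lds ? 1u : 0u; // operand 0
   case Opcode::p_init_scratch: return 1u << (num_operands - 1);  // scratch offset
   case Opcode::wmma_like: return 0x3u;                           // matrices A and B
   default: return 0u;
   }
}

int main()
{
   printf("p_interp_gfx11/7 ops -> 0x%x\n", latekill_mask(Opcode::p_interp_gfx11, 7, false));
   printf("v_mqsad_u32_u8       -> 0x%x\n", latekill_mask(Opcode::v_mqsad_u32_u8, 3, false));
}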
src/amd/compiler/aco_reduce_assign.cpp
@@ -64,11 +64,9 @@ setup_reduce_temp(Program* program)
          aco_ptr<Instruction> end{create_instruction(
             aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_inserted_at >= 0 ? 2 : 1, 0)};
          end->operands[0] = Operand(reduceTmp);
-         end->operands[0].setLateKill(true);
-         if (vtmp_inserted_at >= 0) {
+         if (vtmp_inserted_at >= 0)
            end->operands[1] = Operand(vtmp);
-            end->operands[1].setLateKill(true);
-         }
 
          /* insert after the phis of the block */
          std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
          while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
@@ -164,16 +162,12 @@ setup_reduce_temp(Program* program)
 
          if (instr->isReduction()) {
            instr->operands[1] = Operand(reduceTmp);
-            instr->operands[1].setLateKill(true);
-            if (need_vtmp) {
+            if (need_vtmp)
               instr->operands[2] = Operand(vtmp);
-               instr->operands[2].setLateKill(true);
-            }
         } else {
            assert(instr->opcode == aco_opcode::p_interp_gfx11 ||
                   instr->opcode == aco_opcode::p_bpermute_permlane);
            instr->operands[0] = Operand(reduceTmp);
-            instr->operands[0].setLateKill(true);
         }
      }
   }
src/amd/compiler/aco_spill.cpp
@@ -1419,10 +1419,8 @@ end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spi
 
    aco_ptr<Instruction> destr{
       create_instruction(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
-   for (unsigned i = 0; i < temps.size(); i++) {
+   for (unsigned i = 0; i < temps.size(); i++)
      destr->operands[i] = Operand(temps[i]);
-      destr->operands[i].setLateKill(true);
-   }
 
    std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
    while (is_phi(*it))
@@ -1540,7 +1538,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
               /* spill sgpr: just add the vgpr temp to operands */
               Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
               spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
-               spill->operands[0].setLateKill(true);
               spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
               spill->operands[2] = (*it)->operands[0];
               instructions.emplace_back(aco_ptr<Instruction>(spill));
@@ -1586,7 +1583,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
               /* reload sgpr: just add the vgpr temp to operands */
               Instruction* reload = create_instruction(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
               reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
-               reload->operands[0].setLateKill(true);
               reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
               reload->definitions[0] = (*it)->definitions[0];
               instructions.emplace_back(aco_ptr<Instruction>(reload));
src/amd/compiler/aco_validate.cpp
@@ -372,11 +372,6 @@ validate_ir(Program* program)
                op.isUndefined())
               continue;
 
-            /* Check that linear vgprs are late kill: this is to ensure linear VGPR operands and
-             * normal VGPR definitions don't try to use the same register, which is problematic
-             * because of assignment restrictions. */
-            check(op.isLateKill(), "Linear VGPR operands must be late kill", instr.get());
-
            /* Only kill linear VGPRs in top-level blocks. Otherwise, we might have to move linear
             * VGPRs to make space for normal ones and that isn't possible inside control flow. */
            if (op.isKill()) {
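The removed validator check looks redundant rather than wrong after this change: since the flag is now set by live-variable analysis itself, IR validated before that pass simply does not carry latekill yet. The hazard the rule guards against can still be illustrated with a toy allocator -- hypothetical code, not ACO's register allocator:

// Why a killed linear-VGPR operand must be late-kill when the instruction
// also defines normal VGPRs: if its register were freed early, the def could
// be placed on top of an input the lowering still reads. Toy model only.
#include <cassert>
#include <cstdio>
#include <set>

int main()
{
   std::set<int> pool = {0, 1, 2}; // v3 holds the live linear-VGPR operand

   // Early kill: v3 returns to the pool before the definition is assigned,
   // so an allocator preferring high registers hands the def the same slot.
   pool.insert(3);
   int def_early = *pool.rbegin();
   assert(def_early == 3); // def aliases the still-needed operand

   // Late kill: v3 stays reserved until the defs are placed.
   pool = {0, 1, 2};
   int def_late = *pool.rbegin(); // picks v2 -- safe

   printf("early kill def: v%d (aliases the operand!)\n", def_early);
   printf("late kill def:  v%d\n", def_late);
}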
src/amd/compiler/tests/test_d3d11_derivs.cpp
@@ -39,7 +39,7 @@ BEGIN_TEST(d3d11_derivs.simple)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@@ -80,7 +80,7 @@ BEGIN_TEST(d3d11_derivs.constant)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@@ -151,7 +151,7 @@ BEGIN_TEST(d3d11_derivs.bias)
    //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@@ -194,7 +194,7 @@ BEGIN_TEST(d3d11_derivs.offset)
    //>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%offset 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@@ -239,7 +239,7 @@ BEGIN_TEST(d3d11_derivs.array)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
@@ -286,7 +286,7 @@ BEGIN_TEST(d3d11_derivs.bias_array)
    //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2darray da
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
@@ -331,7 +331,7 @@ BEGIN_TEST(d3d11_derivs._1d_gfx9)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
@@ -373,7 +373,7 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
@@ -420,7 +420,7 @@ BEGIN_TEST(d3d11_derivs.cube)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
@@ -467,7 +467,7 @@ BEGIN_TEST(d3d11_derivs.cube_array)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
@@ -548,7 +548,7 @@ BEGIN_TEST(d3d11_derivs.bc_optimize)
    //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 END_TEST
@@ -589,7 +589,7 @@ BEGIN_TEST(d3d11_derivs.get_lod)
    //>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
    //>> BB2
    //>> BB6
-   //>> p_end_linear_vgpr (latekill)(kill)%wqm
+   //>> p_end_linear_vgpr (kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 END_TEST
src/amd/compiler/tests/test_reduce_assign.cpp
@@ -33,7 +33,7 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
       program.get(), bld, Operand(inputs[0]),
       [&]() -> void
       {
-         //>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, (latekill)%lv, lv1: undef op:umin32 cluster_size:64
+         //>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
          Instruction* reduce =
            bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
                          inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
@@ -45,7 +45,7 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
       });
    bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());
    //>> /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
-   //! p_end_linear_vgpr (latekill)%lv
+   //! p_end_linear_vgpr %lv
 
    finish_setup_reduce_temp_test();
 END_TEST
src/amd/compiler/tests/test_regalloc.cpp
@@ -297,9 +297,7 @@ END_TEST
 static void
 end_linear_vgpr(Temp tmp)
 {
-   Operand op(tmp);
-   op.setLateKill(true);
-   bld.pseudo(aco_opcode::p_end_linear_vgpr, op);
+   bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
 }
 
 BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
@@ -308,10 +306,10 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
 
    //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    //! lv1: %ltmp2:v[31] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp1:v[30]
-   //! p_end_linear_vgpr (latekill)%ltmp2:v[31]
+   //! p_end_linear_vgpr %ltmp1:v[30]
+   //! p_end_linear_vgpr %ltmp2:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    end_linear_vgpr(ltmp0);
@@ -331,7 +329,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
 
    //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    end_linear_vgpr(ltmp0);
@@ -347,8 +345,8 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
    //! lv2: %ltmp2:v[29-30] = p_start_linear_vgpr
    Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
 
-   //! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp2:v[29-30]
+   //! p_end_linear_vgpr %ltmp1_2:v[31]
+   //! p_end_linear_vgpr %ltmp2:v[29-30]
    end_linear_vgpr(ltmp1);
    end_linear_vgpr(ltmp2);
 
@@ -371,9 +369,9 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
    //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
    //! lv1: %ltmp3:v[28] = p_start_linear_vgpr
    //! lv1: %ltmp4:v[27] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp2:v[29]
-   //! p_end_linear_vgpr (latekill)%ltmp4:v[27]
+   //! p_end_linear_vgpr %ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp2:v[29]
+   //! p_end_linear_vgpr %ltmp4:v[27]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@@ -392,9 +390,9 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
    //! v28: %_:v[0-27] = p_unit_test
    bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 28 * 4)));
 
-   //! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp3_2:v[30]
-   //! p_end_linear_vgpr (latekill)%ltmp5:v[28-29]
+   //! p_end_linear_vgpr %ltmp1_2:v[31]
+   //! p_end_linear_vgpr %ltmp3_2:v[30]
+   //! p_end_linear_vgpr %ltmp5:v[28-29]
    end_linear_vgpr(ltmp1);
    end_linear_vgpr(ltmp3);
    end_linear_vgpr(ltmp5);
@@ -412,7 +410,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
 
    //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    end_linear_vgpr(ltmp0);
@@ -421,7 +419,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
    //! v31: %_:v[0-30] = p_unit_test
    bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));
 
-   //! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
+   //! p_end_linear_vgpr %ltmp1_2:v[31]
    end_linear_vgpr(ltmp1);
 
    finish_ra_test(ra_test_policy{pessimistic});
@@ -437,7 +435,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
 
    //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    end_linear_vgpr(ltmp0);
@@ -447,7 +445,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
    RegClass v31 = RegClass::get(RegType::vgpr, 31 * 4);
    bld.pseudo(aco_opcode::p_create_vector, bld.def(v31), Operand(v31));
 
-   //! p_end_linear_vgpr (latekill)%ltmp1_2:v[31]
+   //! p_end_linear_vgpr %ltmp1_2:v[31]
    end_linear_vgpr(ltmp1);
 
    finish_ra_test(ra_test_policy{pessimistic});
@@ -467,7 +465,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.killed_op)
    Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));
 
    //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
    end_linear_vgpr(ltmp0);
 
@@ -494,7 +492,7 @@ BEGIN_TEST(regalloc.linear_vgpr.alloc.move_killed_op)
    //~gfx8_optimistic! v1: %tmp1_2:v[31], v1: %tmp2_2:v[30] = p_parallelcopy %tmp1:v[30], %tmp2:v[31]
    //~gfx8_pessimistic! v1: %tmp2_2:v[30], v1: %tmp1_2:v[31] = p_parallelcopy %tmp2:v[31], %tmp1:v[30]
    //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1_2:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp0:v[31]
+   //! p_end_linear_vgpr %ltmp0:v[31]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
    end_linear_vgpr(ltmp0);
 
@@ -514,7 +512,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
    //>> lv2: %ltmp0:v[30-31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[29] = p_start_linear_vgpr
    //! lv1: %ltmp2:v[28] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp1:v[29]
+   //! p_end_linear_vgpr %ltmp1:v[29]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@@ -549,8 +547,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
    program->blocks[2].logical_preds.push_back(1);
    program->blocks[2].kind |= block_kind_top_level;
 
-   //! p_end_linear_vgpr (latekill)%ltmp0_2:v[30-31]
-   //! p_end_linear_vgpr (latekill)%ltmp2_2:v[29]
+   //! p_end_linear_vgpr %ltmp0_2:v[30-31]
+   //! p_end_linear_vgpr %ltmp2_2:v[29]
    end_linear_vgpr(ltmp0);
    end_linear_vgpr(ltmp2);
 
@@ -578,7 +576,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
    //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
    //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
    //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
-   //! p_end_linear_vgpr (latekill)%ltmp1:v[30]
+   //! p_end_linear_vgpr %ltmp1:v[30]
    Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
    Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
@@ -614,8 +612,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
    Temp tmp = bld.pseudo(aco_opcode::p_phi, bld.def(v30), Operand(v30));
    bld.pseudo(aco_opcode::p_unit_test, tmp);
 
-   //! p_end_linear_vgpr (latekill)%ltmp0_2:v[31]
-   //! p_end_linear_vgpr (latekill)%ltmp2_2:v[30]
+   //! p_end_linear_vgpr %ltmp0_2:v[31]
+   //! p_end_linear_vgpr %ltmp2_2:v[30]
    end_linear_vgpr(ltmp0);
    end_linear_vgpr(ltmp2);
 