aco/optimizer: delete combine_and_subbrev

This is now done in NIR. No Foz-DB changes on Navi21.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33761>
This commit is contained in:
Georg Lehmann 2025-02-20 16:47:27 +01:00 committed by Marge Bot
parent d272a6e261
commit 7eb43c3b1c
2 changed files with 1 additions and 93 deletions

View file

@ -3230,46 +3230,6 @@ apply_ds_extract(opt_ctx& ctx, aco_ptr<Instruction>& extract)
return true;
}
/* Rewrites an AND against a v_subbrev_co_u32(0, 0, cond) result as a select:
 *
 *    v_and(a, v_subbrev_co(0, 0, vcc)) -> v_cndmask(0, a, vcc)
 *
 * Either operand of the AND may be the v_subbrev_co result. On success instr
 * is replaced by the new v_cndmask_b32 and true is returned; otherwise instr
 * is left untouched and false is returned.
 */
bool
combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
   if (instr->usesModifiers())
      return false;

   for (unsigned idx = 0; idx < 2; idx++) {
      Instruction* mask_instr = follow_operand(ctx, instr->operands[idx], true);

      /* Only a modifier-free v_subbrev_co_u32 with both sources equal to 0 qualifies. */
      if (!mask_instr || mask_instr->opcode != aco_opcode::v_subbrev_co_u32)
         continue;
      if (!mask_instr->operands[0].constantEquals(0) ||
          !mask_instr->operands[1].constantEquals(0))
         continue;
      if (mask_instr->usesModifiers())
         continue;

      /* Copied by value: instr is replaced below, so nothing may refer into it afterwards. */
      const Operand other = instr->operands[!idx];

      /* A VGPR temp can use plain VOP2. Anything else needs the VOP3 form,
       * which is only used on GFX10+ or when the operand is a non-literal constant.
       */
      Format cndmask_format;
      if (other.isTemp() && other.getTemp().type() == RegType::vgpr)
         cndmask_format = Format::VOP2;
      else if (ctx.program->gfx_level >= GFX10 || (other.isConstant() && !other.isLiteral()))
         cndmask_format = asVOP3(Format::VOP2);
      else
         return false;

      aco_ptr<Instruction> cndmask;
      cndmask.reset(create_instruction(aco_opcode::v_cndmask_b32, cndmask_format, 3, 1));
      cndmask->operands[0] = Operand::zero();
      cndmask->operands[1] = other;
      cndmask->operands[2] = copy_operand(ctx, mask_instr->operands[2]);
      cndmask->definitions[0] = instr->definitions[0];
      cndmask->pass_flags = instr->pass_flags;

      instr = std::move(cndmask);
      decrease_uses(ctx, mask_instr);
      /* Clear the label recorded for the result temp now that a different instruction defines it. */
      ctx.info[instr->definitions[0].tempId()].label = 0;
      return true;
   }

   return false;
}
/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a)
* v_or(a, not(b)) -> v_bfi_b32(b, a, -1)
*/
@ -4197,9 +4157,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (instr->opcode == aco_opcode::s_abs_i32) {
combine_sabsdiff(ctx, instr);
} else if (instr->opcode == aco_opcode::v_and_b32) {
if (combine_and_subbrev(ctx, instr)) {
} else if (combine_v_andor_not(ctx, instr)) {
}
combine_v_andor_not(ctx, instr);
} else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
/* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32.
* since ctx.uses[mad_info::mul_temp_id] is always 0, we don't have to worry about

View file

@ -254,56 +254,6 @@ BEGIN_TEST(optimize.output_modifiers)
finish_opt_test();
END_TEST
/* Test helper: emits a v_subbrev_co_u32 through bld.vop2_e64 with the three
 * given operands and returns the builder's result as a Temp. Two definitions
 * are requested: one of class v1 and one of class bld.lm (presumably the
 * carry-out lane mask - confirm against the builder).
 */
Temp
create_subbrev_co(Operand op0, Operand op1, Operand op2)
{
return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2);
}
/* Exercises the v_and(a, v_subbrev_co(0, 0, c)) -> v_cndmask(0, a, c) combine
 * for GFX9 and GFX10.
 *
 * NOTE(review): the //>>, //! and //~gfx* comment lines look like the
 * expected-output patterns read by the test framework (confirm), so they are
 * kept exactly as they were; explanatory notes use block comments instead.
 */
BEGIN_TEST(optimize.cndmask)
for (unsigned i = GFX9; i <= GFX10; i++) {
//>> v1: %a, s1: %b, s2: %c = p_startpgm
if (!setup_cs("v1 s1 s2", (amd_gfx_level)i))
continue;
Temp subbrev;
/* Case 0: the other AND operand is the v1 input %a; a v_cndmask_b32 is expected. */
//! v1: %res0 = v_cndmask_b32 0, %a, %c
//! p_unit_test 0, %res0
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(0, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[0], subbrev));
/* Case 1: the other AND operand is the constant 42; a v_cndmask_b32 is expected. */
//! v1: %res1 = v_cndmask_b32 0, 42, %c
//! p_unit_test 1, %res1
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(42u), subbrev));
/* Case 2: the other AND operand is the s1 input %b. GFX9 is expected to keep
 * the v_subbrev_co_u32 + v_and_b32 pair, GFX10 to produce a v_cndmask_b32. */
//~gfx9! v1: %subbrev, s2: %_ = v_subbrev_co_u32 0, 0, %c
//~gfx9! v1: %res2 = v_and_b32 %b, %subbrev
//~gfx10! v1: %res2 = v_cndmask_b32 0, %b, %c
//! p_unit_test 2, %res2
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
writeout(2, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[1], subbrev));
/* Case 3: the subbrev result feeds both a v_xor_b32 and the v_and_b32. The
 * subbrev is expected to remain for the xor while the and becomes a v_cndmask_b32. */
//! v1: %subbrev1, s2: %_ = v_subbrev_co_u32 0, 0, %c
//! v1: %xor = v_xor_b32 %a, %subbrev1
//! v1: %res3 = v_cndmask_b32 0, %xor, %c
//! p_unit_test 3, %res3
subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2]));
Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
/* Case 4: the mask is built as 0 - v_cndmask(0, 1, c) via bld.vsub32 rather
 * than with create_subbrev_co; the same single v_cndmask_b32 is expected. */
//! v1: %res4 = v_cndmask_b32 0, %a, %c
//! p_unit_test 4, %res4
Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
Operand::c32(1u), Operand(inputs[2]));
Temp sub = bld.vsub32(bld.def(v1), Operand::zero(), cndmask);
writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
finish_opt_test();
}
END_TEST
BEGIN_TEST(optimize.add_lshl)
for (unsigned i = GFX8; i <= GFX10; i++) {
//>> s1: %a, v1: %b = p_startpgm