aco: combine a | ~b to bfi(b, a, -1)

Somehow I missed this when writing the a & ~b patch.

Foz-DB Navi21:
Totals from 1591 (1.20% of 132657) affected shaders:
Instrs: 2316379 -> 2315940 (-0.02%)
CodeSize: 12524240 -> 12528724 (+0.04%); split: -0.00%, +0.04%
Latency: 45393195 -> 45389285 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 8658991 -> 8657944 (-0.01%); split: -0.01%, +0.00%
Copies: 135777 -> 135778 (+0.00%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24505>
This commit is contained in:
Georg Lehmann 2023-08-04 20:55:58 +02:00 committed by Marge Bot
parent 27eafbcd4e
commit 87d13ee73d

View file

@ -3588,9 +3588,11 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false; return false;
} }
/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a) */ /* v_and(a, not(b)) -> v_bfi_b32(b, 0, a)
* v_or(a, not(b)) -> v_bfi_b32(b, a, -1)
*/
bool bool
combine_v_and_not(opt_ctx& ctx, aco_ptr<Instruction>& instr) combine_v_andor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{ {
if (instr->usesModifiers()) if (instr->usesModifiers())
return false; return false;
@ -3606,15 +3608,20 @@ combine_v_and_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
Operand::zero(), Operand::zero(),
instr->operands[!i], instr->operands[!i],
}; };
if (instr->opcode == aco_opcode::v_or_b32) {
ops[1] = instr->operands[!i];
ops[2] = Operand::c32(-1);
}
if (!check_vop3_operands(ctx, 3, ops)) if (!check_vop3_operands(ctx, 3, ops))
continue; continue;
Instruction* new_instr = Instruction* new_instr =
create_instruction<VALU_instruction>(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1); create_instruction<VALU_instruction>(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1);
new_instr->operands[0] = copy_operand(ctx, op_instr->operands[0]); if (op_instr->operands[0].isTemp())
new_instr->operands[1] = Operand::zero(); ctx.uses[op_instr->operands[0].tempId()]++;
new_instr->operands[2] = instr->operands[!i]; for (unsigned j = 0; j < 3; j++)
new_instr->operands[j] = ops[j];
new_instr->definitions[0] = instr->definitions[0]; new_instr->definitions[0] = instr->definitions[0];
new_instr->pass_flags = instr->pass_flags; new_instr->pass_flags = instr->pass_flags;
instr.reset(new_instr); instr.reset(new_instr);
@ -4419,6 +4426,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32, } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
"012", 1 | 2)) { "012", 1 | 2)) {
} else if (combine_add_or_then_and_lshl(ctx, instr)) { } else if (combine_add_or_then_and_lshl(ctx, instr)) {
} else if (combine_v_andor_not(ctx, instr)) {
} }
} else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) { } else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012", if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
@ -4496,7 +4504,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
combine_sabsdiff(ctx, instr); combine_sabsdiff(ctx, instr);
} else if (instr->opcode == aco_opcode::v_and_b32) { } else if (instr->opcode == aco_opcode::v_and_b32) {
if (combine_and_subbrev(ctx, instr)) { if (combine_and_subbrev(ctx, instr)) {
} else if (combine_v_and_not(ctx, instr)) { } else if (combine_v_andor_not(ctx, instr)) {
} }
} else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) { } else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
/* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32. /* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32.