aco/optimizer: use new helpers for bitwise n2 opts

Foz-DB Navi48:
Totals from 604 (0.73% of 82419) affected shaders:
Instrs: 2759878 -> 2758431 (-0.05%); split: -0.06%, +0.01%
CodeSize: 14801888 -> 14793412 (-0.06%); split: -0.06%, +0.01%
SpillSGPRs: 6237 -> 6233 (-0.06%)
Latency: 23509766 -> 23507853 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 7471297 -> 7471008 (-0.00%); split: -0.00%, +0.00%
Branches: 104979 -> 104977 (-0.00%)
PreSGPRs: 51506 -> 51408 (-0.19%); split: -0.20%, +0.01%
VALU: 1351564 -> 1351561 (-0.00%); split: -0.00%, +0.00%
SALU: 537430 -> 536266 (-0.22%); split: -0.23%, +0.01%
VOPD: 3834 -> 3833 (-0.03%)

Foz-DB Navi21:
Totals from 739 (0.90% of 82387) affected shaders:
Instrs: 2489644 -> 2488228 (-0.06%); split: -0.06%, +0.00%
CodeSize: 13930192 -> 13915972 (-0.10%); split: -0.11%, +0.00%
SpillSGPRs: 980 -> 976 (-0.41%)
Latency: 25027553 -> 25027845 (+0.00%); split: -0.01%, +0.01%
InvThroughput: 8591377 -> 8591097 (-0.00%); split: -0.00%, +0.00%
SClause: 78380 -> 78382 (+0.00%)
Copies: 275433 -> 275393 (-0.01%); split: -0.02%, +0.01%
Branches: 113718 -> 113716 (-0.00%)
PreSGPRs: 48377 -> 48260 (-0.24%); split: -0.27%, +0.03%
VALU: 1589250 -> 1589240 (-0.00%)
SALU: 420348 -> 418962 (-0.33%); split: -0.34%, +0.01%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2024-12-16 12:27:10 +01:00 committed by Marge Bot
parent 758fe79ad5
commit ee0354e0f1

View file

@ -3469,46 +3469,6 @@ combine_salu_not_bitwise(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return true;
}
/* Folds a scalar NOT into the scalar AND/OR that consumes it:
 *   s_and_b32(a, s_not_b32(b)) -> s_andn2_b32(a, b)
 *   s_or_b32(a, s_not_b32(b))  -> s_orn2_b32(a, b)
 *   s_and_b64(a, s_not_b64(b)) -> s_andn2_b64(a, b)
 *   s_or_b64(a, s_not_b64(b))  -> s_orn2_b64(a, b)
 *
 * Either operand of instr may be the NOT; operand 0 is tried first.
 * Returns true when instr was rewritten in place, false otherwise.
 */
bool
combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
   /* Results labelled as uniform bools are left untouched. */
   if (instr->definitions[0].isTemp()) {
      if (ctx.info[instr->definitions[0].tempId()].is_uniform_bool())
         return false;
   }

   for (unsigned not_idx = 0; not_idx < 2; not_idx++) {
      const unsigned kept_idx = not_idx == 0 ? 1 : 0;

      Instruction* not_instr = follow_operand(ctx, instr->operands[not_idx]);
      if (!not_instr)
         continue;

      const bool is_salu_not = not_instr->opcode == aco_opcode::s_not_b32 ||
                               not_instr->opcode == aco_opcode::s_not_b64;
      if (!is_salu_not)
         continue;

      /* The NOT's second definition (presumably its SCC result -- confirm) must be unused,
       * since the combined instruction does not reproduce it.
       */
      if (ctx.uses[not_instr->definitions[1].tempId()] != 0)
         continue;

      /* Two literal operands are only accepted when they carry the same value
       * (presumably because a single literal can be encoded -- confirm).
       */
      const bool both_literal =
         instr->operands[kept_idx].isLiteral() && not_instr->operands[0].isLiteral();
      if (both_literal &&
          instr->operands[kept_idx].constantValue() != not_instr->operands[0].constantValue())
         continue;

      /* Drop the use of the NOT's result before the operand slot is overwritten. */
      ctx.uses[instr->operands[not_idx].tempId()]--;

      /* The n2 opcodes take the inverted source as operand 1, so the kept operand moves to
       * slot 0 and the NOT's input goes into slot 1.
       */
      instr->operands[0] = instr->operands[kept_idx];
      instr->operands[1] = not_instr->operands[0];

      /* Any label previously attached to the result is cleared. */
      ctx.info[instr->definitions[0].tempId()].label = 0;

      if (instr->opcode == aco_opcode::s_and_b32)
         instr->opcode = aco_opcode::s_andn2_b32;
      else if (instr->opcode == aco_opcode::s_or_b32)
         instr->opcode = aco_opcode::s_orn2_b32;
      else if (instr->opcode == aco_opcode::s_and_b64)
         instr->opcode = aco_opcode::s_andn2_b64;
      else if (instr->opcode == aco_opcode::s_or_b64)
         instr->opcode = aco_opcode::s_orn2_b64;

      return true;
   }

   return false;
}
/* s_abs_i32(s_sub_[iu]32(a, b)) -> s_absdiff_i32(a, b)
* s_abs_i32(s_add_[iu]32(a, #b)) -> s_absdiff_i32(a, -b)
*/
@ -4267,15 +4227,11 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return;
}
if (instr->isSDWA()) {
} else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) {
if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) {
combine_not_xor(ctx, instr);
} else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
if (!combine_salu_not_bitwise(ctx, instr))
combine_inverse_comparison(ctx, instr);
} else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
combine_salu_n2(ctx, instr);
} else if (instr->opcode == aco_opcode::s_abs_i32) {
combine_sabsdiff(ctx, instr);
}
@ -4536,6 +4492,18 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (info.opcode == aco_opcode::v_and_b32) {
add_opt(v_not_b32, v_bfi_b32, 0x3, "10", insert_const_cb<1, 0>, true);
add_opt(s_not_b32, v_bfi_b32, 0x3, "10", insert_const_cb<1, 0>, true);
} else if (info.opcode == aco_opcode::s_and_b32) {
add_opt(s_not_b32, s_andn2_b32, 0x3, "01");
} else if (info.opcode == aco_opcode::s_and_b64) {
add_opt(s_not_b64, s_andn2_b64, 0x3, "01");
} else if (info.opcode == aco_opcode::s_or_b32) {
add_opt(s_not_b32, s_orn2_b32, 0x3, "01");
} else if (info.opcode == aco_opcode::s_or_b64) {
add_opt(s_not_b64, s_orn2_b64, 0x3, "01");
} else if (info.opcode == aco_opcode::s_xor_b32) {
add_opt(s_not_b32, s_xnor_b32, 0x3, "01");
} else if (info.opcode == aco_opcode::s_xor_b64) {
add_opt(s_not_b64, s_xnor_b64, 0x3, "01");
}
if (match_and_apply_patterns(ctx, info, patterns)) {