aco/optimizer: use new helpers for s_lshl<n>_add_u32

Foz-DB Navi48:
Totals from 7654 (9.29% of 82419) affected shaders:
Instrs: 6170479 -> 6174536 (+0.07%); split: -0.07%, +0.13%
CodeSize: 32489580 -> 32500100 (+0.03%); split: -0.07%, +0.10%
SpillSGPRs: 4253 -> 4224 (-0.68%); split: -0.71%, +0.02%
Latency: 60472662 -> 60489681 (+0.03%); split: -0.02%, +0.04%
InvThroughput: 9218099 -> 9218149 (+0.00%); split: -0.01%, +0.01%
VClause: 121094 -> 121089 (-0.00%); split: -0.01%, +0.00%
SClause: 178092 -> 179830 (+0.98%); split: -0.55%, +1.53%
Copies: 424495 -> 423756 (-0.17%); split: -0.57%, +0.40%
Branches: 120352 -> 120353 (+0.00%); split: -0.01%, +0.01%
PreSGPRs: 334391 -> 333381 (-0.30%); split: -0.33%, +0.02%
VALU: 3349394 -> 3349323 (-0.00%); split: -0.00%, +0.00%
SALU: 957913 -> 957149 (-0.08%); split: -0.25%, +0.17%
VOPD: 9177 -> 9179 (+0.02%); split: +0.03%, -0.01%

Foz-DB Navi21:
Totals from 7649 (9.28% of 82387) affected shaders:
Instrs: 6144605 -> 6143005 (-0.03%); split: -0.06%, +0.04%
CodeSize: 32685976 -> 32672380 (-0.04%); split: -0.08%, +0.04%
SpillSGPRs: 3079 -> 3067 (-0.39%); split: -0.42%, +0.03%
Latency: 64979945 -> 65002741 (+0.04%); split: -0.02%, +0.05%
InvThroughput: 14754398 -> 14754230 (-0.00%); split: -0.01%, +0.01%
VClause: 132336 -> 132357 (+0.02%); split: -0.02%, +0.03%
SClause: 190229 -> 191340 (+0.58%); split: -1.01%, +1.60%
Copies: 511915 -> 511287 (-0.12%); split: -0.44%, +0.32%
Branches: 157156 -> 157154 (-0.00%); split: -0.01%, +0.01%
PreSGPRs: 345761 -> 344826 (-0.27%); split: -0.33%, +0.05%
VALU: 3856887 -> 3856928 (+0.00%); split: -0.01%, +0.01%
SALU: 1001190 -> 1000362 (-0.08%); split: -0.22%, +0.14%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2024-12-14 19:40:51 +01:00 committed by Marge Bot
parent d9919c3e10
commit 7108dac637

View file

@ -3038,14 +3038,6 @@ original_temp_id(opt_ctx& ctx, Temp tmp)
return tmp.id();
}
Operand
copy_operand(opt_ctx& ctx, Operand op)
{
if (op.isTemp())
ctx.uses[op.tempId()]++;
return op;
}
Instruction*
follow_operand(opt_ctx& ctx, Operand op, bool ignore_uses = false)
{
@ -3697,43 +3689,6 @@ combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false;
}
/* s_add_{i32,u32}(a, s_lshl_b32(b, <n>)) -> s_lshl<n>_add_u32(a, b) */
bool
combine_salu_lshl_add(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
if (instr->opcode == aco_opcode::s_add_i32 && ctx.uses[instr->definitions[1].tempId()])
return false;
for (unsigned i = 0; i < 2; i++) {
Instruction* op2_instr = follow_operand(ctx, instr->operands[i], true);
if (!op2_instr || op2_instr->opcode != aco_opcode::s_lshl_b32 ||
ctx.uses[op2_instr->definitions[1].tempId()])
continue;
if (!op2_instr->operands[1].isConstant())
continue;
uint32_t shift = op2_instr->operands[1].constantValue();
if (shift < 1 || shift > 4)
continue;
if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
continue;
instr->operands[1] = instr->operands[!i];
instr->operands[0] = copy_operand(ctx, op2_instr->operands[0]);
decrease_and_dce(ctx, op2_instr->definitions[0].getTemp());
ctx.info[instr->definitions[0].tempId()].label = 0;
instr->opcode = std::array<aco_opcode, 4>{
aco_opcode::s_lshl1_add_u32, aco_opcode::s_lshl2_add_u32, aco_opcode::s_lshl3_add_u32,
aco_opcode::s_lshl4_add_u32}[shift - 1];
return true;
}
return false;
}
/* s_abs_i32(s_sub_[iu]32(a, b)) -> s_absdiff_i32(a, b)
* s_abs_i32(s_add_[iu]32(a, #b)) -> s_absdiff_i32(a, -b)
*/
@ -4642,9 +4597,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
2);
} else if ((instr->opcode == aco_opcode::s_add_u32 || instr->opcode == aco_opcode::s_add_i32) &&
ctx.program->gfx_level >= GFX9) {
combine_salu_lshl_add(ctx, instr);
} else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
if (!combine_salu_not_bitwise(ctx, instr))
combine_inverse_comparison(ctx, instr);
@ -4859,6 +4811,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
add_opt(s_add_i32, v_add3_u32, 0x3, "012", nullptr, true);
add_opt(v_lshlrev_b32, v_lshl_add_u32, 0x3, "210", nullptr, true);
add_opt(s_lshl_b32, v_lshl_add_u32, 0x3, "120", nullptr, true);
} else if ((info.opcode == aco_opcode::s_add_u32 ||
(info.opcode == aco_opcode::s_add_i32 && !ctx.uses[info.defs[1].tempId()])) &&
ctx.program->gfx_level >= GFX9) {
add_opt(s_lshl_b32, s_lshl1_add_u32, 0x3, "102", remove_const_cb<1>);
add_opt(s_lshl_b32, s_lshl2_add_u32, 0x3, "102", remove_const_cb<2>);
add_opt(s_lshl_b32, s_lshl3_add_u32, 0x3, "102", remove_const_cb<3>);
add_opt(s_lshl_b32, s_lshl4_add_u32, 0x3, "102", remove_const_cb<4>);
}
if (match_and_apply_patterns(ctx, info, patterns)) {