aco/optimizer: use new helpers for v_add_lshl_u32

Foz-DB Navi48:
Totals from 357 (0.43% of 82419) affected shaders:
Instrs: 244419 -> 243608 (-0.33%); split: -0.34%, +0.01%
CodeSize: 1302584 -> 1304188 (+0.12%); split: -0.00%, +0.13%
VGPRs: 21240 -> 21216 (-0.11%)
Latency: 1226165 -> 1225651 (-0.04%); split: -0.06%, +0.02%
InvThroughput: 162432 -> 161940 (-0.30%); split: -0.30%, +0.00%
Copies: 16607 -> 16610 (+0.02%)
PreSGPRs: 14082 -> 14135 (+0.38%)
PreVGPRs: 15917 -> 15914 (-0.02%)
VALU: 136308 -> 135699 (-0.45%)
SALU: 24415 -> 24418 (+0.01%)
VOPD: 333 -> 334 (+0.30%)

Foz-DB Navi21:
Totals from 319 (0.39% of 82387) affected shaders:
Instrs: 255434 -> 254831 (-0.24%)
CodeSize: 1375792 -> 1378164 (+0.17%)
VGPRs: 15360 -> 15344 (-0.10%)
Latency: 1405956 -> 1405181 (-0.06%)
InvThroughput: 174402 -> 173816 (-0.34%)
Copies: 25892 -> 25891 (-0.00%)
PreSGPRs: 14129 -> 14132 (+0.02%)
PreVGPRs: 12457 -> 12454 (-0.02%)
VALU: 139630 -> 139032 (-0.43%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2024-12-14 19:52:25 +01:00 committed by Marge Bot
parent 7108dac637
commit 6a1caabd64

View file

@ -4594,9 +4594,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->opcode == aco_opcode::v_subrev_co_u32 ||
instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
} else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
2);
} else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
if (!combine_salu_not_bitwise(ctx, instr))
combine_inverse_comparison(ctx, instr);
@ -4818,6 +4815,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
add_opt(s_lshl_b32, s_lshl2_add_u32, 0x3, "102", remove_const_cb<2>);
add_opt(s_lshl_b32, s_lshl3_add_u32, 0x3, "102", remove_const_cb<3>);
add_opt(s_lshl_b32, s_lshl4_add_u32, 0x3, "102", remove_const_cb<4>);
} else if (info.opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
add_opt(v_add_u32, v_add_lshl_u32, 0x2, "120", nullptr, true);
}
if (match_and_apply_patterns(ctx, info, patterns)) {