aco/optimizer: add some bitop combining

Foz-DB Navi48:
Totals from 53 (0.06% of 82419) affected shaders:
Instrs: 172843 -> 172769 (-0.04%); split: -0.06%, +0.01%
CodeSize: 937308 -> 936924 (-0.04%); split: -0.04%, +0.00%
Latency: 454652 -> 454823 (+0.04%); split: -0.01%, +0.05%
InvThroughput: 89833 -> 89812 (-0.02%); split: -0.06%, +0.03%
PreSGPRs: 2926 -> 2929 (+0.10%)
PreVGPRs: 2920 -> 2919 (-0.03%); split: -0.07%, +0.03%
VALU: 76638 -> 76556 (-0.11%)
SALU: 37856 -> 37859 (+0.01%); split: -0.01%, +0.01%
VOPD: 10943 -> 10936 (-0.06%)

Foz-DB Navi21:
Totals from 59 (0.07% of 82387) affected shaders:
Instrs: 1047744 -> 1047578 (-0.02%)
CodeSize: 5641948 -> 5640780 (-0.02%)
Latency: 5116816 -> 5116957 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 1274035 -> 1274023 (-0.00%); split: -0.00%, +0.00%
VClause: 30744 -> 30745 (+0.00%)
PreSGPRs: 3329 -> 3333 (+0.12%)
PreVGPRs: 4130 -> 4129 (-0.02%); split: -0.05%, +0.02%
VALU: 689731 -> 689562 (-0.02%)
SALU: 162830 -> 162833 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2025-03-01 11:26:02 +01:00 committed by Marge Bot
parent ee0354e0f1
commit 0e4d4aeef7

View file

@@ -4428,6 +4428,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (info.opcode == aco_opcode::v_add_u32 && !info.clamp) {
assert(ctx.program->gfx_level >= GFX9);
add_opt(v_bcnt_u32_b32, v_bcnt_u32_b32, 0x3, "102", remove_const_cb<0>, true);
add_opt(s_bcnt1_i32_b32, v_bcnt_u32_b32, 0x3, "10", nullptr, true);
add_opt(v_mbcnt_lo_u32_b32, v_mbcnt_lo_u32_b32, 0x3, "102", remove_const_cb<0>, true);
add_opt(v_mbcnt_hi_u32_b32_e64, v_mbcnt_hi_u32_b32_e64, 0x3, "102", remove_const_cb<0>, true);
add_opt(v_mad_u32_u16, v_mad_u32_u16, 0x3, "1203", remove_const_cb<0>, true);
add_opt(v_mul_u32_u24, v_mad_u32_u24, 0x3, "120", nullptr, true);
add_opt(v_mul_i32_i24, v_mad_i32_i24, 0x3, "120", nullptr, true);
@@ -4450,6 +4453,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (ctx.uses[info.defs[1].tempId()] == 0) {
add_opt(v_bcnt_u32_b32, v_bcnt_u32_b32, 0x3, "102",
and_cb<remove_const_cb<0>, pop_def_cb>);
add_opt(s_bcnt1_i32_b32, v_bcnt_u32_b32, 0x3, "10", pop_def_cb);
add_opt(v_mbcnt_lo_u32_b32, v_mbcnt_lo_u32_b32, 0x3, "102",
and_cb<remove_const_cb<0>, pop_def_cb>);
add_opt(v_mbcnt_hi_u32_b32, v_mbcnt_hi_u32_b32, 0x3, "102",
and_cb<remove_const_cb<0>, pop_def_cb>);
add_opt(v_mbcnt_hi_u32_b32_e64, v_mbcnt_hi_u32_b32_e64, 0x3, "102",
and_cb<remove_const_cb<0>, pop_def_cb>);
add_opt(v_mul_u32_u24, v_mad_u32_u24, 0x3, "120", pop_def_cb);
add_opt(v_mul_i32_i24, v_mad_i32_i24, 0x3, "120", pop_def_cb);
add_opt(v_lshlrev_b32, v_mad_u32_u24, 0x3, "210",