aco/optimizer: fold more multiplies (s_mul_i32, v_mul_u32_u24) into v_mad_u32_u24/v_mad_i32_i24

Foz-DB Navi48:
Totals from 1650 (2.00% of 82419) affected shaders:
Instrs: 975716 -> 970609 (-0.52%); split: -0.53%, +0.00%
CodeSize: 4986260 -> 4982916 (-0.07%); split: -0.09%, +0.02%
Latency: 2795394 -> 2793211 (-0.08%); split: -0.09%, +0.01%
InvThroughput: 620892 -> 620914 (+0.00%); split: -0.00%, +0.01%
VClause: 18773 -> 18729 (-0.23%)
SClause: 13219 -> 13218 (-0.01%)
Copies: 53619 -> 53620 (+0.00%); split: -0.01%, +0.01%
VALU: 592094 -> 592096 (+0.00%); split: -0.00%, +0.00%
SALU: 96586 -> 93532 (-3.16%); split: -3.17%, +0.00%

Foz-DB Navi21:
Totals from 1647 (2.00% of 82387) affected shaders:
Instrs: 1104100 -> 1100149 (-0.36%); split: -0.36%, +0.00%
CodeSize: 5631092 -> 5637668 (+0.12%); split: -0.00%, +0.12%
Latency: 3503029 -> 3501621 (-0.04%); split: -0.05%, +0.01%
InvThroughput: 1088494 -> 1088495 (+0.00%); split: -0.00%, +0.00%
VClause: 20898 -> 20885 (-0.06%)
Copies: 72641 -> 72635 (-0.01%); split: -0.02%, +0.01%
VALU: 725593 -> 725592 (-0.00%); split: -0.00%, +0.00%
SALU: 139046 -> 135175 (-2.78%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2025-02-10 17:13:08 +01:00 committed by Marge Bot
parent 92dbf42379
commit 0f7a1ce23e
2 changed files with 8 additions and 2 deletions

View file

@@ -4441,6 +4441,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
add_opt(s_add_i32, v_add3_u32, 0x3, "012", nullptr, true);
add_opt(v_lshlrev_b32, v_lshl_add_u32, 0x3, "210", nullptr, true);
add_opt(s_lshl_b32, v_lshl_add_u32, 0x3, "120", nullptr, true);
add_opt(s_mul_i32, v_mad_u32_u24, 0x3, "120", check_mul_u24_cb, true);
/* v_add_u32(a, v_cndmask_b32(0, 1, cond)) -> v_addc_co_u32(a, 0, cond) */
add_opt(v_cndmask_b32, v_addc_co_u32, 0x3, "0132",
and_cb<and_cb<check_const_cb<1, 0>, remove_const_cb<1>>, add_lm_def_cb>, true);
@@ -4466,6 +4467,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
and_cb<and_cb<shift_to_mad_cb<32>, check_mul_u24_cb>, pop_def_cb>);
add_opt(s_lshl_b32, v_mad_u32_u24, 0x3, "120",
and_cb<and_cb<shift_to_mad_cb<32>, check_mul_u24_cb>, pop_def_cb>);
add_opt(s_mul_i32, v_mad_u32_u24, 0x3, "120", and_cb<check_mul_u24_cb, pop_def_cb>);
}
} else if (info.opcode == aco_opcode::v_sub_u32 && !info.clamp) {
assert(ctx.program->gfx_level >= GFX9);
@@ -4480,6 +4482,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>);
add_opt(s_lshl_b32, v_mad_i32_i24, 0x2, "120",
and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>);
add_opt(v_mul_u32_u24, v_mad_i32_i24, 0x2, "120", neg_mul_to_i24_cb);
add_opt(s_mul_i32, v_mad_i32_i24, 0x2, "120", neg_mul_to_i24_cb);
} else if ((info.opcode == aco_opcode::v_sub_co_u32 ||
info.opcode == aco_opcode::v_sub_co_u32_e64) &&
!info.clamp) {
@@ -4498,6 +4502,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
and_cb<and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>, pop_def_cb>);
add_opt(s_lshl_b32, v_mad_i32_i24, 0x2, "120",
and_cb<and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>, pop_def_cb>);
add_opt(v_mul_u32_u24, v_mad_i32_i24, 0x2, "120", and_cb<neg_mul_to_i24_cb, pop_def_cb>);
add_opt(s_mul_i32, v_mad_i32_i24, 0x2, "120", and_cb<neg_mul_to_i24_cb, pop_def_cb>);
}
} else if ((info.opcode == aco_opcode::s_add_u32 ||
(info.opcode == aco_opcode::s_add_i32 && !ctx.uses[info.defs[1].tempId()])) &&

View file

@@ -1765,7 +1765,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
}
} else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false, 0x3);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
@@ -1773,7 +1773,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
case nir_op_imul24_relaxed: {
if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false, 0x3);
} else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_i32_i24, dst, true);
} else {