mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
aco/optimizer: some more mul opts
Foz-DB Navi48:
Totals from 1650 (2.00% of 82419) affected shaders:
Instrs: 975716 -> 970609 (-0.52%); split: -0.53%, +0.00%
CodeSize: 4986260 -> 4982916 (-0.07%); split: -0.09%, +0.02%
Latency: 2795394 -> 2793211 (-0.08%); split: -0.09%, +0.01%
InvThroughput: 620892 -> 620914 (+0.00%); split: -0.00%, +0.01%
VClause: 18773 -> 18729 (-0.23%)
SClause: 13219 -> 13218 (-0.01%)
Copies: 53619 -> 53620 (+0.00%); split: -0.01%, +0.01%
VALU: 592094 -> 592096 (+0.00%); split: -0.00%, +0.00%
SALU: 96586 -> 93532 (-3.16%); split: -3.17%, +0.00%

Foz-DB Navi21:
Totals from 1647 (2.00% of 82387) affected shaders:
Instrs: 1104100 -> 1100149 (-0.36%); split: -0.36%, +0.00%
CodeSize: 5631092 -> 5637668 (+0.12%); split: -0.00%, +0.12%
Latency: 3503029 -> 3501621 (-0.04%); split: -0.05%, +0.01%
InvThroughput: 1088494 -> 1088495 (+0.00%); split: -0.00%, +0.00%
VClause: 20898 -> 20885 (-0.06%)
Copies: 72641 -> 72635 (-0.01%); split: -0.02%, +0.01%
VALU: 725593 -> 725592 (-0.00%); split: -0.00%, +0.00%
SALU: 139046 -> 135175 (-2.78%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
parent
92dbf42379
commit
0f7a1ce23e
2 changed files with 8 additions and 2 deletions
|
|
@ -4441,6 +4441,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
add_opt(s_add_i32, v_add3_u32, 0x3, "012", nullptr, true);
|
||||
add_opt(v_lshlrev_b32, v_lshl_add_u32, 0x3, "210", nullptr, true);
|
||||
add_opt(s_lshl_b32, v_lshl_add_u32, 0x3, "120", nullptr, true);
|
||||
add_opt(s_mul_i32, v_mad_u32_u24, 0x3, "120", check_mul_u24_cb, true);
|
||||
/* v_add_u32(a, v_cndmask_b32(0, 1, cond)) -> v_addc_co_u32(a, 0, cond) */
|
||||
add_opt(v_cndmask_b32, v_addc_co_u32, 0x3, "0132",
|
||||
and_cb<and_cb<check_const_cb<1, 0>, remove_const_cb<1>>, add_lm_def_cb>, true);
|
||||
|
|
@ -4466,6 +4467,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
and_cb<and_cb<shift_to_mad_cb<32>, check_mul_u24_cb>, pop_def_cb>);
|
||||
add_opt(s_lshl_b32, v_mad_u32_u24, 0x3, "120",
|
||||
and_cb<and_cb<shift_to_mad_cb<32>, check_mul_u24_cb>, pop_def_cb>);
|
||||
add_opt(s_mul_i32, v_mad_u32_u24, 0x3, "120", and_cb<check_mul_u24_cb, pop_def_cb>);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_sub_u32 && !info.clamp) {
|
||||
assert(ctx.program->gfx_level >= GFX9);
|
||||
|
|
@ -4480,6 +4482,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>);
|
||||
add_opt(s_lshl_b32, v_mad_i32_i24, 0x2, "120",
|
||||
and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>);
|
||||
add_opt(v_mul_u32_u24, v_mad_i32_i24, 0x2, "120", neg_mul_to_i24_cb);
|
||||
add_opt(s_mul_i32, v_mad_i32_i24, 0x2, "120", neg_mul_to_i24_cb);
|
||||
} else if ((info.opcode == aco_opcode::v_sub_co_u32 ||
|
||||
info.opcode == aco_opcode::v_sub_co_u32_e64) &&
|
||||
!info.clamp) {
|
||||
|
|
@ -4498,6 +4502,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
and_cb<and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>, pop_def_cb>);
|
||||
add_opt(s_lshl_b32, v_mad_i32_i24, 0x2, "120",
|
||||
and_cb<and_cb<shift_to_mad_cb<32>, neg_mul_to_i24_cb>, pop_def_cb>);
|
||||
add_opt(v_mul_u32_u24, v_mad_i32_i24, 0x2, "120", and_cb<neg_mul_to_i24_cb, pop_def_cb>);
|
||||
add_opt(s_mul_i32, v_mad_i32_i24, 0x2, "120", and_cb<neg_mul_to_i24_cb, pop_def_cb>);
|
||||
}
|
||||
} else if ((info.opcode == aco_opcode::s_add_u32 ||
|
||||
(info.opcode == aco_opcode::s_add_i32 && !ctx.uses[info.defs[1].tempId()])) &&
|
||||
|
|
|
|||
|
|
@ -1765,7 +1765,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
|
||||
}
|
||||
} else if (dst.regClass() == s1) {
|
||||
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
|
||||
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false, 0x3);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
|
@ -1773,7 +1773,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
case nir_op_imul24_relaxed: {
|
||||
if (dst.regClass() == s1) {
|
||||
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
|
||||
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false, 0x3);
|
||||
} else if (dst.regClass() == v1) {
|
||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_i32_i24, dst, true);
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue