mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 13:50:11 +01:00
aco/optimizer: reassociate mul(mul(a, const), b) into mul_omod(a, b)
Foz-DB Navi48:
Totals from 14608 (14.96% of 97637) affected shaders:
MaxWaves: 364201 -> 364421 (+0.06%)
Instrs: 28051720 -> 28022503 (-0.10%); split: -0.13%, +0.03%
CodeSize: 148938740 -> 148943480 (+0.00%); split: -0.04%, +0.04%
VGPRs: 994520 -> 994004 (-0.05%); split: -0.05%, +0.00%
SpillSGPRs: 45182 -> 45179 (-0.01%)
Latency: 187734461 -> 187725301 (-0.00%); split: -0.07%, +0.06%
InvThroughput: 33967002 -> 33949881 (-0.05%); split: -0.11%, +0.06%
VClause: 495237 -> 495207 (-0.01%); split: -0.03%, +0.02%
Copies: 2048324 -> 2047937 (-0.02%); split: -0.12%, +0.10%
Branches: 598445 -> 598431 (-0.00%); split: -0.01%, +0.01%
PreSGPRs: 877715 -> 877684 (-0.00%)
PreVGPRs: 778146 -> 776383 (-0.23%); split: -0.23%, +0.00%
VALU: 16413380 -> 16391508 (-0.13%); split: -0.15%, +0.01%
SALU: 3685279 -> 3677655 (-0.21%); split: -0.23%, +0.02%
VOPD: 26219 -> 25926 (-1.12%); split: +0.43%, -1.55%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38730>
This commit is contained in:
parent
125ac1626d
commit
a8f5ced670
1 changed files with 79 additions and 10 deletions
|
|
@ -3991,6 +3991,53 @@ create_med3_cb(opt_ctx& ctx, alu_opt_info& info)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
can_reassoc_omod(opt_ctx& ctx, const alu_opt_info& info, unsigned bit_size)
|
||||||
|
{
|
||||||
|
unsigned denorm = bit_size == 32 ? ctx.fp_mode.denorm32 : ctx.fp_mode.denorm16_64;
|
||||||
|
bool no_signed_zero =
|
||||||
|
info.opcode == aco_opcode::v_mul_legacy_f32 || !info.defs[0].isSZPreserve();
|
||||||
|
|
||||||
|
return no_signed_zero && !info.omod && !info.defs[0].isPrecise() && denorm == fp_denorm_flush;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool is_rcp>
|
||||||
|
bool
|
||||||
|
reassoc_omod_cb(opt_ctx& ctx, alu_opt_info& info)
|
||||||
|
{
|
||||||
|
if (info.defs[0].isPrecise())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
aco_type type = instr_info.alu_opcode_infos[(int)info.opcode].def_types[0];
|
||||||
|
|
||||||
|
for (unsigned op_idx = 0; op_idx < 2; op_idx++) {
|
||||||
|
uint64_t constant = 0;
|
||||||
|
if (!op_info_get_constant(ctx, info.operands[op_idx], type, &constant))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
double val = extract_float(constant, type.bit_size);
|
||||||
|
if (val < 0.0) {
|
||||||
|
info.operands[!op_idx].neg[0] ^= true;
|
||||||
|
val = fabs(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val == (is_rcp ? 0.5 : 2.0))
|
||||||
|
info.omod = 1;
|
||||||
|
else if (val == (is_rcp ? 0.25 : 4.0))
|
||||||
|
info.omod = 2;
|
||||||
|
else if (val == (is_rcp ? 2.0 : 0.5))
|
||||||
|
info.omod = 3;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
info.operands.erase(std::next(info.operands.begin(), op_idx));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <unsigned bits>
|
template <unsigned bits>
|
||||||
bool
|
bool
|
||||||
shift_to_mad_cb(opt_ctx& ctx, alu_opt_info& info)
|
shift_to_mad_cb(opt_ctx& ctx, alu_opt_info& info)
|
||||||
|
|
@ -4293,16 +4340,38 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||||
} else if (info.opcode == aco_opcode::v_min_i16_e64) {
|
} else if (info.opcode == aco_opcode::v_min_i16_e64) {
|
||||||
add_opt(v_min_i16_e64, v_min3_i16, 0x3, "120", nullptr, true);
|
add_opt(v_min_i16_e64, v_min3_i16, 0x3, "120", nullptr, true);
|
||||||
add_opt(v_max_i16_e64, v_med3_i16, 0x3, "012", create_med3_cb<true>, true);
|
add_opt(v_max_i16_e64, v_med3_i16, 0x3, "012", create_med3_cb<true>, true);
|
||||||
} else if (((info.opcode == aco_opcode::v_mul_f32 && !info.defs[0].isNaNPreserve() &&
|
} else if (info.opcode == aco_opcode::v_mul_f32 || info.opcode == aco_opcode::v_mul_legacy_f32) {
|
||||||
!info.defs[0].isInfPreserve()) ||
|
bool legacy = info.opcode == aco_opcode::v_mul_legacy_f32;
|
||||||
(info.opcode == aco_opcode::v_mul_legacy_f32 && !info.defs[0].isSZPreserve())) &&
|
|
||||||
!info.clamp && !info.omod && !ctx.fp_mode.must_flush_denorms32) {
|
if ((legacy ? !info.defs[0].isSZPreserve()
|
||||||
/* v_mul_f32(a, v_cndmask_b32(0, 1.0, cond)) -> v_cndmask_b32(0, a, cond) */
|
: (!info.defs[0].isNaNPreserve() && !info.defs[0].isInfPreserve())) &&
|
||||||
add_opt(v_cndmask_b32, v_cndmask_b32, 0x3, "1032",
|
!info.clamp && !info.omod && !ctx.fp_mode.must_flush_denorms32) {
|
||||||
and_cb<check_const_cb<0, 0>, remove_const_cb<0x3f800000>>, true);
|
/* v_mul_f32(a, v_cndmask_b32(0, 1.0, cond)) -> v_cndmask_b32(0, a, cond) */
|
||||||
/* v_mul_f32(a, v_cndmask_b32(1.0, 0, cond)) -> v_cndmask_b32(a, 0, cond) */
|
add_opt(v_cndmask_b32, v_cndmask_b32, 0x3, "1032",
|
||||||
add_opt(v_cndmask_b32, v_cndmask_b32, 0x3, "0231",
|
and_cb<check_const_cb<0, 0>, remove_const_cb<0x3f800000>>, true);
|
||||||
and_cb<check_const_cb<1, 0>, remove_const_cb<0x3f800000>>, true);
|
/* v_mul_f32(a, v_cndmask_b32(1.0, 0, cond)) -> v_cndmask_b32(a, 0, cond) */
|
||||||
|
add_opt(v_cndmask_b32, v_cndmask_b32, 0x3, "0231",
|
||||||
|
and_cb<check_const_cb<1, 0>, remove_const_cb<0x3f800000>>, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (can_reassoc_omod(ctx, info, 32)) {
|
||||||
|
if (legacy) {
|
||||||
|
add_opt(v_mul_f32, v_mul_legacy_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
add_opt(v_mul_legacy_f32, v_mul_legacy_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
add_opt(s_mul_f32, v_mul_legacy_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
} else {
|
||||||
|
add_opt(v_mul_f32, v_mul_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
add_opt(v_mul_legacy_f32, v_mul_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
add_opt(s_mul_f32, v_mul_f32, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (info.opcode == aco_opcode::v_mul_f16 && can_reassoc_omod(ctx, info, 16)) {
|
||||||
|
add_opt(v_mul_f16, v_mul_f16, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
add_opt(s_mul_f16, v_mul_f16, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
} else if (info.opcode == aco_opcode::v_mul_f64 && can_reassoc_omod(ctx, info, 64)) {
|
||||||
|
add_opt(v_mul_f64, v_mul_f64, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
|
} else if (info.opcode == aco_opcode::v_mul_f64_e64 && can_reassoc_omod(ctx, info, 64)) {
|
||||||
|
add_opt(v_mul_f64_e64, v_mul_f64_e64, 0x3, "120", reassoc_omod_cb<false>, true);
|
||||||
} else if (info.opcode == aco_opcode::v_add_u16 && !info.clamp) {
|
} else if (info.opcode == aco_opcode::v_add_u16 && !info.clamp) {
|
||||||
if (ctx.program->gfx_level < GFX9) {
|
if (ctx.program->gfx_level < GFX9) {
|
||||||
add_opt(v_mul_lo_u16, v_mad_legacy_u16, 0x3, "120");
|
add_opt(v_mul_lo_u16, v_mad_legacy_u16, 0x3, "120");
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue