aco/optimizer: create max3/min3/med3 with salu min/max

Foz-DB Navi48:
Totals from 175 (0.21% of 82419) affected shaders:
Instrs: 465863 -> 465260 (-0.13%); split: -0.13%, +0.00%
CodeSize: 2362264 -> 2360744 (-0.06%); split: -0.07%, +0.00%
Latency: 1548501 -> 1548371 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 227683 -> 227630 (-0.02%); split: -0.08%, +0.06%
Copies: 33646 -> 33648 (+0.01%)
PreSGPRs: 9996 -> 10004 (+0.08%)
VALU: 175836 -> 175850 (+0.01%)
SALU: 122094 -> 121621 (-0.39%); split: -0.39%, +0.00%

Foz-DB Navi21:
Totals from 1 (0.00% of 82387) affected shaders:
InvThroughput: 74 -> 76 (+2.70%)
VALU: 57 -> 58 (+1.75%)
SALU: 61 -> 60 (-1.64%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38150>
This commit is contained in:
Georg Lehmann 2024-12-14 15:40:29 +01:00 committed by Marge Bot
parent d21734e024
commit f0e24284f5

View file

@@ -4907,52 +4907,80 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
add_opt(s_mul_f16, s_fmac_f16, 0x3, "120", create_fma_cb);
} else if (info.opcode == aco_opcode::v_max_f32) {
add_opt(v_max_f32, v_max3_f32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_max_f32, v_max3_f32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_min_f32, v_minmax_f32, 0x3, "120", nullptr, true);
else
add_opt(s_min_f32, v_minmax_f32, 0x3, "120", nullptr, true);
} else {
add_opt(v_min_f32, v_med3_f32, 0x3, "012", create_med3_cb<false>, true);
}
} else if (info.opcode == aco_opcode::v_min_f32) {
add_opt(v_min_f32, v_min3_f32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_min_f32, v_min3_f32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_max_f32, v_maxmin_f32, 0x3, "120", nullptr, true);
else
add_opt(s_max_f32, v_maxmin_f32, 0x3, "120", nullptr, true);
} else {
add_opt(v_max_f32, v_med3_f32, 0x3, "012", create_med3_cb<true>, true);
}
} else if (info.opcode == aco_opcode::v_max_u32) {
add_opt(v_max_u32, v_max3_u32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_max_u32, v_max3_u32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_min_u32, v_minmax_u32, 0x3, "120", nullptr, true);
else
add_opt(s_min_u32, v_minmax_u32, 0x3, "120", nullptr, true);
} else {
add_opt(v_min_u32, v_med3_u32, 0x3, "012", create_med3_cb<false>, true);
add_opt(s_min_u32, v_med3_u32, 0x3, "012", create_med3_cb<false>, true);
}
} else if (info.opcode == aco_opcode::v_min_u32) {
add_opt(v_min_u32, v_min3_u32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_min_u32, v_min3_u32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_max_u32, v_maxmin_u32, 0x3, "120", nullptr, true);
else
add_opt(s_max_u32, v_maxmin_u32, 0x3, "120", nullptr, true);
} else {
add_opt(v_max_u32, v_med3_u32, 0x3, "012", create_med3_cb<true>, true);
add_opt(s_max_u32, v_med3_u32, 0x3, "012", create_med3_cb<true>, true);
}
} else if (info.opcode == aco_opcode::v_max_i32) {
add_opt(v_max_i32, v_max3_i32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_max_i32, v_max3_i32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_min_i32, v_minmax_i32, 0x3, "120", nullptr, true);
else
add_opt(s_min_i32, v_minmax_i32, 0x3, "120", nullptr, true);
} else {
add_opt(v_min_i32, v_med3_i32, 0x3, "012", create_med3_cb<false>, true);
add_opt(s_min_i32, v_med3_i32, 0x3, "012", create_med3_cb<false>, true);
}
} else if (info.opcode == aco_opcode::v_min_i32) {
add_opt(v_min_i32, v_min3_i32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_min_i32, v_min3_i32, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_max_i32, v_maxmin_i32, 0x3, "120", nullptr, true);
else
add_opt(s_max_i32, v_maxmin_i32, 0x3, "120", nullptr, true);
} else {
add_opt(v_max_i32, v_med3_i32, 0x3, "012", create_med3_cb<true>, true);
add_opt(s_max_i32, v_med3_i32, 0x3, "012", create_med3_cb<true>, true);
}
} else if (info.opcode == aco_opcode::v_max_f16 && ctx.program->gfx_level >= GFX9) {
add_opt(v_max_f16, v_max3_f16, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_max_f16, v_max3_f16, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_min_f16, v_minmax_f16, 0x3, "120", nullptr, true);
else
add_opt(s_min_f16, v_minmax_f16, 0x3, "120", nullptr, true);
} else {
add_opt(v_min_f16, v_med3_f16, 0x3, "012", create_med3_cb<false>, true);
}
} else if (info.opcode == aco_opcode::v_min_f16 && ctx.program->gfx_level >= GFX9) {
add_opt(v_min_f16, v_min3_f16, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11)
add_opt(s_min_f16, v_min3_f16, 0x3, "120", nullptr, true);
if (ctx.program->gfx_level >= GFX11) {
add_opt(v_max_f16, v_maxmin_f16, 0x3, "120", nullptr, true);
else
add_opt(s_max_f16, v_maxmin_f16, 0x3, "120", nullptr, true);
} else {
add_opt(v_max_f16, v_med3_f16, 0x3, "012", create_med3_cb<true>, true);
}
} else if (info.opcode == aco_opcode::v_max_u16 && ctx.program->gfx_level >= GFX9) {
add_opt(v_max_u16, v_max3_u16, 0x3, "120", nullptr, true);
add_opt(v_min_u16, v_med3_u16, 0x3, "012", create_med3_cb<false>, true);