mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 08:10:09 +01:00
aco/optimizer: create max3/min3/med3 with salu min/max
Foz-DB Navi48:
Totals from 175 (0.21% of 82419) affected shaders:
Instrs: 465863 -> 465260 (-0.13%); split: -0.13%, +0.00%
CodeSize: 2362264 -> 2360744 (-0.06%); split: -0.07%, +0.00%
Latency: 1548501 -> 1548371 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 227683 -> 227630 (-0.02%); split: -0.08%, +0.06%
Copies: 33646 -> 33648 (+0.01%)
PreSGPRs: 9996 -> 10004 (+0.08%)
VALU: 175836 -> 175850 (+0.01%)
SALU: 122094 -> 121621 (-0.39%); split: -0.39%, +0.00%
Foz-DB Navi21:
Totals from 1 (0.00% of 82387) affected shaders:
InvThroughput: 74 -> 76 (+2.70%)
VALU: 57 -> 58 (+1.75%)
SALU: 61 -> 60 (-1.64%)
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38150>
This commit is contained in:
parent
d21734e024
commit
f0e24284f5
1 changed file with 44 additions and 16 deletions
|
|
@@ -4907,52 +4907,80 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
add_opt(s_mul_f16, s_fmac_f16, 0x3, "120", create_fma_cb);
|
||||
} else if (info.opcode == aco_opcode::v_max_f32) {
|
||||
add_opt(v_max_f32, v_max3_f32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_max_f32, v_max3_f32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_min_f32, v_minmax_f32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_min_f32, v_minmax_f32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_min_f32, v_med3_f32, 0x3, "012", create_med3_cb<false>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_min_f32) {
|
||||
add_opt(v_min_f32, v_min3_f32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_min_f32, v_min3_f32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_max_f32, v_maxmin_f32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_max_f32, v_maxmin_f32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_max_f32, v_med3_f32, 0x3, "012", create_med3_cb<true>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_max_u32) {
|
||||
add_opt(v_max_u32, v_max3_u32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_max_u32, v_max3_u32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_min_u32, v_minmax_u32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_min_u32, v_minmax_u32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_min_u32, v_med3_u32, 0x3, "012", create_med3_cb<false>, true);
|
||||
add_opt(s_min_u32, v_med3_u32, 0x3, "012", create_med3_cb<false>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_min_u32) {
|
||||
add_opt(v_min_u32, v_min3_u32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_min_u32, v_min3_u32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_max_u32, v_maxmin_u32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_max_u32, v_maxmin_u32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_max_u32, v_med3_u32, 0x3, "012", create_med3_cb<true>, true);
|
||||
add_opt(s_max_u32, v_med3_u32, 0x3, "012", create_med3_cb<true>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_max_i32) {
|
||||
add_opt(v_max_i32, v_max3_i32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_max_i32, v_max3_i32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_min_i32, v_minmax_i32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_min_i32, v_minmax_i32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_min_i32, v_med3_i32, 0x3, "012", create_med3_cb<false>, true);
|
||||
add_opt(s_min_i32, v_med3_i32, 0x3, "012", create_med3_cb<false>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_min_i32) {
|
||||
add_opt(v_min_i32, v_min3_i32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_min_i32, v_min3_i32, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_max_i32, v_maxmin_i32, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_max_i32, v_maxmin_i32, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_max_i32, v_med3_i32, 0x3, "012", create_med3_cb<true>, true);
|
||||
add_opt(s_max_i32, v_med3_i32, 0x3, "012", create_med3_cb<true>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_max_f16 && ctx.program->gfx_level >= GFX9) {
|
||||
add_opt(v_max_f16, v_max3_f16, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_max_f16, v_max3_f16, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_min_f16, v_minmax_f16, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_min_f16, v_minmax_f16, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_min_f16, v_med3_f16, 0x3, "012", create_med3_cb<false>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_min_f16 && ctx.program->gfx_level >= GFX9) {
|
||||
add_opt(v_min_f16, v_min3_f16, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11)
|
||||
add_opt(s_min_f16, v_min3_f16, 0x3, "120", nullptr, true);
|
||||
if (ctx.program->gfx_level >= GFX11) {
|
||||
add_opt(v_max_f16, v_maxmin_f16, 0x3, "120", nullptr, true);
|
||||
else
|
||||
add_opt(s_max_f16, v_maxmin_f16, 0x3, "120", nullptr, true);
|
||||
} else {
|
||||
add_opt(v_max_f16, v_med3_f16, 0x3, "012", create_med3_cb<true>, true);
|
||||
}
|
||||
} else if (info.opcode == aco_opcode::v_max_u16 && ctx.program->gfx_level >= GFX9) {
|
||||
add_opt(v_max_u16, v_max3_u16, 0x3, "120", nullptr, true);
|
||||
add_opt(v_min_u16, v_med3_u16, 0x3, "012", create_med3_cb<false>, true);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue