aco/optimizer: apply omod to pseudo scalar trans instructions
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Foz-DB Navi48:
Totals from 2062 (2.11% of 97637) affected shaders:
Instrs: 8061281 -> 8055482 (-0.07%); split: -0.07%, +0.00%
CodeSize: 42727968 -> 42696504 (-0.07%); split: -0.07%, +0.00%
Latency: 54739436 -> 54737749 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 10833704 -> 10833346 (-0.00%); split: -0.00%, +0.00%
VClause: 167276 -> 167275 (-0.00%)
SClause: 160183 -> 160163 (-0.01%); split: -0.02%, +0.01%
Copies: 684315 -> 683984 (-0.05%); split: -0.05%, +0.00%
PreSGPRs: 146747 -> 146746 (-0.00%)
VALU: 4377180 -> 4377168 (-0.00%); split: -0.00%, +0.00%
SALU: 1255321 -> 1251342 (-0.32%); split: -0.32%, +0.00%
VOPD: 16467 -> 16469 (+0.01%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38658>
This commit is contained in:
Georg Lehmann 2025-03-13 20:27:54 +01:00 committed by Marge Bot
parent b82339d99e
commit d86f5f6bcb
2 changed files with 11 additions and 2 deletions

View file

@@ -3694,7 +3694,8 @@ apply_output_impl(opt_ctx& ctx, aco_ptr<Instruction>& instr, Instruction* parent
else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 || else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 ||
instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 || instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 ||
instr->opcode == aco_opcode::v_pk_mul_f16 || instr->opcode == aco_opcode::v_pk_mul_f16 ||
instr->opcode == aco_opcode::v_mul_legacy_f32) instr->opcode == aco_opcode::v_mul_legacy_f32 ||
instr->opcode == aco_opcode::s_mul_f32 || instr->opcode == aco_opcode::s_mul_f16)
return apply_output_mul(ctx, instr, parent); return apply_output_mul(ctx, instr, parent);
else if (instr->opcode == aco_opcode::v_cvt_f16_f32) else if (instr->opcode == aco_opcode::v_cvt_f16_f32)
return apply_f2f16(ctx, instr, parent); return apply_f2f16(ctx, instr, parent);
@@ -3722,6 +3723,8 @@ apply_output(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::v_mul_f16: case aco_opcode::v_mul_f16:
case aco_opcode::v_pk_mul_f16: case aco_opcode::v_pk_mul_f16:
case aco_opcode::v_mul_legacy_f32: case aco_opcode::v_mul_legacy_f32:
case aco_opcode::s_mul_f32:
case aco_opcode::s_mul_f16:
case aco_opcode::v_cvt_f16_f32: case aco_opcode::v_cvt_f16_f32:
case aco_opcode::v_med3_f32: case aco_opcode::v_med3_f32:
case aco_opcode::v_med3_f16: break; case aco_opcode::v_med3_f16: break;

View file

@@ -2040,7 +2040,7 @@ BEGIN_TEST(optimizer.trans_inline_constant)
finish_opt_test(); finish_opt_test();
END_TEST END_TEST
BEGIN_TEST(optimizer.trans_no_omod) BEGIN_TEST(optimizer.trans_omod)
//>> s1: %a:s[0] = p_startpgm //>> s1: %a:s[0] = p_startpgm
if (!setup_cs("s1", GFX12)) if (!setup_cs("s1", GFX12))
return; return;
@@ -2052,6 +2052,12 @@ BEGIN_TEST(optimizer.trans_no_omod)
writeout(0, bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), dst, writeout(0, bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), dst,
bld.copy(bld.def(v1), Operand::c32(0x3f000000)))); bld.copy(bld.def(v1), Operand::c32(0x3f000000))));
//! s1: %res1 = v_s_rcp_f32 -%a *0.5
//! p_unit_test 1, %res1
dst = bld.vop3(aco_opcode::v_s_rcp_f32, bld.def(s1), inputs[0]);
writeout(1, bld.sop2(aco_opcode::s_mul_f32, bld.def(s1), dst,
bld.copy(bld.def(s1), Operand::c32(0xbf000000))));
finish_opt_test(); finish_opt_test();
END_TEST END_TEST