From d86f5f6bcb6848d35c54a76b7b724dd2e628f79c Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 13 Mar 2025 20:27:54 +0100
Subject: [PATCH] aco/optimizer: apply omod to pseudo scalar trans instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi48:
Totals from 2062 (2.11% of 97637) affected shaders:
Instrs: 8061281 -> 8055482 (-0.07%); split: -0.07%, +0.00%
CodeSize: 42727968 -> 42696504 (-0.07%); split: -0.07%, +0.00%
Latency: 54739436 -> 54737749 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 10833704 -> 10833346 (-0.00%); split: -0.00%, +0.00%
VClause: 167276 -> 167275 (-0.00%)
SClause: 160183 -> 160163 (-0.01%); split: -0.02%, +0.01%
Copies: 684315 -> 683984 (-0.05%); split: -0.05%, +0.00%
PreSGPRs: 146747 -> 146746 (-0.00%)
VALU: 4377180 -> 4377168 (-0.00%); split: -0.00%, +0.00%
SALU: 1255321 -> 1251342 (-0.32%); split: -0.32%, +0.00%
VOPD: 16467 -> 16469 (+0.01%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp        | 5 ++++-
 src/amd/compiler/tests/test_optimizer.cpp | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 8492fcac6c1..a48988c0029 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3694,7 +3694,8 @@ apply_output_impl(opt_ctx& ctx, aco_ptr<Instruction>& instr, Instruction* parent
    else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 ||
             instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 ||
             instr->opcode == aco_opcode::v_pk_mul_f16 ||
-            instr->opcode == aco_opcode::v_mul_legacy_f32)
+            instr->opcode == aco_opcode::v_mul_legacy_f32 ||
+            instr->opcode == aco_opcode::s_mul_f32 || instr->opcode == aco_opcode::s_mul_f16)
       return apply_output_mul(ctx, instr, parent);
    else if (instr->opcode == aco_opcode::v_cvt_f16_f32)
       return apply_f2f16(ctx, instr, parent);
@@ -3722,6 +3723,8 @@ apply_output(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    case aco_opcode::v_mul_f16:
    case aco_opcode::v_pk_mul_f16:
    case aco_opcode::v_mul_legacy_f32:
+   case aco_opcode::s_mul_f32:
+   case aco_opcode::s_mul_f16:
    case aco_opcode::v_cvt_f16_f32:
    case aco_opcode::v_med3_f32:
    case aco_opcode::v_med3_f16: break;
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 8abea38fc68..2906547f708 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -2040,7 +2040,7 @@ BEGIN_TEST(optimizer.trans_inline_constant)
    finish_opt_test();
 END_TEST
 
-BEGIN_TEST(optimizer.trans_no_omod)
+BEGIN_TEST(optimizer.trans_omod)
    //>> s1: %a:s[0] = p_startpgm
    if (!setup_cs("s1", GFX12))
       return;
@@ -2052,6 +2052,12 @@ BEGIN_TEST(optimizer.trans_no_omod)
    writeout(0, bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), dst,
                         bld.copy(bld.def(v1), Operand::c32(0x3f000000))));
 
+   //! s1: %res1 = v_s_rcp_f32 -%a *0.5
+   //! p_unit_test 1, %res1
+   dst = bld.vop3(aco_opcode::v_s_rcp_f32, bld.def(s1), inputs[0]);
+   writeout(1, bld.sop2(aco_opcode::s_mul_f32, bld.def(s1), dst,
+                        bld.copy(bld.def(s1), Operand::c32(0xbf000000))));
+
    finish_opt_test();
 END_TEST
 