mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 20:20:18 +01:00
aco/optimizer: apply omod to pseudo scalar trans instructions
Foz-DB Navi48:
Totals from 2062 (2.11% of 97637) affected shaders:
Instrs: 8061281 -> 8055482 (-0.07%); split: -0.07%, +0.00%
CodeSize: 42727968 -> 42696504 (-0.07%); split: -0.07%, +0.00%
Latency: 54739436 -> 54737749 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 10833704 -> 10833346 (-0.00%); split: -0.00%, +0.00%
VClause: 167276 -> 167275 (-0.00%)
SClause: 160183 -> 160163 (-0.01%); split: -0.02%, +0.01%
Copies: 684315 -> 683984 (-0.05%); split: -0.05%, +0.00%
PreSGPRs: 146747 -> 146746 (-0.00%)
VALU: 4377180 -> 4377168 (-0.00%); split: -0.00%, +0.00%
SALU: 1255321 -> 1251342 (-0.32%); split: -0.32%, +0.00%
VOPD: 16467 -> 16469 (+0.01%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38658>
This commit is contained in:
parent
b82339d99e
commit
d86f5f6bcb
2 changed files with 11 additions and 2 deletions
|
|
@ -3694,7 +3694,8 @@ apply_output_impl(opt_ctx& ctx, aco_ptr<Instruction>& instr, Instruction* parent
|
||||||
else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 ||
|
else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 ||
|
||||||
instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 ||
|
instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 ||
|
||||||
instr->opcode == aco_opcode::v_pk_mul_f16 ||
|
instr->opcode == aco_opcode::v_pk_mul_f16 ||
|
||||||
instr->opcode == aco_opcode::v_mul_legacy_f32)
|
instr->opcode == aco_opcode::v_mul_legacy_f32 ||
|
||||||
|
instr->opcode == aco_opcode::s_mul_f32 || instr->opcode == aco_opcode::s_mul_f16)
|
||||||
return apply_output_mul(ctx, instr, parent);
|
return apply_output_mul(ctx, instr, parent);
|
||||||
else if (instr->opcode == aco_opcode::v_cvt_f16_f32)
|
else if (instr->opcode == aco_opcode::v_cvt_f16_f32)
|
||||||
return apply_f2f16(ctx, instr, parent);
|
return apply_f2f16(ctx, instr, parent);
|
||||||
|
|
@ -3722,6 +3723,8 @@ apply_output(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||||
case aco_opcode::v_mul_f16:
|
case aco_opcode::v_mul_f16:
|
||||||
case aco_opcode::v_pk_mul_f16:
|
case aco_opcode::v_pk_mul_f16:
|
||||||
case aco_opcode::v_mul_legacy_f32:
|
case aco_opcode::v_mul_legacy_f32:
|
||||||
|
case aco_opcode::s_mul_f32:
|
||||||
|
case aco_opcode::s_mul_f16:
|
||||||
case aco_opcode::v_cvt_f16_f32:
|
case aco_opcode::v_cvt_f16_f32:
|
||||||
case aco_opcode::v_med3_f32:
|
case aco_opcode::v_med3_f32:
|
||||||
case aco_opcode::v_med3_f16: break;
|
case aco_opcode::v_med3_f16: break;
|
||||||
|
|
|
||||||
|
|
@ -2040,7 +2040,7 @@ BEGIN_TEST(optimizer.trans_inline_constant)
|
||||||
finish_opt_test();
|
finish_opt_test();
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
BEGIN_TEST(optimizer.trans_no_omod)
|
BEGIN_TEST(optimizer.trans_omod)
|
||||||
//>> s1: %a:s[0] = p_startpgm
|
//>> s1: %a:s[0] = p_startpgm
|
||||||
if (!setup_cs("s1", GFX12))
|
if (!setup_cs("s1", GFX12))
|
||||||
return;
|
return;
|
||||||
|
|
@ -2052,6 +2052,12 @@ BEGIN_TEST(optimizer.trans_no_omod)
|
||||||
writeout(0, bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), dst,
|
writeout(0, bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), dst,
|
||||||
bld.copy(bld.def(v1), Operand::c32(0x3f000000))));
|
bld.copy(bld.def(v1), Operand::c32(0x3f000000))));
|
||||||
|
|
||||||
|
//! s1: %res1 = v_s_rcp_f32 -%a *0.5
|
||||||
|
//! p_unit_test 1, %res1
|
||||||
|
dst = bld.vop3(aco_opcode::v_s_rcp_f32, bld.def(s1), inputs[0]);
|
||||||
|
writeout(1, bld.sop2(aco_opcode::s_mul_f32, bld.def(s1), dst,
|
||||||
|
bld.copy(bld.def(s1), Operand::c32(0xbf000000))));
|
||||||
|
|
||||||
finish_opt_test();
|
finish_opt_test();
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue