From 0c57340c2323cf712b00df44b0f452df6a5a1eb2 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sun, 17 Mar 2024 12:53:33 +0100
Subject: [PATCH] aco/builder: use 24bit mul if low bits of imm are zero

Foz-DB Navi31:
Totals from 39 (0.05% of 79395) affected shaders:
Instrs: 62712 -> 62696 (-0.03%)
CodeSize: 330096 -> 329896 (-0.06%)
Latency: 192747 -> 192561 (-0.10%)
InvThroughput: 34078 -> 33889 (-0.55%)
VALU: 38979 -> 38963 (-0.04%)

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_builder_h.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index 7b42d384f8a..85dee11a7d3 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -417,6 +417,11 @@ public:
    Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false)
    {
       assert(tmp.type() == RegType::vgpr);
+      /* Assume 24bit if high 8 bits of tmp don't impact the result. */
+      if ((imm & 0xff) == 0) {
+         tmpu24 = true;
+         tmpi24 = true;
+      }
       tmpu24 &= imm <= 0xffffffu;
       tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u;
       bool has_lshl_add = program->gfx_level >= GFX9;