diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index f5106a29847..7b42d384f8a 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -414,9 +414,11 @@ public: return op.op.getTemp(); } - Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false) + Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false) { assert(tmp.type() == RegType::vgpr); + tmpu24 &= imm <= 0xffffffu; + tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u; bool has_lshl_add = program->gfx_level >= GFX9; /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles), * compared to 4x the latency on 2 && util_is_power_of_two_nonzero(imm + 1u)) { @@ -467,7 +473,7 @@ public: Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm) { - return v_mul_imm(dst, tmp, imm, true); + return v_mul_imm(dst, tmp, imm & 0xffffffu, true); } Result copy(Definition dst, Op op) {