aco/builder: improve v_mul_imm for negative imm

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28223>
This commit is contained in:
Georg Lehmann 2024-03-16 11:48:15 +01:00 committed by Marge Bot
parent 4f6f2cea6a
commit b48a101d8f

View file

@ -414,9 +414,11 @@ public:
return op.op.getTemp();
}
Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false)
Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false)
{
assert(tmp.type() == RegType::vgpr);
tmpu24 &= imm <= 0xffffffu;
tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u;
bool has_lshl_add = program->gfx_level >= GFX9;
/* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
* compared to 4x the latency on <GFX10. */
@ -425,10 +427,14 @@ public:
return copy(dst, Operand::zero());
} else if (imm == 1) {
return copy(dst, Operand(tmp));
} else if (imm == 0xffffffff) {
return vsub32(dst, Operand::zero(), tmp);
} else if (util_is_power_of_two_or_zero(imm)) {
return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp);
} else if (bits24) {
} else if (tmpu24) {
return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp);
} else if (tmpi24) {
return vop2(aco_opcode::v_mul_i32_i24, dst, Operand::c32(imm), tmp);
} else if (util_is_power_of_two_nonzero(imm - 1u)) {
return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp);
} else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
@ -467,7 +473,7 @@ public:
/* Multiply `tmp` by the immediate `imm` by forwarding to v_mul_imm with the
 * fourth argument set to true.
 *
 * NOTE(review): two return statements appear below. They look like the
 * removed/added pair of a diff hunk whose -/+ markers were lost; as written,
 * only the first one can execute. The first forwards `imm` unchanged. The
 * second masks `imm` to its low 24 bits, which always satisfies the
 * `imm <= 0xffffffu` check that v_mul_imm applies to its tmpu24 flag.
 */
Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
{
return v_mul_imm(dst, tmp, imm, true);
return v_mul_imm(dst, tmp, imm & 0xffffffu, true);
}
Result copy(Definition dst, Op op) {