intel/brw: Make bld.ADD(x, 0) emit no instructions and return x directly

There are a lot of places where we add 0 to an offset.  Not generating
the ADD in the first place saves work for the algebraic and
copy_propagation passes later.

Changes compile time in Borderlands 3 by -0.590631% +/- 0.170108% (n=25).

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29849>
This commit is contained in:
Kenneth Graunke 2024-06-18 13:49:17 -07:00
parent 068865ce81
commit 5cb15a6c67

View file

@ -610,7 +610,6 @@ namespace brw {
return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
} }
ALU2(ADD)
ALU3(ADD3) ALU3(ADD3)
ALU2_ACC(ADDC) ALU2_ACC(ADDC)
ALU2(AND) ALU2(AND)
@ -672,6 +671,21 @@ namespace brw {
#undef _ALU1 #undef _ALU1
/** @} */ /** @} */
/**
 * ADD with an explicit destination.
 *
 * Forwards \p dst, \p src0 and \p src1 to alu2() with BRW_OPCODE_ADD and
 * hands back the instruction pointer that alu2() returns.  Unlike the
 * value-returning overload, this form never skips the instruction.
 */
fs_inst *
ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) const
{
   fs_inst *const add_inst = alu2(BRW_OPCODE_ADD, dst, src0, src1);
   return add_inst;
}
/**
 * ADD returning the result register rather than taking a destination.
 *
 * If \p src1 is an immediate whose \c ud field is zero and the caller did
 * not pass \p out, nothing is emitted and \p src0 is returned unchanged.
 * In every other case the work is delegated to alu2().
 *
 * \param src0  First operand; returned directly on the add-zero shortcut.
 * \param src1  Second operand; checked for an IMM zero.
 * \param out   Optional; when non-NULL the shortcut is disabled and the
 *              pointer is passed through to alu2() (presumably so the
 *              caller always gets a real instruction back -- confirm
 *              against alu2()).
 */
fs_reg
ADD(const fs_reg &src0, const fs_reg &src1, fs_inst **out = NULL) const
{
   const bool wants_inst = out != NULL;
   const bool adds_zero = src1.file == IMM && src1.ud == 0;

   /* Only the "x + 0, result register only" case can skip the ADD. */
   if (wants_inst || !adds_zero)
      return alu2(BRW_OPCODE_ADD, src0, src1, out);

   return src0;
}
/** /**
* CMP: Sets the low bit of the destination channels with the result * CMP: Sets the low bit of the destination channels with the result
* of the comparison, while the upper bits are undefined, and updates * of the comparison, while the upper bits are undefined, and updates