mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
i915/corm: optimize seq/sne against zero to 2 instructions
When opts.seq_sne_opt is set and one operand is zero, use the abs+compare pattern: x == 0 becomes -abs(x) >= 0, and x != 0 becomes -abs(x) < 0. This reduces the comparison from 3 ALU instructions to 2.

This is a variant dimension because it can increase register pressure in some shaders; the multi-variant framework picks the winner per shader.

shader-db (I915_FS=nir): 212/403 compiled, 3228 alu
shader-db (I915_FS=both): nir won 212 (26 identical, 16 tied, 167 better, 3 only), 75 TGSI, 116 neither

Assisted-by: Claude
This commit is contained in:
parent
75ef9f6d65
commit
4885eb02ab
1 changed file with 42 additions and 12 deletions
|
|
@ -394,21 +394,51 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
i915_emit_arith(p, A0_SGE, dest, mask, 0, src0, src1, 0);
|
||||
break;
|
||||
case nir_op_seq: {
|
||||
/* seq(a,b) = sge(a,b) * sge(b,a) */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, src1, 0);
|
||||
i915_emit_arith(p, A0_SGE, dest, mask, 0, src1, src0, 0);
|
||||
i915_emit_arith(p, A0_MUL, dest, mask, 0, dest, tmp, 0);
|
||||
const uint32_t zero =
|
||||
swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
|
||||
if (c->opts.seq_sne_opt &&
|
||||
((src0 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK) ||
|
||||
(src1 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK))) {
|
||||
if ((src0 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK))
|
||||
src0 = src1;
|
||||
/* x == 0 <-> -abs(x) >= 0: 2 insns instead of 3 */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, negate(src0, 1, 1, 1, 1), 0);
|
||||
i915_emit_arith(p, A0_SGE, dest, mask, 0,
|
||||
negate(tmp, 1, 1, 1, 1), zero, 0);
|
||||
} else {
|
||||
/* seq(a,b) = sge(a,b) * sge(b,a) */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, src1, 0);
|
||||
i915_emit_arith(p, A0_SGE, dest, mask, 0, src1, src0, 0);
|
||||
i915_emit_arith(p, A0_MUL, dest, mask, 0, dest, tmp, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_sne: {
|
||||
/* sne(a,b) = slt(a,b) + slt(b,a) */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, src1, 0);
|
||||
i915_emit_arith(p, A0_SLT, dest, mask, 0, src1, src0, 0);
|
||||
i915_emit_arith(p, A0_ADD, dest, mask, 0, dest, tmp, 0);
|
||||
const uint32_t zero =
|
||||
swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
|
||||
if (c->opts.seq_sne_opt &&
|
||||
((src0 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK) ||
|
||||
(src1 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK))) {
|
||||
if ((src0 & UREG_XYZW_CHANNEL_MASK) == (zero & UREG_XYZW_CHANNEL_MASK))
|
||||
src0 = src1;
|
||||
/* x != 0 <-> -abs(x) < 0: 2 insns instead of 3 */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, negate(src0, 1, 1, 1, 1), 0);
|
||||
i915_emit_arith(p, A0_SLT, dest, mask, 0,
|
||||
negate(tmp, 1, 1, 1, 1), zero, 0);
|
||||
} else {
|
||||
/* sne(a,b) = slt(a,b) + slt(b,a) */
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src0, src1, 0);
|
||||
i915_emit_arith(p, A0_SLT, dest, mask, 0, src1, src0, 0);
|
||||
i915_emit_arith(p, A0_ADD, dest, mask, 0, dest, tmp, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_fpow: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue