pan/bi: Promote MUX to CSEL in the scheduler

Helps scheduling, and makes scheduling more predictable when deciding between
MUX and CSEL.

total tuples in shared programs: 1523328 -> 1516256 (-0.46%)
tuples in affected programs: 509800 -> 502728 (-1.39%)
helped: 1977
HURT: 181
helped stats (abs) min: 1.0 max: 48.0 x̄: 3.71 x̃: 2
helped stats (rel) min: 0.04% max: 14.29% x̄: 1.98% x̃: 1.28%
HURT stats (abs)   min: 1.0 max: 5.0 x̄: 1.43 x̃: 1
HURT stats (rel)   min: 0.14% max: 7.69% x̄: 1.40% x̃: 0.70%
95% mean confidence interval for tuples value: -3.47 -3.08
95% mean confidence interval for tuples %-change: -1.79% -1.60%
Tuples are helped.

total clauses in shared programs: 350552 -> 349906 (-0.18%)
clauses in affected programs: 34839 -> 34193 (-1.85%)
helped: 570
HURT: 49
helped stats (abs) min: 1.0 max: 16.0 x̄: 1.22 x̃: 1
helped stats (rel) min: 0.67% max: 20.00% x̄: 3.26% x̃: 2.22%
HURT stats (abs)   min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 0.92% max: 16.67% x̄: 4.31% x̃: 4.17%
95% mean confidence interval for clauses value: -1.13 -0.96
95% mean confidence interval for clauses %-change: -2.95% -2.38%
Clauses are helped.

total cycles in shared programs: 202589.37 -> 202512.25 (-0.04%)
cycles in affected programs: 7644.46 -> 7567.33 (-1.01%)
helped: 771
HURT: 147
helped stats (abs) min: 0.041665999999999315 max: 1.8333360000000027 x̄: 0.11 x̃: 0
helped stats (rel) min: 0.16% max: 14.29% x̄: 2.10% x̃: 1.35%
HURT stats (abs)   min: 0.041665999999999315 max: 0.3333340000000007 x̄: 0.07 x̃: 0
HURT stats (rel)   min: 0.24% max: 7.41% x̄: 1.49% x̃: 1.11%
95% mean confidence interval for cycles value: -0.09 -0.07
95% mean confidence interval for cycles %-change: -1.69% -1.36%
Cycles are helped.

total arith in shared programs: 56755.96 -> 56585.50 (-0.30%)
arith in affected programs: 18746.29 -> 18575.83 (-0.91%)
helped: 1605
HURT: 352
helped stats (abs) min: 0.04166399999999726 max: 1.8333360000000027 x̄: 0.12 x̃: 0
helped stats (rel) min: 0.07% max: 20.00% x̄: 1.92% x̃: 1.12%
HURT stats (abs)   min: 0.041665999999999315 max: 0.3333340000000007 x̄: 0.06 x̃: 0
HURT stats (rel)   min: 0.17% max: 33.33% x̄: 2.09% x̃: 1.08%
95% mean confidence interval for arith value: -0.09 -0.08
95% mean confidence interval for arith %-change: -1.34% -1.07%
Arith are helped.

total quadwords in shared programs: 1429737 -> 1424670 (-0.35%)
quadwords in affected programs: 418175 -> 413108 (-1.21%)
helped: 1682
HURT: 198
helped stats (abs) min: 1.0 max: 35.0 x̄: 3.17 x̃: 2
helped stats (rel) min: 0.04% max: 13.33% x̄: 1.72% x̃: 1.29%
HURT stats (abs)   min: 1.0 max: 5.0 x̄: 1.38 x̃: 1
HURT stats (rel)   min: 0.15% max: 7.41% x̄: 1.30% x̃: 0.92%
95% mean confidence interval for quadwords value: -2.86 -2.53
95% mean confidence interval for quadwords %-change: -1.48% -1.32%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14576>
This commit is contained in:
Alyssa Rosenzweig 2022-02-18 12:49:06 -05:00 committed by Marge Bot
parent a8418abd74
commit e392dd8237

View file

@ -490,6 +490,58 @@ bi_can_iaddc(bi_instr *ins)
ins->src[1].swizzle == BI_SWIZZLE_H01);
}
/*
* When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
* replaced by CSEL as follows:
*
* MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
* MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
* MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
*
* MUX.bit cannot be transformed like this.
*
* Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
* So we must check the swizzles too.
*/
static bool
bi_can_csel(bi_instr *I)
{
        /* Only the MUX.i32 and MUX.v2i16 opcodes are candidates */
        if (I->op != BI_OPCODE_MUX_I32 && I->op != BI_OPCODE_MUX_V2I16)
                return false;

        /* MUX.bit has no CSEL equivalent */
        if (I->mux == BI_MUX_BIT)
                return false;

        /* CSEL.v2i16 lacks the swizzles MUX.v2i16 supports, so each of the
         * three MUX sources must use the H01 swizzle */
        for (unsigned s = 0; s < 3; ++s) {
                if (I->src[s].swizzle != BI_SWIZZLE_H01)
                        return false;
        }

        return true;
}
/*
 * Return the CSEL opcode that replaces a MUX with the given mode. b32 selects
 * the 32-bit variant of the opcode; otherwise the 2x16-bit variant is
 * returned. Must not be called with a mode other than int_zero, neg or
 * fp_zero.
 */
static enum bi_opcode
bi_csel_for_mux(bool b32, enum bi_mux mux)
{
        if (mux == BI_MUX_INT_ZERO) {
                if (b32)
                        return BI_OPCODE_CSEL_I32;

                return BI_OPCODE_CSEL_V2I16;
        }

        if (mux == BI_MUX_NEG) {
                if (b32)
                        return BI_OPCODE_CSEL_S32;

                return BI_OPCODE_CSEL_V2S16;
        }

        if (mux == BI_MUX_FP_ZERO) {
                if (b32)
                        return BI_OPCODE_CSEL_F32;

                return BI_OPCODE_CSEL_V2F16;
        }

        unreachable("No CSEL for MUX.bit");
}
/*
 * Rewrite a MUX.i32 / MUX.v2i16 instruction in place as the corresponding
 * CSEL: MUX(x, y, b) becomes CSEL(b, 0, x, y), with the comparison function
 * chosen from the MUX mode.
 */
static void
bi_replace_mux_with_csel(bi_instr *I)
{
        const bool is_32bit = (I->op == BI_OPCODE_MUX_I32);

        assert(is_32bit || I->op == BI_OPCODE_MUX_V2I16);

        I->op = bi_csel_for_mux(is_32bit, I->mux);

        /* MUX.neg tests "b < 0"; the other modes test "b == 0" */
        if (I->mux == BI_MUX_NEG)
                I->cmpf = BI_CMPF_LT;
        else
                I->cmpf = BI_CMPF_EQ;

        /* Shift the two selected values up to slots 2 and 3 (highest slot
         * first so nothing is overwritten early), then place the condition
         * and the zero it is compared against in slots 0 and 1 */
        bi_index selector = I->src[2];

        I->src[3] = I->src[1];
        I->src[2] = I->src[0];
        I->src[0] = selector;
        I->src[1] = bi_zero();
}
/*
* The encoding of *FADD.v2f16 only specifies a single abs flag. All abs
* encodings are permitted by swapping operands; however, this scheme fails if
@ -509,6 +561,10 @@ bi_can_fma(bi_instr *ins)
if (bi_can_iaddc(ins))
return true;
/* +MUX -> *CSEL */
if (bi_can_csel(ins))
return true;
/* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */
if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins))
return false;
@ -1220,6 +1276,8 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
assert(bi_can_iaddc(instr));
instr->op = BI_OPCODE_IADDC_I32;
instr->src[2] = bi_zero();
} else if (fma && bi_can_csel(instr)) {
bi_replace_mux_with_csel(instr);
}
return instr;