spirv: create ffma more often

Separate fmul and fadd instructions cannot be combined into ffma later if
they are exact, so create ffma from the start. The ffma instructions can be
lowered later if they are unwanted.

fossil-db (GFX10.3):
Totals from 14697 (10.05% of 146267) affected shaders:
VGPRs: 645736 -> 614168 (-4.89%)
CodeSize: 59312768 -> 58735352 (-0.97%); split: -0.97%, +0.00%
MaxWaves: 372900 -> 376666 (+1.01%)
Instrs: 11339280 -> 11120882 (-1.93%); split: -1.93%, +0.00%
Latency: 284874519 -> 285277327 (+0.14%); split: -0.10%, +0.24%
InvThroughput: 68791374 -> 68526739 (-0.38%); split: -0.49%, +0.10%

fossil-db (GFX10):
Totals from 11039 (7.55% of 146267) affected shaders:
CodeSize: 54785444 -> 54785268 (-0.00%); split: -0.00%, +0.00%
Instrs: 10401349 -> 10401396 (+0.00%); split: -0.00%, +0.00%
Latency: 277781803 -> 278572890 (+0.28%); split: -0.00%, +0.29%
InvThroughput: 65035902 -> 65100855 (+0.10%); split: -0.00%, +0.10%

fossil-db (GFX9):
Totals from 24055 (16.43% of 146401) affected shaders:
SGPRs: 1790704 -> 1790640 (-0.00%)
VGPRs: 1105736 -> 1105716 (-0.00%)
CodeSize: 110944732 -> 110948812 (+0.00%); split: -0.00%, +0.01%
Instrs: 21609095 -> 21610227 (+0.01%); split: -0.00%, +0.01%
Latency: 756137596 -> 756145812 (+0.00%); split: -0.02%, +0.02%
InvThroughput: 344103825 -> 344112245 (+0.00%); split: -0.00%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
This commit is contained in:
Rhys Perry 2020-06-16 14:28:18 +01:00 committed by Marge Bot
parent 28acc4120f
commit f6f9000f84
2 changed files with 25 additions and 31 deletions

View file

@ -122,8 +122,8 @@ matrix_multiply(struct vtn_builder *b,
nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
for (int j = src0_columns - 2; j >= 0; j--) {
dest->elems[i]->def =
nir_fadd(&b->nb, nir_fmul(&b->nb, src0->elems[j]->def,
nir_channel(&b->nb, src1->elems[i]->def, j)),
nir_ffma(&b->nb, src0->elems[j]->def,
nir_channel(&b->nb, src1->elems[i]->def, j),
dest->elems[i]->def);
}
}

View file

@ -202,19 +202,17 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
nir_ssa_def *half = nir_imm_floatN_t(b, 0.5f, x->bit_size);
nir_ssa_def *abs_x = nir_fabs(b, x);
nir_ssa_def *p0_plus_xp1 = nir_fadd_imm(b, nir_fmul_imm(b, abs_x, p1), p0);
nir_ssa_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, p1, p0);
nir_ssa_def *expr_tail =
nir_fadd_imm(b, nir_fmul(b, abs_x,
nir_fadd_imm(b, nir_fmul(b, abs_x,
p0_plus_xp1),
M_PI_4f - 1.0f)),
M_PI_2f);
nir_ffma_imm2(b, abs_x,
nir_ffma_imm2(b, abs_x, p0_plus_xp1, M_PI_4f - 1.0f),
M_PI_2f);
nir_ssa_def *result0 = nir_fmul(b, nir_fsign(b, x),
nir_fsub(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
nir_fmul(b, nir_fsqrt(b, nir_fsub(b, one, abs_x)),
expr_tail)));
nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
nir_fsqrt(b, nir_fsub(b, one, abs_x)),
expr_tail));
if (piecewise) {
/* approximation for |x| < 0.5 */
const float pS0 = 1.6666586697e-01f;
@ -225,15 +223,12 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
nir_ssa_def *x2 = nir_fmul(b, x, x);
nir_ssa_def *p = nir_fmul(b,
x2,
nir_fadd_imm(b,
nir_fmul(b,
x2,
nir_fadd_imm(b, nir_fmul_imm(b, x2, pS2),
pS1)),
pS0));
nir_ffma_imm2(b, x2,
nir_ffma_imm12(b, x2, pS2, pS1),
pS0));
nir_ssa_def *q = nir_fadd(b, nir_fmul_imm(b, x2, qS1), one);
nir_ssa_def *result1 = nir_fadd(b, nir_fmul(b, x, nir_fdiv(b, p, q)), x);
nir_ssa_def *q = nir_ffma_imm1(b, x2, qS1, one);
nir_ssa_def *result1 = nir_ffma(b, x, nir_fdiv(b, p, q), x);
return nir_bcsel(b, nir_flt(b, abs_x, half), result1, result0);
} else {
return result0;
@ -414,9 +409,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
case GLSLstd450Reflect:
/* I - 2 * dot(N, I) * N */
dest->def =
nir_fsub(nb, src[0], nir_fmul(nb, NIR_IMM_FP(nb, 2.0),
nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
src[1])));
nir_a_minus_bc(nb, src[0],
src[1],
nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
NIR_IMM_FP(nb, 2.0)));
break;
case GLSLstd450Refract: {
@ -442,12 +438,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
}
/* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
nir_ssa_def *k =
nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
nir_a_minus_bc(nb, one, eta,
nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i)));
nir_ssa_def *result =
nir_fsub(nb, nir_fmul(nb, eta, I),
nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
nir_fsqrt(nb, k)), N));
nir_a_minus_bc(nb, nir_fmul(nb, eta, I),
nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
N);
/* XXX: bcsel, or if statement? */
dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
break;
@ -494,13 +490,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
case GLSLstd450Asinh:
dest->def = nir_fmul(nb, nir_fsign(nb, src[0]),
nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
1.0f)))));
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f)))));
break;
case GLSLstd450Acosh:
dest->def = nir_flog(nb, nir_fadd(nb, src[0],
nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
-1.0f))));
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f))));
break;
case GLSLstd450Atanh: {
nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);