mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
spirv: create ffma more often
We will not be able to combine instructions into ffma later if they are exact, so create them from the start. They can be lowered later if they are unwanted.

fossil-db (GFX10.3):
Totals from 14697 (10.05% of 146267) affected shaders:
VGPRs: 645736 -> 614168 (-4.89%)
CodeSize: 59312768 -> 58735352 (-0.97%); split: -0.97%, +0.00%
MaxWaves: 372900 -> 376666 (+1.01%)
Instrs: 11339280 -> 11120882 (-1.93%); split: -1.93%, +0.00%
Latency: 284874519 -> 285277327 (+0.14%); split: -0.10%, +0.24%
InvThroughput: 68791374 -> 68526739 (-0.38%); split: -0.49%, +0.10%

fossil-db (GFX10):
Totals from 11039 (7.55% of 146267) affected shaders:
CodeSize: 54785444 -> 54785268 (-0.00%); split: -0.00%, +0.00%
Instrs: 10401349 -> 10401396 (+0.00%); split: -0.00%, +0.00%
Latency: 277781803 -> 278572890 (+0.28%); split: -0.00%, +0.29%
InvThroughput: 65035902 -> 65100855 (+0.10%); split: -0.00%, +0.10%

fossil-db (GFX9):
Totals from 24055 (16.43% of 146401) affected shaders:
SGPRs: 1790704 -> 1790640 (-0.00%)
VGPRs: 1105736 -> 1105716 (-0.00%)
CodeSize: 110944732 -> 110948812 (+0.00%); split: -0.00%, +0.01%
Instrs: 21609095 -> 21610227 (+0.01%); split: -0.00%, +0.01%
Latency: 756137596 -> 756145812 (+0.00%); split: -0.02%, +0.02%
InvThroughput: 344103825 -> 344112245 (+0.00%); split: -0.00%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
This commit is contained in:
parent
28acc4120f
commit
f6f9000f84
2 changed files with 25 additions and 31 deletions
|
|
@ -122,8 +122,8 @@ matrix_multiply(struct vtn_builder *b,
|
|||
nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
|
||||
for (int j = src0_columns - 2; j >= 0; j--) {
|
||||
dest->elems[i]->def =
|
||||
nir_fadd(&b->nb, nir_fmul(&b->nb, src0->elems[j]->def,
|
||||
nir_channel(&b->nb, src1->elems[i]->def, j)),
|
||||
nir_ffma(&b->nb, src0->elems[j]->def,
|
||||
nir_channel(&b->nb, src1->elems[i]->def, j),
|
||||
dest->elems[i]->def);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -202,19 +202,17 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
|
|||
nir_ssa_def *half = nir_imm_floatN_t(b, 0.5f, x->bit_size);
|
||||
nir_ssa_def *abs_x = nir_fabs(b, x);
|
||||
|
||||
nir_ssa_def *p0_plus_xp1 = nir_fadd_imm(b, nir_fmul_imm(b, abs_x, p1), p0);
|
||||
nir_ssa_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, p1, p0);
|
||||
|
||||
nir_ssa_def *expr_tail =
|
||||
nir_fadd_imm(b, nir_fmul(b, abs_x,
|
||||
nir_fadd_imm(b, nir_fmul(b, abs_x,
|
||||
p0_plus_xp1),
|
||||
M_PI_4f - 1.0f)),
|
||||
M_PI_2f);
|
||||
nir_ffma_imm2(b, abs_x,
|
||||
nir_ffma_imm2(b, abs_x, p0_plus_xp1, M_PI_4f - 1.0f),
|
||||
M_PI_2f);
|
||||
|
||||
nir_ssa_def *result0 = nir_fmul(b, nir_fsign(b, x),
|
||||
nir_fsub(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
|
||||
nir_fmul(b, nir_fsqrt(b, nir_fsub(b, one, abs_x)),
|
||||
expr_tail)));
|
||||
nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
|
||||
nir_fsqrt(b, nir_fsub(b, one, abs_x)),
|
||||
expr_tail));
|
||||
if (piecewise) {
|
||||
/* approximation for |x| < 0.5 */
|
||||
const float pS0 = 1.6666586697e-01f;
|
||||
|
|
@ -225,15 +223,12 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
|
|||
nir_ssa_def *x2 = nir_fmul(b, x, x);
|
||||
nir_ssa_def *p = nir_fmul(b,
|
||||
x2,
|
||||
nir_fadd_imm(b,
|
||||
nir_fmul(b,
|
||||
x2,
|
||||
nir_fadd_imm(b, nir_fmul_imm(b, x2, pS2),
|
||||
pS1)),
|
||||
pS0));
|
||||
nir_ffma_imm2(b, x2,
|
||||
nir_ffma_imm12(b, x2, pS2, pS1),
|
||||
pS0));
|
||||
|
||||
nir_ssa_def *q = nir_fadd(b, nir_fmul_imm(b, x2, qS1), one);
|
||||
nir_ssa_def *result1 = nir_fadd(b, nir_fmul(b, x, nir_fdiv(b, p, q)), x);
|
||||
nir_ssa_def *q = nir_ffma_imm1(b, x2, qS1, one);
|
||||
nir_ssa_def *result1 = nir_ffma(b, x, nir_fdiv(b, p, q), x);
|
||||
return nir_bcsel(b, nir_flt(b, abs_x, half), result1, result0);
|
||||
} else {
|
||||
return result0;
|
||||
|
|
@ -414,9 +409,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
|||
case GLSLstd450Reflect:
|
||||
/* I - 2 * dot(N, I) * N */
|
||||
dest->def =
|
||||
nir_fsub(nb, src[0], nir_fmul(nb, NIR_IMM_FP(nb, 2.0),
|
||||
nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
|
||||
src[1])));
|
||||
nir_a_minus_bc(nb, src[0],
|
||||
src[1],
|
||||
nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
|
||||
NIR_IMM_FP(nb, 2.0)));
|
||||
break;
|
||||
|
||||
case GLSLstd450Refract: {
|
||||
|
|
@ -442,12 +438,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
|||
}
|
||||
/* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
|
||||
nir_ssa_def *k =
|
||||
nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
|
||||
nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
|
||||
nir_a_minus_bc(nb, one, eta,
|
||||
nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i)));
|
||||
nir_ssa_def *result =
|
||||
nir_fsub(nb, nir_fmul(nb, eta, I),
|
||||
nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
|
||||
nir_fsqrt(nb, k)), N));
|
||||
nir_a_minus_bc(nb, nir_fmul(nb, eta, I),
|
||||
nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
|
||||
N);
|
||||
/* XXX: bcsel, or if statement? */
|
||||
dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
|
||||
break;
|
||||
|
|
@ -494,13 +490,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
|||
case GLSLstd450Asinh:
|
||||
dest->def = nir_fmul(nb, nir_fsign(nb, src[0]),
|
||||
nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
|
||||
nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
|
||||
1.0f)))));
|
||||
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f)))));
|
||||
break;
|
||||
case GLSLstd450Acosh:
|
||||
dest->def = nir_flog(nb, nir_fadd(nb, src[0],
|
||||
nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
|
||||
-1.0f))));
|
||||
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f))));
|
||||
break;
|
||||
case GLSLstd450Atanh: {
|
||||
nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue