diff --git a/src/compiler/nir/nir_builtin_builder.c b/src/compiler/nir/nir_builtin_builder.c index ae055121afc..80fad4241f2 100644 --- a/src/compiler/nir/nir_builtin_builder.c +++ b/src/compiler/nir/nir_builtin_builder.c @@ -41,10 +41,10 @@ nir_cross3(nir_builder *b, nir_def *x, nir_def *y) unsigned yzx[3] = { 1, 2, 0 }; unsigned zxy[3] = { 2, 0, 1 }; - return nir_ffma_old(b, nir_swizzle(b, x, yzx, 3), - nir_swizzle(b, y, zxy, 3), - nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), - nir_swizzle(b, y, yzx, 3)))); + return nir_ffma_weak(b, nir_swizzle(b, x, yzx, 3), + nir_swizzle(b, y, zxy, 3), + nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), + nir_swizzle(b, y, yzx, 3)))); } nir_def * @@ -285,7 +285,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x) nir_imm_floatN_t(b, -M_PI_2, bit_size)); /* multiply through by x while fixing up the range reduction */ - nir_def *tmp = nir_ffma_old(b, nir_fabs(b, u), res, bias); + nir_def *tmp = nir_ffma_weak(b, nir_fabs(b, u), res, bias); /* sign fixup */ return nir_copysign(b, tmp, y_over_x); diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index d34098da5a7..87ac5f5ffb5 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -125,20 +125,6 @@ lower_reduction(nir_alu_instr *alu, nir_op chan_op, nir_op merge_op, return last; } -static inline bool -will_lower_ffma(nir_shader *shader, unsigned bit_size) -{ - switch (bit_size) { - case 16: - return shader->options->lower_ffma16; - case 32: - return shader->options->lower_ffma32; - case 64: - return shader->options->lower_ffma64; - } - UNREACHABLE("bad bit size"); -} - static nir_def * lower_bfdot_to_bfdot2_bfadd(nir_builder *b, nir_alu_instr *alu) { @@ -183,12 +169,12 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16) /* If we don't want to lower ffma, create several ffma instead of fmul+fadd * and fusing later because fusing is not possible for exact fdot instructions. */ - if (!is_bfloat16 && will_lower_ffma(builder->shader, alu->def.bit_size)) + if (!is_bfloat16 && nir_prefers_fmad(builder->shader, alu->def.bit_size)) return lower_reduction(alu, nir_op_fmul, nir_op_fadd, builder, reverse_order); unsigned num_components = nir_op_infos[alu->op].input_sizes[0]; - const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma_old; + const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma_weak; const nir_op mul_op = is_bfloat16 ? nir_op_bfmul : nir_op_fmul; nir_def *prev = NULL; @@ -315,7 +301,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) /* Only use reverse order for imprecise fdph, see explanation in lower_fdot. */ bool reverse_order = !(b->fp_math_ctrl & nir_fp_exact); - if (will_lower_ffma(b->shader, alu->def.bit_size)) { + if (nir_prefers_fmad(b->shader, alu->def.bit_size)) { nir_def *sum[4]; for (unsigned i = 0; i < 3; i++) { int dest = reverse_order ? 3 - i : i; @@ -328,12 +314,12 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) } else if (reverse_order) { nir_def *sum = nir_channel(b, src1_vec, 3); for (int i = 2; i >= 0; i--) - sum = nir_ffma_old(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum); + sum = nir_ffma_weak(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum); return sum; } else { nir_def *sum = nir_fmul(b, nir_channel(b, src0_vec, 0), nir_channel(b, src1_vec, 0)); - sum = nir_ffma_old(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum); - sum = nir_ffma_old(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum); + sum = nir_ffma_weak(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum); + sum = nir_ffma_weak(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum); return nir_fadd(b, sum, nir_channel(b, src1_vec, 3)); } } diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c index 0e4f9a6014b..2abbaf4fb38 100644 --- a/src/compiler/nir/nir_lower_flrp.c +++ b/src/compiler/nir/nir_lower_flrp.c @@ -52,8 +52,8 @@ replace_with_strict_ffma(struct nir_builder *bld, struct u_vector *dead_flrp, nir_def *const c = nir_ssa_for_alu_src(bld, alu, 2); nir_def *const neg_a = nir_fneg(bld, a); - nir_def *const inner_ffma = nir_ffma_old(bld, neg_a, c, a); - nir_def *const outer_ffma = nir_ffma_old(bld, b, c, inner_ffma); + nir_def *const inner_ffma = nir_ffma_weak(bld, neg_a, c, a); + nir_def *const outer_ffma = nir_ffma_weak(bld, b, c, inner_ffma); nir_def_rewrite_uses(&alu->def, outer_ffma); @@ -79,7 +79,7 @@ replace_with_single_ffma(struct nir_builder *bld, struct u_vector *dead_flrp, nir_def *const one_minus_c = nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c); nir_def *const b_times_c = nir_fmul(bld, b, c); - nir_def *const final_ffma = nir_ffma_old(bld, a, one_minus_c, b_times_c); + nir_def *const final_ffma = nir_ffma_weak(bld, a, one_minus_c, b_times_c); nir_def_rewrite_uses(&alu->def, final_ffma); @@ -331,17 +331,8 @@ convert_flrp_instruction(nir_builder *bld, nir_alu_instr *alu, bool always_precise) { - bool have_ffma = false; unsigned bit_size = alu->def.bit_size; - - if (bit_size == 16) - have_ffma = !bld->shader->options->lower_ffma16; - else if (bit_size == 32) - have_ffma = !bld->shader->options->lower_ffma32; - else if (bit_size == 64) - have_ffma = !bld->shader->options->lower_ffma64; - else - UNREACHABLE("invalid bit_size"); + bool have_ffma = !nir_prefers_fmad(bld->shader, bit_size); bld->cursor = nir_before_instr(&alu->instr); bld->fp_math_ctrl = alu->fp_math_ctrl; diff --git a/src/compiler/nir/nir_lower_interpolation.c b/src/compiler/nir/nir_lower_interpolation.c index d5d5ff83a0a..f0752a47e0d 100644 --- a/src/compiler/nir/nir_lower_interpolation.c +++ b/src/compiler/nir/nir_lower_interpolation.c @@ -106,12 +106,12 @@ nir_lower_interpolation_instr(nir_builder *b, nir_instr *instr, void *cb_data) nir_def *bary = intr->src[0].ssa; nir_def *val; - val = nir_ffma_old(b, nir_channel(b, bary, 1), - nir_channel(b, iid, 1), - nir_channel(b, iid, 0)); - val = nir_ffma_old(b, nir_channel(b, bary, 0), - nir_channel(b, iid, 2), - val); + val = nir_ffma_weak(b, nir_channel(b, bary, 1), + nir_channel(b, iid, 1), + nir_channel(b, iid, 0)); + val = nir_ffma_weak(b, nir_channel(b, bary, 0), + nir_channel(b, iid, 2), + val); comps[i] = val; } diff --git a/src/compiler/nir/nir_lower_pntc_ytransform.c b/src/compiler/nir/nir_lower_pntc_ytransform.c index 26648540502..969199029e0 100644 --- a/src/compiler/nir/nir_lower_pntc_ytransform.c +++ b/src/compiler/nir/nir_lower_pntc_ytransform.c @@ -75,11 +75,11 @@ lower_load_pointcoord(lower_pntc_ytransform_state *state, nir_def *pntc = &intr->def; nir_def *transform = get_pntc_transform(state); - nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pntc, y_swizzle), - /* Flip the sign of y if we're flipping. */ - nir_channel(b, transform, 0), - /* The offset is 1 if we're flipping, 0 otherwise. */ - nir_channel(b, transform, 1)); + nir_def *flipped_y = nir_ffma_weak(b, nir_channel(b, pntc, y_swizzle), + /* Flip the sign of y if we're flipping. */ + nir_channel(b, transform, 0), + /* The offset is 1 if we're flipping, 0 otherwise. */ + nir_channel(b, transform, 1)); /* Reassemble the vector. */ pntc = nir_vector_insert_imm(b, pntc, flipped_y, y_swizzle); diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index afec28ce010..24d7e82a61e 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -409,7 +409,7 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, } nir_def *result = - nir_ffma_old(b, y, m0, nir_ffma_old(b, u, m1, nir_ffma_old(b, v, m2, offset))); + nir_ffma_weak(b, y, m0, nir_ffma_weak(b, u, m1, nir_ffma_weak(b, v, m2, offset))); nir_def_rewrite_uses(&tex->def, result); } diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index c8181d87f72..e965b64a206 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -106,8 +106,8 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state, */ unsigned base = invert ? 0 : 2; /* wpos.y = wpos.y * trans.x/z + trans.y/w */ - wpos[1] = nir_ffma_old(b, wpos[1], nir_channel(b, wpostrans, base), - nir_channel(b, wpostrans, base + 1)); + wpos[1] = nir_ffma_weak(b, wpos[1], nir_channel(b, wpostrans, base), + nir_channel(b, wpostrans, base + 1)); } nir_def *new_wpos = nir_vec(b, &wpos[c], intr->num_components); @@ -258,8 +258,8 @@ lower_load_sample_pos(lower_wpos_ytransform_state *state, nir_def *scale = nir_channel(b, wpostrans, 0); nir_def *neg_scale = nir_channel(b, wpostrans, 2); /* Either y or 1-y for scale equal to 1 or -1 respectively. */ - nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pos, 1), scale, - nir_fmax(b, neg_scale, nir_imm_float(b, 0.0))); + nir_def *flipped_y = nir_ffma_weak(b, nir_channel(b, pos, 1), scale, + nir_fmax(b, neg_scale, nir_imm_float(b, 0.0))); nir_def *flipped_pos = nir_vector_insert_imm(b, pos, flipped_y, 1); nir_def_rewrite_uses_after(&intr->def, flipped_pos); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 8b8b05fdf41..0d4103c40e2 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -101,7 +101,7 @@ denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode def lowered_sincos(c): x = ('fsub', ('fmul', 2.0, ('ffract', ('fadd', ('fmul', 0.5 / pi, a), c))), 1.0) x = ('fmul', ('fsub', x, ('fmul', x, ('fabs', x))), 4.0) - return ('ffma_old', ('ffma_old', x, ('fabs', x), ('fneg', x)), 0.225, x) + return ('ffma_weak', ('ffma_weak', x, ('fabs', x), ('fneg', x)), 0.225, x) def intBitsToFloat(i): return struct.unpack('!f', struct.pack('!I', i))[0] diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 115d59cd65c..f5d6f1154dc 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -3920,9 +3920,9 @@ try_move_postdominator(struct linkage_info *linkage, defs[i] = nir_fmul(b, new_tes_loads[i], nir_channel(b, tesscoord, remap[i])); } else { - defs[i] = nir_ffma_old(b, new_tes_loads[i], - nir_channel(b, tesscoord, remap[i]), - defs[i - 1]); + defs[i] = nir_ffma_weak(b, new_tes_loads[i], + nir_channel(b, tesscoord, remap[i]), + defs[i - 1]); } } new_input = defs[2]; diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index cd530571edb..6a62e217930 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -430,7 +430,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i))); nir_def *result = nir_a_minus_bc(nb, nir_fmul(nb, eta, I), - nir_ffma_old(nb, eta, n_dot_i, nir_fsqrt(nb, k)), + nir_ffma_weak(nb, eta, n_dot_i, nir_fsqrt(nb, k)), N); /* XXX: bcsel, or if statement? */ dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);