nir: remove ffma_old

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
Karol Herbst 2026-04-21 20:33:03 +02:00 committed by Marge Bot
parent 099e876a38
commit e9c1cce35f
64 changed files with 14 additions and 310 deletions

View file

@ -985,7 +985,6 @@ ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:

View file

@ -451,7 +451,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_e5m22f:
case nir_op_fmulz:
case nir_op_ffmaz:
case nir_op_ffmaz_old:
case nir_op_f2f64:
case nir_op_u2f64:
case nir_op_i2f64:
@ -487,7 +486,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_f2f16_rd: type = RegType::vgpr; break;
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmax:

View file

@ -1925,8 +1925,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
case nir_op_ffma:
case nir_op_ffma_old: {
case nir_op_ffma: {
if (dst.regClass() == v2b) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
} else if (dst.regClass() == v1 && instr->def.bit_size == 16) {
@ -1962,8 +1961,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
case nir_op_ffmaz:
case nir_op_ffmaz_old: {
case nir_op_ffmaz: {
if (dst.regClass() == v1) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
ctx->block->fp_mode.must_flush_denorms32, 3);

View file

@ -760,13 +760,11 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
}
break;
case nir_op_ffma:
case nir_op_ffma_old:
/* FMA is slow on gfx6-8, so it shouldn't be used. */
assert(instr->def.bit_size != 32 || ctx->ac.gfx_level >= GFX9);
result = emit_fp_intrinsic(&ctx->ac, "llvm.fma", def_type, src[0], src[1], src[2]);
break;
case nir_op_ffmaz:
case nir_op_ffmaz_old:
assert(ctx->ac.gfx_level >= GFX10_3);
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);

View file

@ -60,9 +60,6 @@ get_nir_options_for_stage(struct radv_compiler_info *compiler_info, mesa_shader_
ac_nir_set_options(compiler_info->ac, compiler_info->key.use_llvm, options);
options->lower_ffma16 = split_fma || compiler_info->ac->gfx_level < GFX9;
options->lower_ffma32 = split_fma || compiler_info->ac->gfx_level < GFX10_3;
options->lower_ffma64 = split_fma;
if (split_fma) {
options->float_mul_add16 |= nir_float_muladd_support_prefers_split;
options->float_mul_add32 |= nir_float_muladd_support_prefers_split;

View file

@ -1907,7 +1907,6 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
return agx_fmul_to(b, dst, s0, s1);
case nir_op_ffma:
case nir_op_ffma_old:
if (instr->def.bit_size == 16)
return agx_hfma_to(b, dst, s0, s1, s2);
else

View file

@ -348,8 +348,6 @@ agx_round_registers(unsigned halfregs)
static const nir_shader_compiler_options agx_nir_options = {
.lower_fdiv = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.float_mul_add16 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse,
.lower_flrp16 = true,

View file

@ -90,7 +90,6 @@ alu_cost(nir_alu_instr *alu)
case nir_op_fadd:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_iadd:
case nir_op_inot:
case nir_op_iand:

View file

@ -192,9 +192,6 @@ v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo)
.lower_mul_2x32_64 = true,
.lower_fdiv = true,
.lower_find_lsb = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsqrt = true,

View file

@ -1722,7 +1722,6 @@ nir_alu_instr_is_mul_add(const nir_alu_instr *instr)
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_fmad:
case nir_op_ffma_old:
return true;
default:
return false;
@ -1738,7 +1737,6 @@ nir_alu_instr_is_mul_add_z(const nir_alu_instr *instr)
switch (instr->op) {
case nir_op_ffmaz:
case nir_op_fmadz:
case nir_op_ffmaz_old:
return true;
default:
return false;

View file

@ -193,12 +193,6 @@ nir_fast_normalize(nir_builder *b, nir_def *vec)
return nir_fdiv(b, vec, nir_fast_length(b, vec));
}
/* Multiply-add assembled from two builder calls: nir_fmul() on x and y,
 * then nir_fadd() of that product with z. The result of the add is returned.
 */
static inline nir_def *
nir_fmad_old(nir_builder *b, nir_def *x, nir_def *y, nir_def *z)
{
   nir_def *product = nir_fmul(b, x, y);

   return nir_fadd(b, product, z);
}
static inline nir_def *
nir_maxmag(nir_builder *b, nir_def *x, nir_def *y)
{

View file

@ -85,7 +85,7 @@ lower_pos_write_dynamic(nir_builder *b, nir_intrinsic_instr *intr,
nir_def *c = nir_load_clip_z_coeff(b);
/* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */
nir_def *new_z = nir_ffma_old(b, nir_fneg(b, z), c, nir_ffma_old(b, w, c, z));
nir_def *new_z = nir_ffma_weak(b, nir_fneg(b, z), c, nir_ffma_weak(b, w, c, z));
nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2));
return true;
}

View file

@ -671,7 +671,6 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
mangled_name = "__fmul64(u641;u641;";
break;
case nir_op_fmad:
case nir_op_ffma_old:
name = "__fmad64";
mangled_name = "__fmad64(u641;u641;u641;";
break;

View file

@ -75,7 +75,6 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr,
mangled_name = "__fmul32(u1;u1;";
break;
case nir_op_fmad:
case nir_op_ffma_old:
mangled_name = "__fmad32(u1;u1;u1;";
break;
case nir_op_fsat:

View file

@ -1192,22 +1192,6 @@ consistent not even within the same shader.
This is like GLSLs ``ffma``.
""")
# ffma_old: three-source float opcode (tfloat), declared with the
# _2src_commutative algebraic property.
# The C snippet computes src0 * src1 + src2 through an fma routine:
#  - when nir_is_rounding_mode_rtz() holds for this bit size, it uses the
#    _mesa_*_fma_rtz helpers (64-bit and 32-bit directly; any other size goes
#    through the double helper and _mesa_double_to_float_rtz);
#  - otherwise it uses fmaf() for 32-bit and fma() for every other size.
# NOTE(review): presumably this snippet is the opcode's constant-folding
# expression -- confirm against the definition of triop().
triop("ffma_old", tfloat, _2src_commutative, """
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
if (bit_size == 64)
dst = _mesa_double_fma_rtz(src0, src1, src2);
else if (bit_size == 32)
dst = _mesa_float_fma_rtz(src0, src1, src2);
else
dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2));
} else {
if (bit_size == 32)
dst = fmaf(src0, src1, src2);
else
dst = fma(src0, src1, src2);
}
""")
triop("fmadz", tfloat32, _2src_commutative, """
if (src0 == 0.0 || src1 == 0.0) {
dst = 0.0 + src2;
@ -1246,21 +1230,6 @@ Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by +/-0.0
``+0.0 + src2``.
""")
# ffmaz_old: 32-bit-only (tfloat32) three-source opcode, declared with the
# _2src_commutative algebraic property.
# The C snippet checks the multiplicands first: if src0 or src1 compares equal
# to 0.0 the result is 0.0 + src2. Otherwise it uses _mesa_float_fma_rtz()
# when nir_is_rounding_mode_rtz() holds for 32 bits, and fmaf() if not.
triop("ffmaz_old", tfloat32, _2src_commutative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0 + src2;
else if (nir_is_rounding_mode_rtz(execution_mode, 32))
dst = _mesa_float_fma_rtz(src0, src1, src2);
else
dst = fmaf(src0, src1, src2);
""", description = """
Floating-point multiply-add with modified zero handling.
Unlike :nir:alu-op:`ffma_old`, anything (even infinity or NaN) multiplied by +/-0.0 is
+0.0. ``ffmaz_old(0.0, inf, src2)`` and ``ffmaz_old(0.0, nan, src2)`` must be
``+0.0 + src2``.
""")
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
triop("iadd3", tint, _2src_commutative, "(uint64_t)src0 + (uint64_t)src1 + (uint64_t)src2",

View file

@ -241,15 +241,6 @@ optimizations += [
(('usadd_4x8_vc4', a, ~0), ~0),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~ffma_old', a, b, ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
(('~ffma_old', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
(('~ffma_old', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma_old', a, c, d))),
(('~ffmaz_old', a, b, ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
(('~ffmaz_old', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
(('~ffmaz_old', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz_old', a, c, d))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('iadd', ('ishl', b, a), ('ishl', c, a)), ('ishl', ('iadd', b, c), a)),
(('iand', ('iand', a, b), ('iand(is_used_once)', a, c)), ('iand', ('iand', a, b), c)),
@ -286,11 +277,8 @@ optimizations += [
(('fmulz(nsz)', a, 'b(is_finite_not_zero)'), ('fmul', a, b)),
(('fmulz(nsz)', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)),
(('fmulz', a, a), ('fmul', a, a)),
(('ffmaz_old(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma_old', a, b, c)),
(('ffmaz(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c)),
(('ffmaz_old', 'a(is_finite)', 'b(is_finite)', c), ('ffma_old', a, b, c)),
(('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)),
(('ffmaz_old', a, a, b), ('ffma_old', a, a, b)),
(('ffmaz', a, a, b), ('ffma', a, a, b)),
(('imul', a, 0), 0),
(('imul24_relaxed', a, 0), 0),
@ -310,32 +298,18 @@ optimizations += [
# If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
(('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
(('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
(('ffma_old(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
(('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
(('ffma_old(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
(('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
(('ffmaz_old', 0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz_old', -0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz', -0.0, a, b), ('fadd', 0.0, b)),
(('ffma_old(nsz)', a, b, 0.0), ('fmul', a, b)),
(('ffma(nsz)', a, b, 0.0), ('fmul', a, b)),
(('ffmaz_old(nsz)', a, b, 0.0), ('fmulz', a, b)),
(('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)),
(('ffma_old', a, b, -0.0), ('fmul', a, b)),
(('ffma', a, b, -0.0), ('fmul', a, b)),
(('ffmaz_old', a, b, -0.0), ('fmulz', a, b)),
(('ffmaz', a, b, -0.0), ('fmulz', a, b)),
(('ffma_old', 1.0, a, b), ('fadd', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
(('ffmaz_old(nsz)', 1.0, a, b), ('fadd', a, b)),
(('ffmaz(nsz)', 1.0, a, b), ('fadd', a, b)),
(('ffma_old', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('ffmaz_old(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('~ffma_old', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
(('~ffmaz_old', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
(('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)),
@ -413,20 +387,12 @@ optimizations += [
('fmulz', 'ma', b), has_fmulz), {'ma' : a}),
# ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c)
*add_fabs_fneg((('ffma_old@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
('ffmaz_old', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma_old@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz_old', 'ma', b, c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz_old', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz_old', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
@ -546,12 +512,6 @@ optimizations.extend([
(('~fadd', ('fmul', a, ('b2f', ('inot', 'c@1'))), ('fmul', b, ('b2f', c))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', b, ('b2f', 'c@1'), ('ffma_old', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', ('b2f', 'c@1'), ('ffma_old', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
(('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
(('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
@ -586,15 +546,6 @@ optimizations.extend([
(('fadd@32', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract'),
(('fadd@64', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract && !(options->lower_doubles_options & nir_lower_dfract)'),
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
(('ffma_old@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
(('ffma_old@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
(('ffma_old@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
(('ffmaz_old', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
# Always lower inexact ffma_old, because it will be fused back by late optimizations (nir_opt_algebraic_late).
(('ffma_old@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
(('ffma_old@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
(('ffma_old@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
(('ffmaz_old(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
(('fmul', ('fadd', ('bcsel', a, ('fmul', b, c), 0), '#d'), '#e'),
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', ('fadd', d, 0.0), e))),
@ -1659,7 +1610,6 @@ for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]:
optimizations.extend([
(('fmul', search_b2f, search_mod), replace_mod_mul),
(('ffma_old', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
(('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
])
@ -1688,7 +1638,6 @@ optimizations.extend([
(('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
(('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))),
(('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
(('ffma_old', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
(('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
(('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))),
(('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))),
@ -2386,9 +2335,7 @@ optimizations.extend([
# Propagate negation up multiplication chains
(('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
(('fmulz(is_used_by_non_fsat,nsz)', ('fneg', a), b), ('fneg', ('fmulz', a, b))),
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
(('ffmaz_old', ('fneg', a), ('fneg', b), c), ('ffmaz_old', a, b, c)),
(('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
@ -2397,14 +2344,9 @@ optimizations.extend([
(('~fmulz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)),
(('~fmul', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)),
(('imul', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
(('~ffma_old', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma_old', ('fmul', a, c), b, d)),
(('~ffmaz_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz_old', ('fmulz', a, c), b, d)),
(('~ffma_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz_old', ('fmul', a, c), b, d)),
# Prefer moving out a multiplication for more MAD/FMA-friendly code
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd', ('ffma_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma_old', a, b, d), c)),
(('~fadd', ('ffmaz_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz_old', a, b, d), c)),
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
# Reassociate constants in add/mul chains so they can be folded together.
@ -2413,16 +2355,9 @@ optimizations.extend([
(('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
(('~fmulz', '#a', ('fmulz', b, '#c')), ('fmulz', ('fmulz', a, c), b)),
(('~fmul', '#a(is_finite_not_zero)', ('fmulz', b, '#c')), ('fmulz', ('fmul', a, c), b)),
(('~ffma_old', '#a', ('fmul', b, '#c'), d), ('ffma_old', ('fmul', a, c), b, d)),
(('~ffmaz_old', '#a', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmulz', a, c), b, d)),
(('~ffmaz_old', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmul', a, c), b, d)),
(('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
(('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
(('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
(('~fadd', '#a', ('ffma_old', b, c, '#d')), ('ffma_old', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffma_old', b, c, '#d'))), ('ffma_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('~fadd', '#a', ('ffmaz_old', b, c, '#d')), ('ffmaz_old', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffmaz_old', b, c, '#d'))), ('ffmaz_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
(('iand', '#a', ('iand', b, '#c')), ('iand', ('iand', a, c), b)),
(('ior', '#a', ('ior', b, '#c')), ('ior', ('ior', a, c), b)),
@ -3535,7 +3470,7 @@ for op in ['fadd', 'fdiv', 'fmod', 'fmul', 'fpow', 'frem', 'fsub']:
optimizations += [((op, a, '#b(is_nan)'), NAN, 'true', TestStatus.XFAIL if op == 'fpow' else TestStatus.PASS)] # some opcodes are not commutative. XFAIL is fpow(1.0, NaN) producing NaN instead of 1.0.
# NaN propagation: Trinary opcodes. If any operand is NaN, replace it with NaN.
for op in ['ffma_old', 'ffma', 'flrp']:
for op in ['ffma', 'flrp']:
optimizations += [((op, '#a(is_nan)', b, c), NAN)]
optimizations += [((op, a, '#b(is_nan)', c), NAN)] # some opcodes are not commutative
optimizations += [((op, a, b, '#c(is_nan)'), NAN)]
@ -3779,14 +3714,9 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]):
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
# avoid fusing in cases where it's harmful.
fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)'
ffma_old = 'ffmaz_old' if mulz else 'ffma_old'
fadd = 'fadd@{}(contract)'.format(sz)
option_old = 'options->fuse_ffma{}'.format(sz)
option_avoid_abs = 'options->avoid_ternary_with_fabs'
option_old_with_abs = f'options->fuse_ffma{sz} && !{option_avoid_abs}'
option = f'options->float_mul_add{sz}'
option_avoid_abs = 'options->avoid_ternary_with_fabs'
option_has_fmad = f'({option} & nir_float_muladd_support_has_fmad)'
option_has_ffma = f'({option} & nir_float_muladd_support_has_ffma)'
option_prefer_split = f'({option} & nir_float_muladd_support_prefers_split)'
@ -3795,19 +3725,6 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]):
option_fmad = f'{option_fuse} && (!{option_has_ffma} || {option_prefer_split}) && {option_has_fmad}'
option_ffma = f'{option_fuse} && (!{option_has_fmad} || !{option_prefer_split}) && {option_has_ffma}'
late_optimizations.extend([
((fadd, (fmul, a, b), c), (ffma_old, a, b, c), option_old),
((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c),
(ffma_old, ('fneg', a), b, c), option_old),
((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c),
(ffma_old, ('fabs', a), ('fabs', b), c), option_old_with_abs),
((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c),
(ffma_old, ('fneg', ('fabs', a)), ('fabs', b), c), option_old_with_abs),
])
for fmad in ['ffma', 'fmad']:
option = option_fmad if fmad == 'fmad' else option_ffma
# contract is only needed for ffma
@ -3922,10 +3839,6 @@ late_optimizations.extend([
# A similar operation could apply to any ffma(#a, b, #(-a/2)), but this
# particular operation is common for expanding values stored in a texture
# from [0,1] to [-1,1].
(('~ffma_old@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
(('~ffma_old@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma_old@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
(('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
@ -3957,10 +3870,6 @@ late_optimizations.extend([
# Option 5: a * (2 - a)
#
# There are a lot of other possible combinations.
(('~ffma_old@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
@ -3997,7 +3906,7 @@ late_optimizations.extend([
# optimization in these stages. See bugzilla #111490. In tessellation
# stages applications seem to use 'precise' when necessary, so allow the
# optimization in those stages.
for fmad in ['ffma_old', 'ffma', 'fmad']:
for fmad in ['ffma', 'fmad']:
late_optimizations.extend([
(('~fadd', (f'{fmad}(is_used_once)', a, b, (f'{fmad}(is_used_once)', c, d, (fmad, e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'),
(fmad, a, b, (fmad, c, d, (fmad, e, 'f', (fmad, 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
@ -4009,7 +3918,7 @@ for fmad in ['ffma_old', 'ffma', 'fmad']:
(fmad, ('fneg', a), b, (fmad, ('fneg', c), d, (fmad, ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
])
for fmadz in ['ffmaz_old', 'ffmaz', 'fmadz']:
for fmadz in ['ffmaz', 'fmadz']:
late_optimizations.extend([
(('~fadd', (f'{fmadz}(is_used_once)', a, b, (fmadz, c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
(fmadz, a, b, (fmadz, c, d, (fmadz, e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
@ -4146,7 +4055,7 @@ for op in ['fadd']:
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
]
for op in ['ffma_old', 'ffma', 'ffmaz_old', 'ffmaz']:
for op in ['ffma', 'ffmaz']:
late_optimizations += [
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
@ -4159,8 +4068,6 @@ for op in ['ffma_old', 'ffma', 'ffmaz_old', 'ffmaz']:
late_optimizations += [
(('fmulz@32', a, b),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'),
(('ffmaz_old@32', a, b, c),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma_old@32', a, b, c)), 'options->lower_fmulz_with_abs_min'),
(('ffmaz@32', a, b, c),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
]
@ -4182,7 +4089,7 @@ for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']:
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b), 'true', TestStatus.UNSUPPORTED)]
# Ternary opcodes
for op in ['ffma_old', 'ffma', 'flrp']:
for op in ['ffma', 'flrp']:
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c), 'true', TestStatus.UNSUPPORTED)]
# Comparison opcodes
@ -4237,7 +4144,6 @@ late_optimizations += [
distribute_src_mods = [
# Try to remove some spurious negations rather than pushing them down.
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
(('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
(('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
@ -4249,7 +4155,6 @@ distribute_src_mods = [
(('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),
(('fabs', ('fmul_rtz(is_used_once)', a, b)), ('fmul_rtz', ('fabs', a), ('fabs', b))),
(('fneg', ('ffma_old(is_used_once,nsz)', a, b, c)), ('ffma_old', ('fneg', a), b, ('fneg', c))),
(('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
(('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c), 'true', TestStatus.XFAIL), # XFAIL is -flrp(0, -1, 0) is 0.0 instead of -0.0
(('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),

View file

@ -177,13 +177,11 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
case nir_op_ffmaz:
case nir_op_fmadz:
case nir_op_ffmaz_old:
src_mark_preserve_sz(&alu->src[2].src, NULL);
break;
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_fmad:
case nir_op_ffma_old:
if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) &&
!nir_alu_srcs_equal(alu, alu, 0, 1)) {
src_mark_preserve_sz(&alu->src[0].src, NULL);

View file

@ -3393,8 +3393,6 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
case nir_op_ffmaz:
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT ||
GET_SRC_INTERP(alu, 1) == FLAG_INTERP_CONVERGENT;

View file

@ -836,8 +836,6 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
push_fp_query(state, alu->src[0].src.ssa);
push_fp_query(state, alu->src[1].src.ssa);
return;
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_ffmaz:
@ -1328,9 +1326,7 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_ffmaz:
case nir_op_ffmaz_old: {
case nir_op_ffmaz: {
bool mulz = nir_alu_instr_is_mul_add_z(alu);
bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1);
bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1);

View file

@ -275,12 +275,6 @@ MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_float_muladd_support)
typedef struct nir_shader_compiler_options {
bool lower_fdiv;
bool lower_ffma16;
bool lower_ffma32;
bool lower_ffma64;
bool fuse_ffma16;
bool fuse_ffma32;
bool fuse_ffma64;
nir_float_muladd_support float_mul_add16;
nir_float_muladd_support float_mul_add32;
nir_float_muladd_support float_mul_add64;

View file

@ -191,8 +191,6 @@ DEFINE_TEST(ffma_weak, 3)
DEFINE_TEST(ffmaz, 3)
DEFINE_TEST(fmad, 3)
DEFINE_TEST(fmadz, 3)
DEFINE_TEST(ffma_old, 3)
DEFINE_TEST(ffmaz_old, 3)
DEFINE_TEST(fabs, 1)
DEFINE_TEST(fneg, 1)
DEFINE_TEST(fexp2, 1)

View file

@ -655,8 +655,6 @@ movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3],
case nir_op_ffmaz:
case nir_op_fmad:
case nir_op_fmadz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
return !divergent[0] || !divergent[1];
case nir_op_fdiv:

View file

@ -75,8 +75,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \
/* TES uses fadd and fmul for interpolation, so it's always present. */ \
if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \
(nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \
nir_op_##alu != nir_op_fmad && nir_op_##alu != nir_op_ffma && \
nir_op_##alu != nir_op_ffma_old)) { \
nir_op_##alu != nir_op_fmad && nir_op_##alu != nir_op_ffma)) { \
ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
} \
} \

View file

@ -24,12 +24,6 @@ static const struct spirv_to_nir_options spirv_options = {
};
struct nir_shader_compiler_options generic_opts = {
/* TODO: Do we want to set has_*? Will drivers be able to lower
* appropriately?
*/
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_keep_weak_ffma,
.float_mul_add32 = nir_float_muladd_support_keep_weak_ffma,
.float_mul_add64 = nir_float_muladd_support_keep_weak_ffma,

View file

@ -129,21 +129,6 @@ static const nir_shader_compiler_options ir3_base_options = {
.lower_usub_borrow = true,
.lower_mul_high = true,
.lower_mul_2x32_64 = true,
/* ir3's mad is an unfused mul-add instruction, so we need to flag fma
* lowering so that CL can implement fused fma in software. GLSL,
* SPIRV, and NIR don't require either fused or unfused behavior from
* fma, and we'll turn mul+adds back into nir_op_ffma (again, implemented
* as unfused) during nir_opt_algebraic_late() (assuming it's not
* decorated with GLSL's precise, or SPIRV's NoContraction), or
* ir3_nir_opt_algebraic_late (if it is, since ir3's unfused mul-add is
* precise).
*/
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,

View file

@ -727,7 +727,6 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG);
break;
case nir_op_fmad:
case nir_op_ffma_old:
/* The scalar ALU doesn't support mad, so expand to mul+add so that we
* don't unnecessarily fall back to non-earlypreamble. This is safe
* because at least on a6xx+ mad is unfused.

View file

@ -3338,7 +3338,6 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
result = lp_build_floor(float_bld, src[0]);
break;
case nir_op_ffma_weak:
case nir_op_ffma_old:
result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
break;
case nir_op_ffract: {

View file

@ -1573,7 +1573,6 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
/* This is fine as long as drivers implement TGSI MAD as fmad */
[nir_op_fmad] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
[nir_op_ffma_weak] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
[nir_op_ffma_old] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
[nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 },
};
@ -4068,8 +4067,6 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,

View file

@ -42,9 +42,6 @@ etna_compiler_create(const char *renderer, const struct etna_core_info *info)
.lower_fpow = true,
.lower_fround_even = true,
.lower_ftrunc = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,

View file

@ -50,7 +50,7 @@ static const struct etna_op_info etna_ops[] = {
#define IOP(nir, op) IOPC(nir, op, TRUE)
#define UOP(nir, op) UOPC(nir, op, TRUE)
OP(mov, MOV), OP(fneg, MOV), OP(fabs, MOV), OP(fsat, MOV),
OP(fmul, MUL), OP(fadd, ADD), OP(ffma_old, MAD), OP(fmad, MAD),
OP(fmul, MUL), OP(fadd, ADD), OP(fmad, MAD),
OP(fdot2, DP2), OP(fdot3, DP3), OP(fdot4, DP4),
OPC(fmin, SELECT, GT), OPC(fmax, SELECT, LT),
OP(ffract, FRC), OP(frcp, RCP), OP(frsq, RSQ),

View file

@ -19,9 +19,6 @@ static const nir_shader_compiler_options options = {
.lower_fmod = true,
.lower_fdiv = true,
.lower_fceil = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
@ -313,7 +310,6 @@ instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
[nir_op_fsub] = {ADDs, ADDv},
[nir_op_fmul] = {MULs, MULv},
[nir_op_fmad] = {-1, MULADDv},
[nir_op_ffma_old] = {-1, MULADDv},
[nir_op_fmax] = {MAXs, MAXv},
[nir_op_fmin] = {MINs, MINv},
[nir_op_ffloor] = {FLOORs, FLOORv},

View file

@ -109,7 +109,6 @@ i915_get_name(struct pipe_screen *screen)
static const nir_shader_compiler_options i915_compiler_options = {
.fdot_replicates = true,
.fuse_ffma32 = true,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */
.lower_extract_byte = true,
@ -137,9 +136,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_bitfield_insert = true,
.lower_bitfield_extract = true,
.lower_fdph = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_fmod = true,
.lower_hadd = true,
.lower_uadd_sat = true,

View file

@ -204,7 +204,6 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
[nir_op_fsat] = ppir_op_sat,
[nir_op_fclamp_pos] = ppir_op_clamp_pos,
[nir_op_fmad] = ppir_op_fmad,
[nir_op_ffma_old] = ppir_op_fmad,
};
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)

View file

@ -43,9 +43,6 @@
#include "ir/lima_ir.h"
static const nir_shader_compiler_options vs_nir_options = {
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_fpow = true,
.lower_ffract = true,
.lower_fdiv = true,
@ -68,9 +65,6 @@ static const nir_shader_compiler_options vs_nir_options = {
};
static const nir_shader_compiler_options fs_nir_options = {
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,

View file

@ -488,9 +488,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_bitfield_extract16 = true,
.lower_bitfield_extract = true,
.lower_fdph = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_keep_weak_ffma,
.float_mul_add32 = nir_float_muladd_support_keep_weak_ffma,
.float_mul_add64 = nir_float_muladd_support_keep_weak_ffma,

View file

@ -471,12 +471,6 @@ Converter::getOperation(nir_op op)
case nir_op_ffma_weak:
assert(info->target < 0xc0);
return OP_MAD;
case nir_op_ffma_old:
case nir_op_ffmaz_old:
/* No FMA op pre-nvc0 */
if (info->target < 0xc0)
return OP_MAD;
return OP_FMA;
case nir_op_flog2:
return OP_LG2;
case nir_op_fmax:
@ -2622,8 +2616,6 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_ffloor:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_ffma_weak:
case nir_op_flog2:
case nir_op_fmax:
@ -2680,12 +2672,10 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_weak:
case nir_op_ffma_old:
i->dnz = this->info->io.mul_zero_wins;
break;
case nir_op_fmulz:
case nir_op_ffmaz:
case nir_op_ffmaz_old:
i->dnz = true;
break;
default:
@ -3568,12 +3558,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
{
nir_shader_compiler_options op = {};
op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
op.lower_ffma16 = false;
op.lower_ffma32 = false;
op.lower_ffma64 = false;
op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
if (chipset >= NVISA_GF100_CHIPSET) {
op.float_mul_add32 = nir_float_muladd_support_has_ffma;
op.float_mul_add64 = nir_float_muladd_support_has_ffma;

View file

@ -335,8 +335,6 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
}
static const nir_shader_compiler_options nv30_base_compiler_options = {
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.lower_bitops = true,
.lower_extract_byte = true,

View file

@ -820,7 +820,6 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr)
[nir_op_fmin] = TGSI_OPCODE_MIN,
[nir_op_fmax] = TGSI_OPCODE_MAX,
[nir_op_fmad] = TGSI_OPCODE_MAD,
[nir_op_ffma_old] = TGSI_OPCODE_MAD,
};
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) {

View file

@ -105,8 +105,6 @@ static struct disk_cache* r300_get_disk_shader_cache(struct pipe_screen* pscreen
#define COMMON_NIR_OPTIONS \
.fdot_replicates = true, \
.fuse_ffma32 = true, \
.fuse_ffma64 = true, \
.float_mul_add32 = \
nir_float_muladd_support_has_fmad | \
nir_float_muladd_support_fuse, \

View file

@ -1084,9 +1084,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
}
const struct nir_shader_compiler_options nir_options = {
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse,
.float_mul_add64 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse,

View file

@ -1640,7 +1640,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
case nir_op_fneu32:
return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
case nir_op_ffma:
case nir_op_ffma_old:
return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
case nir_op_fadd:
@ -1958,12 +1957,10 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_unpack_64_2x32_split(*alu, 1, shader);
case nir_op_fmad:
case nir_op_ffma_old:
if (!shader.has_flag(Shader::sh_legacy_math_rules))
return emit_alu_op3(*alu, op3_muladd_ieee, shader);
FALLTHROUGH;
case nir_op_fmadz:
case nir_op_ffmaz_old:
return emit_alu_op3(*alu, op3_muladd, shader);
case nir_op_mov:

View file

@ -1034,7 +1034,6 @@ Lower64BitToVec2::support_fp64_op(nir_op op) const
case nir_op_flt32:
case nir_op_fneu32:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fadd:
case nir_op_fmul:
case nir_op_fmax:

View file

@ -323,12 +323,6 @@ static void si_init_screen_nir_options(struct si_screen *sscreen)
ac_nir_set_options(&sscreen->info.compiler_info, !sscreen->use_aco, options);
options->ignore_none_interpolation_in_sysval_gathering = true;
options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
options->lower_ffma32 = !use_fma32;
options->lower_ffma64 = false;
options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9;
options->fuse_ffma32 = use_fma32;
options->fuse_ffma64 = true;
bool use_fma32 = !(options->float_mul_add32 & nir_float_muladd_support_prefers_split) ||
(sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);

View file

@ -76,8 +76,6 @@ softpipe_get_name(struct pipe_screen *screen)
static const nir_shader_compiler_options sp_compiler_options = {
.fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.float_mul_add32 =
nir_float_muladd_support_has_fmad |
nir_float_muladd_support_fuse,

View file

@ -582,9 +582,6 @@ v3d_screen_get_compiler_options(struct pipe_screen *pscreen,
.lower_unpack_32_2x16_split = true,
.lower_fdiv = true,
.lower_find_lsb = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsqrt = true,

View file

@ -2135,9 +2135,6 @@ static const nir_shader_compiler_options nir_options = {
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdiv = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fmod = true,
.lower_fpow = true,

View file

@ -1061,8 +1061,6 @@ virgl_create_screen(struct virgl_winsys *vws, const struct pipe_screen_config *c
nir_float_muladd_support_fuse;
}
screen->compiler_options.no_integers = screen->caps.caps.v1.glsl_level < 130;
screen->compiler_options.lower_ffma32 = true;
screen->compiler_options.fuse_ffma32 = false;
screen->compiler_options.lower_image_offset_to_range_base = true;
screen->compiler_options.lower_atomic_offset_to_range_base = true;
screen->compiler_options.support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);

View file

@ -1266,9 +1266,6 @@ zink_screen_init_compiler(struct zink_screen *screen)
static const struct nir_shader_compiler_options
default_options = {
.io_options = nir_io_has_intrinsics | nir_io_mediump_is_32bit,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_scmp = true,
.lower_fdph = true,
.lower_flrp32 = true,
@ -1317,7 +1314,6 @@ zink_screen_init_compiler(struct zink_screen *screen)
if (!screen->info.feats.features.shaderFloat64) {
screen->nir_options.lower_doubles_options = ~0;
screen->nir_options.lower_flrp64 = true;
screen->nir_options.lower_ffma64 = true;
/* soft fp64 function inlining will blow up loop bodies and effectively
* stop Vulkan drivers from unrolling the loops.
*/

View file

@ -39,7 +39,6 @@ static const struct spirv_to_nir_options spirv_options = {
/** NIR options. */
static const nir_shader_compiler_options nir_options = {
.discard_is_demote = true,
.fuse_ffma32 = true,
.float_mul_add32 = nir_float_muladd_support_has_ffma |
nir_float_muladd_support_fuse,

View file

@ -3178,7 +3178,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
break;
case nir_op_ffma:
case nir_op_ffma_old:
instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]);
break;

View file

@ -1647,7 +1647,6 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
brw_rnd_mode rnd =
brw_rnd_mode_from_execution_mode(execution_mode);

View file

@ -86,9 +86,6 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
/* Prior to Gfx6, there are no three source operations, and Gfx11 loses
* LRP.
*/
nir_options->lower_ffma16 = devinfo->ver < 6;
nir_options->lower_ffma32 = devinfo->ver < 6;
nir_options->lower_ffma64 = devinfo->ver < 6;
nir_options->lower_flrp32 = devinfo->ver < 6;
if (devinfo->ver >= 6) {

View file

@ -1707,7 +1707,6 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr,
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
elk_rnd_mode rnd =
elk_rnd_mode_from_execution_mode(execution_mode);

View file

@ -1671,7 +1671,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (type_sz(dst.type) == 8) {
dst_reg mul_dst = dst_reg(this, glsl_dvec4_type());
emit(MUL(mul_dst, op[1], op[0]));

View file

@ -513,7 +513,6 @@ jay_emit_alu(struct nir_to_jay_state *nj, nir_alu_instr *alu)
break;
case nir_op_ffma:
case nir_op_ffma_old:
jay_MAD(b, type, dst, src[0], src[1], src[2]);
break;

View file

@ -375,7 +375,6 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
alu_funclike(ctx, instr, "floor");
break;
case nir_op_ffma:
case nir_op_ffma_old:
alu_funclike(ctx, instr, "fma");
break;
case nir_op_ffract:

View file

@ -620,9 +620,6 @@ static const struct nir_shader_compiler_options draw_nir_options = {
.lower_bitfield_insert = true,
.lower_bitfield_extract = true,
.lower_fdph = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_flrp16 = true,
.lower_fmod = true,
.lower_hadd = true,

View file

@ -92,8 +92,6 @@ nir_options = {
.compact_arrays = true,
.lower_ineg = true,
.lower_fneg = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.float_mul_add64 = nir_float_muladd_support_has_ffma,
.lower_isign = true,
.lower_fsign = true,
@ -2999,7 +2997,6 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
case nir_op_ffma:
case nir_op_ffma_old:
if (alu->def.bit_size == 64)
ctx->mod.feats.dx11_1_double_extensions = 1;
return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);

View file

@ -113,9 +113,6 @@ pub extern "C" fn nak_debug_no_ugpr() -> bool {
fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
nir_shader_compiler_options {
lower_fdiv: true,
fuse_ffma16: true,
fuse_ffma32: true,
fuse_ffma64: true,
float_mul_add16: nir_float_muladd_support_has_ffma
| nir_float_muladd_support_fuse,
float_mul_add32: nir_float_muladd_support_has_ffma

View file

@ -1056,7 +1056,7 @@ impl<'a> ShaderFromNir<'a> {
b.fexp2(srcs(0)).into()
}
}
nir_op_ffma | nir_op_ffma_old => {
nir_op_ffma => {
let ftype = FloatType::from_bits(alu.def.bit_size().into());
let dst;
if alu.def.bit_size() == 64 {
@ -1102,7 +1102,7 @@ impl<'a> ShaderFromNir<'a> {
}
dst
}
nir_op_ffmaz | nir_op_ffmaz_old => {
nir_op_ffmaz => {
assert!(alu.def.bit_size() == 32);
// DNZ implies FTZ so we need FTZ set or this is invalid
assert!(self.float_ctl.fp32.ftz);

View file

@ -117,7 +117,6 @@ vectorize_filter_cb(const nir_instr *instr, const void *data)
case nir_op_fmul:
case nir_op_fmul_rtz:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fsign:
case nir_op_fsat:
case nir_op_fmax:
@ -271,8 +270,6 @@ lower_bit_size_cb(const nir_instr *instr, void *data)
case nir_op_fmul_rtz:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_fsign:
case nir_op_fsat:
case nir_op_fceil:

View file

@ -2976,7 +2976,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
switch (instr->op) {
case nir_op_ffma:
case nir_op_ffma_old:
bi_fma_to(b, sz, dst, s0, s1, s2);
break;

View file

@ -125,9 +125,6 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
.has_ldexp = true, \
.has_isub = true, \
.vectorize_vec2_16bit = true, \
.fuse_ffma16 = true, \
.fuse_ffma32 = true, \
.fuse_ffma64 = true, \
.float_mul_add16 = nir_float_muladd_support_has_ffma | \
nir_float_muladd_support_fuse, \
.float_mul_add32 = nir_float_muladd_support_has_ffma | \

View file

@ -27,9 +27,6 @@ void midgard_compile_shader_nir(nir_shader *nir,
* solution. */
static const nir_shader_compiler_options midgard_nir_options = {
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,