diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index 5134e23d5c5..d4640969617 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -985,7 +985,6 @@ ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu) case nir_op_fsub: case nir_op_fmul: case nir_op_ffma: - case nir_op_ffma_old: case nir_op_fdiv: case nir_op_flrp: case nir_op_fabs: diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp index 63a20cdd67a..145ab41bbd2 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp @@ -451,7 +451,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_e5m22f: case nir_op_fmulz: case nir_op_ffmaz: - case nir_op_ffmaz_old: case nir_op_f2f64: case nir_op_u2f64: case nir_op_i2f64: @@ -487,7 +486,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_f2f16_rd: type = RegType::vgpr; break; case nir_op_fmul: case nir_op_ffma: - case nir_op_ffma_old: case nir_op_fadd: case nir_op_fsub: case nir_op_fmax: diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp index 8195f0fc3ec..9aaa64a3f7f 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp @@ -1925,8 +1925,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } break; } - case nir_op_ffma: - case nir_op_ffma_old: { + case nir_op_ffma: { if (dst.regClass() == v2b) { emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3); } else if (dst.regClass() == v1 && instr->def.bit_size == 16) { @@ -1962,8 +1961,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } break; } - case nir_op_ffmaz: - case nir_op_ffmaz_old: { + case nir_op_ffmaz: { if (dst.regClass() == v1) { emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst, ctx->block->fp_mode.must_flush_denorms32, 3); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 4e19cb4bb41..5492b7b1785 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -760,13 +760,11 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) } break; case nir_op_ffma: - case nir_op_ffma_old: /* FMA is slow on gfx6-8, so it shouldn't be used. */ assert(instr->def.bit_size != 32 || ctx->ac.gfx_level >= GFX9); result = emit_fp_intrinsic(&ctx->ac, "llvm.fma", def_type, src[0], src[1], src[2]); break; case nir_op_ffmaz: - case nir_op_ffmaz_old: assert(ctx->ac.gfx_level >= GFX10_3); src[0] = ac_to_float(&ctx->ac, src[0]); src[1] = ac_to_float(&ctx->ac, src[1]); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 5fef711a5c1..a0893ba150f 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -60,9 +60,6 @@ get_nir_options_for_stage(struct radv_compiler_info *compiler_info, mesa_shader_ ac_nir_set_options(compiler_info->ac, compiler_info->key.use_llvm, options); - options->lower_ffma16 = split_fma || compiler_info->ac->gfx_level < GFX9; - options->lower_ffma32 = split_fma || compiler_info->ac->gfx_level < GFX10_3; - options->lower_ffma64 = split_fma; if (split_fma) { options->float_mul_add16 |= nir_float_muladd_support_prefers_split; options->float_mul_add32 |= nir_float_muladd_support_prefers_split; diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 62eb16c3b13..8070e610700 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1907,7 +1907,6 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr) return agx_fmul_to(b, dst, s0, s1); case nir_op_ffma: - case nir_op_ffma_old: if (instr->def.bit_size == 16) return agx_hfma_to(b, dst, s0, s1, s2); else diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index ff4cf05b00f..ac56fb9bc98 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -348,8 +348,6 @@ agx_round_registers(unsigned halfregs) static const nir_shader_compiler_options agx_nir_options = { .lower_fdiv = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, .float_mul_add16 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse, .lower_flrp16 = true, diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c index 85a20df4c64..fe6c07bc6aa 100644 --- a/src/asahi/compiler/agx_nir_opt_preamble.c +++ b/src/asahi/compiler/agx_nir_opt_preamble.c @@ -90,7 +90,6 @@ alu_cost(nir_alu_instr *alu) case nir_op_fadd: case nir_op_fmul: case nir_op_ffma: - case nir_op_ffma_old: case nir_op_iadd: case nir_op_inot: case nir_op_iand: diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index ac7a17d3cbc..14894811b00 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -192,9 +192,6 @@ v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo) .lower_mul_2x32_64 = true, .lower_fdiv = true, .lower_find_lsb = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsqrt = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 97372e9a6d2..e9747beea30 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1722,7 +1722,6 @@ nir_alu_instr_is_mul_add(const nir_alu_instr *instr) case nir_op_ffma: case nir_op_ffma_weak: case nir_op_fmad: - case nir_op_ffma_old: return true; default: return false; @@ -1738,7 +1737,6 @@ nir_alu_instr_is_mul_add_z(const nir_alu_instr *instr) switch (instr->op) { case nir_op_ffmaz: case nir_op_fmadz: - case nir_op_ffmaz_old: return true; default: return false; diff --git a/src/compiler/nir/nir_builtin_builder.h b/src/compiler/nir/nir_builtin_builder.h index ab8a71e2c2f..48d4c5bc22d 100644 --- a/src/compiler/nir/nir_builtin_builder.h +++ b/src/compiler/nir/nir_builtin_builder.h @@ -193,12 +193,6 @@ nir_fast_normalize(nir_builder *b, nir_def *vec) return nir_fdiv(b, vec, nir_fast_length(b, vec)); } -static inline nir_def * -nir_fmad_old(nir_builder *b, nir_def *x, nir_def *y, nir_def *z) -{ - return nir_fadd(b, nir_fmul(b, x, y), z); -} - static inline nir_def * nir_maxmag(nir_builder *b, nir_def *x, nir_def *y) { diff --git a/src/compiler/nir/nir_lower_clip_halfz.c b/src/compiler/nir/nir_lower_clip_halfz.c index 6932fb905fd..75420f62aa6 100644 --- a/src/compiler/nir/nir_lower_clip_halfz.c +++ b/src/compiler/nir/nir_lower_clip_halfz.c @@ -85,7 +85,7 @@ lower_pos_write_dynamic(nir_builder *b, nir_intrinsic_instr *intr, nir_def *c = nir_load_clip_z_coeff(b); /* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */ - nir_def *new_z = nir_ffma_old(b, nir_fneg(b, z), c, nir_ffma_old(b, w, c, z)); + nir_def *new_z = nir_ffma_weak(b, nir_fneg(b, z), c, nir_ffma_weak(b, w, c, z)); nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2)); return true; } diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index f204476f289..4a0031db653 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -671,7 +671,6 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr, mangled_name = "__fmul64(u641;u641;"; break; case nir_op_fmad: - case nir_op_ffma_old: name = "__fmad64"; mangled_name = "__fmad64(u641;u641;u641;"; break; diff --git a/src/compiler/nir/nir_lower_floats.c b/src/compiler/nir/nir_lower_floats.c index e1851eab220..3af22d8eb06 100644 --- a/src/compiler/nir/nir_lower_floats.c +++ b/src/compiler/nir/nir_lower_floats.c @@ -75,7 +75,6 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr, mangled_name = "__fmul32(u1;u1;"; break; case nir_op_fmad: - case nir_op_ffma_old: mangled_name = "__fmad32(u1;u1;u1;"; break; case nir_op_fsat: diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 91f03d0217e..13b4e6ad074 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1192,22 +1192,6 @@ consistent not even within the same shader. This is like GLSLs ``ffma``. """) -triop("ffma_old", tfloat, _2src_commutative, """ -if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { - if (bit_size == 64) - dst = _mesa_double_fma_rtz(src0, src1, src2); - else if (bit_size == 32) - dst = _mesa_float_fma_rtz(src0, src1, src2); - else - dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); -} else { - if (bit_size == 32) - dst = fmaf(src0, src1, src2); - else - dst = fma(src0, src1, src2); -} -""") - triop("fmadz", tfloat32, _2src_commutative, """ if (src0 == 0.0 || src1 == 0.0) { dst = 0.0 + src2; @@ -1246,21 +1230,6 @@ Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by +/-0.0 ``+0.0 + src2``. """) -triop("ffmaz_old", tfloat32, _2src_commutative, """ -if (src0 == 0.0 || src1 == 0.0) - dst = 0.0 + src2; -else if (nir_is_rounding_mode_rtz(execution_mode, 32)) - dst = _mesa_float_fma_rtz(src0, src1, src2); -else - dst = fmaf(src0, src1, src2); -""", description = """ -Floating-point multiply-add with modified zero handling. - -Unlike :nir:alu-op:`ffma_old`, anything (even infinity or NaN) multiplied by +/-0.0 is -+0.0. ``ffmaz_old(0.0, inf, src2)`` and ``ffmaz_old(0.0, nan, src2)`` must be -``+0.0 + src2``. -""") - triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2") triop("iadd3", tint, _2src_commutative, "(uint64_t)src0 + (uint64_t)src1 + (uint64_t)src2", diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 0d4103c40e2..cd7cf7b4164 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -241,15 +241,6 @@ optimizations += [ (('usadd_4x8_vc4', a, ~0), ~0), (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))), - (('~ffma_old', a, b, ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)), - (('~ffma_old', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))), - (('~fadd', ('fmul(is_used_once)', a, b), ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)), - (('~ffma_old', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma_old', a, c, d))), - (('~ffmaz_old', a, b, ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)), - (('~ffmaz_old', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))), - (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)), - (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)), - (('~ffmaz_old', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz_old', a, c, d))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), (('iadd', ('ishl', b, a), ('ishl', c, a)), ('ishl', ('iadd', b, c), a)), (('iand', ('iand', a, b), ('iand(is_used_once)', a, c)), ('iand', ('iand', a, b), c)), @@ -286,11 +277,8 @@ optimizations += [ (('fmulz(nsz)', a, 'b(is_finite_not_zero)'), ('fmul', a, b)), (('fmulz(nsz)', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)), (('fmulz', a, a), ('fmul', a, a)), - (('ffmaz_old(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma_old', a, b, c)), (('ffmaz(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c)), - (('ffmaz_old', 'a(is_finite)', 'b(is_finite)', c), ('ffma_old', a, b, c)), (('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)), - (('ffmaz_old', a, a, b), ('ffma_old', a, a, b)), (('ffmaz', a, a, b), ('ffma', a, a, b)), (('imul', a, 0), 0), (('imul24_relaxed', a, 0), 0), @@ -310,32 +298,18 @@ optimizations += [ # If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)), (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)), - (('ffma_old(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)), (('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)), - (('ffma_old(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)), (('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)), - (('ffmaz_old', 0.0, a, b), ('fadd', 0.0, b)), (('ffmaz', 0.0, a, b), ('fadd', 0.0, b)), - (('ffmaz_old', -0.0, a, b), ('fadd', 0.0, b)), (('ffmaz', -0.0, a, b), ('fadd', 0.0, b)), - (('ffma_old(nsz)', a, b, 0.0), ('fmul', a, b)), (('ffma(nsz)', a, b, 0.0), ('fmul', a, b)), - (('ffmaz_old(nsz)', a, b, 0.0), ('fmulz', a, b)), (('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)), - (('ffma_old', a, b, -0.0), ('fmul', a, b)), (('ffma', a, b, -0.0), ('fmul', a, b)), - (('ffmaz_old', a, b, -0.0), ('fmulz', a, b)), (('ffmaz', a, b, -0.0), ('fmulz', a, b)), - (('ffma_old', 1.0, a, b), ('fadd', a, b)), (('ffma', 1.0, a, b), ('fadd', a, b)), - (('ffmaz_old(nsz)', 1.0, a, b), ('fadd', a, b)), (('ffmaz(nsz)', 1.0, a, b), ('fadd', a, b)), - (('ffma_old', -1.0, a, b), ('fadd', ('fneg', a), b)), (('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)), - (('ffmaz_old(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)), (('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)), - (('~ffma_old', '#a', '#b', c), ('fadd', ('fmul', a, b), c)), - (('~ffmaz_old', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)), (('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)), (('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a)), (('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)), @@ -413,20 +387,12 @@ optimizations += [ ('fmulz', 'ma', b), has_fmulz), {'ma' : a}), # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c) - *add_fabs_fneg((('ffma_old@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c), - ('ffmaz_old', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}), *add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c), ('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}), - *add_fabs_fneg((('ffma_old@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), - ('ffmaz_old', 'ma', b, c), has_fmulz), {'ma' : a}), *add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), ('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}), - *add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c), - ('ffmaz_old', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}), *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c), ('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}), - *add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c), - ('ffmaz_old', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}), *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c), ('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}), @@ -546,12 +512,6 @@ optimizations.extend([ (('~fadd', ('fmul', a, ('b2f', ('inot', 'c@1'))), ('fmul', b, ('b2f', c))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), (('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma_old', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma_old', b, ('b2f', 'c@1'), ('ffma_old', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - - (('~ffma_old', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma_old', ('b2f', 'c@1'), ('ffma_old', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))), - (('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)), (('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)), @@ -586,15 +546,6 @@ optimizations.extend([ (('fadd@32', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract'), (('fadd@64', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract && !(options->lower_doubles_options & nir_lower_dfract)'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), - (('ffma_old@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), - (('ffma_old@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), - (('ffma_old@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), - (('ffmaz_old', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'), - # Always lower inexact ffma_old, because it will be fused back by late optimizations (nir_opt_algebraic_late). - (('ffma_old@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'), - (('ffma_old@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'), - (('ffma_old@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'), - (('ffmaz_old(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'), (('fmul', ('fadd', ('bcsel', a, ('fmul', b, c), 0), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', ('fadd', d, 0.0), e))), @@ -1659,7 +1610,6 @@ for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]: optimizations.extend([ (('fmul', search_b2f, search_mod), replace_mod_mul), - (('ffma_old', search_b2f, search_mod, b), ('fadd', replace_mod, b)), (('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)), ]) @@ -1688,7 +1638,6 @@ optimizations.extend([ (('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))), (('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))), (('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))), - (('ffma_old', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)), (('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)), (('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))), (('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))), @@ -2386,9 +2335,7 @@ optimizations.extend([ # Propagate negation up multiplication chains (('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))), (('fmulz(is_used_by_non_fsat,nsz)', ('fneg', a), b), ('fneg', ('fmulz', a, b))), - (('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)), (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), - (('ffmaz_old', ('fneg', a), ('fneg', b), c), ('ffmaz_old', a, b, c)), (('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)), (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))), @@ -2397,14 +2344,9 @@ optimizations.extend([ (('~fmulz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)), (('~fmul', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)), (('imul', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)), - (('~ffma_old', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma_old', ('fmul', a, c), b, d)), - (('~ffmaz_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz_old', ('fmulz', a, c), b, d)), - (('~ffma_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz_old', ('fmul', a, c), b, d)), # Prefer moving out a multiplication for more MAD/FMA-friendly code (('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)), (('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)), - (('~fadd', ('ffma_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma_old', a, b, d), c)), - (('~fadd', ('ffmaz_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz_old', a, b, d), c)), (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)), # Reassociate constants in add/mul chains so they can be folded together. @@ -2413,16 +2355,9 @@ optimizations.extend([ (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)), (('~fmulz', '#a', ('fmulz', b, '#c')), ('fmulz', ('fmulz', a, c), b)), (('~fmul', '#a(is_finite_not_zero)', ('fmulz', b, '#c')), ('fmulz', ('fmul', a, c), b)), - (('~ffma_old', '#a', ('fmul', b, '#c'), d), ('ffma_old', ('fmul', a, c), b, d)), - (('~ffmaz_old', '#a', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmulz', a, c), b, d)), - (('~ffmaz_old', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmul', a, c), b, d)), (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)), (('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)), (('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))), - (('~fadd', '#a', ('ffma_old', b, c, '#d')), ('ffma_old', b, c, ('fadd', a, d))), - (('~fadd', '#a', ('fneg', ('ffma_old', b, c, '#d'))), ('ffma_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))), - (('~fadd', '#a', ('ffmaz_old', b, c, '#d')), ('ffmaz_old', b, c, ('fadd', a, d))), - (('~fadd', '#a', ('fneg', ('ffmaz_old', b, c, '#d'))), ('ffmaz_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))), (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)), (('iand', '#a', ('iand', b, '#c')), ('iand', ('iand', a, c), b)), (('ior', '#a', ('ior', b, '#c')), ('ior', ('ior', a, c), b)), @@ -3535,7 +3470,7 @@ for op in ['fadd', 'fdiv', 'fmod', 'fmul', 'fpow', 'frem', 'fsub']: optimizations += [((op, a, '#b(is_nan)'), NAN, 'true', TestStatus.XFAIL if op == 'fpow' else TestStatus.PASS)] # some opcodes are not commutative. XFAIL is fpow(1.0, NaN) producing NaN instead of 1.0. # NaN propagation: Trinary opcodes. If any operand is NaN, replace it with NaN. -for op in ['ffma_old', 'ffma', 'flrp']: +for op in ['ffma', 'flrp']: optimizations += [((op, '#a(is_nan)', b, c), NAN)] optimizations += [((op, a, '#b(is_nan)', c), NAN)] # some opcodes are not commutative optimizations += [((op, a, b, '#c(is_nan)'), NAN)] @@ -3779,14 +3714,9 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]): # (or fneg/fabs which are assumed to be propagated away), as a heuristic to # avoid fusing in cases where it's harmful. fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)' - ffma_old = 'ffmaz_old' if mulz else 'ffma_old' - - fadd = 'fadd@{}(contract)'.format(sz) - option_old = 'options->fuse_ffma{}'.format(sz) - option_avoid_abs = 'options->avoid_ternary_with_fabs' - option_old_with_abs = f'options->fuse_ffma{sz} && !{option_avoid_abs}' option = f'options->float_mul_add{sz}' + option_avoid_abs = 'options->avoid_ternary_with_fabs' option_has_fmad = f'({option} & nir_float_muladd_support_has_fmad)' option_has_ffma = f'({option} & nir_float_muladd_support_has_ffma)' option_prefer_split = f'({option} & nir_float_muladd_support_prefers_split)' @@ -3795,19 +3725,6 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]): option_fmad = f'{option_fuse} && (!{option_has_ffma} || {option_prefer_split}) && {option_has_fmad}' option_ffma = f'{option_fuse} && (!{option_has_fmad} || !{option_prefer_split}) && {option_has_ffma}' - late_optimizations.extend([ - ((fadd, (fmul, a, b), c), (ffma_old, a, b, c), option_old), - - ((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma_old, ('fneg', a), b, c), option_old), - - ((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma_old, ('fabs', a), ('fabs', b), c), option_old_with_abs), - - ((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c), - (ffma_old, ('fneg', ('fabs', a)), ('fabs', b), c), option_old_with_abs), - ]) - for fmad in ['ffma', 'fmad']: option = option_fmad if fmad == 'fmad' else option_ffma # contract is only needed for ffma @@ -3922,10 +3839,6 @@ late_optimizations.extend([ # A similar operation could apply to any ffma(#a, b, #(-a/2)), but this # particular operation is common for expanding values stored in a texture # from [0,1] to [-1,1]. - (('~ffma_old@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'), - (('~ffma_old@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'), - (('~ffma_old@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'), - (('~ffma_old@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'), (('~ffma@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'), (('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'), (('~ffma@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'), @@ -3957,10 +3870,6 @@ late_optimizations.extend([ # Option 5: a * (2 - a) # # There are a lot of other possible combinations. - (('~ffma_old@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'), - (('~ffma_old@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'), - (('~ffma_old@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), - (('~ffma_old@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), (('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'), (('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'), (('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), @@ -3997,7 +3906,7 @@ late_optimizations.extend([ # optimization in these stages. See bugzilla #111490. In tessellation # stages applications seem to use 'precise' when necessary, so allow the # optimization in those stages. -for fmad in ['ffma_old', 'ffma', 'fmad']: +for fmad in ['ffma', 'fmad']: late_optimizations.extend([ (('~fadd', (f'{fmad}(is_used_once)', a, b, (f'{fmad}(is_used_once)', c, d, (fmad, e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'), (fmad, a, b, (fmad, c, d, (fmad, e, 'f', (fmad, 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), @@ -4009,7 +3918,7 @@ for fmad in ['ffma_old', 'ffma', 'fmad']: (fmad, ('fneg', a), b, (fmad, ('fneg', c), d, (fmad, ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), ]) -for fmadz in ['ffmaz_old', 'ffmaz', 'fmadz']: +for fmadz in ['ffmaz', 'fmadz']: late_optimizations.extend([ (('~fadd', (f'{fmadz}(is_used_once)', a, b, (fmadz, c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), (fmadz, a, b, (fmadz, c, d, (fmadz, e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), @@ -4146,7 +4055,7 @@ for op in ['fadd']: (('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))), ] -for op in ['ffma_old', 'ffma', 'ffmaz_old', 'ffmaz']: +for op in ['ffma', 'ffmaz']: late_optimizations += [ (('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))), (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))), @@ -4159,8 +4068,6 @@ for op in ['ffma_old', 'ffma', 'ffmaz_old', 'ffmaz']: late_optimizations += [ (('fmulz@32', a, b), ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'), - (('ffmaz_old@32', a, b, c), - ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma_old@32', a, b, c)), 'options->lower_fmulz_with_abs_min'), (('ffmaz@32', a, b, c), ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min') ] @@ -4182,7 +4089,7 @@ for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']: late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b), 'true', TestStatus.UNSUPPORTED)] # Ternary opcodes -for op in ['ffma_old', 'ffma', 'flrp']: +for op in ['ffma', 'flrp']: late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c), 'true', TestStatus.UNSUPPORTED)] # Comparison opcodes @@ -4237,7 +4144,6 @@ late_optimizations += [ distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), - (('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)), (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)), (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)), @@ -4249,7 +4155,6 @@ distribute_src_mods = [ (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))), (('fabs', ('fmul_rtz(is_used_once)', a, b)), ('fmul_rtz', ('fabs', a), ('fabs', b))), - (('fneg', ('ffma_old(is_used_once,nsz)', a, b, c)), ('ffma_old', ('fneg', a), b, ('fneg', c))), (('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))), (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c), 'true', TestStatus.XFAIL), # XFAIL is -flrp(0, -1, 0) is 0.0 instead of -0.0 (('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))), diff --git a/src/compiler/nir/nir_opt_fp_math_ctrl.c b/src/compiler/nir/nir_opt_fp_math_ctrl.c index 04e7b4d26b8..3ce515b1eb5 100644 --- a/src/compiler/nir/nir_opt_fp_math_ctrl.c +++ b/src/compiler/nir/nir_opt_fp_math_ctrl.c @@ -177,13 +177,11 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state) case nir_op_ffmaz: case nir_op_fmadz: - case nir_op_ffmaz_old: src_mark_preserve_sz(&alu->src[2].src, NULL); break; case nir_op_ffma: case nir_op_ffma_weak: case nir_op_fmad: - case nir_op_ffma_old: if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) && !nir_alu_srcs_equal(alu, alu, 0, 1)) { src_mark_preserve_sz(&alu->src[0].src, NULL); diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index f5d6f1154dc..bcfde98948b 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -3393,8 +3393,6 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu) case nir_op_ffmaz: case nir_op_fmad: case nir_op_fmadz: - case nir_op_ffma_old: - case nir_op_ffmaz_old: return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT || GET_SRC_INTERP(alu, 1) == FLAG_INTERP_CONVERGENT; diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index c88a6554b6a..3133f7d450d 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -836,8 +836,6 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 push_fp_query(state, alu->src[0].src.ssa); push_fp_query(state, alu->src[1].src.ssa); return; - case nir_op_ffma_old: - case nir_op_ffmaz_old: case nir_op_ffma: case nir_op_ffma_weak: case nir_op_ffmaz: @@ -1328,9 +1326,7 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 case nir_op_fmad: case nir_op_fmadz: case nir_op_ffma: - case nir_op_ffma_old: - case nir_op_ffmaz: - case nir_op_ffmaz_old: { + case nir_op_ffmaz: { bool mulz = nir_alu_instr_is_mul_add_z(alu); bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1); bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1); diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h index f03b6be99f8..fb4131a1bf4 100644 --- a/src/compiler/nir/nir_shader_compiler_options.h +++ b/src/compiler/nir/nir_shader_compiler_options.h @@ -275,12 +275,6 @@ MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_float_muladd_support) typedef struct nir_shader_compiler_options { bool lower_fdiv; - bool lower_ffma16; - bool lower_ffma32; - bool lower_ffma64; - bool fuse_ffma16; - bool fuse_ffma32; - bool fuse_ffma64; nir_float_muladd_support float_mul_add16; nir_float_muladd_support float_mul_add32; nir_float_muladd_support float_mul_add64; diff --git a/src/compiler/nir/tests/fp_class_tests.cpp b/src/compiler/nir/tests/fp_class_tests.cpp index 68bc59f24f2..dec1d20e234 100644 --- a/src/compiler/nir/tests/fp_class_tests.cpp +++ b/src/compiler/nir/tests/fp_class_tests.cpp @@ -191,8 +191,6 @@ DEFINE_TEST(ffma_weak, 3) DEFINE_TEST(ffmaz, 3) DEFINE_TEST(fmad, 3) DEFINE_TEST(fmadz, 3) -DEFINE_TEST(ffma_old, 3) -DEFINE_TEST(ffmaz_old, 3) DEFINE_TEST(fabs, 1) DEFINE_TEST(fneg, 1) DEFINE_TEST(fexp2, 1) diff --git a/src/compiler/nir/tests/nir_opt_varyings_test.h b/src/compiler/nir/tests/nir_opt_varyings_test.h index a7bc43a4782..4d2e34cce9a 100644 --- a/src/compiler/nir/tests/nir_opt_varyings_test.h +++ b/src/compiler/nir/tests/nir_opt_varyings_test.h @@ -655,8 +655,6 @@ movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3], case nir_op_ffmaz: case nir_op_fmad: case nir_op_fmadz: - case nir_op_ffma_old: - case nir_op_ffmaz_old: return !divergent[0] || !divergent[1]; case nir_op_fdiv: diff --git a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp index 80a24a43092..209c8ef9c7a 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp @@ -75,8 +75,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \ /* TES uses fadd and fmul for interpolation, so it's always present. */ \ if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \ (nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \ - nir_op_##alu != nir_op_fmad && nir_op_##alu != nir_op_ffma && \ - nir_op_##alu != nir_op_ffma_old)) { \ + nir_op_##alu != nir_op_fmad && nir_op_##alu != nir_op_ffma)) { \ ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \ } \ } \ diff --git a/src/compiler/spirv/vtn_bindgen2.c b/src/compiler/spirv/vtn_bindgen2.c index fe856fc568a..6838a238a23 100644 --- a/src/compiler/spirv/vtn_bindgen2.c +++ b/src/compiler/spirv/vtn_bindgen2.c @@ -24,12 +24,6 @@ static const struct spirv_to_nir_options spirv_options = { }; struct nir_shader_compiler_options generic_opts = { - /* TODO: Do we want to set has_*? Will drivers be able to lower - * appropriately? - */ - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_keep_weak_ffma, .float_mul_add32 = nir_float_muladd_support_keep_weak_ffma, .float_mul_add64 = nir_float_muladd_support_keep_weak_ffma, diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index caabf9efaae..61e40c95e56 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -129,21 +129,6 @@ static const nir_shader_compiler_options ir3_base_options = { .lower_usub_borrow = true, .lower_mul_high = true, .lower_mul_2x32_64 = true, - /* ir3's mad is an unfused mul-add instruction, so we need to flag fma - * lowering so that CL can implement fused fma in software. GLSL, - * SPIRV, and NIR don't require either fused or unfused behavior from - * fma, and we'll turn mul+adds back into nir_op_ffma (again, implemented - * as unfused) during nir_opt_algebraic_late() (assuming it's not - * decorated with GLSL's precise, or SPIRV's NoContraction), or - * ir3_nir_opt_algebraic_late (if it is, since ir3's unfused mul-add is - * precise). - */ - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 388c5a01a44..4be8449146d 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -727,7 +727,6 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG); break; case nir_op_fmad: - case nir_op_ffma_old: /* The scalar ALU doesn't support mad, so expand to mul+add so that we * don't unnecessarily fall back to non-earlypreamble. This is safe * because at least on a6xx+ mad is unfused. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index c993fbf65be..02ca761c818 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -3338,7 +3338,6 @@ do_alu_action(struct lp_build_nir_soa_context *bld, result = lp_build_floor(float_bld, src[0]); break; case nir_op_ffma_weak: - case nir_op_ffma_old: result = lp_build_fmuladd(builder, src[0], src[1], src[2]); break; case nir_op_ffract: { diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 0c65f0ef618..e602d5f7f3f 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -1573,7 +1573,6 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) /* This is fine as long as drivers implement TGSI MAD as fmad */ [nir_op_fmad] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD }, [nir_op_ffma_weak] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD }, - [nir_op_ffma_old] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD }, [nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 }, }; @@ -4068,8 +4067,6 @@ const void *nir_to_tgsi_options(struct nir_shader *s, const nir_shader_compiler_options nir_to_tgsi_compiler_options = { .fdot_replicates = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_insert_byte = true, diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c index 3a181f09c7a..04f6e4ead06 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c @@ -42,9 +42,6 @@ etna_compiler_create(const char *renderer, const struct etna_core_info *info) .lower_fpow = true, .lower_fround_even = true, .lower_ftrunc = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c index a4f66775759..2ab2b5dc081 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c @@ -50,7 +50,7 @@ static const struct etna_op_info etna_ops[] = { #define IOP(nir, op) IOPC(nir, op, TRUE) #define UOP(nir, op) UOPC(nir, op, TRUE) OP(mov, MOV), OP(fneg, MOV), OP(fabs, MOV), OP(fsat, MOV), - OP(fmul, MUL), OP(fadd, ADD), OP(ffma_old, MAD), OP(fmad, MAD), + OP(fmul, MUL), OP(fadd, ADD), OP(fmad, MAD), OP(fdot2, DP2), OP(fdot3, DP3), OP(fdot4, DP4), OPC(fmin, SELECT, GT), OPC(fmax, SELECT, LT), OP(ffract, FRC), OP(frcp, RCP), OP(frsq, RSQ), diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 24a8ce0ff91..493f7121b66 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -19,9 +19,6 @@ static const nir_shader_compiler_options options = { .lower_fmod = true, .lower_fdiv = true, .lower_fceil = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, @@ -313,7 +310,6 @@ instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp) [nir_op_fsub] = {ADDs, ADDv}, [nir_op_fmul] = {MULs, MULv}, [nir_op_fmad] = {-1, MULADDv}, - [nir_op_ffma_old] = {-1, MULADDv}, [nir_op_fmax] = {MAXs, MAXv}, [nir_op_fmin] = {MINs, MINv}, [nir_op_ffloor] = {FLOORs, FLOORv}, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 06dca93ded3..3f5a1ae28a1 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -109,7 +109,6 @@ i915_get_name(struct pipe_screen *screen) static const nir_shader_compiler_options i915_compiler_options = { .fdot_replicates = true, - .fuse_ffma32 = true, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */ .lower_extract_byte = true, @@ -137,9 +136,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_bitfield_insert = true, .lower_bitfield_extract = true, .lower_fdph = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fmod = true, .lower_hadd = true, .lower_uadd_sat = true, diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 9281fc9b3f0..93c407e3f17 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -204,7 +204,6 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = { [nir_op_fsat] = ppir_op_sat, [nir_op_fclamp_pos] = ppir_op_clamp_pos, [nir_op_fmad] = ppir_op_fmad, - [nir_op_ffma_old] = ppir_op_fmad, }; static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 946e67b46f0..8f8b4557a74 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -43,9 +43,6 @@ #include "ir/lima_ir.h" static const nir_shader_compiler_options vs_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fpow = true, .lower_ffract = true, .lower_fdiv = true, @@ -68,9 +65,6 @@ static const nir_shader_compiler_options vs_nir_options = { }; static const nir_shader_compiler_options fs_nir_options = { - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index c3d75345ef0..0bb11ae964f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -488,9 +488,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_bitfield_extract16 = true, .lower_bitfield_extract = true, .lower_fdph = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_keep_weak_ffma, .float_mul_add32 = nir_float_muladd_support_keep_weak_ffma, .float_mul_add64 = nir_float_muladd_support_keep_weak_ffma, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 8353b63dfbe..4889eeca771 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -471,12 +471,6 @@ Converter::getOperation(nir_op op) case nir_op_ffma_weak: assert(info->target < 0xc0); return OP_MAD; - case nir_op_ffma_old: - case nir_op_ffmaz_old: - /* No FMA op pre-nvc0 */ - if (info->target < 0xc0) - return OP_MAD; - return OP_FMA; case nir_op_flog2: return OP_LG2; case nir_op_fmax: @@ -2622,8 +2616,6 @@ Converter::visit(nir_alu_instr *insn) case nir_op_ffloor: case nir_op_ffma: case nir_op_ffmaz: - case nir_op_ffma_old: - case nir_op_ffmaz_old: case nir_op_ffma_weak: case nir_op_flog2: case nir_op_fmax: @@ -2680,12 +2672,10 @@ Converter::visit(nir_alu_instr *insn) case nir_op_fmul: case nir_op_ffma: case nir_op_ffma_weak: - case nir_op_ffma_old: i->dnz = this->info->io.mul_zero_wins; break; case nir_op_fmulz: case nir_op_ffmaz: - case nir_op_ffmaz_old: i->dnz = true; break; default: @@ -3568,12 +3558,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) { nir_shader_compiler_options op = {}; op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); - op.lower_ffma16 = false; - op.lower_ffma32 = false; - op.lower_ffma64 = false; - op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */ - op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */ - op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */ if (chipset >= NVISA_GF100_CHIPSET) { op.float_mul_add32 = nir_float_muladd_support_has_ffma; op.float_mul_add64 = nir_float_muladd_support_has_ffma; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index ae9be535008..1aafd6129ba 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -335,8 +335,6 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen, } static const nir_shader_compiler_options nv30_base_compiler_options = { - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .lower_bitops = true, .lower_extract_byte = true, diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 1afbee8f7e7..20b9a2ff2c9 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -820,7 +820,6 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr) [nir_op_fmin] = TGSI_OPCODE_MIN, [nir_op_fmax] = TGSI_OPCODE_MAX, [nir_op_fmad] = TGSI_OPCODE_MAD, - [nir_op_ffma_old] = TGSI_OPCODE_MAD, }; if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) { diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index f52b7718b82..bc0ad5c7848 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -105,8 +105,6 @@ static struct disk_cache* r300_get_disk_shader_cache(struct pipe_screen* pscreen #define COMMON_NIR_OPTIONS \ .fdot_replicates = true, \ - .fuse_ffma32 = true, \ - .fuse_ffma64 = true, \ .float_mul_add32 = \ nir_float_muladd_support_has_fmad | \ nir_float_muladd_support_fuse, \ diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index c3fe7cbb517..2584b83a364 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1084,9 +1084,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, } const struct nir_shader_compiler_options nir_options = { - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .float_mul_add64 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse, diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 8d295463cb3..c887e369079 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1640,7 +1640,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false); case nir_op_ffma: - case nir_op_ffma_old: return emit_alu_fma_64bit(*alu, op3_fma_64, shader); case nir_op_fadd: @@ -1958,12 +1957,10 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) return emit_unpack_64_2x32_split(*alu, 1, shader); case nir_op_fmad: - case nir_op_ffma_old: if (!shader.has_flag(Shader::sh_legacy_math_rules)) return emit_alu_op3(*alu, op3_muladd_ieee, shader); FALLTHROUGH; case nir_op_fmadz: - case nir_op_ffmaz_old: return emit_alu_op3(*alu, op3_muladd, shader); case nir_op_mov: diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index fc717c8a0b0..c6e888bf1ca 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -1034,7 +1034,6 @@ Lower64BitToVec2::support_fp64_op(nir_op op) const case nir_op_flt32: case nir_op_fneu32: case nir_op_ffma: - case nir_op_ffma_old: case nir_op_fadd: case nir_op_fmul: case nir_op_fmax: diff --git a/src/gallium/drivers/radeonsi/gfx/si_gfx_screen.c b/src/gallium/drivers/radeonsi/gfx/si_gfx_screen.c index e621a19ead4..4c002589fc5 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_gfx_screen.c +++ b/src/gallium/drivers/radeonsi/gfx/si_gfx_screen.c @@ -323,12 +323,6 @@ static void si_init_screen_nir_options(struct si_screen *sscreen) ac_nir_set_options(&sscreen->info.compiler_info, !sscreen->use_aco, options); options->ignore_none_interpolation_in_sysval_gathering = true; - options->lower_ffma16 = sscreen->info.gfx_level < GFX9; - options->lower_ffma32 = !use_fma32; - options->lower_ffma64 = false; - options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9; - options->fuse_ffma32 = use_fma32; - options->fuse_ffma64 = true; bool use_fma32 = !(options->float_mul_add32 & nir_float_muladd_support_prefers_split) || (sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 0bf64d994cf..449ab0c75d8 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -76,8 +76,6 @@ softpipe_get_name(struct pipe_screen *screen) static const nir_shader_compiler_options sp_compiler_options = { .fdot_replicates = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index f2a979ee9a6..9c01d31c01b 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -582,9 +582,6 @@ v3d_screen_get_compiler_options(struct pipe_screen *pscreen, .lower_unpack_32_2x16_split = true, .lower_fdiv = true, .lower_find_lsb = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsqrt = true, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index ffe94eb390f..00cb35533f1 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2135,9 +2135,6 @@ static const nir_shader_compiler_options nir_options = { .lower_insert_byte = true, .lower_insert_word = true, .lower_fdiv = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp32 = true, .lower_fmod = true, .lower_fpow = true, diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index e114a5c9bc2..6634111f18f 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -1061,8 +1061,6 @@ virgl_create_screen(struct virgl_winsys *vws, const struct pipe_screen_config *c nir_float_muladd_support_fuse; } screen->compiler_options.no_integers = screen->caps.caps.v1.glsl_level < 130; - screen->compiler_options.lower_ffma32 = true; - screen->compiler_options.fuse_ffma32 = false; screen->compiler_options.lower_image_offset_to_range_base = true; screen->compiler_options.lower_atomic_offset_to_range_base = true; screen->compiler_options.support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index f31211b7f1e..17e8b9a5ffc 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -1266,9 +1266,6 @@ zink_screen_init_compiler(struct zink_screen *screen) static const struct nir_shader_compiler_options default_options = { .io_options = nir_io_has_intrinsics | nir_io_mediump_is_32bit, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_scmp = true, .lower_fdph = true, .lower_flrp32 = true, @@ -1317,7 +1314,6 @@ zink_screen_init_compiler(struct zink_screen *screen) if (!screen->info.feats.features.shaderFloat64) { screen->nir_options.lower_doubles_options = ~0; screen->nir_options.lower_flrp64 = true; - screen->nir_options.lower_ffma64 = true; /* soft fp64 function inlining will blow up loop bodies and effectively * stop Vulkan drivers from unrolling the loops. */ diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index 99b9faea8a0..1b227d9ea8c 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -39,7 +39,6 @@ static const struct spirv_to_nir_options spirv_options = { /** NIR options. */ static const nir_shader_compiler_options nir_options = { .discard_is_demote = true, - .fuse_ffma32 = true, .float_mul_add32 = nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse, diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index f2dc8fd37da..a8cbd3c4736 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -3178,7 +3178,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) break; case nir_op_ffma: - case nir_op_ffma_old: instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]); break; diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index e237c56487f..217c167d63e 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -1647,7 +1647,6 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, break; case nir_op_ffma: - case nir_op_ffma_old: if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); diff --git a/src/intel/compiler/elk/elk_compiler.c b/src/intel/compiler/elk/elk_compiler.c index d5588b40363..17b250bf5de 100644 --- a/src/intel/compiler/elk/elk_compiler.c +++ b/src/intel/compiler/elk/elk_compiler.c @@ -86,9 +86,6 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) /* Prior to Gfx6, there are no three source operations, and Gfx11 loses * LRP. */ - nir_options->lower_ffma16 = devinfo->ver < 6; - nir_options->lower_ffma32 = devinfo->ver < 6; - nir_options->lower_ffma64 = devinfo->ver < 6; nir_options->lower_flrp32 = devinfo->ver < 6; if (devinfo->ver >= 6) { diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index d5069765827..474cd7647f5 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -1707,7 +1707,6 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr, break; case nir_op_ffma: - case nir_op_ffma_old: if (nir_has_any_rounding_mode_enabled(execution_mode)) { elk_rnd_mode rnd = elk_rnd_mode_from_execution_mode(execution_mode); diff --git a/src/intel/compiler/elk/elk_vec4_nir.cpp b/src/intel/compiler/elk/elk_vec4_nir.cpp index 5b22533f594..324cbfe31af 100644 --- a/src/intel/compiler/elk/elk_vec4_nir.cpp +++ b/src/intel/compiler/elk/elk_vec4_nir.cpp @@ -1671,7 +1671,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_ffma: - case nir_op_ffma_old: if (type_sz(dst.type) == 8) { dst_reg mul_dst = dst_reg(this, glsl_dvec4_type()); emit(MUL(mul_dst, op[1], op[0])); diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index ce07f1a9543..f5ca76378d4 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -513,7 +513,6 @@ jay_emit_alu(struct nir_to_jay_state *nj, nir_alu_instr *alu) break; case nir_op_ffma: - case nir_op_ffma_old: jay_MAD(b, type, dst, src[0], src[1], src[2]); break; diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index 13ab89f918c..eb10c028aab 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -375,7 +375,6 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr) alu_funclike(ctx, instr, "floor"); break; case nir_op_ffma: - case nir_op_ffma_old: alu_funclike(ctx, instr, "fma"); break; case nir_op_ffract: diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index a106ee5ddf4..34d1719ecff 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -620,9 +620,6 @@ static const struct nir_shader_compiler_options draw_nir_options = { .lower_bitfield_insert = true, .lower_bitfield_extract = true, .lower_fdph = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp16 = true, .lower_fmod = true, .lower_hadd = true, diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index 555b7c1db36..09a3203bf98 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -92,8 +92,6 @@ nir_options = { .compact_arrays = true, .lower_ineg = true, .lower_fneg = true, - .lower_ffma16 = true, - .lower_ffma32 = true, .float_mul_add64 = nir_float_muladd_support_has_ffma, .lower_isign = true, .lower_fsign = true, @@ -2999,7 +2997,6 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); case nir_op_ffma: - case nir_op_ffma_old: if (alu->def.bit_size == 64) ctx->mod.feats.dx11_1_double_extensions = 1; return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index a0f12639a7a..0bee0aeacfd 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -113,9 +113,6 @@ pub extern "C" fn nak_debug_no_ugpr() -> bool { fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { nir_shader_compiler_options { lower_fdiv: true, - fuse_ffma16: true, - fuse_ffma32: true, - fuse_ffma64: true, float_mul_add16: nir_float_muladd_support_has_ffma | nir_float_muladd_support_fuse, float_mul_add32: nir_float_muladd_support_has_ffma diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 559cf115067..04e618a8074 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1056,7 +1056,7 @@ impl<'a> ShaderFromNir<'a> { b.fexp2(srcs(0)).into() } } - nir_op_ffma | nir_op_ffma_old => { + nir_op_ffma => { let ftype = FloatType::from_bits(alu.def.bit_size().into()); let dst; if alu.def.bit_size() == 64 { @@ -1102,7 +1102,7 @@ impl<'a> ShaderFromNir<'a> { } dst } - nir_op_ffmaz | nir_op_ffmaz_old => { + nir_op_ffmaz => { assert!(alu.def.bit_size() == 32); // DNZ implies FTZ so we need FTZ set or this is invalid assert!(self.float_ctl.fp32.ftz); diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 875028e86ee..9c129859c63 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -117,7 +117,6 @@ vectorize_filter_cb(const nir_instr *instr, const void *data) case nir_op_fmul: case nir_op_fmul_rtz: case nir_op_ffma: - case nir_op_ffma_old: case nir_op_fsign: case nir_op_fsat: case nir_op_fmax: @@ -271,8 +270,6 @@ lower_bit_size_cb(const nir_instr *instr, void *data) case nir_op_fmul_rtz: case nir_op_ffma: case nir_op_ffmaz: - case nir_op_ffma_old: - case nir_op_ffmaz_old: case nir_op_fsign: case nir_op_fsat: case nir_op_fceil: diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index f27e5ef861e..989a36b7046 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -2976,7 +2976,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) switch (instr->op) { case nir_op_ffma: - case nir_op_ffma_old: bi_fma_to(b, sz, dst, s0, s1, s2); break; diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index d5fe7f00044..777f7e710f6 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -125,9 +125,6 @@ bool valhall_can_merge_workgroups(nir_shader *nir); .has_ldexp = true, \ .has_isub = true, \ .vectorize_vec2_16bit = true, \ - .fuse_ffma16 = true, \ - .fuse_ffma32 = true, \ - .fuse_ffma64 = true, \ .float_mul_add16 = nir_float_muladd_support_has_ffma | \ nir_float_muladd_support_fuse, \ .float_mul_add32 = nir_float_muladd_support_has_ffma | \ diff --git a/src/panfrost/compiler/midgard/midgard_compile.h b/src/panfrost/compiler/midgard/midgard_compile.h index 962890797c1..d42292b00de 100644 --- a/src/panfrost/compiler/midgard/midgard_compile.h +++ b/src/panfrost/compiler/midgard/midgard_compile.h @@ -27,9 +27,6 @@ void midgard_compile_shader_nir(nir_shader *nir, * solution. */ static const nir_shader_compiler_options midgard_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_scmp = true, .lower_flrp16 = true, .lower_flrp32 = true,