diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index f66984b9dcc..d9888a4fc91 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -972,7 +972,7 @@ ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu) case nir_op_fadd: case nir_op_fsub: case nir_op_fmul: - case nir_op_ffma: + case nir_op_ffma_old: case nir_op_fdiv: case nir_op_flrp: case nir_op_fabs: diff --git a/src/amd/common/nir/ac_nir_cull.c b/src/amd/common/nir/ac_nir_cull.c index 03b172062a1..ce15b552784 100644 --- a/src/amd/common/nir/ac_nir_cull.c +++ b/src/amd/common/nir/ac_nir_cull.c @@ -170,8 +170,8 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection, vp_translate[chan] = nir_channel(b, vp, 2 + chan); /* Convert the position to screen-space coordinates. */ - nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]); - nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]); + nir_def *min = nir_ffma_old(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]); + nir_def *max = nir_ffma_old(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]); /* Scale the bounding box according to precision. */ min = nir_fsub(b, min, small_prim_precision); @@ -251,7 +251,7 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection, /* Transform the coordinates to screen space. */ for (unsigned vtx = 0; vtx < 3; ++vtx) { for (unsigned chan = 0; chan < 2; ++chan) - screen_pos[vtx][chan] = nir_ffma(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]); + screen_pos[vtx][chan] = nir_ffma_old(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]); } /* small_prim_precision is the rasterization precision in X an Y axes, meaning it's the size of @@ -436,8 +436,8 @@ cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4], nir_def *vp_scale = nir_channel(b, vp, chan); nir_def *vp_translate = nir_channel(b, vp, 2 + chan); - v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate); - v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate); + v0[chan] = nir_ffma_old(b, pos[0][chan], vp_scale, vp_translate); + v1[chan] = nir_ffma_old(b, pos[1][chan], vp_scale, vp_translate); } /* Rotate the viewport by 45 degrees, so that diamonds become squares. */ diff --git a/src/amd/common/nir/ac_nir_lower_intrinsics_to_args.c b/src/amd/common/nir/ac_nir_lower_intrinsics_to_args.c index db8cd7d251d..db54aafa241 100644 --- a/src/amd/common/nir/ac_nir_lower_intrinsics_to_args.c +++ b/src/amd/common/nir/ac_nir_lower_intrinsics_to_args.c @@ -409,8 +409,8 @@ lower_intrinsic_to_arg(nir_builder *b, nir_intrinsic_instr *intrin, void *state) nir_def *ddy_j = nir_ddy(b, j); /* Interpolate standard barycentrics by offset. */ - nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i)); - nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j)); + nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i)); + nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j)); replacement = nir_vec2(b, offset_i, offset_j); break; } diff --git a/src/amd/common/nir/ac_nir_lower_ps_early.c b/src/amd/common/nir/ac_nir_lower_ps_early.c index 82ee4ba8220..768d3804f30 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_early.c +++ b/src/amd/common/nir/ac_nir_lower_ps_early.c @@ -300,8 +300,8 @@ lower_load_barycentric_at_offset(nir_builder *b, nir_def *offset, enum glsl_inte nir_def *offset_y = nir_channel(b, offset, 1); /* Interpolate standard barycentrics by offset. */ - nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i)); - nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j)); + nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i)); + nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j)); return nir_vec2(b, offset_i, offset_j); } diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp index 145ab41bbd2..942be54400b 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp @@ -450,7 +450,7 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_e4m3fn2f: case nir_op_e5m22f: case nir_op_fmulz: - case nir_op_ffmaz: + case nir_op_ffmaz_old: case nir_op_f2f64: case nir_op_u2f64: case nir_op_i2f64: @@ -485,7 +485,7 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_f2f16_ru: case nir_op_f2f16_rd: type = RegType::vgpr; break; case nir_op_fmul: - case nir_op_ffma: + case nir_op_ffma_old: case nir_op_fadd: case nir_op_fsub: case nir_op_fmax: diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp index 9aaa64a3f7f..3527d2cd262 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp @@ -1925,7 +1925,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } break; } - case nir_op_ffma: { + case nir_op_ffma_old: { if (dst.regClass() == v2b) { emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3); } else if (dst.regClass() == v1 && instr->def.bit_size == 16) { @@ -1961,7 +1961,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } break; } - case nir_op_ffmaz: { + case nir_op_ffmaz_old: { if (dst.regClass() == v1) { emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst, ctx->block->fp_mode.must_flush_denorms32, 3); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 5492b7b1785..3ddc1383403 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -759,12 +759,12 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_canonicalize(&ctx->ac, result, instr->def.bit_size); } break; - case nir_op_ffma: + case nir_op_ffma_old: /* FMA is slow on gfx6-8, so it shouldn't be used. */ assert(instr->def.bit_size != 32 || ctx->ac.gfx_level >= GFX9); result = emit_fp_intrinsic(&ctx->ac, "llvm.fma", def_type, src[0], src[1], src[2]); break; - case nir_op_ffmaz: + case nir_op_ffmaz_old: assert(ctx->ac.gfx_level >= GFX10_3); src[0] = ac_to_float(&ctx->ac, src[0]); src[1] = ac_to_float(&ctx->ac, src[1]); diff --git a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c index 8f8b64e1eda..3a377933295 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c +++ b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c @@ -64,7 +64,7 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data) nir_def *mul = nir_bcsel(b, cond, nir_imm_float(b, 0.0625f), nir_imm_float(b, -0.0)); /* adjusted_frag_z = dFdxFine(frag_z) * 0.0625 + frag_z */ - frag_z = nir_ffma(b, nir_ddx_fine(b, frag_z), mul, frag_z); + frag_z = nir_ffma_old(b, nir_ddx_fine(b, frag_z), mul, frag_z); nir_def_rewrite_uses_after(&intrin->def, frag_z); diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 8070e610700..c2ef0471aca 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1906,7 +1906,7 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr) else return agx_fmul_to(b, dst, s0, s1); - case nir_op_ffma: + case nir_op_ffma_old: if (instr->def.bit_size == 16) return agx_hfma_to(b, dst, s0, s1, s2); else @@ -3559,7 +3559,7 @@ libagx_frcp(nir_builder *b, nir_def *x) * = fma(fma(-x, u, 1), u, u) */ nir_def *one = nir_imm_float(b, 1.0); - nir_def *u_2 = nir_ffma(b, nir_ffma(b, nir_fneg(b, x), u, one), u, u); + nir_def *u_2 = nir_ffma_old(b, nir_ffma_old(b, nir_fneg(b, x), u, one), u, u); /* If the original value was infinite, frcp will generate the correct zero. * However, the Newton-Raphson step would multiply 0 * Inf and get a NaN. So diff --git a/src/asahi/compiler/agx_nir_lower_interpolation.c b/src/asahi/compiler/agx_nir_lower_interpolation.c index b5e9597c4e1..d7ed4fe0845 100644 --- a/src/asahi/compiler/agx_nir_lower_interpolation.c +++ b/src/asahi/compiler/agx_nir_lower_interpolation.c @@ -55,10 +55,10 @@ interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset, nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset)); /* Interpolate with the given coefficients */ - nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1), + nir_def *interp = nir_ffma_old(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1), nir_channel(b, cf, 2)); - interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp); + interp = nir_ffma_old(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp); /* Divide by RHW. This load will be lowered recursively. */ if (perspective) { diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c index fe6c07bc6aa..49d30c8b0df 100644 --- a/src/asahi/compiler/agx_nir_opt_preamble.c +++ b/src/asahi/compiler/agx_nir_opt_preamble.c @@ -89,7 +89,7 @@ alu_cost(nir_alu_instr *alu) case nir_op_f2f16_rtne: case nir_op_fadd: case nir_op_fmul: - case nir_op_ffma: + case nir_op_ffma_old: case nir_op_iadd: case nir_op_inot: case nir_op_iand: diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index ef891296bd7..c163724437a 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2569,7 +2569,7 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; case ir_triop_fma: - result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); + result = nir_ffma_old(&b, srcs[0], srcs[1], srcs[2]); break; case ir_triop_lrp: result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 693928fd17f..4c847864e4f 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1907,7 +1907,7 @@ nir_def_all_uses_ignore_sign_bit(const nir_def *def) nir_alu_instr *alu = nir_instr_as_alu(instr); if (alu->op == nir_op_fabs) { continue; - } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) { + } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma_old) { nir_alu_src *alu_src = list_entry(use, nir_alu_src, src); unsigned src_index = alu_src - alu->src; /* a * a doesn't care about sign of a. */ diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index e81a15b8488..6e824a94b79 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -1367,27 +1367,27 @@ nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2) build->shader->options->avoid_ternary_with_two_constants) return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2); else - return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), - nir_imm_floatN_t(build, src2, src0->bit_size)); + return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), + nir_imm_floatN_t(build, src2, src0->bit_size)); } static inline nir_def * nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2) { - return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); + return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); } static inline nir_def * nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2) { - return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); + return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); } static inline nir_def * nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1, nir_def *src2) { - return nir_ffma(build, nir_fneg(build, src1), src2, src0); + return nir_ffma_old(build, nir_fneg(build, src1), src2, src0); } static inline nir_def * diff --git a/src/compiler/nir/nir_builtin_builder.c b/src/compiler/nir/nir_builtin_builder.c index f64b3b7e10a..0df18ae4c1d 100644 --- a/src/compiler/nir/nir_builtin_builder.c +++ b/src/compiler/nir/nir_builtin_builder.c @@ -41,10 +41,10 @@ nir_cross3(nir_builder *b, nir_def *x, nir_def *y) unsigned yzx[3] = { 1, 2, 0 }; unsigned zxy[3] = { 2, 0, 1 }; - return nir_ffma(b, nir_swizzle(b, x, yzx, 3), - nir_swizzle(b, y, zxy, 3), - nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), - nir_swizzle(b, y, yzx, 3)))); + return nir_ffma_old(b, nir_swizzle(b, x, yzx, 3), + nir_swizzle(b, y, zxy, 3), + nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), + nir_swizzle(b, y, yzx, 3)))); } nir_def * @@ -285,7 +285,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x) nir_imm_floatN_t(b, -M_PI_2, bit_size)); /* multiply through by x while fixing up the range reduction */ - nir_def *tmp = nir_ffma(b, nir_fabs(b, u), res, bias); + nir_def *tmp = nir_ffma_old(b, nir_fabs(b, u), res, bias); /* sign fixup */ return nir_copysign(b, tmp, y_over_x); diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index a495098948c..d34098da5a7 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -188,7 +188,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16) unsigned num_components = nir_op_infos[alu->op].input_sizes[0]; - const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma; + const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma_old; const nir_op mul_op = is_bfloat16 ? nir_op_bfmul : nir_op_fmul; nir_def *prev = NULL; @@ -328,12 +328,12 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) } else if (reverse_order) { nir_def *sum = nir_channel(b, src1_vec, 3); for (int i = 2; i >= 0; i--) - sum = nir_ffma(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum); + sum = nir_ffma_old(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum); return sum; } else { nir_def *sum = nir_fmul(b, nir_channel(b, src0_vec, 0), nir_channel(b, src1_vec, 0)); - sum = nir_ffma(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum); - sum = nir_ffma(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum); + sum = nir_ffma_old(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum); + sum = nir_ffma_old(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum); return nir_fadd(b, sum, nir_channel(b, src1_vec, 3)); } } diff --git a/src/compiler/nir/nir_lower_clip_halfz.c b/src/compiler/nir/nir_lower_clip_halfz.c index 1cd9d07dba3..6932fb905fd 100644 --- a/src/compiler/nir/nir_lower_clip_halfz.c +++ b/src/compiler/nir/nir_lower_clip_halfz.c @@ -85,7 +85,7 @@ lower_pos_write_dynamic(nir_builder *b, nir_intrinsic_instr *intr, nir_def *c = nir_load_clip_z_coeff(b); /* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */ - nir_def *new_z = nir_ffma(b, nir_fneg(b, z), c, nir_ffma(b, w, c, z)); + nir_def *new_z = nir_ffma_old(b, nir_fneg(b, z), c, nir_ffma_old(b, w, c, z)); nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2)); return true; } diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 48952d040c8..3b85ba8f736 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -172,8 +172,8 @@ lower_rcp(nir_builder *b, nir_def *src) * See https://en.wikipedia.org/wiki/Division_algorithm for more details. */ - ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra); - ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra); + ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra); + ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra); return fix_inv_result(b, ra, src, new_exp); } @@ -299,18 +299,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt) nir_def *one_half = nir_imm_double(b, 0.5); nir_def *h_0 = nir_fmul(b, one_half, ra); nir_def *g_0 = nir_fmul(b, src, ra); - nir_def *r_0 = nir_ffma(b, nir_fneg(b, h_0), g_0, one_half); - nir_def *h_1 = nir_ffma(b, h_0, r_0, h_0); + nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half); + nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0); nir_def *res; if (sqrt) { - nir_def *g_1 = nir_ffma(b, g_0, r_0, g_0); - nir_def *r_1 = nir_ffma(b, nir_fneg(b, g_1), g_1, src); - res = nir_ffma(b, h_1, r_1, g_1); + nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0); + nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src); + res = nir_ffma_old(b, h_1, r_1, g_1); } else { nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0); - nir_def *r_1 = nir_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src), + nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src), one_half); - res = nir_ffma(b, y_1, r_1, y_1); + res = nir_ffma_old(b, y_1, r_1, y_1); } if (sqrt) { @@ -654,7 +654,7 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr, name = "__fmul64"; mangled_name = "__fmul64(u641;u641;"; break; - case nir_op_ffma: + case nir_op_ffma_old: name = "__fmad64"; mangled_name = "__fmad64(u641;u641;u641;"; break; diff --git a/src/compiler/nir/nir_lower_floats.c b/src/compiler/nir/nir_lower_floats.c index 2bf36f51c97..82a283a64a0 100644 --- a/src/compiler/nir/nir_lower_floats.c +++ b/src/compiler/nir/nir_lower_floats.c @@ -74,7 +74,7 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr, case nir_op_fmul: mangled_name = "__fmul32(u1;u1;"; break; - case nir_op_ffma: + case nir_op_ffma_old: mangled_name = "__fmad32(u1;u1;u1;"; break; case nir_op_fsat: diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c index 2e6cbeda9a4..0e4f9a6014b 100644 --- a/src/compiler/nir/nir_lower_flrp.c +++ b/src/compiler/nir/nir_lower_flrp.c @@ -52,8 +52,8 @@ replace_with_strict_ffma(struct nir_builder *bld, struct u_vector *dead_flrp, nir_def *const c = nir_ssa_for_alu_src(bld, alu, 2); nir_def *const neg_a = nir_fneg(bld, a); - nir_def *const inner_ffma = nir_ffma(bld, neg_a, c, a); - nir_def *const outer_ffma = nir_ffma(bld, b, c, inner_ffma); + nir_def *const inner_ffma = nir_ffma_old(bld, neg_a, c, a); + nir_def *const outer_ffma = nir_ffma_old(bld, b, c, inner_ffma); nir_def_rewrite_uses(&alu->def, outer_ffma); @@ -79,7 +79,7 @@ replace_with_single_ffma(struct nir_builder *bld, struct u_vector *dead_flrp, nir_def *const one_minus_c = nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c); nir_def *const b_times_c = nir_fmul(bld, b, c); - nir_def *const final_ffma = nir_ffma(bld, a, one_minus_c, b_times_c); + nir_def *const final_ffma = nir_ffma_old(bld, a, one_minus_c, b_times_c); nir_def_rewrite_uses(&alu->def, final_ffma); diff --git a/src/compiler/nir/nir_lower_interpolation.c b/src/compiler/nir/nir_lower_interpolation.c index 53f8987f086..d5d5ff83a0a 100644 --- a/src/compiler/nir/nir_lower_interpolation.c +++ b/src/compiler/nir/nir_lower_interpolation.c @@ -106,11 +106,11 @@ nir_lower_interpolation_instr(nir_builder *b, nir_instr *instr, void *cb_data) nir_def *bary = intr->src[0].ssa; nir_def *val; - val = nir_ffma(b, nir_channel(b, bary, 1), - nir_channel(b, iid, 1), - nir_channel(b, iid, 0)); - val = nir_ffma(b, nir_channel(b, bary, 0), - nir_channel(b, iid, 2), + val = nir_ffma_old(b, nir_channel(b, bary, 1), + nir_channel(b, iid, 1), + nir_channel(b, iid, 0)); + val = nir_ffma_old(b, nir_channel(b, bary, 0), + nir_channel(b, iid, 2), val); comps[i] = val; diff --git a/src/compiler/nir/nir_lower_pntc_ytransform.c b/src/compiler/nir/nir_lower_pntc_ytransform.c index f097a7fc1c6..26648540502 100644 --- a/src/compiler/nir/nir_lower_pntc_ytransform.c +++ b/src/compiler/nir/nir_lower_pntc_ytransform.c @@ -75,7 +75,7 @@ lower_load_pointcoord(lower_pntc_ytransform_state *state, nir_def *pntc = &intr->def; nir_def *transform = get_pntc_transform(state); - nir_def *flipped_y = nir_ffma(b, nir_channel(b, pntc, y_swizzle), + nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pntc, y_swizzle), /* Flip the sign of y if we're flipping. */ nir_channel(b, transform, 0), /* The offset is 1 if we're flipping, 0 otherwise. */ diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index f180e4acaea..afec28ce010 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -409,7 +409,7 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, } nir_def *result = - nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset))); + nir_ffma_old(b, y, m0, nir_ffma_old(b, u, m1, nir_ffma_old(b, v, m2, offset))); nir_def_rewrite_uses(&tex->def, result); } diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index ba00245edfc..c8181d87f72 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -106,7 +106,7 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state, */ unsigned base = invert ? 0 : 2; /* wpos.y = wpos.y * trans.x/z + trans.y/w */ - wpos[1] = nir_ffma(b, wpos[1], nir_channel(b, wpostrans, base), + wpos[1] = nir_ffma_old(b, wpos[1], nir_channel(b, wpostrans, base), nir_channel(b, wpostrans, base + 1)); } @@ -258,7 +258,7 @@ lower_load_sample_pos(lower_wpos_ytransform_state *state, nir_def *scale = nir_channel(b, wpostrans, 0); nir_def *neg_scale = nir_channel(b, wpostrans, 2); /* Either y or 1-y for scale equal to 1 or -1 respectively. */ - nir_def *flipped_y = nir_ffma(b, nir_channel(b, pos, 1), scale, + nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pos, 1), scale, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0))); nir_def *flipped_pos = nir_vector_insert_imm(b, pos, flipped_y, 1); diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index e748aeaafa7..b486e205e55 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1132,7 +1132,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr, [src1_size, src2_size, src3_size], [tuint, tuint, tuint], False, "", const_expr, description) -triop("ffma", tfloat, _2src_commutative, """ +triop("ffma_old", tfloat, _2src_commutative, """ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_fma_rtz(src0, src1, src2); @@ -1148,7 +1148,7 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { } """) -triop("ffmaz", tfloat32, _2src_commutative, """ +triop("ffmaz_old", tfloat32, _2src_commutative, """ if (src0 == 0.0 || src1 == 0.0) dst = 0.0 + src2; else if (nir_is_rounding_mode_rtz(execution_mode, 32)) @@ -1158,8 +1158,8 @@ else """, description = """ Floating-point multiply-add with modified zero handling. -Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by +/-0.0 is -+0.0. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be +Unlike :nir:alu-op:`ffma_old`, anything (even infinity or NaN) multiplied by +/-0.0 is ++0.0. ``ffmaz_old(0.0, inf, src2)`` and ``ffmaz_old(0.0, nan, src2)`` must be ``+0.0 + src2``. """) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 08ed843049d..6d10d262054 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -101,7 +101,7 @@ denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode def lowered_sincos(c): x = ('fsub', ('fmul', 2.0, ('ffract', ('fadd', ('fmul', 0.5 / pi, a), c))), 1.0) x = ('fmul', ('fsub', x, ('fmul', x, ('fabs', x))), 4.0) - return ('ffma', ('ffma', x, ('fabs', x), ('fneg', x)), 0.225, x) + return ('ffma_old', ('ffma_old', x, ('fabs', x), ('fneg', x)), 0.225, x) def intBitsToFloat(i): return struct.unpack('!f', struct.pack('!I', i))[0] @@ -241,14 +241,14 @@ optimizations += [ (('usadd_4x8_vc4', a, ~0), ~0), (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))), - (('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)), - (('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))), - (('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)), - (('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))), - (('~ffmaz', a, b, ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)), - (('~ffmaz', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))), - (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)), - (('~ffmaz', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz', a, c, d))), + (('~ffma_old', a, b, ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)), + (('~ffma_old', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))), + (('~fadd', ('fmul(is_used_once)', a, b), ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)), + (('~ffma_old', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma_old', a, c, d))), + (('~ffmaz_old', a, b, ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)), + (('~ffmaz_old', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))), + (('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)), + (('~ffmaz_old', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz_old', a, c, d))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), (('iadd', ('ishl', b, a), ('ishl', c, a)), ('ishl', ('iadd', b, c), a)), (('iand', ('iand', a, b), ('iand(is_used_once)', a, c)), ('iand', ('iand', a, b), c)), @@ -285,9 +285,9 @@ optimizations += [ (('fmulz(nsz)', a, 'b(is_finite_not_zero)'), ('fmul', a, b)), (('fmulz(nsz)', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)), (('fmulz', a, a), ('fmul', a, a)), - (('ffmaz(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c)), - (('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)), - (('ffmaz', a, a, b), ('ffma', a, a, b)), + (('ffmaz_old(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma_old', a, b, c)), + (('ffmaz_old', 'a(is_finite)', 'b(is_finite)', c), ('ffma_old', a, b, c)), + (('ffmaz_old', a, a, b), ('ffma_old', a, a, b)), (('imul', a, 0), 0), (('imul24_relaxed', a, 0), 0), (('umul24_relaxed', a, 0), 0), @@ -306,20 +306,20 @@ optimizations += [ # If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)), (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)), - (('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)), - (('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)), - (('ffmaz', 0.0, a, b), ('fadd', 0.0, b)), - (('ffmaz', -0.0, a, b), ('fadd', 0.0, b)), - (('ffma(nsz)', a, b, 0.0), ('fmul', a, b)), - (('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)), - (('ffma', a, b, -0.0), ('fmul', a, b)), - (('ffmaz', a, b, -0.0), ('fmulz', a, b)), - (('ffma', 1.0, a, b), ('fadd', a, b)), - (('ffmaz(nsz)', 1.0, a, b), ('fadd', a, b)), - (('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)), - (('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)), - (('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)), - (('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)), + (('ffma_old(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)), + (('ffma_old(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)), + (('ffmaz_old', 0.0, a, b), ('fadd', 0.0, b)), + (('ffmaz_old', -0.0, a, b), ('fadd', 0.0, b)), + (('ffma_old(nsz)', a, b, 0.0), ('fmul', a, b)), + (('ffmaz_old(nsz)', a, b, 0.0), ('fmulz', a, b)), + (('ffma_old', a, b, -0.0), ('fmul', a, b)), + (('ffmaz_old', a, b, -0.0), ('fmulz', a, b)), + (('ffma_old', 1.0, a, b), ('fadd', a, b)), + (('ffmaz_old(nsz)', 1.0, a, b), ('fadd', a, b)), + (('ffma_old', -1.0, a, b), ('fadd', ('fneg', a), b)), + (('ffmaz_old(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)), + (('~ffma_old', '#a', '#b', c), ('fadd', ('fmul', a, b), c)), + (('~ffmaz_old', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)), (('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)), (('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a)), (('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)), @@ -397,14 +397,14 @@ optimizations += [ ('fmulz', 'ma', b), has_fmulz), {'ma' : a}), # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c) - *add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c), - ('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}), - *add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), - ('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}), - *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c), - ('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}), - *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c), - ('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}), + *add_fabs_fneg((('ffma_old@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c), + ('ffmaz_old', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}), + *add_fabs_fneg((('ffma_old@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), + ('ffmaz_old', 'ma', b, c), has_fmulz), {'ma' : a}), + *add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c), + ('ffmaz_old', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}), + *add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c), + ('ffmaz_old', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}), # b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b)) *add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))), @@ -500,11 +500,11 @@ optimizations.extend([ (('~fadd', ('fmul', a, ('b2f', ('inot', 'c@1'))), ('fmul', b, ('b2f', c))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), (('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma', b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), + (('~ffma_old', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), + (('~ffma_old', b, ('b2f', 'c@1'), ('ffma_old', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), - (('~ffma', ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))), + (('~ffma_old', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))), + (('~ffma_old', ('b2f', 'c@1'), ('ffma_old', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))), (('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)), @@ -540,15 +540,15 @@ optimizations.extend([ (('fadd@32', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract'), (('fadd@64', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract && !(options->lower_doubles_options & nir_lower_dfract)'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), - (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), - (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), - (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), - (('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'), - # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late). - (('ffma@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'), - (('ffma@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'), - (('ffma@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'), - (('ffmaz(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'), + (('ffma_old@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), + (('ffma_old@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), + (('ffma_old@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), + (('ffmaz_old', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'), + # Always lower inexact ffma_old, because it will be fused back by late optimizations (nir_opt_algebraic_late). + (('ffma_old@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'), + (('ffma_old@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'), + (('ffma_old@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'), + (('ffmaz_old(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'), (('fmul', ('fadd', ('bcsel', a, ('fmul', b, c), 0), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', ('fadd', d, 0.0), e))), @@ -1613,7 +1613,7 @@ for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]: optimizations.extend([ (('fmul', search_b2f, search_mod), replace_mod_mul), - (('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)), + (('ffma_old', search_b2f, search_mod, b), ('fadd', replace_mod, b)), ]) optimizations.extend([ @@ -1641,7 +1641,7 @@ optimizations.extend([ (('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))), (('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))), (('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))), - (('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)), + (('ffma_old', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)), (('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))), (('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))), (('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))), @@ -2338,8 +2338,8 @@ optimizations.extend([ # Propagate negation up multiplication chains (('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))), (('fmulz(is_used_by_non_fsat,nsz)', ('fneg', a), b), ('fneg', ('fmulz', a, b))), - (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), - (('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)), + (('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)), + (('ffmaz_old', ('fneg', a), ('fneg', b), c), ('ffmaz_old', a, b, c)), (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))), # Propagate constants up multiplication chains @@ -2347,14 +2347,14 @@ optimizations.extend([ (('~fmulz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)), (('~fmul', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)), (('imul', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)), - (('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)), - (('~ffmaz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz', ('fmulz', a, c), b, d)), - (('~ffma', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz', ('fmul', a, c), b, d)), + (('~ffma_old', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma_old', ('fmul', a, c), b, d)), + (('~ffmaz_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz_old', ('fmulz', a, c), b, d)), + (('~ffma_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz_old', ('fmul', a, c), b, d)), # Prefer moving out a multiplication for more MAD/FMA-friendly code (('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)), (('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)), - (('~fadd', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)), - (('~fadd', ('ffmaz(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz', a, b, d), c)), + (('~fadd', ('ffma_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma_old', a, b, d), c)), + (('~fadd', ('ffmaz_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz_old', a, b, d), c)), (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)), # Reassociate constants in add/mul chains so they can be folded together. @@ -2363,16 +2363,16 @@ optimizations.extend([ (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)), (('~fmulz', '#a', ('fmulz', b, '#c')), ('fmulz', ('fmulz', a, c), b)), (('~fmul', '#a(is_finite_not_zero)', ('fmulz', b, '#c')), ('fmulz', ('fmul', a, c), b)), - (('~ffma', '#a', ('fmul', b, '#c'), d), ('ffma', ('fmul', a, c), b, d)), - (('~ffmaz', '#a', ('fmulz', b, '#c'), d), ('ffmaz', ('fmulz', a, c), b, d)), - (('~ffmaz', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz', ('fmul', a, c), b, d)), + (('~ffma_old', '#a', ('fmul', b, '#c'), d), ('ffma_old', ('fmul', a, c), b, d)), + (('~ffmaz_old', '#a', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmulz', a, c), b, d)), + (('~ffmaz_old', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmul', a, c), b, d)), (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)), (('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)), (('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))), - (('~fadd', '#a', ('ffma', b, c, '#d')), ('ffma', b, c, ('fadd', a, d))), - (('~fadd', '#a', ('fneg', ('ffma', b, c, '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))), - (('~fadd', '#a', ('ffmaz', b, c, '#d')), ('ffmaz', b, c, ('fadd', a, d))), - (('~fadd', '#a', ('fneg', ('ffmaz', b, c, '#d'))), ('ffmaz', ('fneg', b), c, ('fadd', a, ('fneg', d)))), + (('~fadd', '#a', ('ffma_old', b, c, '#d')), ('ffma_old', b, c, ('fadd', a, d))), + (('~fadd', '#a', ('fneg', ('ffma_old', b, c, '#d'))), ('ffma_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))), + (('~fadd', '#a', ('ffmaz_old', b, c, '#d')), ('ffmaz_old', b, c, ('fadd', a, d))), + (('~fadd', '#a', ('fneg', ('ffmaz_old', b, c, '#d'))), ('ffmaz_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))), (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)), (('iand', '#a', ('iand', b, '#c')), ('iand', ('iand', a, c), b)), (('ior', '#a', ('ior', b, '#c')), ('ior', ('ior', a, c), b)), @@ -3485,7 +3485,7 @@ for op in ['fadd', 'fdiv', 'fmod', 'fmul', 'fpow', 'frem', 'fsub']: optimizations += [((op, a, '#b(is_nan)'), NAN, 'true', TestStatus.XFAIL if op == 'fpow' else TestStatus.PASS)] # some opcodes are not commutative. XFAIL is fpow(1.0, NaN) producing NaN instead of 1.0. # NaN propagation: Trinary opcodes. If any operand is NaN, replace it with NaN. -for op in ['ffma', 'flrp']: +for op in ['ffma_old', 'flrp']: optimizations += [((op, '#a(is_nan)', b, c), NAN)] optimizations += [((op, a, '#b(is_nan)', c), NAN)] # some opcodes are not commutative optimizations += [((op, a, b, '#c(is_nan)'), NAN)] @@ -3562,7 +3562,7 @@ for i in range(2, 4 + 1): ] # This section contains "late" optimizations that should be run before -# creating ffmas and calling regular optimizations for the final time. +# creating ffma and calling regular optimizations for the final time. # Optimizations should go here if they help code generation and conflict # with the regular optimizations. before_ffma_optimizations = [ @@ -3729,23 +3729,23 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]): # (or fneg/fabs which are assumed to be propagated away), as a heuristic to # avoid fusing in cases where it's harmful. fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)' - ffma = 'ffmaz' if mulz else 'ffma' + ffma_old = 'ffmaz_old' if mulz else 'ffma_old' fadd = 'fadd@{}(contract)'.format(sz) - option = 'options->fuse_ffma{}'.format(sz) - option_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz) + option_old = 'options->fuse_ffma{}'.format(sz) + option_old_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz) late_optimizations.extend([ - ((fadd, (fmul, a, b), c), (ffma, a, b, c), option), + ((fadd, (fmul, a, b), c), (ffma_old, a, b, c), option_old), ((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma, ('fneg', a), b, c), option), + (ffma_old, ('fneg', a), b, c), option_old), ((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma, ('fabs', a), ('fabs', b), c), option_with_abs), + (ffma_old, ('fabs', a), ('fabs', b), c), option_old_with_abs), ((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c), - (ffma, ('fneg', ('fabs', a)), ('fabs', b), c), option_with_abs), + (ffma_old, ('fneg', ('fabs', a)), ('fabs', b), c), option_old_with_abs), ]) late_optimizations.extend([ @@ -3843,10 +3843,10 @@ late_optimizations.extend([ # A similar operation could apply to any ffma(#a, b, #(-a/2)), but this # particular operation is common for expanding values stored in a texture # from [0,1] to [-1,1]. - (('~ffma@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'), - (('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'), - (('~ffma@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'), - (('~ffma@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'), + (('~ffma_old@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'), + (('~ffma_old@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'), + (('~ffma_old@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'), + (('~ffma_old@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'), (('~fadd@32', ('fmul(is_used_once)', 2.0, a), -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'), (('~fadd@32', ('fmul(is_used_once)', -2.0, a), -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'), (('~fadd@32', ('fmul(is_used_once)', -2.0, a), 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'), @@ -3870,10 +3870,10 @@ late_optimizations.extend([ # Option 5: a * (2 - a) # # There are a lot of other possible combinations. - (('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'), - (('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'), - (('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), - (('~ffma@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), + (('~ffma_old@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'), + (('~ffma_old@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'), + (('~ffma_old@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), + (('~ffma_old@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'), (('~fmul@32', a, ('fadd', 2.0, ('fneg', a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'), # we do these late so that we don't get in the way of creating ffmas @@ -3901,21 +3901,21 @@ late_optimizations.extend([ # optimization in these stages. See bugzilla #111490. In tessellation # stages applications seem to use 'precise' when necessary, so allow the # optimization in those stages. - (('~fadd', ('ffma(is_used_once)', a, b, ('ffma(is_used_once)', c, d, ('ffma', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'), - ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', ('ffma', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), - ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('ffma(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), - ('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('fneg', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'), - ('ffma', ('fneg', a), b, ('ffma', ('fneg', c), d, ('ffma', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old(is_used_once)', c, d, ('ffma_old', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'), + ('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', ('ffma_old', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), + ('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffma_old(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), + ('ffma_old', a, b, ('ffma_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('fneg', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'), + ('ffma_old', ('fneg', a), b, ('ffma_old', ('fneg', c), d, ('ffma_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), - ('ffmaz', a, b, ('ffmaz', c, d, ('ffmaz', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('ffmaz(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), - ('ffmaz', a, b, ('ffmaz', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - (('~fadd', ('fneg', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'), - ('ffmaz', ('fneg', a), b, ('ffmaz', ('fneg', c), d, ('ffmaz', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), + ('ffmaz_old', a, b, ('ffmaz_old', c, d, ('ffmaz_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffmaz_old(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), + ('ffmaz_old', a, b, ('ffmaz_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('fneg', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'), + ('ffmaz_old', ('fneg', a), b, ('ffmaz_old', ('fneg', c), d, ('ffmaz_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), (('fmul(contract)', a, ('ldexp(is_used_once)', 1.0, b)), ('ldexp', a, b), 'options->has_ldexp'), (('frcp(contract,ninf)', ('ldexp', 1.0, b)), ('ldexp', 1.0, ('ineg', b)), 'options->has_ldexp'), @@ -4042,7 +4042,7 @@ for op in ['fadd']: (('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))), ] -for op in ['ffma', 'ffmaz']: +for op in ['ffma_old', 'ffmaz_old']: late_optimizations += [ (('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))), (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))), @@ -4055,8 +4055,8 @@ for op in ['ffma', 'ffmaz']: late_optimizations += [ (('fmulz@32', a, b), ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'), - (('ffmaz@32', a, b, c), - ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min') + (('ffmaz_old@32', a, b, c), + ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma_old@32', a, b, c)), 'options->lower_fmulz_with_abs_min') ] # mediump: If an opcode is surrounded by conversions, remove the conversions. @@ -4076,7 +4076,7 @@ for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']: late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b), 'true', TestStatus.UNSUPPORTED)] # Ternary opcodes -for op in ['ffma', 'flrp']: +for op in ['ffma_old', 'flrp']: late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c), 'true', TestStatus.UNSUPPORTED)] # Comparison opcodes @@ -4131,7 +4131,7 @@ late_optimizations += [ distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), - (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), + (('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)), (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)), (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)), (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)), @@ -4142,7 +4142,7 @@ distribute_src_mods = [ (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))), (('fabs', ('fmul_rtz(is_used_once)', a, b)), ('fmul_rtz', ('fabs', a), ('fabs', b))), - (('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))), + (('fneg', ('ffma_old(is_used_once,nsz)', a, b, c)), ('ffma_old', ('fneg', a), b, ('fneg', c))), (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c), 'true', TestStatus.XFAIL), # XFAIL is -flrp(0, -1, 0) is 0.0 instead of -0.0 (('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))), diff --git a/src/compiler/nir/nir_opt_fp_math_ctrl.c b/src/compiler/nir/nir_opt_fp_math_ctrl.c index e467743413e..96533f5f3e6 100644 --- a/src/compiler/nir/nir_opt_fp_math_ctrl.c +++ b/src/compiler/nir/nir_opt_fp_math_ctrl.c @@ -175,10 +175,10 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state) break; } - case nir_op_ffmaz: + case nir_op_ffmaz_old: src_mark_preserve_sz(&alu->src[2].src, NULL); break; - case nir_op_ffma: + case nir_op_ffma_old: if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) && !nir_alu_srcs_equal(alu, alu, 0, 1)) { src_mark_preserve_sz(&alu->src[0].src, NULL); diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c index f578bad47b5..33eee0f77cd 100644 --- a/src/compiler/nir/nir_opt_undef.c +++ b/src/compiler/nir/nir_opt_undef.c @@ -222,7 +222,7 @@ visit_undef_use(nir_src *src, struct visit_info *info) info->replace_undef_with_constant = true; if (nir_op_infos[alu->op].input_types[i] & nir_type_float && alu->op != nir_op_fmulz && - (alu->op != nir_op_ffmaz || i == 2) && + (alu->op != nir_op_ffmaz_old || i == 2) && alu->op != nir_op_pack_half_2x16_rtz_split) info->prefer_nan = true; } diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index d2c1e57f739..9d6f0a679fd 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -3253,7 +3253,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i) /* Reject exact ops because we are going to do an inexact transformation * with it. */ - if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma) || + if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma_old) || nir_alu_instr_is_exact(alu) || !gather_fmul_tess_coord(iter->instr, alu, vertex_index, &tess_coord_swizzle, &tess_coord_used, @@ -3263,7 +3263,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i) /* The multiplication must only be used by ffma. */ if (alu->op == nir_op_fmul) { nir_alu_instr *ffma = get_single_use_as_alu(&alu->def); - if (!ffma || ffma->op != nir_op_ffma) + if (!ffma || ffma->op != nir_op_ffma_old) return false; if (num_fmuls == 1) @@ -3388,8 +3388,8 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu) */ case nir_op_fmul: case nir_op_fmulz: - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT || GET_SRC_INTERP(alu, 1) == FLAG_INTERP_CONVERGENT; @@ -3915,7 +3915,7 @@ try_move_postdominator(struct linkage_info *linkage, defs[i] = nir_fmul(b, new_tes_loads[i], nir_channel(b, tesscoord, remap[i])); } else { - defs[i] = nir_ffma(b, new_tes_loads[i], + defs[i] = nir_ffma_old(b, new_tes_loads[i], nir_channel(b, tesscoord, remap[i]), defs[i - 1]); } diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index afecd578f51..1ac17c81698 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -836,8 +836,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 push_fp_query(state, alu->src[0].src.ssa); push_fp_query(state, alu->src[1].src.ssa); return; - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: case nir_op_flrp: push_fp_query(state, alu->src[0].src.ssa); push_fp_query(state, alu->src[1].src.ssa); @@ -1320,9 +1320,9 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 break; } - case nir_op_ffma: - case nir_op_ffmaz: { - bool mulz = alu->op == nir_op_ffmaz; + case nir_op_ffma_old: + case nir_op_ffmaz_old: { + bool mulz = alu->op == nir_op_ffmaz_old; bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1); bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1); fp_class_mask r_mul = fmul_fp_class(src_res[0], src_res[1], mulz, src_eq, src_neg_eq); diff --git a/src/compiler/nir/tests/fp_class_tests.cpp b/src/compiler/nir/tests/fp_class_tests.cpp index d0b891bfd53..ac50fa75496 100644 --- a/src/compiler/nir/tests/fp_class_tests.cpp +++ b/src/compiler/nir/tests/fp_class_tests.cpp @@ -186,8 +186,8 @@ DEFINE_TEST(fmul, 2) DEFINE_TEST(fmulz, 2) DEFINE_TEST(fpow, 2) DEFINE_TEST(fdot2, 2) -DEFINE_TEST(ffma, 3) -DEFINE_TEST(ffmaz, 3) +DEFINE_TEST(ffma_old, 3) +DEFINE_TEST(ffmaz_old, 3) DEFINE_TEST(fabs, 1) DEFINE_TEST(fneg, 1) DEFINE_TEST(fexp2, 1) diff --git a/src/compiler/nir/tests/nir_opt_varyings_test.h b/src/compiler/nir/tests/nir_opt_varyings_test.h index 32f4c0aa090..32978c1b69c 100644 --- a/src/compiler/nir/tests/nir_opt_varyings_test.h +++ b/src/compiler/nir/tests/nir_opt_varyings_test.h @@ -158,7 +158,7 @@ protected: nir_def *build_uniform_expr(nir_builder *b, unsigned bit_size, unsigned index) { - return nir_fsqrt(b, nir_ffma(b, load_uniform(b, bit_size, index), + return nir_fsqrt(b, nir_ffma_old(b, load_uniform(b, bit_size, index), nir_imm_floatN_t(b, 3.14, bit_size), load_ubo(b, bit_size, index))); } @@ -254,13 +254,13 @@ protected: if (contains) { return shader_contains_uniform(b, bit_size, index) && shader_contains_ubo(b, bit_size, index) && - shader_contains_alu_op(b, nir_op_ffma, bit_size) && + shader_contains_alu_op(b, nir_op_ffma_old, bit_size) && shader_contains_alu_op(b, nir_op_fsqrt, bit_size) && shader_contains_const_float(b, 3.14, bit_size); } else { return !shader_contains_uniform(b, bit_size, index) && !shader_contains_ubo(b, bit_size, index) && - !shader_contains_alu_op(b, nir_op_ffma, bit_size) && + !shader_contains_alu_op(b, nir_op_ffma_old, bit_size) && !shader_contains_alu_op(b, nir_op_fsqrt, bit_size) && !shader_contains_const_float(b, 3.14, bit_size); } @@ -553,7 +553,7 @@ load_interpolated_input_tes(nir_builder *b, gl_varying_slot slot, if (i == 0) def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i])); else - def[i] = nir_ffma(b, def[i], nir_channel(b, tesscoord, remap[i]), + def[i] = nir_ffma_old(b, def[i], nir_channel(b, tesscoord, remap[i]), def[i - 1]); } else { def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i])); @@ -650,8 +650,8 @@ movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3], case nir_op_fmul: case nir_op_fmulz: - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: return !divergent[0] || !divergent[1]; case nir_op_fdiv: diff --git a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp index 72cb5525ff5..79d80ce2e9e 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp @@ -75,7 +75,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \ /* TES uses fadd and fmul for interpolation, so it's always present. */ \ if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \ (nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \ - nir_op_##alu != nir_op_ffma)) { \ + nir_op_##alu != nir_op_ffma_old)) { \ ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \ } \ } \ diff --git a/src/compiler/nir/tests/serialize_tests.cpp b/src/compiler/nir/tests/serialize_tests.cpp index 8c6ce2129ea..92d589d115e 100644 --- a/src/compiler/nir/tests/serialize_tests.cpp +++ b/src/compiler/nir/tests/serialize_tests.cpp @@ -165,7 +165,7 @@ TEST_P(nir_serialize_all_test, alu_vec) TEST_P(nir_serialize_all_test, alu_two_components_full_swizzle) { nir_def *undef = nir_undef(b, 2, 32); - nir_def *fma = nir_ffma(b, undef, undef, undef); + nir_def *fma = nir_ffma_old(b, undef, undef, undef); nir_alu_instr *fma_alu = nir_def_as_alu(fma); fma->num_components = GetParam(); diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index d506b2f4f9f..db23c274fb2 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -83,7 +83,7 @@ matrix_multiply(struct vtn_builder *b, nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); for (int j = src0_columns - 2; j >= 0; j--) { dest->elems[i]->def = - nir_ffma(&b->nb, src0->elems[j]->def, + nir_ffma_old(&b->nb, src0->elems[j]->def, nir_channel(&b->nb, src1->elems[i]->def, j), dest->elems[i]->def); } diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 5a292a4a475..02a1c9c1620 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -169,7 +169,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b, case GLSLstd450UMax: return nir_op_umax; case GLSLstd450SMax: return nir_op_imax; case GLSLstd450FMix: return nir_op_flrp; - case GLSLstd450Fma: return nir_op_ffma; + case GLSLstd450Fma: return nir_op_ffma_old; case GLSLstd450FindILsb: return nir_op_find_lsb; case GLSLstd450FindSMsb: return nir_op_ifind_msb; case GLSLstd450FindUMsb: return nir_op_ufind_msb; @@ -430,7 +430,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i))); nir_def *result = nir_a_minus_bc(nb, nir_fmul(nb, eta, I), - nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)), + nir_ffma_old(nb, eta, n_dot_i, nir_fsqrt(nb, k)), N); /* XXX: bcsel, or if statement? */ dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c index 4f11b69994a..6ebdbf18720 100644 --- a/src/compiler/spirv/vtn_opencl.c +++ b/src/compiler/spirv/vtn_opencl.c @@ -655,7 +655,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode, if (lower) res = nir_fmad(nb, srcs[0], srcs[1], srcs[2]); else - res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]); + res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]); nb->fp_math_ctrl = save_math_ctrl; return res; @@ -703,7 +703,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode, /* OpenCL FMA is not allowed to be split. */ const bool save_math_ctrl = nb->fp_math_ctrl; nb->fp_math_ctrl |= nir_fp_exact; - nir_def *res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]); + nir_def *res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]); nb->fp_math_ctrl = save_math_ctrl; return res; } diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 60496e4336d..aab752dd5b1 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -726,7 +726,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) case nir_op_fsub: dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG); break; - case nir_op_ffma: + case nir_op_ffma_old: /* The scalar ALU doesn't support mad, so expand to mul+add so that we * don't unnecessarily fall back to non-earlypreamble. This is safe * because at least on a6xx+ mad is unfused. diff --git a/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c b/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c index 119b51590ff..94a7a13ef73 100644 --- a/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c +++ b/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c @@ -33,8 +33,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr, * times the derivatives of ij in screen space. */ nir_def *new_ij = ij; - new_ij = nir_ffma(b, chan(off, 0), nir_ddx(b, ij), new_ij); - new_ij = nir_ffma(b, chan(off, 1), nir_ddy(b, ij), new_ij); + new_ij = nir_ffma_old(b, chan(off, 0), nir_ddx(b, ij), new_ij); + new_ij = nir_ffma_old(b, chan(off, 1), nir_ddy(b, ij), new_ij); return new_ij; } else { @@ -52,8 +52,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr, /* Get the offset value from pixel center for ij, and also for w. */ nir_def *pos = sij; - pos = nir_ffma(b, chan(off, 0), nir_ddx(b, sij), pos); - pos = nir_ffma(b, chan(off, 1), nir_ddy(b, sij), pos); + pos = nir_ffma_old(b, chan(off, 0), nir_ddx(b, sij), pos); + pos = nir_ffma_old(b, chan(off, 1), nir_ddy(b, sij), pos); /* convert back into screen space, dividing by the offset 1/w */ return nir_fmul(b, nir_trim_vector(b, pos, 2), diff --git a/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py b/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py index c35af819224..09821073522 100644 --- a/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py +++ b/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py @@ -20,7 +20,7 @@ for sz in [16, 32]: # (or fneg/fabs which are assumed to be propagated away), as a heuristic to # avoid fusing in cases where it's harmful. fmul = 'fmul(is_only_used_by_fadd)' - ffma = 'ffma' + ffma = 'ffma_old' fadd = 'fadd@{}'.format(sz) diff --git a/src/freedreno/ir3/ir3_nir_trig.py b/src/freedreno/ir3/ir3_nir_trig.py index f66cac25c7c..27efcdd41a9 100644 --- a/src/freedreno/ir3/ir3_nir_trig.py +++ b/src/freedreno/ir3/ir3_nir_trig.py @@ -7,8 +7,8 @@ import argparse import sys trig_workarounds = [ - (('fsin', 'x@32'), ('fsin', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))), - (('fcos', 'x@32'), ('fcos', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))), + (('fsin', 'x@32'), ('fsin', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))), + (('fcos', 'x@32'), ('fcos', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))), ] diff --git a/src/freedreno/vulkan/tu_subsampled_image.cc b/src/freedreno/vulkan/tu_subsampled_image.cc index 2f361f82f55..a55f6bbe585 100644 --- a/src/freedreno/vulkan/tu_subsampled_image.cc +++ b/src/freedreno/vulkan/tu_subsampled_image.cc @@ -149,7 +149,7 @@ tu_get_subsampled_coordinates(nir_builder *b, nir_def *hdr_scale = nir_channels(b, hdr0, 0x3); nir_def *hdr_offset = nir_channels(b, hdr0, 0xc); - nir_def *bin = nir_f2u16(b, nir_ffma(b, coords, hdr_scale, hdr_offset)); + nir_def *bin = nir_f2u16(b, nir_ffma_old(b, coords, hdr_scale, hdr_offset)); nir_def *bin_idx = nir_iadd(b, nir_imul(b, nir_channel(b, bin, 1), nir_u2u16(b, bin_stride)), nir_channel(b, bin, 0)); @@ -166,7 +166,7 @@ tu_get_subsampled_coordinates(nir_builder *b, nir_def *bin_scale = nir_channels(b, bin_data, 0x3); nir_def *bin_offset = nir_channels(b, bin_data, 0xc); - return nir_ffma(b, coords, bin_scale, bin_offset); + return nir_ffma_old(b, coords, bin_scale, bin_offset); } /* Calculate the y coordinate in subsampled space of a given number of tiles diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 8743845ae7e..e36a9fc8b92 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -3337,7 +3337,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld, case nir_op_ffloor: result = lp_build_floor(float_bld, src[0]); break; - case nir_op_ffma: + case nir_op_ffma_old: result = lp_build_fmuladd(builder, src[0], src[1], src[2]); break; case nir_op_ffract: { diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 6e52f4ca7e2..a0f53d53386 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -1570,7 +1570,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) [nir_op_fmax] = { TGSI_OPCODE_MAX, TGSI_OPCODE_DMAX }, [nir_op_imax] = { TGSI_OPCODE_IMAX, TGSI_OPCODE_I64MAX }, [nir_op_umax] = { TGSI_OPCODE_UMAX, TGSI_OPCODE_U64MAX }, - [nir_op_ffma] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD }, + [nir_op_ffma_old] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD }, [nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 }, }; diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index a38543707b5..9d79d031647 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1650,7 +1650,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_MAX] = nir_op_fmax, [TGSI_OPCODE_SLT] = nir_op_slt, [TGSI_OPCODE_SGE] = nir_op_sge, - [TGSI_OPCODE_MAD] = nir_op_ffma, + [TGSI_OPCODE_MAD] = nir_op_ffma_old, [TGSI_OPCODE_LRP] = 0, [TGSI_OPCODE_SQRT] = nir_op_fsqrt, [TGSI_OPCODE_FRC] = nir_op_ffract, diff --git a/src/gallium/drivers/asahi/agx_blit.c b/src/gallium/drivers/asahi/agx_blit.c index 12901f71241..6adba875bbe 100644 --- a/src/gallium/drivers/asahi/agx_blit.c +++ b/src/gallium/drivers/asahi/agx_blit.c @@ -116,7 +116,7 @@ asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key) { /* For pixels within the copy area, texture from the source */ nir_def *coords_el_2d = - nir_ffma(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs); + nir_ffma_old(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs); nir_def *coords_el_nd = coords_el_2d; if (layer) { diff --git a/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c b/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c index 461f54174bd..04266e59bdd 100644 --- a/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c +++ b/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c @@ -180,11 +180,11 @@ lower_emit_vertex(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state /* pos = scaled_point_size * point_dir + point_pos */ nir_def *point_dir = get_point_dir(b, state, i); nir_def *pos = nir_vec4(b, - nir_ffma(b, + nir_ffma_old(b, point_width, nir_channel(b, point_dir, 0), nir_channel(b, state->point_pos, 0)), - nir_ffma(b, + nir_ffma_old(b, point_height, nir_channel(b, point_dir, 1), nir_channel(b, state->point_pos, 1)), diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c index 758052b3601..708d94e0f26 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c @@ -50,7 +50,7 @@ static const struct etna_op_info etna_ops[] = { #define IOP(nir, op) IOPC(nir, op, TRUE) #define UOP(nir, op) UOPC(nir, op, TRUE) OP(mov, MOV), OP(fneg, MOV), OP(fabs, MOV), OP(fsat, MOV), - OP(fmul, MUL), OP(fadd, ADD), OP(ffma, MAD), + OP(fmul, MUL), OP(fadd, ADD), OP(ffma_old, MAD), OP(fdot2, DP2), OP(fdot3, DP3), OP(fdot4, DP4), OPC(fmin, SELECT, GT), OPC(fmax, SELECT, LT), OP(ffract, FRC), OP(frcp, RCP), OP(frsq, RSQ), diff --git a/src/gallium/drivers/etnaviv/etnaviv_nir_lower_texture.c b/src/gallium/drivers/etnaviv/etnaviv_nir_lower_texture.c index 2528d783928..0776eac7753 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_nir_lower_texture.c +++ b/src/gallium/drivers/etnaviv/etnaviv_nir_lower_texture.c @@ -118,7 +118,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data) * value, matching the expected behaviour of Vivante GPU. */ nir_def *lod_raw = nir_flog2(b, max_derivative); - nir_def *lod_fixed_point = nir_ffma(b, lod_raw, nir_imm_float(b, 0.5f), + nir_def *lod_fixed_point = nir_ffma_old(b, lod_raw, nir_imm_float(b, 0.5f), nir_imm_float(b, 393216.0f)); /* Extract 16-bit fractional part */ @@ -135,7 +135,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data) * This reverses the fixed-point encoding to get final LOD value */ nir_def *lod_float = nir_u2f32(b, lod_quantized); - lod = nir_ffma(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f)); + lod = nir_ffma_old(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f)); /* floor and convert to int */ lod = nir_ffloor(b, lod); diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index ce974181664..be180a98034 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -309,7 +309,7 @@ instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp) [nir_op_fadd] = {ADDs, ADDv}, [nir_op_fsub] = {ADDs, ADDv}, [nir_op_fmul] = {MULs, MULv}, - [nir_op_ffma] = {-1, MULADDv}, + [nir_op_ffma_old] = {-1, MULADDv}, [nir_op_fmax] = {MAXs, MAXv}, [nir_op_fmin] = {MINs, MINv}, [nir_op_ffloor] = {FLOORs, FLOORv}, @@ -748,7 +748,7 @@ emit_tex(struct ir2_context *ctx, nir_tex_instr *tex) rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG); rcp->src[0].abs = true; - coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr); + coord_xy = instr_create_alu_reg(ctx, nir_op_ffma_old, 3, instr); coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG); coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1); @@ -868,7 +868,7 @@ extra_position_exports(struct ir2_context *ctx, bool binning) sc->src[0] = ctx->position; sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); - wincoord = instr_create_alu(ctx, nir_op_ffma, 4); + wincoord = instr_create_alu(ctx, nir_op_ffma_old, 4); wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST); wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA); wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST); @@ -895,13 +895,13 @@ extra_position_exports(struct ir2_context *ctx, bool binning) /* 8 max set in freedreno_screen.. unneeded instrs patched out */ for (int i = 0; i < 8; i++) { - instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr = instr_create_alu(ctx, nir_op_ffma_old, 4); instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST); instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST); instr->alu.export = 32; - instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr = instr_create_alu(ctx, nir_op_ffma_old, 4); instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST); instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA); instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST); diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate.c index 16cb0570259..9108152515a 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_duplicate.c +++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate.c @@ -49,7 +49,7 @@ duplicate_def_at_use(nir_builder *b, nir_def *def, bool duplicate_for_ffma) if (duplicate_for_ffma && last_parent_instr->type == nir_instr_type_alu && - nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma) { + nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma_old) { last_parent_instr = NULL; } } diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 444aea7d51c..762314e2959 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -203,7 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = { [nir_op_ftrunc] = ppir_op_trunc, [nir_op_fsat] = ppir_op_sat, [nir_op_fclamp_pos] = ppir_op_clamp_pos, - [nir_op_ffma] = ppir_op_fmad, + [nir_op_ffma_old] = ppir_op_fmad, }; static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 454ec6d6708..140839be25e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -464,8 +464,8 @@ Converter::getOperation(nir_op op) return OP_EX2; case nir_op_ffloor: return OP_FLOOR; - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: /* No FMA op pre-nvc0 */ if (info->target < 0xc0) return OP_MAD; @@ -2613,8 +2613,8 @@ Converter::visit(nir_alu_instr *insn) case nir_op_udiv: case nir_op_fexp2: case nir_op_ffloor: - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: case nir_op_flog2: case nir_op_fmax: case nir_op_imax: @@ -2668,11 +2668,11 @@ Converter::visit(nir_alu_instr *insn) switch (op) { case nir_op_fmul: - case nir_op_ffma: + case nir_op_ffma_old: i->dnz = this->info->io.mul_zero_wins; break; case nir_op_fmulz: - case nir_op_ffmaz: + case nir_op_ffmaz_old: i->dnz = true; break; default: diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 2efb5604902..969a4df0759 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -819,7 +819,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr) [nir_op_fmin] = TGSI_OPCODE_MIN, [nir_op_fmax] = TGSI_OPCODE_MAX, - [nir_op_ffma] = TGSI_OPCODE_MAD, + [nir_op_ffma_old] = TGSI_OPCODE_MAD, }; if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) { diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py index 1e1be554c36..421a28dcfe7 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py +++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py @@ -52,11 +52,11 @@ r300_nir_prepare_presubtract = [ (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))), (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))), # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form. - (('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)), - (('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)), - (('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)), - (('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), - (('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), + (('ffma_old', 2.0, ('fneg', a), 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), + (('ffma_old', a, -2.0, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), + (('ffma_old', -2.0, a, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)), + (('ffma_old', 2.0, a, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))), + (('ffma_old', a, 2.0, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))), # x * 2 can be usually folded into output modifier for the previous # instruction, but that only works if x is a temporary. If it is input or # constant just convert it to add instead. @@ -85,7 +85,7 @@ r300_nir_opt_algebraic_late = [ # This is very late flrp lowering to clean up after bcsel->fcsel->flrp. r300_nir_lower_flrp = [ - (('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a))) + (('flrp', a, b, c), ('ffma_old', b, c, ('ffma_old', ('fneg', a), c, a))) ] # Lower fcsel_ge from ftrunc on r300 diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index c25999e0eca..8fc7ba1f338 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1639,7 +1639,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true); case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false); - case nir_op_ffma: + case nir_op_ffma_old: return emit_alu_fma_64bit(*alu, op3_fma_64, shader); case nir_op_fadd: @@ -1956,11 +1956,11 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(*alu, 1, shader); - case nir_op_ffma: + case nir_op_ffma_old: if (!shader.has_flag(Shader::sh_legacy_math_rules)) return emit_alu_op3(*alu, op3_muladd_ieee, shader); FALLTHROUGH; - case nir_op_ffmaz: + case nir_op_ffmaz_old: return emit_alu_op3(*alu, op3_muladd, shader); case nir_op_mov: diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index c6e888bf1ca..9306c132ead 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -1033,7 +1033,7 @@ Lower64BitToVec2::support_fp64_op(nir_op op) const case nir_op_fge32: case nir_op_flt32: case nir_op_fneu32: - case nir_op_ffma: + case nir_op_ffma_old: case nir_op_fadd: case nir_op_fmul: case nir_op_fmax: diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index d16cd164541..46af5a23876 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -246,8 +246,8 @@ lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data) for (size_t i = 0; i < 4; i++) { pos = nir_vec4(b, - nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x), - nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y), + nir_ffma_old(b, half_w_delta, point_dir[i][0], point_pos_x), + nir_ffma_old(b, half_h_delta, point_dir[i][1], point_pos_y), nir_channel(b, point_pos, 2), nir_channel(b, point_pos, 3)); diff --git a/src/imagination/pco/pco_nir_pvfio.c b/src/imagination/pco/pco_nir_pvfio.c index 5541f0d2851..c12599fc7ca 100644 --- a/src/imagination/pco/pco_nir_pvfio.c +++ b/src/imagination/pco/pco_nir_pvfio.c @@ -1288,12 +1288,12 @@ static nir_def *alu_iter(nir_builder *b, .component = component, .io_semantics = io_semantics); - nir_def *result = nir_ffma(b, + nir_def *result = nir_ffma_old(b, nir_channel(b, coeffs, 1), nir_channel(b, coords, 1), nir_channel(b, coeffs, 2)); result = - nir_ffma(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result); + nir_ffma_old(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result); return result; } diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index a8cbd3c4736..830ff65acdc 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -3177,7 +3177,7 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) instr = pco_fmul(&tctx->b, dest, src[0], src[1]); break; - case nir_op_ffma: + case nir_op_ffma_old: instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]); break; diff --git a/src/imagination/vulkan/pvr_usc.c b/src/imagination/vulkan/pvr_usc.c index 6b18bbb99b4..5345be24308 100644 --- a/src/imagination/vulkan/pvr_usc.c +++ b/src/imagination/vulkan/pvr_usc.c @@ -658,7 +658,7 @@ static nir_def *resolve_samples(nir_builder *b, switch (resolve_op) { case PVR_RESOLVE_BLEND: - op = nir_op_ffma; + op = nir_op_ffma_old; coeff = nir_imm_float(b, 1.0 / num_samples); break; @@ -683,7 +683,7 @@ static nir_def *resolve_samples(nir_builder *b, for (unsigned i = 1; i < num_samples; i++) { if (resolve_op == PVR_RESOLVE_BLEND) - accum = nir_ffma(b, samples[i], coeff, accum); + accum = nir_ffma_old(b, samples[i], coeff, accum); else accum = nir_build_alu2(b, op, samples[i], accum); } diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 217c167d63e..341b1027ea0 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -1646,7 +1646,7 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, inst->saturate = true; break; - case nir_op_ffma: + case nir_op_ffma_old: if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index 474cd7647f5..b48a23c7355 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -1706,7 +1706,7 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr, bld.emit(ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); break; - case nir_op_ffma: + case nir_op_ffma_old: if (nir_has_any_rounding_mode_enabled(execution_mode)) { elk_rnd_mode rnd = elk_rnd_mode_from_execution_mode(execution_mode); diff --git a/src/intel/compiler/elk/elk_vec4_nir.cpp b/src/intel/compiler/elk/elk_vec4_nir.cpp index 324cbfe31af..37f947be97e 100644 --- a/src/intel/compiler/elk/elk_vec4_nir.cpp +++ b/src/intel/compiler/elk/elk_vec4_nir.cpp @@ -1670,7 +1670,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(SHR(dst, op[0], op[1])); break; - case nir_op_ffma: + case nir_op_ffma_old: if (type_sz(dst.type) == 8) { dst_reg mul_dst = dst_reg(this, glsl_dvec4_type()); emit(MUL(mul_dst, op[1], op[0])); diff --git a/src/intel/compiler/intel_nir_opt_peephole_ffma.c b/src/intel/compiler/intel_nir_opt_peephole_ffma.c index 82419958cf3..6a13a863501 100644 --- a/src/intel/compiler/intel_nir_opt_peephole_ffma.c +++ b/src/intel/compiler/intel_nir_opt_peephole_ffma.c @@ -201,7 +201,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b, if (negate) mul_src[0] = nir_fneg(b, mul_src[0]); - nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma); + nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma_old); ffma->fp_math_ctrl = b->fp_math_ctrl; for (unsigned i = 0; i < 2; i++) { diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index f5ca76378d4..3e1b345f1ee 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -512,7 +512,7 @@ jay_emit_alu(struct nir_to_jay_state *nj, nir_alu_instr *alu) jay_BFI2(b, dst, src[0], src[1], src[2]); break; - case nir_op_ffma: + case nir_op_ffma_old: jay_MAD(b, type, dst, src[0], src[1], src[2]); break; diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index eb10c028aab..1a9768763a7 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -374,7 +374,7 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr) case nir_op_ffloor: alu_funclike(ctx, instr, "floor"); break; - case nir_op_ffma: + case nir_op_ffma_old: alu_funclike(ctx, instr, "fma"); break; case nir_op_ffract: diff --git a/src/mesa/state_tracker/st_atifs_to_nir.c b/src/mesa/state_tracker/st_atifs_to_nir.c index c534ae008d8..eb72f3ed3e5 100644 --- a/src/mesa/state_tracker/st_atifs_to_nir.c +++ b/src/mesa/state_tracker/st_atifs_to_nir.c @@ -231,7 +231,7 @@ emit_arith_inst(struct st_translate *t, return nir_fmul(t->b, src[0], src[1]); case GL_MAD_ATI: - return nir_ffma(t->b, src[0], src[1], src[2]); + return nir_ffma_old(t->b, src[0], src[1], src[2]); case GL_LERP_ATI: return nir_flrp(t->b, src[2], src[1], src[0]); diff --git a/src/mesa/state_tracker/st_nir_lower_drawpixels.c b/src/mesa/state_tracker/st_nir_lower_drawpixels.c index aba9721812e..6cc64a56b70 100644 --- a/src/mesa/state_tracker/st_nir_lower_drawpixels.c +++ b/src/mesa/state_tracker/st_nir_lower_drawpixels.c @@ -114,7 +114,7 @@ lower_color(nir_builder *b, lower_drawpixels_state *state, nir_intrinsic_instr * /* Apply the scale and bias. */ if (state->options->scale_and_bias) { /* MAD def, def, scale, bias; */ - def = nir_ffma(b, def, get_scale(b, state), get_bias(b, state)); + def = nir_ffma_old(b, def, get_scale(b, state), get_bias(b, state)); } if (state->options->pixel_maps) { diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index cfa766a70d7..9121598ce09 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -2995,7 +2995,7 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); - case nir_op_ffma: + case nir_op_ffma_old: if (alu->def.bit_size == 64) ctx->mod.feats.dx11_1_double_extensions = 1; return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 04e618a8074..f441b1344de 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1056,7 +1056,7 @@ impl<'a> ShaderFromNir<'a> { b.fexp2(srcs(0)).into() } } - nir_op_ffma => { + nir_op_ffma_old => { let ftype = FloatType::from_bits(alu.def.bit_size().into()); let dst; if alu.def.bit_size() == 64 { @@ -1102,7 +1102,7 @@ impl<'a> ShaderFromNir<'a> { } dst } - nir_op_ffmaz => { + nir_op_ffmaz_old => { assert!(alu.def.bit_size() == 32); // DNZ implies FTZ so we need FTZ set or this is invalid assert!(self.float_ctl.fp32.ftz); diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 9c129859c63..16ee1cb5ccb 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -116,7 +116,7 @@ vectorize_filter_cb(const nir_instr *instr, const void *data) case nir_op_fneu: case nir_op_fmul: case nir_op_fmul_rtz: - case nir_op_ffma: + case nir_op_ffma_old: case nir_op_fsign: case nir_op_fsat: case nir_op_fmax: @@ -268,8 +268,8 @@ lower_bit_size_cb(const nir_instr *instr, void *data) case nir_op_fneu: case nir_op_fmul: case nir_op_fmul_rtz: - case nir_op_ffma: - case nir_op_ffmaz: + case nir_op_ffma_old: + case nir_op_ffmaz_old: case nir_op_fsign: case nir_op_fsat: case nir_op_fceil: diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 989a36b7046..1dda2c6e3ec 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -2975,7 +2975,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null(); switch (instr->op) { - case nir_op_ffma: + case nir_op_ffma_old: bi_fma_to(b, sz, dst, s0, s1, s2); break; diff --git a/src/vulkan/runtime/vk_meta_blit_resolve.c b/src/vulkan/runtime/vk_meta_blit_resolve.c index c8a495a2123..a2640fa4ee0 100644 --- a/src/vulkan/runtime/vk_meta_blit_resolve.c +++ b/src/vulkan/runtime/vk_meta_blit_resolve.c @@ -218,7 +218,7 @@ build_blit_shader(const struct vk_meta_blit_key *key) nir_def *out_coord_xy = nir_load_frag_coord(b); out_coord_xy = nir_trim_vector(b, out_coord_xy, 2); - nir_def *src_coord_xy = nir_ffma(b, out_coord_xy, xy_scale, xy_off); + nir_def *src_coord_xy = nir_ffma_old(b, out_coord_xy, xy_scale, xy_off); nir_def *z_xform = load_struct_var(b, push, 1); nir_def *out_layer = nir_load_layer_id(b); @@ -227,7 +227,7 @@ build_blit_shader(const struct vk_meta_blit_key *key) nir_def *z_off = nir_channel(b, z_xform, 0); nir_def *z_scale = nir_channel(b, z_xform, 1); nir_def *out_coord_z = nir_fadd_imm(b, nir_u2f32(b, out_layer), 0.5); - nir_def *src_coord_z = nir_ffma(b, out_coord_z, z_scale, z_off); + nir_def *src_coord_z = nir_ffma_old(b, out_coord_z, z_scale, z_off); src_coord = nir_vec3(b, nir_channel(b, src_coord_xy, 0), nir_channel(b, src_coord_xy, 1), src_coord_z);