nir: rename ffma to ffma_old

We'll get three new opcodes to properly model float multiply-add.
ffma_old is temporary and will be deleted at the end of this series.

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
Karol Herbst 2026-04-19 22:21:06 +02:00 committed by Marge Bot
parent bdb5301281
commit a9b18f8607
74 changed files with 261 additions and 261 deletions

View file

@ -972,7 +972,7 @@ ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:

View file

@ -170,8 +170,8 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection,
vp_translate[chan] = nir_channel(b, vp, 2 + chan);
/* Convert the position to screen-space coordinates. */
nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
nir_def *min = nir_ffma_old(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
nir_def *max = nir_ffma_old(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
/* Scale the bounding box according to precision. */
min = nir_fsub(b, min, small_prim_precision);
@ -251,7 +251,7 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection,
/* Transform the coordinates to screen space. */
for (unsigned vtx = 0; vtx < 3; ++vtx) {
for (unsigned chan = 0; chan < 2; ++chan)
screen_pos[vtx][chan] = nir_ffma(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]);
screen_pos[vtx][chan] = nir_ffma_old(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]);
}
/* small_prim_precision is the rasterization precision in X and Y axes, meaning it's the size of
@ -436,8 +436,8 @@ cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
nir_def *vp_scale = nir_channel(b, vp, chan);
nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
v0[chan] = nir_ffma_old(b, pos[0][chan], vp_scale, vp_translate);
v1[chan] = nir_ffma_old(b, pos[1][chan], vp_scale, vp_translate);
}
/* Rotate the viewport by 45 degrees, so that diamonds become squares. */

View file

@ -409,8 +409,8 @@ lower_intrinsic_to_arg(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
nir_def *ddy_j = nir_ddy(b, j);
/* Interpolate standard barycentrics by offset. */
nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i));
nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j));
replacement = nir_vec2(b, offset_i, offset_j);
break;
}

View file

@ -300,8 +300,8 @@ lower_load_barycentric_at_offset(nir_builder *b, nir_def *offset, enum glsl_inte
nir_def *offset_y = nir_channel(b, offset, 1);
/* Interpolate standard barycentrics by offset. */
nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i));
nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j));
return nir_vec2(b, offset_i, offset_j);
}

View file

@ -450,7 +450,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_e4m3fn2f:
case nir_op_e5m22f:
case nir_op_fmulz:
case nir_op_ffmaz:
case nir_op_ffmaz_old:
case nir_op_f2f64:
case nir_op_u2f64:
case nir_op_i2f64:
@ -485,7 +485,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_f2f16_ru:
case nir_op_f2f16_rd: type = RegType::vgpr; break;
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmax:

View file

@ -1925,7 +1925,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
case nir_op_ffma: {
case nir_op_ffma_old: {
if (dst.regClass() == v2b) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
} else if (dst.regClass() == v1 && instr->def.bit_size == 16) {
@ -1961,7 +1961,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
case nir_op_ffmaz: {
case nir_op_ffmaz_old: {
if (dst.regClass() == v1) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
ctx->block->fp_mode.must_flush_denorms32, 3);

View file

@ -759,12 +759,12 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = ac_build_canonicalize(&ctx->ac, result, instr->def.bit_size);
}
break;
case nir_op_ffma:
case nir_op_ffma_old:
/* FMA is slow on gfx6-8, so it shouldn't be used. */
assert(instr->def.bit_size != 32 || ctx->ac.gfx_level >= GFX9);
result = emit_fp_intrinsic(&ctx->ac, "llvm.fma", def_type, src[0], src[1], src[2]);
break;
case nir_op_ffmaz:
case nir_op_ffmaz_old:
assert(ctx->ac.gfx_level >= GFX10_3);
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);

View file

@ -64,7 +64,7 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def *mul = nir_bcsel(b, cond, nir_imm_float(b, 0.0625f), nir_imm_float(b, -0.0));
/* adjusted_frag_z = dFdxFine(frag_z) * 0.0625 + frag_z */
frag_z = nir_ffma(b, nir_ddx_fine(b, frag_z), mul, frag_z);
frag_z = nir_ffma_old(b, nir_ddx_fine(b, frag_z), mul, frag_z);
nir_def_rewrite_uses_after(&intrin->def, frag_z);

View file

@ -1906,7 +1906,7 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
else
return agx_fmul_to(b, dst, s0, s1);
case nir_op_ffma:
case nir_op_ffma_old:
if (instr->def.bit_size == 16)
return agx_hfma_to(b, dst, s0, s1, s2);
else
@ -3559,7 +3559,7 @@ libagx_frcp(nir_builder *b, nir_def *x)
* = fma(fma(-x, u, 1), u, u)
*/
nir_def *one = nir_imm_float(b, 1.0);
nir_def *u_2 = nir_ffma(b, nir_ffma(b, nir_fneg(b, x), u, one), u, u);
nir_def *u_2 = nir_ffma_old(b, nir_ffma_old(b, nir_fneg(b, x), u, one), u, u);
/* If the original value was infinite, frcp will generate the correct zero.
* However, the Newton-Raphson step would multiply 0 * Inf and get a NaN. So

View file

@ -55,10 +55,10 @@ interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));
/* Interpolate with the given coefficients */
nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
nir_def *interp = nir_ffma_old(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
nir_channel(b, cf, 2));
interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);
interp = nir_ffma_old(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);
/* Divide by RHW. This load will be lowered recursively. */
if (perspective) {

View file

@ -89,7 +89,7 @@ alu_cost(nir_alu_instr *alu)
case nir_op_f2f16_rtne:
case nir_op_fadd:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_iadd:
case nir_op_inot:
case nir_op_iand:

View file

@ -2569,7 +2569,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
case ir_triop_fma:
result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
result = nir_ffma_old(&b, srcs[0], srcs[1], srcs[2]);
break;
case ir_triop_lrp:
result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);

View file

@ -1907,7 +1907,7 @@ nir_def_all_uses_ignore_sign_bit(const nir_def *def)
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (alu->op == nir_op_fabs) {
continue;
} else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
} else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma_old) {
nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
unsigned src_index = alu_src - alu->src;
/* a * a doesn't care about sign of a. */

View file

@ -1367,27 +1367,27 @@ nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
build->shader->options->avoid_ternary_with_two_constants)
return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
else
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
nir_imm_floatN_t(build, src2, src0->bit_size));
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
nir_imm_floatN_t(build, src2, src0->bit_size));
}
static inline nir_def *
nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
{
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
}
static inline nir_def *
nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
{
return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
}
static inline nir_def *
nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1,
nir_def *src2)
{
return nir_ffma(build, nir_fneg(build, src1), src2, src0);
return nir_ffma_old(build, nir_fneg(build, src1), src2, src0);
}
static inline nir_def *

View file

@ -41,10 +41,10 @@ nir_cross3(nir_builder *b, nir_def *x, nir_def *y)
unsigned yzx[3] = { 1, 2, 0 };
unsigned zxy[3] = { 2, 0, 1 };
return nir_ffma(b, nir_swizzle(b, x, yzx, 3),
nir_swizzle(b, y, zxy, 3),
nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3),
nir_swizzle(b, y, yzx, 3))));
return nir_ffma_old(b, nir_swizzle(b, x, yzx, 3),
nir_swizzle(b, y, zxy, 3),
nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3),
nir_swizzle(b, y, yzx, 3))));
}
nir_def *
@ -285,7 +285,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
nir_imm_floatN_t(b, -M_PI_2, bit_size));
/* multiply through by x while fixing up the range reduction */
nir_def *tmp = nir_ffma(b, nir_fabs(b, u), res, bias);
nir_def *tmp = nir_ffma_old(b, nir_fabs(b, u), res, bias);
/* sign fixup */
return nir_copysign(b, tmp, y_over_x);

View file

@ -188,7 +188,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
unsigned num_components = nir_op_infos[alu->op].input_sizes[0];
const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma;
const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma_old;
const nir_op mul_op = is_bfloat16 ? nir_op_bfmul : nir_op_fmul;
nir_def *prev = NULL;
@ -328,12 +328,12 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
} else if (reverse_order) {
nir_def *sum = nir_channel(b, src1_vec, 3);
for (int i = 2; i >= 0; i--)
sum = nir_ffma(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum);
sum = nir_ffma_old(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum);
return sum;
} else {
nir_def *sum = nir_fmul(b, nir_channel(b, src0_vec, 0), nir_channel(b, src1_vec, 0));
sum = nir_ffma(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum);
sum = nir_ffma(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum);
sum = nir_ffma_old(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum);
sum = nir_ffma_old(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum);
return nir_fadd(b, sum, nir_channel(b, src1_vec, 3));
}
}

View file

@ -85,7 +85,7 @@ lower_pos_write_dynamic(nir_builder *b, nir_intrinsic_instr *intr,
nir_def *c = nir_load_clip_z_coeff(b);
/* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */
nir_def *new_z = nir_ffma(b, nir_fneg(b, z), c, nir_ffma(b, w, c, z));
nir_def *new_z = nir_ffma_old(b, nir_fneg(b, z), c, nir_ffma_old(b, w, c, z));
nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2));
return true;
}

View file

@ -172,8 +172,8 @@ lower_rcp(nir_builder *b, nir_def *src)
* See https://en.wikipedia.org/wiki/Division_algorithm for more details.
*/
ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
return fix_inv_result(b, ra, src, new_exp);
}
@ -299,18 +299,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
nir_def *one_half = nir_imm_double(b, 0.5);
nir_def *h_0 = nir_fmul(b, one_half, ra);
nir_def *g_0 = nir_fmul(b, src, ra);
nir_def *r_0 = nir_ffma(b, nir_fneg(b, h_0), g_0, one_half);
nir_def *h_1 = nir_ffma(b, h_0, r_0, h_0);
nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half);
nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0);
nir_def *res;
if (sqrt) {
nir_def *g_1 = nir_ffma(b, g_0, r_0, g_0);
nir_def *r_1 = nir_ffma(b, nir_fneg(b, g_1), g_1, src);
res = nir_ffma(b, h_1, r_1, g_1);
nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0);
nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src);
res = nir_ffma_old(b, h_1, r_1, g_1);
} else {
nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0);
nir_def *r_1 = nir_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
one_half);
res = nir_ffma(b, y_1, r_1, y_1);
res = nir_ffma_old(b, y_1, r_1, y_1);
}
if (sqrt) {
@ -654,7 +654,7 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
name = "__fmul64";
mangled_name = "__fmul64(u641;u641;";
break;
case nir_op_ffma:
case nir_op_ffma_old:
name = "__fmad64";
mangled_name = "__fmad64(u641;u641;u641;";
break;

View file

@ -74,7 +74,7 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr,
case nir_op_fmul:
mangled_name = "__fmul32(u1;u1;";
break;
case nir_op_ffma:
case nir_op_ffma_old:
mangled_name = "__fmad32(u1;u1;u1;";
break;
case nir_op_fsat:

View file

@ -52,8 +52,8 @@ replace_with_strict_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
nir_def *const c = nir_ssa_for_alu_src(bld, alu, 2);
nir_def *const neg_a = nir_fneg(bld, a);
nir_def *const inner_ffma = nir_ffma(bld, neg_a, c, a);
nir_def *const outer_ffma = nir_ffma(bld, b, c, inner_ffma);
nir_def *const inner_ffma = nir_ffma_old(bld, neg_a, c, a);
nir_def *const outer_ffma = nir_ffma_old(bld, b, c, inner_ffma);
nir_def_rewrite_uses(&alu->def, outer_ffma);
@ -79,7 +79,7 @@ replace_with_single_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
nir_def *const one_minus_c =
nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c);
nir_def *const b_times_c = nir_fmul(bld, b, c);
nir_def *const final_ffma = nir_ffma(bld, a, one_minus_c, b_times_c);
nir_def *const final_ffma = nir_ffma_old(bld, a, one_minus_c, b_times_c);
nir_def_rewrite_uses(&alu->def, final_ffma);

View file

@ -106,11 +106,11 @@ nir_lower_interpolation_instr(nir_builder *b, nir_instr *instr, void *cb_data)
nir_def *bary = intr->src[0].ssa;
nir_def *val;
val = nir_ffma(b, nir_channel(b, bary, 1),
nir_channel(b, iid, 1),
nir_channel(b, iid, 0));
val = nir_ffma(b, nir_channel(b, bary, 0),
nir_channel(b, iid, 2),
val = nir_ffma_old(b, nir_channel(b, bary, 1),
nir_channel(b, iid, 1),
nir_channel(b, iid, 0));
val = nir_ffma_old(b, nir_channel(b, bary, 0),
nir_channel(b, iid, 2),
val);
comps[i] = val;

View file

@ -75,7 +75,7 @@ lower_load_pointcoord(lower_pntc_ytransform_state *state,
nir_def *pntc = &intr->def;
nir_def *transform = get_pntc_transform(state);
nir_def *flipped_y = nir_ffma(b, nir_channel(b, pntc, y_swizzle),
nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pntc, y_swizzle),
/* Flip the sign of y if we're flipping. */
nir_channel(b, transform, 0),
/* The offset is 1 if we're flipping, 0 otherwise. */

View file

@ -409,7 +409,7 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
}
nir_def *result =
nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
nir_ffma_old(b, y, m0, nir_ffma_old(b, u, m1, nir_ffma_old(b, v, m2, offset)));
nir_def_rewrite_uses(&tex->def, result);
}

View file

@ -106,7 +106,7 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state,
*/
unsigned base = invert ? 0 : 2;
/* wpos.y = wpos.y * trans.x/z + trans.y/w */
wpos[1] = nir_ffma(b, wpos[1], nir_channel(b, wpostrans, base),
wpos[1] = nir_ffma_old(b, wpos[1], nir_channel(b, wpostrans, base),
nir_channel(b, wpostrans, base + 1));
}
@ -258,7 +258,7 @@ lower_load_sample_pos(lower_wpos_ytransform_state *state,
nir_def *scale = nir_channel(b, wpostrans, 0);
nir_def *neg_scale = nir_channel(b, wpostrans, 2);
/* Either y or 1-y for scale equal to 1 or -1 respectively. */
nir_def *flipped_y = nir_ffma(b, nir_channel(b, pos, 1), scale,
nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pos, 1), scale,
nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)));
nir_def *flipped_pos = nir_vector_insert_imm(b, pos, flipped_y, 1);

View file

@ -1132,7 +1132,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr,
[src1_size, src2_size, src3_size],
[tuint, tuint, tuint], False, "", const_expr, description)
triop("ffma", tfloat, _2src_commutative, """
triop("ffma_old", tfloat, _2src_commutative, """
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
if (bit_size == 64)
dst = _mesa_double_fma_rtz(src0, src1, src2);
@ -1148,7 +1148,7 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
}
""")
triop("ffmaz", tfloat32, _2src_commutative, """
triop("ffmaz_old", tfloat32, _2src_commutative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0 + src2;
else if (nir_is_rounding_mode_rtz(execution_mode, 32))
@ -1158,8 +1158,8 @@ else
""", description = """
Floating-point multiply-add with modified zero handling.
Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by +/-0.0 is
+0.0. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
Unlike :nir:alu-op:`ffma_old`, anything (even infinity or NaN) multiplied by +/-0.0 is
+0.0. ``ffmaz_old(0.0, inf, src2)`` and ``ffmaz_old(0.0, nan, src2)`` must be
``+0.0 + src2``.
""")

View file

@ -101,7 +101,7 @@ denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode
def lowered_sincos(c):
x = ('fsub', ('fmul', 2.0, ('ffract', ('fadd', ('fmul', 0.5 / pi, a), c))), 1.0)
x = ('fmul', ('fsub', x, ('fmul', x, ('fabs', x))), 4.0)
return ('ffma', ('ffma', x, ('fabs', x), ('fneg', x)), 0.225, x)
return ('ffma_old', ('ffma_old', x, ('fabs', x), ('fneg', x)), 0.225, x)
def intBitsToFloat(i):
return struct.unpack('!f', struct.pack('!I', i))[0]
@ -241,14 +241,14 @@ optimizations += [
(('usadd_4x8_vc4', a, ~0), ~0),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
(('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
(('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))),
(('~ffmaz', a, b, ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
(('~ffmaz', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
(('~ffmaz', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz', a, c, d))),
(('~ffma_old', a, b, ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
(('~ffma_old', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
(('~ffma_old', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma_old', a, c, d))),
(('~ffmaz_old', a, b, ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
(('~ffmaz_old', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
(('~ffmaz_old', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz_old', a, c, d))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('iadd', ('ishl', b, a), ('ishl', c, a)), ('ishl', ('iadd', b, c), a)),
(('iand', ('iand', a, b), ('iand(is_used_once)', a, c)), ('iand', ('iand', a, b), c)),
@ -285,9 +285,9 @@ optimizations += [
(('fmulz(nsz)', a, 'b(is_finite_not_zero)'), ('fmul', a, b)),
(('fmulz(nsz)', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)),
(('fmulz', a, a), ('fmul', a, a)),
(('ffmaz(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c)),
(('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)),
(('ffmaz', a, a, b), ('ffma', a, a, b)),
(('ffmaz_old(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma_old', a, b, c)),
(('ffmaz_old', 'a(is_finite)', 'b(is_finite)', c), ('ffma_old', a, b, c)),
(('ffmaz_old', a, a, b), ('ffma_old', a, a, b)),
(('imul', a, 0), 0),
(('imul24_relaxed', a, 0), 0),
(('umul24_relaxed', a, 0), 0),
@ -306,20 +306,20 @@ optimizations += [
# If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
(('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
(('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
(('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
(('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
(('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz', -0.0, a, b), ('fadd', 0.0, b)),
(('ffma(nsz)', a, b, 0.0), ('fmul', a, b)),
(('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)),
(('ffma', a, b, -0.0), ('fmul', a, b)),
(('ffmaz', a, b, -0.0), ('fmulz', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
(('ffmaz(nsz)', 1.0, a, b), ('fadd', a, b)),
(('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
(('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
(('ffma_old(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
(('ffma_old(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
(('ffmaz_old', 0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz_old', -0.0, a, b), ('fadd', 0.0, b)),
(('ffma_old(nsz)', a, b, 0.0), ('fmul', a, b)),
(('ffmaz_old(nsz)', a, b, 0.0), ('fmulz', a, b)),
(('ffma_old', a, b, -0.0), ('fmul', a, b)),
(('ffmaz_old', a, b, -0.0), ('fmulz', a, b)),
(('ffma_old', 1.0, a, b), ('fadd', a, b)),
(('ffmaz_old(nsz)', 1.0, a, b), ('fadd', a, b)),
(('ffma_old', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('ffmaz_old(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('~ffma_old', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
(('~ffmaz_old', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
(('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)),
@ -397,14 +397,14 @@ optimizations += [
('fmulz', 'ma', b), has_fmulz), {'ma' : a}),
# ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c)
*add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma_old@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
('ffmaz_old', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma_old@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz_old', 'ma', b, c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz_old', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz_old', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
# b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b))
*add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))),
@ -500,11 +500,11 @@ optimizations.extend([
(('~fadd', ('fmul', a, ('b2f', ('inot', 'c@1'))), ('fmul', b, ('b2f', c))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma', b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', b, ('b2f', 'c@1'), ('ffma_old', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma', ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
(('~ffma_old', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma_old', ('b2f', 'c@1'), ('ffma_old', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
(('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
@ -540,15 +540,15 @@ optimizations.extend([
(('fadd@32', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract'),
(('fadd@64', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract && !(options->lower_doubles_options & nir_lower_dfract)'),
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
(('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
(('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
(('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
(('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
(('ffma@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
(('ffma@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
(('ffma@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
(('ffmaz(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
(('ffma_old@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
(('ffma_old@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
(('ffma_old@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
(('ffmaz_old', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
# Always lower inexact ffma_old, because it will be fused back by late optimizations (nir_opt_algebraic_late).
(('ffma_old@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
(('ffma_old@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
(('ffma_old@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
(('ffmaz_old(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
(('fmul', ('fadd', ('bcsel', a, ('fmul', b, c), 0), '#d'), '#e'),
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', ('fadd', d, 0.0), e))),
@ -1613,7 +1613,7 @@ for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]:
optimizations.extend([
(('fmul', search_b2f, search_mod), replace_mod_mul),
(('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
(('ffma_old', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
])
optimizations.extend([
@ -1641,7 +1641,7 @@ optimizations.extend([
(('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
(('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))),
(('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
(('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
(('ffma_old', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
(('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))),
(('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))),
(('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
@ -2338,8 +2338,8 @@ optimizations.extend([
# Propagate negation up multiplication chains
(('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
(('fmulz(is_used_by_non_fsat,nsz)', ('fneg', a), b), ('fneg', ('fmulz', a, b))),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
(('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)),
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
(('ffmaz_old', ('fneg', a), ('fneg', b), c), ('ffmaz_old', a, b, c)),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
# Propagate constants up multiplication chains
@ -2347,14 +2347,14 @@ optimizations.extend([
(('~fmulz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)),
(('~fmul', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)),
(('imul', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
(('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)),
(('~ffmaz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz', ('fmulz', a, c), b, d)),
(('~ffma', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz', ('fmul', a, c), b, d)),
(('~ffma_old', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma_old', ('fmul', a, c), b, d)),
(('~ffmaz_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz_old', ('fmulz', a, c), b, d)),
(('~ffma_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz_old', ('fmul', a, c), b, d)),
# Prefer moving out a multiplication for more MAD/FMA-friendly code
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('~fadd', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)),
(('~fadd', ('ffmaz(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz', a, b, d), c)),
(('~fadd', ('ffma_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma_old', a, b, d), c)),
(('~fadd', ('ffmaz_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz_old', a, b, d), c)),
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
# Reassociate constants in add/mul chains so they can be folded together.
@ -2363,16 +2363,16 @@ optimizations.extend([
(('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
(('~fmulz', '#a', ('fmulz', b, '#c')), ('fmulz', ('fmulz', a, c), b)),
(('~fmul', '#a(is_finite_not_zero)', ('fmulz', b, '#c')), ('fmulz', ('fmul', a, c), b)),
(('~ffma', '#a', ('fmul', b, '#c'), d), ('ffma', ('fmul', a, c), b, d)),
(('~ffmaz', '#a', ('fmulz', b, '#c'), d), ('ffmaz', ('fmulz', a, c), b, d)),
(('~ffmaz', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz', ('fmul', a, c), b, d)),
(('~ffma_old', '#a', ('fmul', b, '#c'), d), ('ffma_old', ('fmul', a, c), b, d)),
(('~ffmaz_old', '#a', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmulz', a, c), b, d)),
(('~ffmaz_old', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmul', a, c), b, d)),
(('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
(('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
(('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
(('~fadd', '#a', ('ffma', b, c, '#d')), ('ffma', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffma', b, c, '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('~fadd', '#a', ('ffmaz', b, c, '#d')), ('ffmaz', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffmaz', b, c, '#d'))), ('ffmaz', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('~fadd', '#a', ('ffma_old', b, c, '#d')), ('ffma_old', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffma_old', b, c, '#d'))), ('ffma_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('~fadd', '#a', ('ffmaz_old', b, c, '#d')), ('ffmaz_old', b, c, ('fadd', a, d))),
(('~fadd', '#a', ('fneg', ('ffmaz_old', b, c, '#d'))), ('ffmaz_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
(('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
(('iand', '#a', ('iand', b, '#c')), ('iand', ('iand', a, c), b)),
(('ior', '#a', ('ior', b, '#c')), ('ior', ('ior', a, c), b)),
@ -3485,7 +3485,7 @@ for op in ['fadd', 'fdiv', 'fmod', 'fmul', 'fpow', 'frem', 'fsub']:
optimizations += [((op, a, '#b(is_nan)'), NAN, 'true', TestStatus.XFAIL if op == 'fpow' else TestStatus.PASS)] # some opcodes are not commutative. XFAIL is fpow(1.0, NaN) producing NaN instead of 1.0.
# NaN propagation: Trinary opcodes. If any operand is NaN, replace it with NaN.
for op in ['ffma', 'flrp']:
for op in ['ffma_old', 'flrp']:
optimizations += [((op, '#a(is_nan)', b, c), NAN)]
optimizations += [((op, a, '#b(is_nan)', c), NAN)] # some opcodes are not commutative
optimizations += [((op, a, b, '#c(is_nan)'), NAN)]
@ -3562,7 +3562,7 @@ for i in range(2, 4 + 1):
]
# This section contains "late" optimizations that should be run before
# creating ffmas and calling regular optimizations for the final time.
# creating ffma and calling regular optimizations for the final time.
# Optimizations should go here if they help code generation and conflict
# with the regular optimizations.
before_ffma_optimizations = [
@ -3729,23 +3729,23 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]):
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
# avoid fusing in cases where it's harmful.
fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)'
ffma = 'ffmaz' if mulz else 'ffma'
ffma_old = 'ffmaz_old' if mulz else 'ffma_old'
fadd = 'fadd@{}(contract)'.format(sz)
option = 'options->fuse_ffma{}'.format(sz)
option_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz)
option_old = 'options->fuse_ffma{}'.format(sz)
option_old_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz)
late_optimizations.extend([
((fadd, (fmul, a, b), c), (ffma, a, b, c), option),
((fadd, (fmul, a, b), c), (ffma_old, a, b, c), option_old),
((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c),
(ffma, ('fneg', a), b, c), option),
(ffma_old, ('fneg', a), b, c), option_old),
((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c),
(ffma, ('fabs', a), ('fabs', b), c), option_with_abs),
(ffma_old, ('fabs', a), ('fabs', b), c), option_old_with_abs),
((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c),
(ffma, ('fneg', ('fabs', a)), ('fabs', b), c), option_with_abs),
(ffma_old, ('fneg', ('fabs', a)), ('fabs', b), c), option_old_with_abs),
])
late_optimizations.extend([
@ -3843,10 +3843,10 @@ late_optimizations.extend([
# A similar operation could apply to any ffma(#a, b, #(-a/2)), but this
# particular operation is common for expanding values stored in a texture
# from [0,1] to [-1,1].
(('~ffma@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
(('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
(('~ffma_old@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~ffma_old@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~fadd@32', ('fmul(is_used_once)', 2.0, a), -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
(('~fadd@32', ('fmul(is_used_once)', -2.0, a), -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
(('~fadd@32', ('fmul(is_used_once)', -2.0, a), 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
@ -3870,10 +3870,10 @@ late_optimizations.extend([
# Option 5: a * (2 - a)
#
# There are a lot of other possible combinations.
(('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma_old@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~ffma_old@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
(('~fmul@32', a, ('fadd', 2.0, ('fneg', a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
# we do these late so that we don't get in the way of creating ffmas
@ -3901,21 +3901,21 @@ late_optimizations.extend([
# optimization in these stages. See bugzilla #111490. In tessellation
# stages applications seem to use 'precise' when necessary, so allow the
# optimization in those stages.
(('~fadd', ('ffma(is_used_once)', a, b, ('ffma(is_used_once)', c, d, ('ffma', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'),
('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', ('ffma', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffma(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('fneg', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
('ffma', ('fneg', a), b, ('ffma', ('fneg', c), d, ('ffma', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old(is_used_once)', c, d, ('ffma_old', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'),
('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', ('ffma_old', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffma_old(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
('ffma_old', a, b, ('ffma_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('fneg', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
('ffma_old', ('fneg', a), b, ('ffma_old', ('fneg', c), d, ('ffma_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
('ffmaz', a, b, ('ffmaz', c, d, ('ffmaz', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffmaz(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
('ffmaz', a, b, ('ffmaz', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('fneg', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
('ffmaz', ('fneg', a), b, ('ffmaz', ('fneg', c), d, ('ffmaz', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
('ffmaz_old', a, b, ('ffmaz_old', c, d, ('ffmaz_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('ffmaz_old(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
('ffmaz_old', a, b, ('ffmaz_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('~fadd', ('fneg', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
('ffmaz_old', ('fneg', a), b, ('ffmaz_old', ('fneg', c), d, ('ffmaz_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
(('fmul(contract)', a, ('ldexp(is_used_once)', 1.0, b)), ('ldexp', a, b), 'options->has_ldexp'),
(('frcp(contract,ninf)', ('ldexp', 1.0, b)), ('ldexp', 1.0, ('ineg', b)), 'options->has_ldexp'),
@ -4042,7 +4042,7 @@ for op in ['fadd']:
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
]
for op in ['ffma', 'ffmaz']:
for op in ['ffma_old', 'ffmaz_old']:
late_optimizations += [
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
@ -4055,8 +4055,8 @@ for op in ['ffma', 'ffmaz']:
late_optimizations += [
(('fmulz@32', a, b),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'),
(('ffmaz@32', a, b, c),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
(('ffmaz_old@32', a, b, c),
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma_old@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
]
# mediump: If an opcode is surrounded by conversions, remove the conversions.
@ -4076,7 +4076,7 @@ for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']:
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b), 'true', TestStatus.UNSUPPORTED)]
# Ternary opcodes
for op in ['ffma', 'flrp']:
for op in ['ffma_old', 'flrp']:
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c), 'true', TestStatus.UNSUPPORTED)]
# Comparison opcodes
@ -4131,7 +4131,7 @@ late_optimizations += [
distribute_src_mods = [
# Try to remove some spurious negations rather than pushing them down.
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
(('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
(('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
(('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
@ -4142,7 +4142,7 @@ distribute_src_mods = [
(('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),
(('fabs', ('fmul_rtz(is_used_once)', a, b)), ('fmul_rtz', ('fabs', a), ('fabs', b))),
(('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
(('fneg', ('ffma_old(is_used_once,nsz)', a, b, c)), ('ffma_old', ('fneg', a), b, ('fneg', c))),
(('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c), 'true', TestStatus.XFAIL), # XFAIL is -flrp(0, -1, 0) is 0.0 instead of -0.0
(('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),

View file

@ -175,10 +175,10 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
break;
}
case nir_op_ffmaz:
case nir_op_ffmaz_old:
src_mark_preserve_sz(&alu->src[2].src, NULL);
break;
case nir_op_ffma:
case nir_op_ffma_old:
if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) &&
!nir_alu_srcs_equal(alu, alu, 0, 1)) {
src_mark_preserve_sz(&alu->src[0].src, NULL);

View file

@ -222,7 +222,7 @@ visit_undef_use(nir_src *src, struct visit_info *info)
info->replace_undef_with_constant = true;
if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
alu->op != nir_op_fmulz &&
(alu->op != nir_op_ffmaz || i == 2) &&
(alu->op != nir_op_ffmaz_old || i == 2) &&
alu->op != nir_op_pack_half_2x16_rtz_split)
info->prefer_nan = true;
}

View file

@ -3253,7 +3253,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
/* Reject exact ops because we are going to do an inexact transformation
* with it.
*/
if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma) ||
if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma_old) ||
nir_alu_instr_is_exact(alu) ||
!gather_fmul_tess_coord(iter->instr, alu, vertex_index,
&tess_coord_swizzle, &tess_coord_used,
@ -3263,7 +3263,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
/* The multiplication must only be used by ffma. */
if (alu->op == nir_op_fmul) {
nir_alu_instr *ffma = get_single_use_as_alu(&alu->def);
if (!ffma || ffma->op != nir_op_ffma)
if (!ffma || ffma->op != nir_op_ffma_old)
return false;
if (num_fmuls == 1)
@ -3388,8 +3388,8 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
*/
case nir_op_fmul:
case nir_op_fmulz:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT ||
GET_SRC_INTERP(alu, 1) == FLAG_INTERP_CONVERGENT;
@ -3915,7 +3915,7 @@ try_move_postdominator(struct linkage_info *linkage,
defs[i] = nir_fmul(b, new_tes_loads[i],
nir_channel(b, tesscoord, remap[i]));
} else {
defs[i] = nir_ffma(b, new_tes_loads[i],
defs[i] = nir_ffma_old(b, new_tes_loads[i],
nir_channel(b, tesscoord, remap[i]),
defs[i - 1]);
}

View file

@ -836,8 +836,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
push_fp_query(state, alu->src[0].src.ssa);
push_fp_query(state, alu->src[1].src.ssa);
return;
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_flrp:
push_fp_query(state, alu->src[0].src.ssa);
push_fp_query(state, alu->src[1].src.ssa);
@ -1320,9 +1320,9 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
break;
}
case nir_op_ffma:
case nir_op_ffmaz: {
bool mulz = alu->op == nir_op_ffmaz;
case nir_op_ffma_old:
case nir_op_ffmaz_old: {
bool mulz = alu->op == nir_op_ffmaz_old;
bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1);
bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1);
fp_class_mask r_mul = fmul_fp_class(src_res[0], src_res[1], mulz, src_eq, src_neg_eq);

View file

@ -186,8 +186,8 @@ DEFINE_TEST(fmul, 2)
DEFINE_TEST(fmulz, 2)
DEFINE_TEST(fpow, 2)
DEFINE_TEST(fdot2, 2)
DEFINE_TEST(ffma, 3)
DEFINE_TEST(ffmaz, 3)
DEFINE_TEST(ffma_old, 3)
DEFINE_TEST(ffmaz_old, 3)
DEFINE_TEST(fabs, 1)
DEFINE_TEST(fneg, 1)
DEFINE_TEST(fexp2, 1)

View file

@ -158,7 +158,7 @@ protected:
nir_def *build_uniform_expr(nir_builder *b, unsigned bit_size, unsigned index)
{
return nir_fsqrt(b, nir_ffma(b, load_uniform(b, bit_size, index),
return nir_fsqrt(b, nir_ffma_old(b, load_uniform(b, bit_size, index),
nir_imm_floatN_t(b, 3.14, bit_size),
load_ubo(b, bit_size, index)));
}
@ -254,13 +254,13 @@ protected:
if (contains) {
return shader_contains_uniform(b, bit_size, index) &&
shader_contains_ubo(b, bit_size, index) &&
shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
shader_contains_alu_op(b, nir_op_ffma_old, bit_size) &&
shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
shader_contains_const_float(b, 3.14, bit_size);
} else {
return !shader_contains_uniform(b, bit_size, index) &&
!shader_contains_ubo(b, bit_size, index) &&
!shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
!shader_contains_alu_op(b, nir_op_ffma_old, bit_size) &&
!shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
!shader_contains_const_float(b, 3.14, bit_size);
}
@ -553,7 +553,7 @@ load_interpolated_input_tes(nir_builder *b, gl_varying_slot slot,
if (i == 0)
def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
else
def[i] = nir_ffma(b, def[i], nir_channel(b, tesscoord, remap[i]),
def[i] = nir_ffma_old(b, def[i], nir_channel(b, tesscoord, remap[i]),
def[i - 1]);
} else {
def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
@ -650,8 +650,8 @@ movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3],
case nir_op_fmul:
case nir_op_fmulz:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
return !divergent[0] || !divergent[1];
case nir_op_fdiv:

View file

@ -75,7 +75,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \
/* TES uses fadd and fmul for interpolation, so it's always present. */ \
if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \
(nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \
nir_op_##alu != nir_op_ffma)) { \
nir_op_##alu != nir_op_ffma_old)) { \
ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
} \
} \

View file

@ -165,7 +165,7 @@ TEST_P(nir_serialize_all_test, alu_vec)
TEST_P(nir_serialize_all_test, alu_two_components_full_swizzle)
{
nir_def *undef = nir_undef(b, 2, 32);
nir_def *fma = nir_ffma(b, undef, undef, undef);
nir_def *fma = nir_ffma_old(b, undef, undef, undef);
nir_alu_instr *fma_alu = nir_def_as_alu(fma);
fma->num_components = GetParam();

View file

@ -83,7 +83,7 @@ matrix_multiply(struct vtn_builder *b,
nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
for (int j = src0_columns - 2; j >= 0; j--) {
dest->elems[i]->def =
nir_ffma(&b->nb, src0->elems[j]->def,
nir_ffma_old(&b->nb, src0->elems[j]->def,
nir_channel(&b->nb, src1->elems[i]->def, j),
dest->elems[i]->def);
}

View file

@ -169,7 +169,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b,
case GLSLstd450UMax: return nir_op_umax;
case GLSLstd450SMax: return nir_op_imax;
case GLSLstd450FMix: return nir_op_flrp;
case GLSLstd450Fma: return nir_op_ffma;
case GLSLstd450Fma: return nir_op_ffma_old;
case GLSLstd450FindILsb: return nir_op_find_lsb;
case GLSLstd450FindSMsb: return nir_op_ifind_msb;
case GLSLstd450FindUMsb: return nir_op_ufind_msb;
@ -430,7 +430,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i)));
nir_def *result =
nir_a_minus_bc(nb, nir_fmul(nb, eta, I),
nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
nir_ffma_old(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
N);
/* XXX: bcsel, or if statement? */
dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);

View file

@ -655,7 +655,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
if (lower)
res = nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
else
res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
nb->fp_math_ctrl = save_math_ctrl;
return res;
@ -703,7 +703,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
/* OpenCL FMA is not allowed to be split. */
const bool save_math_ctrl = nb->fp_math_ctrl;
nb->fp_math_ctrl |= nir_fp_exact;
nir_def *res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
nir_def *res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
nb->fp_math_ctrl = save_math_ctrl;
return res;
}

View file

@ -726,7 +726,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
case nir_op_fsub:
dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG);
break;
case nir_op_ffma:
case nir_op_ffma_old:
/* The scalar ALU doesn't support mad, so expand to mul+add so that we
* don't unnecessarily fall back to non-earlypreamble. This is safe
* because at least on a6xx+ mad is unfused.

View file

@ -33,8 +33,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
* times the derivatives of ij in screen space.
*/
nir_def *new_ij = ij;
new_ij = nir_ffma(b, chan(off, 0), nir_ddx(b, ij), new_ij);
new_ij = nir_ffma(b, chan(off, 1), nir_ddy(b, ij), new_ij);
new_ij = nir_ffma_old(b, chan(off, 0), nir_ddx(b, ij), new_ij);
new_ij = nir_ffma_old(b, chan(off, 1), nir_ddy(b, ij), new_ij);
return new_ij;
} else {
@ -52,8 +52,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
/* Get the offset value from pixel center for ij, and also for w. */
nir_def *pos = sij;
pos = nir_ffma(b, chan(off, 0), nir_ddx(b, sij), pos);
pos = nir_ffma(b, chan(off, 1), nir_ddy(b, sij), pos);
pos = nir_ffma_old(b, chan(off, 0), nir_ddx(b, sij), pos);
pos = nir_ffma_old(b, chan(off, 1), nir_ddy(b, sij), pos);
/* convert back into screen space, dividing by the offset 1/w */
return nir_fmul(b, nir_trim_vector(b, pos, 2),

View file

@ -20,7 +20,7 @@ for sz in [16, 32]:
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
# avoid fusing in cases where it's harmful.
fmul = 'fmul(is_only_used_by_fadd)'
ffma = 'ffma'
ffma = 'ffma_old'
fadd = 'fadd@{}'.format(sz)

View file

@ -7,8 +7,8 @@ import argparse
import sys
trig_workarounds = [
(('fsin', 'x@32'), ('fsin', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))),
(('fcos', 'x@32'), ('fcos', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))),
(('fsin', 'x@32'), ('fsin', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
(('fcos', 'x@32'), ('fcos', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
]

View file

@ -149,7 +149,7 @@ tu_get_subsampled_coordinates(nir_builder *b,
nir_def *hdr_scale = nir_channels(b, hdr0, 0x3);
nir_def *hdr_offset = nir_channels(b, hdr0, 0xc);
nir_def *bin = nir_f2u16(b, nir_ffma(b, coords, hdr_scale, hdr_offset));
nir_def *bin = nir_f2u16(b, nir_ffma_old(b, coords, hdr_scale, hdr_offset));
nir_def *bin_idx = nir_iadd(b, nir_imul(b, nir_channel(b, bin, 1),
nir_u2u16(b, bin_stride)),
nir_channel(b, bin, 0));
@ -166,7 +166,7 @@ tu_get_subsampled_coordinates(nir_builder *b,
nir_def *bin_scale = nir_channels(b, bin_data, 0x3);
nir_def *bin_offset = nir_channels(b, bin_data, 0xc);
return nir_ffma(b, coords, bin_scale, bin_offset);
return nir_ffma_old(b, coords, bin_scale, bin_offset);
}
/* Calculate the y coordinate in subsampled space of a given number of tiles

View file

@ -3337,7 +3337,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
case nir_op_ffloor:
result = lp_build_floor(float_bld, src[0]);
break;
case nir_op_ffma:
case nir_op_ffma_old:
result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
break;
case nir_op_ffract: {

View file

@ -1570,7 +1570,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
[nir_op_fmax] = { TGSI_OPCODE_MAX, TGSI_OPCODE_DMAX },
[nir_op_imax] = { TGSI_OPCODE_IMAX, TGSI_OPCODE_I64MAX },
[nir_op_umax] = { TGSI_OPCODE_UMAX, TGSI_OPCODE_U64MAX },
[nir_op_ffma] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
[nir_op_ffma_old] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
[nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 },
};

View file

@ -1650,7 +1650,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_MAX] = nir_op_fmax,
[TGSI_OPCODE_SLT] = nir_op_slt,
[TGSI_OPCODE_SGE] = nir_op_sge,
[TGSI_OPCODE_MAD] = nir_op_ffma,
[TGSI_OPCODE_MAD] = nir_op_ffma_old,
[TGSI_OPCODE_LRP] = 0,
[TGSI_OPCODE_SQRT] = nir_op_fsqrt,
[TGSI_OPCODE_FRC] = nir_op_ffract,

View file

@ -116,7 +116,7 @@ asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key)
{
/* For pixels within the copy area, texture from the source */
nir_def *coords_el_2d =
nir_ffma(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs);
nir_ffma_old(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs);
nir_def *coords_el_nd = coords_el_2d;
if (layer) {

View file

@ -180,11 +180,11 @@ lower_emit_vertex(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state
/* pos = scaled_point_size * point_dir + point_pos */
nir_def *point_dir = get_point_dir(b, state, i);
nir_def *pos = nir_vec4(b,
nir_ffma(b,
nir_ffma_old(b,
point_width,
nir_channel(b, point_dir, 0),
nir_channel(b, state->point_pos, 0)),
nir_ffma(b,
nir_ffma_old(b,
point_height,
nir_channel(b, point_dir, 1),
nir_channel(b, state->point_pos, 1)),

View file

@ -50,7 +50,7 @@ static const struct etna_op_info etna_ops[] = {
#define IOP(nir, op) IOPC(nir, op, TRUE)
#define UOP(nir, op) UOPC(nir, op, TRUE)
OP(mov, MOV), OP(fneg, MOV), OP(fabs, MOV), OP(fsat, MOV),
OP(fmul, MUL), OP(fadd, ADD), OP(ffma, MAD),
OP(fmul, MUL), OP(fadd, ADD), OP(ffma_old, MAD),
OP(fdot2, DP2), OP(fdot3, DP3), OP(fdot4, DP4),
OPC(fmin, SELECT, GT), OPC(fmax, SELECT, LT),
OP(ffract, FRC), OP(frcp, RCP), OP(frsq, RSQ),

View file

@ -118,7 +118,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data)
* value, matching the expected behaviour of Vivante GPU.
*/
nir_def *lod_raw = nir_flog2(b, max_derivative);
nir_def *lod_fixed_point = nir_ffma(b, lod_raw, nir_imm_float(b, 0.5f),
nir_def *lod_fixed_point = nir_ffma_old(b, lod_raw, nir_imm_float(b, 0.5f),
nir_imm_float(b, 393216.0f));
/* Extract 16-bit fractional part */
@ -135,7 +135,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data)
* This reverses the fixed-point encoding to get final LOD value
*/
nir_def *lod_float = nir_u2f32(b, lod_quantized);
lod = nir_ffma(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f));
lod = nir_ffma_old(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f));
/* floor and convert to int */
lod = nir_ffloor(b, lod);

View file

@ -309,7 +309,7 @@ instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
[nir_op_fadd] = {ADDs, ADDv},
[nir_op_fsub] = {ADDs, ADDv},
[nir_op_fmul] = {MULs, MULv},
[nir_op_ffma] = {-1, MULADDv},
[nir_op_ffma_old] = {-1, MULADDv},
[nir_op_fmax] = {MAXs, MAXv},
[nir_op_fmin] = {MINs, MINv},
[nir_op_ffloor] = {FLOORs, FLOORv},
@ -748,7 +748,7 @@ emit_tex(struct ir2_context *ctx, nir_tex_instr *tex)
rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
rcp->src[0].abs = true;
coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
coord_xy = instr_create_alu_reg(ctx, nir_op_ffma_old, 3, instr);
coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1);
@ -868,7 +868,7 @@ extra_position_exports(struct ir2_context *ctx, bool binning)
sc->src[0] = ctx->position;
sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
wincoord = instr_create_alu(ctx, nir_op_ffma_old, 4);
wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);
@ -895,13 +895,13 @@ extra_position_exports(struct ir2_context *ctx, bool binning)
/* 8 max set in freedreno_screen.. unneeded instrs patched out */
for (int i = 0; i < 8; i++) {
instr = instr_create_alu(ctx, nir_op_ffma, 4);
instr = instr_create_alu(ctx, nir_op_ffma_old, 4);
instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
instr->alu.export = 32;
instr = instr_create_alu(ctx, nir_op_ffma, 4);
instr = instr_create_alu(ctx, nir_op_ffma_old, 4);
instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);

View file

@ -49,7 +49,7 @@ duplicate_def_at_use(nir_builder *b, nir_def *def, bool duplicate_for_ffma)
if (duplicate_for_ffma &&
last_parent_instr->type == nir_instr_type_alu &&
nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma) {
nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma_old) {
last_parent_instr = NULL;
}
}

View file

@ -203,7 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
[nir_op_ftrunc] = ppir_op_trunc,
[nir_op_fsat] = ppir_op_sat,
[nir_op_fclamp_pos] = ppir_op_clamp_pos,
[nir_op_ffma] = ppir_op_fmad,
[nir_op_ffma_old] = ppir_op_fmad,
};
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)

View file

@ -464,8 +464,8 @@ Converter::getOperation(nir_op op)
return OP_EX2;
case nir_op_ffloor:
return OP_FLOOR;
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
/* No FMA op pre-nvc0 */
if (info->target < 0xc0)
return OP_MAD;
@ -2613,8 +2613,8 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_udiv:
case nir_op_fexp2:
case nir_op_ffloor:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_flog2:
case nir_op_fmax:
case nir_op_imax:
@ -2668,11 +2668,11 @@ Converter::visit(nir_alu_instr *insn)
switch (op) {
case nir_op_fmul:
case nir_op_ffma:
case nir_op_ffma_old:
i->dnz = this->info->io.mul_zero_wins;
break;
case nir_op_fmulz:
case nir_op_ffmaz:
case nir_op_ffmaz_old:
i->dnz = true;
break;
default:

View file

@ -819,7 +819,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr)
[nir_op_fmin] = TGSI_OPCODE_MIN,
[nir_op_fmax] = TGSI_OPCODE_MAX,
[nir_op_ffma] = TGSI_OPCODE_MAD,
[nir_op_ffma_old] = TGSI_OPCODE_MAD,
};
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) {

View file

@ -52,11 +52,11 @@ r300_nir_prepare_presubtract = [
(('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
(('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
# Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
(('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
(('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
(('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
(('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
(('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
(('ffma_old', 2.0, ('fneg', a), 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
(('ffma_old', a, -2.0, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
(('ffma_old', -2.0, a, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
(('ffma_old', 2.0, a, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))),
(('ffma_old', a, 2.0, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))),
# x * 2 can be usually folded into output modifier for the previous
# instruction, but that only works if x is a temporary. If it is input or
# constant just convert it to add instead.
@ -85,7 +85,7 @@ r300_nir_opt_algebraic_late = [
# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
r300_nir_lower_flrp = [
(('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a)))
(('flrp', a, b, c), ('ffma_old', b, c, ('ffma_old', ('fneg', a), c, a)))
]
# Lower fcsel_ge from ftrunc on r300

View file

@ -1639,7 +1639,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
case nir_op_fneu32:
return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
case nir_op_ffma:
case nir_op_ffma_old:
return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
case nir_op_fadd:
@ -1956,11 +1956,11 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
case nir_op_unpack_64_2x32_split_y:
return emit_unpack_64_2x32_split(*alu, 1, shader);
case nir_op_ffma:
case nir_op_ffma_old:
if (!shader.has_flag(Shader::sh_legacy_math_rules))
return emit_alu_op3(*alu, op3_muladd_ieee, shader);
FALLTHROUGH;
case nir_op_ffmaz:
case nir_op_ffmaz_old:
return emit_alu_op3(*alu, op3_muladd, shader);
case nir_op_mov:

View file

@ -1033,7 +1033,7 @@ Lower64BitToVec2::support_fp64_op(nir_op op) const
case nir_op_fge32:
case nir_op_flt32:
case nir_op_fneu32:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fadd:
case nir_op_fmul:
case nir_op_fmax:

View file

@ -246,8 +246,8 @@ lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
for (size_t i = 0; i < 4; i++) {
pos = nir_vec4(b,
nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
nir_ffma_old(b, half_w_delta, point_dir[i][0], point_pos_x),
nir_ffma_old(b, half_h_delta, point_dir[i][1], point_pos_y),
nir_channel(b, point_pos, 2),
nir_channel(b, point_pos, 3));

View file

@ -1288,12 +1288,12 @@ static nir_def *alu_iter(nir_builder *b,
.component = component,
.io_semantics = io_semantics);
nir_def *result = nir_ffma(b,
nir_def *result = nir_ffma_old(b,
nir_channel(b, coeffs, 1),
nir_channel(b, coords, 1),
nir_channel(b, coeffs, 2));
result =
nir_ffma(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result);
nir_ffma_old(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result);
return result;
}

View file

@ -3177,7 +3177,7 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = pco_fmul(&tctx->b, dest, src[0], src[1]);
break;
case nir_op_ffma:
case nir_op_ffma_old:
instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]);
break;

View file

@ -658,7 +658,7 @@ static nir_def *resolve_samples(nir_builder *b,
switch (resolve_op) {
case PVR_RESOLVE_BLEND:
op = nir_op_ffma;
op = nir_op_ffma_old;
coeff = nir_imm_float(b, 1.0 / num_samples);
break;
@ -683,7 +683,7 @@ static nir_def *resolve_samples(nir_builder *b,
for (unsigned i = 1; i < num_samples; i++) {
if (resolve_op == PVR_RESOLVE_BLEND)
accum = nir_ffma(b, samples[i], coeff, accum);
accum = nir_ffma_old(b, samples[i], coeff, accum);
else
accum = nir_build_alu2(b, op, samples[i], accum);
}

View file

@ -1646,7 +1646,7 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
inst->saturate = true;
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
brw_rnd_mode rnd =
brw_rnd_mode_from_execution_mode(execution_mode);

View file

@ -1706,7 +1706,7 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr,
bld.emit(ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
elk_rnd_mode rnd =
elk_rnd_mode_from_execution_mode(execution_mode);

View file

@ -1670,7 +1670,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
emit(SHR(dst, op[0], op[1]));
break;
case nir_op_ffma:
case nir_op_ffma_old:
if (type_sz(dst.type) == 8) {
dst_reg mul_dst = dst_reg(this, glsl_dvec4_type());
emit(MUL(mul_dst, op[1], op[0]));

View file

@ -201,7 +201,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b,
if (negate)
mul_src[0] = nir_fneg(b, mul_src[0]);
nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma_old);
ffma->fp_math_ctrl = b->fp_math_ctrl;
for (unsigned i = 0; i < 2; i++) {

View file

@ -512,7 +512,7 @@ jay_emit_alu(struct nir_to_jay_state *nj, nir_alu_instr *alu)
jay_BFI2(b, dst, src[0], src[1], src[2]);
break;
case nir_op_ffma:
case nir_op_ffma_old:
jay_MAD(b, type, dst, src[0], src[1], src[2]);
break;

View file

@ -374,7 +374,7 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
case nir_op_ffloor:
alu_funclike(ctx, instr, "floor");
break;
case nir_op_ffma:
case nir_op_ffma_old:
alu_funclike(ctx, instr, "fma");
break;
case nir_op_ffract:

View file

@ -231,7 +231,7 @@ emit_arith_inst(struct st_translate *t,
return nir_fmul(t->b, src[0], src[1]);
case GL_MAD_ATI:
return nir_ffma(t->b, src[0], src[1], src[2]);
return nir_ffma_old(t->b, src[0], src[1], src[2]);
case GL_LERP_ATI:
return nir_flrp(t->b, src[2], src[1], src[0]);

View file

@ -114,7 +114,7 @@ lower_color(nir_builder *b, lower_drawpixels_state *state, nir_intrinsic_instr *
/* Apply the scale and bias. */
if (state->options->scale_and_bias) {
/* MAD def, def, scale, bias; */
def = nir_ffma(b, def, get_scale(b, state), get_bias(b, state));
def = nir_ffma_old(b, def, get_scale(b, state), get_bias(b, state));
}
if (state->options->pixel_maps) {

View file

@ -2995,7 +2995,7 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
case nir_op_ffma:
case nir_op_ffma_old:
if (alu->def.bit_size == 64)
ctx->mod.feats.dx11_1_double_extensions = 1;
return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);

View file

@ -1056,7 +1056,7 @@ impl<'a> ShaderFromNir<'a> {
b.fexp2(srcs(0)).into()
}
}
nir_op_ffma => {
nir_op_ffma_old => {
let ftype = FloatType::from_bits(alu.def.bit_size().into());
let dst;
if alu.def.bit_size() == 64 {
@ -1102,7 +1102,7 @@ impl<'a> ShaderFromNir<'a> {
}
dst
}
nir_op_ffmaz => {
nir_op_ffmaz_old => {
assert!(alu.def.bit_size() == 32);
// DNZ implies FTZ so we need FTZ set or this is invalid
assert!(self.float_ctl.fp32.ftz);

View file

@ -116,7 +116,7 @@ vectorize_filter_cb(const nir_instr *instr, const void *data)
case nir_op_fneu:
case nir_op_fmul:
case nir_op_fmul_rtz:
case nir_op_ffma:
case nir_op_ffma_old:
case nir_op_fsign:
case nir_op_fsat:
case nir_op_fmax:
@ -268,8 +268,8 @@ lower_bit_size_cb(const nir_instr *instr, void *data)
case nir_op_fneu:
case nir_op_fmul:
case nir_op_fmul_rtz:
case nir_op_ffma:
case nir_op_ffmaz:
case nir_op_ffma_old:
case nir_op_ffmaz_old:
case nir_op_fsign:
case nir_op_fsat:
case nir_op_fceil:

View file

@ -2975,7 +2975,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
switch (instr->op) {
case nir_op_ffma:
case nir_op_ffma_old:
bi_fma_to(b, sz, dst, s0, s1, s2);
break;

View file

@ -218,7 +218,7 @@ build_blit_shader(const struct vk_meta_blit_key *key)
nir_def *out_coord_xy = nir_load_frag_coord(b);
out_coord_xy = nir_trim_vector(b, out_coord_xy, 2);
nir_def *src_coord_xy = nir_ffma(b, out_coord_xy, xy_scale, xy_off);
nir_def *src_coord_xy = nir_ffma_old(b, out_coord_xy, xy_scale, xy_off);
nir_def *z_xform = load_struct_var(b, push, 1);
nir_def *out_layer = nir_load_layer_id(b);
@ -227,7 +227,7 @@ build_blit_shader(const struct vk_meta_blit_key *key)
nir_def *z_off = nir_channel(b, z_xform, 0);
nir_def *z_scale = nir_channel(b, z_xform, 1);
nir_def *out_coord_z = nir_fadd_imm(b, nir_u2f32(b, out_layer), 0.5);
nir_def *src_coord_z = nir_ffma(b, out_coord_z, z_scale, z_off);
nir_def *src_coord_z = nir_ffma_old(b, out_coord_z, z_scale, z_off);
src_coord = nir_vec3(b, nir_channel(b, src_coord_xy, 0),
nir_channel(b, src_coord_xy, 1),
src_coord_z);