mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 06:18:10 +02:00
nir: rename ffma to ffma_old
We'll get three new opcodes to properly model float multiply-add.
ffma_old is temporary and will be deleted at the end of this series.

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
parent
bdb5301281
commit
a9b18f8607
74 changed files with 261 additions and 261 deletions
|
|
@ -972,7 +972,7 @@ ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
|
|||
case nir_op_fadd:
|
||||
case nir_op_fsub:
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_fdiv:
|
||||
case nir_op_flrp:
|
||||
case nir_op_fabs:
|
||||
|
|
|
|||
|
|
@ -170,8 +170,8 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection,
|
|||
vp_translate[chan] = nir_channel(b, vp, 2 + chan);
|
||||
|
||||
/* Convert the position to screen-space coordinates. */
|
||||
nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
|
||||
nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
|
||||
nir_def *min = nir_ffma_old(b, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
|
||||
nir_def *max = nir_ffma_old(b, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
|
||||
|
||||
/* Scale the bounding box according to precision. */
|
||||
min = nir_fsub(b, min, small_prim_precision);
|
||||
|
|
@ -251,7 +251,7 @@ cull_small_primitive_triangle(nir_builder *b, bool use_point_tri_intersection,
|
|||
/* Transform the coordinates to screen space. */
|
||||
for (unsigned vtx = 0; vtx < 3; ++vtx) {
|
||||
for (unsigned chan = 0; chan < 2; ++chan)
|
||||
screen_pos[vtx][chan] = nir_ffma(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]);
|
||||
screen_pos[vtx][chan] = nir_ffma_old(b, pos[vtx][chan], vp_scale[chan], vp_translate[chan]);
|
||||
}
|
||||
|
||||
/* small_prim_precision is the rasterization precision in X and Y axes, meaning it's the size of
|
||||
|
|
@ -436,8 +436,8 @@ cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
|
|||
nir_def *vp_scale = nir_channel(b, vp, chan);
|
||||
nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
|
||||
|
||||
v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
|
||||
v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
|
||||
v0[chan] = nir_ffma_old(b, pos[0][chan], vp_scale, vp_translate);
|
||||
v1[chan] = nir_ffma_old(b, pos[1][chan], vp_scale, vp_translate);
|
||||
}
|
||||
|
||||
/* Rotate the viewport by 45 degrees, so that diamonds become squares. */
|
||||
|
|
|
|||
|
|
@ -409,8 +409,8 @@ lower_intrinsic_to_arg(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
nir_def *ddy_j = nir_ddy(b, j);
|
||||
|
||||
/* Interpolate standard barycentrics by offset. */
|
||||
nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
|
||||
nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
|
||||
nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i));
|
||||
nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j));
|
||||
replacement = nir_vec2(b, offset_i, offset_j);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,8 +300,8 @@ lower_load_barycentric_at_offset(nir_builder *b, nir_def *offset, enum glsl_inte
|
|||
nir_def *offset_y = nir_channel(b, offset, 1);
|
||||
|
||||
/* Interpolate standard barycentrics by offset. */
|
||||
nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
|
||||
nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
|
||||
nir_def *offset_i = nir_ffma_old(b, ddy_i, offset_y, nir_ffma_old(b, ddx_i, offset_x, i));
|
||||
nir_def *offset_j = nir_ffma_old(b, ddy_j, offset_y, nir_ffma_old(b, ddx_j, offset_x, j));
|
||||
return nir_vec2(b, offset_i, offset_j);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -450,7 +450,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_e4m3fn2f:
|
||||
case nir_op_e5m22f:
|
||||
case nir_op_fmulz:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffmaz_old:
|
||||
case nir_op_f2f64:
|
||||
case nir_op_u2f64:
|
||||
case nir_op_i2f64:
|
||||
|
|
@ -485,7 +485,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_f2f16_ru:
|
||||
case nir_op_f2f16_rd: type = RegType::vgpr; break;
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_fadd:
|
||||
case nir_op_fsub:
|
||||
case nir_op_fmax:
|
||||
|
|
|
|||
|
|
@ -1925,7 +1925,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_ffma: {
|
||||
case nir_op_ffma_old: {
|
||||
if (dst.regClass() == v2b) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
|
||||
} else if (dst.regClass() == v1 && instr->def.bit_size == 16) {
|
||||
|
|
@ -1961,7 +1961,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_ffmaz: {
|
||||
case nir_op_ffmaz_old: {
|
||||
if (dst.regClass() == v1) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
|
||||
ctx->block->fp_mode.must_flush_denorms32, 3);
|
||||
|
|
|
|||
|
|
@ -759,12 +759,12 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
result = ac_build_canonicalize(&ctx->ac, result, instr->def.bit_size);
|
||||
}
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
/* FMA is slow on gfx6-8, so it shouldn't be used. */
|
||||
assert(instr->def.bit_size != 32 || ctx->ac.gfx_level >= GFX9);
|
||||
result = emit_fp_intrinsic(&ctx->ac, "llvm.fma", def_type, src[0], src[1], src[2]);
|
||||
break;
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffmaz_old:
|
||||
assert(ctx->ac.gfx_level >= GFX10_3);
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
src[1] = ac_to_float(&ctx->ac, src[1]);
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|||
nir_def *mul = nir_bcsel(b, cond, nir_imm_float(b, 0.0625f), nir_imm_float(b, -0.0));
|
||||
|
||||
/* adjusted_frag_z = dFdxFine(frag_z) * 0.0625 + frag_z */
|
||||
frag_z = nir_ffma(b, nir_ddx_fine(b, frag_z), mul, frag_z);
|
||||
frag_z = nir_ffma_old(b, nir_ddx_fine(b, frag_z), mul, frag_z);
|
||||
|
||||
nir_def_rewrite_uses_after(&intrin->def, frag_z);
|
||||
|
||||
|
|
|
|||
|
|
@ -1906,7 +1906,7 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
|
|||
else
|
||||
return agx_fmul_to(b, dst, s0, s1);
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (instr->def.bit_size == 16)
|
||||
return agx_hfma_to(b, dst, s0, s1, s2);
|
||||
else
|
||||
|
|
@ -3559,7 +3559,7 @@ libagx_frcp(nir_builder *b, nir_def *x)
|
|||
* = fma(fma(-x, u, 1), u, u)
|
||||
*/
|
||||
nir_def *one = nir_imm_float(b, 1.0);
|
||||
nir_def *u_2 = nir_ffma(b, nir_ffma(b, nir_fneg(b, x), u, one), u, u);
|
||||
nir_def *u_2 = nir_ffma_old(b, nir_ffma_old(b, nir_fneg(b, x), u, one), u, u);
|
||||
|
||||
/* If the original value was infinite, frcp will generate the correct zero.
|
||||
* However, the Newton-Raphson step would multiply 0 * Inf and get a NaN. So
|
||||
|
|
|
|||
|
|
@ -55,10 +55,10 @@ interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
|
|||
nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));
|
||||
|
||||
/* Interpolate with the given coefficients */
|
||||
nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
|
||||
nir_def *interp = nir_ffma_old(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
|
||||
nir_channel(b, cf, 2));
|
||||
|
||||
interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);
|
||||
interp = nir_ffma_old(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);
|
||||
|
||||
/* Divide by RHW. This load will be lowered recursively. */
|
||||
if (perspective) {
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ alu_cost(nir_alu_instr *alu)
|
|||
case nir_op_f2f16_rtne:
|
||||
case nir_op_fadd:
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_iadd:
|
||||
case nir_op_inot:
|
||||
case nir_op_iand:
|
||||
|
|
|
|||
|
|
@ -2569,7 +2569,7 @@ nir_visitor::visit(ir_expression *ir)
|
|||
|
||||
case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
|
||||
case ir_triop_fma:
|
||||
result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
|
||||
result = nir_ffma_old(&b, srcs[0], srcs[1], srcs[2]);
|
||||
break;
|
||||
case ir_triop_lrp:
|
||||
result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
|
||||
|
|
|
|||
|
|
@ -1907,7 +1907,7 @@ nir_def_all_uses_ignore_sign_bit(const nir_def *def)
|
|||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
if (alu->op == nir_op_fabs) {
|
||||
continue;
|
||||
} else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
|
||||
} else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma_old) {
|
||||
nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
|
||||
unsigned src_index = alu_src - alu->src;
|
||||
/* a * a doesn't care about sign of a. */
|
||||
|
|
|
|||
|
|
@ -1367,27 +1367,27 @@ nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
|
|||
build->shader->options->avoid_ternary_with_two_constants)
|
||||
return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
|
||||
else
|
||||
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
|
||||
nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
|
||||
nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
|
||||
{
|
||||
return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
|
||||
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
|
||||
{
|
||||
return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1,
|
||||
nir_def *src2)
|
||||
{
|
||||
return nir_ffma(build, nir_fneg(build, src1), src2, src0);
|
||||
return nir_ffma_old(build, nir_fneg(build, src1), src2, src0);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
|
|
|
|||
|
|
@ -41,10 +41,10 @@ nir_cross3(nir_builder *b, nir_def *x, nir_def *y)
|
|||
unsigned yzx[3] = { 1, 2, 0 };
|
||||
unsigned zxy[3] = { 2, 0, 1 };
|
||||
|
||||
return nir_ffma(b, nir_swizzle(b, x, yzx, 3),
|
||||
nir_swizzle(b, y, zxy, 3),
|
||||
nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3),
|
||||
nir_swizzle(b, y, yzx, 3))));
|
||||
return nir_ffma_old(b, nir_swizzle(b, x, yzx, 3),
|
||||
nir_swizzle(b, y, zxy, 3),
|
||||
nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3),
|
||||
nir_swizzle(b, y, yzx, 3))));
|
||||
}
|
||||
|
||||
nir_def *
|
||||
|
|
@ -285,7 +285,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
|
|||
nir_imm_floatN_t(b, -M_PI_2, bit_size));
|
||||
|
||||
/* multiply through by x while fixing up the range reduction */
|
||||
nir_def *tmp = nir_ffma(b, nir_fabs(b, u), res, bias);
|
||||
nir_def *tmp = nir_ffma_old(b, nir_fabs(b, u), res, bias);
|
||||
|
||||
/* sign fixup */
|
||||
return nir_copysign(b, tmp, y_over_x);
|
||||
|
|
|
|||
|
|
@ -188,7 +188,7 @@ lower_fdot(nir_alu_instr *alu, nir_builder *builder, bool is_bfloat16)
|
|||
|
||||
unsigned num_components = nir_op_infos[alu->op].input_sizes[0];
|
||||
|
||||
const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma;
|
||||
const nir_op fma_op = is_bfloat16 ? nir_op_bffma : nir_op_ffma_old;
|
||||
const nir_op mul_op = is_bfloat16 ? nir_op_bfmul : nir_op_fmul;
|
||||
|
||||
nir_def *prev = NULL;
|
||||
|
|
@ -328,12 +328,12 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
|
|||
} else if (reverse_order) {
|
||||
nir_def *sum = nir_channel(b, src1_vec, 3);
|
||||
for (int i = 2; i >= 0; i--)
|
||||
sum = nir_ffma(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum);
|
||||
sum = nir_ffma_old(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum);
|
||||
return sum;
|
||||
} else {
|
||||
nir_def *sum = nir_fmul(b, nir_channel(b, src0_vec, 0), nir_channel(b, src1_vec, 0));
|
||||
sum = nir_ffma(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum);
|
||||
sum = nir_ffma(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum);
|
||||
sum = nir_ffma_old(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum);
|
||||
sum = nir_ffma_old(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum);
|
||||
return nir_fadd(b, sum, nir_channel(b, src1_vec, 3));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ lower_pos_write_dynamic(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
nir_def *c = nir_load_clip_z_coeff(b);
|
||||
|
||||
/* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */
|
||||
nir_def *new_z = nir_ffma(b, nir_fneg(b, z), c, nir_ffma(b, w, c, z));
|
||||
nir_def *new_z = nir_ffma_old(b, nir_fneg(b, z), c, nir_ffma_old(b, w, c, z));
|
||||
nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2));
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,8 +172,8 @@ lower_rcp(nir_builder *b, nir_def *src)
|
|||
* See https://en.wikipedia.org/wiki/Division_algorithm for more details.
|
||||
*/
|
||||
|
||||
ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
|
||||
ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
|
||||
ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
|
||||
ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
|
||||
|
||||
return fix_inv_result(b, ra, src, new_exp);
|
||||
}
|
||||
|
|
@ -299,18 +299,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
|
|||
nir_def *one_half = nir_imm_double(b, 0.5);
|
||||
nir_def *h_0 = nir_fmul(b, one_half, ra);
|
||||
nir_def *g_0 = nir_fmul(b, src, ra);
|
||||
nir_def *r_0 = nir_ffma(b, nir_fneg(b, h_0), g_0, one_half);
|
||||
nir_def *h_1 = nir_ffma(b, h_0, r_0, h_0);
|
||||
nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half);
|
||||
nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0);
|
||||
nir_def *res;
|
||||
if (sqrt) {
|
||||
nir_def *g_1 = nir_ffma(b, g_0, r_0, g_0);
|
||||
nir_def *r_1 = nir_ffma(b, nir_fneg(b, g_1), g_1, src);
|
||||
res = nir_ffma(b, h_1, r_1, g_1);
|
||||
nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0);
|
||||
nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src);
|
||||
res = nir_ffma_old(b, h_1, r_1, g_1);
|
||||
} else {
|
||||
nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0);
|
||||
nir_def *r_1 = nir_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
|
||||
nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
|
||||
one_half);
|
||||
res = nir_ffma(b, y_1, r_1, y_1);
|
||||
res = nir_ffma_old(b, y_1, r_1, y_1);
|
||||
}
|
||||
|
||||
if (sqrt) {
|
||||
|
|
@ -654,7 +654,7 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
|
|||
name = "__fmul64";
|
||||
mangled_name = "__fmul64(u641;u641;";
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
name = "__fmad64";
|
||||
mangled_name = "__fmad64(u641;u641;u641;";
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ lower_float_instr_to_soft(nir_builder *b, nir_instr *instr,
|
|||
case nir_op_fmul:
|
||||
mangled_name = "__fmul32(u1;u1;";
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
mangled_name = "__fmad32(u1;u1;u1;";
|
||||
break;
|
||||
case nir_op_fsat:
|
||||
|
|
|
|||
|
|
@ -52,8 +52,8 @@ replace_with_strict_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
|
|||
nir_def *const c = nir_ssa_for_alu_src(bld, alu, 2);
|
||||
|
||||
nir_def *const neg_a = nir_fneg(bld, a);
|
||||
nir_def *const inner_ffma = nir_ffma(bld, neg_a, c, a);
|
||||
nir_def *const outer_ffma = nir_ffma(bld, b, c, inner_ffma);
|
||||
nir_def *const inner_ffma = nir_ffma_old(bld, neg_a, c, a);
|
||||
nir_def *const outer_ffma = nir_ffma_old(bld, b, c, inner_ffma);
|
||||
|
||||
nir_def_rewrite_uses(&alu->def, outer_ffma);
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ replace_with_single_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
|
|||
nir_def *const one_minus_c =
|
||||
nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c);
|
||||
nir_def *const b_times_c = nir_fmul(bld, b, c);
|
||||
nir_def *const final_ffma = nir_ffma(bld, a, one_minus_c, b_times_c);
|
||||
nir_def *const final_ffma = nir_ffma_old(bld, a, one_minus_c, b_times_c);
|
||||
|
||||
nir_def_rewrite_uses(&alu->def, final_ffma);
|
||||
|
||||
|
|
|
|||
|
|
@ -106,11 +106,11 @@ nir_lower_interpolation_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
nir_def *bary = intr->src[0].ssa;
|
||||
nir_def *val;
|
||||
|
||||
val = nir_ffma(b, nir_channel(b, bary, 1),
|
||||
nir_channel(b, iid, 1),
|
||||
nir_channel(b, iid, 0));
|
||||
val = nir_ffma(b, nir_channel(b, bary, 0),
|
||||
nir_channel(b, iid, 2),
|
||||
val = nir_ffma_old(b, nir_channel(b, bary, 1),
|
||||
nir_channel(b, iid, 1),
|
||||
nir_channel(b, iid, 0));
|
||||
val = nir_ffma_old(b, nir_channel(b, bary, 0),
|
||||
nir_channel(b, iid, 2),
|
||||
val);
|
||||
|
||||
comps[i] = val;
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ lower_load_pointcoord(lower_pntc_ytransform_state *state,
|
|||
|
||||
nir_def *pntc = &intr->def;
|
||||
nir_def *transform = get_pntc_transform(state);
|
||||
nir_def *flipped_y = nir_ffma(b, nir_channel(b, pntc, y_swizzle),
|
||||
nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pntc, y_swizzle),
|
||||
/* Flip the sign of y if we're flipping. */
|
||||
nir_channel(b, transform, 0),
|
||||
/* The offset is 1 if we're flipping, 0 otherwise. */
|
||||
|
|
|
|||
|
|
@ -409,7 +409,7 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
|
|||
}
|
||||
|
||||
nir_def *result =
|
||||
nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
|
||||
nir_ffma_old(b, y, m0, nir_ffma_old(b, u, m1, nir_ffma_old(b, v, m2, offset)));
|
||||
|
||||
nir_def_rewrite_uses(&tex->def, result);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state,
|
|||
*/
|
||||
unsigned base = invert ? 0 : 2;
|
||||
/* wpos.y = wpos.y * trans.x/z + trans.y/w */
|
||||
wpos[1] = nir_ffma(b, wpos[1], nir_channel(b, wpostrans, base),
|
||||
wpos[1] = nir_ffma_old(b, wpos[1], nir_channel(b, wpostrans, base),
|
||||
nir_channel(b, wpostrans, base + 1));
|
||||
}
|
||||
|
||||
|
|
@ -258,7 +258,7 @@ lower_load_sample_pos(lower_wpos_ytransform_state *state,
|
|||
nir_def *scale = nir_channel(b, wpostrans, 0);
|
||||
nir_def *neg_scale = nir_channel(b, wpostrans, 2);
|
||||
/* Either y or 1-y for scale equal to 1 or -1 respectively. */
|
||||
nir_def *flipped_y = nir_ffma(b, nir_channel(b, pos, 1), scale,
|
||||
nir_def *flipped_y = nir_ffma_old(b, nir_channel(b, pos, 1), scale,
|
||||
nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)));
|
||||
nir_def *flipped_pos = nir_vector_insert_imm(b, pos, flipped_y, 1);
|
||||
|
||||
|
|
|
|||
|
|
@ -1132,7 +1132,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr,
|
|||
[src1_size, src2_size, src3_size],
|
||||
[tuint, tuint, tuint], False, "", const_expr, description)
|
||||
|
||||
triop("ffma", tfloat, _2src_commutative, """
|
||||
triop("ffma_old", tfloat, _2src_commutative, """
|
||||
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
||||
if (bit_size == 64)
|
||||
dst = _mesa_double_fma_rtz(src0, src1, src2);
|
||||
|
|
@ -1148,7 +1148,7 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
|||
}
|
||||
""")
|
||||
|
||||
triop("ffmaz", tfloat32, _2src_commutative, """
|
||||
triop("ffmaz_old", tfloat32, _2src_commutative, """
|
||||
if (src0 == 0.0 || src1 == 0.0)
|
||||
dst = 0.0 + src2;
|
||||
else if (nir_is_rounding_mode_rtz(execution_mode, 32))
|
||||
|
|
@ -1158,8 +1158,8 @@ else
|
|||
""", description = """
|
||||
Floating-point multiply-add with modified zero handling.
|
||||
|
||||
Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by +/-0.0 is
|
||||
+0.0. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
|
||||
Unlike :nir:alu-op:`ffma_old`, anything (even infinity or NaN) multiplied by +/-0.0 is
|
||||
+0.0. ``ffmaz_old(0.0, inf, src2)`` and ``ffmaz_old(0.0, nan, src2)`` must be
|
||||
``+0.0 + src2``.
|
||||
""")
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode
|
|||
def lowered_sincos(c):
|
||||
x = ('fsub', ('fmul', 2.0, ('ffract', ('fadd', ('fmul', 0.5 / pi, a), c))), 1.0)
|
||||
x = ('fmul', ('fsub', x, ('fmul', x, ('fabs', x))), 4.0)
|
||||
return ('ffma', ('ffma', x, ('fabs', x), ('fneg', x)), 0.225, x)
|
||||
return ('ffma_old', ('ffma_old', x, ('fabs', x), ('fneg', x)), 0.225, x)
|
||||
|
||||
def intBitsToFloat(i):
|
||||
return struct.unpack('!f', struct.pack('!I', i))[0]
|
||||
|
|
@ -241,14 +241,14 @@ optimizations += [
|
|||
(('usadd_4x8_vc4', a, ~0), ~0),
|
||||
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmulz', a, b), ('fmulz', a, c)), ('fmulz', a, ('fadd', b, c))),
|
||||
(('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
|
||||
(('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
|
||||
(('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))),
|
||||
(('~ffmaz', a, b, ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
|
||||
(('~ffmaz', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz(is_used_once)', a, c, d)), ('ffmaz', a, ('fadd', b, c), d)),
|
||||
(('~ffmaz', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz', a, c, d))),
|
||||
(('~ffma_old', a, b, ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
|
||||
(('~ffma_old', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma_old(is_used_once)', a, c, d)), ('ffma_old', a, ('fadd', b, c), d)),
|
||||
(('~ffma_old', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma_old', a, c, d))),
|
||||
(('~ffmaz_old', a, b, ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
|
||||
(('~ffmaz_old', a, b, ('fmulz(is_used_once)', a, c)), ('fmulz', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmulz(is_used_once)', a, b), ('ffmaz_old(is_used_once)', a, c, d)), ('ffmaz_old', a, ('fadd', b, c), d)),
|
||||
(('~ffmaz_old', a, ('fmulz(is_used_once)', b, c), ('fmulz(is_used_once)', b, d)), ('fmulz', b, ('ffmaz_old', a, c, d))),
|
||||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||
(('iadd', ('ishl', b, a), ('ishl', c, a)), ('ishl', ('iadd', b, c), a)),
|
||||
(('iand', ('iand', a, b), ('iand(is_used_once)', a, c)), ('iand', ('iand', a, b), c)),
|
||||
|
|
@ -285,9 +285,9 @@ optimizations += [
|
|||
(('fmulz(nsz)', a, 'b(is_finite_not_zero)'), ('fmul', a, b)),
|
||||
(('fmulz(nsz)', 'a(is_finite)', 'b(is_finite)'), ('fmul', a, b)),
|
||||
(('fmulz', a, a), ('fmul', a, a)),
|
||||
(('ffmaz(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma', a, b, c)),
|
||||
(('ffmaz', 'a(is_finite)', 'b(is_finite)', c), ('ffma', a, b, c)),
|
||||
(('ffmaz', a, a, b), ('ffma', a, a, b)),
|
||||
(('ffmaz_old(nsz)', a, 'b(is_finite_not_zero)', c), ('ffma_old', a, b, c)),
|
||||
(('ffmaz_old', 'a(is_finite)', 'b(is_finite)', c), ('ffma_old', a, b, c)),
|
||||
(('ffmaz_old', a, a, b), ('ffma_old', a, a, b)),
|
||||
(('imul', a, 0), 0),
|
||||
(('imul24_relaxed', a, 0), 0),
|
||||
(('umul24_relaxed', a, 0), 0),
|
||||
|
|
@ -306,20 +306,20 @@ optimizations += [
|
|||
# If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
|
||||
(('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
|
||||
(('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
|
||||
(('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
|
||||
(('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
|
||||
(('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
|
||||
(('ffmaz', -0.0, a, b), ('fadd', 0.0, b)),
|
||||
(('ffma(nsz)', a, b, 0.0), ('fmul', a, b)),
|
||||
(('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)),
|
||||
(('ffma', a, b, -0.0), ('fmul', a, b)),
|
||||
(('ffmaz', a, b, -0.0), ('fmulz', a, b)),
|
||||
(('ffma', 1.0, a, b), ('fadd', a, b)),
|
||||
(('ffmaz(nsz)', 1.0, a, b), ('fadd', a, b)),
|
||||
(('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
|
||||
(('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
|
||||
(('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
|
||||
(('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
|
||||
(('ffma_old(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
|
||||
(('ffma_old(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b)),
|
||||
(('ffmaz_old', 0.0, a, b), ('fadd', 0.0, b)),
|
||||
(('ffmaz_old', -0.0, a, b), ('fadd', 0.0, b)),
|
||||
(('ffma_old(nsz)', a, b, 0.0), ('fmul', a, b)),
|
||||
(('ffmaz_old(nsz)', a, b, 0.0), ('fmulz', a, b)),
|
||||
(('ffma_old', a, b, -0.0), ('fmul', a, b)),
|
||||
(('ffmaz_old', a, b, -0.0), ('fmulz', a, b)),
|
||||
(('ffma_old', 1.0, a, b), ('fadd', a, b)),
|
||||
(('ffmaz_old(nsz)', 1.0, a, b), ('fadd', a, b)),
|
||||
(('ffma_old', -1.0, a, b), ('fadd', ('fneg', a), b)),
|
||||
(('ffmaz_old(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
|
||||
(('~ffma_old', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
|
||||
(('~ffmaz_old', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
|
||||
(('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)),
|
||||
(('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a)),
|
||||
(('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)),
|
||||
|
|
@ -397,14 +397,14 @@ optimizations += [
|
|||
('fmulz', 'ma', b), has_fmulz), {'ma' : a}),
|
||||
|
||||
# ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c)
|
||||
*add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
|
||||
('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
|
||||
*add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
|
||||
('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}),
|
||||
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
|
||||
('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
|
||||
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
|
||||
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
|
||||
*add_fabs_fneg((('ffma_old@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
|
||||
('ffmaz_old', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
|
||||
*add_fabs_fneg((('ffma_old@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
|
||||
('ffmaz_old', 'ma', b, c), has_fmulz), {'ma' : a}),
|
||||
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
|
||||
('ffmaz_old', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
|
||||
*add_fabs_fneg((('ffma_old@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
|
||||
('ffmaz_old', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
|
||||
|
||||
# b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b))
|
||||
*add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))),
|
||||
|
|
@ -500,11 +500,11 @@ optimizations.extend([
|
|||
(('~fadd', ('fmul', a, ('b2f', ('inot', 'c@1'))), ('fmul', b, ('b2f', c))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
|
||||
(('~ffma', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~ffma', b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~ffma_old', a, ('b2f', ('inot', 'c@1')), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~ffma_old', b, ('b2f', 'c@1'), ('ffma_old', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
|
||||
(('~ffma', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~ffma', ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
|
||||
(('~ffma_old', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
|
||||
(('~ffma_old', ('b2f', 'c@1'), ('ffma_old', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
|
||||
|
||||
(('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
|
||||
|
||||
|
|
@ -540,15 +540,15 @@ optimizations.extend([
|
|||
(('fadd@32', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract'),
|
||||
(('fadd@64', a, ('fneg(is_used_once)', ('ffloor(is_used_once)', a))), ('ffract', a), '!options->lower_ffract && !(options->lower_doubles_options & nir_lower_dfract)'),
|
||||
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
||||
(('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
|
||||
(('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
|
||||
(('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
|
||||
(('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
|
||||
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
||||
(('ffma@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
|
||||
(('ffma@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
|
||||
(('ffma@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
|
||||
(('ffmaz(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
|
||||
(('ffma_old@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
|
||||
(('ffma_old@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
|
||||
(('ffma_old@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
|
||||
(('ffmaz_old', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
|
||||
# Always lower inexact ffma_old, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
||||
(('ffma_old@16(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
|
||||
(('ffma_old@32(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
|
||||
(('ffma_old@64(contract)', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
|
||||
(('ffmaz_old(contract)', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
|
||||
|
||||
(('fmul', ('fadd', ('bcsel', a, ('fmul', b, c), 0), '#d'), '#e'),
|
||||
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', ('fadd', d, 0.0), e))),
|
||||
|
|
@ -1613,7 +1613,7 @@ for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]:
|
|||
|
||||
optimizations.extend([
|
||||
(('fmul', search_b2f, search_mod), replace_mod_mul),
|
||||
(('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
|
||||
(('ffma_old', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
|
||||
])
|
||||
|
||||
optimizations.extend([
|
||||
|
|
@ -1641,7 +1641,7 @@ optimizations.extend([
|
|||
(('iand', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
|
||||
(('ior', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('ior', a, b))),
|
||||
(('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
|
||||
(('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
|
||||
(('ffma_old', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
|
||||
(('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))),
|
||||
(('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))),
|
||||
(('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
|
||||
|
|
@ -2338,8 +2338,8 @@ optimizations.extend([
|
|||
# Propagate negation up multiplication chains
|
||||
(('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
|
||||
(('fmulz(is_used_by_non_fsat,nsz)', ('fneg', a), b), ('fneg', ('fmulz', a, b))),
|
||||
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
|
||||
(('ffmaz', ('fneg', a), ('fneg', b), c), ('ffmaz', a, b, c)),
|
||||
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
|
||||
(('ffmaz_old', ('fneg', a), ('fneg', b), c), ('ffmaz_old', a, b, c)),
|
||||
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
|
||||
|
||||
# Propagate constants up multiplication chains
|
||||
|
|
@ -2347,14 +2347,14 @@ optimizations.extend([
|
|||
(('~fmulz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmulz', ('fmulz', a, c), b)),
|
||||
(('~fmul', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)'), ('fmulz', ('fmul', a, c), b)),
|
||||
(('imul', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
|
||||
(('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)),
|
||||
(('~ffmaz', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz', ('fmulz', a, c), b, d)),
|
||||
(('~ffma', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz', ('fmul', a, c), b, d)),
|
||||
(('~ffma_old', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma_old', ('fmul', a, c), b, d)),
|
||||
(('~ffmaz_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffmaz_old', ('fmulz', a, c), b, d)),
|
||||
(('~ffma_old', ('fmulz(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite_not_zero)', d), ('ffmaz_old', ('fmul', a, c), b, d)),
|
||||
# Prefer moving out a multiplication for more MAD/FMA-friendly code
|
||||
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)),
|
||||
(('~fadd', ('ffmaz(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz', a, b, d), c)),
|
||||
(('~fadd', ('ffma_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma_old', a, b, d), c)),
|
||||
(('~fadd', ('ffmaz_old(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffmaz_old', a, b, d), c)),
|
||||
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
|
||||
|
||||
# Reassociate constants in add/mul chains so they can be folded together.
|
||||
|
|
@ -2363,16 +2363,16 @@ optimizations.extend([
|
|||
(('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
|
||||
(('~fmulz', '#a', ('fmulz', b, '#c')), ('fmulz', ('fmulz', a, c), b)),
|
||||
(('~fmul', '#a(is_finite_not_zero)', ('fmulz', b, '#c')), ('fmulz', ('fmul', a, c), b)),
|
||||
(('~ffma', '#a', ('fmul', b, '#c'), d), ('ffma', ('fmul', a, c), b, d)),
|
||||
(('~ffmaz', '#a', ('fmulz', b, '#c'), d), ('ffmaz', ('fmulz', a, c), b, d)),
|
||||
(('~ffmaz', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz', ('fmul', a, c), b, d)),
|
||||
(('~ffma_old', '#a', ('fmul', b, '#c'), d), ('ffma_old', ('fmul', a, c), b, d)),
|
||||
(('~ffmaz_old', '#a', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmulz', a, c), b, d)),
|
||||
(('~ffmaz_old', '#a(is_finite_not_zero)', ('fmulz', b, '#c'), d), ('ffmaz_old', ('fmul', a, c), b, d)),
|
||||
(('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
|
||||
(('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
|
||||
(('~fadd', '#a', ('ffma', b, c, '#d')), ('ffma', b, c, ('fadd', a, d))),
|
||||
(('~fadd', '#a', ('fneg', ('ffma', b, c, '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
|
||||
(('~fadd', '#a', ('ffmaz', b, c, '#d')), ('ffmaz', b, c, ('fadd', a, d))),
|
||||
(('~fadd', '#a', ('fneg', ('ffmaz', b, c, '#d'))), ('ffmaz', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
|
||||
(('~fadd', '#a', ('ffma_old', b, c, '#d')), ('ffma_old', b, c, ('fadd', a, d))),
|
||||
(('~fadd', '#a', ('fneg', ('ffma_old', b, c, '#d'))), ('ffma_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
|
||||
(('~fadd', '#a', ('ffmaz_old', b, c, '#d')), ('ffmaz_old', b, c, ('fadd', a, d))),
|
||||
(('~fadd', '#a', ('fneg', ('ffmaz_old', b, c, '#d'))), ('ffmaz_old', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
|
||||
(('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
|
||||
(('iand', '#a', ('iand', b, '#c')), ('iand', ('iand', a, c), b)),
|
||||
(('ior', '#a', ('ior', b, '#c')), ('ior', ('ior', a, c), b)),
|
||||
|
|
@ -3485,7 +3485,7 @@ for op in ['fadd', 'fdiv', 'fmod', 'fmul', 'fpow', 'frem', 'fsub']:
|
|||
optimizations += [((op, a, '#b(is_nan)'), NAN, 'true', TestStatus.XFAIL if op == 'fpow' else TestStatus.PASS)] # some opcodes are not commutative. XFAIL is fpow(1.0, NaN) producing NaN instead of 1.0.
|
||||
|
||||
# NaN propagation: Ternary opcodes. If any operand is NaN, replace the whole expression with NaN.
|
||||
for op in ['ffma', 'flrp']:
|
||||
for op in ['ffma_old', 'flrp']:
|
||||
optimizations += [((op, '#a(is_nan)', b, c), NAN)]
|
||||
optimizations += [((op, a, '#b(is_nan)', c), NAN)] # some opcodes are not commutative
|
||||
optimizations += [((op, a, b, '#c(is_nan)'), NAN)]
|
||||
|
|
@ -3562,7 +3562,7 @@ for i in range(2, 4 + 1):
|
|||
]
|
||||
|
||||
# This section contains "late" optimizations that should be run before
|
||||
# creating ffmas and calling regular optimizations for the final time.
|
||||
# creating ffma and calling regular optimizations for the final time.
|
||||
# Optimizations should go here if they help code generation and conflict
|
||||
# with the regular optimizations.
|
||||
before_ffma_optimizations = [
|
||||
|
|
@ -3729,23 +3729,23 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]):
|
|||
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
|
||||
# avoid fusing in cases where it's harmful.
|
||||
fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)'
|
||||
ffma = 'ffmaz' if mulz else 'ffma'
|
||||
ffma_old = 'ffmaz_old' if mulz else 'ffma_old'
|
||||
|
||||
fadd = 'fadd@{}(contract)'.format(sz)
|
||||
option = 'options->fuse_ffma{}'.format(sz)
|
||||
option_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz)
|
||||
option_old = 'options->fuse_ffma{}'.format(sz)
|
||||
option_old_with_abs = 'options->fuse_ffma{} && !options->avoid_ternary_with_fabs'.format(sz)
|
||||
|
||||
late_optimizations.extend([
|
||||
((fadd, (fmul, a, b), c), (ffma, a, b, c), option),
|
||||
((fadd, (fmul, a, b), c), (ffma_old, a, b, c), option_old),
|
||||
|
||||
((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c),
|
||||
(ffma, ('fneg', a), b, c), option),
|
||||
(ffma_old, ('fneg', a), b, c), option_old),
|
||||
|
||||
((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c),
|
||||
(ffma, ('fabs', a), ('fabs', b), c), option_with_abs),
|
||||
(ffma_old, ('fabs', a), ('fabs', b), c), option_old_with_abs),
|
||||
|
||||
((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c),
|
||||
(ffma, ('fneg', ('fabs', a)), ('fabs', b), c), option_with_abs),
|
||||
(ffma_old, ('fneg', ('fabs', a)), ('fabs', b), c), option_old_with_abs),
|
||||
])
|
||||
|
||||
late_optimizations.extend([
|
||||
|
|
@ -3843,10 +3843,10 @@ late_optimizations.extend([
|
|||
# A similar operation could apply to any ffma(#a, b, #(-a/2)), but this
|
||||
# particular operation is common for expanding values stored in a texture
|
||||
# from [0,1] to [-1,1].
|
||||
(('~ffma@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, 2.0, -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, -2.0, -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, -2.0, 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, 2.0, 1.0), ('flrp', 1.0, -1.0, ('fneg', a)), '!options->lower_flrp32'),
|
||||
(('~fadd@32', ('fmul(is_used_once)', 2.0, a), -1.0), ('flrp', -1.0, 1.0, a ), '!options->lower_flrp32'),
|
||||
(('~fadd@32', ('fmul(is_used_once)', -2.0, a), -1.0), ('flrp', -1.0, 1.0, ('fneg', a)), '!options->lower_flrp32'),
|
||||
(('~fadd@32', ('fmul(is_used_once)', -2.0, a), 1.0), ('flrp', 1.0, -1.0, a ), '!options->lower_flrp32'),
|
||||
|
|
@ -3870,10 +3870,10 @@ late_optimizations.extend([
|
|||
# Option 5: a * (2 - a)
|
||||
#
|
||||
# There are a lot of other possible combinations.
|
||||
(('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~ffma@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~ffma_old@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
(('~fmul@32', a, ('fadd', 2.0, ('fneg', a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
|
||||
|
||||
# we do these late so that we don't get in the way of creating ffmas
|
||||
|
|
@ -3901,21 +3901,21 @@ late_optimizations.extend([
|
|||
# optimization in these stages. See bugzilla #111490. In tessellation
|
||||
# stages applications seem to use 'precise' when necessary, so allow the
|
||||
# optimization in those stages.
|
||||
(('~fadd', ('ffma(is_used_once)', a, b, ('ffma(is_used_once)', c, d, ('ffma', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'),
|
||||
('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', ('ffma', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
|
||||
('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffma(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
|
||||
('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('fneg', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
|
||||
('ffma', ('fneg', a), b, ('ffma', ('fneg', c), d, ('ffma', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old(is_used_once)', c, d, ('ffma_old', e, 'f', ('fmul(is_used_once)', 'g(is_not_const_and_not_fsign)', 'h(is_not_const_and_not_fsign)')))), 'i(is_not_const)'),
|
||||
('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', ('ffma_old', 'g', 'h', 'i')))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
|
||||
('ffma_old', a, b, ('ffma_old', c, d, ('ffma_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffma_old(is_used_once)', a, b, ('fmul(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
|
||||
('ffma_old', a, b, ('ffma_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('fneg', ('ffma_old(is_used_once)', a, b, ('ffma_old', c, d, ('fmul(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
|
||||
('ffma_old', ('fneg', a), b, ('ffma_old', ('fneg', c), d, ('ffma_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
|
||||
(('~fadd', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
|
||||
('ffmaz', a, b, ('ffmaz', c, d, ('ffmaz', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffmaz(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
|
||||
('ffmaz', a, b, ('ffmaz', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('fneg', ('ffmaz(is_used_once)', a, b, ('ffmaz', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
|
||||
('ffmaz', ('fneg', a), b, ('ffmaz', ('fneg', c), d, ('ffmaz', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'),
|
||||
('ffmaz_old', a, b, ('ffmaz_old', c, d, ('ffmaz_old', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('ffmaz_old(is_used_once)', a, b, ('fmulz(is_used_once)', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'),
|
||||
('ffmaz_old', a, b, ('ffmaz_old', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
(('~fadd', ('fneg', ('ffmaz_old(is_used_once)', a, b, ('ffmaz_old', c, d, ('fmulz(is_used_once)', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)')))), 'g(is_not_const)'),
|
||||
('ffmaz_old', ('fneg', a), b, ('ffmaz_old', ('fneg', c), d, ('ffmaz_old', ('fneg', e), 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
|
||||
(('fmul(contract)', a, ('ldexp(is_used_once)', 1.0, b)), ('ldexp', a, b), 'options->has_ldexp'),
|
||||
(('frcp(contract,ninf)', ('ldexp', 1.0, b)), ('ldexp', 1.0, ('ineg', b)), 'options->has_ldexp'),
|
||||
|
|
@ -4042,7 +4042,7 @@ for op in ['fadd']:
|
|||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
]
|
||||
|
||||
for op in ['ffma', 'ffmaz']:
|
||||
for op in ['ffma_old', 'ffmaz_old']:
|
||||
late_optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
|
|
@ -4055,8 +4055,8 @@ for op in ['ffma', 'ffmaz']:
|
|||
late_optimizations += [
|
||||
(('fmulz@32', a, b),
|
||||
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'),
|
||||
(('ffmaz@32', a, b, c),
|
||||
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
|
||||
(('ffmaz_old@32', a, b, c),
|
||||
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma_old@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
|
||||
]
|
||||
|
||||
# mediump: If an opcode is surrounded by conversions, remove the conversions.
|
||||
|
|
@ -4076,7 +4076,7 @@ for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']:
|
|||
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b), 'true', TestStatus.UNSUPPORTED)]
|
||||
|
||||
# Ternary opcodes
|
||||
for op in ['ffma', 'flrp']:
|
||||
for op in ['ffma_old', 'flrp']:
|
||||
late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c), 'true', TestStatus.UNSUPPORTED)]
|
||||
|
||||
# Comparison opcodes
|
||||
|
|
@ -4131,7 +4131,7 @@ late_optimizations += [
|
|||
distribute_src_mods = [
|
||||
# Try to remove some spurious negations rather than pushing them down.
|
||||
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
|
||||
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
|
||||
(('ffma_old', ('fneg', a), ('fneg', b), c), ('ffma_old', a, b, c)),
|
||||
(('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
|
||||
(('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
|
||||
(('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
|
||||
|
|
@ -4142,7 +4142,7 @@ distribute_src_mods = [
|
|||
(('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),
|
||||
(('fabs', ('fmul_rtz(is_used_once)', a, b)), ('fmul_rtz', ('fabs', a), ('fabs', b))),
|
||||
|
||||
(('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
|
||||
(('fneg', ('ffma_old(is_used_once,nsz)', a, b, c)), ('ffma_old', ('fneg', a), b, ('fneg', c))),
|
||||
(('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c), 'true', TestStatus.XFAIL), # XFAIL is -flrp(0, -1, 0) is 0.0 instead of -0.0
|
||||
(('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
|
||||
|
||||
|
|
|
|||
|
|
@ -175,10 +175,10 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffmaz_old:
|
||||
src_mark_preserve_sz(&alu->src[2].src, NULL);
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if ((nir_analyze_fp_class(&state->fp_class_state, alu->src[2].src.ssa) & FP_CLASS_NEG_ZERO) &&
|
||||
!nir_alu_srcs_equal(alu, alu, 0, 1)) {
|
||||
src_mark_preserve_sz(&alu->src[0].src, NULL);
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ visit_undef_use(nir_src *src, struct visit_info *info)
|
|||
info->replace_undef_with_constant = true;
|
||||
if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
|
||||
alu->op != nir_op_fmulz &&
|
||||
(alu->op != nir_op_ffmaz || i == 2) &&
|
||||
(alu->op != nir_op_ffmaz_old || i == 2) &&
|
||||
alu->op != nir_op_pack_half_2x16_rtz_split)
|
||||
info->prefer_nan = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3253,7 +3253,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
|
|||
/* Reject exact ops because we are going to do an inexact transformation
|
||||
* with it.
|
||||
*/
|
||||
if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma) ||
|
||||
if (!alu || (alu->op != nir_op_fmul && alu->op != nir_op_ffma_old) ||
|
||||
nir_alu_instr_is_exact(alu) ||
|
||||
!gather_fmul_tess_coord(iter->instr, alu, vertex_index,
|
||||
&tess_coord_swizzle, &tess_coord_used,
|
||||
|
|
@ -3263,7 +3263,7 @@ find_tes_triangle_interp_1fmul_2ffma(struct linkage_info *linkage, unsigned i)
|
|||
/* The multiplication must only be used by ffma. */
|
||||
if (alu->op == nir_op_fmul) {
|
||||
nir_alu_instr *ffma = get_single_use_as_alu(&alu->def);
|
||||
if (!ffma || ffma->op != nir_op_ffma)
|
||||
if (!ffma || ffma->op != nir_op_ffma_old)
|
||||
return false;
|
||||
|
||||
if (num_fmuls == 1)
|
||||
|
|
@ -3388,8 +3388,8 @@ can_move_alu_across_interp(struct linkage_info *linkage, nir_alu_instr *alu)
|
|||
*/
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmulz:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
return GET_SRC_INTERP(alu, 0) == FLAG_INTERP_CONVERGENT ||
|
||||
GET_SRC_INTERP(alu, 1) == FLAG_INTERP_CONVERGENT;
|
||||
|
||||
|
|
@ -3915,7 +3915,7 @@ try_move_postdominator(struct linkage_info *linkage,
|
|||
defs[i] = nir_fmul(b, new_tes_loads[i],
|
||||
nir_channel(b, tesscoord, remap[i]));
|
||||
} else {
|
||||
defs[i] = nir_ffma(b, new_tes_loads[i],
|
||||
defs[i] = nir_ffma_old(b, new_tes_loads[i],
|
||||
nir_channel(b, tesscoord, remap[i]),
|
||||
defs[i - 1]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -836,8 +836,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
|||
push_fp_query(state, alu->src[0].src.ssa);
|
||||
push_fp_query(state, alu->src[1].src.ssa);
|
||||
return;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
case nir_op_flrp:
|
||||
push_fp_query(state, alu->src[0].src.ssa);
|
||||
push_fp_query(state, alu->src[1].src.ssa);
|
||||
|
|
@ -1320,9 +1320,9 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz: {
|
||||
bool mulz = alu->op == nir_op_ffmaz;
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old: {
|
||||
bool mulz = alu->op == nir_op_ffmaz_old;
|
||||
bool src_eq = nir_alu_srcs_equal(alu, alu, 0, 1);
|
||||
bool src_neg_eq = !nir_src_is_const(alu->src[0].src) && nir_alu_srcs_negative_equal(alu, alu, 0, 1);
|
||||
fp_class_mask r_mul = fmul_fp_class(src_res[0], src_res[1], mulz, src_eq, src_neg_eq);
|
||||
|
|
|
|||
|
|
@ -186,8 +186,8 @@ DEFINE_TEST(fmul, 2)
|
|||
DEFINE_TEST(fmulz, 2)
|
||||
DEFINE_TEST(fpow, 2)
|
||||
DEFINE_TEST(fdot2, 2)
|
||||
DEFINE_TEST(ffma, 3)
|
||||
DEFINE_TEST(ffmaz, 3)
|
||||
DEFINE_TEST(ffma_old, 3)
|
||||
DEFINE_TEST(ffmaz_old, 3)
|
||||
DEFINE_TEST(fabs, 1)
|
||||
DEFINE_TEST(fneg, 1)
|
||||
DEFINE_TEST(fexp2, 1)
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ protected:
|
|||
|
||||
nir_def *build_uniform_expr(nir_builder *b, unsigned bit_size, unsigned index)
|
||||
{
|
||||
return nir_fsqrt(b, nir_ffma(b, load_uniform(b, bit_size, index),
|
||||
return nir_fsqrt(b, nir_ffma_old(b, load_uniform(b, bit_size, index),
|
||||
nir_imm_floatN_t(b, 3.14, bit_size),
|
||||
load_ubo(b, bit_size, index)));
|
||||
}
|
||||
|
|
@ -254,13 +254,13 @@ protected:
|
|||
if (contains) {
|
||||
return shader_contains_uniform(b, bit_size, index) &&
|
||||
shader_contains_ubo(b, bit_size, index) &&
|
||||
shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
|
||||
shader_contains_alu_op(b, nir_op_ffma_old, bit_size) &&
|
||||
shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
|
||||
shader_contains_const_float(b, 3.14, bit_size);
|
||||
} else {
|
||||
return !shader_contains_uniform(b, bit_size, index) &&
|
||||
!shader_contains_ubo(b, bit_size, index) &&
|
||||
!shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
|
||||
!shader_contains_alu_op(b, nir_op_ffma_old, bit_size) &&
|
||||
!shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
|
||||
!shader_contains_const_float(b, 3.14, bit_size);
|
||||
}
|
||||
|
|
@ -553,7 +553,7 @@ load_interpolated_input_tes(nir_builder *b, gl_varying_slot slot,
|
|||
if (i == 0)
|
||||
def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
|
||||
else
|
||||
def[i] = nir_ffma(b, def[i], nir_channel(b, tesscoord, remap[i]),
|
||||
def[i] = nir_ffma_old(b, def[i], nir_channel(b, tesscoord, remap[i]),
|
||||
def[i - 1]);
|
||||
} else {
|
||||
def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
|
||||
|
|
@ -650,8 +650,8 @@ movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3],
|
|||
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmulz:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
return !divergent[0] || !divergent[1];
|
||||
|
||||
case nir_op_fdiv:
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \
|
|||
/* TES uses fadd and fmul for interpolation, so it's always present. */ \
|
||||
if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \
|
||||
(nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \
|
||||
nir_op_##alu != nir_op_ffma)) { \
|
||||
nir_op_##alu != nir_op_ffma_old)) { \
|
||||
ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
|
||||
} \
|
||||
} \
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ TEST_P(nir_serialize_all_test, alu_vec)
|
|||
TEST_P(nir_serialize_all_test, alu_two_components_full_swizzle)
|
||||
{
|
||||
nir_def *undef = nir_undef(b, 2, 32);
|
||||
nir_def *fma = nir_ffma(b, undef, undef, undef);
|
||||
nir_def *fma = nir_ffma_old(b, undef, undef, undef);
|
||||
nir_alu_instr *fma_alu = nir_def_as_alu(fma);
|
||||
|
||||
fma->num_components = GetParam();
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ matrix_multiply(struct vtn_builder *b,
|
|||
nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
|
||||
for (int j = src0_columns - 2; j >= 0; j--) {
|
||||
dest->elems[i]->def =
|
||||
nir_ffma(&b->nb, src0->elems[j]->def,
|
||||
nir_ffma_old(&b->nb, src0->elems[j]->def,
|
||||
nir_channel(&b->nb, src1->elems[i]->def, j),
|
||||
dest->elems[i]->def);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b,
|
|||
case GLSLstd450UMax: return nir_op_umax;
|
||||
case GLSLstd450SMax: return nir_op_imax;
|
||||
case GLSLstd450FMix: return nir_op_flrp;
|
||||
case GLSLstd450Fma: return nir_op_ffma;
|
||||
case GLSLstd450Fma: return nir_op_ffma_old;
|
||||
case GLSLstd450FindILsb: return nir_op_find_lsb;
|
||||
case GLSLstd450FindSMsb: return nir_op_ifind_msb;
|
||||
case GLSLstd450FindUMsb: return nir_op_ufind_msb;
|
||||
|
|
@ -430,7 +430,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
|||
nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i)));
|
||||
nir_def *result =
|
||||
nir_a_minus_bc(nb, nir_fmul(nb, eta, I),
|
||||
nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
|
||||
nir_ffma_old(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
|
||||
N);
|
||||
/* XXX: bcsel, or if statement? */
|
||||
dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
|
||||
|
|
|
|||
|
|
@ -655,7 +655,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
|
|||
if (lower)
|
||||
res = nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
|
||||
else
|
||||
res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
|
||||
res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
|
||||
|
||||
nb->fp_math_ctrl = save_math_ctrl;
|
||||
return res;
|
||||
|
|
@ -703,7 +703,7 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
|
|||
/* OpenCL FMA is not allowed to be split. */
|
||||
const bool save_math_ctrl = nb->fp_math_ctrl;
|
||||
nb->fp_math_ctrl |= nir_fp_exact;
|
||||
nir_def *res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
|
||||
nir_def *res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
|
||||
nb->fp_math_ctrl = save_math_ctrl;
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -726,7 +726,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
|||
case nir_op_fsub:
|
||||
dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG);
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
/* The scalar ALU doesn't support mad, so expand to mul+add so that we
|
||||
* don't unnecessarily fall back to non-earlypreamble. This is safe
|
||||
* because at least on a6xx+ mad is unfused.
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
|
|||
* times the derivatives of ij in screen space.
|
||||
*/
|
||||
nir_def *new_ij = ij;
|
||||
new_ij = nir_ffma(b, chan(off, 0), nir_ddx(b, ij), new_ij);
|
||||
new_ij = nir_ffma(b, chan(off, 1), nir_ddy(b, ij), new_ij);
|
||||
new_ij = nir_ffma_old(b, chan(off, 0), nir_ddx(b, ij), new_ij);
|
||||
new_ij = nir_ffma_old(b, chan(off, 1), nir_ddy(b, ij), new_ij);
|
||||
|
||||
return new_ij;
|
||||
} else {
|
||||
|
|
@ -52,8 +52,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr,
|
|||
|
||||
/* Get the offset value from pixel center for ij, and also for w. */
|
||||
nir_def *pos = sij;
|
||||
pos = nir_ffma(b, chan(off, 0), nir_ddx(b, sij), pos);
|
||||
pos = nir_ffma(b, chan(off, 1), nir_ddy(b, sij), pos);
|
||||
pos = nir_ffma_old(b, chan(off, 0), nir_ddx(b, sij), pos);
|
||||
pos = nir_ffma_old(b, chan(off, 1), nir_ddy(b, sij), pos);
|
||||
|
||||
/* convert back into screen space, dividing by the offset 1/w */
|
||||
return nir_fmul(b, nir_trim_vector(b, pos, 2),
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ for sz in [16, 32]:
|
|||
# (or fneg/fabs which are assumed to be propagated away), as a heuristic to
|
||||
# avoid fusing in cases where it's harmful.
|
||||
fmul = 'fmul(is_only_used_by_fadd)'
|
||||
ffma = 'ffma'
|
||||
ffma = 'ffma_old'
|
||||
|
||||
fadd = 'fadd@{}'.format(sz)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ import argparse
|
|||
import sys
|
||||
|
||||
trig_workarounds = [
|
||||
(('fsin', 'x@32'), ('fsin', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
(('fcos', 'x@32'), ('fcos', ('!ffma', 6.2831853, ('ffract', ('!ffma', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
(('fsin', 'x@32'), ('fsin', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
(('fcos', 'x@32'), ('fcos', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ tu_get_subsampled_coordinates(nir_builder *b,
|
|||
nir_def *hdr_scale = nir_channels(b, hdr0, 0x3);
|
||||
nir_def *hdr_offset = nir_channels(b, hdr0, 0xc);
|
||||
|
||||
nir_def *bin = nir_f2u16(b, nir_ffma(b, coords, hdr_scale, hdr_offset));
|
||||
nir_def *bin = nir_f2u16(b, nir_ffma_old(b, coords, hdr_scale, hdr_offset));
|
||||
nir_def *bin_idx = nir_iadd(b, nir_imul(b, nir_channel(b, bin, 1),
|
||||
nir_u2u16(b, bin_stride)),
|
||||
nir_channel(b, bin, 0));
|
||||
|
|
@ -166,7 +166,7 @@ tu_get_subsampled_coordinates(nir_builder *b,
|
|||
nir_def *bin_scale = nir_channels(b, bin_data, 0x3);
|
||||
nir_def *bin_offset = nir_channels(b, bin_data, 0xc);
|
||||
|
||||
return nir_ffma(b, coords, bin_scale, bin_offset);
|
||||
return nir_ffma_old(b, coords, bin_scale, bin_offset);
|
||||
}
|
||||
|
||||
/* Calculate the y coordinate in subsampled space of a given number of tiles
|
||||
|
|
|
|||
|
|
@ -3337,7 +3337,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
|
|||
case nir_op_ffloor:
|
||||
result = lp_build_floor(float_bld, src[0]);
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
|
||||
break;
|
||||
case nir_op_ffract: {
|
||||
|
|
|
|||
|
|
@ -1570,7 +1570,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
|
|||
[nir_op_fmax] = { TGSI_OPCODE_MAX, TGSI_OPCODE_DMAX },
|
||||
[nir_op_imax] = { TGSI_OPCODE_IMAX, TGSI_OPCODE_I64MAX },
|
||||
[nir_op_umax] = { TGSI_OPCODE_UMAX, TGSI_OPCODE_U64MAX },
|
||||
[nir_op_ffma] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
|
||||
[nir_op_ffma_old] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
|
||||
[nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 },
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1650,7 +1650,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
|
|||
[TGSI_OPCODE_MAX] = nir_op_fmax,
|
||||
[TGSI_OPCODE_SLT] = nir_op_slt,
|
||||
[TGSI_OPCODE_SGE] = nir_op_sge,
|
||||
[TGSI_OPCODE_MAD] = nir_op_ffma,
|
||||
[TGSI_OPCODE_MAD] = nir_op_ffma_old,
|
||||
[TGSI_OPCODE_LRP] = 0,
|
||||
[TGSI_OPCODE_SQRT] = nir_op_fsqrt,
|
||||
[TGSI_OPCODE_FRC] = nir_op_ffract,
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key)
|
|||
{
|
||||
/* For pixels within the copy area, texture from the source */
|
||||
nir_def *coords_el_2d =
|
||||
nir_ffma(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs);
|
||||
nir_ffma_old(b, nir_u2f32(b, logical_id_el_2d), trans_scale, trans_offs);
|
||||
|
||||
nir_def *coords_el_nd = coords_el_2d;
|
||||
if (layer) {
|
||||
|
|
|
|||
|
|
@ -180,11 +180,11 @@ lower_emit_vertex(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state
|
|||
/* pos = scaled_point_size * point_dir + point_pos */
|
||||
nir_def *point_dir = get_point_dir(b, state, i);
|
||||
nir_def *pos = nir_vec4(b,
|
||||
nir_ffma(b,
|
||||
nir_ffma_old(b,
|
||||
point_width,
|
||||
nir_channel(b, point_dir, 0),
|
||||
nir_channel(b, state->point_pos, 0)),
|
||||
nir_ffma(b,
|
||||
nir_ffma_old(b,
|
||||
point_height,
|
||||
nir_channel(b, point_dir, 1),
|
||||
nir_channel(b, state->point_pos, 1)),
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ static const struct etna_op_info etna_ops[] = {
|
|||
#define IOP(nir, op) IOPC(nir, op, TRUE)
|
||||
#define UOP(nir, op) UOPC(nir, op, TRUE)
|
||||
OP(mov, MOV), OP(fneg, MOV), OP(fabs, MOV), OP(fsat, MOV),
|
||||
OP(fmul, MUL), OP(fadd, ADD), OP(ffma, MAD),
|
||||
OP(fmul, MUL), OP(fadd, ADD), OP(ffma_old, MAD),
|
||||
OP(fdot2, DP2), OP(fdot3, DP3), OP(fdot4, DP4),
|
||||
OPC(fmin, SELECT, GT), OPC(fmax, SELECT, LT),
|
||||
OP(ffract, FRC), OP(frcp, RCP), OP(frsq, RSQ),
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data)
|
|||
* value, matching the expected behaviour of Vivante GPU.
|
||||
*/
|
||||
nir_def *lod_raw = nir_flog2(b, max_derivative);
|
||||
nir_def *lod_fixed_point = nir_ffma(b, lod_raw, nir_imm_float(b, 0.5f),
|
||||
nir_def *lod_fixed_point = nir_ffma_old(b, lod_raw, nir_imm_float(b, 0.5f),
|
||||
nir_imm_float(b, 393216.0f));
|
||||
|
||||
/* Extract 16-bit fractional part */
|
||||
|
|
@ -135,7 +135,7 @@ lower_tex_offset(nir_builder *b, nir_tex_instr *tex, UNUSED void *data)
|
|||
* This reverses the fixed-point encoding to get final LOD value
|
||||
*/
|
||||
nir_def *lod_float = nir_u2f32(b, lod_quantized);
|
||||
lod = nir_ffma(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f));
|
||||
lod = nir_ffma_old(b, lod_float, nir_imm_float(b, 1.0f/32.0f), nir_imm_float(b, 0.5f));
|
||||
|
||||
/* floor and convert to int */
|
||||
lod = nir_ffloor(b, lod);
|
||||
|
|
|
|||
|
|
@ -309,7 +309,7 @@ instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
|
|||
[nir_op_fadd] = {ADDs, ADDv},
|
||||
[nir_op_fsub] = {ADDs, ADDv},
|
||||
[nir_op_fmul] = {MULs, MULv},
|
||||
[nir_op_ffma] = {-1, MULADDv},
|
||||
[nir_op_ffma_old] = {-1, MULADDv},
|
||||
[nir_op_fmax] = {MAXs, MAXv},
|
||||
[nir_op_fmin] = {MINs, MINv},
|
||||
[nir_op_ffloor] = {FLOORs, FLOORv},
|
||||
|
|
@ -748,7 +748,7 @@ emit_tex(struct ir2_context *ctx, nir_tex_instr *tex)
|
|||
rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
|
||||
rcp->src[0].abs = true;
|
||||
|
||||
coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
|
||||
coord_xy = instr_create_alu_reg(ctx, nir_op_ffma_old, 3, instr);
|
||||
coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
|
||||
coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
|
||||
coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1);
|
||||
|
|
@ -868,7 +868,7 @@ extra_position_exports(struct ir2_context *ctx, bool binning)
|
|||
sc->src[0] = ctx->position;
|
||||
sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
|
||||
|
||||
wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
|
||||
wincoord = instr_create_alu(ctx, nir_op_ffma_old, 4);
|
||||
wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
|
||||
wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
|
||||
wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);
|
||||
|
|
@ -895,13 +895,13 @@ extra_position_exports(struct ir2_context *ctx, bool binning)
|
|||
|
||||
/* 8 max set in freedreno_screen.. unneeded instrs patched out */
|
||||
for (int i = 0; i < 8; i++) {
|
||||
instr = instr_create_alu(ctx, nir_op_ffma, 4);
|
||||
instr = instr_create_alu(ctx, nir_op_ffma_old, 4);
|
||||
instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
|
||||
instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
|
||||
instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
|
||||
instr->alu.export = 32;
|
||||
|
||||
instr = instr_create_alu(ctx, nir_op_ffma, 4);
|
||||
instr = instr_create_alu(ctx, nir_op_ffma_old, 4);
|
||||
instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
|
||||
instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
|
||||
instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ duplicate_def_at_use(nir_builder *b, nir_def *def, bool duplicate_for_ffma)
|
|||
|
||||
if (duplicate_for_ffma &&
|
||||
last_parent_instr->type == nir_instr_type_alu &&
|
||||
nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma) {
|
||||
nir_instr_as_alu(last_parent_instr)->op == nir_op_ffma_old) {
|
||||
last_parent_instr = NULL;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
|
|||
[nir_op_ftrunc] = ppir_op_trunc,
|
||||
[nir_op_fsat] = ppir_op_sat,
|
||||
[nir_op_fclamp_pos] = ppir_op_clamp_pos,
|
||||
[nir_op_ffma] = ppir_op_fmad,
|
||||
[nir_op_ffma_old] = ppir_op_fmad,
|
||||
};
|
||||
|
||||
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
|
||||
|
|
|
|||
|
|
@ -464,8 +464,8 @@ Converter::getOperation(nir_op op)
|
|||
return OP_EX2;
|
||||
case nir_op_ffloor:
|
||||
return OP_FLOOR;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
/* No FMA op pre-nvc0 */
|
||||
if (info->target < 0xc0)
|
||||
return OP_MAD;
|
||||
|
|
@ -2613,8 +2613,8 @@ Converter::visit(nir_alu_instr *insn)
|
|||
case nir_op_udiv:
|
||||
case nir_op_fexp2:
|
||||
case nir_op_ffloor:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
case nir_op_flog2:
|
||||
case nir_op_fmax:
|
||||
case nir_op_imax:
|
||||
|
|
@ -2668,11 +2668,11 @@ Converter::visit(nir_alu_instr *insn)
|
|||
|
||||
switch (op) {
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
i->dnz = this->info->io.mul_zero_wins;
|
||||
break;
|
||||
case nir_op_fmulz:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffmaz_old:
|
||||
i->dnz = true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -819,7 +819,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr)
|
|||
|
||||
[nir_op_fmin] = TGSI_OPCODE_MIN,
|
||||
[nir_op_fmax] = TGSI_OPCODE_MAX,
|
||||
[nir_op_ffma] = TGSI_OPCODE_MAD,
|
||||
[nir_op_ffma_old] = TGSI_OPCODE_MAD,
|
||||
};
|
||||
|
||||
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) {
|
||||
|
|
|
|||
|
|
@ -52,11 +52,11 @@ r300_nir_prepare_presubtract = [
|
|||
(('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
||||
(('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
||||
# Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
|
||||
(('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||
(('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||
(('ffma_old', 2.0, ('fneg', a), 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma_old', a, -2.0, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma_old', -2.0, a, 1.0), ('ffma_old', ('fneg', a), 2.0, 1.0)),
|
||||
(('ffma_old', 2.0, a, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))),
|
||||
(('ffma_old', a, 2.0, -1.0), ('fneg', ('ffma_old', ('fneg', a), 2.0, 1.0))),
|
||||
# x * 2 can be usually folded into output modifier for the previous
|
||||
# instruction, but that only works if x is a temporary. If it is input or
|
||||
# constant just convert it to add instead.
|
||||
|
|
@ -85,7 +85,7 @@ r300_nir_opt_algebraic_late = [
|
|||
|
||||
# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
|
||||
r300_nir_lower_flrp = [
|
||||
(('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a)))
|
||||
(('flrp', a, b, c), ('ffma_old', b, c, ('ffma_old', ('fneg', a), c, a)))
|
||||
]
|
||||
|
||||
# Lower fcsel_ge from ftrunc on r300
|
||||
|
|
|
|||
|
|
@ -1639,7 +1639,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
|||
return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
|
||||
case nir_op_fneu32:
|
||||
return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
|
||||
|
||||
case nir_op_fadd:
|
||||
|
|
@ -1956,11 +1956,11 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
|||
case nir_op_unpack_64_2x32_split_y:
|
||||
return emit_unpack_64_2x32_split(*alu, 1, shader);
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (!shader.has_flag(Shader::sh_legacy_math_rules))
|
||||
return emit_alu_op3(*alu, op3_muladd_ieee, shader);
|
||||
FALLTHROUGH;
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffmaz_old:
|
||||
return emit_alu_op3(*alu, op3_muladd, shader);
|
||||
|
||||
case nir_op_mov:
|
||||
|
|
|
|||
|
|
@ -1033,7 +1033,7 @@ Lower64BitToVec2::support_fp64_op(nir_op op) const
|
|||
case nir_op_fge32:
|
||||
case nir_op_flt32:
|
||||
case nir_op_fneu32:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_fadd:
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmax:
|
||||
|
|
|
|||
|
|
@ -246,8 +246,8 @@ lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
|
|||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
pos = nir_vec4(b,
|
||||
nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
|
||||
nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
|
||||
nir_ffma_old(b, half_w_delta, point_dir[i][0], point_pos_x),
|
||||
nir_ffma_old(b, half_h_delta, point_dir[i][1], point_pos_y),
|
||||
nir_channel(b, point_pos, 2),
|
||||
nir_channel(b, point_pos, 3));
|
||||
|
||||
|
|
|
|||
|
|
@ -1288,12 +1288,12 @@ static nir_def *alu_iter(nir_builder *b,
|
|||
.component = component,
|
||||
.io_semantics = io_semantics);
|
||||
|
||||
nir_def *result = nir_ffma(b,
|
||||
nir_def *result = nir_ffma_old(b,
|
||||
nir_channel(b, coeffs, 1),
|
||||
nir_channel(b, coords, 1),
|
||||
nir_channel(b, coeffs, 2));
|
||||
result =
|
||||
nir_ffma(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result);
|
||||
nir_ffma_old(b, nir_channel(b, coeffs, 0), nir_channel(b, coords, 0), result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3177,7 +3177,7 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
|
|||
instr = pco_fmul(&tctx->b, dest, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -658,7 +658,7 @@ static nir_def *resolve_samples(nir_builder *b,
|
|||
|
||||
switch (resolve_op) {
|
||||
case PVR_RESOLVE_BLEND:
|
||||
op = nir_op_ffma;
|
||||
op = nir_op_ffma_old;
|
||||
coeff = nir_imm_float(b, 1.0 / num_samples);
|
||||
break;
|
||||
|
||||
|
|
@ -683,7 +683,7 @@ static nir_def *resolve_samples(nir_builder *b,
|
|||
|
||||
for (unsigned i = 1; i < num_samples; i++) {
|
||||
if (resolve_op == PVR_RESOLVE_BLEND)
|
||||
accum = nir_ffma(b, samples[i], coeff, accum);
|
||||
accum = nir_ffma_old(b, samples[i], coeff, accum);
|
||||
else
|
||||
accum = nir_build_alu2(b, op, samples[i], accum);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1646,7 +1646,7 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
inst->saturate = true;
|
||||
break;
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
|
||||
brw_rnd_mode rnd =
|
||||
brw_rnd_mode_from_execution_mode(execution_mode);
|
||||
|
|
|
|||
|
|
@ -1706,7 +1706,7 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr,
|
|||
bld.emit(ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
|
||||
break;
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (nir_has_any_rounding_mode_enabled(execution_mode)) {
|
||||
elk_rnd_mode rnd =
|
||||
elk_rnd_mode_from_execution_mode(execution_mode);
|
||||
|
|
|
|||
|
|
@ -1670,7 +1670,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
|||
emit(SHR(dst, op[0], op[1]));
|
||||
break;
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (type_sz(dst.type) == 8) {
|
||||
dst_reg mul_dst = dst_reg(this, glsl_dvec4_type());
|
||||
emit(MUL(mul_dst, op[1], op[0]));
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ intel_nir_opt_peephole_ffma_instr(nir_builder *b,
|
|||
if (negate)
|
||||
mul_src[0] = nir_fneg(b, mul_src[0]);
|
||||
|
||||
nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
|
||||
nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma_old);
|
||||
ffma->fp_math_ctrl = b->fp_math_ctrl;
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
|
|
|
|||
|
|
@ -512,7 +512,7 @@ jay_emit_alu(struct nir_to_jay_state *nj, nir_alu_instr *alu)
|
|||
jay_BFI2(b, dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
jay_MAD(b, type, dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -374,7 +374,7 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
|
|||
case nir_op_ffloor:
|
||||
alu_funclike(ctx, instr, "floor");
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
alu_funclike(ctx, instr, "fma");
|
||||
break;
|
||||
case nir_op_ffract:
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ emit_arith_inst(struct st_translate *t,
|
|||
return nir_fmul(t->b, src[0], src[1]);
|
||||
|
||||
case GL_MAD_ATI:
|
||||
return nir_ffma(t->b, src[0], src[1], src[2]);
|
||||
return nir_ffma_old(t->b, src[0], src[1], src[2]);
|
||||
|
||||
case GL_LERP_ATI:
|
||||
return nir_flrp(t->b, src[2], src[1], src[0]);
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ lower_color(nir_builder *b, lower_drawpixels_state *state, nir_intrinsic_instr *
|
|||
/* Apply the scale and bias. */
|
||||
if (state->options->scale_and_bias) {
|
||||
/* MAD def, def, scale, bias; */
|
||||
def = nir_ffma(b, def, get_scale(b, state), get_bias(b, state));
|
||||
def = nir_ffma_old(b, def, get_scale(b, state), get_bias(b, state));
|
||||
}
|
||||
|
||||
if (state->options->pixel_maps) {
|
||||
|
|
|
|||
|
|
@ -2995,7 +2995,7 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
|
|||
case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
|
||||
case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
|
||||
case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
if (alu->def.bit_size == 64)
|
||||
ctx->mod.feats.dx11_1_double_extensions = 1;
|
||||
return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
|
||||
|
|
|
|||
|
|
@ -1056,7 +1056,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.fexp2(srcs(0)).into()
|
||||
}
|
||||
}
|
||||
nir_op_ffma => {
|
||||
nir_op_ffma_old => {
|
||||
let ftype = FloatType::from_bits(alu.def.bit_size().into());
|
||||
let dst;
|
||||
if alu.def.bit_size() == 64 {
|
||||
|
|
@ -1102,7 +1102,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
dst
|
||||
}
|
||||
nir_op_ffmaz => {
|
||||
nir_op_ffmaz_old => {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
// DNZ implies FTZ so we need FTZ set or this is invalid
|
||||
assert!(self.float_ctl.fp32.ftz);
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ vectorize_filter_cb(const nir_instr *instr, const void *data)
|
|||
case nir_op_fneu:
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmul_rtz:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_fsign:
|
||||
case nir_op_fsat:
|
||||
case nir_op_fmax:
|
||||
|
|
@ -268,8 +268,8 @@ lower_bit_size_cb(const nir_instr *instr, void *data)
|
|||
case nir_op_fneu:
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmul_rtz:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_ffma_old:
|
||||
case nir_op_ffmaz_old:
|
||||
case nir_op_fsign:
|
||||
case nir_op_fsat:
|
||||
case nir_op_fceil:
|
||||
|
|
|
|||
|
|
@ -2975,7 +2975,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
|
||||
|
||||
switch (instr->op) {
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffma_old:
|
||||
bi_fma_to(b, sz, dst, s0, s1, s2);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -218,7 +218,7 @@ build_blit_shader(const struct vk_meta_blit_key *key)
|
|||
|
||||
nir_def *out_coord_xy = nir_load_frag_coord(b);
|
||||
out_coord_xy = nir_trim_vector(b, out_coord_xy, 2);
|
||||
nir_def *src_coord_xy = nir_ffma(b, out_coord_xy, xy_scale, xy_off);
|
||||
nir_def *src_coord_xy = nir_ffma_old(b, out_coord_xy, xy_scale, xy_off);
|
||||
|
||||
nir_def *z_xform = load_struct_var(b, push, 1);
|
||||
nir_def *out_layer = nir_load_layer_id(b);
|
||||
|
|
@ -227,7 +227,7 @@ build_blit_shader(const struct vk_meta_blit_key *key)
|
|||
nir_def *z_off = nir_channel(b, z_xform, 0);
|
||||
nir_def *z_scale = nir_channel(b, z_xform, 1);
|
||||
nir_def *out_coord_z = nir_fadd_imm(b, nir_u2f32(b, out_layer), 0.5);
|
||||
nir_def *src_coord_z = nir_ffma(b, out_coord_z, z_scale, z_off);
|
||||
nir_def *src_coord_z = nir_ffma_old(b, out_coord_z, z_scale, z_off);
|
||||
src_coord = nir_vec3(b, nir_channel(b, src_coord_xy, 0),
|
||||
nir_channel(b, src_coord_xy, 1),
|
||||
src_coord_z);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue