nir: update ffma helpers to use new opcodes

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
Karol Herbst 2026-04-23 03:26:47 +02:00 committed by Marge Bot
parent aeea2e7c1f
commit 109d93dd98
10 changed files with 39 additions and 39 deletions

View file

@ -152,12 +152,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src
sc = nir_fadd_imm(b, sc, 1.5);
tc = nir_fadd_imm(b, tc, 1.5);
} else {
sc = nir_ffma_imm2(b, sc, invma, 1.5);
tc = nir_ffma_imm2(b, tc, invma, 1.5);
sc = nir_ffma_weak_imm2(b, sc, invma, 1.5);
tc = nir_ffma_weak_imm2(b, tc, invma, 1.5);
}
if (tex->is_array && coords[3])
id = nir_ffma_imm1(b, coords[3], 8.0, id);
id = nir_ffma_weak_imm1(b, coords[3], 8.0, id);
*coord = nir_vec3(b, sc, tc, id);

View file

@ -413,8 +413,8 @@ BEGIN_TEST(d3d11_derivs.cube)
pbld.add_vsfs(vs, fs);
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
@ -425,8 +425,8 @@ BEGIN_TEST(d3d11_derivs.cube)
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
//; success = rx+1 == ry and rx+2 == rf
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
@ -458,10 +458,10 @@ BEGIN_TEST(d3d11_derivs.cube_array)
pbld.add_vsfs(vs, fs);
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
@ -655,8 +655,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex)
pbld.add_vsfs(vs, fs);
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
@ -667,8 +667,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex)
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
//; success = rx+1 == ry and rx+2 == rf
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_

View file

@ -1361,33 +1361,33 @@ nir_uclamp(nir_builder *b,
}
/* Builds src0 * src1 + src2 where both src1 and src2 are immediates.
 *
 * The immediates are materialized as float constants at src0's bit size.
 * If the shader has compiler options and they set
 * avoid_ternary_with_two_constants, a separate multiply and add
 * (nir_fmul_imm followed by nir_fadd_imm) is emitted in place of the single
 * three-source instruction.
 */
static inline nir_def *
nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
nir_ffma_weak_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
{
/* options may be NULL, so check it before reading the flag. */
if (build->shader->options &&
build->shader->options->avoid_ternary_with_two_constants)
return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
else
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
nir_imm_floatN_t(build, src2, src0->bit_size));
return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
nir_imm_floatN_t(build, src2, src0->bit_size));
}
/* Builds src0 * src1 + src2 where the multiplier src1 is an immediate.
 *
 * src1 is materialized as a float constant at src0's bit size; src2 is passed
 * through unchanged as the addend.
 */
static inline nir_def *
nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
nir_ffma_weak_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
{
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
}
/* Builds src0 * src1 + src2 where the addend src2 is an immediate.
 *
 * src2 is materialized as a float constant at src0's bit size; src0 and src1
 * are passed through unchanged as the two multiplicands.
 */
static inline nir_def *
nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
nir_ffma_weak_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
{
return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
return nir_ffma_weak(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
}
/* Builds src0 - src1 * src2 (a - b*c) as a single multiply-add.
 *
 * The subtraction is expressed by negating src1 and using src0 as the addend:
 * (-src1) * src2 + src0.
 */
static inline nir_def *
nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1,
nir_def *src2)
{
return nir_ffma_old(build, nir_fneg(build, src1), src2, src0);
return nir_ffma_weak(build, nir_fneg(build, src1), src2, src0);
}
static inline nir_def *

View file

@ -199,12 +199,12 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise)
}
nir_def *abs_x = nir_fabs(b, x);
nir_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, -0.0187293, 0.0742610);
nir_def *p0_plus_xp1 = nir_ffma_weak_imm12(b, abs_x, -0.0187293, 0.0742610);
nir_def *expr_tail =
nir_ffma_imm2(b, abs_x,
nir_ffma_imm2(b, abs_x, p0_plus_xp1, -0.2121144),
1.5707288);
nir_ffma_weak_imm2(b, abs_x,
nir_ffma_weak_imm2(b, abs_x, p0_plus_xp1, -0.2121144),
1.5707288);
nir_def *result0 = nir_fmul(b, nir_fsign(b, x),
nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
@ -217,7 +217,7 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise)
nir_def *x2 = nir_fmul(b, x, x);
nir_def *result1 = nir_fmul(b,
x,
nir_ffma_imm12(b, x2, (1.0/6.0), 1.0));
nir_ffma_weak_imm12(b, x2, (1.0/6.0), 1.0));
return nir_bcsel(b,
nir_flt_imm(b, abs_x, 0.21502245),
result1,
@ -277,7 +277,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
nir_def *res = nir_imm_floatN_t(b, coeffs[0], bit_size);
for (unsigned i = 1; i < ARRAY_SIZE(coeffs); ++i) {
res = nir_ffma_imm2(b, res, x_2, coeffs[i]);
res = nir_ffma_weak_imm2(b, res, x_2, coeffs[i]);
}
/* range-reduction fixup value */
@ -359,7 +359,7 @@ nir_atan2(nir_builder *b, nir_def *y, nir_def *x)
* coordinate system.
*/
nir_def *arc =
nir_ffma_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan));
nir_ffma_weak_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan));
/* Rather convoluted calculation of the sign of the result. When x < 0 we
* cannot use fsign because we need to be able to distinguish between

View file

@ -1075,7 +1075,7 @@ static nir_def *
build_convert_inf_to_nan(nir_builder *b, nir_def *x)
{
/* Do x*0 + x: for finite x this yields x, while an infinite x gives
 * Inf*0 = NaN, so infinities are turned into NaN.
 *
 * The multiplication by 0 can't be optimized out because the instruction is
 * tagged below as exact and as preserving both NaN and Inf.
 */
nir_def *fma = nir_ffma_imm1(b, x, 0, x);
nir_def *fma = nir_ffma_weak_imm1(b, x, 0, x);
nir_def_as_alu(fma)->fp_math_ctrl = nir_fp_preserve_nan | nir_fp_preserve_inf | nir_fp_exact;
return fma;
}

View file

@ -20,7 +20,7 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
def = nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
nir_def *st = nir_swizzle(&b->nb, def, (unsigned[]){1, 0}, 2);
nir_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2));
def = nir_ffma_imm2(&b->nb, st, invma, 0.5);
def = nir_ffma_weak_imm2(&b->nb, st, invma, 0.5);
break;
}
case TimeAMD: {

View file

@ -503,11 +503,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
case GLSLstd450Asinh:
dest->def = nir_fmul(nb, nir_fsign(nb, src[0]),
nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f)))));
nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], 1.0f)))));
break;
case GLSLstd450Acosh:
dest->def = nir_flog(nb, nir_fadd(nb, src[0],
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f))));
nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], -1.0f))));
break;
case GLSLstd450Atanh: {
dest->def =

View file

@ -214,7 +214,7 @@ static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsig
nir_def *offset = nir_channels(b, s->params[3], 0x3 << 2);
if (flags & COORDS_CHROMA)
return nir_fadd(b, src, offset);
return nir_ffma_imm1(b, offset, -0.5f, src);
return nir_ffma_weak_imm1(b, offset, -0.5f, src);
}
static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)

View file

@ -91,15 +91,15 @@ LowerSinCos::lower(nir_instr *instr)
assert(alu->op == nir_op_fsin || alu->op == nir_op_fcos);
auto fract = nir_ffract(b,
nir_ffma_imm12(b,
nir_ssa_for_alu_src(b, alu, 0),
0.15915494,
0.5));
nir_ffma_weak_imm12(b,
nir_ssa_for_alu_src(b, alu, 0),
0.15915494,
0.5));
auto normalized =
m_gxf_level != R600
? nir_fadd_imm(b, fract, -0.5)
: nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI);
: nir_ffma_weak_imm12(b, fract, 2.0f * M_PI, -M_PI);
if (alu->op == nir_op_fsin)
return nir_fsin_normalized_2_pi(b, normalized);

View file

@ -81,9 +81,9 @@ nir_convert_ycbcr_to_rgb(nir_builder *b,
nir_def *expanded_channels =
nir_vec4(b,
nir_ffma_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]),
nir_ffma_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]),
nir_ffma_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]),
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]),
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]),
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]),
nir_channel(b, raw_channels, 3));
if (model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)