diff --git a/src/amd/common/nir/ac_nir_lower_tex_coords.c b/src/amd/common/nir/ac_nir_lower_tex_coords.c index 06d677baece..ed515bc9bbb 100644 --- a/src/amd/common/nir/ac_nir_lower_tex_coords.c +++ b/src/amd/common/nir/ac_nir_lower_tex_coords.c @@ -152,12 +152,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src sc = nir_fadd_imm(b, sc, 1.5); tc = nir_fadd_imm(b, tc, 1.5); } else { - sc = nir_ffma_imm2(b, sc, invma, 1.5); - tc = nir_ffma_imm2(b, tc, invma, 1.5); + sc = nir_ffma_weak_imm2(b, sc, invma, 1.5); + tc = nir_ffma_weak_imm2(b, tc, invma, 1.5); } if (tex->is_array && coords[3]) - id = nir_ffma_imm1(b, coords[3], 8.0, id); + id = nir_ffma_weak_imm1(b, coords[3], 8.0, id); *coord = nir_vec3(b, sc, tc, id); diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp index 08feac4fdc8..57276e9e4d8 100644 --- a/src/amd/compiler/tests/test_d3d11_derivs.cpp +++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp @@ -413,8 +413,8 @@ BEGIN_TEST(d3d11_derivs.cube) pbld.add_vsfs(vs, fs); //>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_ - //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 - //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 + //>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 + //>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da @@ -425,8 +425,8 @@ BEGIN_TEST(d3d11_derivs.cube) //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_ //>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_ - //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ + //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_ //; success = rx+1 == ry and rx+2 == rf //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_ @@ -458,10 +458,10 @@ BEGIN_TEST(d3d11_derivs.cube_array) pbld.add_vsfs(vs, fs); //>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_ - //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 - //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 //>> v1: %layer = v_rndne_f32 (kill)%_ //>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000 + //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 + //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da @@ -655,8 +655,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex) pbld.add_vsfs(vs, fs); //>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_ - //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 - //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 + //>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 + //>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da @@ -667,8 +667,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex) //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_ //>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_ - //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ + //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_ //; success = rx+1 == ry and rx+2 == rf //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_ diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 6e824a94b79..a58b98dd7d0 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -1361,33 +1361,33 @@ nir_uclamp(nir_builder *b, } static inline nir_def * -nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2) +nir_ffma_weak_imm12(nir_builder *build, nir_def *src0, double src1, double src2) { if (build->shader->options && build->shader->options->avoid_ternary_with_two_constants) return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2); else - return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), - nir_imm_floatN_t(build, src2, src0->bit_size)); + return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), + nir_imm_floatN_t(build, src2, src0->bit_size)); } static inline nir_def * -nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2) +nir_ffma_weak_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2) { - return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); + return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); } static inline nir_def * -nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2) +nir_ffma_weak_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2) { - return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); + return nir_ffma_weak(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); } static inline nir_def * nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1, nir_def *src2) { - return nir_ffma_old(build, nir_fneg(build, src1), src2, src0); + return nir_ffma_weak(build, nir_fneg(build, src1), src2, src0); } static inline nir_def * diff --git a/src/compiler/nir/nir_builtin_builder.c b/src/compiler/nir/nir_builtin_builder.c index 0df18ae4c1d..ae055121afc 100644 --- a/src/compiler/nir/nir_builtin_builder.c +++ b/src/compiler/nir/nir_builtin_builder.c @@ -199,12 +199,12 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise) } nir_def *abs_x = nir_fabs(b, x); - nir_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, -0.0187293, 0.0742610); + nir_def *p0_plus_xp1 = nir_ffma_weak_imm12(b, abs_x, -0.0187293, 0.0742610); nir_def *expr_tail = - nir_ffma_imm2(b, abs_x, - nir_ffma_imm2(b, abs_x, p0_plus_xp1, -0.2121144), - 1.5707288); + nir_ffma_weak_imm2(b, abs_x, + nir_ffma_weak_imm2(b, abs_x, p0_plus_xp1, -0.2121144), + 1.5707288); nir_def *result0 = nir_fmul(b, nir_fsign(b, x), nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size), @@ -217,7 +217,7 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise) nir_def *x2 = nir_fmul(b, x, x); nir_def *result1 = nir_fmul(b, x, - nir_ffma_imm12(b, x2, (1.0/6.0), 1.0)); + nir_ffma_weak_imm12(b, x2, (1.0/6.0), 1.0)); return nir_bcsel(b, nir_flt_imm(b, abs_x, 0.21502245), result1, @@ -277,7 +277,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x) nir_def *res = nir_imm_floatN_t(b, coeffs[0], bit_size); for (unsigned i = 1; i < ARRAY_SIZE(coeffs); ++i) { - res = nir_ffma_imm2(b, res, x_2, coeffs[i]); + res = nir_ffma_weak_imm2(b, res, x_2, coeffs[i]); } /* range-reduction fixup value */ @@ -359,7 +359,7 @@ nir_atan2(nir_builder *b, nir_def *y, nir_def *x) * coordinate system. */ nir_def *arc = - nir_ffma_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan)); + nir_ffma_weak_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan)); /* Rather convoluted calculation of the sign of the result. When x < 0 we * cannot use fsign because we need to be able to distinguish between diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 19954900c03..115d59cd65c 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -1075,7 +1075,7 @@ static nir_def * build_convert_inf_to_nan(nir_builder *b, nir_def *x) { /* Do x*0 + x. The multiplication by 0 can't be optimized out. */ - nir_def *fma = nir_ffma_imm1(b, x, 0, x); + nir_def *fma = nir_ffma_weak_imm1(b, x, 0, x); nir_def_as_alu(fma)->fp_math_ctrl = nir_fp_preserve_nan | nir_fp_preserve_inf | nir_fp_exact; return fma; } diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c index 88087702d3d..3adfaf838ae 100644 --- a/src/compiler/spirv/vtn_amd.c +++ b/src/compiler/spirv/vtn_amd.c @@ -20,7 +20,7 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode, def = nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); nir_def *st = nir_swizzle(&b->nb, def, (unsigned[]){1, 0}, 2); nir_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2)); - def = nir_ffma_imm2(&b->nb, st, invma, 0.5); + def = nir_ffma_weak_imm2(&b->nb, st, invma, 0.5); break; } case TimeAMD: { diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 02a1c9c1620..cd530571edb 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -503,11 +503,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Asinh: dest->def = nir_fmul(nb, nir_fsign(nb, src[0]), nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]), - nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f))))); + nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], 1.0f))))); break; case GLSLstd450Acosh: dest->def = nir_flog(nb, nir_fadd(nb, src[0], - nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f)))); + nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], -1.0f)))); break; case GLSLstd450Atanh: { dest->def = diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c index 2fc191ee8ae..94e99a27ca1 100644 --- a/src/gallium/auxiliary/vl/vl_compositor_cs.c +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c @@ -214,7 +214,7 @@ static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsig nir_def *offset = nir_channels(b, s->params[3], 0x3 << 2); if (flags & COORDS_CHROMA) return nir_fadd(b, src, offset); - return nir_ffma_imm1(b, offset, -0.5f, src); + return nir_ffma_weak_imm1(b, offset, -0.5f, src); } static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags) diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp index b10d1ed429f..ebd45b9ffd9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp @@ -91,15 +91,15 @@ LowerSinCos::lower(nir_instr *instr) assert(alu->op == nir_op_fsin || alu->op == nir_op_fcos); auto fract = nir_ffract(b, - nir_ffma_imm12(b, - nir_ssa_for_alu_src(b, alu, 0), - 0.15915494, - 0.5)); + nir_ffma_weak_imm12(b, + nir_ssa_for_alu_src(b, alu, 0), + 0.15915494, + 0.5)); auto normalized = m_gxf_level != R600 ? nir_fadd_imm(b, fract, -0.5) - : nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI); + : nir_ffma_weak_imm12(b, fract, 2.0f * M_PI, -M_PI); if (alu->op == nir_op_fsin) return nir_fsin_normalized_2_pi(b, normalized); diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.c b/src/vulkan/runtime/vk_nir_convert_ycbcr.c index 7dec0f76c5c..717bdc44a51 100644 --- a/src/vulkan/runtime/vk_nir_convert_ycbcr.c +++ b/src/vulkan/runtime/vk_nir_convert_ycbcr.c @@ -81,9 +81,9 @@ nir_convert_ycbcr_to_rgb(nir_builder *b, nir_def *expanded_channels = nir_vec4(b, - nir_ffma_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]), - nir_ffma_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]), - nir_ffma_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]), + nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]), + nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]), + nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]), nir_channel(b, raw_channels, 3)); if (model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)