mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 01:58:16 +02:00
nir: update ffma helpers to use new opcodes
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
parent
aeea2e7c1f
commit
109d93dd98
10 changed files with 39 additions and 39 deletions
|
|
@ -152,12 +152,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src
|
|||
sc = nir_fadd_imm(b, sc, 1.5);
|
||||
tc = nir_fadd_imm(b, tc, 1.5);
|
||||
} else {
|
||||
sc = nir_ffma_imm2(b, sc, invma, 1.5);
|
||||
tc = nir_ffma_imm2(b, tc, invma, 1.5);
|
||||
sc = nir_ffma_weak_imm2(b, sc, invma, 1.5);
|
||||
tc = nir_ffma_weak_imm2(b, tc, invma, 1.5);
|
||||
}
|
||||
|
||||
if (tex->is_array && coords[3])
|
||||
id = nir_ffma_imm1(b, coords[3], 8.0, id);
|
||||
id = nir_ffma_weak_imm1(b, coords[3], 8.0, id);
|
||||
|
||||
*coord = nir_vec3(b, sc, tc, id);
|
||||
|
||||
|
|
|
|||
|
|
@ -413,8 +413,8 @@ BEGIN_TEST(d3d11_derivs.cube)
|
|||
pbld.add_vsfs(vs, fs);
|
||||
|
||||
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
|
||||
//>> BB1
|
||||
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
|
||||
|
|
@ -425,8 +425,8 @@ BEGIN_TEST(d3d11_derivs.cube)
|
|||
|
||||
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
|
||||
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
|
||||
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
||||
//; success = rx+1 == ry and rx+2 == rf
|
||||
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
||||
|
|
@ -458,10 +458,10 @@ BEGIN_TEST(d3d11_derivs.cube_array)
|
|||
pbld.add_vsfs(vs, fs);
|
||||
|
||||
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> v1: %layer = v_rndne_f32 (kill)%_
|
||||
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
|
||||
//>> BB1
|
||||
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
|
||||
|
|
@ -655,8 +655,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex)
|
|||
pbld.add_vsfs(vs, fs);
|
||||
|
||||
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> v1: %y = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
||||
//>> v1: %x = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
||||
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
|
||||
//>> BB1
|
||||
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
|
||||
|
|
@ -667,8 +667,8 @@ BEGIN_TEST(d3d11_derivs.cube_txd_to_tex)
|
|||
|
||||
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
|
||||
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
|
||||
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
||||
//; success = rx+1 == ry and rx+2 == rf
|
||||
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
||||
|
|
|
|||
|
|
@ -1361,33 +1361,33 @@ nir_uclamp(nir_builder *b,
|
|||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_ffma_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
|
||||
nir_ffma_weak_imm12(nir_builder *build, nir_def *src0, double src1, double src2)
|
||||
{
|
||||
if (build->shader->options &&
|
||||
build->shader->options->avoid_ternary_with_two_constants)
|
||||
return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
|
||||
else
|
||||
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
|
||||
nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
|
||||
nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_ffma_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
|
||||
nir_ffma_weak_imm1(nir_builder *build, nir_def *src0, double src1, nir_def *src2)
|
||||
{
|
||||
return nir_ffma_old(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
|
||||
return nir_ffma_weak(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_ffma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
|
||||
nir_ffma_weak_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
|
||||
{
|
||||
return nir_ffma_old(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
return nir_ffma_weak(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_a_minus_bc(nir_builder *build, nir_def *src0, nir_def *src1,
|
||||
nir_def *src2)
|
||||
{
|
||||
return nir_ffma_old(build, nir_fneg(build, src1), src2, src0);
|
||||
return nir_ffma_weak(build, nir_fneg(build, src1), src2, src0);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
|
|
|
|||
|
|
@ -199,12 +199,12 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise)
|
|||
}
|
||||
nir_def *abs_x = nir_fabs(b, x);
|
||||
|
||||
nir_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, -0.0187293, 0.0742610);
|
||||
nir_def *p0_plus_xp1 = nir_ffma_weak_imm12(b, abs_x, -0.0187293, 0.0742610);
|
||||
|
||||
nir_def *expr_tail =
|
||||
nir_ffma_imm2(b, abs_x,
|
||||
nir_ffma_imm2(b, abs_x, p0_plus_xp1, -0.2121144),
|
||||
1.5707288);
|
||||
nir_ffma_weak_imm2(b, abs_x,
|
||||
nir_ffma_weak_imm2(b, abs_x, p0_plus_xp1, -0.2121144),
|
||||
1.5707288);
|
||||
|
||||
nir_def *result0 = nir_fmul(b, nir_fsign(b, x),
|
||||
nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
|
||||
|
|
@ -217,7 +217,7 @@ build_asin(nir_builder *b, nir_def *x, bool piecewise)
|
|||
nir_def *x2 = nir_fmul(b, x, x);
|
||||
nir_def *result1 = nir_fmul(b,
|
||||
x,
|
||||
nir_ffma_imm12(b, x2, (1.0/6.0), 1.0));
|
||||
nir_ffma_weak_imm12(b, x2, (1.0/6.0), 1.0));
|
||||
return nir_bcsel(b,
|
||||
nir_flt_imm(b, abs_x, 0.21502245),
|
||||
result1,
|
||||
|
|
@ -277,7 +277,7 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
|
|||
nir_def *res = nir_imm_floatN_t(b, coeffs[0], bit_size);
|
||||
|
||||
for (unsigned i = 1; i < ARRAY_SIZE(coeffs); ++i) {
|
||||
res = nir_ffma_imm2(b, res, x_2, coeffs[i]);
|
||||
res = nir_ffma_weak_imm2(b, res, x_2, coeffs[i]);
|
||||
}
|
||||
|
||||
/* range-reduction fixup value */
|
||||
|
|
@ -359,7 +359,7 @@ nir_atan2(nir_builder *b, nir_def *y, nir_def *x)
|
|||
* coordinate system.
|
||||
*/
|
||||
nir_def *arc =
|
||||
nir_ffma_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan));
|
||||
nir_ffma_weak_imm1(b, nir_b2fN(b, flip, bit_size), M_PI_2, nir_atan(b, tan));
|
||||
|
||||
/* Rather convoluted calculation of the sign of the result. When x < 0 we
|
||||
* cannot use fsign because we need to be able to distinguish between
|
||||
|
|
|
|||
|
|
@ -1075,7 +1075,7 @@ static nir_def *
|
|||
build_convert_inf_to_nan(nir_builder *b, nir_def *x)
|
||||
{
|
||||
/* Do x*0 + x. The multiplication by 0 can't be optimized out. */
|
||||
nir_def *fma = nir_ffma_imm1(b, x, 0, x);
|
||||
nir_def *fma = nir_ffma_weak_imm1(b, x, 0, x);
|
||||
nir_def_as_alu(fma)->fp_math_ctrl = nir_fp_preserve_nan | nir_fp_preserve_inf | nir_fp_exact;
|
||||
return fma;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
|
|||
def = nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
|
||||
nir_def *st = nir_swizzle(&b->nb, def, (unsigned[]){1, 0}, 2);
|
||||
nir_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2));
|
||||
def = nir_ffma_imm2(&b->nb, st, invma, 0.5);
|
||||
def = nir_ffma_weak_imm2(&b->nb, st, invma, 0.5);
|
||||
break;
|
||||
}
|
||||
case TimeAMD: {
|
||||
|
|
|
|||
|
|
@ -503,11 +503,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
|||
case GLSLstd450Asinh:
|
||||
dest->def = nir_fmul(nb, nir_fsign(nb, src[0]),
|
||||
nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
|
||||
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f)))));
|
||||
nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], 1.0f)))));
|
||||
break;
|
||||
case GLSLstd450Acosh:
|
||||
dest->def = nir_flog(nb, nir_fadd(nb, src[0],
|
||||
nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f))));
|
||||
nir_fsqrt(nb, nir_ffma_weak_imm2(nb, src[0], src[0], -1.0f))));
|
||||
break;
|
||||
case GLSLstd450Atanh: {
|
||||
dest->def =
|
||||
|
|
|
|||
|
|
@ -214,7 +214,7 @@ static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsig
|
|||
nir_def *offset = nir_channels(b, s->params[3], 0x3 << 2);
|
||||
if (flags & COORDS_CHROMA)
|
||||
return nir_fadd(b, src, offset);
|
||||
return nir_ffma_imm1(b, offset, -0.5f, src);
|
||||
return nir_ffma_weak_imm1(b, offset, -0.5f, src);
|
||||
}
|
||||
|
||||
static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
|
||||
|
|
|
|||
|
|
@ -91,15 +91,15 @@ LowerSinCos::lower(nir_instr *instr)
|
|||
assert(alu->op == nir_op_fsin || alu->op == nir_op_fcos);
|
||||
|
||||
auto fract = nir_ffract(b,
|
||||
nir_ffma_imm12(b,
|
||||
nir_ssa_for_alu_src(b, alu, 0),
|
||||
0.15915494,
|
||||
0.5));
|
||||
nir_ffma_weak_imm12(b,
|
||||
nir_ssa_for_alu_src(b, alu, 0),
|
||||
0.15915494,
|
||||
0.5));
|
||||
|
||||
auto normalized =
|
||||
m_gxf_level != R600
|
||||
? nir_fadd_imm(b, fract, -0.5)
|
||||
: nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI);
|
||||
: nir_ffma_weak_imm12(b, fract, 2.0f * M_PI, -M_PI);
|
||||
|
||||
if (alu->op == nir_op_fsin)
|
||||
return nir_fsin_normalized_2_pi(b, normalized);
|
||||
|
|
|
|||
|
|
@ -81,9 +81,9 @@ nir_convert_ycbcr_to_rgb(nir_builder *b,
|
|||
|
||||
nir_def *expanded_channels =
|
||||
nir_vec4(b,
|
||||
nir_ffma_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]),
|
||||
nir_ffma_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]),
|
||||
nir_ffma_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]),
|
||||
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 0), range_coeffs[2][0], range_coeffs[2][1]),
|
||||
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 1), range_coeffs[0][0], range_coeffs[0][1]),
|
||||
nir_ffma_weak_imm12(b, nir_channel(b, raw_channels, 2), range_coeffs[1][0], range_coeffs[1][1]),
|
||||
nir_channel(b, raw_channels, 3));
|
||||
|
||||
if (model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue