diff --git a/spa/plugins/audioconvert/fmt-ops-avx2.c b/spa/plugins/audioconvert/fmt-ops-avx2.c index d2227305a..5a9303f21 100644 --- a/spa/plugins/audioconvert/fmt-ops-avx2.c +++ b/spa/plugins/audioconvert/fmt-ops-avx2.c @@ -559,7 +559,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 4) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); @@ -606,8 +606,8 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _MM256_CLAMP_PS(in[0], int_min, int_max); in[1] = _MM256_CLAMP_PS(in[1], int_min, int_max); - out[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - out[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ out[0] = _mm256_slli_epi32(out[0], 8); out[1] = _mm256_slli_epi32(out[1], 8); @@ -649,7 +649,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(in[0], scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); _mm_storel_epi64((__m128i*)d, out[0]); d += n_channels; @@ -688,10 +688,10 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[2] = _MM256_CLAMP_PS(in[2], int_min, int_max); in[3] = _MM256_CLAMP_PS(in[3], int_min, int_max); - out[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - out[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ - out[2] = _mm256_cvttps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ - out[3] = _mm256_cvttps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ + out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + out[2] = _mm256_cvtps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ + out[3] = _mm256_cvtps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ out[0] = _mm256_slli_epi32(out[0], 8); out[1] = _mm256_slli_epi32(out[1], 8); out[2] = _mm256_slli_epi32(out[2], 8); @@ -735,7 +735,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(in[0], scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); _mm_storeu_si128((__m128i*)d, out[0]); d += n_channels; @@ -778,8 +778,8 @@ conv_f32d_to_s16_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 8) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_scale); in[1] = _mm_mul_ps(_mm_load_ps(&s0[n+4]), int_scale); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_packs_epi32(out[0], out[1]); d[0*n_channels] = _mm_extract_epi16(out[0], 0); @@ -821,8 +821,8 @@ conv_f32d_to_s16_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm256_mul_ps(_mm256_load_ps(&s0[n+0]), int_scale); in[1] = _mm256_mul_ps(_mm256_load_ps(&s1[n+0]), int_scale); - out[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - out[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ t[0] = _mm256_unpacklo_epi32(out[0], out[1]); /* a0 b0 a1 b1 a4 b4 a5 b5 */ t[1] = _mm256_unpackhi_epi32(out[0], out[1]); /* a2 b2 a3 b3 a6 b6 a7 b7 */ @@ -881,10 +881,10 @@ conv_f32d_to_s16_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[2] = _mm256_mul_ps(_mm256_load_ps(&s2[n]), int_scale); in[3] = _mm256_mul_ps(_mm256_load_ps(&s3[n]), int_scale); - t[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - t[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ - t[2] = _mm256_cvttps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ - t[3] = _mm256_cvttps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ + t[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + t[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + t[2] = _mm256_cvtps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ + t[3] = _mm256_cvtps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ t[0] = _mm256_packs_epi32(t[0], t[2]); /* a0 a1 a2 a3 c0 c1 c2 c3 a4 a5 a6 a7 c4 c5 c6 c7 */ t[1] = _mm256_packs_epi32(t[1], t[3]); /* b0 b1 b2 b3 d0 d1 d2 d3 b4 b5 b6 b7 d4 d5 d6 d7 */ @@ -979,10 +979,10 @@ conv_f32d_to_s16_4_avx2(struct convert *conv, void * SPA_RESTRICT dst[], const v in[2] = _mm256_mul_ps(_mm256_load_ps(&s2[n]), int_scale); in[3] = _mm256_mul_ps(_mm256_load_ps(&s3[n]), int_scale); - t[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - t[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ - t[2] = _mm256_cvttps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ - t[3] = _mm256_cvttps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ + t[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + t[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + t[2] = _mm256_cvtps_epi32(in[2]); /* c0 c1 c2 c3 c4 c5 c6 c7 */ + t[3] = _mm256_cvtps_epi32(in[3]); /* d0 d1 d2 d3 d4 d5 d6 d7 */ t[0] = _mm256_packs_epi32(t[0], t[2]); /* a0 a1 a2 a3 c0 c1 c2 c3 a4 a5 a6 a7 c4 c5 c6 c7 */ t[1] = _mm256_packs_epi32(t[1], t[3]); /* b0 b1 b2 b3 d0 d1 d2 d3 b4 b5 b6 b7 d4 d5 d6 d7 */ @@ -1044,10 +1044,10 @@ conv_f32d_to_s16_2_avx2(struct convert *conv, void * SPA_RESTRICT dst[], const v in[2] = _mm256_mul_ps(_mm256_load_ps(&s0[n+8]), int_scale); in[3] = _mm256_mul_ps(_mm256_load_ps(&s1[n+8]), int_scale); - out[0] = _mm256_cvttps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - out[1] = _mm256_cvttps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ - out[2] = _mm256_cvttps_epi32(in[2]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ - out[3] = _mm256_cvttps_epi32(in[3]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ + out[2] = _mm256_cvtps_epi32(in[2]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + out[3] = _mm256_cvtps_epi32(in[3]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ t[0] = _mm256_unpacklo_epi32(out[0], out[1]); /* a0 b0 a1 b1 a4 b4 a5 b5 */ t[1] = _mm256_unpackhi_epi32(out[0], out[1]); /* a2 b2 a3 b3 a6 b6 a7 b7 */ diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c index 7c713daa9..672aeeb01 100644 --- a/spa/plugins/audioconvert/fmt-ops-sse2.c +++ b/spa/plugins/audioconvert/fmt-ops-sse2.c @@ -402,7 +402,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 4) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); @@ -449,8 +449,8 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); in[1] = _MM_CLAMP_PS(in[1], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_slli_epi32(out[0], 8); out[1] = _mm_slli_epi32(out[1], 8); @@ -471,7 +471,7 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(in[0], scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); _mm_storel_epi64((__m128i*)d, out[0]); d += n_channels; @@ -512,10 +512,10 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R _MM_TRANSPOSE4_PS(in[0], in[1], in[2], in[3]); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); - out[2] = _mm_cvttps_epi32(in[2]); - out[3] = _mm_cvttps_epi32(in[3]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); + out[2] = _mm_cvtps_epi32(in[2]); + out[3] = _mm_cvtps_epi32(in[3]); out[0] = _mm_slli_epi32(out[0], 8); out[1] = _mm_slli_epi32(out[1], 8); out[2] = _mm_slli_epi32(out[2], 8); @@ -539,7 +539,7 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(in[0], scale); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); _mm_storeu_si128((__m128i*)d, out[0]); d += n_channels; @@ -626,7 +626,7 @@ conv_f32d_to_s32_1s_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, c in[0] = _mm_mul_ps(_mm_load_ps(&s[n]), scale); in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[n])); in[0] = _MM_CLAMP_PS(in[0], int_min, int_max); - out[0] = _mm_cvttps_epi32(in[0]); + out[0] = _mm_cvtps_epi32(in[0]); out[0] = _mm_slli_epi32(out[0], 8); out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); @@ -1035,8 +1035,8 @@ conv_f32_to_s16_1_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RES for(n = 0; n < unrolled; n += 8) { in[0] = _mm_mul_ps(_mm_load_ps(&s[n]), int_scale); in[1] = _mm_mul_ps(_mm_load_ps(&s[n+4]), int_scale); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_packs_epi32(out[0], out[1]); _mm_storeu_si128((__m128i*)(d+0), out[0]); d += 8; @@ -1085,8 +1085,8 @@ conv_f32d_to_s16_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 8) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_scale); in[1] = _mm_mul_ps(_mm_load_ps(&s0[n+4]), int_scale); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_packs_epi32(out[0], out[1]); d[0*n_channels] = _mm_extract_epi16(out[0], 0); @@ -1102,7 +1102,7 @@ conv_f32d_to_s16_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(; n < n_samples; n++) { in[0] = _mm_mul_ss(_mm_load_ss(&s0[n]), int_scale); in[0] = _MM_CLAMP_SS(in[0], int_min, int_max); - *d = _mm_cvttss_si32(in[0]); + *d = _mm_cvtss_si32(in[0]); d += n_channels; } } @@ -1130,8 +1130,8 @@ conv_f32d_to_s16_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_scale); in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), int_scale); - t[0] = _mm_cvttps_epi32(in[0]); - t[1] = _mm_cvttps_epi32(in[1]); + t[0] = _mm_cvtps_epi32(in[0]); + t[1] = _mm_cvtps_epi32(in[1]); t[0] = _mm_packs_epi32(t[0], t[0]); t[1] = _mm_packs_epi32(t[1], t[1]); @@ -1185,10 +1185,10 @@ conv_f32d_to_s16_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[2] = _mm_mul_ps(_mm_load_ps(&s2[n]), int_scale); in[3] = _mm_mul_ps(_mm_load_ps(&s3[n]), int_scale); - t[0] = _mm_cvttps_epi32(in[0]); - t[1] = _mm_cvttps_epi32(in[1]); - t[2] = _mm_cvttps_epi32(in[2]); - t[3] = _mm_cvttps_epi32(in[3]); + t[0] = _mm_cvtps_epi32(in[0]); + t[1] = _mm_cvtps_epi32(in[1]); + t[2] = _mm_cvtps_epi32(in[2]); + t[3] = _mm_cvtps_epi32(in[3]); t[0] = _mm_packs_epi32(t[0], t[2]); t[1] = _mm_packs_epi32(t[1], t[3]); @@ -1261,8 +1261,8 @@ conv_f32d_to_s16_1s_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, c in[1] = _mm_mul_ps(_mm_load_ps(&s0[n+4]), int_scale); in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[n])); in[1] = _mm_add_ps(in[1], _mm_load_ps(&dither[n+4])); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_packs_epi32(out[0], out[1]); d[0*n_channels] = _mm_extract_epi16(out[0], 0); @@ -1279,7 +1279,7 @@ conv_f32d_to_s16_1s_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, c in[0] = _mm_mul_ss(_mm_load_ss(&s0[n]), int_scale); in[0] = _mm_add_ss(in[0], _mm_load_ss(&dither[n])); in[0] = _MM_CLAMP_SS(in[0], int_min, int_max); - *d = _mm_cvttss_si32(in[0]); + *d = _mm_cvtss_si32(in[0]); d += n_channels; } } @@ -1326,8 +1326,8 @@ conv_f32_to_s16_1_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, con in[1] = _mm_mul_ps(_mm_load_ps(&s[n+4]), int_scale); in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[n])); in[1] = _mm_add_ps(in[1], _mm_load_ps(&dither[n+4])); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); out[0] = _mm_packs_epi32(out[0], out[1]); _mm_storeu_si128((__m128i*)(&d[n]), out[0]); } @@ -1382,10 +1382,10 @@ conv_f32d_to_s16_2_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const v in[2] = _mm_mul_ps(_mm_load_ps(&s0[n+4]), int_scale); in[3] = _mm_mul_ps(_mm_load_ps(&s1[n+4]), int_scale); - out[0] = _mm_cvttps_epi32(in[0]); - out[1] = _mm_cvttps_epi32(in[1]); - out[2] = _mm_cvttps_epi32(in[2]); - out[3] = _mm_cvttps_epi32(in[3]); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_cvtps_epi32(in[1]); + out[2] = _mm_cvtps_epi32(in[2]); + out[3] = _mm_cvtps_epi32(in[3]); out[0] = _mm_packs_epi32(out[0], out[2]); out[1] = _mm_packs_epi32(out[1], out[3]); diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h index 4aec16a39..ae42a11a9 100644 --- a/spa/plugins/audioconvert/fmt-ops.h +++ b/spa/plugins/audioconvert/fmt-ops.h @@ -35,12 +35,12 @@ #include #include -#define ROUND(v) (v) +#define f32_round(a) lrintf(a) #define ITOF(type,v,scale,offs) \ (((type)(v)) * (1.0f / (scale)) - (offs)) #define FTOI(type,v,scale,offs,dither,min,max) \ - (type)ROUND(SPA_CLAMP((v) * (scale) + (offs) + (dither), min, max)) + (type)f32_round(SPA_CLAMP((v) * (scale) + (offs) + (dither), min, max)) #define FMT_OPS_MAX_ALIGN 32 diff --git a/spa/plugins/audioconvert/test-fmt-ops.c b/spa/plugins/audioconvert/test-fmt-ops.c index 1880e16a1..75bfb4453 100644 --- a/spa/plugins/audioconvert/test-fmt-ops.c +++ b/spa/plugins/audioconvert/test-fmt-ops.c @@ -652,7 +652,7 @@ static void run_test_noise(uint32_t fmt, uint32_t noise, uint32_t flags) } convert_process(&conv, op, ip, N_SAMPLES); - range = (1 << conv.noise) - 1; + range = 1 << conv.noise; all_zero = true; for (i = 0; i < conv.n_channels * N_SAMPLES; i++) {