diff --git a/spa/plugins/audioconvert/audioconvert.c b/spa/plugins/audioconvert/audioconvert.c index 113b7a3b1..390f6b6d6 100644 --- a/spa/plugins/audioconvert/audioconvert.c +++ b/spa/plugins/audioconvert/audioconvert.c @@ -634,6 +634,24 @@ static int impl_node_enum_params(void *object, int seq, SPA_PROP_INFO_type, SPA_POD_CHOICE_RANGE_Int(this->dir[1].conv.noise, 0, 16), SPA_PROP_INFO_params, SPA_POD_Bool(true)); break; + case 23: + spa_pod_builder_push_object(&b, &f[0], SPA_TYPE_OBJECT_PropInfo, id); + spa_pod_builder_add(&b, + SPA_PROP_INFO_name, SPA_POD_String("dither.method"), + SPA_PROP_INFO_description, SPA_POD_String("The dithering method"), + SPA_PROP_INFO_type, SPA_POD_String( + dither_method_info[this->dir[1].conv.method].label), + SPA_PROP_INFO_params, SPA_POD_Bool(true), + 0); + spa_pod_builder_prop(&b, SPA_PROP_INFO_labels, 0); + spa_pod_builder_push_struct(&b, &f[1]); + for (i = 0; i < SPA_N_ELEMENTS(dither_method_info); i++) { + spa_pod_builder_string(&b, dither_method_info[i].label); + spa_pod_builder_string(&b, dither_method_info[i].description); + } + spa_pod_builder_pop(&b, &f[1]); + param = spa_pod_builder_pop(&b, &f[0]); + break; default: return 0; } @@ -704,6 +722,8 @@ static int impl_node_enum_params(void *object, int seq, spa_pod_builder_bool(&b, p->resample_disabled); spa_pod_builder_string(&b, "dither.noise"); spa_pod_builder_int(&b, this->dir[1].conv.noise); + spa_pod_builder_string(&b, "dither.method"); + spa_pod_builder_string(&b, dither_method_info[this->dir[1].conv.method].label); spa_pod_builder_pop(&b, &f[1]); param = spa_pod_builder_pop(&b, &f[0]); break; @@ -775,6 +795,8 @@ static int audioconvert_set_param(struct impl *this, const char *k, const char * this->props.resample_disabled = spa_atob(s); else if (spa_streq(k, "dither.noise")) spa_atou32(s, &this->dir[1].conv.noise, 0); + else if (spa_streq(k, "dither.method")) + this->dir[1].conv.method = dither_method_from_label(s); else return 0; return 1; @@ -1410,14 +1432,15 @@ 
static int setup_out_convert(struct impl *this) out->conv.quantize = calc_width(&dst_info) * 8; out->conv.src_fmt = src_info.info.raw.format; out->conv.dst_fmt = dst_info.info.raw.format; + out->conv.rate = dst_info.info.raw.rate; out->conv.n_channels = dst_info.info.raw.channels; out->conv.cpu_flags = this->cpu_flags; if ((res = convert_init(&out->conv)) < 0) return res; - spa_log_debug(this->log, "%p: got converter features %08x:%08x quant:%d:%d passthrough:%d %s", this, - this->cpu_flags, out->conv.cpu_flags, + spa_log_debug(this->log, "%p: got converter features %08x:%08x quant:%d:%d:%d passthrough:%d %s", this, + this->cpu_flags, out->conv.cpu_flags, out->conv.method, out->conv.quantize, out->conv.noise, out->conv.is_passthrough, out->conv.func_name); diff --git a/spa/plugins/audioconvert/fmt-ops-avx2.c b/spa/plugins/audioconvert/fmt-ops-avx2.c index 5c9ea6793..0ced69274 100644 --- a/spa/plugins/audioconvert/fmt-ops-avx2.c +++ b/spa/plugins/audioconvert/fmt-ops-avx2.c @@ -550,7 +550,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[1]; __m128i out[4]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 16)) unrolled = n_samples & ~3; @@ -559,7 +559,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 4) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); @@ -574,7 +574,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(; n < n_samples; n++) { in[0] = _mm_load_ss(&s0[n]); in[0] = _mm_mul_ss(in[0], scale); - in[0] = _mm_min_ss(in[0], int_min); + in[0] = _mm_min_ss(in[0], int_max); *d = 
_mm_cvtss_si32(in[0]); d += n_channels; } @@ -590,7 +590,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m256 in[2]; __m256i out[2], t[2]; __m256 scale = _mm256_set1_ps(S32_SCALE); - __m256 int_min = _mm256_set1_ps(S32_MIN); + __m256 int_max = _mm256_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 32) && SPA_IS_ALIGNED(s1, 32)) @@ -602,8 +602,8 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm256_mul_ps(_mm256_load_ps(&s0[n]), scale); in[1] = _mm256_mul_ps(_mm256_load_ps(&s1[n]), scale); - in[0] = _mm256_min_ps(in[0], int_min); - in[1] = _mm256_min_ps(in[1], int_min); + in[0] = _mm256_min_ps(in[0], int_max); + in[1] = _mm256_min_ps(in[1], int_max); out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ @@ -636,7 +636,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[2]; __m128i out[2]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); in[0] = _mm_load_ss(&s0[n]); in[1] = _mm_load_ss(&s1[n]); @@ -644,7 +644,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_unpacklo_ps(in[0], in[1]); in[0] = _mm_mul_ps(in[0], scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); _mm_storel_epi64((__m128i*)d, out[0]); d += n_channels; @@ -661,7 +661,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m256 in[4]; __m256i out[4], t[4]; __m256 scale = _mm256_set1_ps(S32_SCALE); - __m256 int_min = _mm256_set1_ps(S32_MIN); + __m256 int_max = _mm256_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 32) && SPA_IS_ALIGNED(s1, 32) && @@ -677,10 +677,10 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[2] = _mm256_mul_ps(_mm256_load_ps(&s2[n]), scale); 
in[3] = _mm256_mul_ps(_mm256_load_ps(&s3[n]), scale); - in[0] = _mm256_min_ps(in[0], int_min); - in[1] = _mm256_min_ps(in[1], int_min); - in[2] = _mm256_min_ps(in[2], int_min); - in[3] = _mm256_min_ps(in[3], int_min); + in[0] = _mm256_min_ps(in[0], int_max); + in[1] = _mm256_min_ps(in[1], int_max); + in[2] = _mm256_min_ps(in[2], int_max); + in[3] = _mm256_min_ps(in[3], int_max); out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */ out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */ @@ -711,7 +711,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[4]; __m128i out[4]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); in[0] = _mm_load_ss(&s0[n]); in[1] = _mm_load_ss(&s1[n]); @@ -723,7 +723,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_unpacklo_ps(in[0], in[1]); in[0] = _mm_mul_ps(in[0], scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); _mm_storeu_si128((__m128i*)d, out[0]); d += n_channels; diff --git a/spa/plugins/audioconvert/fmt-ops-c.c b/spa/plugins/audioconvert/fmt-ops-c.c index f4d762c57..44ffb30c4 100644 --- a/spa/plugins/audioconvert/fmt-ops-c.c +++ b/spa/plugins/audioconvert/fmt-ops-c.c @@ -737,23 +737,24 @@ conv_f64d_to_f32_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * } /* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */ -static inline uint32_t +static inline int32_t xorshift(uint32_t *state) { uint32_t x = *state; x ^= x << 13; x ^= x >> 17; x ^= x << 5; - return (*state = x); + return (int32_t)(*state = x); } static inline void update_dither_c(struct convert *conv, uint32_t n_samples) { - uint32_t n, mask = conv->mask; - int32_t offset = conv->offset + conv->bias; + uint32_t n; + float *dither = conv->dither, scale = conv->scale; + uint32_t *state = 
&conv->random[0]; for (n = 0; n < n_samples; n++) - conv->dither[n] = offset + (int32_t)(xorshift(&conv->random[0]) & mask); + dither[n] = xorshift(state) * scale; } void @@ -771,6 +772,27 @@ conv_f32d_to_u8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * } } +void +conv_f32d_to_u8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_samples) +{ + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; + + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); + + for (i = 0; i < n_channels; i++) { + const float *s = src[i]; + uint8_t *d = dst[i]; + + for (j = 0; j < n_samples;) { + chunk = SPA_MIN(n_samples - j, dither_size); + for (k = 0; k < chunk; k++, j++) + d[j] = F32_TO_U8_D(s[j], dither[k]); + } + } +} + void conv_f32_to_u8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -813,6 +835,26 @@ conv_f32d_to_u8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * } } +void +conv_f32d_to_u8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_samples) +{ + const float **s = (const float **) src; + uint8_t *d = dst[0]; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; + + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); + + for (j = 0; j < n_samples;) { + chunk = SPA_MIN(n_samples - j, dither_size); + for (k = 0; k < chunk; k++, j++) { + for (i = 0; i < n_channels; i++) + *d++ = F32_TO_U8_D(s[i][j], dither[k]); + } + } +} + void conv_f32d_to_s8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -828,6 +870,27 @@ conv_f32d_to_s8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * } } +void +conv_f32d_to_s8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], 
const void * SPA_RESTRICT src[], + uint32_t n_samples) +{ + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; + + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); + + for (i = 0; i < n_channels; i++) { + const float *s = src[i]; + int8_t *d = dst[i]; + + for (j = 0; j < n_samples;) { + chunk = SPA_MIN(n_samples - j, dither_size); + for (k = 0; k < chunk; k++, j++) + d[j] = F32_TO_S8_D(s[j], dither[k]); + } + } +} + void conv_f32_to_s8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -870,6 +933,26 @@ conv_f32d_to_s8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * } } +void +conv_f32d_to_s8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_samples) +{ + const float **s = (const float **) src; + int8_t *d = dst[0]; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; + + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); + + for (j = 0; j < n_samples;) { + chunk = SPA_MIN(n_samples - j, dither_size); + for (k = 0; k < chunk; k++, j++) { + for (i = 0; i < n_channels; i++) + *d++ = F32_TO_S8_D(s[i][j], dither[k]); + } + } +} + void conv_f32d_to_alaw_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -944,21 +1027,19 @@ void conv_f32d_to_s16d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) { - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (i = 0; i < n_channels; i++) { const float *s = src[i]; int16_t *d = dst[i]; - int32_t 
v; for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); - for (k = 0; k < chunk; k++, j++) { - v = F32_TO_S24(s[j]) + conv->dither[k]; - d[j] = v >> 8; - } + chunk = SPA_MIN(n_samples - j, dither_size); + for (k = 0; k < chunk; k++, j++) + d[j] = F32_TO_S16_D(s[j], dither[k]); } } } @@ -1011,18 +1092,16 @@ conv_f32d_to_s16_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const { const float **s = (const float **) src; int16_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; - int32_t v; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { - for (i = 0; i < n_channels; i++) { - v = F32_TO_S24(s[i][j]) + conv->dither[k]; - *d++ = v >> 8; - } + for (i = 0; i < n_channels; i++) + *d++ = F32_TO_S16_D(s[i][j], dither[k]); } } } @@ -1046,19 +1125,17 @@ conv_f32d_to_s16s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons uint32_t n_samples) { const float **s = (const float **) src; - int16_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; - int32_t v; + uint16_t *d = dst[0]; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { - for (i = 0; i < n_channels; i++) { - v = F32_TO_S24(s[i][j]) + conv->dither[k]; - *d++ = bswap_16(v >> 8); - } + for (i = 0; i < n_channels; i++) + *d++ 
= F32_TO_S16S_D(s[i][j], dither[k]); } } } @@ -1110,18 +1187,19 @@ void conv_f32d_to_s32d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) { - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (i = 0; i < n_channels; i++) { const float *s = src[i]; int32_t *d = dst[i]; for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) - d[j] = F32_TO_S32(s[j]) + conv->dither[k]; + d[j] = F32_TO_S32_D(s[j], dither[k]); } } } @@ -1174,15 +1252,16 @@ conv_f32d_to_s32_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const { const float **s = (const float **) src; int32_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { for (i = 0; i < n_channels; i++) - *d++ = F32_TO_S32(s[i][j]) + conv->dither[k]; + *d++ = F32_TO_S32_D(s[i][j], dither[k]); } } } @@ -1192,7 +1271,7 @@ conv_f32d_to_s32s_c(struct convert *conv, void * SPA_RESTRICT dst[], const void uint32_t n_samples) { const float **s = (const float **) src; - int32_t *d = dst[0]; + uint32_t *d = dst[0]; uint32_t i, j, n_channels = conv->n_channels; for (j = 0; j < n_samples; j++) { @@ -1206,18 +1285,17 @@ conv_f32d_to_s32s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], 
cons uint32_t n_samples) { const float **s = (const float **) src; - int32_t *d = dst[0], v; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t *d = dst[0]; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { - for (i = 0; i < n_channels; i++) { - v = F32_TO_S32(s[i][j]) + conv->dither[k]; - *d++ = bswap_32(v); - } + for (i = 0; i < n_channels; i++) + *d++ = F32_TO_S32S_D(s[i][j], dither[k]); } } } @@ -1346,20 +1424,19 @@ void conv_f32d_to_s24d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) { - uint32_t i, j, k, chunk, n_channels = conv->n_channels; - int32_t v; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (i = 0; i < n_channels; i++) { const float *s = src[i]; uint8_t *d = dst[i]; for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { - v = F32_TO_S24(s[j]) + conv->dither[k]; - write_s24(d, v); + write_s24(d, F32_TO_S24_D(s[j], dither[k])); d += 3; } } @@ -1419,24 +1496,22 @@ conv_f32d_to_s24_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const { const float **s = (const float **) src; uint8_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; - int32_t v; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - 
update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { for (i = 0; i < n_channels; i++) { - v = F32_TO_S24(s[i][j]) + conv->dither[k]; - write_s24(d, v); + write_s24(d, F32_TO_S24_D(s[i][j], dither[k])); d += 3; } } } } - void conv_f32d_to_s24s_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -1459,17 +1534,16 @@ conv_f32d_to_s24s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons { const float **s = (const float **) src; uint8_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; - int32_t v; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { for (i = 0; i < n_channels; i++) { - v = F32_TO_S24(s[i][j]) + conv->dither[k]; - write_s24s(d, v); + write_s24s(d, F32_TO_S24_D(s[i][j], dither[k])); d += 3; } } @@ -1495,18 +1569,19 @@ void conv_f32d_to_s24_32d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) { - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (i = 0; i < n_channels; i++) { const float *s = src[i]; int32_t *d = dst[i]; for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - 
j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) - d[j] = F32_TO_S24_32(s[j]) + conv->dither[k]; + d[j] = F32_TO_S24_32_D(s[j], dither[k]); } } } @@ -1587,15 +1662,16 @@ conv_f32d_to_s24_32_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], co { const float **s = (const float **) src; int32_t *d = dst[0]; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { for (i = 0; i < n_channels; i++) - *d++ = F32_TO_S24_32(s[i][j]) + conv->dither[k]; + *d++ = F32_TO_S24_32_D(s[i][j], dither[k]); } } } @@ -1619,18 +1695,17 @@ conv_f32d_to_s24_32s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], c uint32_t n_samples) { const float **s = (const float **) src; - int32_t *d = dst[0], v; - uint32_t i, j, k, chunk, n_channels = conv->n_channels; + int32_t *d = dst[0]; + uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size; + float *dither = conv->dither; - update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size)); + update_dither_c(conv, SPA_MIN(n_samples, dither_size)); for (j = 0; j < n_samples;) { - chunk = SPA_MIN(n_samples - j, conv->dither_size); + chunk = SPA_MIN(n_samples - j, dither_size); for (k = 0; k < chunk; k++, j++) { - for (i = 0; i < n_channels; i++) { - v = F32_TO_S24_32(s[i][j]) + conv->dither[k]; - *d++ = bswap_32(v); - } + for (i = 0; i < n_channels; i++) + *d++ = F32_TO_S24_32S_D(s[i][j], dither[k]); } } } diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c index 4fd13a1d4..6d811914c 100644 --- 
a/spa/plugins/audioconvert/fmt-ops-sse2.c +++ b/spa/plugins/audioconvert/fmt-ops-sse2.c @@ -385,7 +385,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[1]; __m128i out[4]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 16)) unrolled = n_samples & ~3; @@ -394,7 +394,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(n = 0; n < unrolled; n += 4) { in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); @@ -409,7 +409,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R for(; n < n_samples; n++) { in[0] = _mm_load_ss(&s0[n]); in[0] = _mm_mul_ss(in[0], scale); - in[0] = _mm_min_ss(in[0], int_min); + in[0] = _mm_min_ss(in[0], int_max); *d = _mm_cvtss_si32(in[0]); d += n_channels; } @@ -425,7 +425,7 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[2]; __m128i out[2], t[2]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 16) && SPA_IS_ALIGNED(s1, 16)) @@ -437,8 +437,8 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale); in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), scale); - in[0] = _mm_min_ps(in[0], int_min); - in[1] = _mm_min_ps(in[1], int_min); + in[0] = _mm_min_ps(in[0], int_max); + in[1] = _mm_min_ps(in[1], int_max); out[0] = _mm_cvtps_epi32(in[0]); out[1] = _mm_cvtps_epi32(in[1]); @@ -459,7 +459,7 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_unpacklo_ps(in[0], in[1]); in[0] = 
_mm_mul_ps(in[0], scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); _mm_storel_epi64((__m128i*)d, out[0]); d += n_channels; @@ -476,7 +476,7 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R __m128 in[4]; __m128i out[4]; __m128 scale = _mm_set1_ps(S32_SCALE); - __m128 int_min = _mm_set1_ps(S32_MIN); + __m128 int_max = _mm_set1_ps(S32_MAX); if (SPA_IS_ALIGNED(s0, 16) && SPA_IS_ALIGNED(s1, 16) && @@ -492,10 +492,10 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[2] = _mm_mul_ps(_mm_load_ps(&s2[n]), scale); in[3] = _mm_mul_ps(_mm_load_ps(&s3[n]), scale); - in[0] = _mm_min_ps(in[0], int_min); - in[1] = _mm_min_ps(in[1], int_min); - in[2] = _mm_min_ps(in[2], int_min); - in[3] = _mm_min_ps(in[3], int_min); + in[0] = _mm_min_ps(in[0], int_max); + in[1] = _mm_min_ps(in[1], int_max); + in[2] = _mm_min_ps(in[2], int_max); + in[3] = _mm_min_ps(in[3], int_max); _MM_TRANSPOSE4_PS(in[0], in[1], in[2], in[3]); @@ -521,7 +521,7 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R in[0] = _mm_unpacklo_ps(in[0], in[1]); in[0] = _mm_mul_ps(in[0], scale); - in[0] = _mm_min_ps(in[0], int_min); + in[0] = _mm_min_ps(in[0], int_max); out[0] = _mm_cvtps_epi32(in[0]); _mm_storeu_si128((__m128i*)d, out[0]); d += n_channels; @@ -543,6 +543,92 @@ conv_f32d_to_s32_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi conv_f32d_to_s32_1s_sse2(conv, &d[i], &src[i], n_channels, n_samples); } +static inline void update_dither_sse2(struct convert *conv, uint32_t n_samples) +{ + uint32_t n; + const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t); + float *dither = SPA_PTR_ALIGN(conv->dither, 16, float); + __m128 scale = _mm_set1_ps(conv->scale), out[1]; + __m128i in[1], t[1]; + + for (n = 0; n < n_samples; n += 4) { + /* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */ + in[0] = 
_mm_load_si128((__m128i*)r); + t[0] = _mm_slli_epi32(in[0], 13); + in[0] = _mm_xor_si128(in[0], t[0]); + t[0] = _mm_srli_epi32(in[0], 17); + in[0] = _mm_xor_si128(in[0], t[0]); + t[0] = _mm_slli_epi32(in[0], 5); + in[0] = _mm_xor_si128(in[0], t[0]); + _mm_store_si128((__m128i*)r, in[0]); + + out[0] = _mm_cvtepi32_ps(in[0]); + out[0] = _mm_mul_ps(out[0], scale); + _mm_store_ps(&dither[n], out[0]); + } +} + +static void +conv_f32d_to_s32_1s_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src, + uint32_t n_channels, uint32_t n_samples) +{ + const float *s = src; + float *dither = SPA_PTR_ALIGN(conv->dither, 16, float); + int32_t *d = dst; + uint32_t n, unrolled; + __m128 in[1]; + __m128i out[4]; + __m128 scale = _mm_set1_ps(S32_SCALE); + __m128 int_max = _mm_set1_ps(S32_MAX); + + if (SPA_IS_ALIGNED(s, 16)) + unrolled = n_samples & ~3; + else + unrolled = 0; + + for(n = 0; n < unrolled; n += 4) { + in[0] = _mm_mul_ps(_mm_load_ps(&s[n]), scale); + in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[n])); + in[0] = _mm_min_ps(in[0], int_max); + out[0] = _mm_cvtps_epi32(in[0]); + out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1)); + out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2)); + out[3] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(2, 1, 0, 3)); + + d[0*n_channels] = _mm_cvtsi128_si32(out[0]); + d[1*n_channels] = _mm_cvtsi128_si32(out[1]); + d[2*n_channels] = _mm_cvtsi128_si32(out[2]); + d[3*n_channels] = _mm_cvtsi128_si32(out[3]); + d += 4*n_channels; + } + for(; n < n_samples; n++) { + in[0] = _mm_load_ss(&s[n]); + in[0] = _mm_mul_ss(in[0], scale); + in[0] = _mm_add_ss(in[0], _mm_load_ss(&dither[n])); + in[0] = _mm_min_ss(in[0], int_max); + *d = _mm_cvtss_si32(in[0]); + d += n_channels; + } +} + +void +conv_f32d_to_s32_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_samples) +{ + int32_t *d = dst[0]; + uint32_t i, k, chunk, n_channels = conv->n_channels; + + 
update_dither_sse2(conv, SPA_MIN(n_samples, conv->dither_size)); + + for(i = 0; i < n_channels; i++) { + const float *s = src[i]; + for(k = 0; k < n_samples; k += chunk) { + chunk = SPA_MIN(n_samples - k, conv->dither_size); + conv_f32d_to_s32_1s_dither_sse2(conv, &d[i + k*n_channels], &s[k], n_channels, chunk); + } + } +} + static void conv_interleave_32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c index ccd91f2d3..7ad743f57 100644 --- a/spa/plugins/audioconvert/fmt-ops.c +++ b/spa/plugins/audioconvert/fmt-ops.c @@ -170,13 +170,17 @@ static struct conv_info conv_table[] = /* from f32 */ MAKE(F32, U8, 0, conv_f32_to_u8_c), + MAKE(F32P, U8P, 0, conv_f32d_to_u8d_dither_c, 0, CONV_DITHER), MAKE(F32P, U8P, 0, conv_f32d_to_u8d_c), MAKE(F32, U8P, 0, conv_f32_to_u8d_c), + MAKE(F32P, U8, 0, conv_f32d_to_u8_dither_c, 0, CONV_DITHER), MAKE(F32P, U8, 0, conv_f32d_to_u8_c), MAKE(F32, S8, 0, conv_f32_to_s8_c), + MAKE(F32P, S8P, 0, conv_f32d_to_s8d_dither_c, 0, CONV_DITHER), MAKE(F32P, S8P, 0, conv_f32d_to_s8d_c), MAKE(F32, S8P, 0, conv_f32_to_s8d_c), + MAKE(F32P, S8, 0, conv_f32d_to_s8_dither_c, 0, CONV_DITHER), MAKE(F32P, S8, 0, conv_f32d_to_s8_c), MAKE(F32P, ALAW, 0, conv_f32d_to_alaw_c), @@ -224,7 +228,11 @@ static struct conv_info conv_table[] = MAKE(F32P, S32P, 0, conv_f32d_to_s32d_c), MAKE(F32, S32P, 0, conv_f32_to_s32d_c), +#if defined (HAVE_SSE2) + MAKE(F32P, S32, 0, conv_f32d_to_s32_dither_sse2, SPA_CPU_FLAG_SSE2, CONV_DITHER), +#endif MAKE(F32P, S32, 0, conv_f32d_to_s32_dither_c, 0, CONV_DITHER), + #if defined (HAVE_AVX2) MAKE(F32P, S32, 0, conv_f32d_to_s32_avx2, SPA_CPU_FLAG_AVX2), #endif @@ -357,17 +365,38 @@ static void impl_convert_free(struct convert *conv) conv->dither = NULL; } +static bool need_dither(uint32_t format) +{ + switch (format) { + case SPA_AUDIO_FORMAT_U8: + case SPA_AUDIO_FORMAT_U8P: + case 
SPA_AUDIO_FORMAT_S8: + case SPA_AUDIO_FORMAT_S8P: + case SPA_AUDIO_FORMAT_ULAW: + case SPA_AUDIO_FORMAT_ALAW: + case SPA_AUDIO_FORMAT_S16P: + case SPA_AUDIO_FORMAT_S16: + case SPA_AUDIO_FORMAT_S16_OE: + return true; + } + return false; +} + int convert_init(struct convert *conv) { const struct conv_info *info; - uint32_t i, shift, dither_flags; + uint32_t i, dither_flags; - shift = 24u - SPA_MIN(conv->quantize, 24u); - shift += conv->noise; + conv->scale = 1.0f / (float)(INT32_MAX >> SPA_MIN(conv->noise, 16u)); - conv->mask = (1ULL << (shift + 1)) - 1; - conv->offset = shift < 32 ? -(1ULL << shift) : 0; - conv->bias = shift > 0 ? 1 << (shift - 1) : 0; + /* disable dither if not needed */ + if (!need_dither(conv->dst_fmt)) + conv->method = DITHER_METHOD_NONE; + + /* don't use shaped for too low rates, it moves the noise to + * audible ranges */ + if (conv->method == DITHER_METHOD_SHAPED_5 && conv->rate < 32000) + conv->method = DITHER_METHOD_TRIANGULAR; dither_flags = 0; if (conv->method != DITHER_METHOD_NONE || conv->noise) diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h index f4373a122..0cb990ded 100644 --- a/spa/plugins/audioconvert/fmt-ops.h +++ b/spa/plugins/audioconvert/fmt-ops.h @@ -37,72 +37,92 @@ #define FMT_OPS_MAX_ALIGN 32 -#define U8_MIN 0 -#define U8_MAX 255 -#define U8_SCALE 127.5f -#define U8_OFFS 128 -#define U8_TO_F32(v) ((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0) -#define F32_TO_U8(v) (uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS) +#define U8_MIN 0u +#define U8_MAX 255u +#define U8_SCALE 127.5f +#define U8_OFFS 128.f +#define U8_TO_F32(v) ((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0) +#define F32_TO_U8(v) (uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS, U8_MIN, U8_MAX) +#define F32_TO_U8_D(v,d) (uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS + (d), U8_MIN, U8_MAX) -#define S8_MIN -127 -#define S8_MAX 127 -#define S8_MAX_F 127.0f -#define S8_SCALE 127.0f -#define S8_TO_F32(v) (((int8_t)(v)) * (1.0f / S8_SCALE)) -#define 
F32_TO_S8(v) (int8_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S8_SCALE) +#define S8_MIN -127 +#define S8_MAX 127 +#define S8_MAX_F 127.0f +#define S8_SCALE 127.0f +#define S8_TO_F32(v) (((int8_t)(v)) * (1.0f / S8_SCALE)) +#define F32_TO_S8(v) (int8_t)SPA_CLAMP((v) * S8_SCALE, S8_MIN, S8_MAX) +#define F32_TO_S8_D(v,d) (int8_t)SPA_CLAMP((v) * S8_SCALE + (d), S8_MIN, S8_MAX) -#define U16_MIN 0 -#define U16_MAX 65535 -#define U16_SCALE 32767.5f -#define U16_OFFS 32768 -#define U16_TO_F32(v) ((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0) -#define U16S_TO_F32(v) (((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0) -#define F32_TO_U16(v) (uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS) -#define F32_TO_U16S(v) ((uint16_t)bswap_16((uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS))) +#define U16_MIN 0u +#define U16_MAX 65535u +#define U16_SCALE 32767.5f +#define U16_OFFS 32768.f +#define U16_TO_F32(v) ((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0) +#define U16S_TO_F32(v) (((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0) +#define F32_TO_U16(v) (uint16_t)SPA_CLAMP((v) * U16_SCALE + U16_OFFS, U16_MIN, U16_MAX) +#define F32_TO_U16_D(v,d) (uint16_t)SPA_CLAMP((v) * U16_SCALE + U16_OFFS + (d), U16_MIN, U16_MAX) +#define F32_TO_U16S(v) bswap_16(F32_TO_U16(v)) +#define F32_TO_U16S_D(v,d) bswap_16(F32_TO_U16_D(v,d)) -#define S16_MIN -32767 -#define S16_MAX 32767 -#define S16_MAX_F 32767.0f -#define S16_SCALE 32767.0f -#define S16_TO_F32(v) (((int16_t)(v)) * (1.0f / S16_SCALE)) -#define S16S_TO_F32(v) (((int16_t)bswap_16((uint16_t)v)) * (1.0f / S16_SCALE)) -#define F32_TO_S16(v) (int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE) -#define F32_TO_S16S(v) ((int16_t)bswap_16((uint16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE))) +#define S16_MIN -32767 +#define S16_MAX 32767 +#define S16_MAX_F 32767.0f +#define S16_SCALE 32767.0f +#define S16_TO_F32(v) (((int16_t)(v)) * (1.0f / S16_SCALE)) +#define S16S_TO_F32(v) (((int16_t)bswap_16(v)) * (1.0f / 
S16_SCALE)) +#define F32_TO_S16(v) (int16_t)SPA_CLAMP((v) * S16_SCALE, S16_MIN, S16_MAX) +#define F32_TO_S16_D(v,d) (int16_t)SPA_CLAMP((v) * S16_SCALE + (d), S16_MIN, S16_MAX) +#define F32_TO_S16S(v) bswap_16(F32_TO_S16(v)) +#define F32_TO_S16S_D(v,d) bswap_16(F32_TO_S16_D(v,d)) -#define U24_MIN 0 -#define U24_MAX 16777215 -#define U24_SCALE 8388607.5f -#define U24_OFFS 8388608 -#define U24_TO_F32(v) ((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0) -#define F32_TO_U24(v) (uint32_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U24_SCALE) + U24_OFFS) +#define U24_MIN 0u +#define U24_MAX 16777215u +#define U24_SCALE 8388607.5f +#define U24_OFFS 8388608.f +#define U24_TO_F32(v) ((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0) +#define F32_TO_U24(v) (uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS, U24_MIN, U24_MAX) +#define F32_TO_U24_D(v,d) (uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS + (d), U24_MIN, U24_MAX) -#define S24_MIN -8388607 -#define S24_MAX 8388607 -#define S24_MAX_F 8388607.0f -#define S24_SCALE 8388607.0f -#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE)) -#define F32_TO_S24(v) (int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE) +#define S24_MIN -8388607 +#define S24_MAX 8388607 +#define S24_MAX_F 8388607.0f +#define S24_SCALE 8388607.0f +#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE)) +#define F32_TO_S24(v) (int32_t)SPA_CLAMP((v) * S24_SCALE, S24_MIN, S24_MAX) +#define F32_TO_S24_D(v,d) (int32_t)SPA_CLAMP((v) * S24_SCALE + (d), S24_MIN, S24_MAX) -#define U32_TO_F32(v) U24_TO_F32(((uint32_t)(v)) >> 8) -#define F32_TO_U32(v) (F32_TO_U24(v) << 8) +#define U32_MIN 0u +#define U32_MAX 4294967040u +#define U32_SCALE 2147483520.f +#define U32_OFFS 2147483520.f +#define U32_TO_F32(v) ((((uint32_t)(v)) * (1.0f / U32_OFFS)) - 1.0) +#define F32_TO_U32(v) (uint32_t)SPA_CLAMP((v) * U32_SCALE + U32_OFFS, U32_MIN, U32_MAX) +#define F32_TO_U32_D(v,d) (uint32_t)SPA_CLAMP((v) * U32_SCALE + U32_OFFS + (d), U32_MIN, U32_MAX) -#define S32_SCALE 2147483648.0f -#define S32_MIN 
2147483520.0f - -#define S32_TO_F32(v) S24_TO_F32(((int32_t)(v)) >> 8) -#define S32S_TO_F32(v) S24_TO_F32(((int32_t)bswap_32(v)) >> 8) -#define F32_TO_S32(v) (F32_TO_S24(v) << 8) -#define F32_TO_S32S(v) bswap_32((F32_TO_S24(v) << 8)) +#define S32_MIN -2147483520 +#define S32_MAX 2147483520 +#define S32_MAX_F 2147483520.f +#define S32_SCALE 2147483648.f +#define S32_TO_F32(v) (((int32_t)(v)) * (1.0f / S32_SCALE)) +#define S32S_TO_F32(v) (((int32_t)bswap_32(v)) * (1.0f / S32_SCALE)) +#define F32_TO_S32(v) (int32_t)SPA_CLAMP((v) * S32_SCALE, S32_MIN, S32_MAX) +#define F32_TO_S32_D(v,d) (int32_t)SPA_CLAMP((v) * S32_SCALE + (d), S32_MIN, S32_MAX) +#define F32_TO_S32S(v) bswap_32(F32_TO_S32(v)) +#define F32_TO_S32S_D(v,d) bswap_32(F32_TO_S32_D(v,d)) #define U24_32_TO_F32(v) U32_TO_F32((v)<<8) #define U24_32S_TO_F32(v) U32_TO_F32(((int32_t)bswap_32(v))<<8) #define F32_TO_U24_32(v) F32_TO_U24(v) #define F32_TO_U24_32S(v) bswap_32(F32_TO_U24(v)) +#define F32_TO_U24_32_D(v,d) F32_TO_U24_D(v,d) +#define F32_TO_U24_32S_D(v,d) bswap_32(F32_TO_U24_D(v,d)) #define S24_32_TO_F32(v) S32_TO_F32((v)<<8) #define S24_32S_TO_F32(v) S32_TO_F32(((int32_t)bswap_32(v))<<8) #define F32_TO_S24_32(v) F32_TO_S24(v) #define F32_TO_S24_32S(v) bswap_32(F32_TO_S24(v)) +#define F32_TO_S24_32_D(v,d) F32_TO_S24_D(v,d) +#define F32_TO_S24_32S_D(v,d) bswap_32(F32_TO_S24_D(v,d)) static inline uint32_t read_u24(const void *src) { @@ -190,16 +210,15 @@ struct convert { uint32_t src_fmt; uint32_t dst_fmt; uint32_t n_channels; + uint32_t rate; uint32_t cpu_flags; const char *func_name; unsigned int is_passthrough:1; - int32_t bias; - int32_t offset; - uint32_t mask; + float scale; uint32_t random[16 + FMT_OPS_MAX_ALIGN/4]; - int32_t *dither; + float *dither; uint32_t dither_size; float ns_data[MAX_NS]; @@ -295,13 +314,17 @@ DEFINE_FUNCTION(f64_to_f32d, c); DEFINE_FUNCTION(f64s_to_f32d, c); DEFINE_FUNCTION(f64d_to_f32, c); DEFINE_FUNCTION(f32d_to_u8d, c); +DEFINE_FUNCTION(f32d_to_u8d_dither, c); 
DEFINE_FUNCTION(f32_to_u8, c); DEFINE_FUNCTION(f32_to_u8d, c); DEFINE_FUNCTION(f32d_to_u8, c); +DEFINE_FUNCTION(f32d_to_u8_dither, c); DEFINE_FUNCTION(f32d_to_s8d, c); +DEFINE_FUNCTION(f32d_to_s8d_dither, c); DEFINE_FUNCTION(f32_to_s8, c); DEFINE_FUNCTION(f32_to_s8d, c); DEFINE_FUNCTION(f32d_to_s8, c); +DEFINE_FUNCTION(f32d_to_s8_dither, c); DEFINE_FUNCTION(f32d_to_alaw, c); DEFINE_FUNCTION(f32d_to_ulaw, c); DEFINE_FUNCTION(f32_to_u16, c); @@ -375,6 +398,7 @@ DEFINE_FUNCTION(s16_to_f32d, sse2); DEFINE_FUNCTION(s24_to_f32d, sse2); DEFINE_FUNCTION(s32_to_f32d, sse2); DEFINE_FUNCTION(f32d_to_s32, sse2); +DEFINE_FUNCTION(f32d_to_s32_dither, sse2); DEFINE_FUNCTION(f32_to_s16, sse2); DEFINE_FUNCTION(f32d_to_s16_2, sse2); DEFINE_FUNCTION(f32d_to_s16, sse2);