/*
 * Review notes for this patch. They sit ahead of the first "diff --git"
 * header and are not part of any hunk.
 *
 * Purpose: replace the fixed dither table in the audioconvert plugin with
 * noise produced at run time by a 32 bit xorshift generator, and add an
 * SSE2 variant of the f32 dither function.
 *
 * audioconvert.c
 *   impl_clear() additionally calls dither_free(&this->dither) when
 *   this->dither.free is set.
 *
 * dither-ops-c.c
 *   xorshift()        one xorshift step (shifts 13, 17, 5); updates *state
 *                     and returns the new value.
 *   update_dither_c() writes n_samples floats to dt->dither, each one the
 *                     xorshift output of dt->random[0] cast to int32_t and
 *                     multiplied by dt->scale.
 *   dither_f32_c()    no longer reads dt->tab. It calls update_dither_c()
 *                     once per call for min(n_samples, dt->dither_size)
 *                     values and then adds dt->dither[0..chunk) to every
 *                     channel, chunk by chunk. The same noise values are
 *                     therefore used for all channels and for every chunk
 *                     within one call.
 *
 * dither-ops-sse2.c (new file)
 *   update_dither_sse2() runs the same xorshift on four 32 bit lanes loaded
 *                     from the start of dt->random and stores them back to
 *                     the same place; the result is converted with
 *                     _mm_cvtepi32_ps, multiplied by dt->scale and stored
 *                     four floats at a time. The loop advances by 4, so it
 *                     can write up to 3 floats past n_samples; dither_init()
 *                     allocates dither_size + 8 floats.
 *   dither_f32_sse2() same chunking as dither_f32_c(). The loop that handles
 *                     16 samples per iteration with aligned loads and stores
 *                     is only used when SPA_IS_ALIGNED(.., 16) holds for
 *                     both channel pointers (presumably a 16 byte alignment
 *                     test; the macro is not defined in this patch). All
 *                     remaining samples go through the scalar loop.
 *   NOTE(review): `r` is declared const uint32_t * but is stored through
 *                     after a cast.
 *   NOTE(review): the unrolled loop also uses _mm_load_ps on dt->dither,
 *                     whose alignment is not checked; the buffer comes from
 *                     calloc(). Confirm 16 byte alignment on all targets.
 *
 * dither-ops.c
 *   DITHER_SIZE is now defined here as (1<<10).
 *   dither_table gets a dither_f32_sse2 entry under HAVE_SSE2.
 *   impl_dither_free() frees d->dither and sets it to NULL.
 *   dither_init() rejects intensity >= 32 (was 64), sets
 *   scale = 1.0f / (1ULL << (31 + intensity)), allocates the dither buffer
 *   (returns -errno when calloc() fails) and fills d->random[] from random().
 *   NOTE(review): the SSE2 table entry has cpu_flags 0, the same as the C
 *   fallback. find_dither_info() is not shown here; confirm the entry cannot
 *   be selected on a CPU without SSE2.
 *
 * dither-ops.h
 *   Removes DITHER_SIZE and DITHER_MOD; replaces tab[] / tab_idx in
 *   struct dither with random[16], dither, dither_size and scale; declares
 *   the f32 sse2 function under HAVE_SSE2.
 *
 * meson.build
 *   Adds dither-ops-sse2.c to the audioconvert_sse2 static library.
 *
 * NOTE(review): three "#include" lines carry no header name (one in
 * dither-ops-sse2.c, two in the dither-ops.h context). They are reproduced
 * as found; the names appear to be missing from this copy. The SSE2 file
 * uses __m128i and _mm_* intrinsics, so presumably <emmintrin.h>; confirm
 * against the repository.
 * NOTE(review): indentation and blank context lines inside the hunks were
 * laid out to agree with the hunk line counts; verify whitespace before
 * applying.
 */
diff --git a/spa/plugins/audioconvert/audioconvert.c b/spa/plugins/audioconvert/audioconvert.c
index 5f0b53426..35f7b5b2c 100644
--- a/spa/plugins/audioconvert/audioconvert.c
+++ b/spa/plugins/audioconvert/audioconvert.c
@@ -2613,6 +2613,8 @@ static int impl_clear(struct spa_handle *handle)
 
 	if (this->resample.free)
 		resample_free(&this->resample);
+	if (this->dither.free)
+		dither_free(&this->dither);
 	return 0;
 }
 
diff --git a/spa/plugins/audioconvert/dither-ops-c.c b/spa/plugins/audioconvert/dither-ops-c.c
index 57f731dfc..688709938 100644
--- a/spa/plugins/audioconvert/dither-ops-c.c
+++ b/spa/plugins/audioconvert/dither-ops-c.c
@@ -24,19 +24,45 @@
 
 #include "dither-ops.h"
 
+
+/* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */
+static inline uint32_t
+xorshift(uint32_t *state)
+{
+	uint32_t x = *state;
+	x ^= x << 13;
+	x ^= x >> 17;
+	x ^= x << 5;
+	return (*state = x);
+}
+
+static inline void update_dither_c(struct dither *dt, uint32_t n_samples)
+{
+	uint32_t n;
+	for (n = 0; n < n_samples; n++)
+		dt->dither[n] = ((int32_t)xorshift(&dt->random[0])) * dt->scale;
+}
+
 void dither_f32_c(struct dither *dt, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
-	uint32_t i, n;
+	uint32_t i, n, m, chunk;
 	const float **s = (const float**)src;
 	float **d = (float**)dst;
-	const float *t = dt->tab;
-	int tab_idx = dt->tab_idx;
+	float *t = dt->dither;
 
-	for (i = 0; i < dt->n_channels; i++) {
-		for (n = 0; n < n_samples; n++)
-			d[i][n] = s[i][n] + t[tab_idx++ & DITHER_MOD];
-		tab_idx += 61;
+	chunk = SPA_MIN(n_samples, dt->dither_size);
+	update_dither_c(dt, chunk);
+
+	for (n = 0; n < n_samples; n += chunk) {
+		chunk = SPA_MIN(n_samples - n, dt->dither_size);
+
+		for (i = 0; i < dt->n_channels; i++) {
+			float *di = &d[i][n];
+			const float *si = &s[i][n];
+
+			for (m = 0; m < chunk; m++)
+				di[m] = si[m] + t[m];
+		}
 	}
-	dt->tab_idx = tab_idx & DITHER_MOD;
 }
diff --git a/spa/plugins/audioconvert/dither-ops-sse2.c b/spa/plugins/audioconvert/dither-ops-sse2.c
new file mode 100644
index 000000000..ec4871566
--- /dev/null
+++ b/spa/plugins/audioconvert/dither-ops-sse2.c
@@ -0,0 +1,97 @@
+/* Spa
+ *
+ * Copyright © 2022 Wim Taymans
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "dither-ops.h"
+
+#include
+
+static inline void update_dither_sse2(struct dither *dt, uint32_t n_samples)
+{
+	uint32_t n;
+	const uint32_t *r = dt->random;
+	__m128 scale = _mm_set1_ps(dt->scale), out[1];
+	__m128i in[1], t[1];
+
+	for (n = 0; n < n_samples; n += 4) {
+
+		/* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */
+		in[0] = _mm_loadu_si128((__m128i*)r);
+		t[0] = _mm_slli_epi32(in[0], 13);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		t[0] = _mm_srli_epi32(in[0], 17);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		t[0] = _mm_slli_epi32(in[0], 5);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		_mm_storeu_si128((__m128i*)r, in[0]);
+
+		out[0] = _mm_cvtepi32_ps(in[0]);
+		out[0] = _mm_mul_ps(out[0], scale);
+		_mm_storeu_ps(&dt->dither[n], out[0]);
+	}
+}
+
+void dither_f32_sse2(struct dither *dt, void * SPA_RESTRICT dst[],
+		const void * SPA_RESTRICT src[], uint32_t n_samples)
+{
+	uint32_t i, n, m, chunk, unrolled;
+	const float **s = (const float**)src;
+	float **d = (float**)dst;
+	float *t = dt->dither;
+	__m128 in[4];
+
+	chunk = SPA_MIN(n_samples, dt->dither_size);
+	update_dither_sse2(dt, chunk);
+
+	for (n = 0; n < n_samples; n += chunk) {
+		chunk = SPA_MIN(n_samples - n, dt->dither_size);
+
+		for (i = 0; i < dt->n_channels; i++) {
+			float *di = &d[i][n];
+			const float *si = &s[i][n];
+
+			if (SPA_IS_ALIGNED(di, 16) &&
+			    SPA_IS_ALIGNED(si, 16))
+				unrolled = chunk & ~15;
+			else
+				unrolled = 0;
+
+			for (m = 0; m < unrolled; m += 16) {
+				in[0] = _mm_load_ps(&si[m     ]);
+				in[1] = _mm_load_ps(&si[m +  4]);
+				in[2] = _mm_load_ps(&si[m +  8]);
+				in[3] = _mm_load_ps(&si[m + 12]);
+				in[0] = _mm_add_ps(in[0], _mm_load_ps(&t[m     ]));
+				in[1] = _mm_add_ps(in[1], _mm_load_ps(&t[m +  4]));
+				in[2] = _mm_add_ps(in[2], _mm_load_ps(&t[m +  8]));
+				in[3] = _mm_add_ps(in[3], _mm_load_ps(&t[m + 12]));
+				_mm_store_ps(&di[m     ], in[0]);
+				_mm_store_ps(&di[m +  4], in[1]);
+				_mm_store_ps(&di[m +  8], in[2]);
+				_mm_store_ps(&di[m + 12], in[3]);
+			}
+			for (; m < chunk; m++)
+				di[m] = si[m] + t[m];
+		}
+	}
+}
diff --git a/spa/plugins/audioconvert/dither-ops.c b/spa/plugins/audioconvert/dither-ops.c
index 0428ce35a..834ce2b76 100644
--- a/spa/plugins/audioconvert/dither-ops.c
+++ b/spa/plugins/audioconvert/dither-ops.c
@@ -34,6 +34,8 @@
 
 #include "dither-ops.h"
 
+#define DITHER_SIZE (1<<10)
+
 typedef void (*dither_func_t) (struct dither *d, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples);
 
@@ -42,6 +44,9 @@ static const struct dither_info {
 	uint32_t cpu_flags;
 } dither_table[] =
 {
+#if defined (HAVE_SSE2)
+	{ dither_f32_sse2, 0 },
+#endif
 	{ dither_f32_c, 0 },
 };
 
@@ -61,6 +66,8 @@ static const struct dither_info *find_dither_info(uint32_t cpu_flags)
 static void impl_dither_free(struct dither *d)
 {
 	d->process = NULL;
+	free(d->dither);
+	d->dither = NULL;
 }
 
 int dither_init(struct dither *d)
@@ -72,11 +79,19 @@ int dither_init(struct dither *d)
 	if (info == NULL)
 		return -ENOTSUP;
 
-	if (d->intensity >= 64)
+	if (d->intensity >= 32)
 		return -EINVAL;
 
-	for (i = 0; i < SPA_N_ELEMENTS(d->tab); i++)
-		d->tab[i] = (drand48() - 0.5) / (UINT64_C(1) << d->intensity);
+	d->scale = 1.0f / (1ULL << (31 + d->intensity));
+
+	d->dither_size = DITHER_SIZE;
+	d->dither = calloc(d->dither_size + 8, sizeof(float));
+	if (d->dither == NULL)
+		return -errno;
+
+	for (i = 0; i < SPA_N_ELEMENTS(d->random); i++)
+		d->random[i] = random();
+
 	d->free = impl_dither_free;
 	d->process = info->process;
 
diff --git a/spa/plugins/audioconvert/dither-ops.h b/spa/plugins/audioconvert/dither-ops.h
index 19545472f..39cdd4874 100644
--- a/spa/plugins/audioconvert/dither-ops.h
+++ b/spa/plugins/audioconvert/dither-ops.h
@@ -29,9 +29,6 @@
 #include
 #include
 
-#define DITHER_SIZE (1<<8)
-#define DITHER_MOD (DITHER_SIZE-1)
-
 struct dither {
 	uint32_t intensity;
 #define DITHER_METHOD_NONE 0
@@ -48,8 +45,10 @@ struct dither {
 			const void * SPA_RESTRICT src[], uint32_t n_samples);
 	void (*free) (struct dither *d);
 
-	float tab[DITHER_SIZE];
-	int tab_idx;
+	uint32_t random[16];
+	float *dither;
+	uint32_t dither_size;
+	float scale;
 };
 
 int dither_init(struct dither *d);
@@ -87,5 +86,8 @@ void dither_##name##_##arch(struct dither *d, \
 #define DITHER_OPS_MAX_ALIGN 16
 
 DEFINE_FUNCTION(f32, c);
+#if defined(HAVE_SSE2)
+DEFINE_FUNCTION(f32, sse2);
+#endif
 
 #undef DEFINE_FUNCTION
diff --git a/spa/plugins/audioconvert/meson.build b/spa/plugins/audioconvert/meson.build
index e966a47d0..97b1962a6 100644
--- a/spa/plugins/audioconvert/meson.build
+++ b/spa/plugins/audioconvert/meson.build
@@ -22,7 +22,8 @@ if have_sse
 endif
 if have_sse2
   audioconvert_sse2 = static_library('audioconvert_sse2',
-    ['fmt-ops-sse2.c' ],
+    ['fmt-ops-sse2.c',
+     'dither-ops-sse2.c' ],
     c_args : [sse2_args, '-O3', '-DHAVE_SSE2'],
     dependencies : [ spa_dep ],
     install : false