diff --git a/spa/plugins/audioconvert/audioconvert.c b/spa/plugins/audioconvert/audioconvert.c
index 113b7a3b1..390f6b6d6 100644
--- a/spa/plugins/audioconvert/audioconvert.c
+++ b/spa/plugins/audioconvert/audioconvert.c
@@ -634,6 +634,24 @@ static int impl_node_enum_params(void *object, int seq,
 				SPA_PROP_INFO_type, SPA_POD_CHOICE_RANGE_Int(this->dir[1].conv.noise, 0, 16),
 				SPA_PROP_INFO_params, SPA_POD_Bool(true));
 			break;
+		case 23:	/* "dither.method": current method label plus every selectable label/description */
+			spa_pod_builder_push_object(&b, &f[0], SPA_TYPE_OBJECT_PropInfo, id);
+			spa_pod_builder_add(&b,
+				SPA_PROP_INFO_name, SPA_POD_String("dither.method"),
+				SPA_PROP_INFO_description, SPA_POD_String("The dithering method"),
+				SPA_PROP_INFO_type, SPA_POD_String(
+					dither_method_info[this->dir[1].conv.method].label),
+				SPA_PROP_INFO_params, SPA_POD_Bool(true),
+				0);
+			spa_pod_builder_prop(&b, SPA_PROP_INFO_labels, 0);	/* labels are emitted as a struct of string pairs */
+			spa_pod_builder_push_struct(&b, &f[1]);
+			for (i = 0; i < SPA_N_ELEMENTS(dither_method_info); i++) {	/* bound by the table that is actually indexed */
+				spa_pod_builder_string(&b, dither_method_info[i].label);
+				spa_pod_builder_string(&b, dither_method_info[i].description);
+			}
+			spa_pod_builder_pop(&b, &f[1]);
+			param = spa_pod_builder_pop(&b, &f[0]);
+			break;
 		default:
 			return 0;
 		}
@@ -704,6 +722,8 @@ static int impl_node_enum_params(void *object, int seq,
 			spa_pod_builder_bool(&b, p->resample_disabled);
 			spa_pod_builder_string(&b, "dither.noise");
 			spa_pod_builder_int(&b, this->dir[1].conv.noise);
+			spa_pod_builder_string(&b, "dither.method");
+			spa_pod_builder_string(&b, dither_method_info[this->dir[1].conv.method].label);
 			spa_pod_builder_pop(&b, &f[1]);
 			param = spa_pod_builder_pop(&b, &f[0]);
 			break;
@@ -775,6 +795,8 @@ static int audioconvert_set_param(struct impl *this, const char *k, const char *
 		this->props.resample_disabled = spa_atob(s);
 	else if (spa_streq(k, "dither.noise"))
 		spa_atou32(s, &this->dir[1].conv.noise, 0);
+	else if (spa_streq(k, "dither.method"))
+		this->dir[1].conv.method = dither_method_from_label(s);
 	else
 		return 0;
 	return 1;
@@ -1410,14 +1432,15 @@ static int setup_out_convert(struct impl *this)
 	out->conv.quantize = calc_width(&dst_info) * 8;
 	out->conv.src_fmt = src_info.info.raw.format;
 	out->conv.dst_fmt = dst_info.info.raw.format;
+	out->conv.rate = dst_info.info.raw.rate;
 	out->conv.n_channels = dst_info.info.raw.channels;
 	out->conv.cpu_flags = this->cpu_flags;
 
 	if ((res = convert_init(&out->conv)) < 0)
 		return res;
 
-	spa_log_debug(this->log, "%p: got converter features %08x:%08x quant:%d:%d passthrough:%d %s", this,
-			this->cpu_flags, out->conv.cpu_flags,
+	spa_log_debug(this->log, "%p: got converter features %08x:%08x quant:%d:%d:%d passthrough:%d %s", this,
+			this->cpu_flags, out->conv.cpu_flags, out->conv.method,
 			out->conv.quantize, out->conv.noise,
 			out->conv.is_passthrough, out->conv.func_name);
 
diff --git a/spa/plugins/audioconvert/fmt-ops-avx2.c b/spa/plugins/audioconvert/fmt-ops-avx2.c
index 5c9ea6793..0ced69274 100644
--- a/spa/plugins/audioconvert/fmt-ops-avx2.c
+++ b/spa/plugins/audioconvert/fmt-ops-avx2.c
@@ -550,7 +550,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m128 in[1];
 	__m128i out[4];
 	__m128 scale = _mm_set1_ps(S32_SCALE);
-	__m128 int_min = _mm_set1_ps(S32_MIN);
+	__m128 int_max = _mm_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 16))
 		unrolled = n_samples & ~3;
@@ -559,7 +559,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 
 	for(n = 0; n < unrolled; n += 4) {
 		in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1));
 		out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
@@ -574,7 +574,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	for(; n < n_samples; n++) {
 		in[0] = _mm_load_ss(&s0[n]);
 		in[0] = _mm_mul_ss(in[0], scale);
-		in[0] = _mm_min_ss(in[0], int_min);
+		in[0] = _mm_min_ss(in[0], int_max);
 		*d = _mm_cvtss_si32(in[0]);
 		d += n_channels;
 	}
@@ -590,7 +590,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m256 in[2];
 	__m256i out[2], t[2];
 	__m256 scale = _mm256_set1_ps(S32_SCALE);
-	__m256 int_min = _mm256_set1_ps(S32_MIN);
+	__m256 int_max = _mm256_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 32) &&
 	    SPA_IS_ALIGNED(s1, 32))
@@ -602,8 +602,8 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm256_mul_ps(_mm256_load_ps(&s0[n]), scale);
 		in[1] = _mm256_mul_ps(_mm256_load_ps(&s1[n]), scale);
 
-		in[0] = _mm256_min_ps(in[0], int_min);
-		in[1] = _mm256_min_ps(in[1], int_min);
+		in[0] = _mm256_min_ps(in[0], int_max);
+		in[1] = _mm256_min_ps(in[1], int_max);
 
 		out[0] = _mm256_cvtps_epi32(in[0]);	/* a0 a1 a2 a3 a4 a5 a6 a7 */
 		out[1] = _mm256_cvtps_epi32(in[1]);	/* b0 b1 b2 b3 b4 b5 b6 b7 */
@@ -636,7 +636,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		__m128 in[2];
 		__m128i out[2];
 		__m128 scale = _mm_set1_ps(S32_SCALE);
-		__m128 int_min = _mm_set1_ps(S32_MIN);
+		__m128 int_max = _mm_set1_ps(S32_MAX);
 
 		in[0] = _mm_load_ss(&s0[n]);
 		in[1] = _mm_load_ss(&s1[n]);
@@ -644,7 +644,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);
 
 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storel_epi64((__m128i*)d, out[0]);
 		d += n_channels;
@@ -661,7 +661,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m256 in[4];
 	__m256i out[4], t[4];
 	__m256 scale = _mm256_set1_ps(S32_SCALE);
-	__m256 int_min = _mm256_set1_ps(S32_MIN);
+	__m256 int_max = _mm256_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 32) &&
 	    SPA_IS_ALIGNED(s1, 32) &&
@@ -677,10 +677,10 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[2] = _mm256_mul_ps(_mm256_load_ps(&s2[n]), scale);
 		in[3] = _mm256_mul_ps(_mm256_load_ps(&s3[n]), scale);
 
-		in[0] = _mm256_min_ps(in[0], int_min);
-		in[1] = _mm256_min_ps(in[1], int_min);
-		in[2] = _mm256_min_ps(in[2], int_min);
-		in[3] = _mm256_min_ps(in[3], int_min);
+		in[0] = _mm256_min_ps(in[0], int_max);
+		in[1] = _mm256_min_ps(in[1], int_max);
+		in[2] = _mm256_min_ps(in[2], int_max);
+		in[3] = _mm256_min_ps(in[3], int_max);
 
 		out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */
 		out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */
@@ -711,7 +711,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		__m128 in[4];
 		__m128i out[4];
 		__m128 scale = _mm_set1_ps(S32_SCALE);
-		__m128 int_min = _mm_set1_ps(S32_MIN);
+		__m128 int_max = _mm_set1_ps(S32_MAX);
 
 		in[0] = _mm_load_ss(&s0[n]);
 		in[1] = _mm_load_ss(&s1[n]);
@@ -723,7 +723,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);
 
 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storeu_si128((__m128i*)d, out[0]);
 		d += n_channels;
diff --git a/spa/plugins/audioconvert/fmt-ops-c.c b/spa/plugins/audioconvert/fmt-ops-c.c
index f4d762c57..44ffb30c4 100644
--- a/spa/plugins/audioconvert/fmt-ops-c.c
+++ b/spa/plugins/audioconvert/fmt-ops-c.c
@@ -737,23 +737,24 @@ conv_f64d_to_f32_c(struct convert *conv, void * SPA_RESTRICT dst[], const void *
 }
 
 /* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */
-static inline uint32_t
+static inline int32_t	/* same generator as before, but the value is now handed back as a signed integer */
 xorshift(uint32_t *state)
 {
   uint32_t x = *state;
   x ^= x << 13;
   x ^= x >> 17;
   x ^= x << 5;
-  return (*state = x);
+  return (int32_t)(*state = x);	/* store the new unsigned state, return it reinterpreted as signed */
 }
 
 
 static inline void update_dither_c(struct convert *conv, uint32_t n_samples)
 {
-	uint32_t n, mask = conv->mask;
-	int32_t offset = conv->offset + conv->bias;
+	uint32_t n;
+	float *dither = conv->dither, scale = conv->scale;	/* noise buffer to fill and the gain applied to each random value */
+	uint32_t *state = &conv->random[0];	/* the C path advances only the first PRNG state */
 
 	for (n = 0; n < n_samples; n++)
-		conv->dither[n] = offset + (int32_t)(xorshift(&conv->random[0]) & mask);
+		dither[n] = xorshift(state) * scale;	/* signed 32-bit random value scaled into a float noise sample */
 }
 
 
 void
@@ -771,6 +772,27 @@ conv_f32d_to_u8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void *
 	}
 }
 
+void
+conv_f32d_to_u8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	/* Planar f32 -> planar u8.  One noise block is generated up front and
+	 * reused for every channel and for every dither_size-long chunk. */
+	const float *noise = conv->dither;
+	uint32_t ch, pos, idx, len, n_channels = conv->n_channels, dither_size = conv->dither_size;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
+	for (ch = 0; ch < n_channels; ch++) {
+		const float *in = src[ch];
+		uint8_t *out = dst[ch];
+		for (pos = 0; pos < n_samples; pos += len) {
+			len = SPA_MIN(n_samples - pos, dither_size);
+			for (idx = 0; idx < len; idx++)
+				out[pos + idx] = F32_TO_U8_D(in[pos + idx], noise[idx]);
+		}
+	}
+}
+
 void
 conv_f32_to_u8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -813,6 +835,26 @@ conv_f32d_to_u8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void *
 	}
 }
 
+void
+conv_f32d_to_u8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	/* Planar f32 -> interleaved u8.  All channels of a frame share one noise
+	 * sample; the noise block repeats every dither_size frames. */
+	const float **in = (const float **) src;
+	const float *noise = conv->dither;
+	uint8_t *out = dst[0];
+	uint32_t ch, pos, idx, len, n_channels = conv->n_channels, dither_size = conv->dither_size;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
+	for (pos = 0; pos < n_samples; pos += len) {
+		len = SPA_MIN(n_samples - pos, dither_size);
+		for (idx = 0; idx < len; idx++)
+			for (ch = 0; ch < n_channels; ch++)
+				*out++ = F32_TO_U8_D(in[ch][pos + idx], noise[idx]);
+	}
+}
+
 void
 conv_f32d_to_s8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -828,6 +870,27 @@ conv_f32d_to_s8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void *
 	}
 }
 
+void
+conv_f32d_to_s8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	/* Planar f32 -> planar s8.  One noise block is generated up front and
+	 * reused for every channel and for every dither_size-long chunk. */
+	const float *noise = conv->dither;
+	uint32_t ch, pos, idx, len, n_channels = conv->n_channels, dither_size = conv->dither_size;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
+	for (ch = 0; ch < n_channels; ch++) {
+		const float *in = src[ch];
+		int8_t *out = dst[ch];
+		for (pos = 0; pos < n_samples; pos += len) {
+			len = SPA_MIN(n_samples - pos, dither_size);
+			for (idx = 0; idx < len; idx++)
+				out[pos + idx] = F32_TO_S8_D(in[pos + idx], noise[idx]);
+		}
+	}
+}
+
 void
 conv_f32_to_s8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -870,6 +933,26 @@ conv_f32d_to_s8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void *
 	}
 }
 
+void
+conv_f32d_to_s8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	/* Planar f32 -> interleaved s8.  All channels of a frame share one noise
+	 * sample; the noise block repeats every dither_size frames. */
+	const float **in = (const float **) src;
+	const float *noise = conv->dither;
+	int8_t *out = dst[0];
+	uint32_t ch, pos, idx, len, n_channels = conv->n_channels, dither_size = conv->dither_size;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
+	for (pos = 0; pos < n_samples; pos += len) {
+		len = SPA_MIN(n_samples - pos, dither_size);
+		for (idx = 0; idx < len; idx++)
+			for (ch = 0; ch < n_channels; ch++)
+				*out++ = F32_TO_S8_D(in[ch][pos + idx], noise[idx]);
+	}
+}
+
 void
 conv_f32d_to_alaw_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -944,21 +1027,19 @@ void
 conv_f32d_to_s16d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
 {
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (i = 0; i < n_channels; i++) {
 		const float *s = src[i];
 		int16_t *d = dst[i];
-		int32_t v;
 
 		for (j = 0; j < n_samples;) {
-			chunk = SPA_MIN(n_samples - j, conv->dither_size);
-			for (k = 0; k < chunk; k++, j++) {
-				v = F32_TO_S24(s[j]) + conv->dither[k];
-				d[j] = v >> 8;
-			}
+			chunk = SPA_MIN(n_samples - j, dither_size);
+			for (k = 0; k < chunk; k++, j++)
+				d[j] = F32_TO_S16_D(s[j], dither[k]);
 		}
 	}
 }
@@ -1011,18 +1092,16 @@ conv_f32d_to_s16_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 {
 	const float **s = (const float **) src;
 	int16_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
-	int32_t v;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
-			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S24(s[i][j]) + conv->dither[k];
-				*d++ = v >> 8;
-			}
+			for (i = 0; i < n_channels; i++)
+				*d++ = F32_TO_S16_D(s[i][j], dither[k]);
 		}
 	}
 }
@@ -1046,19 +1125,17 @@ conv_f32d_to_s16s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons
 		uint32_t n_samples)
 {
 	const float **s = (const float **) src;
-	int16_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
-	int32_t v;
+	uint16_t *d = dst[0];
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
-			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S24(s[i][j]) + conv->dither[k];
-				*d++ = bswap_16(v >> 8);
-			}
+			for (i = 0; i < n_channels; i++)
+				*d++ = F32_TO_S16S_D(s[i][j], dither[k]);
 		}
 	}
 }
@@ -1110,18 +1187,19 @@ void
 conv_f32d_to_s32d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
 {
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (i = 0; i < n_channels; i++) {
 		const float *s = src[i];
 		int32_t *d = dst[i];
 
 		for (j = 0; j < n_samples;) {
-			chunk = SPA_MIN(n_samples - j, conv->dither_size);
+			chunk = SPA_MIN(n_samples - j, dither_size);
 			for (k = 0; k < chunk; k++, j++)
-				d[j] = F32_TO_S32(s[j]) + conv->dither[k];
+				d[j] = F32_TO_S32_D(s[j], dither[k]);
 		}
 	}
 }
@@ -1174,15 +1252,16 @@ conv_f32d_to_s32_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 {
 	const float **s = (const float **) src;
 	int32_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
 			for (i = 0; i < n_channels; i++)
-				*d++ = F32_TO_S32(s[i][j]) + conv->dither[k];
+				*d++ = F32_TO_S32_D(s[i][j], dither[k]);
 		}
 	}
 }
@@ -1192,7 +1271,7 @@ conv_f32d_to_s32s_c(struct convert *conv, void * SPA_RESTRICT dst[], const void
 		uint32_t n_samples)
 {
 	const float **s = (const float **) src;
-	int32_t *d = dst[0];
+	uint32_t *d = dst[0];
 	uint32_t i, j, n_channels = conv->n_channels;
 
 	for (j = 0; j < n_samples; j++) {
@@ -1206,18 +1285,17 @@ conv_f32d_to_s32s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons
 		uint32_t n_samples)
 {
 	const float **s = (const float **) src;
-	int32_t *d = dst[0], v;
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t *d = dst[0];
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
-			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S32(s[i][j]) + conv->dither[k];
-				*d++ = bswap_32(v);
-			}
+			for (i = 0; i < n_channels; i++)
+				*d++ = F32_TO_S32S_D(s[i][j], dither[k]);
 		}
 	}
 }
@@ -1346,20 +1424,19 @@ void
 conv_f32d_to_s24d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
 {
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
-	int32_t v;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (i = 0; i < n_channels; i++) {
 		const float *s = src[i];
 		uint8_t *d = dst[i];
 
 		for (j = 0; j < n_samples;) {
-			chunk = SPA_MIN(n_samples - j, conv->dither_size);
+			chunk = SPA_MIN(n_samples - j, dither_size);
 			for (k = 0; k < chunk; k++, j++) {
-				v = F32_TO_S24(s[j]) + conv->dither[k];
-				write_s24(d, v);
+				write_s24(d, F32_TO_S24_D(s[j], dither[k]));
 				d += 3;
 			}
 		}
@@ -1419,24 +1496,22 @@ conv_f32d_to_s24_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 {
 	const float **s = (const float **) src;
 	uint8_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
-	int32_t v;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
 			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S24(s[i][j]) + conv->dither[k];
-				write_s24(d, v);
+				write_s24(d, F32_TO_S24_D(s[i][j], dither[k]));
 				d += 3;
 			}
 		}
 	}
 }
 
-
 void
 conv_f32d_to_s24s_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -1459,17 +1534,16 @@ conv_f32d_to_s24s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons
 {
 	const float **s = (const float **) src;
 	uint8_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
-	int32_t v;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
 			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S24(s[i][j]) + conv->dither[k];
-				write_s24s(d, v);
+				write_s24s(d, F32_TO_S24_D(s[i][j], dither[k]));
 				d += 3;
 			}
 		}
@@ -1495,18 +1569,19 @@ void
 conv_f32d_to_s24_32d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
 {
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (i = 0; i < n_channels; i++) {
 		const float *s = src[i];
 		int32_t *d = dst[i];
 
 		for (j = 0; j < n_samples;) {
-			chunk = SPA_MIN(n_samples - j, conv->dither_size);
+			chunk = SPA_MIN(n_samples - j, dither_size);
 			for (k = 0; k < chunk; k++, j++)
-				d[j] = F32_TO_S24_32(s[j]) + conv->dither[k];
+				d[j] = F32_TO_S24_32_D(s[j], dither[k]);
 		}
 	}
 }
@@ -1587,15 +1662,16 @@ conv_f32d_to_s24_32_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], co
 {
 	const float **s = (const float **) src;
 	int32_t *d = dst[0];
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
 			for (i = 0; i < n_channels; i++)
-				*d++ = F32_TO_S24_32(s[i][j]) + conv->dither[k];
+				*d++ = F32_TO_S24_32_D(s[i][j], dither[k]);
 		}
 	}
 }
@@ -1619,18 +1695,17 @@ conv_f32d_to_s24_32s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], c
 		uint32_t n_samples)
 {
 	const float **s = (const float **) src;
-	int32_t *d = dst[0], v;
-	uint32_t i, j, k, chunk, n_channels = conv->n_channels;
+	int32_t *d = dst[0];
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
 
-	update_dither_c(conv, SPA_MIN(n_samples, conv->dither_size));
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));
 
 	for (j = 0; j < n_samples;) {
-		chunk = SPA_MIN(n_samples - j, conv->dither_size);
+		chunk = SPA_MIN(n_samples - j, dither_size);
 		for (k = 0; k < chunk; k++, j++) {
-			for (i = 0; i < n_channels; i++) {
-				v = F32_TO_S24_32(s[i][j]) + conv->dither[k];
-				*d++ = bswap_32(v);
-			}
+			for (i = 0; i < n_channels; i++)
+				*d++ = F32_TO_S24_32S_D(s[i][j], dither[k]);
 		}
 	}
 }
diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c
index 4fd13a1d4..6d811914c 100644
--- a/spa/plugins/audioconvert/fmt-ops-sse2.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse2.c
@@ -385,7 +385,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m128 in[1];
 	__m128i out[4];
 	__m128 scale = _mm_set1_ps(S32_SCALE);
-	__m128 int_min = _mm_set1_ps(S32_MIN);
+	__m128 int_max = _mm_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 16))
 		unrolled = n_samples & ~3;
@@ -394,7 +394,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 
 	for(n = 0; n < unrolled; n += 4) {
 		in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1));
 		out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
@@ -409,7 +409,7 @@ conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	for(; n < n_samples; n++) {
 		in[0] = _mm_load_ss(&s0[n]);
 		in[0] = _mm_mul_ss(in[0], scale);
-		in[0] = _mm_min_ss(in[0], int_min);
+		in[0] = _mm_min_ss(in[0], int_max);
 		*d = _mm_cvtss_si32(in[0]);
 		d += n_channels;
 	}
@@ -425,7 +425,7 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m128 in[2];
 	__m128i out[2], t[2];
 	__m128 scale = _mm_set1_ps(S32_SCALE);
-	__m128 int_min = _mm_set1_ps(S32_MIN);
+	__m128 int_max = _mm_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 16) &&
 	    SPA_IS_ALIGNED(s1, 16))
@@ -437,8 +437,8 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale);
 		in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), scale);
 
-		in[0] = _mm_min_ps(in[0], int_min);
-		in[1] = _mm_min_ps(in[1], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
+		in[1] = _mm_min_ps(in[1], int_max);
 
 		out[0] = _mm_cvtps_epi32(in[0]);
 		out[1] = _mm_cvtps_epi32(in[1]);
@@ -459,7 +459,7 @@ conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);
 
 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storel_epi64((__m128i*)d, out[0]);
 		d += n_channels;
@@ -476,7 +476,7 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m128 in[4];
 	__m128i out[4];
 	__m128 scale = _mm_set1_ps(S32_SCALE);
-	__m128 int_min = _mm_set1_ps(S32_MIN);
+	__m128 int_max = _mm_set1_ps(S32_MAX);
 
 	if (SPA_IS_ALIGNED(s0, 16) &&
 	    SPA_IS_ALIGNED(s1, 16) &&
@@ -492,10 +492,10 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[2] = _mm_mul_ps(_mm_load_ps(&s2[n]), scale);
 		in[3] = _mm_mul_ps(_mm_load_ps(&s3[n]), scale);
 
-		in[0] = _mm_min_ps(in[0], int_min);
-		in[1] = _mm_min_ps(in[1], int_min);
-		in[2] = _mm_min_ps(in[2], int_min);
-		in[3] = _mm_min_ps(in[3], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
+		in[1] = _mm_min_ps(in[1], int_max);
+		in[2] = _mm_min_ps(in[2], int_max);
+		in[3] = _mm_min_ps(in[3], int_max);
 
 		_MM_TRANSPOSE4_PS(in[0], in[1], in[2], in[3]);
 
@@ -521,7 +521,7 @@ conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);
 
 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storeu_si128((__m128i*)d, out[0]);
 		d += n_channels;
@@ -543,6 +543,92 @@ conv_f32d_to_s32_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
 		conv_f32d_to_s32_1s_sse2(conv, &d[i], &src[i], n_channels, n_samples);
 }
 
+static inline void update_dither_sse2(struct convert *conv, uint32_t n_samples)
+{
+	uint32_t n;
+	const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);	/* four 32-bit PRNG states, one per lane; NOTE(review): stored through below despite the const */
+	float *dither = SPA_PTR_ALIGN(conv->dither, 16, float);
+	__m128 scale = _mm_set1_ps(conv->scale), out[1];
+	__m128i in[1], t[1];
+	/* Fills dither[] four floats at a time; NOTE(review): if n_samples is not a multiple of 4 the last store runs past it, so the buffer presumably has slack - confirm. */
+	for (n = 0; n < n_samples; n += 4) {
+		/* 32 bit xorshift PRNG (x ^= x<<13; x ^= x>>17; x ^= x<<5) run on four lanes at once, see https://en.wikipedia.org/wiki/Xorshift */
+		in[0] = _mm_load_si128((__m128i*)r);
+		t[0] = _mm_slli_epi32(in[0], 13);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		t[0] = _mm_srli_epi32(in[0], 17);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		t[0] = _mm_slli_epi32(in[0], 5);
+		in[0] = _mm_xor_si128(in[0], t[0]);
+		_mm_store_si128((__m128i*)r, in[0]);	/* persist the advanced states for the next iteration */
+
+		out[0] = _mm_cvtepi32_ps(in[0]);	/* lanes are converted as signed 32-bit integers */
+		out[0] = _mm_mul_ps(out[0], scale);
+		_mm_store_ps(&dither[n], out[0]);
+	}
+}
+
+static void
+conv_f32d_to_s32_1s_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
+		uint32_t n_channels, uint32_t n_samples)
+{
+	const float *s = src;
+	float *dither = SPA_PTR_ALIGN(conv->dither, 16, float);
+	int32_t *d = dst;
+	uint32_t n, unrolled;
+	__m128 in[1];
+	__m128i out[4];
+	__m128 scale = _mm_set1_ps(S32_SCALE);
+	__m128 int_max = _mm_set1_ps(S32_MAX);
+	/* Converts one f32 channel to s32 with dither[n] added, writing every n_channels-th slot of d; the 4-wide path uses aligned loads, hence the alignment test. */
+	if (SPA_IS_ALIGNED(s, 16))
+		unrolled = n_samples & ~3;
+	else
+		unrolled = 0;
+
+	for(n = 0; n < unrolled; n += 4) {
+		in[0] = _mm_mul_ps(_mm_load_ps(&s[n]), scale);
+		in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[n]));
+		in[0] = _mm_min_ps(in[0], int_max);	/* only the upper bound is clamped; NOTE(review): presumably relies on the conversion's overflow result for the lower bound - confirm */
+		out[0] = _mm_cvtps_epi32(in[0]);
+		out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1));
+		out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
+		out[3] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(2, 1, 0, 3));
+		/* each shuffle rotates the next lane into element 0 so it can be extracted and stored */
+		d[0*n_channels] = _mm_cvtsi128_si32(out[0]);
+		d[1*n_channels] = _mm_cvtsi128_si32(out[1]);
+		d[2*n_channels] = _mm_cvtsi128_si32(out[2]);
+		d[3*n_channels] = _mm_cvtsi128_si32(out[3]);
+		d += 4*n_channels;
+	}
+	for(; n < n_samples; n++) {	/* scalar path: the tail, or everything when s is unaligned */
+		in[0] = _mm_load_ss(&s[n]);
+		in[0] = _mm_mul_ss(in[0], scale);
+		in[0] = _mm_add_ss(in[0], _mm_load_ss(&dither[n]));
+		in[0] = _mm_min_ss(in[0], int_max);
+		*d = _mm_cvtss_si32(in[0]);
+		d += n_channels;
+	}
+}
+
+void
+conv_f32d_to_s32_dither_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	/* Planar f32 -> interleaved s32: refresh the noise block once, then convert each channel chunk by chunk. */
+	int32_t *out = dst[0];
+	uint32_t ch, pos, len, n_channels = conv->n_channels;
+
+	update_dither_sse2(conv, SPA_MIN(n_samples, conv->dither_size));
+	for (ch = 0; ch < n_channels; ch++) {
+		const float *in = src[ch];
+		for (pos = 0; pos < n_samples; pos += len) {
+			len = SPA_MIN(n_samples - pos, conv->dither_size);
+			conv_f32d_to_s32_1s_dither_sse2(conv, out + ch + pos * n_channels, in + pos, n_channels, len);
+		}
+	}
+}
+
 static void
 conv_interleave_32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
 		uint32_t n_channels, uint32_t n_samples)
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index ccd91f2d3..7ad743f57 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -170,13 +170,17 @@ static struct conv_info conv_table[] =
 
 	/* from f32 */
 	MAKE(F32, U8, 0, conv_f32_to_u8_c),
+	MAKE(F32P, U8P, 0, conv_f32d_to_u8d_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, U8P, 0, conv_f32d_to_u8d_c),
 	MAKE(F32, U8P, 0, conv_f32_to_u8d_c),
+	MAKE(F32P, U8, 0, conv_f32d_to_u8_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, U8, 0, conv_f32d_to_u8_c),
 
 	MAKE(F32, S8, 0, conv_f32_to_s8_c),
+	MAKE(F32P, S8P, 0, conv_f32d_to_s8d_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, S8P, 0, conv_f32d_to_s8d_c),
 	MAKE(F32, S8P, 0, conv_f32_to_s8d_c),
+	MAKE(F32P, S8, 0, conv_f32d_to_s8_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, S8, 0, conv_f32d_to_s8_c),
 
 	MAKE(F32P, ALAW, 0, conv_f32d_to_alaw_c),
@@ -224,7 +228,11 @@ static struct conv_info conv_table[] =
 	MAKE(F32P, S32P, 0, conv_f32d_to_s32d_c),
 	MAKE(F32, S32P, 0, conv_f32_to_s32d_c),
 
+#if defined (HAVE_SSE2)
+	MAKE(F32P, S32, 0, conv_f32d_to_s32_dither_sse2, SPA_CPU_FLAG_SSE2, CONV_DITHER),
+#endif
 	MAKE(F32P, S32, 0, conv_f32d_to_s32_dither_c, 0, CONV_DITHER),
+
 #if defined (HAVE_AVX2)
 	MAKE(F32P, S32, 0, conv_f32d_to_s32_avx2, SPA_CPU_FLAG_AVX2),
 #endif
@@ -357,17 +365,38 @@ static void impl_convert_free(struct convert *conv)
 	conv->dither = NULL;
 }
 
+static bool need_dither(uint32_t format)
+{
+	/* True for the destination formats that keep dithering enabled:
+	 * the U8/S8, ULAW/ALAW and S16 variants listed below.  Any other
+	 * format yields false, which convert_init() uses to force
+	 * DITHER_METHOD_NONE. */
+	return format == SPA_AUDIO_FORMAT_U8 ||
+		format == SPA_AUDIO_FORMAT_U8P ||
+		format == SPA_AUDIO_FORMAT_S8 ||
+		format == SPA_AUDIO_FORMAT_S8P ||
+		format == SPA_AUDIO_FORMAT_ULAW ||
+		format == SPA_AUDIO_FORMAT_ALAW ||
+		format == SPA_AUDIO_FORMAT_S16P ||
+		format == SPA_AUDIO_FORMAT_S16 ||
+		format == SPA_AUDIO_FORMAT_S16_OE;
+}
+
 int convert_init(struct convert *conv)
 {
 	const struct conv_info *info;
-	uint32_t i, shift, dither_flags;
+	uint32_t i, dither_flags;
 
-	shift = 24u - SPA_MIN(conv->quantize, 24u);
-	shift += conv->noise;
+	conv->scale = 1.0f / (float)(INT32_MAX >> conv->noise);
 
-	conv->mask = (1ULL << (shift + 1)) - 1;
-	conv->offset = shift < 32 ? -(1ULL << shift) : 0;
-	conv->bias = shift > 0 ? 1 << (shift - 1) : 0;
+	/* disable dither if not needed */
+	if (!need_dither(conv->dst_fmt))
+		conv->method = DITHER_METHOD_NONE;
+
+	/* don't use shaped for too low rates, it moves the noise to
+	 * audible ranges */
+	if (conv->method == DITHER_METHOD_SHAPED_5 && conv->rate < 32000)
+		conv->method = DITHER_METHOD_TRIANGULAR;
 
 	dither_flags = 0;
 	if (conv->method != DITHER_METHOD_NONE || conv->noise)
diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h
index f4373a122..0cb990ded 100644
--- a/spa/plugins/audioconvert/fmt-ops.h
+++ b/spa/plugins/audioconvert/fmt-ops.h
@@ -37,72 +37,92 @@
 
 #define FMT_OPS_MAX_ALIGN	32
 
-#define U8_MIN		0
-#define U8_MAX		255
-#define U8_SCALE	127.5f
-#define U8_OFFS		128
-#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0)
-#define F32_TO_U8(v)	(uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS)
+#define U8_MIN			0u
+#define U8_MAX			255u
+#define U8_SCALE		127.5f
+#define U8_OFFS			128.f
+#define U8_TO_F32(v)		((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0)	/* 0 -> -1.0, 128 -> 0.0; the double literal 1.0 makes the result a double */
+#define F32_TO_U8(v)		(uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS, U8_MIN, U8_MAX)	/* scale and offset, clamp to [U8_MIN, U8_MAX], then convert to uint8_t */
+#define F32_TO_U8_D(v,d)	(uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS + (d), U8_MIN, U8_MAX)	/* as F32_TO_U8 with d added before the clamp; d is presumably the dither sample */
 
-#define S8_MIN		-127
-#define S8_MAX		127
-#define S8_MAX_F	127.0f
-#define S8_SCALE	127.0f
-#define S8_TO_F32(v)	(((int8_t)(v)) * (1.0f / S8_SCALE))
-#define F32_TO_S8(v)	(int8_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S8_SCALE)
+#define S8_MIN			-127	/* symmetric with S8_MAX, so F32_TO_S8 never yields -128 */
+#define S8_MAX			127
+#define S8_MAX_F		127.0f
+#define S8_SCALE		127.0f
+#define S8_TO_F32(v)		(((int8_t)(v)) * (1.0f / S8_SCALE))	/* multiplies by the reciprocal of S8_SCALE; 0 -> 0.0 */
+#define F32_TO_S8(v)		(int8_t)SPA_CLAMP((v) * S8_SCALE, S8_MIN, S8_MAX)	/* scale, clamp to [S8_MIN, S8_MAX], then convert to int8_t */
+#define F32_TO_S8_D(v,d)	(int8_t)SPA_CLAMP((v) * S8_SCALE + (d), S8_MIN, S8_MAX)	/* as F32_TO_S8 with d added before the clamp; d is presumably the dither sample */
 
-#define U16_MIN		0
-#define U16_MAX		65535
-#define U16_SCALE	32767.5f
-#define U16_OFFS	32768
-#define U16_TO_F32(v)	((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0)
-#define U16S_TO_F32(v)	(((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0)
-#define F32_TO_U16(v)	(uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS)
-#define F32_TO_U16S(v)	((uint16_t)bswap_16((uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS)))
+#define U16_MIN			0u
+#define U16_MAX			65535u
+#define U16_SCALE		32767.5f
+#define U16_OFFS		32768.f
+#define U16_TO_F32(v)		((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0)	/* 0 -> -1.0, 32768 -> 0.0; the double literal 1.0 makes the result a double */
+#define U16S_TO_F32(v)		(((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0)	/* same mapping, applied after byte-swapping the sample */
+#define F32_TO_U16(v)		(uint16_t)SPA_CLAMP((v) * U16_SCALE + U16_OFFS, U16_MIN, U16_MAX)	/* scale and offset, clamp to [U16_MIN, U16_MAX], then convert to uint16_t */
+#define F32_TO_U16_D(v,d)	(uint16_t)SPA_CLAMP((v) * U16_SCALE + U16_OFFS + (d), U16_MIN, U16_MAX)	/* as F32_TO_U16 with d added before the clamp; d is presumably the dither sample */
+#define F32_TO_U16S(v)		bswap_16(F32_TO_U16(v))	/* F32_TO_U16 result with its bytes swapped */
+#define F32_TO_U16S_D(v,d)	bswap_16(F32_TO_U16_D(v,d))	/* F32_TO_U16_D result with its bytes swapped */
 
-#define S16_MIN		-32767
-#define S16_MAX		32767
-#define S16_MAX_F	32767.0f
-#define S16_SCALE	32767.0f
-#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
-#define S16S_TO_F32(v)	(((int16_t)bswap_16((uint16_t)v)) * (1.0f / S16_SCALE))
-#define F32_TO_S16(v)	(int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE)
-#define F32_TO_S16S(v)	((int16_t)bswap_16((uint16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE)))
+#define S16_MIN			-32767	/* symmetric with S16_MAX, so F32_TO_S16 never yields -32768 */
+#define S16_MAX			32767
+#define S16_MAX_F		32767.0f
+#define S16_SCALE		32767.0f
+#define S16_TO_F32(v)		(((int16_t)(v)) * (1.0f / S16_SCALE))	/* multiplies by the reciprocal of S16_SCALE; 0 -> 0.0 */
+#define S16S_TO_F32(v)		(((int16_t)bswap_16(v)) * (1.0f / S16_SCALE))	/* same mapping, applied after byte-swapping the sample */
+#define F32_TO_S16(v)		(int16_t)SPA_CLAMP((v) * S16_SCALE, S16_MIN, S16_MAX)	/* scale, clamp to [S16_MIN, S16_MAX], then convert to int16_t */
+#define F32_TO_S16_D(v,d)	(int16_t)SPA_CLAMP((v) * S16_SCALE + (d), S16_MIN, S16_MAX)	/* as F32_TO_S16 with d added before the clamp; d is presumably the dither sample */
+#define F32_TO_S16S(v)		bswap_16(F32_TO_S16(v))	/* F32_TO_S16 result with its bytes swapped */
+#define F32_TO_S16S_D(v,d)	bswap_16(F32_TO_S16_D(v,d))	/* F32_TO_S16_D result with its bytes swapped */
 
-#define U24_MIN		0
-#define U24_MAX		16777215
-#define U24_SCALE	8388607.5f
-#define U24_OFFS	8388608
-#define U24_TO_F32(v)	((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0)
-#define F32_TO_U24(v)	(uint32_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U24_SCALE) + U24_OFFS)
+#define U24_MIN			0u
+#define U24_MAX			16777215u	/* 2^24 - 1 */
+#define U24_SCALE		8388607.5f
+#define U24_OFFS		8388608.f
+#define U24_TO_F32(v)		((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0)	/* 0 -> -1.0, 8388608 -> 0.0; the double literal 1.0 makes the result a double */
+#define F32_TO_U24(v)		(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS, U24_MIN, U24_MAX)	/* scale and offset, clamp to [U24_MIN, U24_MAX], then convert to uint32_t */
+#define F32_TO_U24_D(v,d)	(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS + (d), U24_MIN, U24_MAX)	/* as F32_TO_U24 with d added before the clamp; d is presumably the dither sample */
 
-#define S24_MIN		-8388607
-#define S24_MAX		8388607
-#define S24_MAX_F	8388607.0f
-#define S24_SCALE	8388607.0f
-#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
-#define F32_TO_S24(v)	(int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE)
+#define S24_MIN			-8388607	/* symmetric with S24_MAX, so F32_TO_S24 never yields -8388608 */
+#define S24_MAX			8388607	/* 2^23 - 1 */
+#define S24_MAX_F		8388607.0f
+#define S24_SCALE		8388607.0f
+#define S24_TO_F32(v)		(((int32_t)(v)) * (1.0f / S24_SCALE))	/* multiplies by the reciprocal of S24_SCALE; 0 -> 0.0 */
+#define F32_TO_S24(v)		(int32_t)SPA_CLAMP((v) * S24_SCALE, S24_MIN, S24_MAX)	/* scale, clamp to [S24_MIN, S24_MAX], then convert to int32_t */
+#define F32_TO_S24_D(v,d)	(int32_t)SPA_CLAMP((v) * S24_SCALE + (d), S24_MIN, S24_MAX)	/* as F32_TO_S24 with d added before the clamp; d is presumably the dither sample */
 
-#define U32_TO_F32(v)	U24_TO_F32(((uint32_t)(v)) >> 8)
-#define F32_TO_U32(v)	(F32_TO_U24(v) << 8)
+#define U32_MIN			0u
+#define U32_MAX			4294967040u	/* 0xffffff00 = 2 * U32_OFFS; largest value below 2^32 that a float holds exactly */
+#define U32_SCALE		2147483520.f
+#define U32_OFFS		2147483520.f
+#define U32_TO_F32(v)		((((uint32_t)(v)) * (1.0f / U32_OFFS)) - 1.0)	/* 0 -> -1.0; the double literal 1.0 makes the result a double */
+#define F32_TO_U32(v)		(uint32_t)SPA_CLAMP((v) * U32_SCALE + U32_OFFS, U32_MIN, U32_MAX)	/* scale and offset, clamp to [U32_MIN, U32_MAX], then convert to uint32_t */
+#define F32_TO_U32_D(v,d)	(uint32_t)SPA_CLAMP((v) * U32_SCALE + U32_OFFS + (d), U32_MIN, U32_MAX)	/* as F32_TO_U32 with d added before the clamp; d is presumably the dither sample */
 
-#define S32_SCALE	2147483648.0f
-#define S32_MIN		2147483520.0f
-
-#define S32_TO_F32(v)	S24_TO_F32(((int32_t)(v)) >> 8)
-#define S32S_TO_F32(v)	S24_TO_F32(((int32_t)bswap_32(v)) >> 8)
-#define F32_TO_S32(v)	(F32_TO_S24(v) << 8)
-#define F32_TO_S32S(v)	bswap_32((F32_TO_S24(v) << 8))
+#define S32_MIN			-2147483520	/* negation of S32_MAX */
+#define S32_MAX			2147483520	/* 0x7fffff80; largest value below 2^31 that a float holds exactly */
+#define S32_MAX_F		2147483520.f
+#define S32_SCALE		2147483648.f	/* 2^31 */
+#define S32_TO_F32(v)		(((int32_t)(v)) * (1.0f / S32_SCALE))	/* divides by 2^31, so INT32_MIN -> -1.0 */
+#define S32S_TO_F32(v)		(((int32_t)bswap_32(v)) * (1.0f / S32_SCALE))	/* same mapping, applied after byte-swapping the sample */
+#define F32_TO_S32(v)		(int32_t)SPA_CLAMP((v) * S32_SCALE, S32_MIN, S32_MAX)	/* scale by 2^31, clamp to [S32_MIN, S32_MAX], then convert to int32_t */
+#define F32_TO_S32_D(v,d)	(int32_t)SPA_CLAMP((v) * S32_SCALE + (d), S32_MIN, S32_MAX)	/* as F32_TO_S32 with d added before the clamp; d is presumably the dither sample */
+#define F32_TO_S32S(v)		bswap_32(F32_TO_S32(v))	/* F32_TO_S32 result with its bytes swapped */
+#define F32_TO_S32S_D(v,d)	bswap_32(F32_TO_S32_D(v,d))	/* F32_TO_S32_D result with its bytes swapped */
 
 #define U24_32_TO_F32(v)	U32_TO_F32((v)<<8)
 #define U24_32S_TO_F32(v)	U32_TO_F32(((int32_t)bswap_32(v))<<8)
 #define F32_TO_U24_32(v)	F32_TO_U24(v)
 #define F32_TO_U24_32S(v)	bswap_32(F32_TO_U24(v))
+#define F32_TO_U24_32_D(v,d)	F32_TO_U24_D(v,d)	/* plain alias of F32_TO_U24_D */
+#define F32_TO_U24_32S_D(v,d)	bswap_32(F32_TO_U24_D(v,d))	/* F32_TO_U24_D result with all four bytes swapped */
 
 #define S24_32_TO_F32(v)	S32_TO_F32((v)<<8)
 #define S24_32S_TO_F32(v)	S32_TO_F32(((int32_t)bswap_32(v))<<8)
 #define F32_TO_S24_32(v)	F32_TO_S24(v)
 #define F32_TO_S24_32S(v)	bswap_32(F32_TO_S24(v))
+#define F32_TO_S24_32_D(v,d)	F32_TO_S24_D(v,d)	/* plain alias of F32_TO_S24_D */
+#define F32_TO_S24_32S_D(v,d)	bswap_32(F32_TO_S24_D(v,d))	/* F32_TO_S24_D result with all four bytes swapped */
 
 static inline uint32_t read_u24(const void *src)
 {
@@ -190,16 +210,15 @@ struct convert {
 	uint32_t src_fmt;
 	uint32_t dst_fmt;
 	uint32_t n_channels;
+	uint32_t rate;
 	uint32_t cpu_flags;
 	const char *func_name;
 
 	unsigned int is_passthrough:1;
 
-	int32_t bias;
-	int32_t offset;
-	uint32_t mask;
+	float scale;
 	uint32_t random[16 + FMT_OPS_MAX_ALIGN/4];
-	int32_t *dither;
+	float *dither;
 	uint32_t dither_size;
 
 	float ns_data[MAX_NS];
@@ -295,13 +314,17 @@ DEFINE_FUNCTION(f64_to_f32d, c);
 DEFINE_FUNCTION(f64s_to_f32d, c);
 DEFINE_FUNCTION(f64d_to_f32, c);
 DEFINE_FUNCTION(f32d_to_u8d, c);
+DEFINE_FUNCTION(f32d_to_u8d_dither, c);
 DEFINE_FUNCTION(f32_to_u8, c);
 DEFINE_FUNCTION(f32_to_u8d, c);
 DEFINE_FUNCTION(f32d_to_u8, c);
+DEFINE_FUNCTION(f32d_to_u8_dither, c);
 DEFINE_FUNCTION(f32d_to_s8d, c);
+DEFINE_FUNCTION(f32d_to_s8d_dither, c);
 DEFINE_FUNCTION(f32_to_s8, c);
 DEFINE_FUNCTION(f32_to_s8d, c);
 DEFINE_FUNCTION(f32d_to_s8, c);
+DEFINE_FUNCTION(f32d_to_s8_dither, c);
 DEFINE_FUNCTION(f32d_to_alaw, c);
 DEFINE_FUNCTION(f32d_to_ulaw, c);
 DEFINE_FUNCTION(f32_to_u16, c);
@@ -375,6 +398,7 @@ DEFINE_FUNCTION(s16_to_f32d, sse2);
 DEFINE_FUNCTION(s24_to_f32d, sse2);
 DEFINE_FUNCTION(s32_to_f32d, sse2);
 DEFINE_FUNCTION(f32d_to_s32, sse2);
+DEFINE_FUNCTION(f32d_to_s32_dither, sse2);
 DEFINE_FUNCTION(f32_to_s16, sse2);
 DEFINE_FUNCTION(f32d_to_s16_2, sse2);
 DEFINE_FUNCTION(f32d_to_s16, sse2);