aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Max Kellermann <max@duempel.org> 2009-02-18 22:27:58 +0100
committer: Max Kellermann <max@duempel.org> 2009-02-18 22:27:58 +0100
commit: 7f701744a7aa65ff4c123c293791670ae0d6b8c4 (patch)
tree: 1d0f616a3fb4bc89714dc0c1fea0ef13578a8d2f
parent: 76dc9ac5f9437268bc167bc681168ef2c1f7420b (diff)
downloadmpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.tar.gz
mpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.tar.xz
mpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.zip
pcm_volume: optimized pcm_volume_change_24() on i386
Added an inline assembly function for the 64-bit multiplication. Benchmark on a Pentium II 266 MHz, processing 512 MB of 24-bit PCM data with the command `dd if=/dev/zero bs=64k count=8k | time ./test/software_volume 48000:24:2 >/dev/null`: 22.94s before this patch, 7.24s after this patch.
Diffstat (limited to '')
-rw-r--r--src/pcm_volume.c41
1 files changed, 40 insertions, 1 deletions
diff --git a/src/pcm_volume.c b/src/pcm_volume.c
index 2dff14ef5..cb1d82b00 100644
--- a/src/pcm_volume.c
+++ b/src/pcm_volume.c
@@ -58,16 +58,55 @@ pcm_volume_change_16(int16_t *buffer, unsigned num_samples, int volume)
}
}
+#if __i386__
+/**
+ * Optimized volume function for i386. Use the EDX:EAX 2*32 bit
+ * multiplication result instead of emulating 64 bit multiplication.
+ *
+ * Multiplies one sample by the volume factor with a single one-operand
+ * IMUL, then extracts bits 10..41 of the 64 bit product, i.e. the
+ * product shifted right by 10 bits and truncated to 32 bits.
+ *
+ * Differences from the portable path in pcm_volume_change_24():
+ * neither the dither value nor the PCM_VOLUME_1 / 2 rounding term is
+ * added before scaling.
+ * NOTE(review): a right shift rounds toward negative infinity while
+ * the portable '/' truncates toward zero, so negative products may
+ * differ by one - confirm this is acceptable.
+ *
+ * @param sample the input sample; loaded into EAX via the "0" constraint
+ * @param volume the volume factor; passed in any general register
+ * except the clobbered EDX
+ * @param dither currently ignored: both the "add" instruction and its
+ * input operand are commented out below
+ * @return the low 32 bits of (sample * volume) >> 10; the caller is
+ * expected to clamp it (pcm_volume_change_24() passes it to
+ * pcm_range())
+ */
+static inline int32_t
+pcm_volume_sample_24(int32_t sample, int32_t volume, int32_t dither)
+{
+ int32_t result;
+
+ asm(/* edx:eax = sample * volume
+ (one-operand imul: the implicit factor is EAX, %2 is volume) */
+ "imul %2\n"
+
+ /* "add %3, %1\n" dithering disabled for now, because we
+ have no overflow check - is dithering really important
+ here? */
+
+ /* eax = edx:eax / PCM_VOLUME_1
+ done as a 64 bit right shift by 10: the low 10 bits of EDX
+ are moved to the top of the word, EAX drops its low 10
+ bits, and the two halves are merged; %1 is EAX here.
+ The shift count presumes PCM_VOLUME_1 == 1024 - TODO
+ confirm against its definition. */
+ "sal $22, %%edx\n"
+ "shr $10, %1\n"
+ "or %%edx, %1\n"
+
+ : "=a"(result) /* %0: output, fixed to EAX */
+ : "0"(sample), "r"(volume) /* , "r"(dither) */
+ : "edx" /* imul writes the high half of the product to EDX */
+ );
+
+ return result;
+}
+#endif
+
static void
pcm_volume_change_24(int32_t *buffer, unsigned num_samples, int volume)
{
while (num_samples > 0) {
+#if __i386__
+ /* assembly version for i386 */
+ int32_t sample = *buffer;
+
+ sample = pcm_volume_sample_24(sample, volume,
+ pcm_volume_dither());
+#else
+ /* portable version */
int64_t sample = *buffer;
sample = (sample * volume + pcm_volume_dither() +
PCM_VOLUME_1 / 2)
/ PCM_VOLUME_1;
-
+#endif
*buffer++ = pcm_range(sample, 24);
--num_samples;
}