From 7f701744a7aa65ff4c123c293791670ae0d6b8c4 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 18 Feb 2009 22:27:58 +0100 Subject: pcm_volume: optimized pcm_volume_change_24() on i386 Added an inline assembly function for the 64 bit multiplication. Benchmark results on a Pentium II 266 MHz, 512 MB of 24 bit PCM data: dd if=/dev/zero bs=64k count=8k | time ./test/software_volume 48000:24:2 >/dev/null Before this patch 22.94s, after this patch 7.24s. --- src/pcm_volume.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/pcm_volume.c b/src/pcm_volume.c index 2dff14ef5..cb1d82b00 100644 --- a/src/pcm_volume.c +++ b/src/pcm_volume.c @@ -58,16 +58,55 @@ pcm_volume_change_16(int16_t *buffer, unsigned num_samples, int volume) } } +#if __i386__ +/** + * Optimized volume function for i386. Use the EDX:EAX 2*32 bit + * multiplication result instead of emulating 64 bit multiplication. + */ +static inline int32_t +pcm_volume_sample_24(int32_t sample, int32_t volume, int32_t dither) +{ + int32_t result; + + asm(/* edx:eax = sample * volume */ + "imul %2\n" + + /* "add %3, %1\n" dithering disabled for now, because we + have no overflow check - is dithering really important + here? */ + + /* eax = edx:eax / PCM_VOLUME_1 */ + "sal $22, %%edx\n" + "shr $10, %1\n" + "or %%edx, %1\n" + + : "=a"(result) + : "0"(sample), "r"(volume) /* , "r"(dither) */ + : "edx" + ); + + return result; +} +#endif + static void pcm_volume_change_24(int32_t *buffer, unsigned num_samples, int volume) { while (num_samples > 0) { +#if __i386__ + /* assembly version for i386 */ + int32_t sample = *buffer; + + sample = pcm_volume_sample_24(sample, volume, + pcm_volume_dither()); +#else + /* portable version */ int64_t sample = *buffer; sample = (sample * volume + pcm_volume_dither() + PCM_VOLUME_1 / 2) / PCM_VOLUME_1; - +#endif *buffer++ = pcm_range(sample, 24); --num_samples; } -- cgit v1.2.3