diff options
author | Max Kellermann <max@duempel.org> | 2009-02-18 22:27:58 +0100 |
---|---|---|
committer | Max Kellermann <max@duempel.org> | 2009-02-18 22:27:58 +0100 |
commit | 7f701744a7aa65ff4c123c293791670ae0d6b8c4 (patch) | |
tree | 1d0f616a3fb4bc89714dc0c1fea0ef13578a8d2f /src/pcm_volume.c | |
parent | 76dc9ac5f9437268bc167bc681168ef2c1f7420b (diff) | |
download | mpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.tar.gz mpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.tar.xz mpd-7f701744a7aa65ff4c123c293791670ae0d6b8c4.zip |
pcm_volume: optimized pcm_volume_change_24() on i386
Added an inline assembly function for the 64-bit multiplication.
Benchmark results on a Pentium II 266 MHz, using 512 MB of 24-bit PCM data:
dd if=/dev/zero bs=64k count=8k |
time ./test/software_volume 48000:24:2 >/dev/null
Before this patch 22.94s, after this patch 7.24s.
Diffstat (limited to '')
-rw-r--r-- | src/pcm_volume.c | 41 |
1 files changed, 40 insertions, 1 deletions
```diff
diff --git a/src/pcm_volume.c b/src/pcm_volume.c
index 2dff14ef5..cb1d82b00 100644
--- a/src/pcm_volume.c
+++ b/src/pcm_volume.c
@@ -58,16 +58,55 @@ pcm_volume_change_16(int16_t *buffer, unsigned num_samples, int volume)
 	}
 }
 
+#if __i386__
+/**
+ * Optimized volume function for i386. Use the EDX:EAX 2*32 bit
+ * multiplication result instead of emulating 64 bit multiplication.
+ */
+static inline int32_t
+pcm_volume_sample_24(int32_t sample, int32_t volume, int32_t dither)
+{
+	int32_t result;
+
+	asm(/* edx:eax = sample * volume */
+	    "imul %2\n"
+
+	    /* "add %3, %1\n" dithering disabled for now, because we
+	       have no overflow check - is dithering really important
+	       here? */
+
+	    /* eax = edx:eax / PCM_VOLUME_1 */
+	    "sal $22, %%edx\n"
+	    "shr $10, %1\n"
+	    "or %%edx, %1\n"
+
+	    : "=a"(result)
+	    : "0"(sample), "r"(volume) /* , "r"(dither) */
+	    : "edx"
+	    );
+
+	return result;
+}
+#endif
+
 static void
 pcm_volume_change_24(int32_t *buffer, unsigned num_samples, int volume)
 {
 	while (num_samples > 0) {
+#if __i386__
+		/* assembly version for i386 */
+		int32_t sample = *buffer;
+
+		sample = pcm_volume_sample_24(sample, volume,
+					      pcm_volume_dither());
+#else
+		/* portable version */
 		int64_t sample = *buffer;
 
 		sample = (sample * volume + pcm_volume_dither() +
 			  PCM_VOLUME_1 / 2)
 			/ PCM_VOLUME_1;
-
+#endif
 		*buffer++ = pcm_range(sample, 24);
 		--num_samples;
 	}
```