pcm/Neon: apply bit shift during float->int conversion

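Avoid the separate multiplication step: instead of scaling the samples with vmulq_f32 before converting, let vcvtq_n_s32_f32 apply the scale (as a number of fractional bits) during the float-to-integer conversion itself. This is a speedup of 20%.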
author: Max Kellermann <max@duempel.org> 2014-03-16 08:57:04 +0100
committer: Max Kellermann <max@duempel.org> 2014-03-16 09:05:30 +0100
commit: 6f4775a8eeb30d0fde622f4e89979179af47d219 (patch)
tree: 4bf9185d16cecb79921e4e961c37e477cdb29977 /src
parent: c7e2f558a0608eda343ab6f4d8801204497bebf5 (diff)
download: mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.tar.gz
mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.tar.xz
mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.zip
1 file changed, 2 insertions, 7 deletions
diff --git a/src/pcm/Neon.hxx b/src/pcm/Neon.hxx
index a6f52db42..2d3a59d2b 100644
--- a/src/pcm/Neon.hxx
+++ b/src/pcm/Neon.hxx
@@ -68,21 +68,16 @@ struct NeonFloatTo16 {
 	static constexpr size_t BLOCK_SIZE = 16;
 
 	void Convert(int16_t *dst, const float *src, const size_t n) const {
-		const float32x4_t factor =
-			vdupq_n_f32(1 << (DstTraits::BITS - 1));
-
 		for (unsigned i = 0; i < n / BLOCK_SIZE;
 		     ++i, src += BLOCK_SIZE, dst += BLOCK_SIZE) {
 			/* load 16 float samples into 4 quad
 			   registers */
 			float32x4x4_t value = vld4q_f32(src);
 
-			/* apply factor */
-			neon_x4_b(vmulq_f32, value, value, factor);
-
 			/* convert to 32 bit integer */
 			int32x4x4_t ivalue;
-			neon_x4_u(vcvtq_s32_f32, ivalue, value);
+			neon_x4_b(vcvtq_n_s32_f32, ivalue, value,
+				  DstTraits::BITS - 1);
 
 			/* convert to 16 bit integer with saturation */
 			int16x4x4_t nvalue;
author	Max Kellermann <max@duempel.org>	2014-03-16 08:57:04 +0100
committer	Max Kellermann <max@duempel.org>	2014-03-16 09:05:30 +0100
commit	6f4775a8eeb30d0fde622f4e89979179af47d219 (patch)
tree	4bf9185d16cecb79921e4e961c37e477cdb29977 /src
parent	c7e2f558a0608eda343ab6f4d8801204497bebf5 (diff)
download	mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.tar.gz mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.tar.xz mpd-6f4775a8eeb30d0fde622f4e89979179af47d219.zip