From 7c6fd8a6ba548bc2a5499b2ef078f1ada023fb44 Mon Sep 17 00:00:00 2001
From: Victoria Zhislina
Date: Fri, 15 Jun 2018 16:38:16 +0300
Subject: vabd(q)_u8 and vabd(q)_u16 optimization

---
 NEON_2_SSE.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/NEON_2_SSE.h b/NEON_2_SSE.h
index 26ea519..fea109a 100644
--- a/NEON_2_SSE.h
+++ b/NEON_2_SSE.h
@@ -5826,24 +5826,18 @@ _NEON2SSE_INLINE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b) // VABD.S32 q0,q0
 uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0
 _NEON2SSE_INLINE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b) //no abs for unsigned
 {
-    __m128i cmp, difab, difba;
-    cmp = vcgtq_u8(a,b);
-    difab = _mm_sub_epi8(a,b);
-    difba = _mm_sub_epi8 (b,a);
-    difab = _mm_and_si128(cmp, difab);
-    difba = _mm_andnot_si128(cmp, difba);
+    __m128i difab, difba;
+    difab = _mm_subs_epu8(a,b);
+    difba = _mm_subs_epu8 (b,a);
     return _mm_or_si128(difab, difba);
 }
 
 uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.s16 q0,q0,q0
 _NEON2SSE_INLINE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b)
 {
-    __m128i cmp, difab, difba;
-    cmp = vcgtq_u16(a,b);
-    difab = _mm_sub_epi16(a,b);
-    difba = _mm_sub_epi16 (b,a);
-    difab = _mm_and_si128(cmp, difab);
-    difba = _mm_andnot_si128(cmp, difba);
+    __m128i difab, difba;
+    difab = _mm_subs_epu16(a,b);
+    difba = _mm_subs_epu16 (b,a);
     return _mm_or_si128(difab, difba);
 }
 
-- 
cgit v1.2.3