diff options
author | fbarchard@google.com <fbarchard@google.com> | 2014-10-16 23:46:48 +0000 |
---|---|---|
committer | fbarchard@google.com <fbarchard@google.com> | 2014-10-16 23:46:48 +0000 |
commit | 3dbaaf003281e8219ef5a12cb3cfc8e5c7e4e335 (patch) | |
tree | 725fca234a9859a64e0972cb9a8278ca4f10ef73 | |
parent | e7376886031468a1a776fe7c93aaf177000c8f56 (diff) | |
download | libyuv-3dbaaf003281e8219ef5a12cb3cfc8e5c7e4e335.tar.gz |
switch win64 intrinsics to loadu / storeu for unaligned memory.
BUG=372
TESTED=untested
R=brucedawson@google.com, harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/30729004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1124 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/row_win.cc | 10 |
3 files changed, 7 insertions, 7 deletions
```diff
diff --git a/README.chromium b/README.chromium
index 3cbe9fb..0cf5693 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1123
+Version: 1124
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 3949920..1f26da2 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1123
+#define LIBYUV_VERSION 1124
 
 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/source/row_win.cc b/source/row_win.cc
index e26a622..969d78c 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -89,8 +89,8 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
-xmm1 = _mm_load_si128(&xmm0);
-xmm2 = _mm_load_si128(&xmm0);
+xmm1 = _mm_loadu_si128(&xmm0);
+xmm2 = _mm_loadu_si128(&xmm0);
 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
@@ -112,12 +112,12 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
 xmm2 = _mm_packus_epi16(xmm2, xmm2);
 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
 xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
-xmm1 = _mm_load_si128(&xmm0);
+xmm1 = _mm_loadu_si128(&xmm0);
 xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
 xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
 
-_mm_store_si128((__m128i *)dst_argb, xmm0);
-_mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
+_mm_storeu_si128((__m128i *)dst_argb, xmm0);
+_mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
 
 y_buf += 8;
 u_buf += 4;
```