diff options
author | fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90> | 2014-05-20 19:22:30 +0000 |
---|---|---|
committer | fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90> | 2014-05-20 19:22:30 +0000 |
commit | b18413e568ae742114eabb8450b180db98a83be0 (patch) | |
tree | 416664e471bbc85a971df6f10615ecdaf66d1f0d | |
parent | 8b857c0ac6f0e8cb0fc0b9410ea0d15b3afda4ab (diff) | |
download | libyuv-b18413e568ae742114eabb8450b180db98a83be0.tar.gz |
YUV scaling with 16 bit planes
BUG=331
TESTED=libyuv_unittest --gunit_also_run_disabled_tests --gtest_filter=**.ScaleFrom1280x720*
R=debargha@google.com, tpsiaki@google.com
Review URL: https://webrtc-codereview.appspot.com/17569004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1005 16f28f9a-4ce2-e073-06de-1de4eb20be90
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/planar_functions.h | 5 | ||||
-rw-r--r-- | include/libyuv/row.h | 9 | ||||
-rw-r--r-- | include/libyuv/scale.h | 17 | ||||
-rw-r--r-- | include/libyuv/scale_row.h | 40 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/planar_functions.cc | 49 | ||||
-rw-r--r-- | source/row_common.cc | 39 | ||||
-rw-r--r-- | source/scale.cc | 790 | ||||
-rw-r--r-- | source/scale_common.cc | 393 | ||||
-rw-r--r-- | unit_test/scale_test.cc | 147 |
11 files changed, 1491 insertions, 2 deletions
diff --git a/README.chromium b/README.chromium index 2ef9a54..4742220 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1004 +Version: 1005 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index ac516c5..d10a169 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -28,6 +28,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, int width, int height); +LIBYUV_API +void CopyPlane_16(const uint16* src_y, int src_stride_y, + uint16* dst_y, int dst_stride_y, + int width, int height); + // Set a plane of data to a 32 bit value. LIBYUV_API void SetPlane(uint8* dst_y, int dst_stride_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 356f87c..3d3fe11 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -773,6 +773,8 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count); void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_C(const uint8* src, uint8* dst, int count); +void CopyRow_16_C(const uint16* src, uint16* dst, int count); + void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); @@ -1458,6 +1460,9 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, uint8* dst_uv, int pix); +void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, + uint16* dst_uv, int pix); + void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix); void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, @@ -1639,6 +1644,10 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); +void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); + // Sobel images. void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, uint8* dst_sobelx, int width); diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 592b8ed..a3bc07e 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -34,6 +34,12 @@ void ScalePlane(const uint8* src, int src_stride, int dst_width, int dst_height, enum FilterMode filtering); +void ScalePlane_16(const uint16* src, int src_stride, + int src_width, int src_height, + uint16* dst, int dst_stride, + int dst_width, int dst_height, + enum FilterMode filtering); + // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. // If filtering is kFilterNone, a simple nearest-neighbor algorithm is @@ -55,6 +61,17 @@ int I420Scale(const uint8* src_y, int src_stride_y, int dst_width, int dst_height, enum FilterMode filtering); +LIBYUV_API +int I420Scale_16(const uint16* src_y, int src_stride_y, + const uint16* src_u, int src_stride_u, + const uint16* src_v, int src_stride_v, + int src_width, int src_height, + uint16* dst_y, int dst_stride_y, + uint16* dst_u, int dst_stride_u, + uint16* dst_v, int dst_stride_v, + int dst_width, int dst_height, + enum FilterMode filtering); + #ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 13eccc4..8dc0762 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -70,6 +70,13 @@ void ScalePlaneVertical(int src_height, int x, int y, int dy, int bpp, enum FilterMode filtering); +void ScalePlaneVertical_16(int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_argb, uint16* dst_argb, + int x, int y, int dy, + int wpp, enum FilterMode filtering); + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, int dst_width, int dst_height, @@ -97,37 +104,70 @@ void ScaleSlope(int src_width, int src_height, void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* d, int dst_width); +void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* d, int dst_width); void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* d, int dst_width); +void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* d, int dst_width); void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx); +void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx); void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int, int); +void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int, int); void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx); +void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx); void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx); +void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx); void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width); void ScaleRowDown38_3_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); +void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, + ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); +void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, uint16* dst_ptr, int src_width, int src_height); +void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint32* dst_ptr, int src_width, int src_height); void ScaleARGBRowDown2_C(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cf2455c..2d4af9c 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1004 +#define LIBYUV_VERSION 1005 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index f0a8989..f1297ca 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -73,6 +73,55 @@ void CopyPlane(const uint8* src_y, int src_stride_y, } } +LIBYUV_API +void CopyPlane_16(const uint16* src_y, int src_stride_y, + uint16* dst_y, int dst_stride_y, + int width, int height) { + int y; + void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C; + // Coalesce rows. + if (src_stride_y == width && + dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_COPYROW_16_X86) + if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { + CopyRow = CopyRow_16_X86; + } +#endif +#if defined(HAS_COPYROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + CopyRow = CopyRow_16_SSE2; + } +#endif +#if defined(HAS_COPYROW_16_ERMS) + if (TestCpuFlag(kCpuHasERMS)) { + CopyRow = CopyRow_16_ERMS; + } +#endif +#if defined(HAS_COPYROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { + CopyRow = CopyRow_16_NEON; + } +#endif +#if defined(HAS_COPYROW_16_MIPS) + if (TestCpuFlag(kCpuHasMIPS)) { + CopyRow = CopyRow_16_MIPS; + } +#endif + + // Copy plane + for (y = 0; y < height; ++y) { + CopyRow(src_y, dst_y, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + // Copy I422. LIBYUV_API int I422Copy(const uint8* src_y, int src_stride_y, diff --git a/source/row_common.cc b/source/row_common.cc index 135bdc9..fa2b752 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1565,6 +1565,10 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) { memcpy(dst, src, count); } +void CopyRow_16_C(const uint16* src, uint16* dst, int count) { + memcpy(dst, src, count * 2); +} + void SetRow_C(uint8* dst, uint32 v8, int count) { #ifdef _MSC_VER // VC will generate rep stosb. @@ -1890,6 +1894,14 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride, } } +void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, + uint16* dst_uv, int pix) { + int x; + for (x = 0; x < pix; ++x) { + dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; + } +} + // C version 2x2 -> 2x1. void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, @@ -1918,6 +1930,33 @@ void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, } } +void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, + ptrdiff_t src_stride, + int width, int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16* src_ptr1 = src_ptr + src_stride; + int x; + if (source_y_fraction == 0) { + memcpy(dst_ptr, src_ptr, width * 2); + return; + } + if (source_y_fraction == 128) { + HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width); + return; + } + for (x = 0; x < width - 1; x += 2) { + dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; + dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; + src_ptr += 2; + src_ptr1 += 2; + dst_ptr += 2; + } + if (width & 1) { + dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; + } +} + // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { diff --git a/source/scale.cc b/source/scale.cc index b3893cc..5b33b5f 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -90,6 +90,62 @@ static void ScalePlaneDown2(int src_width, int src_height, } } +static void ScalePlaneDown2_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + int y; + void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width) = + filtering == kFilterNone ? ScaleRowDown2_16_C : + (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : + ScaleRowDown2Box_16_C); + int row_stride = src_stride << 1; + if (!filtering) { + src_ptr += src_stride; // Point to odd rows. + src_stride = 0; + } + +#if defined(HAS_SCALEROWDOWN2_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { + ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON : + ScaleRowDown2_16_NEON; + } +#elif defined(HAS_SCALEROWDOWN2_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { + ScaleRowDown2 = filtering == kFilterNone ? + ScaleRowDown2_Unaligned_16_SSE2 : + (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 : + ScaleRowDown2Box_Unaligned_16_SSE2); + if (IS_ALIGNED(src_ptr, 16) && + IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) && + IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 : + (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 : + ScaleRowDown2Box_16_SSE2); + } + } +#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && + IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && + IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { + ScaleRowDown2 = filtering ? + ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2; + } +#endif + + if (filtering == kFilterLinear) { + src_stride = 0; + } + // TODO(fbarchard): Loop through source height to allow odd height. + for (y = 0; y < dst_height; ++y) { + ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); + src_ptr += row_stride; + dst_ptr += dst_stride; + } +} + // Scale plane, 1/4 // This is an optimized version for scaling down a plane to 1/4 of // its original size. @@ -137,6 +193,51 @@ static void ScalePlaneDown4(int src_width, int src_height, } } +static void ScalePlaneDown4_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + int y; + void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width) = + filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; + int row_stride = src_stride << 2; + if (!filtering) { + src_ptr += src_stride * 2; // Point to row 2. + src_stride = 0; + } +#if defined(HAS_SCALEROWDOWN4_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { + ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON : + ScaleRowDown4_16_NEON; + } +#elif defined(HAS_SCALEROWDOWN4_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : + ScaleRowDown4_16_SSE2; + } +#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && + IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && + IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { + ScaleRowDown4 = filtering ? + ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2; + } +#endif + + if (filtering == kFilterLinear) { + src_stride = 0; + } + for (y = 0; y < dst_height; ++y) { + ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); + src_ptr += row_stride; + dst_ptr += dst_stride; + } +} + // Scale plane down, 3/4 static void ScalePlaneDown34(int src_width, int src_height, @@ -219,6 +320,86 @@ static void ScalePlaneDown34(int src_width, int src_height, } } +static void ScalePlaneDown34_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + int y; + void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); + void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); + const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + assert(dst_width % 3 == 0); + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_16_C; + ScaleRowDown34_1 = ScaleRowDown34_16_C; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C; + } +#if defined(HAS_SCALEROWDOWN34_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_16_NEON; + ScaleRowDown34_1 = ScaleRowDown34_16_NEON; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON; + } + } +#endif +#if defined(HAS_SCALEROWDOWN34_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; + ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3; + } + } +#endif +#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && + IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && + IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2; + ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2; + } + } +#endif + + for (y = 0; y < dst_height - 2; y += 3) { + ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride; + dst_ptr += dst_stride; + ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride; + dst_ptr += dst_stride; + ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, + dst_ptr, dst_width); + src_ptr += src_stride * 2; + dst_ptr += dst_stride; + } + + // Remainder 1 or 2 rows with last row vertically unfiltered + if ((dst_height % 3) == 2) { + ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride; + dst_ptr += dst_stride; + ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); + } else if ((dst_height % 3) == 1) { + ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); + } +} + // Scale plane, 3/8 // This is an optimized version for scaling down a plane to 3/8 @@ -312,6 +493,83 @@ static void ScalePlaneDown38(int src_width, int src_height, } } +static void ScalePlaneDown38_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + int y; + void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); + void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width); + const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + assert(dst_width % 3 == 0); + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_16_C; + ScaleRowDown38_2 = ScaleRowDown38_16_C; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C; + } +#if defined(HAS_SCALEROWDOWN38_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_16_NEON; + ScaleRowDown38_2 = ScaleRowDown38_16_NEON; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; + } + } +#elif defined(HAS_SCALEROWDOWN38_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; + ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; + } + } +#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && + IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && + IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2; + ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2; + } + } +#endif + + for (y = 0; y < dst_height - 2; y += 3) { + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride * 3; + dst_ptr += dst_stride; + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride * 3; + dst_ptr += dst_stride; + ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride * 2; + dst_ptr += dst_stride; + } + + // Remainder 1 or 2 rows with last row vertically unfiltered + if ((dst_height % 3) == 2) { + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); + src_ptr += src_stride * 3; + dst_ptr += dst_stride; + ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); + } else if ((dst_height % 3) == 1) { + ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); + } +} + static __inline uint32 SumBox(int iboxwidth, int iboxheight, ptrdiff_t src_stride, const uint8* src_ptr) { uint32 sum = 0u; @@ -328,6 +586,22 @@ static __inline uint32 SumBox(int iboxwidth, int iboxheight, return sum; } +static __inline uint32 SumBox_16(int iboxwidth, int iboxheight, + ptrdiff_t src_stride, const uint16* src_ptr) { + uint32 sum = 0u; + int y; + assert(iboxwidth > 0); + assert(iboxheight > 0); + for (y = 0; y < iboxheight; ++y) { + int x; + for (x = 0; x < iboxwidth; ++x) { + sum += src_ptr[x]; + } + src_ptr += src_stride; + } + return sum; +} + static void ScalePlaneBoxRow_C(int dst_width, int boxheight, int x, int dx, ptrdiff_t src_stride, const uint8* src_ptr, uint8* dst_ptr) { @@ -342,6 +616,20 @@ static void ScalePlaneBoxRow_C(int dst_width, int boxheight, } } +static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight, + int x, int dx, ptrdiff_t src_stride, + const uint16* src_ptr, uint16* dst_ptr) { + int i; + int boxwidth; + for (i = 0; i < dst_width; ++i) { + int ix = x >> 16; + x += dx; + boxwidth = (x >> 16) - ix; + *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) / + (boxwidth * boxheight); + } +} + static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { uint32 sum = 0u; int x; @@ -352,6 +640,16 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { return sum; } +static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { + uint32 sum = 0u; + int x; + assert(iboxwidth > 0); + for (x = 0; x < iboxwidth; ++x) { + sum += src_ptr[x]; + } + return sum; +} + static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, const uint16* src_ptr, uint8* dst_ptr) { int i; @@ -369,6 +667,24 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, } } +static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, + const uint32* src_ptr, uint16* dst_ptr) { + int i; + int scaletbl[2]; + int minboxwidth = (dx >> 16); + int* scaleptr = scaletbl - minboxwidth; + int boxwidth; + scaletbl[0] = 65536 / (minboxwidth * boxheight); + scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); + for (i = 0; i < dst_width; ++i) { + int ix = x >> 16; + x += dx; + boxwidth = (x >> 16) - ix; + *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * + scaleptr[boxwidth] >> 16; + } +} + static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, const uint16* src_ptr, uint8* dst_ptr) { int boxwidth = (dx >> 16); @@ -380,6 +696,17 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, } } +static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, + const uint32* src_ptr, uint16* dst_ptr) { + int boxwidth = (dx >> 16); + int scaleval = 65536 / (boxwidth * boxheight); + int i; + for (i = 0; i < dst_width; ++i) { + *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; + x += boxwidth; + } +} + // Scale plane down to any dimensions, with interpolation. // (boxfilter). // @@ -459,6 +786,78 @@ static void ScalePlaneBox(int src_width, int src_height, } } +static void ScalePlaneBox_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr) { + int j; + // Initial source x/y coordinate and step values as 16.16 fixed point. + int x = 0; + int y = 0; + int dx = 0; + int dy = 0; + const int max_y = (src_height << 16); + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, + &x, &y, &dx, &dy); + src_width = Abs(src_width); + // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. + if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { + uint16* dst = dst_ptr; + int j; + for (j = 0; j < dst_height; ++j) { + int boxheight; + int iy = y >> 16; + const uint16* src = src_ptr + iy * src_stride; + y += dy; + if (y > max_y) { + y = max_y; + } + boxheight = (y >> 16) - iy; + ScalePlaneBoxRow_16_C(dst_width, boxheight, + x, dx, src_stride, + src, dst); + dst += dst_stride; + } + return; + } + { + // Allocate a row buffer of uint32. + align_buffer_64(row32, src_width * 4); + void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, + const uint32* src_ptr, uint16* dst_ptr) = + (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C; + void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride, + uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C; + +#if defined(HAS_SCALEADDROWS_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && +#ifdef AVOID_OVERREAD + IS_ALIGNED(src_width, 16) && +#endif + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + ScaleAddRows = ScaleAddRows_16_SSE2; + } +#endif + + for (j = 0; j < dst_height; ++j) { + int boxheight; + int iy = y >> 16; + const uint16* src = src_ptr + iy * src_stride; + y += dy; + if (y > (src_height << 16)) { + y = (src_height << 16); + } + boxheight = (y >> 16) - iy; + ScaleAddRows(src, src_stride, (uint32*)(row32), + src_width, boxheight); + ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), + dst_ptr); + dst_ptr += dst_stride; + } + free_aligned_buffer_64(row32); + } +} + // Scale plane down with bilinear interpolation. void ScalePlaneBilinearDown(int src_width, int src_height, int dst_width, int dst_height, @@ -562,6 +961,108 @@ void ScalePlaneBilinearDown(int src_width, int src_height, free_aligned_buffer_64(row); } +void ScalePlaneBilinearDown_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + // Initial source x/y coordinate and step values as 16.16 fixed point. + int x = 0; + int y = 0; + int dx = 0; + int dy = 0; + // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. + // Allocate a row buffer. + align_buffer_64(row, src_width * 2); + + const int max_y = (src_height - 1) << 16; + int j; + void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) = + (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; + void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = + InterpolateRow_16_C; + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, + &x, &y, &dx, &dy); + src_width = Abs(src_width); + +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSE2; + if (IS_ALIGNED(src_width, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSE2; + if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSSE3; + if (IS_ALIGNED(src_width, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; + if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSSE3; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { + InterpolateRow = InterpolateRow_Any_16_AVX2; + if (IS_ALIGNED(src_width, 32)) { + InterpolateRow = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_NEON; + if (IS_ALIGNED(src_width, 16)) { + InterpolateRow = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { + InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; + if (IS_ALIGNED(src_width, 4)) { + InterpolateRow = InterpolateRow_16_MIPS_DSPR2; + } + } +#endif + + +#if defined(HAS_SCALEFILTERCOLS_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { + ScaleFilterCols = ScaleFilterCols_16_SSSE3; + } +#endif + if (y > max_y) { + y = max_y; + } + + for (j = 0; j < dst_height; ++j) { + int yi = y >> 16; + const uint16* src = src_ptr + yi * src_stride; + if (filtering == kFilterLinear) { + ScaleFilterCols(dst_ptr, src, dst_width, x, dx); + } else { + int yf = (y >> 8) & 255; + InterpolateRow((uint16*)row, src, src_stride, src_width, yf); + ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx); + } + dst_ptr += dst_stride; + y += dy; + if (y > max_y) { + y = max_y; + } + } + free_aligned_buffer_64(row); +} + // Scale up down with bilinear interpolation. void ScalePlaneBilinearUp(int src_width, int src_height, int dst_width, int dst_height, @@ -702,6 +1203,145 @@ void ScalePlaneBilinearUp(int src_width, int src_height, } } +void ScalePlaneBilinearUp_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr, + enum FilterMode filtering) { + int j; + // Initial source x/y coordinate and step values as 16.16 fixed point. + int x = 0; + int y = 0; + int dx = 0; + int dy = 0; + const int max_y = (src_height - 1) << 16; + void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = + InterpolateRow_16_C; + void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) = + filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, + &x, &y, &dx, &dy); + src_width = Abs(src_width); + +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSE2; + if (IS_ALIGNED(dst_width, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSE2; + if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSSE3; + if (IS_ALIGNED(dst_width, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; + if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSSE3; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { + InterpolateRow = InterpolateRow_Any_16_AVX2; + if (IS_ALIGNED(dst_width, 32)) { + InterpolateRow = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { + InterpolateRow = InterpolateRow_Any_16_NEON; + if (IS_ALIGNED(dst_width, 16)) { + InterpolateRow = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { + InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; + if (IS_ALIGNED(dst_width, 4)) { + InterpolateRow = InterpolateRow_16_MIPS_DSPR2; + } + } +#endif + + if (filtering && src_width >= 32768) { + ScaleFilterCols = ScaleFilterCols64_16_C; + } +#if defined(HAS_SCALEFILTERCOLS_16_SSSE3) + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { + ScaleFilterCols = ScaleFilterCols_16_SSSE3; + } +#endif + if (!filtering && src_width * 2 == dst_width && x < 0x8000) { + ScaleFilterCols = ScaleColsUp2_16_C; +#if defined(HAS_SCALECOLS_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleFilterCols = ScaleColsUp2_16_SSE2; + } +#endif + } + + if (y > max_y) { + y = max_y; + } + { + int yi = y >> 16; + const uint16* src = src_ptr + yi * src_stride; + + // Allocate 2 row buffers. + const int kRowSize = (dst_width + 15) & ~15; + align_buffer_64(row, kRowSize * 4); + + uint16* rowptr = (uint16*)row; + int rowstride = kRowSize; + int lasty = yi; + + ScaleFilterCols(rowptr, src, dst_width, x, dx); + if (src_height > 1) { + src += src_stride; + } + ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); + src += src_stride; + + for (j = 0; j < dst_height; ++j) { + yi = y >> 16; + if (yi != lasty) { + if (y > max_y) { + y = max_y; + yi = y >> 16; + src = src_ptr + yi * src_stride; + } + if (yi != lasty) { + ScaleFilterCols(rowptr, src, dst_width, x, dx); + rowptr += rowstride; + rowstride = -rowstride; + lasty = yi; + src += src_stride; + } + } + if (filtering == kFilterLinear) { + InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); + } else { + int yf = (y >> 8) & 255; + InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); + } + dst_ptr += dst_stride; + y += dy; + } + free_aligned_buffer_64(row); + } +} + // Scale Plane to/from any dimensions, without interpolation. // Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and @@ -742,6 +1382,41 @@ static void ScalePlaneSimple(int src_width, int src_height, } } +static void ScalePlaneSimple_16(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_ptr, uint16* dst_ptr) { + int i; + void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) = ScaleCols_16_C; + // Initial source x/y coordinate and step values as 16.16 fixed point. + int x = 0; + int y = 0; + int dx = 0; + int dy = 0; + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, + &x, &y, &dx, &dy); + src_width = Abs(src_width); + + if (src_width * 2 == dst_width && x < 0x8000) { + ScaleCols = ScaleColsUp2_16_C; +#if defined(HAS_SCALECOLS_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleCols = ScaleColsUp2_16_SSE2; + } +#endif + } + + for (i = 0; i < dst_height; ++i) { + ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, + dst_width, x, dx); + dst_ptr += dst_stride; + y += dy; + } +} + // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. @@ -829,6 +1504,90 @@ void ScalePlane(const uint8* src, int src_stride, src_stride, dst_stride, src, dst); } +LIBYUV_API +void ScalePlane_16(const uint16* src, int src_stride, + int src_width, int src_height, + uint16* dst, int dst_stride, + int dst_width, int dst_height, + enum FilterMode filtering) { + // Simplify filtering when possible. + filtering = ScaleFilterReduce(src_width, src_height, + dst_width, dst_height, + filtering); + + // Negative height means invert the image. + if (src_height < 0) { + src_height = -src_height; + src = src + (src_height - 1) * src_stride; + src_stride = -src_stride; + } + + // Use specialized scales to improve performance for common resolutions. + // For example, all the 1/2 scalings will use ScalePlaneDown2() + if (dst_width == src_width && dst_height == src_height) { + // Straight copy. + CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); + return; + } + if (dst_width == src_width) { + int dy = FixedDiv(src_height, dst_height); + // Arbitrary scale vertically, but unscaled vertically. + ScalePlaneVertical_16(src_height, + dst_width, dst_height, + src_stride, dst_stride, src, dst, + 0, 0, dy, 1, filtering); + return; + } + if (dst_width <= Abs(src_width) && dst_height <= src_height) { + // Scale down. + if (4 * dst_width == 3 * src_width && + 4 * dst_height == 3 * src_height) { + // optimized, 3/4 + ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + if (2 * dst_width == src_width && 2 * dst_height == src_height) { + // optimized, 1/2 + ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + // 3/8 rounded up for odd sized chroma height. + if (8 * dst_width == 3 * src_width && + dst_height == ((src_height * 3 + 7) / 8)) { + // optimized, 3/8 + ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + if (4 * dst_width == src_width && 4 * dst_height == src_height && + filtering != kFilterBilinear) { + // optimized, 1/4 + ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + } + if (filtering == kFilterBox && dst_height * 2 < src_height) { + ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); + return; + } + if (filtering && dst_height > src_height) { + ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + if (filtering) { + ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); +} + // Scale an I420 image. // This function in turn calls a scaling function for each plane. @@ -863,6 +1622,37 @@ int I420Scale(const uint8* src_y, int src_stride_y, return 0; } +LIBYUV_API +int I420Scale_16(const uint16* src_y, int src_stride_y, + const uint16* src_u, int src_stride_u, + const uint16* src_v, int src_stride_v, + int src_width, int src_height, + uint16* dst_y, int dst_stride_y, + uint16* dst_u, int dst_stride_u, + uint16* dst_v, int dst_stride_v, + int dst_width, int dst_height, + enum FilterMode filtering) { + int src_halfwidth = SUBSAMPLE(src_width, 1, 1); + int src_halfheight = SUBSAMPLE(src_height, 1, 1); + int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || + !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { + return -1; + } + + ScalePlane_16(src_y, src_stride_y, src_width, src_height, + dst_y, dst_stride_y, dst_width, dst_height, + filtering); + ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, + dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, + filtering); + ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, + dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, + filtering); + return 0; +} + // Deprecated api LIBYUV_API int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/source/scale_common.cc b/source/scale_common.cc index 6ed8bfa..e4b2acc 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -42,6 +42,20 @@ void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + int x; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = src_ptr[1]; + dst[1] = src_ptr[3]; + dst += 2; + src_ptr += 4; + } + if (dst_width & 1) { + dst[0] = src_ptr[1]; + } +} + void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { const uint8* s = src_ptr; @@ -57,6 +71,21 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + const uint16* s = src_ptr; + int x; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = (s[0] + s[1] + 1) >> 1; + dst[1] = (s[2] + s[3] + 1) >> 1; + dst += 2; + s += 4; + } + if (dst_width & 1) { + dst[0] = (s[0] + s[1] + 1) >> 1; + } +} + void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { const uint8* s = src_ptr; @@ -74,6 +103,23 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + const uint16* s = src_ptr; + const uint16* t = src_ptr + src_stride; + int x; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; + dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; + dst += 2; + s += 4; + t += 4; + } + if (dst_width & 1) { + dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; + } +} + void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { int x; @@ -88,6 +134,20 @@ void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + int x; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = src_ptr[2]; + dst[1] = src_ptr[6]; + dst += 2; + src_ptr += 8; + } + if (dst_width & 1) { + dst[0] = src_ptr[2]; + } +} + void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { intptr_t stride = src_stride; @@ -124,6 +184,42 @@ void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + intptr_t stride = src_stride; + int x; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + + src_ptr[stride + 0] + src_ptr[stride + 1] + + src_ptr[stride + 2] + src_ptr[stride + 3] + + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + + 8) >> 4; + dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + + src_ptr[stride + 4] + src_ptr[stride + 5] + + src_ptr[stride + 6] + src_ptr[stride + 7] + + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + + src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + + src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + + 8) >> 4; + dst += 2; + src_ptr += 8; + } + if (dst_width & 1) { + dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + + src_ptr[stride + 0] + src_ptr[stride + 1] + + src_ptr[stride + 2] + src_ptr[stride + 3] + + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + + 8) >> 4; + } +} + void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { int x; @@ -137,6 +233,19 @@ void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + int x; + assert((dst_width % 3 == 0) && (dst_width > 0)); + for (x = 0; x < dst_width; x += 3) { + dst[0] = src_ptr[0]; + dst[1] = src_ptr[1]; + dst[2] = src_ptr[3]; + dst += 3; + src_ptr += 4; + } +} + // Filter rows 0 and 1 together, 3 : 1 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* d, int dst_width) { @@ -160,6 +269,28 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* d, int dst_width) { + const uint16* s = src_ptr; + const uint16* t = src_ptr + src_stride; + int x; + assert((dst_width % 3 == 0) && (dst_width > 0)); + for (x = 0; x < dst_width; x += 3) { + uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + d[0] = (a0 * 3 + b0 + 2) >> 2; + d[1] = (a1 * 3 + b1 + 2) >> 2; + d[2] = (a2 * 3 + b2 + 2) >> 2; + d += 3; + s += 4; + t += 4; + } +} + // Filter rows 1 and 2 together, 1 : 1 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* d, int dst_width) { @@ -183,6 +314,28 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* d, int dst_width) { + const uint16* s = src_ptr; + const uint16* t = src_ptr + src_stride; + int x; + assert((dst_width % 3 == 0) && (dst_width > 0)); + for (x = 0; x < dst_width; x += 3) { + uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + d[0] = (a0 + b0 + 1) >> 1; + d[1] = (a1 + b1 + 1) >> 1; + d[2] = (a2 + b2 + 1) >> 1; + d += 3; + s += 4; + t += 4; + } +} + // Scales a single row of pixels using point sampling. void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { @@ -199,6 +352,21 @@ void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, } } +void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) { + int j; + for (j = 0; j < dst_width - 1; j += 2) { + dst_ptr[0] = src_ptr[x >> 16]; + x += dx; + dst_ptr[1] = src_ptr[x >> 16]; + x += dx; + dst_ptr += 2; + } + if (dst_width & 1) { + dst_ptr[0] = src_ptr[x >> 16]; + } +} + // Scales a single row of pixels up by 2x using point sampling. void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { @@ -213,6 +381,19 @@ void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, } } +void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) { + int j; + for (j = 0; j < dst_width - 1; j += 2) { + dst_ptr[1] = dst_ptr[0] = src_ptr[0]; + src_ptr += 1; + dst_ptr += 2; + } + if (dst_width & 1) { + dst_ptr[0] = src_ptr[0]; + } +} + // (1-f)a + fb can be replaced with a + f(b-a) #define BLENDER(a, b, f) (uint8)((int)(a) + \ ((int)(f) * ((int)(b) - (int)(a)) >> 16)) @@ -267,6 +448,59 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, } #undef BLENDER +#define BLENDER(a, b, f) (uint16)((int)(a) + \ + ((int)(f) * ((int)(b) - (int)(a)) >> 16)) + +void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x, int dx) { + int j; + for (j = 0; j < dst_width - 1; j += 2) { + int xi = x >> 16; + int a = src_ptr[xi]; + int b = src_ptr[xi + 1]; + dst_ptr[0] = BLENDER(a, b, x & 0xffff); + x += dx; + xi = x >> 16; + a = src_ptr[xi]; + b = src_ptr[xi + 1]; + dst_ptr[1] = BLENDER(a, b, x & 0xffff); + x += dx; + dst_ptr += 2; + } + if (dst_width & 1) { + int xi = x >> 16; + int a = src_ptr[xi]; + int b = src_ptr[xi + 1]; + dst_ptr[0] = BLENDER(a, b, x & 0xffff); + } +} + +void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, + int dst_width, int x32, int dx) { + int64 x = (int64)(x32); + int j; + for (j = 0; j < dst_width - 1; j += 2) { + int64 xi = x >> 16; + int a = src_ptr[xi]; + int b = src_ptr[xi + 1]; + dst_ptr[0] = BLENDER(a, b, x & 0xffff); + x += dx; + xi = x >> 16; + a = src_ptr[xi]; + b = src_ptr[xi + 1]; + dst_ptr[1] = BLENDER(a, b, x & 0xffff); + x += dx; + dst_ptr += 2; + } + if (dst_width & 1) { + int64 xi = x >> 16; + int a = src_ptr[xi]; + int b = src_ptr[xi + 1]; + dst_ptr[0] = BLENDER(a, b, x & 0xffff); + } +} +#undef BLENDER + void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { int x; @@ -280,6 +514,19 @@ void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst, int dst_width) { + int x; + assert(dst_width % 3 == 0); + for (x = 0; x < dst_width; x += 3) { + dst[0] = src_ptr[0]; + dst[1] = src_ptr[3]; + dst[2] = src_ptr[6]; + dst += 3; + src_ptr += 8; + } +} + // 8x3 -> 3x1 void ScaleRowDown38_3_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, @@ -307,6 +554,32 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr, } } +void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, + ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width) { + intptr_t stride = src_stride; + int i; + assert((dst_width % 3 == 0) && (dst_width > 0)); + for (i = 0; i < dst_width; i += 3) { + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + + src_ptr[stride + 0] + src_ptr[stride + 1] + + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * + (65536 / 9) >> 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + + src_ptr[stride + 3] + src_ptr[stride + 4] + + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * + (65536 / 9) >> 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + + src_ptr[stride + 6] + src_ptr[stride + 7] + + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * + (65536 / 6) >> 16; + src_ptr += 8; + dst_ptr += 3; + } +} + // 8x2 -> 3x1 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { @@ -328,6 +601,26 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int dst_width) { + intptr_t stride = src_stride; + int i; + assert((dst_width % 3 == 0) && (dst_width > 0)); + for (i = 0; i < dst_width; i += 3) { + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + + src_ptr[stride + 0] + src_ptr[stride + 1] + + src_ptr[stride + 2]) * (65536 / 6) >> 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + + src_ptr[stride + 3] + src_ptr[stride + 4] + + src_ptr[stride + 5]) * (65536 / 6) >> 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + + src_ptr[stride + 6] + src_ptr[stride + 7]) * + (65536 / 4) >> 16; + src_ptr += 8; + dst_ptr += 3; + } +} + void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, uint16* dst_ptr, int src_width, int src_height) { int x; @@ -346,6 +639,24 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, } } +void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride, + uint32* dst_ptr, int src_width, int src_height) { + int x; + assert(src_width > 0); + assert(src_height > 0); + for (x = 0; x < src_width; ++x) { + const uint16* s = src_ptr + x; + unsigned int sum = 0u; + int y; + for (y = 0; y < src_height; ++y) { + sum += s[0]; + s += src_stride; + } + // No risk of overflow here now + dst_ptr[x] = sum; + } +} + void ScaleARGBRowDown2_C(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { @@ -637,6 +948,88 @@ void ScalePlaneVertical(int src_height, y += dy; } } +void ScalePlaneVertical_16(int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint16* src_argb, uint16* dst_argb, + int x, int y, int dy, + int wpp, enum FilterMode filtering) { + // TODO(fbarchard): Allow higher wpp. + int dst_width_words = dst_width * wpp; + void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb, + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = + InterpolateRow_16_C; + const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; + int j; + assert(wpp >= 1 && wpp <= 2); + assert(src_height != 0); + assert(dst_width > 0); + assert(dst_height > 0); + src_argb += (x >> 16) * wpp; +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSE2; + if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSE2; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { + InterpolateRow = InterpolateRow_Any_16_SSSE3; + if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { + InterpolateRow = InterpolateRow_16_SSSE3; + } + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { + InterpolateRow = InterpolateRow_Any_16_AVX2; + if (IS_ALIGNED(dst_width_bytes, 32)) { + InterpolateRow = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { + InterpolateRow = InterpolateRow_Any_16_NEON; + if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && + IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { + InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; + if (IS_ALIGNED(dst_width_bytes, 4)) { + InterpolateRow = InterpolateRow_16_MIPS_DSPR2; + } + } +#endif + for (j = 0; j < dst_height; ++j) { + int yi; + int yf; + if (y > max_y) { + y = max_y; + } + yi = y >> 16; + yf = filtering ? ((y >> 8) & 255) : 0; + InterpolateRow(dst_argb, src_argb + yi * src_stride, + src_stride, dst_width_words, yf); + dst_argb += dst_stride; + y += dy; + } +} // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 366c5fd..00f0707 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -132,6 +132,134 @@ static int TestFilter(int src_width, int src_height, return max_diff; } +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. +static int TestFilter_16(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f, int benchmark_iterations) { + int i, j; + const int b = 0; // 128 to test for padding/stride. + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2); + int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); + + int src_stride_y = b * 2 + Abs(src_width); + int src_stride_uv = b * 2 + src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size) + align_buffer_page_end(src_u, src_uv_plane_size) + align_buffer_page_end(src_v, src_uv_plane_size) + align_buffer_page_end(src_y_16, src_y_plane_size * 2) + align_buffer_page_end(src_u_16, src_uv_plane_size * 2) + align_buffer_page_end(src_v_16, src_uv_plane_size * 2) + uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16); + uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16); + uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16); + + srandom(time(NULL)); + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = b; i < src_height + b; ++i) { + for (j = b; j < src_width + b; ++j) { + p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j]; + } + } + + for (i = b; i < (src_height_uv + b); ++i) { + for (j = b; j < (src_width_uv + b); ++j) { + p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j]; + p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j]; + } + } + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); + int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); + + int dst_stride_y = b * 2 + dst_width; + int dst_stride_uv = b * 2 + dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size) + align_buffer_page_end(dst_u_8, dst_uv_plane_size) + align_buffer_page_end(dst_v_8, dst_uv_plane_size) + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2) + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2) + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2) + + uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16); + uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16); + uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16); + + I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, + src_u + (src_stride_uv * b) + b, src_stride_uv, + src_v + (src_stride_uv * b) + b, src_stride_uv, + src_width, src_height, + dst_y_8 + (dst_stride_y * b) + b, dst_stride_y, + dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_width, dst_height, f); + + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y, + p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv, + p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv, + src_width, src_height, + p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y, + p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv, + p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_width, dst_height, f); + } + + // Expect an exact match + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b; j < (dst_width + b); ++j) { + int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] - + p_dst_y_16[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = b; i < (dst_height_uv + b); ++i) { + for (j = b; j < (dst_width_uv + b); ++j) { + int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] - + p_dst_u_16[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] - + p_dst_v_16[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_8) + free_aligned_buffer_page_end(dst_u_8) + free_aligned_buffer_page_end(dst_v_8) + free_aligned_buffer_page_end(dst_y_16) + free_aligned_buffer_page_end(dst_u_16) + free_aligned_buffer_page_end(dst_v_16) + + free_aligned_buffer_page_end(src_y) + free_aligned_buffer_page_end(src_u) + free_aligned_buffer_page_end(src_v) + free_aligned_buffer_page_end(src_y_16) + free_aligned_buffer_page_end(src_u_16) + free_aligned_buffer_page_end(src_v_16) + + return max_diff; +} + #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \ TEST_F(libyuvTest, ScaleDownBy##name##_##filter) { \ int diff = TestFilter(benchmark_width_, benchmark_height_, \ @@ -139,6 +267,13 @@ static int TestFilter(int src_width, int src_height, Abs(benchmark_height_) * vfactor, \ kFilter##filter, benchmark_iterations_); \ EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(libyuvTest, ScaleDownBy##name##_##filter##_16) { \ + int diff = TestFilter_16(benchmark_width_, benchmark_height_, \ + Abs(benchmark_width_) * hfactor, \ + Abs(benchmark_height_) * vfactor, \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ } // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but @@ -168,6 +303,18 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4) Abs(benchmark_width_), Abs(benchmark_height_), \ kFilter##filter, benchmark_iterations_); \ EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(libyuvTest, name##To##width##x##height##_##filter##_16) { \ + int diff = TestFilter_16(benchmark_width_, benchmark_height_, \ + width, height, \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(libyuvTest, name##From##width##x##height##_##filter##_16) { \ + int diff = TestFilter_16(width, height, \ + Abs(benchmark_width_), Abs(benchmark_height_), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ } // Test scale to a specified size with all 4 filters. |