diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-04-19 18:27:08 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2024-04-19 18:27:08 +0000 |
commit | f72df94f7894736b8115a1b43ecf6acde951efbc (patch) | |
tree | 7aa355fd0b89ec0b2611e17ee84a14c6fa449e22 | |
parent | 60b8e179403ea8a6ae163ac1e62206766d75f137 (diff) | |
parent | e8ac7a941dd76f8ab24d01c9f1a0099672baf057 (diff) | |
download | libyuv-androidx-lifecycle-release.tar.gz |
Merge "Snap for 11739378 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-lifecycle-release" into androidx-lifecycle-release [androidx-lifecycle-release]
35 files changed, 4519 insertions, 3614 deletions
@@ -1,13 +1,7 @@ package { default_applicable_licenses: ["external_libyuv_license"], } - // Added automatically by a large-scale-change -// -// large-scale-change included anything that looked like it might be a license -// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc. -// -// Please consider removing redundant or irrelevant files from 'license_text:'. // See: http://go/android-license-faq license { name: "external_libyuv_license", @@ -20,15 +14,13 @@ license { "PATENTS", ], } +subdirs = ["files"] cc_library { name: "libyuv", vendor_available: true, product_available: true, host_supported: true, - vndk: { - enabled: true, - }, srcs: [ "source/compare.cc", @@ -99,6 +91,7 @@ cc_library { apex_available: [ "//apex_available:platform", "com.android.media.swcodec", + "com.android.virt", ], min_sdk_version: "29", } @@ -133,6 +126,7 @@ cc_test { "unit_test/rotate_argb_test.cc", "unit_test/rotate_test.cc", "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", "unit_test/scale_rgb_test.cc", "unit_test/scale_test.cc", "unit_test/scale_uv_test.cc", @@ -81,11 +81,11 @@ group("libyuv") { } if (libyuv_use_lsx) { - deps += [ ":libyuv_lsx"] + deps += [ ":libyuv_lsx" ] } if (libyuv_use_lasx) { - deps += [ ":libyuv_lasx"] + deps += [ ":libyuv_lasx" ] } if (!is_ios && !libyuv_disable_jpeg) { @@ -254,8 +254,8 @@ if (libyuv_use_lsx) { static_library("libyuv_lsx") { sources = [ # LSX Source Files - "source/row_lsx.cc", "source/rotate_lsx.cc", + "source/row_lsx.cc", "source/scale_lsx.cc", ] @@ -315,6 +315,7 @@ if (libyuv_include_tests) { "unit_test/basictypes_test.cc", "unit_test/color_test.cc", "unit_test/compare_test.cc", + "unit_test/convert_argb_test.cc", "unit_test/convert_test.cc", "unit_test/cpu_test.cc", "unit_test/cpu_thread_test.cc", @@ -323,6 +324,7 @@ if (libyuv_include_tests) { "unit_test/rotate_argb_test.cc", "unit_test/rotate_test.cc", "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", "unit_test/scale_rgb_test.cc", 
"unit_test/scale_test.cc", "unit_test/scale_uv_test.cc", diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a4a1994..9abfa74b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,10 @@ if(WIN32) SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" ) endif() +# this creates the cpuid tool +ADD_EXECUTABLE ( cpuid ${ly_base_dir}/util/cpuid.c ) +TARGET_LINK_LIBRARIES ( cpuid ${ly_lib_static} ) + # this creates the conversion tool ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc ) TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} ) @@ -18,24 +18,24 @@ vars = { # By default, download the fuchsia sdk from the public sdk directory. 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/', - 'fuchsia_version': 'version:13.20230714.0.1', + 'fuchsia_version': 'version:15.20230909.2.1', # By default, download the fuchsia images from the fuchsia GCS bucket. 'fuchsia_images_bucket': 'fuchsia', 'checkout_fuchsia': False, # Since the images are hundreds of MB, default to only downloading the image # most commonly useful for developers. Bots and developers that need to use # other images can override this with additional images. 
- 'checkout_fuchsia_boot_images': "terminal.qemu-x64", + 'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64", 'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""', } deps = { 'src/build': - Var('chromium_git') + '/chromium/src/build' + '@' + '860dae780c100c2d001dc6ee16625b17bc84c10f', + Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56', 'src/buildtools': - Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'ca163845c76db63454f99436f6cd2bf03739dc24', + Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525', 'src/testing': - Var('chromium_git') + '/chromium/src/testing' + '@' + '184b068a94f24ddf0b4299d48062779e1fc1950e', + Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6', 'src/third_party': Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667', @@ -82,15 +82,6 @@ deps = { 'dep_type': 'cipd', }, - # TODO(chromium:1458042): Remove these paths, when chromium builds files - # have moved to third_party/lib*/src paths. 
- 'src/buildtools/third_party/libc++/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80', - 'src/buildtools/third_party/libc++abi/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + 'd4760c0af99ccc9bce077960d5ddde4d66146c05', - 'src/buildtools/third_party/libunwind/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '6c0013015be8a2be9de4b1e54cdc9d576b1d0729', - 'src/third_party/catapult': Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d', 'src/third_party/clang-format/script': @@ -374,7 +365,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/jdk', - 'version': 'IivIDwNBf73mf7UwCOBceRUuDdtizMCgSOQDfUGHArsC', + 'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C', }, ], 'condition': 'checkout_android', @@ -5,15 +5,15 @@ name: "libyuv" description: "libyuv is an open source project that includes YUV scaling and conversion functionality." 
third_party { - url { - type: GIT - value: "https://chromium.googlesource.com/libyuv/libyuv/" - } - version: "f0921806a293e3e008e6325a51d4ea760c39d2c1" license_type: NOTICE last_upgrade_date { - year: 2023 - month: 9 - day: 5 + year: 2024 + month: 1 + day: 11 + } + identifier { + type: "Git" + value: "https://chromium.googlesource.com/libyuv/libyuv/" + version: "af6ac8265bbd07bcf977526458b60305c4304288" } } diff --git a/README.chromium b/README.chromium index c68be174..1389f285 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1875 +Version: 1883 License: BSD License File: LICENSE Shipped: yes diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 203f7e0d..5a81e7c9 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,24 +31,25 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; // unused at this time. 
+static const int kCpuHasSSE42 = 0x100; static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasF16C = 0x2000; -static const int kCpuHasGFNI = 0x4000; -static const int kCpuHasAVX512BW = 0x8000; -static const int kCpuHasAVX512VL = 0x10000; -static const int kCpuHasAVX512VNNI = 0x20000; -static const int kCpuHasAVX512VBMI = 0x40000; -static const int kCpuHasAVX512VBMI2 = 0x80000; -static const int kCpuHasAVX512VBITALG = 0x100000; -static const int kCpuHasAVX512VPOPCNTDQ = 0x200000; +static const int kCpuHasAVX512BW = 0x4000; +static const int kCpuHasAVX512VL = 0x8000; +static const int kCpuHasAVX512VNNI = 0x10000; +static const int kCpuHasAVX512VBMI = 0x20000; +static const int kCpuHasAVX512VBMI2 = 0x40000; +static const int kCpuHasAVX512VBITALG = 0x80000; +static const int kCpuHasAVX10 = 0x100000; +static const int kCpuHasAVXVNNI = 0x200000; +static const int kCpuHasAVXVNNIINT8 = 0x400000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x400000; -static const int kCpuHasMSA = 0x800000; +static const int kCpuHasMIPS = 0x800000; +static const int kCpuHasMSA = 0x1000000; // These flags are only valid on LOONGARCH processors. 
static const int kCpuHasLOONGARCH = 0x2000000; diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 0455b4cc..46685a50 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -800,14 +800,18 @@ extern "C" { #define HAS_ABGRTOYJROW_RVV #define HAS_ABGRTOYROW_RVV #define HAS_AR64TOARGBROW_RVV +#define HAS_AR64TOAB64ROW_RVV #define HAS_ARGBATTENUATEROW_RVV #define HAS_ARGBBLENDROW_RVV #define HAS_ARGBCOPYYTOALPHAROW_RVV #define HAS_ARGBEXTRACTALPHAROW_RVV #define HAS_ARGBTOAB64ROW_RVV +#define HAS_ARGBTOABGRROW_RVV #define HAS_ARGBTOAR64ROW_RVV +#define HAS_ARGBTOBGRAROW_RVV #define HAS_ARGBTORAWROW_RVV #define HAS_ARGBTORGB24ROW_RVV +#define HAS_ARGBTORGBAROW_RVV #define HAS_ARGBTOYJROW_RVV #define HAS_ARGBTOYMATRIXROW_RVV #define HAS_ARGBTOYROW_RVV @@ -839,6 +843,7 @@ extern "C" { #define HAS_RGB24TOARGBROW_RVV #define HAS_RGB24TOYJROW_RVV #define HAS_RGB24TOYROW_RVV +#define HAS_RGBATOARGBROW_RVV #define HAS_RGBATOYJROW_RVV #define HAS_RGBATOYMATRIXROW_RVV #define HAS_RGBATOYROW_RVV @@ -944,14 +949,6 @@ struct YuvConstants { free(var##_mem); \ var = NULL -#define align_buffer_64_16(var, size) \ - void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \ - uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */ - -#define free_aligned_buffer_64_16(var) \ - free(var##_mem); \ - var = NULL - #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -3494,8 +3491,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, int width); void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width); +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width); +void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width); +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width); +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width); 
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3509,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, const uint8_t* shuffler, @@ -3537,6 +3541,8 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, uint16_t* dst_ptr, int width); diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 443f89c2..bfe4a344 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -27,39 +27,40 @@ typedef enum FilterMode { } FilterModeEnum; // Scale a YUV plane. +// Returns 0 if successful. 
LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Sample is expected to be in the low 12 bits. LIBYUV_API -void ScalePlane_12(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. 
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index c015d772..02ed61ca 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -180,6 +180,8 @@ extern "C" { #if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) #define HAS_SCALEADDROW_RVV +// TODO: Test ScaleARGBRowDownEven_RVV and enable it +// #define HAS_SCALEARGBROWDOWNEVEN_RVV #define HAS_SCALEUVROWDOWN4_RVV #define HAS_SCALEUVROWDOWNEVEN_RVV #if __riscv_v_intrinsic == 11000 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index d45ef09d..a9c54400 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1875 +#define LIBYUV_VERSION 1883 #endif // INCLUDE_LIBYUV_VERSION_H_ @@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \ source/rotate_argb.o \ source/rotate_common.o \ source/rotate_gcc.o \ + source/rotate_lsx.o \ source/rotate_msa.o \ source/rotate_neon.o \ source/rotate_neon64.o \ @@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \ source/row_any.o \ source/row_common.o \ source/row_gcc.o \ + source/row_lasx.o \ + source/row_lsx.o \ source/row_msa.o \ source/row_neon.o \ source/row_neon64.o \ + source/row_rvv.o \ source/row_win.o \ source/scale.o \ source/scale_any.o \ source/scale_argb.o \ source/scale_common.o \ source/scale_gcc.o \ + source/scale_lsx.o \ source/scale_msa.o \ source/scale_neon.o \ source/scale_neon64.o \ source/scale_rgb.o \ + source/scale_rvv.o \ source/scale_uv.o \ source/scale_win.o \ source/video_common.o diff --git a/source/convert.cc b/source/convert.cc index b68fb1d3..6ac5bc43 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -54,18 +54,25 @@ static int I4xxToI420(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (src_uv_width <= 0 || src_uv_height == 0) { return -1; } if 
(dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Copy I420 with optional flipping. @@ -526,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y, const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = 
ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } LIBYUV_API @@ -777,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y, // Allocate u and v buffers align_buffer_64(plane_u, halfwidth * halfheight * 2); uint8_t* plane_v = plane_u + halfwidth * halfheight; + if (!plane_u) + return 1; I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, @@ -892,6 +908,8 @@ int MT2TToP010(const uint8_t* src_y, void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) = UnpackMT2T_C; align_buffer_64(row_buf, row_buf_size); + if (!row_buf) + return 1; #if defined(HAS_UNPACKMT2T_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -1092,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y, int awidth = halfwidth * 2; align_buffer_64(row_vu_0, awidth * 2); uint8_t* row_vu_1 = row_vu_0 + awidth; + if (!row_vu_0) + return 1; for (y = 0; y < height - 1; y += 2) { MergeUVRow(src_v, src_u, row_vu_0, halfwidth); @@ -1330,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1355,17 +1379,21 @@ int NV16ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height 
== 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; } // Any P[420]1[02] to I[420]1[02] format with mirroring. @@ -1443,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1468,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, 
Abs(width), Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; } // Convert YUY2 to I420. @@ -2660,6 +2696,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2836,6 +2874,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3015,6 +3055,8 @@ int RAWToI420(const uint8_t* src_raw, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3191,6 +3233,8 @@ int RAWToJ420(const uint8_t* src_raw, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3369,6 +3413,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -3549,6 +3595,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3762,6 +3810,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, // Allocate 2 rows of ARGB. 
const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { diff --git a/source/convert_argb.cc b/source/convert_argb.cc index f6ab0784..871fea59 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y, return 0; } +#ifndef __riscv // Shuffle table for converting BGRA to ARGB. static const uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; @@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64, return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64, (const uint8_t*)&kShuffleMaskAR64ToAB64, width, height); } +#else +// Convert BGRA to ARGB (same as ARGBToBGRA). +LIBYUV_API +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width, + height); +} + +// Convert ARGB to BGRA. +LIBYUV_API +int ARGBToBGRA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height) { + int y; + void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) = + ARGBToBGRARow_C; + if (!src_argb || !dst_bgra || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. 
+ if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_bgra = 0; + } + +#if defined(HAS_ARGBTOBGRAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToBGRARow = ARGBToBGRARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToBGRARow(src_argb, dst_bgra, width); + src_argb += src_stride_argb; + dst_bgra += dst_stride_bgra; + } + return 0; +} + +// Convert ARGB to ABGR. +LIBYUV_API +int ARGBToABGR(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + int y; + void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) = + ARGBToABGRRow_C; + if (!src_argb || !dst_abgr || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_abgr = 0; + } + +#if defined(HAS_ARGBTOABGRROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToABGRRow = ARGBToABGRRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToABGRRow(src_argb, dst_abgr, width); + src_argb += src_stride_argb; + dst_abgr += dst_stride_abgr; + } + return 0; +} + +// Convert ABGR to ARGB (same as ARGBToABGR). +LIBYUV_API +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width, + height); +} + +// Convert RGBA to ARGB. 
+LIBYUV_API +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + int y; + void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) = + RGBAToARGBRow_C; + if (!src_rgba || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; + } + // Coalesce rows. + if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_rgba = dst_stride_argb = 0; + } + +#if defined(HAS_RGBATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToARGBRow = RGBAToARGBRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + RGBAToARGBRow(src_rgba, dst_argb, width); + src_rgba += src_stride_rgba; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert AR64 To AB64. +LIBYUV_API +int AR64ToAB64(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64, + int width) = AR64ToAB64Row_C; + if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; + src_stride_ar64 = -src_stride_ar64; + } + // Coalesce rows. + if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_ar64 = dst_stride_ab64 = 0; + } + +#if defined(HAS_AR64TOAB64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + AR64ToAB64Row = AR64ToAB64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + AR64ToAB64Row(src_ar64, dst_ab64, width); + src_ar64 += src_stride_ar64; + dst_ab64 += dst_stride_ab64; + } + return 0; +} +#endif // Convert RGB24 to ARGB. 
LIBYUV_API @@ -4480,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y, // General case fallback creates NV12 align_buffer_64(plane_uv, halfwidth * 2 * halfheight); + if (!plane_uv) + return 1; dst_uv = plane_uv; for (y = 0; y < halfheight; ++y) { WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); @@ -5792,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y, { // Allocate a row of argb. align_buffer_64(row_argb, width * 4); + if (!row_argb) + return 1; for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, @@ -6054,6 +6248,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -6188,6 +6384,8 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -6321,6 +6519,8 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -6427,6 +6627,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -6524,6 +6726,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * 
sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear_12(src_u, temp_u, width); @@ -6614,6 +6818,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -6710,6 +6916,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear_12(src_u, temp_u, width); @@ -6888,6 +7096,8 @@ static int I420AlphaToARGBMatrixBilinear( uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -7086,6 +7296,8 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -7227,6 +7439,8 @@ static int I010AlphaToARGBMatrixBilinear( uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -7386,6 +7600,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + 
row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -7471,6 +7687,8 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); @@ -7560,6 +7778,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, const int row_size = (2 * width + 31) & ~31; align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_uv, temp_uv, width); @@ -7639,6 +7859,8 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); @@ -7728,6 +7950,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, const int row_size = (2 * width + 31) & ~31; align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_uv, temp_uv, width); @@ -7829,6 +8053,8 @@ static int I422ToRGB24MatrixLinear(const uint8_t* src_y, align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); diff --git a/source/convert_from.cc b/source/convert_from.cc index 4102d610..e69da9e9 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y, const int dst_y_height = 
Abs(src_y_height); const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); + int r; if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 || dst_uv_height <= 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Convert 8 bit YUV to 10 bit. 
@@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); - } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), - Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + } + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), + Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), + Abs(height), kFilterBilinear); + return r; } // 422 chroma to 444 chroma, 10/12 bit version @@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + 
dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + return r; } // 422 chroma is 1/2 width, 1x height @@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + return r; } // Copy to I400. Source can be I420,422,444,400,NV12,NV21 diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c3d037c4..b45de8c8 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -463,6 +463,8 @@ int ARGBToNV12(const uint8_t* src_argb, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -661,6 +663,8 @@ int ARGBToNV21(const uint8_t* src_argb, // Allocate a rows of uv. 
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -846,6 +850,8 @@ int ABGRToNV12(const uint8_t* src_abgr, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -1032,6 +1038,8 @@ int ABGRToNV21(const uint8_t* src_abgr, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -1232,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1426,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1527,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb, return 0; } +#ifndef __riscv // Shuffle table for converting ARGB to RGBA. static const uvec8 kShuffleMaskARGBToRGBA = { 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; @@ -1542,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb, return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } +#else +// Convert ARGB to RGBA. 
+LIBYUV_API +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + int y; + void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) = + ARGBToRGBARow_C; + if (!src_argb || !dst_rgba || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_rgba = 0; + } + +#if defined(HAS_ARGBTORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRGBARow = ARGBToRGBARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToRGBARow(src_argb, dst_rgba, width); + src_argb += src_stride_argb; + dst_rgba += dst_stride_rgba; + } + return 0; +} +#endif // Convert ARGB To RGB24. LIBYUV_API @@ -3230,14 +3284,21 @@ int RAWToJNV21(const uint8_t* src_raw, } #endif { +#if defined(HAS_RAWTOYJROW) // Allocate a row of uv. - align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); -#if !defined(HAS_RAWTOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; +#else + // Allocate row of uv and 2 rows of ARGB. 
+ const int row_size = ((width * 4 + 31) & ~31); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; + uint8_t* row = row_vj + row_uv_size; #endif + if (!row_uj) + return 1; for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RAWTOYJROW) @@ -3269,9 +3330,6 @@ int RAWToJNV21(const uint8_t* src_raw, ARGBToYJRow(row, dst_y, width); #endif } -#if !defined(HAS_RAWTOYJROW) - free_aligned_buffer_64(row); -#endif free_aligned_buffer_64(row_uj); } return 0; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 0c4a1581..eedce16b 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -292,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info0[4] = {0, 0, 0, 0}; int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; + int cpu_einfo7[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); + CpuId(7, 1, cpu_einfo7); } cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | @@ -308,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) { ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); + ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) | + ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) | + ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { @@ -318,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0; cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; - cpu_info |= (cpu_info7[2] & 0x00004000) ? 
kCpuHasAVX512VPOPCNTDQ : 0; - cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; + cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0; } } #endif diff --git a/source/planar_functions.cc b/source/planar_functions.cc index f6ec0dac..1c94e260 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -3027,6 +3027,8 @@ int I420Blend(const uint8_t* src_y0, // Row buffer for intermediate alpha pixels. align_buffer_64(halfalpha, halfwidth); + if (!halfalpha) + return 1; for (y = 0; y < height; y += 2) { // last row of odd height image use 1 row of alpha instead of 2. if (y == (height - 1)) { @@ -4710,6 +4712,8 @@ int GaussPlane_F32(const float* src, { // 2 pixels on each side, but aligned out to 16 bytes. align_buffer_64(rowbuf, (4 + width + 4) * 4); + if (!rowbuf) + return 1; memset(rowbuf, 0, 16); memset(rowbuf + (4 + width) * 4, 0, 16); float* row = (float*)(rowbuf + 16); @@ -4868,6 +4872,8 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* row_y0 = row_y + kEdge; uint8_t* row_y1 = row_y0 + row_size; uint8_t* row_y2 = row_y1 + row_size; + if (!rows) + return 1; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. @@ -5654,6 +5660,8 @@ int UYVYToNV12(const uint8_t* src_uyvy, int awidth = halfwidth * 2; // row of y and 2 rows of uv align_buffer_64(rows, awidth * 3); + if (!rows) + return 1; for (y = 0; y < height - 1; y += 2) { // Split Y from UV. diff --git a/source/rotate.cc b/source/rotate.cc index 3678b80a..3f8332c3 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> + #include "libyuv/rotate.h" #include "libyuv/convert.h" @@ -140,6 +142,9 @@ void RotatePlane180(const uint8_t* src, int height) { // Swap top and bottom row and mirror the content. Uses a temporary row. 
align_buffer_64(row, width); + assert(row); + if (!row) + return; const uint8_t* src_bot = src + src_stride * (height - 1); uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; @@ -543,24 +548,29 @@ static void RotatePlane180_16(const uint16_t* src, int dst_stride, int width, int height) { - // Swap top and bottom row and mirror the content. Uses a temporary row. - align_buffer_64_16(row, width); const uint16_t* src_bot = src + src_stride * (height - 1); uint16_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; + // Swap top and bottom row and mirror the content. Uses a temporary row. + align_buffer_64(row, width * 2); + uint16_t* row_tmp = (uint16_t*)row; + assert(row); + if (!row) + return; + // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { - CopyRow_16_C(src, row, width); // Copy top row into buffer - MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row - MirrorRow_16_C(row, dst_bot, width); // Mirror buffer into bottom row + CopyRow_16_C(src, row_tmp, width); // Copy top row into buffer + MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row + MirrorRow_16_C(row_tmp, dst_bot, width); // Mirror buffer into bottom row src += src_stride; dst += dst_stride; src_bot -= src_stride; dst_bot -= dst_stride; } - free_aligned_buffer_64_16(row); + free_aligned_buffer_64(row); } LIBYUV_API @@ -690,6 +700,7 @@ int I422Rotate(const uint8_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + int r; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; @@ -725,23 +736,35 @@ int I422Rotate(const uint8_t* src_y, case kRotate90: RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = 
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate180: @@ -1055,6 +1078,7 @@ int I210Rotate(const uint16_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + int r; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; @@ -1090,23 +1114,35 @@ int I210Rotate(const uint16_t* src_y, case kRotate90: RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, 
kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 034d53e8..d55fac4f 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -120,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
- align_buffer_64(row, width * 4); const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1); uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1); int half_height = (height + 1) >> 1; @@ -129,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb, ARGBMirrorRow_C; void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = CopyRow_C; + align_buffer_64(row, width * 4); + if (!row) + return 1; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; diff --git a/source/row_common.cc b/source/row_common.cc index 7591c6b6..3afc4b4d 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -281,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) { } } +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_abgr[0] = r; + dst_abgr[1] = g; + dst_abgr[2] = b; + dst_abgr[3] = a; + dst_abgr += 4; + src_argb += 4; + } +} + +void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_bgra[0] = a; + dst_bgra[1] = r; + dst_bgra[2] = g; + dst_bgra[3] = b; + dst_bgra += 4; + src_argb += 4; + } +} + +void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_rgba[0] = a; + dst_rgba[1] = b; + dst_rgba[2] = g; + dst_rgba[3] = r; + dst_rgba += 4; + src_argb += 4; + } +} + void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { @@ -309,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* 
src_argb, uint8_t* dst_rgb, int width) { } } +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t a = src_rgba[0]; + uint8_t b = src_rgba[1]; + uint8_t g = src_rgba[2]; + uint8_t r = src_rgba[3]; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; + dst_argb += 4; + src_rgba += 4; + } +} + void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -517,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { } } +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) { + int x; + for (x = 0; x < width; ++x) { + uint16_t b = src_ar64[0]; + uint16_t g = src_ar64[1]; + uint16_t r = src_ar64[2]; + uint16_t a = src_ar64[3]; + dst_ab64[0] = r; + dst_ab64[1] = g; + dst_ab64[2] = b; + dst_ab64[3] = a; + dst_ab64 += 4; + src_ar64 += 4; + } +} + // TODO(fbarchard): Make shuffle compatible with SIMD versions void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, diff --git a/source/row_lasx.cc b/source/row_lasx.cc index 1082ad80..be85022e 100644 --- a/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, - 0xF000F000F000F000}; + __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000, + 0xF000F000F000F000, 0xF000F000F000F000}; __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0}; @@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0x8000800080008000, 0x8000800080008000, 
0x8000800080008000, - 0x8000800080008000}; + __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000, + 0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); @@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, 0x0009000900090009, 0x0009000900090009}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, @@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, __m256i const_38 = __lasx_xvldi(38); __m256i const_94 = __lasx_xvldi(94); __m256i const_18 = __lasx_xvldi(18); - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; for (x = 0; x < len; x++) { @@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, @@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, __m256i const_38 = 
__lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, @@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, @@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, @@ -2242,8 +2242,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, __m256i const_21 = __lasx_xvldi(0x415); __m256i const_53 = __lasx_xvldi(0x435); __m256i const_10 = __lasx_xvldi(0x40A); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 
0x8080808080808080}; __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, 0x1F1D0F0D1B190B09}; diff --git a/source/row_lsx.cc b/source/row_lsx.cc index e626072a..fa088c9e 100644 --- a/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -565,7 +565,7 @@ void I422ToARGB4444Row_LSX(const uint8_t* src_y, __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m128i vec_ubvr, vec_ugvg; __m128i const_80 = __lsx_vldi(0x80); - __m128i alpha = {0xF000F000F000F000, 0xF000F000F000F000}; + __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000}; __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0}; YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); @@ -612,7 +612,7 @@ void I422ToARGB1555Row_LSX(const uint8_t* src_y, __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m128i vec_ubvr, vec_ugvg; __m128i const_80 = __lsx_vldi(0x80); - __m128i alpha = {0x8000800080008000, 0x8000800080008000}; + __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); @@ -792,7 +792,7 @@ void ARGBToUVRow_LSX(const uint8_t* src_argb0, __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013}; __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f}; __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009}; - __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0, 48, src0, src1, src2, src3); @@ -991,7 +991,7 @@ void ARGBToUV444Row_LSX(const uint8_t* src_argb, __m128i const_38 = __lsx_vldi(38); __m128i const_94 = __lsx_vldi(94); __m128i const_18 = __lsx_vldi(18); - __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 
0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, src0, src1, src2, src3); @@ -1533,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, @@ -1642,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, @@ -1703,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; @@ -1756,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = 
{0x1613100D0A070401, 0x00000000001F1C19}; @@ -1991,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, @@ -2039,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, @@ -2087,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, @@ -2136,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, __m128i const_21 = __lsx_vldi(0x415); __m128i const_53 = __lsx_vldi(0x435); __m128i const_10 = __lsx_vldi(0x40A); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, @@ -2514,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, __m128i const_256 = __lsx_vldi(0x500); __m128i zero = __lsx_vldi(0); __m128i alpha = __lsx_vldi(0xFF); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + 
__m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, @@ -2560,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); __m128i vec_scale = __lsx_vreplgr2vr_w(scale); __m128i zero = __lsx_vldi(0); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, diff --git a/source/row_rvv.cc b/source/row_rvv.cc index c875be2f..0bf2bef6 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -200,6 +200,23 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { } #endif +#ifdef HAS_AR64TOAB64ROW_RVV +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, + uint16_t* dst_ab64, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e16m2(w); + vuint16m2_t v_b, v_g, v_r, v_a; + __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl); + __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_ar64 += vl * 4; + dst_ab64 += vl * 4; + } while (w > 0); +} +#endif + #ifdef HAS_AB64TOARGBROW_RVV void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { size_t avl = (size_t)width; @@ -301,6 +318,66 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb, } #endif +#ifdef HAS_ARGBTOABGRROW_RVV +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_argb += vl * 4; + dst_abgr += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTOBGRAROW_RVV +void 
ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl); + w -= vl; + src_argb += vl * 4; + dst_bgra += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTORGBAROW_RVV +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + w -= vl; + src_argb += vl * 4; + dst_rgba += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_RGBATOARGBROW_RVV +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_rgba += vl * 4; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + #ifdef HAS_RGB24TOARGBROW_RVV void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, diff --git a/source/scale.cc b/source/scale.cc index 43d973af..b7a602ba 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -939,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width, // one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixel with simple // averaging. 
-static void ScalePlaneBox(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static int ScalePlaneBox(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -960,6 +960,8 @@ static void ScalePlaneBox(int src_width, { // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); + if (!row16) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_C @@ -1031,16 +1033,17 @@ static void ScalePlaneBox(int src_width, } free_aligned_buffer_64(row16); } + return 0; } -static void ScalePlaneBox_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static int ScalePlaneBox_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1054,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width, { // Allocate a row buffer of uint32_t. align_buffer_64(row32, src_width * 4); + if (!row32) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; @@ -1085,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width, } free_aligned_buffer_64(row32); } + return 0; } // Scale plane down with bilinear interpolation. 
-static void ScalePlaneBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1105,6 +1111,8 @@ static void ScalePlaneBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. align_buffer_64(row, src_width); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; @@ -1214,17 +1222,18 @@ static void ScalePlaneBilinearDown(int src_width, } } free_aligned_buffer_64(row); + return 0; } -static void ScalePlaneBilinearDown_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1233,6 +1242,8 @@ static void ScalePlaneBilinearDown_16(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. align_buffer_64(row, src_width * 2); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; @@ -1305,18 +1316,19 @@ static void ScalePlaneBilinearDown_16(int src_width, } } free_aligned_buffer_64(row); + return 0; } // Scale up down with bilinear interpolation. 
-static void ScalePlaneBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1415,6 +1427,8 @@ static void ScalePlaneBilinearUp(int src_width, // Allocate 2 row buffers. const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -1458,6 +1472,7 @@ static void ScalePlaneBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale plane, horizontally up by 2 times. @@ -1479,6 +1494,7 @@ static void ScalePlaneUp2_Linear(int src_width, int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1542,6 +1558,7 @@ static void ScalePlaneUp2_Bilinear(int src_width, ScaleRowUp2_Bilinear_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1608,6 +1625,7 @@ static void ScalePlaneUp2_12_Linear(int src_width, int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1661,6 +1679,7 @@ static void ScalePlaneUp2_12_Bilinear(int src_width, ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. 
assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1709,6 +1728,7 @@ static void ScalePlaneUp2_16_Linear(int src_width, int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1757,6 +1777,7 @@ static void ScalePlaneUp2_16_Bilinear(int src_width, ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1791,15 +1812,15 @@ static void ScalePlaneUp2_16_Bilinear(int src_width, } } -static void ScalePlaneBilinearUp_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1876,10 +1897,11 @@ static void ScalePlaneBilinearUp_16(int src_width, // Allocate 2 row buffers. const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 4); - - uint16_t* rowptr = (uint16_t*)row; int rowstride = row_size; int lasty = yi; + uint16_t* rowptr = (uint16_t*)row; + if (!row) + return 1; ScaleFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { @@ -1919,6 +1941,7 @@ static void ScalePlaneBilinearUp_16(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale Plane to/from any dimensions, without interpolation. @@ -2003,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. 
LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2027,7 +2050,7 @@ void ScalePlane(const uint8_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -2043,7 +2066,7 @@ void ScalePlane(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -2051,69 +2074,67 @@ void ScalePlane(const uint8_t* src, // optimized, 3/4 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - 
int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2129,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -2148,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -2156,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src, // optimized, 3/4 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown_16(src_width, src_height, dst_width, + dst_height, src_stride, dst_stride, src, + dst, filtering); } ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_12(const 
uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2233,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src, if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } - ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, - dst_width, dst_height, filtering); + return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, + dst_width, dst_height, filtering); } // Scale an I420 image. 
@@ -2271,6 +2291,7 @@ int I420Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2278,13 +2299,19 @@ int I420Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2309,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2316,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - 
ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2347,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2354,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } // Scale an I444 image. 
@@ -2384,19 +2425,27 @@ int I444Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2417,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = 
ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2450,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } // Scale an I422 image. 
@@ -2488,6 +2553,7 @@ int I422Scale(const uint8_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2495,13 +2561,19 @@ int I422Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2524,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2531,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, 
dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2560,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2567,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } // Scale an NV12 image. 
@@ -2597,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || @@ -2604,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, - dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, + dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); + return r; } // Deprecated api diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 1d5c1b60..18bdeb86 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -151,22 +151,27 @@ static void ScaleARGBDown2(int src_width, // ScaleARGB ARGB, 1/4 // This is an optimized version for scaling down a ARGB to 1/4 of // its original size. -static void ScaleARGBDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy) { +static int ScaleARGBDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of ARGB. 
const int row_size = (dst_width * 2 * 4 + 31) & ~31; + // TODO(fbarchard): Remove this row buffer and implement a ScaleARGBRowDown4 + // but implemented via a 2 pass wrapper that uses a very small array on the + // stack with a horizontal loop. align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = @@ -209,6 +214,7 @@ static void ScaleARGBDown4Box(int src_width, dst_argb += dst_stride; } free_aligned_buffer_64(row); + return 0; } // ScaleARGB ARGB Even @@ -278,10 +284,14 @@ static void ScaleARGBDownEven(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV) + if (filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV; + } +#endif #if defined(HAS_SCALEARGBROWDOWNEVEN_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ScaleARGBRowDownEven = - filtering ? ScaleARGBRowDownEvenBox_RVV : ScaleARGBRowDownEven_RVV; + if (!filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV; } #endif @@ -296,19 +306,19 @@ static void ScaleARGBDownEven(int src_width, } // Scale ARGB down with bilinear interpolation. -static void ScaleARGBBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, @@ -407,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width, // Allocate a row of ARGB. 
{ align_buffer_64(row, clip_src_width * 4); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; if (y > max_y) { @@ -430,22 +442,23 @@ static void ScaleARGBBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale ARGB up with bilinear interpolation. -static void ScaleARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, @@ -581,6 +594,8 @@ static void ScaleARGBBilinearUp(int src_width, // Allocate 2 rows of ARGB. const int row_size = (dst_width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -624,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #ifdef YUVSCALEUP // Scale YUV to ARGB up with bilinear interpolation. 
-static void ScaleYUVToARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleYUVToARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int dst_stride_argb, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, int width) = @@ -846,16 +862,17 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; - // Allocate 2 rows of ARGB. + // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB + // scaled horizontally to the destination width. const int row_size = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_row, src_width * 4); + align_buffer_64(row, row_size * 2 + src_width * 4); + uint8_t* argb_row = row + row_size * 2; uint8_t* rowptr = row; int rowstride = row_size; int lasty = yi; + if (!row) + return 1; // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); @@ -910,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y += dy; } free_aligned_buffer_64(row); - free_aligned_buffer_64(row_argb); + return 0; } #endif @@ -985,19 +1002,19 @@ static void ScaleARGBSimple(int src_width, // ScaleARGB a ARGB. 
// This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleARGB(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleARGB(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1042,18 +1059,18 @@ static void ScaleARGB(const uint8_t* src, ScaleARGBDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. - ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleARGBDown4Box(src_width, src_height, clip_width, + clip_height, src_stride, dst_stride, src, + dst, x, dx, y, dy); } ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } // Optimized odd scale down. ie 3, 5, 7, 9x. if ((dx & 0x10000) && (dy & 0x10000)) { @@ -1062,7 +1079,7 @@ static void ScaleARGB(const uint8_t* src, // Straight copy. ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } } } @@ -1071,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering); - return; + return 0; } if (filtering && dy < 65536) { - ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } if (filtering) { - ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } LIBYUV_API @@ -1110,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb, (clip_y + clip_height) > dst_height) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width, - clip_height, filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, + clip_width, clip_height, filtering); } // Scale an ARGB image. @@ -1131,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb, src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height, - filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale with YUV conversion to ARGB and clipping. 
@@ -1158,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); int r; + uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); + if (!argb_buffer) { + return 1; // Out of memory runtime error. + } (void)src_fourcc; // TODO(fbarchard): implement and/or assert. (void)dst_fourcc; I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc index fd14842d..de037e45 100644 --- a/source/scale_rvv.cc +++ b/source/scale_rvv.cc @@ -130,6 +130,7 @@ void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, } #endif +#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, @@ -148,6 +149,7 @@ void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, dst += vl; } while (w > 0); } +#endif #ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 536b9436..0931c89a 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -188,22 +188,24 @@ static void ScaleUVDown2(int src_width, // This is an optimized version for scaling down a UV to 1/4 of // its original size. #if HAS_SCALEUVDOWN4BOX -static void ScaleUVDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy) { +static int ScaleUVDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of UV. 
const int row_size = (dst_width * 2 * 2 + 15) & ~15; align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = @@ -255,6 +257,7 @@ static void ScaleUVDown4Box(int src_width, dst_uv += dst_stride; } free_aligned_buffer_64(row); + return 0; } #endif // HAS_SCALEUVDOWN4BOX @@ -344,19 +347,19 @@ static void ScaleUVDownEven(int src_width, // Scale UV down with bilinear interpolation. #if HAS_SCALEUVBILINEARDOWN -static void ScaleUVBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, @@ -446,9 +449,10 @@ static void ScaleUVBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of UV. { - align_buffer_64(row, clip_src_width * 2); - const int max_y = (src_height - 1) << 16; + align_buffer_64(row, clip_src_width * 2); + if (!row) + return 1; if (y > max_y) { y = max_y; } @@ -470,24 +474,25 @@ static void ScaleUVBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // Scale UV up with bilinear interpolation. 
#if HAS_SCALEUVBILINEARUP -static void ScaleUVBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, @@ -606,6 +611,8 @@ static void ScaleUVBilinearUp(int src_width, // Allocate 2 rows of UV. const int row_size = (dst_width * 2 + 15) & ~15; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -649,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // HAS_SCALEUVBILINEARUP @@ -984,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv, // Scale a UV plane (from NV12) // This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleUV(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleUV(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. 
int x = 0; int y = 0; @@ -1042,22 +1050,22 @@ static void ScaleUV(const uint8_t* src, ScaleUVDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } #endif #if HAS_SCALEUVDOWN4BOX if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. - ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, + dy); } #endif #if HAS_SCALEUVDOWNEVEN ScaleUVDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; #endif } // Optimized odd scale down. ie 3, 5, 7, 9x. @@ -1068,7 +1076,7 @@ static void ScaleUV(const uint8_t* src, // Straight copy. UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } #endif } @@ -1079,38 +1087,37 @@ static void ScaleUV(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering); - return; + return 0; } if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) { ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((clip_height + 1) / 2 == src_height && (clip_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } #if HAS_SCALEUVBILINEARUP if (filtering && dy < 65536) { - ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif #if HAS_SCALEUVBILINEARDOWN if (filtering) { - ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } // Scale an UV image. @@ -1128,9 +1135,9 @@ int UVScale(const uint8_t* src_uv, src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv, - dst_width, dst_height, 0, 0, dst_width, dst_height, filtering); - return 0; + return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, + dst_stride_uv, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale a 16 bit UV image. 
diff --git a/unit_test/convert_argb_test.cc b/unit_test/convert_argb_test.cc new file mode 100644 index 00000000..aeee8a7f --- /dev/null +++ b/unit_test/convert_argb_test.cc @@ -0,0 +1,2700 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <stdlib.h> +#include <time.h> + +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif +#include "../unit_test/unit_test.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ +#endif + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +// Some functions fail on big endian. Enable these tests on all cpus except +// PowerPC, but they are not optimized so disabled by default. +#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) +#define LITTLE_ENDIAN_ONLY_TEST 1 +#endif +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy + +// subsample amount uses a divide. 
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) + +#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, 
kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ + for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + 
free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#else +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#endif + +TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, 
uint16_t, 2, 2, 2, 10, 1, 1) +TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) + +// Provide matrix wrappers for full range bt.709 +#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) + +// Provide matrix wrappers for full range bt.2020 +#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) +#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) +#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) + +#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define 
I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + +#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + double time0 = get_time(); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + double time1 = get_time(); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_opt + OFF, \ + kStrideB, kWidth, NEG kHeight); \ + } \ + double time2 = get_time(); \ + printf(" %8d us C - %8d us OPT\n", \ + static_cast<int>((time1 - time0) * 1e6), \ + 
static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 4) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0) +#else +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0) +#endif + +#if defined(ENABLE_FULL_TESTS) +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1) 
+TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) +TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) +#endif +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) +TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) +TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) +#endif 
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) +#else // FULL_TESTS +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) +TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) +#endif +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) +#endif + +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = kWidth * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + 
OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ + } \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeight); \ + memset(dst_argb_opt, 101, kStrideB* kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_opt, kWidth * BPP_B, kWidth, \ + NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ + align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ + memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + kHeight); \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + kHeight); \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth * 4; ++j) { \ + EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \ + dst_argb32_opt[i * kWidth * 4 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Unaligned, +, 2) \ + 
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Invert, -, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) +#endif + +#define JNV12ToARGB(a, b, c, d, e, f, g, h) \ + NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV21ToARGB(a, b, c, d, e, f, g, h) \ + NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV12ToABGR(a, b, c, d, e, f, g, h) \ + NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV21ToABGR(a, b, c, d, e, f, g, h) \ + NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \ + NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV12ToRAW(a, b, c, d, e, f, g, h) \ + NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV21ToRAW(a, b, c, d, e, f, g, h) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \ + NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) + +TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) +#endif + +TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) 
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) +#endif + +#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeightB); \ + memset(dst_argb_opt, 101, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \ + kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ + 
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + align_buffer_page_end(dst_argb_c, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i] = 0xfe; \ + } \ + memset(dst_argb_c, 123, kStrideB* kHeightB); \ + memset(dst_argb_opt, 123, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \ + kStrideB, kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \ + kStrideB, kWidth, kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4) \ + 
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \ + TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) +#else +#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) +#endif + +TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, 
uint8_t, 3, 3, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) +TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(AR64, 
uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) + +// in place test +#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memcpy(dst_argb_c + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + for 
(int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0) + +TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +// TODO(fbarchard): Support in place for mirror. 
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +// TODO(fbarchard): Support in place for conversions that increase bpp. +// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) +// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, 
uint8_t, 4, 4, 1) +// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) + +#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeightB); \ + memset(dst_argb_opt, 101, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \ + NULL, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt, \ + 
kStrideB, NULL, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \ + STRIDE_B, HEIGHT_B) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA* kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 123, kStrideB* kHeightB); \ + memset(dst_argb_opt, 123, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \ + kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB, \ + NULL, kWidth, kHeight); \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, 
HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B, benchmark_width_, _Unaligned, +, 2) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B, benchmark_width_, _Invert, -, 0) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B, benchmark_width_, _Opt, +, 0) \ + TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) +#else +#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) \ + TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) +#endif + +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) +#endif + +// These conversions called twice, produce the original result. +// e.g. endian swap twice. +#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \ + OFF) \ + TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideA* kHeightA); \ + memset(dst_argb_opt, 101, kStrideA* kHeightA); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \ + kStrideA, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + 
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \ + kStrideA, kWidth, NEG kHeight); \ + } \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \ + kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \ + _Any, +, 0) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Unaligned, +, 2) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Opt, +, 0) +#else +#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Opt, +, 0) +#endif + +TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1) +TESTEND(ARGBToABGR, uint8_t, 4, 4, 1) +TESTEND(BGRAToARGB, uint8_t, 4, 4, 1) +TESTEND(ABGRToARGB, uint8_t, 4, 4, 1) +TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) + +#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + 
align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, 
ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1) +#else +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) +#endif + +#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, 
b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, 
d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) + +#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) + +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 
4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#else +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#endif + +TEST_F(LibYUVConvertTest, TestYToARGB) { + uint8_t y[32]; + uint8_t expectedg[32]; + for (int i = 0; i < 32; ++i) { + y[i] = i * 5 + 17; + expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f); + } + uint8_t argb[32 * 4]; + YToARGB(y, 0, argb, 0, 32, 1); + + for (int i = 0; i < 32; ++i) { + printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i], + 
argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]); + } + for (int i = 0; i < 32; ++i) { + EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); + } +} + +static const uint8_t kNoDither4x4[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +TEST_F(LibYUVConvertTest, TestNoDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, + benchmark_width_ * 2, kNoDither4x4, benchmark_width_, + benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { + EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); + } + + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); +} + +// Ordered 4x4 dither for 888 to 565. Values from 0 to 7. 
+static const uint8_t kDither565_4x4[16] = { + 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, +}; + +TEST_F(LibYUVConvertTest, TestDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_argbdither, + benchmark_width_ * benchmark_height_ * 4); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); + ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, + benchmark_width_ * 2, kDither565_4x4, benchmark_width_, + benchmark_height_); + RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, dst_argb, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, dst_argbdither, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); + } + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); + free_aligned_buffer_page_end(dst_argb); + free_aligned_buffer_page_end(dst_argbdither); +} + +#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ + const int kWidth 
= W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, \ + kStrideB, NULL, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B##Dither( \ + src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \ + dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ + align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \ + FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \ + kWidth, kHeight); \ + FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ + kWidth * BPP_C, kWidth, kHeight); \ + for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ + EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#endif + +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) +#endif + +// Transitive test. A to B to C is same as A to C. 
+// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. +#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, \ + kStrideC, kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ + kStrideC, kWidth, kHeight); \ + } \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + } \ + 
free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#endif + +#if defined(ENABLE_FULL_TESTS) +TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3) +TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(H420, 
2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) +#endif +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) +#else +TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 
1, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +#endif + +// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB. +// Benchmark 2 step conversion for comparison to 1 step conversion. +#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 
0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + /* Convert A to B */ \ + FMT_PLANAR##To##FMT_B( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ + dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ + kStrideC, kWidth, kHeight); \ + } \ + /* Convert A to C */ \ + FMT_PLANAR##To##FMT_C( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, 
FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1) +#else +#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) +#endif + +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U444Alpha, 1, 
1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +#else +TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +#endif + +#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \ + OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ + kStrideC, kWidth, kHeight); \ + } \ + for (int i = 0; i < kStrideC * kHeight; i += 4) { \ + EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ + EXPECT_NEAR(dst_argb_c[i + OFF + 3], 
dst_argb_bc[i + OFF + 3], 64); \ + } \ + free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Unaligned, +, 4, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) +#endif + +// Caveat: Destination needs to be 4 bytes +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4) +TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4) +#endif + +TEST_F(LibYUVConvertTest, RotateWithARGBSource) { + // 2x2 frames + uint32_t src[4]; + uint32_t dst[4]; + // some random input + src[0] = 0x11000000; + src[1] = 0x00450000; + src[2] = 0x00009f00; + src[3] = 0x000000ff; + // zeros on destination + dst[0] = 0x00000000; + dst[1] = 0x00000000; + dst[2] = 0x00000000; + dst[3] = 0x00000000; + + int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src), + 16, // input size + reinterpret_cast<uint8_t*>(dst), + 8, // destination stride + 0, // crop_x + 0, // crop_y + 2, // width + 2, // height + 2, // 
crop width + 2, // crop height + kRotate90, FOURCC_ARGB); + + EXPECT_EQ(r, 0); + // 90 degrees rotation, no conversion + EXPECT_EQ(dst[0], src[2]); + EXPECT_EQ(dst[1], src[0]); + EXPECT_EQ(dst[2], src[3]); + EXPECT_EQ(dst[3], src[1]); +} + +#ifdef HAS_ARGBTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { + // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels. + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ARGBToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ARGBToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ARGBToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ARGBToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ARGBTOAR30ROW_AVX2 + +#ifdef HAS_ABGRTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { + // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels. 
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ABGRToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ABGRToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ABGRToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ABGRToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ABGRTOAR30ROW_AVX2 + +#if !defined(LEAN_TESTS) + +// Provide matrix wrappers for 12 bit YUV +#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) + +#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define U410ToABGR(a, b, c, d, 
e, f, g, h, i, j) \ + I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) + +#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + +// TODO(fbarchard): Fix clamping issue affected by U channel. 
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \ + reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + 
} \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#else +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#endif + +// These conversions are only optimized for x86 +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1) 
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1) + +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) +#endif // LITTLE_ENDIAN_ONLY_TEST +#endif // DISABLE_SLOW_TESTS + +#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ 
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + reinterpret_cast<uint16_t*>(src_a + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16_t*>(src_u + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + reinterpret_cast<uint16_t*>(src_v + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + 
free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH) +#else +#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#endif + +#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, 
m) +#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) 
+#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define 
H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +/* Filter variants: forward the 13 test arguments to the matching MatrixFilter function, supplying kYuvI601Constants and kFilterBilinear. Each wrapper has to call the function for its own subsampling, so the I210Alpha test exercises I210AlphaToARGBMatrixFilter instead of repeating the I010 path. */ +#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) + +// These conversions are only optimized for x86 +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 
1, 10) +TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) +#endif // DISABLE_SLOW_TESTS + +/* Generates a LibYUVConvertTest case for a 16-bit source made of a src_y plane and a single src_uv plane, converted by FMT_PLANAR##To##FMT_B. NOTE(review): kStrideUV is already multiplied by 2 (presumably for interleaved U/V samples) and kSizeUV multiplies by 2 again, so src_uv holds twice kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) elements; this looks like harmless over-allocation, but confirm it is intentional. */ +#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, 
kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \ + (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] = \ + (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_uv + SOFF), \ + kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_uv + SOFF), \ + kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, 
S_DEPTH) +#else +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#endif + +#define P010ToARGB(a, b, c, d, e, f, g, h) \ + P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P210ToARGB(a, b, c, d, e, f, g, h) \ + P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P010ToAR30(a, b, c, d, e, f, g, h) \ + P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P210ToAR30(a, b, c, d, e, f, g, h) \ + P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P012ToARGB(a, b, c, d, e, f, g, h) \ + P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P212ToARGB(a, b, c, d, e, f, g, h) \ + P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P012ToAR30(a, b, c, d, e, f, g, h) \ + P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P212ToAR30(a, b, c, d, e, f, g, h) \ + P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P016ToARGB(a, b, c, d, e, f, g, h) \ + P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P216ToARGB(a, b, c, d, e, f, g, h) \ + P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P016ToAR30(a, b, c, d, e, f, g, h) \ + P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P216ToAR30(a, b, c, d, e, f, g, h) \ + P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P010ToARGBFilter(a, b, c, d, e, f, g, h) \ + P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define P210ToARGBFilter(a, b, c, d, e, f, g, h) \ + P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define P010ToAR30Filter(a, b, c, d, e, f, g, h) \ + P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define 
P210ToAR30Filter(a, b, c, d, e, f, g, h) \ + P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) +#endif // LITTLE_ENDIAN_ONLY_TEST +#endif // DISABLE_SLOW_TESTS + +static int Clamp(int y) { + if (y < 0) { + y = 0; + } + if (y > 255) { + y = 255; + } + return y; +} + +static int Clamp10(int y) { + if (y < 0) { + y = 0; + } + if (y > 1023) { + y = 1023; + } + return y; +} + +// Test 8 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToARGB) { + const int kSize = 256; + int histogram_b[256]; + int histogram_g[256]; + int histogram_r[256]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + // Reference formula for Y channel contribution in YUV to RGB conversions: + int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f)); + EXPECT_EQ(b, expected_y); + EXPECT_EQ(g, expected_y); + EXPECT_EQ(r, expected_y); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH010ToARGB) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4)); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAR30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAB30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ab30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023; + int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ab30_pixels); +} + +// Test 8 bit YUV to 10 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToAR30) { + const int kSize = 256; + const int kHistSize = 1024; + int histogram_b[kHistSize]; + int histogram_g[kHistSize]; + int histogram_r[kHistSize]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kHistSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test I400 with jpeg matrix is same as J400 +TEST_F(LibYUVConvertTest, TestI400) { + const int kSize = 256; + align_buffer_page_end(orig_i400, kSize); + align_buffer_page_end(argb_pixels_i400, kSize * 4); + align_buffer_page_end(argb_pixels_j400, kSize * 4); + align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4); + align_buffer_page_end(argb_pixels_h709_i400, kSize * 4); + align_buffer_page_end(argb_pixels_2020_i400, kSize * 4); + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_i400[i] = i; + } + + J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1); + I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants, + kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants, + kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants, + kSize, 1); + + 
EXPECT_EQ(0, argb_pixels_i400[0]); + EXPECT_EQ(0, argb_pixels_j400[0]); + EXPECT_EQ(0, argb_pixels_jpeg_i400[0]); + EXPECT_EQ(0, argb_pixels_h709_i400[0]); + EXPECT_EQ(0, argb_pixels_2020_i400[0]); + EXPECT_EQ(0, argb_pixels_i400[16 * 4]); + EXPECT_EQ(16, argb_pixels_j400[16 * 4]); + EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]); + EXPECT_EQ(130, argb_pixels_i400[128 * 4]); + EXPECT_EQ(128, argb_pixels_j400[128 * 4]); + EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]); + EXPECT_EQ(255, argb_pixels_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_j400[255 * 4]); + EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]); + + for (int i = 0; i < kSize * 4; ++i) { + if ((i & 3) == 3) { + EXPECT_EQ(255, argb_pixels_j400[i]); + } else { + EXPECT_EQ(i / 4, argb_pixels_j400[i]); + } + EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]); + } + + free_aligned_buffer_page_end(orig_i400); + free_aligned_buffer_page_end(argb_pixels_i400); + free_aligned_buffer_page_end(argb_pixels_j400); + free_aligned_buffer_page_end(argb_pixels_jpeg_i400); + free_aligned_buffer_page_end(argb_pixels_h709_i400); + free_aligned_buffer_page_end(argb_pixels_2020_i400); +} + +// Test RGB24 to ARGB and back to RGB24 +TEST_F(LibYUVConvertTest, TestARGBToRGB24) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3); + align_buffer_page_end(argb_pixels, kSize * 4); + align_buffer_page_end(dest_rgb24, kSize * 3); + + // Test grey scale + for (int i = 0; i < kSize * 3; ++i) { + orig_rgb24[i] = i; + } + + RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1); + ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); + + for (int i = 0; i < kSize * 3; ++i) { + EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); + } + + 
free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(argb_pixels); + free_aligned_buffer_page_end(dest_rgb24); +} + +TEST_F(LibYUVConvertTest, Test565) { + SIMD_ALIGNED(uint8_t orig_pixels[256][4]); + SIMD_ALIGNED(uint8_t pixels565[256][2]); + + for (int i = 0; i < 256; ++i) { + for (int j = 0; j < 4; ++j) { + orig_pixels[i][j] = i; + } + } + ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); + uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); + EXPECT_EQ(610919429u, checksum); +} +#endif // !defined(LEAN_TESTS) + +} // namespace libyuv diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 1f1896b0..f55bace3 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -31,6 +31,13 @@ #include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ #endif +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + // Some functions fail on big endian. Enable these tests on all cpus except // PowerPC, but they are not optimized so disabled by default. #if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) @@ -51,6 +58,8 @@ namespace libyuv { // subsample amount uses a divide. 
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) + // Planar test #define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ @@ -138,6 +147,7 @@ namespace libyuv { free_aligned_buffer_page_end(src_v); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ @@ -153,6 +163,14 @@ namespace libyuv { TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) @@ -276,6 +294,7 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) free_aligned_buffer_page_end(src_uv); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ SUBSAMP_Y) \ @@ -291,6 +310,14 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ 0, PN, OFF_U, OFF_V) +#else +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) +#endif TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) @@ -393,6 +420,7 @@ int I400ToNV21(const uint8_t* src_y, free_aligned_buffer_page_end(src_v); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ @@ -409,6 +437,14 @@ int I400ToNV21(const uint8_t* src_y, TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) @@ -510,6 +546,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) free_aligned_buffer_page_end(src_uv); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ @@ -533,6 +570,15 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ TILE_HEIGHT) +#else +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + 
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) +#endif TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) @@ -547,645 +593,6 @@ TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) -#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ - const int kPaddedHeight = \ - (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ - const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - 
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ - align_buffer_page_end( \ - src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ - for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ - src_y_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ - src_uv_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ - reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ - reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_opt), 
kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ - } \ - for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ - EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ - EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_u_c); \ - free_aligned_buffer_page_end(dst_v_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_u_opt); \ - free_aligned_buffer_page_end(dst_v_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - } - -#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) - -TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) -TESTBPTOP(P010, 
uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1) -TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) - -// Provide matrix wrappers for full range bt.709 -#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) -#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) -#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) -#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) -#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) -#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) - -// Provide matrix wrappers for full range bt.2020 -#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) -#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) -#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) -#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) -#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) -#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) - -#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ - I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) -#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ - I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) 
-#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ - I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) -#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ - I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) - -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) - -#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - double time0 = get_time(); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \ - kWidth, NEG kHeight); \ - double time1 = get_time(); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_opt + OFF, \ - kStrideB, kWidth, NEG kHeight); \ - } \ - double time2 = get_time(); \ - printf(" 
%8d us C - %8d us OPT\n", \ - static_cast<int>((time1 - time0) * 1e6), \ - static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 4) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0) -#else -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0) -#endif - -#if defined(ENABLE_FULL_TESTS) -TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, BGRA, 
4, 4, 1) -TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) -TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) -TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1) -TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1) -TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1) -TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) -TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1) -TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) -TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) -#endif -TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) -TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) -TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) -TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) -TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) -TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) -TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) -TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) -TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST 
-TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1) -TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1) -TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) -#endif -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) -TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) -#else -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) -TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) -TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) -TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) -#endif -TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) -TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) -TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) -TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) -TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) -#endif - -#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, N, NEG, OFF, ATTEN) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - 
align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - src_a[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(src_a); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, 
ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Premult, +, 0, 1) -#else -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) -#endif - -#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, 
b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, 
d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) - -#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) -#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) - -#if defined(ENABLE_FULL_TESTS) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 
4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#else -TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#endif - -#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = kWidth * BPP_B; \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - align_buffer_page_end(src_y, 
kWidth* kHeight + OFF); \ - align_buffer_page_end(src_uv, \ - kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < kStrideUV * 2; ++j) { \ - src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ - } \ - } \ - memset(dst_argb_c, 1, kStrideB* kHeight); \ - memset(dst_argb_opt, 101, kStrideB* kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ - dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ - dst_argb_opt, kWidth * BPP_B, kWidth, \ - NEG kHeight); \ - } \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ - align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ - memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ - memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ - FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ - kHeight); \ - FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ - kHeight); \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth * 4; ++j) { \ - EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \ - dst_argb32_opt[i * kWidth * 4 + j]); \ - } \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - free_aligned_buffer_page_end(dst_argb32_c); \ - free_aligned_buffer_page_end(dst_argb32_opt); \ - } - -#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ - TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Invert, -, 0) \ - TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Opt, +, 0) - -#define JNV12ToARGB(a, b, c, d, e, f, g, h) \ - NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -#define JNV21ToARGB(a, b, c, d, e, f, g, h) \ - NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -#define JNV12ToABGR(a, b, c, d, e, f, g, h) \ - NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) -#define JNV21ToABGR(a, b, c, d, e, f, g, h) \ - NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) -#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \ - NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \ - 
NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -#define JNV12ToRAW(a, b, c, d, e, f, g, h) \ - NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) -#define JNV21ToRAW(a, b, c, d, e, f, g, h) \ - NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) -#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \ - NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) - -TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) -TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) -TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) -TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) -TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) -TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) -TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) -TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) -#endif - -TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) -TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) -TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) -TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) -TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) -TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) -TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) -TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) -TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) -#endif - #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ @@ -1247,8 +654,6 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) #else #define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0) #endif @@ -1347,8 +752,6 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) #else #define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, 
_Any, +, 0) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0) #endif @@ -1401,6 +804,7 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) free_aligned_buffer_page_end(src_argb); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_ + 1, _Any, +, 0) \ @@ -1410,6 +814,11 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) benchmark_width_, _Invert, -, 0) \ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0) +#else +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif TESTATOBP(ARGB, 1, 4, NV12, 2, 2) TESTATOBP(ARGB, 1, 4, NV21, 2, 2) @@ -1421,440 +830,7 @@ TESTATOBP(UYVY, 2, 4, NV12, 2, 2) TESTATOBP(AYUV, 1, 4, NV12, 2, 2) TESTATOBP(AYUV, 1, 4, NV21, 2, 2) -#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = \ - (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ - align_buffer_page_end(dst_argb_opt, \ - kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - src_argb[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c, 
1, kStrideB* kHeightB); \ - memset(dst_argb_opt, 101, kStrideB* kHeightB); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \ - kStrideB, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \ - (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ - TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (fastrand() & 63) + 1; \ - const int kHeight = (fastrand() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = \ - (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ - align_buffer_page_end(dst_argb_c, \ - kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ - align_buffer_page_end(dst_argb_opt, \ - kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - src_argb[i] = 0xfe; \ - } \ - memset(dst_argb_c, 123, kStrideB* kHeightB); \ - memset(dst_argb_opt, 123, kStrideB* kHeightB); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \ - kStrideB, kWidth, kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - 
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \ - kStrideB, kWidth, kHeight); \ - for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B) \ - TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4) \ - TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \ - TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \ - TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B) -#else -#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B) \ - TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) -#endif - -TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -#endif -TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) -#endif -TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) 
-#endif -TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -#endif -TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) -TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) -TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) -TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) -TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) -#endif -TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) -TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) -TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) -TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) -TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) -TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) -TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) -TESTATOB(RGB24, uint8_t, 
3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) -TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -#endif -TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) -TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) - -// in place test -#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = \ - (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_opt, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - src_argb[i + OFF] = (fastrand() & 0xff); \ - } \ 
- memcpy(dst_argb_c + OFF, src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ - memcpy(dst_argb_opt + OFF, src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \ - (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ - (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ - } \ - memcpy(dst_argb_opt + OFF, src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ - FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ - (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ - for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B) \ - TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ - STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0) - -TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -#endif -TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) -#endif -TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -#endif -TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOA(ARGB, 
uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) -#endif -TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) -// TODO(fbarchard): Support in place for mirror. -// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) -TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) -TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) -TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) -#endif -TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) -TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) -// TODO(fbarchard): Support in place for conversions that increase bpp. 
-// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) -// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) -// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) -// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) -// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) -TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) -// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) -// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) -TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) -#endif -TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) -// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) -// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) -// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) -// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) -TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) -TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) -TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) - -#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B, W1280, N, NEG, OFF) \ - 
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = \ - (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ - for (int i = 0; i < kStrideA * kHeightA; ++i) { \ - src_argb[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c, 1, kStrideB* kHeightB); \ - memset(dst_argb_opt, 101, kStrideB* kHeightB); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \ - NULL, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt, \ - kStrideB, NULL, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \ - STRIDE_B, HEIGHT_B) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) { \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (fastrand() & 63) + 1; \ - const int kHeight = (fastrand() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const 
int kStrideB = \ - (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, kStrideA* kHeightA); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ - for (int i = 0; i < kStrideA * kHeightA; ++i) { \ - src_argb[i] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c, 123, kStrideB* kHeightB); \ - memset(dst_argb_opt, 123, kStrideB* kHeightB); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \ - kWidth, kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB, \ - NULL, kWidth, kHeight); \ - for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } \ - } - -#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B) \ - TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B, benchmark_width_, _Unaligned, +, 2) \ - TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B, benchmark_width_, _Invert, -, 0) \ - TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B, benchmark_width_, _Opt, +, 0) \ - TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ - HEIGHT_B) - -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) -#endif - -// These conversions called twice, produce the original result. -// e.g. endian swap twice. 
-#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \ - OFF) \ - TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - align_buffer_page_end(src_argb, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ - align_buffer_page_end(dst_argb_opt, \ - kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - src_argb[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c, 1, kStrideA* kHeightA); \ - memset(dst_argb_opt, 101, kStrideA* kHeightA); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \ - kStrideA, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \ - kStrideA, kWidth, NEG kHeight); \ - } \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \ - kWidth, NEG kHeight); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ - EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ - TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \ - _Any, +, 0) \ - 
TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ - _Unaligned, +, 2) \ - TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ - _Opt, +, 0) -#else -#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ - TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ - _Opt, +, 0) -#endif - -TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1) -TESTEND(ARGBToABGR, uint8_t, 4, 4, 1) -TESTEND(BGRAToARGB, uint8_t, 4, 4, 1) -TESTEND(ABGRToARGB, uint8_t, 4, 4, 1) -TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) +#if !defined(LEAN_TESTS) #ifdef HAVE_JPEG TEST_F(LibYUVConvertTest, ValidateJpeg) { @@ -2968,157 +1944,6 @@ TEST_F(LibYUVConvertTest, I420CropOddY) { free_aligned_buffer_page_end(src_y); } -TEST_F(LibYUVConvertTest, TestYToARGB) { - uint8_t y[32]; - uint8_t expectedg[32]; - for (int i = 0; i < 32; ++i) { - y[i] = i * 5 + 17; - expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f); - } - uint8_t argb[32 * 4]; - YToARGB(y, 0, argb, 0, 32, 1); - - for (int i = 0; i < 32; ++i) { - printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i], - argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]); - } - for (int i = 0; i < 32; ++i) { - EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); - } -} - -static const uint8_t kNoDither4x4[16] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -TEST_F(LibYUVConvertTest, TestNoDither) { - align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); - align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); - align_buffer_page_end(dst_rgb565dither, - benchmark_width_ * benchmark_height_ * 2); - MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); - MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); - MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); - ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, - benchmark_width_, benchmark_height_); - 
ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, - benchmark_width_ * 2, kNoDither4x4, benchmark_width_, - benchmark_height_); - for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { - EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); - } - - free_aligned_buffer_page_end(src_argb); - free_aligned_buffer_page_end(dst_rgb565); - free_aligned_buffer_page_end(dst_rgb565dither); -} - -// Ordered 4x4 dither for 888 to 565. Values from 0 to 7. -static const uint8_t kDither565_4x4[16] = { - 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, -}; - -TEST_F(LibYUVConvertTest, TestDither) { - align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); - align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); - align_buffer_page_end(dst_rgb565dither, - benchmark_width_ * benchmark_height_ * 2); - align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4); - align_buffer_page_end(dst_argbdither, - benchmark_width_ * benchmark_height_ * 4); - MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); - MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); - MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); - MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); - MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); - ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, - benchmark_width_, benchmark_height_); - ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, - benchmark_width_ * 2, kDither565_4x4, benchmark_width_, - benchmark_height_); - RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, dst_argb, benchmark_width_ * 4, - benchmark_width_, benchmark_height_); - RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, dst_argbdither, - benchmark_width_ * 4, benchmark_width_, benchmark_height_); - - for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { - 
EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); - } - free_aligned_buffer_page_end(src_argb); - free_aligned_buffer_page_end(dst_rgb565); - free_aligned_buffer_page_end(dst_rgb565dither); - free_aligned_buffer_page_end(dst_argb); - free_aligned_buffer_page_end(dst_argbdither); -} - -#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_c + OFF, \ - kStrideB, NULL, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B##Dither( \ - src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \ - dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \ - } \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ - align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ - memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \ - memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \ - FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \ - kWidth, kHeight); \ - FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ - kWidth * BPP_C, kWidth, kHeight); \ - for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - free_aligned_buffer_page_end(dst_argb32_c); \ - free_aligned_buffer_page_end(dst_argb32_opt); \ - } - -#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, FMT_C, BPP_C) \ - TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \ - TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) - -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) -#endif - #define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ TEST_F(LibYUVConvertTest, NAME) { \ const int kWidth = benchmark_width_; \ @@ -3224,1370 +2049,6 @@ TEST_F(LibYUVConvertTest, MM21ToYUY2) { free_aligned_buffer_page_end(golden_yuyv); } -// Transitive test. A to B to C is same as A to C. -// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. 
-#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - W1280, N, NEG, OFF, FMT_C, BPP_C) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \ - kWidth, NEG kHeight); \ - /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ - const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ - memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ - memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_c + OFF, \ - kStrideC, kWidth, NEG kHeight); \ - /* Convert B to C */ \ - FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ - kStrideC, kWidth, kHeight); \ - } \ - for (int i = 0; i < kStrideC * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - 
free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_b); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_bc); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - FMT_C, BPP_C) \ - TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \ - TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) -#else -#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - FMT_C, BPP_C) \ - TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) -#endif - -#if defined(ENABLE_FULL_TESTS) -TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3) -TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) -TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) -TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3) -TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3) -TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4) -TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3) -TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4) -TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3) -TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4) 
-TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) -TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) -#endif -TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) -#else -TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3) -TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) -TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) 
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) -TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) -TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) -TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) -TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) -TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) -#endif - -// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB. -// Benchmark 2 step conversion for comparison to 1 step conversion. -#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ - const int kSizeUV = \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ - const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ - memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ - memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - src_a[i + OFF] = (fastrand() & 0xff); \ - } \ - for 
(int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - /* Convert A to B */ \ - FMT_PLANAR##To##FMT_B( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ - dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ - /* Convert B to C */ \ - FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ - kStrideC, kWidth, kHeight); \ - } \ - /* Convert A to C */ \ - FMT_PLANAR##To##FMT_C( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ - dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \ - for (int i = 0; i < kStrideC * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(src_a); \ - free_aligned_buffer_page_end(dst_argb_b); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_bc); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - FMT_C, BPP_C) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Premult, +, 0, FMT_C, 
BPP_C, 1) -#else -#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - FMT_C, BPP_C) \ - TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ - benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) -#endif - -#if defined(ENABLE_FULL_TESTS) -TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 
4, ARGB, 4) -TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) -TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) -#else -TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) -TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) -#endif - -#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \ - OFF, FMT_C, BPP_C) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ - const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ - align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ - MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ - memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ - FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ - kWidth, NEG kHeight); \ - /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ - const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ - memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ - memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \ - kWidth, NEG kHeight); \ - /* Convert B to C */ \ - FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ - kStrideC, kWidth, kHeight); \ - } \ - for (int i = 0; i < kStrideC * kHeight; i += 4) { \ - EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ - EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ - EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ - EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ - } \ - 
free_aligned_buffer_page_end(src_argb_a); \ - free_aligned_buffer_page_end(dst_argb_b); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_bc); \ - } - -#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ - TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ - _Unaligned, +, 4, FMT_C, BPP_C) \ - TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ - _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ - _Opt, +, 0, FMT_C, BPP_C) - -// Caveat: Destination needs to be 4 bytes -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) -TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4) -TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) -TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4) -TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4) -TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4) -TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4) -TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4) -#endif - -TEST_F(LibYUVConvertTest, RotateWithARGBSource) { - // 2x2 frames - uint32_t src[4]; - uint32_t dst[4]; - // some random input - src[0] = 0x11000000; - src[1] = 0x00450000; - src[2] = 0x00009f00; - src[3] = 0x000000ff; - // zeros on destination - dst[0] = 0x00000000; - dst[1] = 0x00000000; - dst[2] = 0x00000000; - dst[3] = 0x00000000; - - int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src), - 16, // input size - reinterpret_cast<uint8_t*>(dst), - 8, // destination stride - 0, // crop_x - 0, // crop_y - 2, // width - 2, // height - 2, // crop width - 2, // crop height - kRotate90, FOURCC_ARGB); - - EXPECT_EQ(r, 0); - // 90 degrees rotation, no conversion - EXPECT_EQ(dst[0], src[2]); - EXPECT_EQ(dst[1], src[0]); - EXPECT_EQ(dst[2], src[3]); - EXPECT_EQ(dst[3], src[1]); -} - -#ifdef HAS_ARGBTOAR30ROW_AVX2 
-TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { - // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels. - const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; - align_buffer_page_end(src, kPixels * 4); - align_buffer_page_end(dst_opt, kPixels * 4); - align_buffer_page_end(dst_c, kPixels * 4); - MemRandomize(src, kPixels * 4); - memset(dst_opt, 0, kPixels * 4); - memset(dst_c, 1, kPixels * 4); - - ARGBToAR30Row_C(src, dst_c, kPixels); - - int has_avx2 = TestCpuFlag(kCpuHasAVX2); - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - for (int i = 0; i < benchmark_iterations_; ++i) { - if (has_avx2) { - ARGBToAR30Row_AVX2(src, dst_opt, kPixels); - } else if (has_ssse3) { - ARGBToAR30Row_SSSE3(src, dst_opt, kPixels); - } else { - ARGBToAR30Row_C(src, dst_opt, kPixels); - } - } - for (int i = 0; i < kPixels * 4; ++i) { - EXPECT_EQ(dst_opt[i], dst_c[i]); - } - - free_aligned_buffer_page_end(src); - free_aligned_buffer_page_end(dst_opt); - free_aligned_buffer_page_end(dst_c); -} -#endif // HAS_ARGBTOAR30ROW_AVX2 - -#ifdef HAS_ABGRTOAR30ROW_AVX2 -TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { - // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels. 
- const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; - align_buffer_page_end(src, kPixels * 4); - align_buffer_page_end(dst_opt, kPixels * 4); - align_buffer_page_end(dst_c, kPixels * 4); - MemRandomize(src, kPixels * 4); - memset(dst_opt, 0, kPixels * 4); - memset(dst_c, 1, kPixels * 4); - - ABGRToAR30Row_C(src, dst_c, kPixels); - - int has_avx2 = TestCpuFlag(kCpuHasAVX2); - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - for (int i = 0; i < benchmark_iterations_; ++i) { - if (has_avx2) { - ABGRToAR30Row_AVX2(src, dst_opt, kPixels); - } else if (has_ssse3) { - ABGRToAR30Row_SSSE3(src, dst_opt, kPixels); - } else { - ABGRToAR30Row_C(src, dst_opt, kPixels); - } - } - for (int i = 0; i < kPixels * 4; ++i) { - EXPECT_EQ(dst_opt[i], dst_c[i]); - } - - free_aligned_buffer_page_end(src); - free_aligned_buffer_page_end(dst_opt); - free_aligned_buffer_page_end(dst_c); -} -#endif // HAS_ABGRTOAR30ROW_AVX2 - -// Provide matrix wrappers for 12 bit YUV -#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \ - I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \ - I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) - -#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) -#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \ - I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) -#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \ - I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) -#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \ - 
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) -#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) -#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) -#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) -#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) -#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \ - I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) - -#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ - I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) -#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ - I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) -#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ - I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) -#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ - I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ - kFilterBilinear) - -// TODO(fbarchard): Fix clamping issue affected by U channel. 
-#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ - BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ - align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ - align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \ - reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \ - } \ - memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B( \ - reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ - dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B( \ - reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ - dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ - 
} \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ - BPP_B, ALIGN, YALIGN) \ - TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ - ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ - TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ - ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4) \ - TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ - ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \ - TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ - ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) - -// These conversions are only optimized for x86 -#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1) -TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1) -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, 
ARGBFilter, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1) - -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) -TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) -TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) -TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1) -TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) -TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) -#endif // LITTLE_ENDIAN_ONLY_TEST -#endif // DISABLE_SLOW_TESTS - -#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \ - align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \ - align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \ - 
align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \ - (fastrand() & ((1 << S_DEPTH) - 1)); \ - reinterpret_cast<uint16_t*>(src_a + OFF)[i] = \ - (fastrand() & ((1 << S_DEPTH) - 1)); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - reinterpret_cast<uint16_t*>(src_u + OFF)[i] = \ - (fastrand() & ((1 << S_DEPTH) - 1)); \ - reinterpret_cast<uint16_t*>(src_v + OFF)[i] = \ - (fastrand() & ((1 << S_DEPTH) - 1)); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \ - dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B( \ - reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \ - reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \ - dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(src_a); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH) -#else -#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, S_DEPTH) \ - TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) -#endif - -#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, 
f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, 
i, j, &kYuvH709Constants, k, \ - l, m) -#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ - l, m) -#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, 
&kYuv2020Constants, k, \ - l, m) -#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) -#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) - -// These conversions are only optimized for x86 -#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 
4, 4, 1, 10) -TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10) -TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10) -TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) -TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) -#endif // DISABLE_SLOW_TESTS - -#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \ - const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ - align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \ - (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ - } \ - for (int 
i = 0; i < kSizeUV; ++i) { \ - reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] = \ - (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ - } \ - memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_uv + SOFF), \ - kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16_t*>(src_uv + SOFF), \ - kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, S_DEPTH) \ - TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ - benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ - TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ - benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ - TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ - benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ - TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ - benchmark_width_, _Opt, +, 0, 0, S_DEPTH) - -#define P010ToARGB(a, b, c, d, e, f, g, h) \ - P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P210ToARGB(a, b, c, d, e, f, g, h) \ - P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P010ToAR30(a, b, c, d, e, f, g, h) \ - P010ToAR30Matrix(a, b, c, 
d, e, f, &kYuvH709Constants, g, h) -#define P210ToAR30(a, b, c, d, e, f, g, h) \ - P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) - -#define P012ToARGB(a, b, c, d, e, f, g, h) \ - P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P212ToARGB(a, b, c, d, e, f, g, h) \ - P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P012ToAR30(a, b, c, d, e, f, g, h) \ - P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P212ToAR30(a, b, c, d, e, f, g, h) \ - P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) - -#define P016ToARGB(a, b, c, d, e, f, g, h) \ - P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P216ToARGB(a, b, c, d, e, f, g, h) \ - P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P016ToAR30(a, b, c, d, e, f, g, h) \ - P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) -#define P216ToAR30(a, b, c, d, e, f, g, h) \ - P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) - -#define P010ToARGBFilter(a, b, c, d, e, f, g, h) \ - P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ - kFilterBilinear) -#define P210ToARGBFilter(a, b, c, d, e, f, g, h) \ - P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ - kFilterBilinear) -#define P010ToAR30Filter(a, b, c, d, e, f, g, h) \ - P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ - kFilterBilinear) -#define P210ToAR30Filter(a, b, c, d, e, f, g, h) \ - P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ - kFilterBilinear) - -#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) -TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) -TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) -TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) -TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) -TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) -TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) 
-TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) -TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) -TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) -TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) -TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) -TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) -TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) -TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) -#endif // LITTLE_ENDIAN_ONLY_TEST -#endif // DISABLE_SLOW_TESTS - -static int Clamp(int y) { - if (y < 0) { - y = 0; - } - if (y > 255) { - y = 255; - } - return y; -} - -static int Clamp10(int y) { - if (y < 0) { - y = 0; - } - if (y > 1023) { - y = 1023; - } - return y; -} - -// Test 8 bit YUV to 8 bit RGB -TEST_F(LibYUVConvertTest, TestH420ToARGB) { - const int kSize = 256; - int histogram_b[256]; - int histogram_g[256]; - int histogram_r[256]; - memset(histogram_b, 0, sizeof(histogram_b)); - memset(histogram_g, 0, sizeof(histogram_g)); - memset(histogram_r, 0, sizeof(histogram_r)); - align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); - align_buffer_page_end(argb_pixels, kSize * 4); - uint8_t* orig_y = orig_yuv; - uint8_t* orig_u = orig_y + kSize; - uint8_t* orig_v = orig_u + kSize / 2; - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_y[i] = i; - } - for (int i = 0; i < kSize / 2; ++i) { - orig_u[i] = 128; // 128 is 0. 
- orig_v[i] = 128; - } - - H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); - - for (int i = 0; i < kSize; ++i) { - int b = argb_pixels[i * 4 + 0]; - int g = argb_pixels[i * 4 + 1]; - int r = argb_pixels[i * 4 + 2]; - int a = argb_pixels[i * 4 + 3]; - ++histogram_b[b]; - ++histogram_g[g]; - ++histogram_r[r]; - // Reference formula for Y channel contribution in YUV to RGB conversions: - int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f)); - EXPECT_EQ(b, expected_y); - EXPECT_EQ(g, expected_y); - EXPECT_EQ(r, expected_y); - EXPECT_EQ(a, 255); - } - - int count_b = 0; - int count_g = 0; - int count_r = 0; - for (int i = 0; i < kSize; ++i) { - if (histogram_b[i]) { - ++count_b; - } - if (histogram_g[i]) { - ++count_g; - } - if (histogram_r[i]) { - ++count_r; - } - } - printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); - - free_aligned_buffer_page_end(orig_yuv); - free_aligned_buffer_page_end(argb_pixels); -} - -// Test 10 bit YUV to 8 bit RGB -TEST_F(LibYUVConvertTest, TestH010ToARGB) { - const int kSize = 1024; - int histogram_b[1024]; - int histogram_g[1024]; - int histogram_r[1024]; - memset(histogram_b, 0, sizeof(histogram_b)); - memset(histogram_g, 0, sizeof(histogram_g)); - memset(histogram_r, 0, sizeof(histogram_r)); - align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); - align_buffer_page_end(argb_pixels, kSize * 4); - uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); - uint16_t* orig_u = orig_y + kSize; - uint16_t* orig_v = orig_u + kSize / 2; - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_y[i] = i; - } - for (int i = 0; i < kSize / 2; ++i) { - orig_u[i] = 512; // 512 is 0. 
- orig_v[i] = 512; - } - - H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); - - for (int i = 0; i < kSize; ++i) { - int b = argb_pixels[i * 4 + 0]; - int g = argb_pixels[i * 4 + 1]; - int r = argb_pixels[i * 4 + 2]; - int a = argb_pixels[i * 4 + 3]; - ++histogram_b[b]; - ++histogram_g[g]; - ++histogram_r[r]; - int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4)); - EXPECT_NEAR(b, expected_y, 1); - EXPECT_NEAR(g, expected_y, 1); - EXPECT_NEAR(r, expected_y, 1); - EXPECT_EQ(a, 255); - } - - int count_b = 0; - int count_g = 0; - int count_r = 0; - for (int i = 0; i < kSize; ++i) { - if (histogram_b[i]) { - ++count_b; - } - if (histogram_g[i]) { - ++count_g; - } - if (histogram_r[i]) { - ++count_r; - } - } - printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); - - free_aligned_buffer_page_end(orig_yuv); - free_aligned_buffer_page_end(argb_pixels); -} - -// Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected -// result. -TEST_F(LibYUVConvertTest, TestH010ToAR30) { - const int kSize = 1024; - int histogram_b[1024]; - int histogram_g[1024]; - int histogram_r[1024]; - memset(histogram_b, 0, sizeof(histogram_b)); - memset(histogram_g, 0, sizeof(histogram_g)); - memset(histogram_r, 0, sizeof(histogram_r)); - - align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); - align_buffer_page_end(ar30_pixels, kSize * 4); - uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); - uint16_t* orig_u = orig_y + kSize; - uint16_t* orig_v = orig_u + kSize / 2; - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_y[i] = i; - } - for (int i = 0; i < kSize / 2; ++i) { - orig_u[i] = 512; // 512 is 0. 
- orig_v[i] = 512; - } - - H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); - - for (int i = 0; i < kSize; ++i) { - int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; - int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; - int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; - int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; - ++histogram_b[b10]; - ++histogram_g[g10]; - ++histogram_r[r10]; - int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5)); - EXPECT_NEAR(b10, expected_y, 4); - EXPECT_NEAR(g10, expected_y, 4); - EXPECT_NEAR(r10, expected_y, 4); - EXPECT_EQ(a2, 3); - } - - int count_b = 0; - int count_g = 0; - int count_r = 0; - for (int i = 0; i < kSize; ++i) { - if (histogram_b[i]) { - ++count_b; - } - if (histogram_g[i]) { - ++count_g; - } - if (histogram_r[i]) { - ++count_r; - } - } - printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); - - free_aligned_buffer_page_end(orig_yuv); - free_aligned_buffer_page_end(ar30_pixels); -} - -// Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected -// result. -TEST_F(LibYUVConvertTest, TestH010ToAB30) { - const int kSize = 1024; - int histogram_b[1024]; - int histogram_g[1024]; - int histogram_r[1024]; - memset(histogram_b, 0, sizeof(histogram_b)); - memset(histogram_g, 0, sizeof(histogram_g)); - memset(histogram_r, 0, sizeof(histogram_r)); - - align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); - align_buffer_page_end(ab30_pixels, kSize * 4); - uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); - uint16_t* orig_u = orig_y + kSize; - uint16_t* orig_v = orig_u + kSize / 2; - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_y[i] = i; - } - for (int i = 0; i < kSize / 2; ++i) { - orig_u[i] = 512; // 512 is 0. 
- orig_v[i] = 512; - } - - H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1); - - for (int i = 0; i < kSize; ++i) { - int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023; - int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023; - int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023; - int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3; - ++histogram_b[b10]; - ++histogram_g[g10]; - ++histogram_r[r10]; - int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); - EXPECT_NEAR(b10, expected_y, 4); - EXPECT_NEAR(g10, expected_y, 4); - EXPECT_NEAR(r10, expected_y, 4); - EXPECT_EQ(a2, 3); - } - - int count_b = 0; - int count_g = 0; - int count_r = 0; - for (int i = 0; i < kSize; ++i) { - if (histogram_b[i]) { - ++count_b; - } - if (histogram_g[i]) { - ++count_g; - } - if (histogram_r[i]) { - ++count_r; - } - } - printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); - - free_aligned_buffer_page_end(orig_yuv); - free_aligned_buffer_page_end(ab30_pixels); -} - -// Test 8 bit YUV to 10 bit RGB -TEST_F(LibYUVConvertTest, TestH420ToAR30) { - const int kSize = 256; - const int kHistSize = 1024; - int histogram_b[kHistSize]; - int histogram_g[kHistSize]; - int histogram_r[kHistSize]; - memset(histogram_b, 0, sizeof(histogram_b)); - memset(histogram_g, 0, sizeof(histogram_g)); - memset(histogram_r, 0, sizeof(histogram_r)); - align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); - align_buffer_page_end(ar30_pixels, kSize * 4); - uint8_t* orig_y = orig_yuv; - uint8_t* orig_u = orig_y + kSize; - uint8_t* orig_v = orig_u + kSize / 2; - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_y[i] = i; - } - for (int i = 0; i < kSize / 2; ++i) { - orig_u[i] = 128; // 128 is 0. 
- orig_v[i] = 128; - } - - H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); - - for (int i = 0; i < kSize; ++i) { - int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; - int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; - int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; - int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; - ++histogram_b[b10]; - ++histogram_g[g10]; - ++histogram_r[r10]; - int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f)); - EXPECT_NEAR(b10, expected_y, 4); - EXPECT_NEAR(g10, expected_y, 4); - EXPECT_NEAR(r10, expected_y, 4); - EXPECT_EQ(a2, 3); - } - - int count_b = 0; - int count_g = 0; - int count_r = 0; - for (int i = 0; i < kHistSize; ++i) { - if (histogram_b[i]) { - ++count_b; - } - if (histogram_g[i]) { - ++count_g; - } - if (histogram_r[i]) { - ++count_r; - } - } - printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); - - free_aligned_buffer_page_end(orig_yuv); - free_aligned_buffer_page_end(ar30_pixels); -} - -// Test I400 with jpeg matrix is same as J400 -TEST_F(LibYUVConvertTest, TestI400) { - const int kSize = 256; - align_buffer_page_end(orig_i400, kSize); - align_buffer_page_end(argb_pixels_i400, kSize * 4); - align_buffer_page_end(argb_pixels_j400, kSize * 4); - align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4); - align_buffer_page_end(argb_pixels_h709_i400, kSize * 4); - align_buffer_page_end(argb_pixels_2020_i400, kSize * 4); - - // Test grey scale - for (int i = 0; i < kSize; ++i) { - orig_i400[i] = i; - } - - J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1); - I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1); - I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants, - kSize, 1); - I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants, - kSize, 1); - I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants, - kSize, 1); - - 
EXPECT_EQ(0, argb_pixels_i400[0]); - EXPECT_EQ(0, argb_pixels_j400[0]); - EXPECT_EQ(0, argb_pixels_jpeg_i400[0]); - EXPECT_EQ(0, argb_pixels_h709_i400[0]); - EXPECT_EQ(0, argb_pixels_2020_i400[0]); - EXPECT_EQ(0, argb_pixels_i400[16 * 4]); - EXPECT_EQ(16, argb_pixels_j400[16 * 4]); - EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]); - EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]); - EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]); - EXPECT_EQ(130, argb_pixels_i400[128 * 4]); - EXPECT_EQ(128, argb_pixels_j400[128 * 4]); - EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]); - EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]); - EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]); - EXPECT_EQ(255, argb_pixels_i400[255 * 4]); - EXPECT_EQ(255, argb_pixels_j400[255 * 4]); - EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]); - EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]); - EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]); - - for (int i = 0; i < kSize * 4; ++i) { - if ((i & 3) == 3) { - EXPECT_EQ(255, argb_pixels_j400[i]); - } else { - EXPECT_EQ(i / 4, argb_pixels_j400[i]); - } - EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]); - } - - free_aligned_buffer_page_end(orig_i400); - free_aligned_buffer_page_end(argb_pixels_i400); - free_aligned_buffer_page_end(argb_pixels_j400); - free_aligned_buffer_page_end(argb_pixels_jpeg_i400); - free_aligned_buffer_page_end(argb_pixels_h709_i400); - free_aligned_buffer_page_end(argb_pixels_2020_i400); -} - -// Test RGB24 to ARGB and back to RGB24 -TEST_F(LibYUVConvertTest, TestARGBToRGB24) { - const int kSize = 256; - align_buffer_page_end(orig_rgb24, kSize * 3); - align_buffer_page_end(argb_pixels, kSize * 4); - align_buffer_page_end(dest_rgb24, kSize * 3); - - // Test grey scale - for (int i = 0; i < kSize * 3; ++i) { - orig_rgb24[i] = i; - } - - RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1); - ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); - - for (int i = 0; i < kSize * 3; ++i) { - EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); - } - - 
free_aligned_buffer_page_end(orig_rgb24); - free_aligned_buffer_page_end(argb_pixels); - free_aligned_buffer_page_end(dest_rgb24); -} - -TEST_F(LibYUVConvertTest, Test565) { - SIMD_ALIGNED(uint8_t orig_pixels[256][4]); - SIMD_ALIGNED(uint8_t pixels565[256][2]); - - for (int i = 0; i < 256; ++i) { - for (int j = 0; j < 4; ++j) { - orig_pixels[i][j] = i; - } - } - ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); - EXPECT_EQ(610919429u, checksum); -} - // Test RGB24 to J420 is exact #if defined(LIBYUV_BIT_EXACT) TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { @@ -4644,4 +2105,6 @@ TEST_F(LibYUVConvertTest, TestRGB24ToI420) { } #endif +#endif // !defined(LEAN_TESTS) + } // namespace libyuv diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 431343e3..437b6632 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -47,14 +47,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -65,14 +66,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has ERMS 0x%x\n", has_erms); printf("Has FMA3 0x%x\n", has_fma3); printf("Has F16C 0x%x\n", has_f16c); - printf("Has GFNI 0x%x\n", 
has_gfni); printf("Has AVX512BW 0x%x\n", has_avx512bw); printf("Has AVX512VL 0x%x\n", has_avx512vl); printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); #endif #if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); @@ -183,7 +185,7 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { printf("__pnacl__ %d\n", __pnacl__); #endif #ifdef GG_LONGLONG - printf("GG_LONGLONG %d\n", GG_LONGLONG); + printf("GG_LONGLONG %lld\n", GG_LONGLONG(1)); #endif #ifdef INT_TYPES_DEFINED printf("INT_TYPES_DEFINED\n"); diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc new file mode 100644 index 00000000..9ce47a02 --- /dev/null +++ b/unit_test/scale_plane_test.cc @@ -0,0 +1,470 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> +#include <time.h> + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C +#endif + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +#ifdef ENABLE_ROW_TESTS +#ifdef HAS_SCALEROWDOWN2_SSSE3 +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (!has_ssse3) { + printf("Warning SSSE3 not detected; Skipping test.\n"); + } else { + // TL. + orig_pixels[0] = 255u; + orig_pixels[1] = 0u; + orig_pixels[128 + 0] = 0u; + orig_pixels[128 + 1] = 0u; + // TR. + orig_pixels[2] = 0u; + orig_pixels[3] = 100u; + orig_pixels[128 + 2] = 0u; + orig_pixels[128 + 3] = 0u; + // BL. + orig_pixels[4] = 0u; + orig_pixels[5] = 0u; + orig_pixels[128 + 4] = 50u; + orig_pixels[128 + 5] = 0u; + // BR. + orig_pixels[6] = 0u; + orig_pixels[7] = 0u; + orig_pixels[128 + 6] = 0u; + orig_pixels[128 + 7] = 20u; + // Odd. + orig_pixels[126] = 4u; + orig_pixels[127] = 255u; + orig_pixels[128 + 126] = 16u; + orig_pixels[128 + 127] = 255u; + + // Test regular half size. 
+ ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); + + // Test Odd width version - Last pixel is just 1 horizontal pixel. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); + + // Test one pixel less, should skip the last pixel. + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); + + // Test regular half size SSSE3. + ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); + + // Compare C and SSSE3 match. 
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + for (int i = 0; i < 64; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + } +} +#endif // HAS_SCALEROWDOWN2_SSSE3 + +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 2560 * 2; ++i) { + orig_pixels[i] = i; + } + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); + EXPECT_EQ(dst_pixels_c[1279], 3839); +} +#endif // ENABLE_ROW_TESTS + +// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel +// difference. +// 0 = exact. 
+static int TestPlaneFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int src_stride_y = Abs(src_width); + int dst_y_plane_size = dst_width * dst_height; + int dst_stride_y = dst_width; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + + MemRandomize(src_y, src_y_plane_size); + memset(dst_y_8, 0, dst_y_plane_size); + memset(dst_y_16, 1, dst_y_plane_size * 2); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i] & 255; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, + dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + + for (i = 0; i < benchmark_iterations; ++i) { + ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, + dst_stride_y, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_y_16); + + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +// 2 is chroma subsample. 
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) +#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ + int diff = TestPlaneFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Box, nom, denom, boxdiff) + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
+TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +TEST_F(LibYUVScaleTest, PlaneTest3x) { + const int kSrcStride = 480; + const int kDstStride = 160; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest4x) { + const int kSrcStride = 640; + const int kDstStride = 160; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterNone); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterNone); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBilinear); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
+ + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBilinear); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. +TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBox); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBox); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. 
+ orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} +} // namespace libyuv diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index c2232e66..6e3b9271 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -22,6 +22,11 @@ #define STRINGIZE(line) #line #define FILELINESTR(file, line) file ":" STRINGIZE(line) +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#endif + #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) // SLOW TESTS are those that are unoptimized C 
code. // FULL TESTS are optimized but test many variations of the same code. @@ -1123,437 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) #endif #endif - #undef TEST_SCALESWAPXY1 -#ifdef ENABLE_ROW_TESTS -#ifdef HAS_SCALEROWDOWN2_SSSE3 -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { - SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); - SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); - SIMD_ALIGNED(uint8_t dst_pixels_c[64]); - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - if (!has_ssse3) { - printf("Warning SSSE3 not detected; Skipping test.\n"); - } else { - // TL. - orig_pixels[0] = 255u; - orig_pixels[1] = 0u; - orig_pixels[128 + 0] = 0u; - orig_pixels[128 + 1] = 0u; - // TR. - orig_pixels[2] = 0u; - orig_pixels[3] = 100u; - orig_pixels[128 + 2] = 0u; - orig_pixels[128 + 3] = 0u; - // BL. - orig_pixels[4] = 0u; - orig_pixels[5] = 0u; - orig_pixels[128 + 4] = 50u; - orig_pixels[128 + 5] = 0u; - // BR. - orig_pixels[6] = 0u; - orig_pixels[7] = 0u; - orig_pixels[128 + 6] = 0u; - orig_pixels[128 + 7] = 20u; - // Odd. - orig_pixels[126] = 4u; - orig_pixels[127] = 255u; - orig_pixels[128 + 126] = 16u; - orig_pixels[128 + 127] = 255u; - - // Test regular half size. - ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(133u, dst_pixels_c[63]); - - // Test Odd width version - Last pixel is just 1 horizontal pixel. 
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(10u, dst_pixels_c[63]); - - // Test one pixel less, should skip the last pixel. - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(0u, dst_pixels_c[63]); - - // Test regular half size SSSE3. - ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - - EXPECT_EQ(64u, dst_pixels_opt[0]); - EXPECT_EQ(25u, dst_pixels_opt[1]); - EXPECT_EQ(13u, dst_pixels_opt[2]); - EXPECT_EQ(5u, dst_pixels_opt[3]); - EXPECT_EQ(0u, dst_pixels_opt[4]); - EXPECT_EQ(133u, dst_pixels_opt[63]); - - // Compare C and SSSE3 match. 
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - for (int i = 0; i < 64; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - } -} -#endif // HAS_SCALEROWDOWN2_SSSE3 - -extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); - -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { - SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); - SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); - SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); - - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); - memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - - for (int i = 0; i < 2560 * 2; ++i) { - orig_pixels[i] = i; - } - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) - int has_neon = TestCpuFlag(kCpuHasNEON); - if (has_neon) { - ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } else { - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } -#else - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); -#endif - } - - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - - EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); - EXPECT_EQ(dst_pixels_c[1279], 3839); -} -#endif // ENABLE_ROW_TESTS - -// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel -// difference. -// 0 = exact. 
-static int TestPlaneFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { - if (!SizeValid(src_width, src_height, dst_width, dst_height)) { - return 0; - } - - int i; - int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int src_stride_y = Abs(src_width); - int dst_y_plane_size = dst_width * dst_height; - int dst_stride_y = dst_width; - - align_buffer_page_end(src_y, src_y_plane_size); - align_buffer_page_end(src_y_16, src_y_plane_size * 2); - align_buffer_page_end(dst_y_8, dst_y_plane_size); - align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); - uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); - uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); - - MemRandomize(src_y, src_y_plane_size); - memset(dst_y_8, 0, dst_y_plane_size); - memset(dst_y_16, 1, dst_y_plane_size * 2); - - for (i = 0; i < src_y_plane_size; ++i) { - p_src_y_16[i] = src_y[i] & 255; - } - - MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, - dst_width, dst_height, f); - MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. - - for (i = 0; i < benchmark_iterations; ++i) { - ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, - dst_stride_y, dst_width, dst_height, f); - } - - // Expect an exact match. - int max_diff = 0; - for (i = 0; i < dst_y_plane_size; ++i) { - int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } - } - - free_aligned_buffer_page_end(dst_y_8); - free_aligned_buffer_page_end(dst_y_16); - free_aligned_buffer_page_end(src_y); - free_aligned_buffer_page_end(src_y_16); - - return max_diff; -} - -// The following adjustments in dimensions ensure the scale factor will be -// exactly achieved. -// 2 is chroma subsample. 
-#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) -#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) - -#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ - int diff = TestPlaneFilter_16( \ - SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ - DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ - kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ - EXPECT_LE(diff, max_diff); \ - } - -// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but -// filtering is different fixed point implementations for SSSE3, Neon and C. -#define TEST_FACTOR(name, nom, denom, boxdiff) \ - TEST_FACTOR1(name, None, nom, denom, 0) \ - TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Box, nom, denom, boxdiff) - -TEST_FACTOR(2, 1, 2, 0) -TEST_FACTOR(4, 1, 4, 0) -// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
-TEST_FACTOR(3by4, 3, 4, 1) -TEST_FACTOR(3by8, 3, 8, 1) -TEST_FACTOR(3, 1, 3, 0) -#undef TEST_FACTOR1 -#undef TEST_FACTOR -#undef SX -#undef DX - -TEST_F(LibYUVScaleTest, PlaneTest3x) { - const int kSrcStride = 480; - const int kDstStride = 160; - const int kSize = kSrcStride * 3; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 480 * 3; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(225, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(225, dest_pixels[0]); - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest4x) { - const int kSrcStride = 640; - const int kDstStride = 160; - const int kSize = kSrcStride * 4; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 640 * 4; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(66, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterNone); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterNone); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBilinear); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
- - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBilinear); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. -TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBox); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBox); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_Box) { - align_buffer_page_end(orig_pixels, 3); - align_buffer_page_end(dst_pixels, 3); - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. 
- orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, - libyuv::kFilterBox); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { - align_buffer_page_end(orig_pixels_alloc, 3 * 2); - align_buffer_page_end(dst_pixels_alloc, 3 * 2); - uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; - uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. - orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane_16( - orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels_alloc); - free_aligned_buffer_page_end(orig_pixels_alloc); -} } // namespace libyuv diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index b66ebfab..239d5b92 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -144,11 +144,14 @@ int TestCpuEnv(int cpu_info) { if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; } - if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { - cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; + if (TestEnv("LIBYUV_DISABLE_AVX10")) { + cpu_info &= ~libyuv::kCpuHasAVX10; } - if 
(TestEnv("LIBYUV_DISABLE_GFNI")) { - cpu_info &= ~libyuv::kCpuHasGFNI; + if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNI; + } + if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8; } #endif if (TestEnv("LIBYUV_DISABLE_ASM")) { diff --git a/util/cpuid.c b/util/cpuid.c index edc6a26e..c07e6e95 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -96,14 +96,15 @@ int main(int argc, const char* argv[]) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -114,14 +115,15 @@ int main(int argc, const char* argv[]) { printf("Has ERMS 0x%x\n", has_erms); printf("Has FMA3 0x%x\n", has_fma3); printf("Has F16C 0x%x\n", has_f16c); - printf("Has GFNI 0x%x\n", has_gfni); printf("Has AVX512BW 0x%x\n", has_avx512bw); printf("Has AVX512VL 0x%x\n", has_avx512vl); printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); } return 0; } |