Merge "Snap for 11739378 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-lifecycle-release" into androidx-lifecycle-releaseandroidx-lifecycle-release

author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2024-04-19 18:27:08 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2024-04-19 18:27:08 +0000
commit: f72df94f7894736b8115a1b43ecf6acde951efbc (patch)
tree: 7aa355fd0b89ec0b2611e17ee84a14c6fa449e22
parent: 60b8e179403ea8a6ae163ac1e62206766d75f137 (diff)
parent: e8ac7a941dd76f8ab24d01c9f1a0099672baf057 (diff)
download: libyuv-androidx-lifecycle-release.tar.gz
35 files changed, 4519 insertions, 3614 deletions
diff --git a/Android.bp b/Android.bp
index 8a94da24..506184e0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,13 +1,7 @@
 package {
     default_applicable_licenses: ["external_libyuv_license"],
 }
-
 // Added automatically by a large-scale-change
-//
-// large-scale-change included anything that looked like it might be a license
-// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
-//
-// Please consider removing redundant or irrelevant files from 'license_text:'.
 // See: http://go/android-license-faq
 license {
     name: "external_libyuv_license",
@@ -20,15 +14,13 @@ license {
         "PATENTS",
     ],
 }
+subdirs = ["files"]
 
 cc_library {
     name: "libyuv",
     vendor_available: true,
     product_available: true,
     host_supported: true,
-    vndk: {
-        enabled: true,
-    },
 
     srcs: [
         "source/compare.cc",
@@ -99,6 +91,7 @@ cc_library {
     apex_available: [
         "//apex_available:platform",
         "com.android.media.swcodec",
+        "com.android.virt",
     ],
     min_sdk_version: "29",
 }
@@ -133,6 +126,7 @@ cc_test {
         "unit_test/rotate_argb_test.cc",
         "unit_test/rotate_test.cc",
         "unit_test/scale_argb_test.cc",
+        "unit_test/scale_plane_test.cc",
         "unit_test/scale_rgb_test.cc",
         "unit_test/scale_test.cc",
         "unit_test/scale_uv_test.cc",
diff --git a/BUILD.gn b/BUILD.gn
index df019b88..2c600b22 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -81,11 +81,11 @@ group("libyuv") {
   }
 
   if (libyuv_use_lsx) {
-    deps += [ ":libyuv_lsx"]
+    deps += [ ":libyuv_lsx" ]
   }
 
   if (libyuv_use_lasx) {
-    deps += [ ":libyuv_lasx"]
+    deps += [ ":libyuv_lasx" ]
   }
 
   if (!is_ios && !libyuv_disable_jpeg) {
@@ -254,8 +254,8 @@ if (libyuv_use_lsx) {
   static_library("libyuv_lsx") {
     sources = [
       # LSX Source Files
-      "source/row_lsx.cc",
       "source/rotate_lsx.cc",
+      "source/row_lsx.cc",
       "source/scale_lsx.cc",
     ]
 
@@ -315,6 +315,7 @@ if (libyuv_include_tests) {
       "unit_test/basictypes_test.cc",
       "unit_test/color_test.cc",
       "unit_test/compare_test.cc",
+      "unit_test/convert_argb_test.cc",
       "unit_test/convert_test.cc",
       "unit_test/cpu_test.cc",
       "unit_test/cpu_thread_test.cc",
@@ -323,6 +324,7 @@ if (libyuv_include_tests) {
       "unit_test/rotate_argb_test.cc",
       "unit_test/rotate_test.cc",
       "unit_test/scale_argb_test.cc",
+      "unit_test/scale_plane_test.cc",
       "unit_test/scale_rgb_test.cc",
       "unit_test/scale_test.cc",
       "unit_test/scale_uv_test.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a4a1994..9abfa74b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,6 +37,10 @@ if(WIN32)
   SET_TARGET_PROPERTIES	( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" )
 endif()
 
+# this creates the cpuid tool
+ADD_EXECUTABLE      ( cpuid ${ly_base_dir}/util/cpuid.c )
+TARGET_LINK_LIBRARIES  ( cpuid ${ly_lib_static} )
+
 # this creates the conversion tool
 ADD_EXECUTABLE			( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
 TARGET_LINK_LIBRARIES	( yuvconvert ${ly_lib_static} )
diff --git a/DEPS b/DEPS
index b6a7bc7b..70ed1d58 100644
--- a/DEPS
+++ b/DEPS
@@ -18,24 +18,24 @@ vars = {
 
   # By default, download the fuchsia sdk from the public sdk directory.
   'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/',
-  'fuchsia_version': 'version:13.20230714.0.1',
+  'fuchsia_version': 'version:15.20230909.2.1',
   # By default, download the fuchsia images from the fuchsia GCS bucket.
   'fuchsia_images_bucket': 'fuchsia',
   'checkout_fuchsia': False,
   # Since the images are hundreds of MB, default to only downloading the image
   # most commonly useful for developers. Bots and developers that need to use
   # other images can override this with additional images.
-  'checkout_fuchsia_boot_images': "terminal.qemu-x64",
+  'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64",
   'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
 }
 
 deps = {
   'src/build':
-    Var('chromium_git') + '/chromium/src/build' + '@' + '860dae780c100c2d001dc6ee16625b17bc84c10f',
+    Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56',
   'src/buildtools':
-    Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'ca163845c76db63454f99436f6cd2bf03739dc24',
+    Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525',
   'src/testing':
-    Var('chromium_git') + '/chromium/src/testing' + '@' + '184b068a94f24ddf0b4299d48062779e1fc1950e',
+    Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6',
   'src/third_party':
     Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667',
 
@@ -82,15 +82,6 @@ deps = {
     'dep_type': 'cipd',
   },
 
-  # TODO(chromium:1458042): Remove these paths, when chromium builds files
-  # have moved to third_party/lib*/src paths.
-  'src/buildtools/third_party/libc++/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80',
-  'src/buildtools/third_party/libc++abi/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + 'd4760c0af99ccc9bce077960d5ddde4d66146c05',
-  'src/buildtools/third_party/libunwind/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '6c0013015be8a2be9de4b1e54cdc9d576b1d0729',
-
   'src/third_party/catapult':
     Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d',
   'src/third_party/clang-format/script':
@@ -374,7 +365,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/jdk',
-              'version': 'IivIDwNBf73mf7UwCOBceRUuDdtizMCgSOQDfUGHArsC',
+              'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C',
           },
       ],
       'condition': 'checkout_android',
diff --git a/METADATA b/METADATA
index 7ceb7be3..19d0436e 100644
--- a/METADATA
+++ b/METADATA
@@ -5,15 +5,15 @@
 name: "libyuv"
 description: "libyuv is an open source project that includes YUV scaling and conversion functionality."
 third_party {
-  url {
-    type: GIT
-    value: "https://chromium.googlesource.com/libyuv/libyuv/"
-  }
-  version: "f0921806a293e3e008e6325a51d4ea760c39d2c1"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2023
-    month: 9
-    day: 5
+    year: 2024
+    month: 1
+    day: 11
+  }
+  identifier {
+    type: "Git"
+    value: "https://chromium.googlesource.com/libyuv/libyuv/"
+    version: "af6ac8265bbd07bcf977526458b60305c4304288"
   }
 }
diff --git a/README.chromium b/README.chromium
index c68be174..1389f285 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: https://chromium.googlesource.com/libyuv/libyuv/
-Version: 1875
+Version: 1883
 License: BSD
 License File: LICENSE
 Shipped: yes
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index 203f7e0d..5a81e7c9 100644
--- a/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -31,24 +31,25 @@ static const int kCpuHasX86 = 0x10;
 static const int kCpuHasSSE2 = 0x20;
 static const int kCpuHasSSSE3 = 0x40;
 static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;  // unused at this time.
+static const int kCpuHasSSE42 = 0x100;
 static const int kCpuHasAVX = 0x200;
 static const int kCpuHasAVX2 = 0x400;
 static const int kCpuHasERMS = 0x800;
 static const int kCpuHasFMA3 = 0x1000;
 static const int kCpuHasF16C = 0x2000;
-static const int kCpuHasGFNI = 0x4000;
-static const int kCpuHasAVX512BW = 0x8000;
-static const int kCpuHasAVX512VL = 0x10000;
-static const int kCpuHasAVX512VNNI = 0x20000;
-static const int kCpuHasAVX512VBMI = 0x40000;
-static const int kCpuHasAVX512VBMI2 = 0x80000;
-static const int kCpuHasAVX512VBITALG = 0x100000;
-static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
+static const int kCpuHasAVX512BW = 0x4000;
+static const int kCpuHasAVX512VL = 0x8000;
+static const int kCpuHasAVX512VNNI = 0x10000;
+static const int kCpuHasAVX512VBMI = 0x20000;
+static const int kCpuHasAVX512VBMI2 = 0x40000;
+static const int kCpuHasAVX512VBITALG = 0x80000;
+static const int kCpuHasAVX10 = 0x100000;
+static const int kCpuHasAVXVNNI = 0x200000;
+static const int kCpuHasAVXVNNIINT8 = 0x400000;
 
 // These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x400000;
-static const int kCpuHasMSA = 0x800000;
+static const int kCpuHasMIPS = 0x800000;
+static const int kCpuHasMSA = 0x1000000;
 
 // These flags are only valid on LOONGARCH processors.
 static const int kCpuHasLOONGARCH = 0x2000000;
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 0455b4cc..46685a50 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -800,14 +800,18 @@ extern "C" {
 #define HAS_ABGRTOYJROW_RVV
 #define HAS_ABGRTOYROW_RVV
 #define HAS_AR64TOARGBROW_RVV
+#define HAS_AR64TOAB64ROW_RVV
 #define HAS_ARGBATTENUATEROW_RVV
 #define HAS_ARGBBLENDROW_RVV
 #define HAS_ARGBCOPYYTOALPHAROW_RVV
 #define HAS_ARGBEXTRACTALPHAROW_RVV
 #define HAS_ARGBTOAB64ROW_RVV
+#define HAS_ARGBTOABGRROW_RVV
 #define HAS_ARGBTOAR64ROW_RVV
+#define HAS_ARGBTOBGRAROW_RVV
 #define HAS_ARGBTORAWROW_RVV
 #define HAS_ARGBTORGB24ROW_RVV
+#define HAS_ARGBTORGBAROW_RVV
 #define HAS_ARGBTOYJROW_RVV
 #define HAS_ARGBTOYMATRIXROW_RVV
 #define HAS_ARGBTOYROW_RVV
@@ -839,6 +843,7 @@ extern "C" {
 #define HAS_RGB24TOARGBROW_RVV
 #define HAS_RGB24TOYJROW_RVV
 #define HAS_RGB24TOYROW_RVV
+#define HAS_RGBATOARGBROW_RVV
 #define HAS_RGBATOYJROW_RVV
 #define HAS_RGBATOYMATRIXROW_RVV
 #define HAS_RGBATOYROW_RVV
@@ -944,14 +949,6 @@ struct YuvConstants {
   free(var##_mem);                  \
   var = NULL
 
-#define align_buffer_64_16(var, size)                                        \
-  void* var##_mem = malloc((size)*2 + 63);                      /* NOLINT */ \
-  uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
-
-#define free_aligned_buffer_64_16(var) \
-  free(var##_mem);                     \
-  var = NULL
-
 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
 #define OMITFP
 #else
@@ -3494,8 +3491,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
                             int width);
 
 void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
 
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
 void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -3509,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
 void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
 void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
 void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
 void AR64ShuffleRow_C(const uint8_t* src_ar64,
                       uint8_t* dst_ar64,
                       const uint8_t* shuffler,
@@ -3537,6 +3541,8 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
 void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
 void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
 void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
 void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
                              uint16_t* dst_ptr,
                              int width);
diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h
index 443f89c2..bfe4a344 100644
--- a/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -27,39 +27,40 @@ typedef enum FilterMode {
 } FilterModeEnum;
 
 // Scale a YUV plane.
+// Returns 0 if successful.
 LIBYUV_API
-void ScalePlane(const uint8_t* src,
-                int src_stride,
-                int src_width,
-                int src_height,
-                uint8_t* dst,
-                int dst_stride,
-                int dst_width,
-                int dst_height,
-                enum FilterMode filtering);
+int ScalePlane(const uint8_t* src,
+               int src_stride,
+               int src_width,
+               int src_height,
+               uint8_t* dst,
+               int dst_stride,
+               int dst_width,
+               int dst_height,
+               enum FilterMode filtering);
 
 LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering);
+int ScalePlane_16(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering);
 
 // Sample is expected to be in the low 12 bits.
 LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering);
+int ScalePlane_12(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering);
 
 // Scales a YUV 4:2:0 image from the src width and height to the
 // dst width and height.
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index c015d772..02ed61ca 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -180,6 +180,8 @@ extern "C" {
 
 #if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
 #define HAS_SCALEADDROW_RVV
+// TODO: Test ScaleARGBRowDownEven_RVV and enable it
+// #define HAS_SCALEARGBROWDOWNEVEN_RVV
 #define HAS_SCALEUVROWDOWN4_RVV
 #define HAS_SCALEUVROWDOWNEVEN_RVV
 #if __riscv_v_intrinsic == 11000
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index d45ef09d..a9c54400 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1875
+#define LIBYUV_VERSION 1883
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/linux.mk b/linux.mk
index b541b47c..d19a888a 100644
--- a/linux.mk
+++ b/linux.mk
@@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \
 	source/rotate_argb.o       \
 	source/rotate_common.o     \
 	source/rotate_gcc.o        \
+	source/rotate_lsx.o        \
 	source/rotate_msa.o        \
 	source/rotate_neon.o       \
 	source/rotate_neon64.o     \
@@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \
 	source/row_any.o           \
 	source/row_common.o        \
 	source/row_gcc.o           \
+	source/row_lasx.o          \
+	source/row_lsx.o           \
 	source/row_msa.o           \
 	source/row_neon.o          \
 	source/row_neon64.o        \
+	source/row_rvv.o           \
 	source/row_win.o           \
 	source/scale.o             \
 	source/scale_any.o         \
 	source/scale_argb.o        \
 	source/scale_common.o      \
 	source/scale_gcc.o         \
+	source/scale_lsx.o         \
 	source/scale_msa.o         \
 	source/scale_neon.o        \
 	source/scale_neon64.o      \
 	source/scale_rgb.o         \
+	source/scale_rvv.o         \
 	source/scale_uv.o          \
 	source/scale_win.o         \
 	source/video_common.o
diff --git a/source/convert.cc b/source/convert.cc
index b68fb1d3..6ac5bc43 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -54,18 +54,25 @@ static int I4xxToI420(const uint8_t* src_y,
   const int dst_y_height = Abs(src_y_height);
   const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
   const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+  int r;
   if (src_uv_width <= 0 || src_uv_height == 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
-               dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+                   dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-             dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-             dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                 dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                 dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 // Copy I420 with optional flipping.
@@ -526,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y,
   const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
   const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
   const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-                dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-                dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                    dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                    dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -777,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y,
   // Allocate u and v buffers
   align_buffer_64(plane_u, halfwidth * halfheight * 2);
   uint8_t* plane_v = plane_u + halfwidth * halfheight;
+  if (!plane_u)
+    return 1;
 
   I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
              dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width,
@@ -892,6 +908,8 @@ int MT2TToP010(const uint8_t* src_y,
     void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
         UnpackMT2T_C;
     align_buffer_64(row_buf, row_buf_size);
+    if (!row_buf)
+      return 1;
 
 #if defined(HAS_UNPACKMT2T_NEON)
     if (TestCpuFlag(kCpuHasNEON)) {
@@ -1092,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y,
     int awidth = halfwidth * 2;
     align_buffer_64(row_vu_0, awidth * 2);
     uint8_t* row_vu_1 = row_vu_0 + awidth;
+    if (!row_vu_0)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       MergeUVRow(src_v, src_u, row_vu_0, halfwidth);
@@ -1330,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
-          SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
-          Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+              SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+              Abs(height), kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -1355,17 +1379,21 @@ int NV16ToNV24(const uint8_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
-          dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+              dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Any P[420]1[02] to I[420]1[02] format with mirroring.
@@ -1443,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
-             SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
-             Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+                 SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+                 Abs(height), kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -1468,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
-             dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+                 dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Convert YUY2 to I420.
@@ -2660,6 +2696,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -2836,6 +2874,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3015,6 +3055,8 @@ int RAWToI420(const uint8_t* src_raw,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3191,6 +3233,8 @@ int RAWToJ420(const uint8_t* src_raw,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3369,6 +3413,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
     for (y = 0; y < height - 1; y += 2) {
 #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -3549,6 +3595,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3762,6 +3810,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index f6ab0784..871fea59 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y,
   return 0;
 }
 
+#ifndef __riscv
 // Shuffle table for converting BGRA to ARGB.
 static const uvec8 kShuffleMaskBGRAToARGB = {
     3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
@@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64,
   return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
                      (const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
 }
+#else
+// Convert BGRA to ARGB (same as ARGBToBGRA).
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+               int src_stride_bgra,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width,
+                    height);
+}
+
+// Convert ARGB to BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_bgra,
+               int dst_stride_bgra,
+               int width,
+               int height) {
+  int y;
+  void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) =
+      ARGBToBGRARow_C;
+  if (!src_argb || !dst_bgra || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_bgra = 0;
+  }
+
+#if defined(HAS_ARGBTOBGRAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToBGRARow = ARGBToBGRARow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToBGRARow(src_argb, dst_bgra, width);
+    src_argb += src_stride_argb;
+    dst_bgra += dst_stride_bgra;
+  }
+  return 0;
+}
+
+// Convert ARGB to ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_abgr,
+               int dst_stride_abgr,
+               int width,
+               int height) {
+  int y;
+  void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) =
+      ARGBToABGRRow_C;
+  if (!src_argb || !dst_abgr || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_abgr = 0;
+  }
+
+#if defined(HAS_ARGBTOABGRROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToABGRRow = ARGBToABGRRow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToABGRRow(src_argb, dst_abgr, width);
+    src_argb += src_stride_argb;
+    dst_abgr += dst_stride_abgr;
+  }
+  return 0;
+}
+
+// Convert ABGR to ARGB (same as ARGBToABGR).
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+               int src_stride_abgr,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width,
+                    height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+               int src_stride_rgba,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  int y;
+  void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) =
+      RGBAToARGBRow_C;
+  if (!src_rgba || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+    src_stride_rgba = -src_stride_rgba;
+  }
+  // Coalesce rows.
+  if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_rgba = dst_stride_argb = 0;
+  }
+
+#if defined(HAS_RGBATOARGBROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    RGBAToARGBRow = RGBAToARGBRow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    RGBAToARGBRow(src_rgba, dst_argb, width);
+    src_rgba += src_stride_rgba;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert AR64 To AB64.
+LIBYUV_API
+int AR64ToAB64(const uint16_t* src_ar64,
+               int src_stride_ar64,
+               uint16_t* dst_ab64,
+               int dst_stride_ab64,
+               int width,
+               int height) {
+  int y;
+  void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64,
+                        int width) = AR64ToAB64Row_C;
+  if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
+    src_stride_ar64 = -src_stride_ar64;
+  }
+  // Coalesce rows.
+  if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_ar64 = dst_stride_ab64 = 0;
+  }
+
+#if defined(HAS_AR64TOAB64ROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    AR64ToAB64Row = AR64ToAB64Row_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    AR64ToAB64Row(src_ar64, dst_ab64, width);
+    src_ar64 += src_stride_ar64;
+    dst_ab64 += dst_stride_ab64;
+  }
+  return 0;
+}
+#endif
 
 // Convert RGB24 to ARGB.
 LIBYUV_API
@@ -4480,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
 
   // General case fallback creates NV12
   align_buffer_64(plane_uv, halfwidth * 2 * halfheight);
+  if (!plane_uv)
+    return 1;
   dst_uv = plane_uv;
   for (y = 0; y < halfheight; ++y) {
     WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth);
@@ -5792,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y,
   {
     // Allocate a row of argb.
     align_buffer_64(row_argb, width * 4);
+    if (!row_argb)
+      return 1;
     for (y = 0; y < height; ++y) {
       I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
       ARGBToRGB565DitherRow(row_argb, dst_rgb565,
@@ -6054,6 +6248,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -6188,6 +6384,8 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -6321,6 +6519,8 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -6427,6 +6627,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -6524,6 +6726,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear_12(src_u, temp_u, width);
@@ -6614,6 +6818,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -6710,6 +6916,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear_12(src_u, temp_u, width);
@@ -6888,6 +7096,8 @@ static int I420AlphaToARGBMatrixBilinear(
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -7086,6 +7296,8 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -7227,6 +7439,8 @@ static int I010AlphaToARGBMatrixBilinear(
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -7386,6 +7600,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -7471,6 +7687,8 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_uv_1 = (uint16_t*)(row);
   uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
   P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
@@ -7560,6 +7778,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
   const int row_size = (2 * width + 31) & ~31;
   align_buffer_64(row, row_size * sizeof(uint16_t));
   uint16_t* temp_uv = (uint16_t*)(row);
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_uv, temp_uv, width);
@@ -7639,6 +7859,8 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_uv_1 = (uint16_t*)(row);
   uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
   P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
@@ -7728,6 +7950,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
   const int row_size = (2 * width + 31) & ~31;
   align_buffer_64(row, row_size * sizeof(uint16_t));
   uint16_t* temp_uv = (uint16_t*)(row);
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_uv, temp_uv, width);
@@ -7829,6 +8053,8 @@ static int I422ToRGB24MatrixLinear(const uint8_t* src_y,
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 4102d610..e69da9e9 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y,
   const int dst_y_height = Abs(src_y_height);
   const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
   const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
+  int r;
   if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
       dst_uv_height <= 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
-               dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+                   dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-             dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-             dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                 dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                 dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 // Convert 8 bit YUV to 10 bit.
@@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
-  }
-  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
-                SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
-                Abs(height), kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
-                SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
-                Abs(height), kFilterBilinear);
-  return 0;
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
+  }
+  r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
+                    SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
+                    Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
+                    SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
+                    Abs(height), kFilterBilinear);
+  return r;
 }
 
 // 422 chroma to 444 chroma, 10/12 bit version
@@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
-                dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
-                dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+                    dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+                    dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // 422 chroma is 1/2 width, 1x height
@@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
-             dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
-             dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+                 dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+                 dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Copy to I400. Source can be I420,422,444,400,NV12,NV21
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index c3d037c4..b45de8c8 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -463,6 +463,8 @@ int ARGBToNV12(const uint8_t* src_argb,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -661,6 +663,8 @@ int ARGBToNV21(const uint8_t* src_argb,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -846,6 +850,8 @@ int ABGRToNV12(const uint8_t* src_abgr,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -1032,6 +1038,8 @@ int ABGRToNV21(const uint8_t* src_abgr,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -1232,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb,
     align_buffer_64(row_y, ((width + 63) & ~63) * 2);
     uint8_t* row_u = row_y + ((width + 63) & ~63);
     uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+    if (!row_y)
+      return 1;
 
     for (y = 0; y < height; ++y) {
       ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1426,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb,
     align_buffer_64(row_y, ((width + 63) & ~63) * 2);
     uint8_t* row_u = row_y + ((width + 63) & ~63);
     uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+    if (!row_y)
+      return 1;
 
     for (y = 0; y < height; ++y) {
       ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1527,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb,
   return 0;
 }
 
+#ifndef __riscv
 // Shuffle table for converting ARGB to RGBA.
 static const uvec8 kShuffleMaskARGBToRGBA = {
     3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
@@ -1542,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb,
   return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
                      (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
 }
+#else
+// Convert ARGB to RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_rgba,
+               int dst_stride_rgba,
+               int width,
+               int height) {
+  int y;
+  void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) =
+      ARGBToRGBARow_C;
+  if (!src_argb || !dst_rgba || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_rgba = 0;
+  }
+
+#if defined(HAS_ARGBTORGBAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToRGBARow = ARGBToRGBARow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToRGBARow(src_argb, dst_rgba, width);
+    src_argb += src_stride_argb;
+    dst_rgba += dst_stride_rgba;
+  }
+  return 0;
+}
+#endif
 
 // Convert ARGB To RGB24.
 LIBYUV_API
@@ -3230,14 +3284,21 @@ int RAWToJNV21(const uint8_t* src_raw,
   }
 #endif
   {
+#if defined(HAS_RAWTOYJROW)
     // Allocate a row of uv.
-    align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2);
-    uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31);
-#if !defined(HAS_RAWTOYJROW)
-    // Allocate 2 rows of ARGB.
-    const int row_size = (width * 4 + 31) & ~31;
-    align_buffer_64(row, row_size * 2);
+    const int row_uv_size = ((halfwidth + 31) & ~31);
+    align_buffer_64(row_uj, row_uv_size * 2);
+    uint8_t* row_vj = row_uj + row_uv_size;
+#else
+    // Allocate row of uv and 2 rows of ARGB.
+    const int row_size = ((width * 4 + 31) & ~31);
+    const int row_uv_size = ((halfwidth + 31) & ~31);
+    align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2);
+    uint8_t* row_vj = row_uj + row_uv_size;
+    uint8_t* row = row_vj + row_uv_size;
 #endif
+    if (!row_uj)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
 #if defined(HAS_RAWTOYJROW)
@@ -3269,9 +3330,6 @@ int RAWToJNV21(const uint8_t* src_raw,
       ARGBToYJRow(row, dst_y, width);
 #endif
     }
-#if !defined(HAS_RAWTOYJROW)
-    free_aligned_buffer_64(row);
-#endif
     free_aligned_buffer_64(row_uj);
   }
   return 0;
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 0c4a1581..eedce16b 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -292,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) {
   int cpu_info0[4] = {0, 0, 0, 0};
   int cpu_info1[4] = {0, 0, 0, 0};
   int cpu_info7[4] = {0, 0, 0, 0};
+  int cpu_einfo7[4] = {0, 0, 0, 0};
   CpuId(0, 0, cpu_info0);
   CpuId(1, 0, cpu_info1);
   if (cpu_info0[0] >= 7) {
     CpuId(7, 0, cpu_info7);
+    CpuId(7, 1, cpu_einfo7);
   }
   cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
@@ -308,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) {
       ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
     cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
                 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
-                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
+                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) |
+                ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
+                ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);
 
     // Detect AVX512bw
     if ((GetXCR0() & 0xe0) == 0xe0) {
@@ -318,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
       cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
       cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
       cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
-      cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
+      cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0;
     }
   }
 #endif
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index f6ec0dac..1c94e260 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -3027,6 +3027,8 @@ int I420Blend(const uint8_t* src_y0,
 
   // Row buffer for intermediate alpha pixels.
   align_buffer_64(halfalpha, halfwidth);
+  if (!halfalpha)
+    return 1;
   for (y = 0; y < height; y += 2) {
     // last row of odd height image use 1 row of alpha instead of 2.
     if (y == (height - 1)) {
@@ -4710,6 +4712,8 @@ int GaussPlane_F32(const float* src,
   {
     // 2 pixels on each side, but aligned out to 16 bytes.
     align_buffer_64(rowbuf, (4 + width + 4) * 4);
+    if (!rowbuf)
+      return 1;
     memset(rowbuf, 0, 16);
     memset(rowbuf + (4 + width) * 4, 0, 16);
     float* row = (float*)(rowbuf + 16);
@@ -4868,6 +4872,8 @@ static int ARGBSobelize(const uint8_t* src_argb,
     uint8_t* row_y0 = row_y + kEdge;
     uint8_t* row_y1 = row_y0 + row_size;
     uint8_t* row_y2 = row_y1 + row_size;
+    if (!rows)
+      return 1;
     ARGBToYJRow(src_argb, row_y0, width);
     row_y0[-1] = row_y0[0];
     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
@@ -5654,6 +5660,8 @@ int UYVYToNV12(const uint8_t* src_uyvy,
     int awidth = halfwidth * 2;
     // row of y and 2 rows of uv
     align_buffer_64(rows, awidth * 3);
+    if (!rows)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       // Split Y from UV.
diff --git a/source/rotate.cc b/source/rotate.cc
index 3678b80a..3f8332c3 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
+
 #include "libyuv/rotate.h"
 
 #include "libyuv/convert.h"
@@ -140,6 +142,9 @@ void RotatePlane180(const uint8_t* src,
                     int height) {
   // Swap top and bottom row and mirror the content. Uses a temporary row.
   align_buffer_64(row, width);
+  assert(row);
+  if (!row)
+    return;
   const uint8_t* src_bot = src + src_stride * (height - 1);
   uint8_t* dst_bot = dst + dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
@@ -543,24 +548,29 @@ static void RotatePlane180_16(const uint16_t* src,
                               int dst_stride,
                               int width,
                               int height) {
-  // Swap top and bottom row and mirror the content. Uses a temporary row.
-  align_buffer_64_16(row, width);
   const uint16_t* src_bot = src + src_stride * (height - 1);
   uint16_t* dst_bot = dst + dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
   int y;
 
+  // Swap top and bottom row and mirror the content. Uses a temporary row.
+  align_buffer_64(row, width * 2);
+  uint16_t* row_tmp = (uint16_t*)row;
+  assert(row);
+  if (!row)
+    return;
+
   // Odd height will harmlessly mirror the middle row twice.
   for (y = 0; y < half_height; ++y) {
-    CopyRow_16_C(src, row, width);        // Copy top row into buffer
-    MirrorRow_16_C(src_bot, dst, width);  // Mirror bottom row into top row
-    MirrorRow_16_C(row, dst_bot, width);  // Mirror buffer into bottom row
+    CopyRow_16_C(src, row_tmp, width);        // Copy top row into buffer
+    MirrorRow_16_C(src_bot, dst, width);      // Mirror bottom row into top row
+    MirrorRow_16_C(row_tmp, dst_bot, width);  // Mirror buffer into bottom row
     src += src_stride;
     dst += dst_stride;
     src_bot -= src_stride;
     dst_bot -= dst_stride;
   }
-  free_aligned_buffer_64_16(row);
+  free_aligned_buffer_64(row);
 }
 
 LIBYUV_API
@@ -690,6 +700,7 @@ int I422Rotate(const uint8_t* src_y,
                enum RotationMode mode) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
+  int r;
   if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
       !dst_u || !dst_v) {
     return -1;
@@ -725,23 +736,35 @@ int I422Rotate(const uint8_t* src_y,
     case kRotate90:
       RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                     height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                 halfheight, width, kFilterBilinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                     dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                     height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                 halfheight, width, kFilterLinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                     dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate270:
       RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                      height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                 halfheight, width, kFilterBilinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                     dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                      height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                 halfheight, width, kFilterLinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                     dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate180:
@@ -1055,6 +1078,7 @@ int I210Rotate(const uint16_t* src_y,
                enum RotationMode mode) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
+  int r;
   if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
       !dst_u || !dst_v) {
     return -1;
@@ -1090,23 +1114,35 @@ int I210Rotate(const uint16_t* src_y,
     case kRotate90:
       RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                        height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                    halfheight, width, kFilterBilinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                        dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                        height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                    halfheight, width, kFilterLinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                        dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate270:
       RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                         height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                    halfheight, width, kFilterBilinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                        dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                         height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                    halfheight, width, kFilterLinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                        dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                         height);
       return 0;
diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc
index 034d53e8..d55fac4f 100644
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -120,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb,
                          int width,
                          int height) {
   // Swap first and last row and mirror the content. Uses a temporary row.
-  align_buffer_64(row, width * 4);
   const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
   uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
   int half_height = (height + 1) >> 1;
@@ -129,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb,
       ARGBMirrorRow_C;
   void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
       CopyRow_C;
+  align_buffer_64(row, width * 4);
+  if (!row)
+    return 1;
 #if defined(HAS_ARGBMIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
diff --git a/source/row_common.cc b/source/row_common.cc
index 7591c6b6..3afc4b4d 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -281,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
   }
 }
 
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_abgr[0] = r;
+    dst_abgr[1] = g;
+    dst_abgr[2] = b;
+    dst_abgr[3] = a;
+    dst_abgr += 4;
+    src_argb += 4;
+  }
+}
+
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_bgra[0] = a;
+    dst_bgra[1] = r;
+    dst_bgra[2] = g;
+    dst_bgra[3] = b;
+    dst_bgra += 4;
+    src_argb += 4;
+  }
+}
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_rgba[0] = a;
+    dst_rgba[1] = b;
+    dst_rgba[2] = g;
+    dst_rgba[3] = r;
+    dst_rgba += 4;
+    src_argb += 4;
+  }
+}
+
 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   int x;
   for (x = 0; x < width; ++x) {
@@ -309,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   }
 }
 
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t a = src_rgba[0];
+    uint8_t b = src_rgba[1];
+    uint8_t g = src_rgba[2];
+    uint8_t r = src_rgba[3];
+    dst_argb[0] = b;
+    dst_argb[1] = g;
+    dst_argb[2] = r;
+    dst_argb[3] = a;
+    dst_argb += 4;
+    src_rgba += 4;
+  }
+}
+
 void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   int x;
   for (x = 0; x < width - 1; x += 2) {
@@ -517,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
   }
 }
 
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint16_t b = src_ar64[0];
+    uint16_t g = src_ar64[1];
+    uint16_t r = src_ar64[2];
+    uint16_t a = src_ar64[3];
+    dst_ab64[0] = r;
+    dst_ab64[1] = g;
+    dst_ab64[2] = b;
+    dst_ab64[3] = a;
+    dst_ab64 += 4;
+    src_ar64 += 4;
+  }
+}
+
 // TODO(fbarchard): Make shuffle compatible with SIMD versions
 void AR64ShuffleRow_C(const uint8_t* src_ar64,
                       uint8_t* dst_ar64,
diff --git a/source/row_lasx.cc b/source/row_lasx.cc
index 1082ad80..be85022e 100644
--- a/source/row_lasx.cc
+++ b/source/row_lasx.cc
@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y,
   __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m256i vec_ubvr, vec_ugvg;
   __m256i const_0x80 = __lasx_xvldi(0x80);
-  __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000,
-                   0xF000F000F000F000};
+  __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000,
+                                 0xF000F000F000F000, 0xF000F000F000F000};
   __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0,
                   0x00F000F000F000F0};
 
@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y,
   __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m256i vec_ubvr, vec_ugvg;
   __m256i const_0x80 = __lasx_xvldi(0x80);
-  __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000,
-                   0x8000800080008000};
+  __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000,
+                                 0x8000800080008000, 0x8000800080008000};
 
   YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb);
   vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr);
@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0,
                         0x0009000900090009, 0x0009000900090009};
   __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                      0x0000000700000003};
-  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
-                          0x8080808080808080, 0x8080808080808080};
+  __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                        0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb,
   __m256i const_38 = __lasx_xvldi(38);
   __m256i const_94 = __lasx_xvldi(94);
   __m256i const_18 = __lasx_xvldi(18);
-  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
-                          0x8080808080808080, 0x8080808080808080};
+  __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                        0x8080808080808080, 0x8080808080808080};
   __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                      0x0000000700000003};
   for (x = 0; x < len; x++) {
@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0,
@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0,
@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18,
                       0x15120F0C09060300, 0x00000000001E1B18};
   __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18,
                       0x15120F0C09060300, 0x00000000001E1B18};
   __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -2242,8 +2242,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb,
   __m256i const_21 = __lasx_xvldi(0x415);
   __m256i const_53 = __lasx_xvldi(0x435);
   __m256i const_10 = __lasx_xvldi(0x40A);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301,
                    0x1F1D0F0D1B190B09};
 
diff --git a/source/row_lsx.cc b/source/row_lsx.cc
index e626072a..fa088c9e 100644
--- a/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -565,7 +565,7 @@ void I422ToARGB4444Row_LSX(const uint8_t* src_y,
   __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m128i vec_ubvr, vec_ugvg;
   __m128i const_80 = __lsx_vldi(0x80);
-  __m128i alpha = {0xF000F000F000F000, 0xF000F000F000F000};
+  __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000};
   __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0};
 
   YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
@@ -612,7 +612,7 @@ void I422ToARGB1555Row_LSX(const uint8_t* src_y,
   __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m128i vec_ubvr, vec_ugvg;
   __m128i const_80 = __lsx_vldi(0x80);
-  __m128i alpha = {0x8000800080008000, 0x8000800080008000};
+  __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000};
 
   YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
   vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
@@ -792,7 +792,7 @@ void ARGBToUVRow_LSX(const uint8_t* src_argb0,
   __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013};
   __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f};
   __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009};
-  __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0,
               48, src0, src1, src2, src3);
@@ -991,7 +991,7 @@ void ARGBToUV444Row_LSX(const uint8_t* src_argb,
   __m128i const_38 = __lsx_vldi(38);
   __m128i const_94 = __lsx_vldi(94);
   __m128i const_18 = __lsx_vldi(18);
-  __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
               src0, src1, src2, src3);
@@ -1533,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0,
@@ -1642,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0,
@@ -1703,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18};
   __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908};
   __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -1756,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18};
   __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908};
   __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -1991,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
@@ -2039,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
@@ -2087,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
@@ -2136,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
   __m128i const_21 = __lsx_vldi(0x415);
   __m128i const_53 = __lsx_vldi(0x435);
   __m128i const_10 = __lsx_vldi(0x40A);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
@@ -2514,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb,
   __m128i const_256 = __lsx_vldi(0x500);
   __m128i zero = __lsx_vldi(0);
   __m128i alpha = __lsx_vldi(0xFF);
-  __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+  __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16,
@@ -2560,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb,
   __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset);
   __m128i vec_scale = __lsx_vreplgr2vr_w(scale);
   __m128i zero = __lsx_vldi(0);
-  __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+  __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48,
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index c875be2f..0bf2bef6 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -200,6 +200,23 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
 }
 #endif
 
+#ifdef HAS_AR64TOAB64ROW_RVV
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64,
+                       uint16_t* dst_ab64,
+                       int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e16m2(w);
+    vuint16m2_t v_b, v_g, v_r, v_a;
+    __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl);
+    __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl);
+    w -= vl;
+    src_ar64 += vl * 4;
+    dst_ab64 += vl * 4;
+  } while (w > 0);
+}
+#endif
+
 #ifdef HAS_AB64TOARGBROW_RVV
 void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
   size_t avl = (size_t)width;
@@ -301,6 +318,66 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
 }
 #endif
 
+#ifdef HAS_ARGBTOABGRROW_RVV
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl);
+    w -= vl;
+    src_argb += vl * 4;
+    dst_abgr += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOBGRAROW_RVV
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl);
+    w -= vl;
+    src_argb += vl * 4;
+    dst_bgra += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORGBAROW_RVV
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+    w -= vl;
+    src_argb += vl * 4;
+    dst_rgba += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGBATOARGBROW_RVV
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
+    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+    w -= vl;
+    src_rgba += vl * 4;
+    dst_argb += vl * 4;
+  } while (w > 0);
+}
+#endif
+
 #ifdef HAS_RGB24TOARGBROW_RVV
 void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
                         uint8_t* dst_argb,
diff --git a/source/scale.cc b/source/scale.cc
index 43d973af..b7a602ba 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -939,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width,
 // one pixel of destination using fixed point (16.16) to step
 // through source, sampling a box of pixel with simple
 // averaging.
-static void ScalePlaneBox(int src_width,
-                          int src_height,
-                          int dst_width,
-                          int dst_height,
-                          int src_stride,
-                          int dst_stride,
-                          const uint8_t* src_ptr,
-                          uint8_t* dst_ptr) {
+static int ScalePlaneBox(int src_width,
+                         int src_height,
+                         int dst_width,
+                         int dst_height,
+                         int src_stride,
+                         int dst_stride,
+                         const uint8_t* src_ptr,
+                         uint8_t* dst_ptr) {
   int j, k;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -960,6 +960,8 @@ static void ScalePlaneBox(int src_width,
   {
     // Allocate a row buffer of uint16_t.
     align_buffer_64(row16, src_width * 2);
+    if (!row16)
+      return 1;
     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                          const uint16_t* src_ptr, uint8_t* dst_ptr) =
         (dx & 0xffff) ? ScaleAddCols2_C
@@ -1031,16 +1033,17 @@ static void ScalePlaneBox(int src_width,
     }
     free_aligned_buffer_64(row16);
   }
+  return 0;
 }
 
-static void ScalePlaneBox_16(int src_width,
-                             int src_height,
-                             int dst_width,
-                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
-                             const uint16_t* src_ptr,
-                             uint16_t* dst_ptr) {
+static int ScalePlaneBox_16(int src_width,
+                            int src_height,
+                            int dst_width,
+                            int dst_height,
+                            int src_stride,
+                            int dst_stride,
+                            const uint16_t* src_ptr,
+                            uint16_t* dst_ptr) {
   int j, k;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1054,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width,
   {
     // Allocate a row buffer of uint32_t.
     align_buffer_64(row32, src_width * 4);
+    if (!row32)
+      return 1;
     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                          const uint32_t* src_ptr, uint16_t* dst_ptr) =
         (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
@@ -1085,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width,
     }
     free_aligned_buffer_64(row32);
   }
+  return 0;
 }
 
 // Scale plane down with bilinear interpolation.
-static void ScalePlaneBilinearDown(int src_width,
-                                   int src_height,
-                                   int dst_width,
-                                   int dst_height,
-                                   int src_stride,
-                                   int dst_stride,
-                                   const uint8_t* src_ptr,
-                                   uint8_t* dst_ptr,
-                                   enum FilterMode filtering) {
+static int ScalePlaneBilinearDown(int src_width,
+                                  int src_height,
+                                  int dst_width,
+                                  int dst_height,
+                                  int src_stride,
+                                  int dst_stride,
+                                  const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
+                                  enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1105,6 +1111,8 @@ static void ScalePlaneBilinearDown(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row buffer.
   align_buffer_64(row, src_width);
+  if (!row)
+    return 1;
 
   const int max_y = (src_height - 1) << 16;
   int j;
@@ -1214,17 +1222,18 @@ static void ScalePlaneBilinearDown(int src_width,
     }
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
-static void ScalePlaneBilinearDown_16(int src_width,
-                                      int src_height,
-                                      int dst_width,
-                                      int dst_height,
-                                      int src_stride,
-                                      int dst_stride,
-                                      const uint16_t* src_ptr,
-                                      uint16_t* dst_ptr,
-                                      enum FilterMode filtering) {
+static int ScalePlaneBilinearDown_16(int src_width,
+                                     int src_height,
+                                     int dst_width,
+                                     int dst_height,
+                                     int src_stride,
+                                     int dst_stride,
+                                     const uint16_t* src_ptr,
+                                     uint16_t* dst_ptr,
+                                     enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1233,6 +1242,8 @@ static void ScalePlaneBilinearDown_16(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row buffer.
   align_buffer_64(row, src_width * 2);
+  if (!row)
+    return 1;
 
   const int max_y = (src_height - 1) << 16;
   int j;
@@ -1305,18 +1316,19 @@ static void ScalePlaneBilinearDown_16(int src_width,
     }
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
 // Scale up down with bilinear interpolation.
-static void ScalePlaneBilinearUp(int src_width,
-                                 int src_height,
-                                 int dst_width,
-                                 int dst_height,
-                                 int src_stride,
-                                 int dst_stride,
-                                 const uint8_t* src_ptr,
-                                 uint8_t* dst_ptr,
-                                 enum FilterMode filtering) {
+static int ScalePlaneBilinearUp(int src_width,
+                                int src_height,
+                                int dst_width,
+                                int dst_height,
+                                int src_stride,
+                                int dst_stride,
+                                const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                enum FilterMode filtering) {
   int j;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1415,6 +1427,8 @@ static void ScalePlaneBilinearUp(int src_width,
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -1458,6 +1472,7 @@ static void ScalePlaneBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale plane, horizontally up by 2 times.
@@ -1479,6 +1494,7 @@ static void ScalePlaneUp2_Linear(int src_width,
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1542,6 +1558,7 @@ static void ScalePlaneUp2_Bilinear(int src_width,
       ScaleRowUp2_Bilinear_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1608,6 +1625,7 @@ static void ScalePlaneUp2_12_Linear(int src_width,
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1661,6 +1679,7 @@ static void ScalePlaneUp2_12_Bilinear(int src_width,
       ScaleRowUp2_Bilinear_16_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1709,6 +1728,7 @@ static void ScalePlaneUp2_16_Linear(int src_width,
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1757,6 +1777,7 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
       ScaleRowUp2_Bilinear_16_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1791,15 +1812,15 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
   }
 }
 
-static void ScalePlaneBilinearUp_16(int src_width,
-                                    int src_height,
-                                    int dst_width,
-                                    int dst_height,
-                                    int src_stride,
-                                    int dst_stride,
-                                    const uint16_t* src_ptr,
-                                    uint16_t* dst_ptr,
-                                    enum FilterMode filtering) {
+static int ScalePlaneBilinearUp_16(int src_width,
+                                   int src_height,
+                                   int dst_width,
+                                   int dst_height,
+                                   int src_stride,
+                                   int dst_stride,
+                                   const uint16_t* src_ptr,
+                                   uint16_t* dst_ptr,
+                                   enum FilterMode filtering) {
   int j;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1876,10 +1897,11 @@ static void ScalePlaneBilinearUp_16(int src_width,
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
     align_buffer_64(row, row_size * 4);
-
-    uint16_t* rowptr = (uint16_t*)row;
     int rowstride = row_size;
     int lasty = yi;
+    uint16_t* rowptr = (uint16_t*)row;
+    if (!row)
+      return 1;
 
     ScaleFilterCols(rowptr, src, dst_width, x, dx);
     if (src_height > 1) {
@@ -1919,6 +1941,7 @@ static void ScalePlaneBilinearUp_16(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale Plane to/from any dimensions, without interpolation.
@@ -2003,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width,
 // Scale a plane.
 // This function dispatches to a specialized scaler based on scale factor.
 LIBYUV_API
-void ScalePlane(const uint8_t* src,
-                int src_stride,
-                int src_width,
-                int src_height,
-                uint8_t* dst,
-                int dst_stride,
-                int dst_width,
-                int dst_height,
-                enum FilterMode filtering) {
+int ScalePlane(const uint8_t* src,
+               int src_stride,
+               int src_width,
+               int src_height,
+               uint8_t* dst,
+               int dst_stride,
+               int dst_width,
+               int dst_height,
+               enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -2027,7 +2050,7 @@ void ScalePlane(const uint8_t* src,
   if (dst_width == src_width && dst_height == src_height) {
     // Straight copy.
     CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
-    return;
+    return 0;
   }
   if (dst_width == src_width && filtering != kFilterBox) {
     int dy = 0;
@@ -2043,7 +2066,7 @@ void ScalePlane(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
-    return;
+    return 0;
   }
   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
     // Scale down.
@@ -2051,69 +2074,67 @@ void ScalePlane(const uint8_t* src,
       // optimized, 3/4
       ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
       // optimized, 1/2
       ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     // 3/8 rounded up for odd sized chroma height.
     if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
       // optimized, 3/8
       ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
         (filtering == kFilterBox || filtering == kFilterNone)) {
       // optimized, 1/4
       ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
   }
   if (filtering == kFilterBox && dst_height * 2 < src_height) {
-    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
-                  dst_stride, src, dst);
-    return;
+    return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
+                         src_stride, dst_stride, src, dst);
   }
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if (filtering && dst_height > src_height) {
-    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
-                         src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
+                                src_stride, dst_stride, src, dst, filtering);
   }
   if (filtering) {
-    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
-                           src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
+                                  src_stride, dst_stride, src, dst, filtering);
   }
   ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
                    dst_stride, src, dst);
+  return 0;
 }
 
 LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering) {
+int ScalePlane_16(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -2129,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src,
   if (dst_width == src_width && dst_height == src_height) {
     // Straight copy.
     CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
-    return;
+    return 0;
   }
   if (dst_width == src_width && filtering != kFilterBox) {
     int dy = 0;
@@ -2148,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                           dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
-    return;
+    return 0;
   }
   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
     // Scale down.
@@ -2156,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src,
       // optimized, 3/4
       ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
       // optimized, 1/2
       ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     // 3/8 rounded up for odd sized chroma height.
     if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
       // optimized, 3/8
       ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
         (filtering == kFilterBox || filtering == kFilterNone)) {
       // optimized, 1/4
       ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
   }
   if (filtering == kFilterBox && dst_height * 2 < src_height) {
-    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
-                     dst_stride, src, dst);
-    return;
+    return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
+                            src_stride, dst_stride, src, dst);
   }
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
                             src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
                               src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if (filtering && dst_height > src_height) {
-    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
-                            src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
+                                   src_stride, dst_stride, src, dst, filtering);
   }
   if (filtering) {
-    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
-                              src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
+                                     dst_height, src_stride, dst_stride, src,
+                                     dst, filtering);
   }
   ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst);
+  return 0;
 }
 
 LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering) {
+int ScalePlane_12(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -2233,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src,
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
                             src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
                               src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
 
-  ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
-                dst_width, dst_height, filtering);
+  return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
+                       dst_width, dst_height, filtering);
 }
 
 // Scale an I420 image.
@@ -2271,6 +2291,7 @@ int I420Scale(const uint8_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2278,13 +2299,19 @@ int I420Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-             dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-             dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2309,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2316,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-                dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-                dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2347,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2354,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-                dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-                dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 // Scale an I444 image.
@@ -2384,19 +2425,27 @@ int I444Scale(const uint8_t* src_y,
               int dst_width,
               int dst_height,
               enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-             dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
+                 dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
+                 dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2417,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-                dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
+                    dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
+                    dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2450,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-                dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
+                    dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
+                    dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 // Scale an I422 image.
@@ -2488,6 +2553,7 @@ int I422Scale(const uint8_t* src_y,
               enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2495,13 +2561,19 @@ int I422Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-             dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-             dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                 dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                 dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2524,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y,
                  enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2531,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-                dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-                dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2560,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y,
                  enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2567,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-                dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-                dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 // Scale an NV12 image.
@@ -2597,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
@@ -2604,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
-          dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
+              dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 // Deprecated api
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 1d5c1b60..18bdeb86 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -151,22 +151,27 @@ static void ScaleARGBDown2(int src_width,
 // ScaleARGB ARGB, 1/4
 // This is an optimized version for scaling down a ARGB to 1/4 of
 // its original size.
-static void ScaleARGBDown4Box(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              int src_stride,
-                              int dst_stride,
-                              const uint8_t* src_argb,
-                              uint8_t* dst_argb,
-                              int x,
-                              int dx,
-                              int y,
-                              int dy) {
+static int ScaleARGBDown4Box(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             int src_stride,
+                             int dst_stride,
+                             const uint8_t* src_argb,
+                             uint8_t* dst_argb,
+                             int x,
+                             int dx,
+                             int y,
+                             int dy) {
   int j;
   // Allocate 2 rows of ARGB.
   const int row_size = (dst_width * 2 * 4 + 31) & ~31;
+  // TODO(fbarchard): Remove this row buffer and implement a ScaleARGBRowDown4
+  // but implemented via a 2 pass wrapper that uses a very small array on the
+  // stack with a horizontal loop.
   align_buffer_64(row, row_size * 2);
+  if (!row)
+    return 1;
   int row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                             uint8_t* dst_argb, int dst_width) =
@@ -209,6 +214,7 @@ static void ScaleARGBDown4Box(int src_width,
     dst_argb += dst_stride;
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
 // ScaleARGB ARGB Even
@@ -278,10 +284,14 @@ static void ScaleARGBDownEven(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV)
+  if (filtering && TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV;
+  }
+#endif
 #if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
-  if (TestCpuFlag(kCpuHasRVV)) {
-    ScaleARGBRowDownEven =
-        filtering ? ScaleARGBRowDownEvenBox_RVV : ScaleARGBRowDownEven_RVV;
+  if (!filtering && TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
   }
 #endif
 
@@ -296,19 +306,19 @@ static void ScaleARGBDownEven(int src_width,
 }
 
 // Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width,
-                                  int src_height,
-                                  int dst_width,
-                                  int dst_height,
-                                  int src_stride,
-                                  int dst_stride,
-                                  const uint8_t* src_argb,
-                                  uint8_t* dst_argb,
-                                  int x,
-                                  int dx,
-                                  int y,
-                                  int dy,
-                                  enum FilterMode filtering) {
+static int ScaleARGBBilinearDown(int src_width,
+                                 int src_height,
+                                 int dst_width,
+                                 int dst_height,
+                                 int src_stride,
+                                 int dst_stride,
+                                 const uint8_t* src_argb,
+                                 uint8_t* dst_argb,
+                                 int x,
+                                 int dx,
+                                 int y,
+                                 int dy,
+                                 enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                          ptrdiff_t src_stride, int dst_width,
@@ -407,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width,
   // Allocate a row of ARGB.
   {
     align_buffer_64(row, clip_src_width * 4);
+    if (!row)
+      return 1;
 
     const int max_y = (src_height - 1) << 16;
     if (y > max_y) {
@@ -430,22 +442,23 @@ static void ScaleARGBBilinearDown(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width,
-                                int src_height,
-                                int dst_width,
-                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
-                                const uint8_t* src_argb,
-                                uint8_t* dst_argb,
-                                int x,
-                                int dx,
-                                int y,
-                                int dy,
-                                enum FilterMode filtering) {
+static int ScaleARGBBilinearUp(int src_width,
+                               int src_height,
+                               int dst_width,
+                               int dst_height,
+                               int src_stride,
+                               int dst_stride,
+                               const uint8_t* src_argb,
+                               uint8_t* dst_argb,
+                               int x,
+                               int dx,
+                               int y,
+                               int dy,
+                               enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                          ptrdiff_t src_stride, int dst_width,
@@ -581,6 +594,8 @@ static void ScaleARGBBilinearUp(int src_width,
     // Allocate 2 rows of ARGB.
     const int row_size = (dst_width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -624,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 #ifdef YUVSCALEUP
 // Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width,
-                                     int src_height,
-                                     int dst_width,
-                                     int dst_height,
-                                     int src_stride_y,
-                                     int src_stride_u,
-                                     int src_stride_v,
-                                     int dst_stride_argb,
-                                     const uint8_t* src_y,
-                                     const uint8_t* src_u,
-                                     const uint8_t* src_v,
-                                     uint8_t* dst_argb,
-                                     int x,
-                                     int dx,
-                                     int y,
-                                     int dy,
-                                     enum FilterMode filtering) {
+static int ScaleYUVToARGBBilinearUp(int src_width,
+                                    int src_height,
+                                    int dst_width,
+                                    int dst_height,
+                                    int src_stride_y,
+                                    int src_stride_u,
+                                    int src_stride_v,
+                                    int dst_stride_argb,
+                                    const uint8_t* src_y,
+                                    const uint8_t* src_u,
+                                    const uint8_t* src_v,
+                                    uint8_t* dst_argb,
+                                    int x,
+                                    int dx,
+                                    int y,
+                                    int dy,
+                                    enum FilterMode filtering) {
   int j;
   void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
                         const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
@@ -846,16 +862,17 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
   const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
   const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
 
-  // Allocate 2 rows of ARGB.
+  // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
+  // scaled horizontally to the destination width.
   const int row_size = (dst_width * 4 + 31) & ~31;
-  align_buffer_64(row, row_size * 2);
-
-  // Allocate 1 row of ARGB for source conversion.
-  align_buffer_64(argb_row, src_width * 4);
+  align_buffer_64(row, row_size * 2 + src_width * 4);
 
+  uint8_t* argb_row = row + row_size * 2;
   uint8_t* rowptr = row;
   int rowstride = row_size;
   int lasty = yi;
+  if (!row)
+    return 1;
 
   // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
   ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
@@ -910,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
     y += dy;
   }
   free_aligned_buffer_64(row);
-  free_aligned_buffer_64(row_argb);
+  return 0;
 }
 #endif
 
@@ -985,19 +1002,19 @@ static void ScaleARGBSimple(int src_width,
 // ScaleARGB a ARGB.
 // This function in turn calls a scaling function
 // suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8_t* src,
-                      int src_stride,
-                      int src_width,
-                      int src_height,
-                      uint8_t* dst,
-                      int dst_stride,
-                      int dst_width,
-                      int dst_height,
-                      int clip_x,
-                      int clip_y,
-                      int clip_width,
-                      int clip_height,
-                      enum FilterMode filtering) {
+static int ScaleARGB(const uint8_t* src,
+                     int src_stride,
+                     int src_width,
+                     int src_height,
+                     uint8_t* dst,
+                     int dst_stride,
+                     int dst_width,
+                     int dst_height,
+                     int clip_x,
+                     int clip_y,
+                     int clip_width,
+                     int clip_height,
+                     enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1042,18 +1059,18 @@ static void ScaleARGB(const uint8_t* src,
           ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy,
                          filtering);
-          return;
+          return 0;
         }
         if (dx == 0x40000 && filtering == kFilterBox) {
           // Optimized 1/4 box downsample.
-          ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
-                            src_stride, dst_stride, src, dst, x, dx, y, dy);
-          return;
+          return ScaleARGBDown4Box(src_width, src_height, clip_width,
+                                   clip_height, src_stride, dst_stride, src,
+                                   dst, x, dx, y, dy);
         }
         ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
                           src_stride, dst_stride, src, dst, x, dx, y, dy,
                           filtering);
-        return;
+        return 0;
       }
       // Optimized odd scale down. ie 3, 5, 7, 9x.
       if ((dx & 0x10000) && (dy & 0x10000)) {
@@ -1062,7 +1079,7 @@ static void ScaleARGB(const uint8_t* src,
           // Straight copy.
           ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
                    src_stride, dst, dst_stride, clip_width, clip_height);
-          return;
+          return 0;
         }
       }
     }
@@ -1071,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                        dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering);
-    return;
+    return 0;
   }
   if (filtering && dy < 65536) {
-    ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
-                        src_stride, dst_stride, src, dst, x, dx, y, dy,
-                        filtering);
-    return;
+    return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
+                               src_stride, dst_stride, src, dst, x, dx, y, dy,
+                               filtering);
   }
   if (filtering) {
-    ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
-                          src_stride, dst_stride, src, dst, x, dx, y, dy,
-                          filtering);
-    return;
+    return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
+                                 src_stride, dst_stride, src, dst, x, dx, y, dy,
+                                 filtering);
   }
   ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
                   dst_stride, src, dst, x, dx, y, dy);
+  return 0;
 }
 
 LIBYUV_API
@@ -1110,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb,
       (clip_y + clip_height) > dst_height) {
     return -1;
   }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
-            dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
-            clip_height, filtering);
-  return 0;
+  return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+                   dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
+                   clip_width, clip_height, filtering);
 }
 
 // Scale an ARGB image.
@@ -1131,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb,
       src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
-            dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
-            filtering);
-  return 0;
+  return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+                   dst_stride_argb, dst_width, dst_height, 0, 0, dst_width,
+                   dst_height, filtering);
 }
 
 // Scale with YUV conversion to ARGB and clipping.
@@ -1158,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
                        int clip_width,
                        int clip_height,
                        enum FilterMode filtering) {
-  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
   int r;
+  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
+  if (!argb_buffer) {
+    return 1;  // Out of memory runtime error.
+  }
   (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
   (void)dst_fourcc;
   I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc
index fd14842d..de037e45 100644
--- a/source/scale_rvv.cc
+++ b/source/scale_rvv.cc
@@ -130,6 +130,7 @@ void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
 }
 #endif
 
+#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV
 void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
@@ -148,6 +149,7 @@ void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
     dst += vl;
   } while (w > 0);
 }
+#endif
 
 #ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV
 void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
diff --git a/source/scale_uv.cc b/source/scale_uv.cc
index 536b9436..0931c89a 100644
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -188,22 +188,24 @@ static void ScaleUVDown2(int src_width,
 // This is an optimized version for scaling down a UV to 1/4 of
 // its original size.
 #if HAS_SCALEUVDOWN4BOX
-static void ScaleUVDown4Box(int src_width,
-                            int src_height,
-                            int dst_width,
-                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
-                            const uint8_t* src_uv,
-                            uint8_t* dst_uv,
-                            int x,
-                            int dx,
-                            int y,
-                            int dy) {
+static int ScaleUVDown4Box(int src_width,
+                           int src_height,
+                           int dst_width,
+                           int dst_height,
+                           int src_stride,
+                           int dst_stride,
+                           const uint8_t* src_uv,
+                           uint8_t* dst_uv,
+                           int x,
+                           int dx,
+                           int y,
+                           int dy) {
   int j;
   // Allocate 2 rows of UV.
   const int row_size = (dst_width * 2 * 2 + 15) & ~15;
   align_buffer_64(row, row_size * 2);
+  if (!row)
+    return 1;
   int row_stride = src_stride * (dy >> 16);
   void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                           uint8_t* dst_uv, int dst_width) =
@@ -255,6 +257,7 @@ static void ScaleUVDown4Box(int src_width,
     dst_uv += dst_stride;
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 #endif  // HAS_SCALEUVDOWN4BOX
 
@@ -344,19 +347,19 @@ static void ScaleUVDownEven(int src_width,
 
 // Scale UV down with bilinear interpolation.
 #if HAS_SCALEUVBILINEARDOWN
-static void ScaleUVBilinearDown(int src_width,
-                                int src_height,
-                                int dst_width,
-                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
-                                const uint8_t* src_uv,
-                                uint8_t* dst_uv,
-                                int x,
-                                int dx,
-                                int y,
-                                int dy,
-                                enum FilterMode filtering) {
+static int ScaleUVBilinearDown(int src_width,
+                               int src_height,
+                               int dst_width,
+                               int dst_height,
+                               int src_stride,
+                               int dst_stride,
+                               const uint8_t* src_uv,
+                               uint8_t* dst_uv,
+                               int x,
+                               int dx,
+                               int y,
+                               int dy,
+                               enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                          ptrdiff_t src_stride, int dst_width,
@@ -446,9 +449,10 @@ static void ScaleUVBilinearDown(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row of UV.
   {
-    align_buffer_64(row, clip_src_width * 2);
-
     const int max_y = (src_height - 1) << 16;
+    align_buffer_64(row, clip_src_width * 2);
+    if (!row)
+      return 1;
     if (y > max_y) {
       y = max_y;
     }
@@ -470,24 +474,25 @@ static void ScaleUVBilinearDown(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 #endif
 
 // Scale UV up with bilinear interpolation.
 #if HAS_SCALEUVBILINEARUP
-static void ScaleUVBilinearUp(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              int src_stride,
-                              int dst_stride,
-                              const uint8_t* src_uv,
-                              uint8_t* dst_uv,
-                              int x,
-                              int dx,
-                              int y,
-                              int dy,
-                              enum FilterMode filtering) {
+static int ScaleUVBilinearUp(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             int src_stride,
+                             int dst_stride,
+                             const uint8_t* src_uv,
+                             uint8_t* dst_uv,
+                             int x,
+                             int dx,
+                             int y,
+                             int dy,
+                             enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                          ptrdiff_t src_stride, int dst_width,
@@ -606,6 +611,8 @@ static void ScaleUVBilinearUp(int src_width,
     // Allocate 2 rows of UV.
     const int row_size = (dst_width * 2 + 15) & ~15;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -649,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 #endif  // HAS_SCALEUVBILINEARUP
 
@@ -984,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv,
 // Scale a UV plane (from NV12)
 // This function in turn calls a scaling function
 // suitable for handling the desired resolutions.
-static void ScaleUV(const uint8_t* src,
-                    int src_stride,
-                    int src_width,
-                    int src_height,
-                    uint8_t* dst,
-                    int dst_stride,
-                    int dst_width,
-                    int dst_height,
-                    int clip_x,
-                    int clip_y,
-                    int clip_width,
-                    int clip_height,
-                    enum FilterMode filtering) {
+static int ScaleUV(const uint8_t* src,
+                   int src_stride,
+                   int src_width,
+                   int src_height,
+                   uint8_t* dst,
+                   int dst_stride,
+                   int dst_width,
+                   int dst_height,
+                   int clip_x,
+                   int clip_y,
+                   int clip_width,
+                   int clip_height,
+                   enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1042,22 +1050,22 @@ static void ScaleUV(const uint8_t* src,
           ScaleUVDown2(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
-          return;
+          return 0;
         }
 #endif
 #if HAS_SCALEUVDOWN4BOX
         if (dx == 0x40000 && filtering == kFilterBox) {
           // Optimized 1/4 box downsample.
-          ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
-                          src_stride, dst_stride, src, dst, x, dx, y, dy);
-          return;
+          return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
+                                 src_stride, dst_stride, src, dst, x, dx, y,
+                                 dy);
         }
 #endif
 #if HAS_SCALEUVDOWNEVEN
         ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
                         src_stride, dst_stride, src, dst, x, dx, y, dy,
                         filtering);
-        return;
+        return 0;
 #endif
       }
       // Optimized odd scale down. ie 3, 5, 7, 9x.
@@ -1068,7 +1076,7 @@ static void ScaleUV(const uint8_t* src,
           // Straight copy.
           UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
                  src_stride, dst, dst_stride, clip_width, clip_height);
-          return;
+          return 0;
         }
 #endif
       }
@@ -1079,38 +1087,37 @@ static void ScaleUV(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                        dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering);
-    return;
+    return 0;
   }
   if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
     ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride,
                      dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((clip_height + 1) / 2 == src_height &&
       (clip_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
 #if HAS_SCALEUVBILINEARUP
   if (filtering && dy < 65536) {
-    ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
-                      src_stride, dst_stride, src, dst, x, dx, y, dy,
-                      filtering);
-    return;
+    return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
+                             src_stride, dst_stride, src, dst, x, dx, y, dy,
+                             filtering);
   }
 #endif
 #if HAS_SCALEUVBILINEARDOWN
   if (filtering) {
-    ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
-                        src_stride, dst_stride, src, dst, x, dx, y, dy,
-                        filtering);
-    return;
+    return ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
+                               src_stride, dst_stride, src, dst, x, dx, y, dy,
+                               filtering);
   }
 #endif
   ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
                 dst_stride, src, dst, x, dx, y, dy);
+  return 0;
 }
 
 // Scale an UV image.
@@ -1128,9 +1135,9 @@ int UVScale(const uint8_t* src_uv,
       src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
-  ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
-          dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
-  return 0;
+  return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv,
+                 dst_stride_uv, dst_width, dst_height, 0, 0, dst_width,
+                 dst_height, filtering);
 }
 
 // Scale a 16 bit UV image.
diff --git a/unit_test/convert_argb_test.cc b/unit_test/convert_argb_test.cc
new file mode 100644
index 00000000..aeee8a7f
--- /dev/null
+++ b/unit_test/convert_argb_test.cc
@@ -0,0 +1,2700 @@
+/*
+ *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "libyuv/basic_types.h"
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "../unit_test/unit_test.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/video_common.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
+#endif
+
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
+// Some functions fail on big endian. Enable these tests on all cpus except
+// PowerPC, but they are not optimized so disabled by default.
+#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
+#define LITTLE_ENDIAN_ONLY_TEST 1
+#endif
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+namespace libyuv {
+
+// Alias to copy pixels as is
+#define AR30ToAR30 ARGBCopy
+#define ABGRToABGR ARGBCopy
+
+// subsample amount uses a divide.
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
+
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
+
+#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                   DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH,   \
+                   TILE_HEIGHT)                                                \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {               \
+    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");        \
+    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");        \
+    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                    \
+                  "SRC_SUBSAMP_X unsupported");                                \
+    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                    \
+                  "SRC_SUBSAMP_Y unsupported");                                \
+    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                    \
+                  "DST_SUBSAMP_X unsupported");                                \
+    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                    \
+                  "DST_SUBSAMP_Y unsupported");                                \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                \
+    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);                \
+    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);              \
+    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1);  \
+    const int kPaddedHeight =                                                  \
+        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                    \
+    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);    \
+    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y);  \
+    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF);  \
+    align_buffer_page_end(                                                     \
+        src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                  \
+    align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);    \
+    align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);    \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);                \
+    align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC);  \
+    align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC);  \
+    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                    \
+    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                  \
+    for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) {                   \
+      src_y_p[i] =                                                             \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
+    }                                                                          \
+    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
+      src_uv_p[i] =                                                            \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
+    }                                                                          \
+    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                              \
+    memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC);                \
+    memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC);                \
+    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                          \
+    memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC);            \
+    memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC);            \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                            \
+        src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2,                          \
+        reinterpret_cast<DST_T*>(dst_y_c), kWidth,                             \
+        reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth,                      \
+        reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth,              \
+        NEG kHeight);                                                          \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                          \
+          src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2,                        \
+          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                         \
+          reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth,                  \
+          reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth,          \
+          NEG kHeight);                                                        \
+    }                                                                          \
+    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                     \
+      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                     \
+    }                                                                          \
+    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) {       \
+      EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);                                     \
+      EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);                                     \
+    }                                                                          \
+    free_aligned_buffer_page_end(dst_y_c);                                     \
+    free_aligned_buffer_page_end(dst_u_c);                                     \
+    free_aligned_buffer_page_end(dst_v_c);                                     \
+    free_aligned_buffer_page_end(dst_y_opt);                                   \
+    free_aligned_buffer_page_end(dst_u_opt);                                   \
+    free_aligned_buffer_page_end(dst_v_opt);                                   \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_uv);                                      \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH,       \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH,     \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH,        \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#else
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#endif
+
+TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
+TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
+
+// Provide matrix wrappers for full range bt.709
+#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+
+// Provide matrix wrappers for full range bt.2020
+#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+
+#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
+  I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
+  I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j)                     \
+  I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                          kFilterBilinear)
+#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j)                     \
+  I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                          kFilterBilinear)
+
+#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, W1280, N, NEG, OFF)                            \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                       \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                  \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                     \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                              \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                              \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);               \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);             \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      src_y[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    for (int i = 0; i < kSizeUV; ++i) {                                       \
+      src_u[i + OFF] = (fastrand() & 0xff);                                   \
+      src_v[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                          \
+    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                      \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    double time0 = get_time();                                                \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,        \
+                          src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \
+                          kWidth, NEG kHeight);                               \
+    double time1 = get_time();                                                \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,      \
+                            src_v + OFF, kStrideUV, dst_argb_opt + OFF,       \
+                            kStrideB, kWidth, NEG kHeight);                   \
+    }                                                                         \
+    double time2 = get_time();                                                \
+    printf(" %8d us C - %8d us OPT\n",                                        \
+           static_cast<int>((time1 - time0) * 1e6),                           \
+           static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_));  \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                      \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                  \
+    }                                                                         \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+    free_aligned_buffer_page_end(dst_argb_c);                                 \
+    free_aligned_buffer_page_end(dst_argb_opt);                               \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                      YALIGN)                                                \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                 YALIGN, benchmark_width_ + 1, _Any, +, 0)                   \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                 YALIGN, benchmark_width_, _Unaligned, +, 4)                 \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                 YALIGN, benchmark_width_, _Invert, -, 0)                    \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                 YALIGN, benchmark_width_, _Opt, +, 0)
+#else
+#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                      YALIGN)                                                \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                 YALIGN, benchmark_width_, _Opt, +, 0)
+#endif
+
+#if defined(ENABLE_FULL_TESTS)
+TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
+TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
+TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
+TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
+TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
+TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
+#endif
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1)
+TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
+TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
+TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1)
+TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
+#endif
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
+TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1)
+#else  // FULL_TESTS
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
+TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
+TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
+#endif
+TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
+TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
+#endif
+
+#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+                   W1280, N, NEG, OFF)                                         \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kStrideB = kWidth * BPP_B;                                       \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(src_uv,                                              \
+                          kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight);                      \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight);                    \
+    for (int i = 0; i < kHeight; ++i)                                          \
+      for (int j = 0; j < kWidth; ++j)                                         \
+        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                     \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                  \
+      for (int j = 0; j < kStrideUV * 2; ++j) {                                \
+        src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff);             \
+      }                                                                        \
+    }                                                                          \
+    memset(dst_argb_c, 1, kStrideB* kHeight);                                  \
+    memset(dst_argb_opt, 101, kStrideB* kHeight);                              \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2,    \
+                          dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight);    \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2,  \
+                            dst_argb_opt, kWidth * BPP_B, kWidth,              \
+                            NEG kHeight);                                      \
+    }                                                                          \
+    /* Convert to ARGB so 565 is expanded to bytes that can be compared. */    \
+    align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight);                 \
+    align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight);               \
+    memset(dst_argb32_c, 2, kWidth * 4 * kHeight);                             \
+    memset(dst_argb32_opt, 102, kWidth * 4 * kHeight);                         \
+    FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth,      \
+                  kHeight);                                                    \
+    FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth,  \
+                  kHeight);                                                    \
+    for (int i = 0; i < kHeight; ++i) {                                        \
+      for (int j = 0; j < kWidth * 4; ++j) {                                   \
+        EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j],                            \
+                  dst_argb32_opt[i * kWidth * 4 + j]);                         \
+      }                                                                        \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_uv);                                      \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+    free_aligned_buffer_page_end(dst_argb32_c);                                \
+    free_aligned_buffer_page_end(dst_argb32_opt);                              \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_ + 1, _Any, +, 0)                           \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Unaligned, +, 2)                         \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Invert, -, 0)                            \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Opt, +, 0)
+#else
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Opt, +, 0)
+#endif
+
+#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
+  NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV21ToARGB(a, b, c, d, e, f, g, h) \
+  NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV12ToABGR(a, b, c, d, e, f, g, h) \
+  NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV21ToABGR(a, b, c, d, e, f, g, h) \
+  NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \
+  NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \
+  NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV12ToRAW(a, b, c, d, e, f, g, h) \
+  NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV21ToRAW(a, b, c, d, e, f, g, h) \
+  NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
+  NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+
+TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
+#endif
+
+TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
+#endif
+
+#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,     \
+                  EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)               \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) {                             \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;       \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;       \
+    const int kStrideA =                                                       \
+        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
+    const int kStrideB =                                                       \
+        (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
+    align_buffer_page_end(src_argb,                                            \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);       \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+    align_buffer_page_end(dst_argb_opt,                                        \
+                          kStrideB* kHeightB*(int)sizeof(TYPE_B));             \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
+      src_argb[i + OFF] = (fastrand() & 0xff);                                 \
+    }                                                                          \
+    memset(dst_argb_c, 1, kStrideB* kHeightB);                                 \
+    memset(dst_argb_opt, 101, kStrideB* kHeightB);                             \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
+                     kStrideB, kWidth, NEG kHeight);                           \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA,                    \
+                       (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight);  \
+    }                                                                          \
+    for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {      \
+      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                               \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_argb);                                    \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+  }
+
+#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B,        \
+                       TYPE_B, EPP_B, STRIDE_B, HEIGHT_B)                      \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) {                       \
+    for (int times = 0; times < benchmark_iterations_; ++times) {              \
+      const int kWidth = (fastrand() & 63) + 1;                                \
+      const int kHeight = (fastrand() & 31) + 1;                               \
+      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
+      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
+      const int kStrideA =                                                     \
+          (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
+      const int kStrideB =                                                     \
+          (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
+      align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+      align_buffer_page_end(dst_argb_c,                                        \
+                            kStrideB* kHeightB*(int)sizeof(TYPE_B));           \
+      align_buffer_page_end(dst_argb_opt,                                      \
+                            kStrideB* kHeightB*(int)sizeof(TYPE_B));           \
+      for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {    \
+        src_argb[i] = 0xfe;                                                    \
+      }                                                                        \
+      memset(dst_argb_c, 123, kStrideB* kHeightB);                             \
+      memset(dst_argb_opt, 123, kStrideB* kHeightB);                           \
+      MaskCpuFlags(disable_cpu_flags_);                                        \
+      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c,       \
+                       kStrideB, kWidth, kHeight);                             \
+      MaskCpuFlags(benchmark_cpu_info_);                                       \
+      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt,     \
+                       kStrideB, kWidth, kHeight);                             \
+      for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {    \
+        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
+      }                                                                        \
+      free_aligned_buffer_page_end(src_argb);                                  \
+      free_aligned_buffer_page_end(dst_argb_c);                                \
+      free_aligned_buffer_page_end(dst_argb_opt);                              \
+    }                                                                          \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
+                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
+  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0)           \
+  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4)         \
+  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0)            \
+  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)               \
+  TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
+                 EPP_B, STRIDE_B, HEIGHT_B)
+#else
+#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
+                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
+  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
+TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+
+// in place test
+#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,    \
+                  EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)              \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) {                            \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;      \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;      \
+    const int kStrideA =                                                      \
+        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
+    const int kStrideB =                                                      \
+        (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
+    align_buffer_page_end(src_argb,                                           \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
+    align_buffer_page_end(dst_argb_c,                                         \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
+    align_buffer_page_end(dst_argb_opt,                                       \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {     \
+      src_argb[i + OFF] = (fastrand() & 0xff);                                \
+    }                                                                         \
+    memcpy(dst_argb_c + OFF, src_argb,                                        \
+           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
+    memcpy(dst_argb_opt + OFF, src_argb,                                      \
+           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA,         \
+                     (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight);     \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA,     \
+                       (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+    }                                                                         \
+    memcpy(dst_argb_opt + OFF, src_argb,                                      \
+           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
+    FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA,       \
+                     (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight);   \
+    for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {     \
+      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                              \
+    }                                                                         \
+    free_aligned_buffer_page_end(src_argb);                                   \
+    free_aligned_buffer_page_end(dst_argb_c);                                 \
+    free_aligned_buffer_page_end(dst_argb_opt);                               \
+  }
+
+#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
+                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
+  TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+            STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0)
+
+TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+// TODO(fbarchard): Support in place for mirror.
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
+// TODO(fbarchard): Support in place for conversions that increase bpp.
+// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+
+#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                   HEIGHT_B, W1280, N, NEG, OFF)                             \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) {                   \
+    const int kWidth = W1280;                                                \
+    const int kHeight = benchmark_height_;                                   \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
+    const int kStrideA =                                                     \
+        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
+    const int kStrideB =                                                     \
+        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
+    align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF);               \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                   \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);                 \
+    for (int i = 0; i < kStrideA * kHeightA; ++i) {                          \
+      src_argb[i + OFF] = (fastrand() & 0xff);                               \
+    }                                                                        \
+    memset(dst_argb_c, 1, kStrideB* kHeightB);                               \
+    memset(dst_argb_opt, 101, kStrideB* kHeightB);                           \
+    MaskCpuFlags(disable_cpu_flags_);                                        \
+    FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
+                             NULL, kWidth, NEG kHeight);                     \
+    MaskCpuFlags(benchmark_cpu_info_);                                       \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                        \
+      FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt,       \
+                               kStrideB, NULL, kWidth, NEG kHeight);         \
+    }                                                                        \
+    for (int i = 0; i < kStrideB * kHeightB; ++i) {                          \
+      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
+    }                                                                        \
+    free_aligned_buffer_page_end(src_argb);                                  \
+    free_aligned_buffer_page_end(dst_argb_c);                                \
+    free_aligned_buffer_page_end(dst_argb_opt);                              \
+  }
+
+#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B,        \
+                        STRIDE_B, HEIGHT_B)                                    \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) {                 \
+    for (int times = 0; times < benchmark_iterations_; ++times) {              \
+      const int kWidth = (fastrand() & 63) + 1;                                \
+      const int kHeight = (fastrand() & 31) + 1;                               \
+      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
+      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
+      const int kStrideA =                                                     \
+          (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
+      const int kStrideB =                                                     \
+          (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
+      align_buffer_page_end(src_argb, kStrideA* kHeightA);                     \
+      align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                   \
+      align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);                 \
+      for (int i = 0; i < kStrideA * kHeightA; ++i) {                          \
+        src_argb[i] = (fastrand() & 0xff);                                     \
+      }                                                                        \
+      memset(dst_argb_c, 123, kStrideB* kHeightB);                             \
+      memset(dst_argb_opt, 123, kStrideB* kHeightB);                           \
+      MaskCpuFlags(disable_cpu_flags_);                                        \
+      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \
+                               kWidth, kHeight);                               \
+      MaskCpuFlags(benchmark_cpu_info_);                                       \
+      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB,     \
+                               NULL, kWidth, kHeight);                         \
+      for (int i = 0; i < kStrideB * kHeightB; ++i) {                          \
+        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
+      }                                                                        \
+      free_aligned_buffer_page_end(src_argb);                                  \
+      free_aligned_buffer_page_end(dst_argb_c);                                \
+      free_aligned_buffer_page_end(dst_argb_opt);                              \
+    }                                                                          \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)                                                 \
+  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
+             HEIGHT_B, benchmark_width_ + 1, _Any, +, 0)                    \
+  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
+             HEIGHT_B, benchmark_width_, _Unaligned, +, 2)                  \
+  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
+             HEIGHT_B, benchmark_width_, _Invert, -, 0)                     \
+  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
+             HEIGHT_B, benchmark_width_, _Opt, +, 0)                        \
+  TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)
+#else
+#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)                                                 \
+  TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)
+#endif
+
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
+#endif
+
+// These conversions called twice, produce the original result.
+// e.g. endian swap twice.
+#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG,   \
+                 OFF)                                                          \
+  TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) {                           \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;       \
+    const int kStrideA =                                                       \
+        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
+    align_buffer_page_end(src_argb,                                            \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);       \
+    align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+    align_buffer_page_end(dst_argb_opt,                                        \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A));             \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
+      src_argb[i + OFF] = (fastrand() & 0xff);                                 \
+    }                                                                          \
+    memset(dst_argb_c, 1, kStrideA* kHeightA);                                 \
+    memset(dst_argb_opt, 101, kStrideA* kHeightA);                             \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c,         \
+             kStrideA, kWidth, NEG kHeight);                                   \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt,     \
+               kStrideA, kWidth, NEG kHeight);                                 \
+    }                                                                          \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA,     \
+             kWidth, NEG kHeight);                                             \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
+             kWidth, NEG kHeight);                                             \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
+      EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]);                           \
+      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                               \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_argb);                                    \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A)                  \
+  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \
+           _Any, +, 0)                                                        \
+  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_,     \
+           _Unaligned, +, 2)                                                  \
+  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_,     \
+           _Opt, +, 0)
+#else
+#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A)              \
+  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
+           _Opt, +, 0)
+#endif
+
+TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1)
+TESTEND(ARGBToABGR, uint8_t, 4, 4, 1)
+TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
+TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
+TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
+
+#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                        YALIGN, W1280, N, NEG, OFF, ATTEN)                     \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      src_y[i + OFF] = (fastrand() & 0xff);                                    \
+      src_a[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      src_u[i + OFF] = (fastrand() & 0xff);                                    \
+      src_v[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
+    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,         \
+                          src_v + OFF, kStrideUV, src_a + OFF, kWidth,         \
+                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
+                          ATTEN);                                              \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,       \
+                            src_v + OFF, kStrideUV, src_a + OFF, kWidth,       \
+                            dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
+                            ATTEN);                                            \
+    }                                                                          \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_u);                                       \
+    free_aligned_buffer_page_end(src_v);                                       \
+    free_aligned_buffer_page_end(src_a);                                       \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN)                                                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Unaligned, +, 2, 0)              \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Invert, -, 0, 0)                 \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, 0)                    \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Premult, +, 0, 1)
+#else
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN)                                                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#endif
+
+#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+
+#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
+
+#if defined(ENABLE_FULL_TESTS)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#else
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#endif
+
+TEST_F(LibYUVConvertTest, TestYToARGB) {
+  uint8_t y[32];
+  uint8_t expectedg[32];
+  for (int i = 0; i < 32; ++i) {
+    y[i] = i * 5 + 17;
+    expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f);
+  }
+  uint8_t argb[32 * 4];
+  YToARGB(y, 0, argb, 0, 32, 1);
+
+  for (int i = 0; i < 32; ++i) {
+    printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i],
+           argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]);
+  }
+  for (int i = 0; i < 32; ++i) {
+    EXPECT_EQ(expectedg[i], argb[i * 4 + 0]);
+  }
+}
+
+static const uint8_t kNoDither4x4[16] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+TEST_F(LibYUVConvertTest, TestNoDither) {
+  align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4);
+  align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
+  align_buffer_page_end(dst_rgb565dither,
+                        benchmark_width_ * benchmark_height_ * 2);
+  MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4);
+  MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
+  MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
+  ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2,
+               benchmark_width_, benchmark_height_);
+  ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither,
+                     benchmark_width_ * 2, kNoDither4x4, benchmark_width_,
+                     benchmark_height_);
+  for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
+    EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
+  }
+
+  free_aligned_buffer_page_end(src_argb);
+  free_aligned_buffer_page_end(dst_rgb565);
+  free_aligned_buffer_page_end(dst_rgb565dither);
+}
+
+// Ordered 4x4 dither for 888 to 565.  Values from 0 to 7.
+static const uint8_t kDither565_4x4[16] = {
+    0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
+};
+
+TEST_F(LibYUVConvertTest, TestDither) {
+  align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4);
+  align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
+  align_buffer_page_end(dst_rgb565dither,
+                        benchmark_width_ * benchmark_height_ * 2);
+  align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4);
+  align_buffer_page_end(dst_argbdither,
+                        benchmark_width_ * benchmark_height_ * 4);
+  MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4);
+  MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
+  MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
+  MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4);
+  MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4);
+  ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2,
+               benchmark_width_, benchmark_height_);
+  ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither,
+                     benchmark_width_ * 2, kDither565_4x4, benchmark_width_,
+                     benchmark_height_);
+  RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, dst_argb, benchmark_width_ * 4,
+               benchmark_width_, benchmark_height_);
+  RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, dst_argbdither,
+               benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+  for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
+    EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9);
+  }
+  free_aligned_buffer_page_end(src_argb);
+  free_aligned_buffer_page_end(dst_rgb565);
+  free_aligned_buffer_page_end(dst_rgb565dither);
+  free_aligned_buffer_page_end(dst_argb);
+  free_aligned_buffer_page_end(dst_argbdither);
+}
+
+#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                        YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C)              \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) {                \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      src_y[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      src_u[i + OFF] = (fastrand() & 0xff);                                    \
+      src_v[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
+    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
+                                  src_v + OFF, kStrideUV, dst_argb_c + OFF,    \
+                                  kStrideB, NULL, kWidth, NEG kHeight);        \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B##Dither(                                           \
+          src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \
+          dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight);            \
+    }                                                                          \
+    /* Convert to ARGB so 565 is expanded to bytes that can be compared. */    \
+    align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight);               \
+    align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight);             \
+    memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight);                           \
+    memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight);                       \
+    FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \
+                     kWidth, kHeight);                                         \
+    FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt,             \
+                     kWidth * BPP_C, kWidth, kHeight);                         \
+    for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) {                       \
+      EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]);                           \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_u);                                       \
+    free_aligned_buffer_page_end(src_v);                                       \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+    free_aligned_buffer_page_end(dst_argb32_c);                                \
+    free_aligned_buffer_page_end(dst_argb32_opt);                              \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, FMT_C, BPP_C)                                  \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)     \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C)   \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)      \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, FMT_C, BPP_C)                                  \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
+
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
+#endif
+
+// Transitive test.  A to B to C is same as A to C.
+// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
+#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                       W1280, N, NEG, OFF, FMT_C, BPP_C)                      \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) {            \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                    \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                              \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                              \
+    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);               \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      src_y[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    for (int i = 0; i < kSizeUV; ++i) {                                       \
+      src_u[i + OFF] = (fastrand() & 0xff);                                   \
+      src_v[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                          \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,        \
+                          src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \
+                          kWidth, NEG kHeight);                               \
+    /* Convert to a 3rd format in 1 step and 2 steps and compare  */          \
+    const int kStrideC = kWidth * BPP_C;                                      \
+    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);               \
+    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);              \
+    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                          \
+    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV,      \
+                            src_v + OFF, kStrideUV, dst_argb_c + OFF,         \
+                            kStrideC, kWidth, NEG kHeight);                   \
+      /* Convert B to C */                                                    \
+      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,         \
+                       kStrideC, kWidth, kHeight);                            \
+    }                                                                         \
+    for (int i = 0; i < kStrideC * kHeight; ++i) {                            \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                   \
+    }                                                                         \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+    free_aligned_buffer_page_end(dst_argb_b);                                 \
+    free_aligned_buffer_page_end(dst_argb_c);                                 \
+    free_aligned_buffer_page_end(dst_argb_bc);                                \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                      FMT_C, BPP_C)                                          \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)             \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C)           \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)              \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                      FMT_C, BPP_C)                                          \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
+
+#if defined(ENABLE_FULL_TESTS)
+TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
+TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
+#endif
+TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
+#else
+TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
+TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
+TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
+#endif
+
+// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB.
+// Benchmark 2 step conversion for comparison to 1 step conversion.
+#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                        W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN)               \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) {             \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                     \
+    const int kSizeUV =                                                        \
+        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y);          \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);                \
+    const int kStrideC = kWidth * BPP_C;                                       \
+    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);               \
+    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                           \
+    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                           \
+    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                          \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      src_y[i + OFF] = (fastrand() & 0xff);                                    \
+      src_a[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      src_u[i + OFF] = (fastrand() & 0xff);                                    \
+      src_v[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      /* Convert A to B */                                                     \
+      FMT_PLANAR##To##FMT_B(                                                   \
+          src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),      \
+          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth,      \
+          dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN);             \
+      /* Convert B to C */                                                     \
+      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,          \
+                       kStrideC, kWidth, kHeight);                             \
+    }                                                                          \
+    /* Convert A to C */                                                       \
+    FMT_PLANAR##To##FMT_C(                                                     \
+        src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),        \
+        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth,        \
+        dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN);               \
+    for (int i = 0; i < kStrideC * kHeight; ++i) {                             \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                    \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_u);                                       \
+    free_aligned_buffer_page_end(src_v);                                       \
+    free_aligned_buffer_page_end(src_a);                                       \
+    free_aligned_buffer_page_end(dst_argb_b);                                  \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_bc);                                 \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                       FMT_C, BPP_C)                                          \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0)          \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0)        \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0)           \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)              \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1)
+#else
+#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                       FMT_C, BPP_C)                                          \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                  benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)
+#endif
+
+#if defined(ENABLE_FULL_TESTS)
+TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+#else
+TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+#endif
+
+#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \
+                      OFF, FMT_C, BPP_C)                                       \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) {                  \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = benchmark_height_;                                     \
+    const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A;                     \
+    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                     \
+    align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);                \
+    MemRandomize(src_argb_a + OFF, kStrideA * kHeight);                        \
+    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                           \
+    FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB,   \
+                     kWidth, NEG kHeight);                                     \
+    /* Convert to a 3rd format in 1 step and 2 steps and compare  */           \
+    const int kStrideC = kWidth * BPP_C;                                       \
+    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);               \
+    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                           \
+    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                          \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \
+                       kWidth, NEG kHeight);                                   \
+      /* Convert B to C */                                                     \
+      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,          \
+                       kStrideC, kWidth, kHeight);                             \
+    }                                                                          \
+    for (int i = 0; i < kStrideC * kHeight; i += 4) {                          \
+      EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]);            \
+      EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]);            \
+      EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]);            \
+      EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64);      \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_argb_a);                                  \
+    free_aligned_buffer_page_end(dst_argb_b);                                  \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_bc);                                 \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B,                    \
+                benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)              \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
+                _Unaligned, +, 4, FMT_C, BPP_C)                              \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
+                _Invert, -, 0, FMT_C, BPP_C)                                 \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
+                _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
+                _Opt, +, 0, FMT_C, BPP_C)
+#endif
+
+// Caveat: Destination needs to be 4 bytes
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
+TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
+TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
+TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4)
+TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4)
+TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4)
+TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4)
+TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4)
+#endif
+
+TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
+  // 2x2 frames
+  uint32_t src[4];
+  uint32_t dst[4];
+  // some random input
+  src[0] = 0x11000000;
+  src[1] = 0x00450000;
+  src[2] = 0x00009f00;
+  src[3] = 0x000000ff;
+  // zeros on destination
+  dst[0] = 0x00000000;
+  dst[1] = 0x00000000;
+  dst[2] = 0x00000000;
+  dst[3] = 0x00000000;
+
+  int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src),
+                        16,  // input size
+                        reinterpret_cast<uint8_t*>(dst),
+                        8,  // destination stride
+                        0,  // crop_x
+                        0,  // crop_y
+                        2,  // width
+                        2,  // height
+                        2,  // crop width
+                        2,  // crop height
+                        kRotate90, FOURCC_ARGB);
+
+  EXPECT_EQ(r, 0);
+  // 90 degrees rotation, no conversion
+  EXPECT_EQ(dst[0], src[2]);
+  EXPECT_EQ(dst[1], src[0]);
+  EXPECT_EQ(dst[2], src[3]);
+  EXPECT_EQ(dst[3], src[1]);
+}
+
+#ifdef HAS_ARGBTOAR30ROW_AVX2
+TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
+  // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels.
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
+  align_buffer_page_end(src, kPixels * 4);
+  align_buffer_page_end(dst_opt, kPixels * 4);
+  align_buffer_page_end(dst_c, kPixels * 4);
+  MemRandomize(src, kPixels * 4);
+  memset(dst_opt, 0, kPixels * 4);
+  memset(dst_c, 1, kPixels * 4);
+
+  ARGBToAR30Row_C(src, dst_c, kPixels);
+
+  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    if (has_avx2) {
+      ARGBToAR30Row_AVX2(src, dst_opt, kPixels);
+    } else if (has_ssse3) {
+      ARGBToAR30Row_SSSE3(src, dst_opt, kPixels);
+    } else {
+      ARGBToAR30Row_C(src, dst_opt, kPixels);
+    }
+  }
+  for (int i = 0; i < kPixels * 4; ++i) {
+    EXPECT_EQ(dst_opt[i], dst_c[i]);
+  }
+
+  free_aligned_buffer_page_end(src);
+  free_aligned_buffer_page_end(dst_opt);
+  free_aligned_buffer_page_end(dst_c);
+}
+#endif  // HAS_ARGBTOAR30ROW_AVX2
+
+#ifdef HAS_ABGRTOAR30ROW_AVX2
+TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
+  // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels.
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
+  align_buffer_page_end(src, kPixels * 4);
+  align_buffer_page_end(dst_opt, kPixels * 4);
+  align_buffer_page_end(dst_c, kPixels * 4);
+  MemRandomize(src, kPixels * 4);
+  memset(dst_opt, 0, kPixels * 4);
+  memset(dst_c, 1, kPixels * 4);
+
+  ABGRToAR30Row_C(src, dst_c, kPixels);
+
+  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    if (has_avx2) {
+      ABGRToAR30Row_AVX2(src, dst_opt, kPixels);
+    } else if (has_ssse3) {
+      ABGRToAR30Row_SSSE3(src, dst_opt, kPixels);
+    } else {
+      ABGRToAR30Row_C(src, dst_opt, kPixels);
+    }
+  }
+  for (int i = 0; i < kPixels * 4; ++i) {
+    EXPECT_EQ(dst_opt[i], dst_c[i]);
+  }
+
+  free_aligned_buffer_page_end(src);
+  free_aligned_buffer_page_end(dst_opt);
+  free_aligned_buffer_page_end(dst_c);
+}
+#endif  // HAS_ABGRTOAR30ROW_AVX2
+
+#if !defined(LEAN_TESTS)
+
+// Provide matrix wrappers for 12 bit YUV
+#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
+  I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \
+  I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+
+#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+
+#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
+  I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j)                     \
+  I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
+  I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j)                     \
+  I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+                         kFilterBilinear)
+
+// TODO(fbarchard): Fix clamping issue affected by U channel.
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
+                         BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF)     \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                       \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                  \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                     \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
+    const int kBpc = 2;                                                       \
+    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF);               \
+    align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF);                       \
+    align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF);                       \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF);              \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF);            \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
+    }                                                                         \
+    for (int i = 0; i < kSizeUV; ++i) {                                       \
+      reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \
+      reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \
+    }                                                                         \
+    memset(dst_argb_c + DOFF, 1, kStrideB * kHeight);                         \
+    memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight);                     \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    FMT_PLANAR##To##FMT_B(                                                    \
+        reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                    \
+        reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,                 \
+        reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,                 \
+        dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight);                    \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      FMT_PLANAR##To##FMT_B(                                                  \
+          reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                  \
+          reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,               \
+          reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,               \
+          dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight);                \
+    }                                                                         \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                      \
+      EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]);                \
+    }                                                                         \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+    free_aligned_buffer_page_end(dst_argb_c);                                 \
+    free_aligned_buffer_page_end(dst_argb_opt);                               \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
+                        BPP_B, ALIGN, YALIGN)                                \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)       \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4)     \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0)        \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#else
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
+                        BPP_B, ALIGN, YALIGN)                                \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#endif
+
+// These conversions are only optimized for x86
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1)
+
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
+#endif  // LITTLE_ENDIAN_ONLY_TEST
+#endif  // DISABLE_SLOW_TESTS
+
+#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,      \
+                          ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH)   \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
+    const int kBpc = 2;                                                        \
+    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF);                 \
+    align_buffer_page_end(src_u, kSizeUV* kBpc + OFF);                         \
+    align_buffer_page_end(src_v, kSizeUV* kBpc + OFF);                         \
+    align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF);                 \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      reinterpret_cast<uint16_t*>(src_y + OFF)[i] =                            \
+          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
+      reinterpret_cast<uint16_t*>(src_a + OFF)[i] =                            \
+          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      reinterpret_cast<uint16_t*>(src_u + OFF)[i] =                            \
+          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
+      reinterpret_cast<uint16_t*>(src_v + OFF)[i] =                            \
+          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
+    }                                                                          \
+    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
+    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth,    \
+                          reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
+                          reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
+                          reinterpret_cast<uint16_t*>(src_a + OFF), kWidth,    \
+                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
+                          ATTEN);                                              \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B(                                                   \
+          reinterpret_cast<uint16_t*>(src_y + OFF), kWidth,                    \
+          reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV,                 \
+          reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV,                 \
+          reinterpret_cast<uint16_t*>(src_a + OFF), kWidth,                    \
+          dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN);           \
+    }                                                                          \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_u);                                       \
+    free_aligned_buffer_page_end(src_v);                                       \
+    free_aligned_buffer_page_end(src_a);                                       \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
+                         ALIGN, YALIGN, S_DEPTH)                            \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                    YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)   \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                    YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                    YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)       \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                    YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH)
+#else
+#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,   \
+                         ALIGN, YALIGN, S_DEPTH)                           \
+  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#endif
+
+#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+                        l, m)
+#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
+
+// These conversions are only optimized for x86
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
+#endif  // DISABLE_SLOW_TESTS
+
+#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                     YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH)               \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2;                    \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;         \
+    const int kBpc = 2;                                                        \
+    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF);                \
+    align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF);                       \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF);               \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF);             \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      reinterpret_cast<uint16_t*>(src_y + SOFF)[i] =                           \
+          (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH)));                 \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] =                          \
+          (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH)));                 \
+    }                                                                          \
+    memset(dst_argb_c + DOFF, 1, kStrideB * kHeight);                          \
+    memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight);                      \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,   \
+                          reinterpret_cast<uint16_t*>(src_uv + SOFF),          \
+                          kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth,      \
+                          NEG kHeight);                                        \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+                            reinterpret_cast<uint16_t*>(src_uv + SOFF),        \
+                            kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth,  \
+                            NEG kHeight);                                      \
+    }                                                                          \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
+      EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]);                 \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_uv);                                      \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                    YALIGN, S_DEPTH)                                          \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)                  \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)                \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#else
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                    YALIGN, S_DEPTH)                                          \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#endif
+
+#define P010ToARGB(a, b, c, d, e, f, g, h) \
+  P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P210ToARGB(a, b, c, d, e, f, g, h) \
+  P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P010ToAR30(a, b, c, d, e, f, g, h) \
+  P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P210ToAR30(a, b, c, d, e, f, g, h) \
+  P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P012ToARGB(a, b, c, d, e, f, g, h) \
+  P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P212ToARGB(a, b, c, d, e, f, g, h) \
+  P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P012ToAR30(a, b, c, d, e, f, g, h) \
+  P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P212ToAR30(a, b, c, d, e, f, g, h) \
+  P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P016ToARGB(a, b, c, d, e, f, g, h) \
+  P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P216ToARGB(a, b, c, d, e, f, g, h) \
+  P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P016ToAR30(a, b, c, d, e, f, g, h) \
+  P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P216ToAR30(a, b, c, d, e, f, g, h) \
+  P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P010ToARGBFilter(a, b, c, d, e, f, g, h)                     \
+  P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+                         kFilterBilinear)
+#define P210ToARGBFilter(a, b, c, d, e, f, g, h)                     \
+  P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+                         kFilterBilinear)
+#define P010ToAR30Filter(a, b, c, d, e, f, g, h)                     \
+  P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+                         kFilterBilinear)
+#define P210ToAR30Filter(a, b, c, d, e, f, g, h)                     \
+  P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+                         kFilterBilinear)
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+#endif  // LITTLE_ENDIAN_ONLY_TEST
+#endif  // DISABLE_SLOW_TESTS
+
+static int Clamp(int y) {
+  if (y < 0) {
+    y = 0;
+  }
+  if (y > 255) {
+    y = 255;
+  }
+  return y;
+}
+
+static int Clamp10(int y) {
+  if (y < 0) {
+    y = 0;
+  }
+  if (y > 1023) {
+    y = 1023;
+  }
+  return y;
+}
+
+// Test 8 bit YUV to 8 bit RGB
+TEST_F(LibYUVConvertTest, TestH420ToARGB) {
+  const int kSize = 256;
+  int histogram_b[256];
+  int histogram_g[256];
+  int histogram_r[256];
+  memset(histogram_b, 0, sizeof(histogram_b));
+  memset(histogram_g, 0, sizeof(histogram_g));
+  memset(histogram_r, 0, sizeof(histogram_r));
+  align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
+  align_buffer_page_end(argb_pixels, kSize * 4);
+  uint8_t* orig_y = orig_yuv;
+  uint8_t* orig_u = orig_y + kSize;
+  uint8_t* orig_v = orig_u + kSize / 2;
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_y[i] = i;
+  }
+  for (int i = 0; i < kSize / 2; ++i) {
+    orig_u[i] = 128;  // 128 is 0.
+    orig_v[i] = 128;
+  }
+
+  H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
+
+  for (int i = 0; i < kSize; ++i) {
+    int b = argb_pixels[i * 4 + 0];
+    int g = argb_pixels[i * 4 + 1];
+    int r = argb_pixels[i * 4 + 2];
+    int a = argb_pixels[i * 4 + 3];
+    ++histogram_b[b];
+    ++histogram_g[g];
+    ++histogram_r[r];
+    // Reference formula for Y channel contribution in YUV to RGB conversions:
+    int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
+    EXPECT_EQ(b, expected_y);
+    EXPECT_EQ(g, expected_y);
+    EXPECT_EQ(r, expected_y);
+    EXPECT_EQ(a, 255);
+  }
+
+  int count_b = 0;
+  int count_g = 0;
+  int count_r = 0;
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram_b[i]) {
+      ++count_b;
+    }
+    if (histogram_g[i]) {
+      ++count_g;
+    }
+    if (histogram_r[i]) {
+      ++count_r;
+    }
+  }
+  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+  free_aligned_buffer_page_end(orig_yuv);
+  free_aligned_buffer_page_end(argb_pixels);
+}
+
+// Test 10 bit YUV to 8 bit RGB
+TEST_F(LibYUVConvertTest, TestH010ToARGB) {
+  const int kSize = 1024;
+  int histogram_b[1024];
+  int histogram_g[1024];
+  int histogram_r[1024];
+  memset(histogram_b, 0, sizeof(histogram_b));
+  memset(histogram_g, 0, sizeof(histogram_g));
+  memset(histogram_r, 0, sizeof(histogram_r));
+  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+  align_buffer_page_end(argb_pixels, kSize * 4);
+  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+  uint16_t* orig_u = orig_y + kSize;
+  uint16_t* orig_v = orig_u + kSize / 2;
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_y[i] = i;
+  }
+  for (int i = 0; i < kSize / 2; ++i) {
+    orig_u[i] = 512;  // 512 is 0.
+    orig_v[i] = 512;
+  }
+
+  H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
+
+  for (int i = 0; i < kSize; ++i) {
+    int b = argb_pixels[i * 4 + 0];
+    int g = argb_pixels[i * 4 + 1];
+    int r = argb_pixels[i * 4 + 2];
+    int a = argb_pixels[i * 4 + 3];
+    ++histogram_b[b];
+    ++histogram_g[g];
+    ++histogram_r[r];
+    int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4));
+    EXPECT_NEAR(b, expected_y, 1);
+    EXPECT_NEAR(g, expected_y, 1);
+    EXPECT_NEAR(r, expected_y, 1);
+    EXPECT_EQ(a, 255);
+  }
+
+  int count_b = 0;
+  int count_g = 0;
+  int count_r = 0;
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram_b[i]) {
+      ++count_b;
+    }
+    if (histogram_g[i]) {
+      ++count_g;
+    }
+    if (histogram_r[i]) {
+      ++count_r;
+    }
+  }
+  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+  free_aligned_buffer_page_end(orig_yuv);
+  free_aligned_buffer_page_end(argb_pixels);
+}
+
+// Test 10 bit YUV to 10 bit RGB
+// Caveat: Result is near due to float rounding in expected
+// result.
+TEST_F(LibYUVConvertTest, TestH010ToAR30) {
+  const int kSize = 1024;
+  int histogram_b[1024];
+  int histogram_g[1024];
+  int histogram_r[1024];
+  memset(histogram_b, 0, sizeof(histogram_b));
+  memset(histogram_g, 0, sizeof(histogram_g));
+  memset(histogram_r, 0, sizeof(histogram_r));
+
+  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+  align_buffer_page_end(ar30_pixels, kSize * 4);
+  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+  uint16_t* orig_u = orig_y + kSize;
+  uint16_t* orig_v = orig_u + kSize / 2;
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_y[i] = i;
+  }
+  for (int i = 0; i < kSize / 2; ++i) {
+    orig_u[i] = 512;  // 512 is 0.
+    orig_v[i] = 512;
+  }
+
+  H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
+
+  for (int i = 0; i < kSize; ++i) {
+    int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
+    int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
+    int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
+    int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
+    ++histogram_b[b10];
+    ++histogram_g[g10];
+    ++histogram_r[r10];
+    int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
+    EXPECT_NEAR(b10, expected_y, 4);
+    EXPECT_NEAR(g10, expected_y, 4);
+    EXPECT_NEAR(r10, expected_y, 4);
+    EXPECT_EQ(a2, 3);
+  }
+
+  int count_b = 0;
+  int count_g = 0;
+  int count_r = 0;
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram_b[i]) {
+      ++count_b;
+    }
+    if (histogram_g[i]) {
+      ++count_g;
+    }
+    if (histogram_r[i]) {
+      ++count_r;
+    }
+  }
+  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+  free_aligned_buffer_page_end(orig_yuv);
+  free_aligned_buffer_page_end(ar30_pixels);
+}
+
+// Test 10 bit YUV to 10 bit RGB
+// Caveat: Result is near due to float rounding in expected
+// result.
+TEST_F(LibYUVConvertTest, TestH010ToAB30) {
+  const int kSize = 1024;
+  int histogram_b[1024];
+  int histogram_g[1024];
+  int histogram_r[1024];
+  memset(histogram_b, 0, sizeof(histogram_b));
+  memset(histogram_g, 0, sizeof(histogram_g));
+  memset(histogram_r, 0, sizeof(histogram_r));
+
+  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+  align_buffer_page_end(ab30_pixels, kSize * 4);
+  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+  uint16_t* orig_u = orig_y + kSize;
+  uint16_t* orig_v = orig_u + kSize / 2;
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_y[i] = i;
+  }
+  for (int i = 0; i < kSize / 2; ++i) {
+    orig_u[i] = 512;  // 512 is 0.
+    orig_v[i] = 512;
+  }
+
+  H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1);
+
+  for (int i = 0; i < kSize; ++i) {
+    int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023;
+    int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023;
+    int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023;
+    int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3;
+    ++histogram_b[b10];
+    ++histogram_g[g10];
+    ++histogram_r[r10];
+    int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+    EXPECT_NEAR(b10, expected_y, 4);
+    EXPECT_NEAR(g10, expected_y, 4);
+    EXPECT_NEAR(r10, expected_y, 4);
+    EXPECT_EQ(a2, 3);
+  }
+
+  int count_b = 0;
+  int count_g = 0;
+  int count_r = 0;
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram_b[i]) {
+      ++count_b;
+    }
+    if (histogram_g[i]) {
+      ++count_g;
+    }
+    if (histogram_r[i]) {
+      ++count_r;
+    }
+  }
+  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+  free_aligned_buffer_page_end(orig_yuv);
+  free_aligned_buffer_page_end(ab30_pixels);
+}
+
+// Test 8 bit YUV to 10 bit RGB
+TEST_F(LibYUVConvertTest, TestH420ToAR30) {
+  const int kSize = 256;
+  const int kHistSize = 1024;
+  int histogram_b[kHistSize];
+  int histogram_g[kHistSize];
+  int histogram_r[kHistSize];
+  memset(histogram_b, 0, sizeof(histogram_b));
+  memset(histogram_g, 0, sizeof(histogram_g));
+  memset(histogram_r, 0, sizeof(histogram_r));
+  align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
+  align_buffer_page_end(ar30_pixels, kSize * 4);
+  uint8_t* orig_y = orig_yuv;
+  uint8_t* orig_u = orig_y + kSize;
+  uint8_t* orig_v = orig_u + kSize / 2;
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_y[i] = i;
+  }
+  for (int i = 0; i < kSize / 2; ++i) {
+    orig_u[i] = 128;  // 128 is 0.
+    orig_v[i] = 128;
+  }
+
+  H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
+
+  for (int i = 0; i < kSize; ++i) {
+    int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
+    int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
+    int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
+    int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
+    ++histogram_b[b10];
+    ++histogram_g[g10];
+    ++histogram_r[r10];
+    int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f));
+    EXPECT_NEAR(b10, expected_y, 4);
+    EXPECT_NEAR(g10, expected_y, 4);
+    EXPECT_NEAR(r10, expected_y, 4);
+    EXPECT_EQ(a2, 3);
+  }
+
+  int count_b = 0;
+  int count_g = 0;
+  int count_r = 0;
+  for (int i = 0; i < kHistSize; ++i) {
+    if (histogram_b[i]) {
+      ++count_b;
+    }
+    if (histogram_g[i]) {
+      ++count_g;
+    }
+    if (histogram_r[i]) {
+      ++count_r;
+    }
+  }
+  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+  free_aligned_buffer_page_end(orig_yuv);
+  free_aligned_buffer_page_end(ar30_pixels);
+}
+
+// Test I400 with jpeg matrix is same as J400
+TEST_F(LibYUVConvertTest, TestI400) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_i400, kSize);
+  align_buffer_page_end(argb_pixels_i400, kSize * 4);
+  align_buffer_page_end(argb_pixels_j400, kSize * 4);
+  align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4);
+  align_buffer_page_end(argb_pixels_h709_i400, kSize * 4);
+  align_buffer_page_end(argb_pixels_2020_i400, kSize * 4);
+
+  // Test grey scale
+  for (int i = 0; i < kSize; ++i) {
+    orig_i400[i] = i;
+  }
+
+  J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1);
+  I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1);
+  I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants,
+                   kSize, 1);
+  I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants,
+                   kSize, 1);
+  I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
+                   kSize, 1);
+
+  EXPECT_EQ(0, argb_pixels_i400[0]);
+  EXPECT_EQ(0, argb_pixels_j400[0]);
+  EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
+  EXPECT_EQ(0, argb_pixels_h709_i400[0]);
+  EXPECT_EQ(0, argb_pixels_2020_i400[0]);
+  EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
+  EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
+  EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
+  EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
+  EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
+  EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
+  EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
+  EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
+  EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
+  EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
+  EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
+  EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
+  EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
+  EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
+  EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
+
+  for (int i = 0; i < kSize * 4; ++i) {
+    if ((i & 3) == 3) {
+      EXPECT_EQ(255, argb_pixels_j400[i]);
+    } else {
+      EXPECT_EQ(i / 4, argb_pixels_j400[i]);
+    }
+    EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_i400);
+  free_aligned_buffer_page_end(argb_pixels_i400);
+  free_aligned_buffer_page_end(argb_pixels_j400);
+  free_aligned_buffer_page_end(argb_pixels_jpeg_i400);
+  free_aligned_buffer_page_end(argb_pixels_h709_i400);
+  free_aligned_buffer_page_end(argb_pixels_2020_i400);
+}
+
+// Test RGB24 to ARGB and back to RGB24
+TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_rgb24, kSize * 3);
+  align_buffer_page_end(argb_pixels, kSize * 4);
+  align_buffer_page_end(dest_rgb24, kSize * 3);
+
+  // Test grey scale
+  for (int i = 0; i < kSize * 3; ++i) {
+    orig_rgb24[i] = i;
+  }
+
+  RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1);
+  ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1);
+
+  for (int i = 0; i < kSize * 3; ++i) {
+    EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_rgb24);
+  free_aligned_buffer_page_end(argb_pixels);
+  free_aligned_buffer_page_end(dest_rgb24);
+}
+
+TEST_F(LibYUVConvertTest, Test565) {
+  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
+  SIMD_ALIGNED(uint8_t pixels565[256][2]);
+
+  for (int i = 0; i < 256; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      orig_pixels[i][j] = i;
+    }
+  }
+  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+  EXPECT_EQ(610919429u, checksum);
+}
+#endif  // !defined(LEAN_TESTS)
+
+}  // namespace libyuv
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 1f1896b0..f55bace3 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -31,6 +31,13 @@
 #include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
 #endif
 
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
 // Some functions fail on big endian. Enable these tests on all cpus except
 // PowerPC, but they are not optimized so disabled by default.
 #if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
@@ -51,6 +58,8 @@ namespace libyuv {
 // subsample amount uses a divide.
 #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
 
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
+
 // Planar test
 
 #define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
@@ -138,6 +147,7 @@ namespace libyuv {
     free_aligned_buffer_page_end(src_v);                                      \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,           \
                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,               \
                       DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)                 \
@@ -153,6 +163,14 @@ namespace libyuv {
   TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                  FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
                  benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,           \
+                      SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,               \
+                      DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)                 \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif
 
 TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
 TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
@@ -276,6 +294,7 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
     free_aligned_buffer_page_end(src_uv);                                     \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V,         \
                        SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X,    \
                        SUBSAMP_Y)                                              \
@@ -291,6 +310,14 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
   TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
                   0, PN, OFF_U, OFF_V)
+#else
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V,         \
+                       SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X,    \
+                       SUBSAMP_Y)                                              \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+                  0, PN, OFF_U, OFF_V)
+#endif
 
 TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
 TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
@@ -393,6 +420,7 @@ int I400ToNV21(const uint8_t* src_y,
     free_aligned_buffer_page_end(src_v);                                      \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
                        DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)               \
@@ -409,6 +437,14 @@ int I400ToNV21(const uint8_t* src_y,
   TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
                   DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,       \
+                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,           \
+                       DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)             \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif
 
 TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
 TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
@@ -510,6 +546,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
     free_aligned_buffer_page_end(src_uv);                                     \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
@@ -533,6 +570,15 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
               FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
               benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
               TILE_HEIGHT)
+#else
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
+              TILE_HEIGHT)
+#endif
 
 TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
 TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
@@ -547,645 +593,6 @@ TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
 TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
 TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
 
-#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                   DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH,   \
-                   TILE_HEIGHT)                                                \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {               \
-    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");        \
-    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");        \
-    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                    \
-                  "SRC_SUBSAMP_X unsupported");                                \
-    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                    \
-                  "SRC_SUBSAMP_Y unsupported");                                \
-    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                    \
-                  "DST_SUBSAMP_X unsupported");                                \
-    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                    \
-                  "DST_SUBSAMP_Y unsupported");                                \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                \
-    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);                \
-    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);              \
-    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1);  \
-    const int kPaddedHeight =                                                  \
-        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                    \
-    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);    \
-    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y);  \
-    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF);  \
-    align_buffer_page_end(                                                     \
-        src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                  \
-    align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);    \
-    align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);    \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);                \
-    align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC);  \
-    align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC);  \
-    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                    \
-    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                  \
-    for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) {                   \
-      src_y_p[i] =                                                             \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
-    }                                                                          \
-    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
-      src_uv_p[i] =                                                            \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
-    }                                                                          \
-    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                              \
-    memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC);                \
-    memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC);                \
-    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                          \
-    memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC);            \
-    memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC);            \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(                                            \
-        src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2,                          \
-        reinterpret_cast<DST_T*>(dst_y_c), kWidth,                             \
-        reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth,                      \
-        reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth,              \
-        NEG kHeight);                                                          \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                          \
-          src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2,                        \
-          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                         \
-          reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth,                  \
-          reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth,          \
-          NEG kHeight);                                                        \
-    }                                                                          \
-    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                     \
-      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                     \
-    }                                                                          \
-    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) {       \
-      EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);                                     \
-      EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);                                     \
-    }                                                                          \
-    free_aligned_buffer_page_end(dst_y_c);                                     \
-    free_aligned_buffer_page_end(dst_u_c);                                     \
-    free_aligned_buffer_page_end(dst_v_c);                                     \
-    free_aligned_buffer_page_end(dst_y_opt);                                   \
-    free_aligned_buffer_page_end(dst_u_opt);                                   \
-    free_aligned_buffer_page_end(dst_v_opt);                                   \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_uv);                                      \
-  }
-
-#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
-                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
-                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
-  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-             benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH,       \
-             TILE_HEIGHT)                                                   \
-  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-             benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH,     \
-             TILE_HEIGHT)                                                   \
-  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-             benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH,        \
-             TILE_HEIGHT)                                                   \
-  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
-
-TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
-TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
-TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
-
-// Provide matrix wrappers for full range bt.709
-#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
-#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
-#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
-#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
-#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
-#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
-
-// Provide matrix wrappers for full range bt.2020
-#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
-#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
-#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
-#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
-#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
-#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
-
-#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
-  I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
-  I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j)                     \
-  I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                          kFilterBilinear)
-#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j)                     \
-  I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                          kFilterBilinear)
-
-#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
-
-#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN, W1280, N, NEG, OFF)                            \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                       \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                  \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                     \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                              \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                              \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);               \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);             \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      src_y[i + OFF] = (fastrand() & 0xff);                                   \
-    }                                                                         \
-    for (int i = 0; i < kSizeUV; ++i) {                                       \
-      src_u[i + OFF] = (fastrand() & 0xff);                                   \
-      src_v[i + OFF] = (fastrand() & 0xff);                                   \
-    }                                                                         \
-    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                          \
-    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                      \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    double time0 = get_time();                                                \
-    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,        \
-                          src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \
-                          kWidth, NEG kHeight);                               \
-    double time1 = get_time();                                                \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,      \
-                            src_v + OFF, kStrideUV, dst_argb_opt + OFF,       \
-                            kStrideB, kWidth, NEG kHeight);                   \
-    }                                                                         \
-    double time2 = get_time();                                                \
-    printf(" %8d us C - %8d us OPT\n",                                        \
-           static_cast<int>((time1 - time0) * 1e6),                           \
-           static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_));  \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                      \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                  \
-    }                                                                         \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_u);                                      \
-    free_aligned_buffer_page_end(src_v);                                      \
-    free_aligned_buffer_page_end(dst_argb_c);                                 \
-    free_aligned_buffer_page_end(dst_argb_opt);                               \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                      YALIGN)                                                \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_ + 1, _Any, +, 0)                   \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_, _Unaligned, +, 4)                 \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_, _Invert, -, 0)                    \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_, _Opt, +, 0)
-#else
-#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                      YALIGN)                                                \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_ + 1, _Any, +, 0)                   \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_, _Opt, +, 0)
-#endif
-
-#if defined(ENABLE_FULL_TESTS)
-TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
-TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
-TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
-TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
-TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
-TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
-TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
-TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
-TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
-TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
-TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
-#endif
-TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
-TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1)
-TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1)
-TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1)
-TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1)
-TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
-TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
-TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
-TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
-TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1)
-TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
-#endif
-TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
-TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1)
-#else
-TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
-TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
-TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
-TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
-TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
-TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
-#endif
-TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
-TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
-TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
-TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
-TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
-#endif
-
-#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                        YALIGN, W1280, N, NEG, OFF, ATTEN)                     \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      src_y[i + OFF] = (fastrand() & 0xff);                                    \
-      src_a[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      src_u[i + OFF] = (fastrand() & 0xff);                                    \
-      src_v[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
-    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,         \
-                          src_v + OFF, kStrideUV, src_a + OFF, kWidth,         \
-                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
-                          ATTEN);                                              \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,       \
-                            src_v + OFF, kStrideUV, src_a + OFF, kWidth,       \
-                            dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
-                            ATTEN);                                            \
-    }                                                                          \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_u);                                       \
-    free_aligned_buffer_page_end(src_v);                                       \
-    free_aligned_buffer_page_end(src_a);                                       \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN)                                                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Unaligned, +, 2, 0)              \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Invert, -, 0, 0)                 \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Opt, +, 0, 0)                    \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Premult, +, 0, 1)
-#else
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN)                                                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Opt, +, 0, 0)
-#endif
-
-#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-
-#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-
-#if defined(ENABLE_FULL_TESTS)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#else
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#endif
-
-#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-                   W1280, N, NEG, OFF)                                         \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kStrideB = kWidth * BPP_B;                                       \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(src_uv,                                              \
-                          kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight);                      \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight);                    \
-    for (int i = 0; i < kHeight; ++i)                                          \
-      for (int j = 0; j < kWidth; ++j)                                         \
-        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                     \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                  \
-      for (int j = 0; j < kStrideUV * 2; ++j) {                                \
-        src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff);             \
-      }                                                                        \
-    }                                                                          \
-    memset(dst_argb_c, 1, kStrideB* kHeight);                                  \
-    memset(dst_argb_opt, 101, kStrideB* kHeight);                              \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2,    \
-                          dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight);    \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2,  \
-                            dst_argb_opt, kWidth * BPP_B, kWidth,              \
-                            NEG kHeight);                                      \
-    }                                                                          \
-    /* Convert to ARGB so 565 is expanded to bytes that can be compared. */    \
-    align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight);                 \
-    align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight);               \
-    memset(dst_argb32_c, 2, kWidth * 4 * kHeight);                             \
-    memset(dst_argb32_opt, 102, kWidth * 4 * kHeight);                         \
-    FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth,      \
-                  kHeight);                                                    \
-    FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth,  \
-                  kHeight);                                                    \
-    for (int i = 0; i < kHeight; ++i) {                                        \
-      for (int j = 0; j < kWidth * 4; ++j) {                                   \
-        EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j],                            \
-                  dst_argb32_opt[i * kWidth * 4 + j]);                         \
-      }                                                                        \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_uv);                                      \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-    free_aligned_buffer_page_end(dst_argb32_c);                                \
-    free_aligned_buffer_page_end(dst_argb32_opt);                              \
-  }
-
-#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
-  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-             benchmark_width_ + 1, _Any, +, 0)                           \
-  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-             benchmark_width_, _Unaligned, +, 2)                         \
-  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-             benchmark_width_, _Invert, -, 0)                            \
-  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-             benchmark_width_, _Opt, +, 0)
-
-#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
-  NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-#define JNV21ToARGB(a, b, c, d, e, f, g, h) \
-  NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-#define JNV12ToABGR(a, b, c, d, e, f, g, h) \
-  NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
-#define JNV21ToABGR(a, b, c, d, e, f, g, h) \
-  NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
-#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \
-  NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \
-  NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-#define JNV12ToRAW(a, b, c, d, e, f, g, h) \
-  NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
-#define JNV21ToRAW(a, b, c, d, e, f, g, h) \
-  NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
-#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
-  NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-
-TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
-TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
-TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
-TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
-TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
-TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
-TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
-TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
-#endif
-
-TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
-TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
-TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
-TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
-TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
-TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
-TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
-TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
-TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
-#endif
-
 #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
                        W1280, N, NEG, OFF)                                     \
   TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                        \
@@ -1247,8 +654,6 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
 #else
 #define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
   TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
                  benchmark_width_, _Opt, +, 0)
 #endif
 
@@ -1347,8 +752,6 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
 #else
 #define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
   TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
                   benchmark_width_, _Opt, +, 0)
 #endif
 
@@ -1401,6 +804,7 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
     free_aligned_buffer_page_end(src_argb);                                   \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
   TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
              benchmark_width_ + 1, _Any, +, 0)                           \
@@ -1410,6 +814,11 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
              benchmark_width_, _Invert, -, 0)                            \
   TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
              benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0)
+#endif
 
 TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
 TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
@@ -1421,440 +830,7 @@ TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
 TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
 TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
 
-#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,     \
-                  EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)               \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) {                             \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;       \
-    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;       \
-    const int kStrideA =                                                       \
-        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
-    const int kStrideB =                                                       \
-        (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
-    align_buffer_page_end(src_argb,                                            \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);       \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
-    align_buffer_page_end(dst_argb_opt,                                        \
-                          kStrideB* kHeightB*(int)sizeof(TYPE_B));             \
-    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
-      src_argb[i + OFF] = (fastrand() & 0xff);                                 \
-    }                                                                          \
-    memset(dst_argb_c, 1, kStrideB* kHeightB);                                 \
-    memset(dst_argb_opt, 101, kStrideB* kHeightB);                             \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
-                     kStrideB, kWidth, NEG kHeight);                           \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA,                    \
-                       (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight);  \
-    }                                                                          \
-    for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {      \
-      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                               \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_argb);                                    \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B,        \
-                       TYPE_B, EPP_B, STRIDE_B, HEIGHT_B)                      \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) {                       \
-    for (int times = 0; times < benchmark_iterations_; ++times) {              \
-      const int kWidth = (fastrand() & 63) + 1;                                \
-      const int kHeight = (fastrand() & 31) + 1;                               \
-      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
-      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
-      const int kStrideA =                                                     \
-          (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
-      const int kStrideB =                                                     \
-          (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
-      align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
-      align_buffer_page_end(dst_argb_c,                                        \
-                            kStrideB* kHeightB*(int)sizeof(TYPE_B));           \
-      align_buffer_page_end(dst_argb_opt,                                      \
-                            kStrideB* kHeightB*(int)sizeof(TYPE_B));           \
-      for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {    \
-        src_argb[i] = 0xfe;                                                    \
-      }                                                                        \
-      memset(dst_argb_c, 123, kStrideB* kHeightB);                             \
-      memset(dst_argb_opt, 123, kStrideB* kHeightB);                           \
-      MaskCpuFlags(disable_cpu_flags_);                                        \
-      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c,       \
-                       kStrideB, kWidth, kHeight);                             \
-      MaskCpuFlags(benchmark_cpu_info_);                                       \
-      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt,     \
-                       kStrideB, kWidth, kHeight);                             \
-      for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {    \
-        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
-      }                                                                        \
-      free_aligned_buffer_page_end(src_argb);                                  \
-      free_aligned_buffer_page_end(dst_argb_c);                                \
-      free_aligned_buffer_page_end(dst_argb_opt);                              \
-    }                                                                          \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
-                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
-  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0)           \
-  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4)         \
-  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0)            \
-  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)               \
-  TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
-                 EPP_B, STRIDE_B, HEIGHT_B)
-#else
-#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
-                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
-  TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)
-#endif
-
-TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-#endif
-TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
-#endif
-TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#endif
-TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-#endif
-TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
-TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
-TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
-TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
-TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
-#endif
-TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
-TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
-TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
-TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
-TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
-TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
-TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
-TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
-TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-#endif
-TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-
-// in place test
-#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,    \
-                  EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)              \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) {                            \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;      \
-    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;      \
-    const int kStrideA =                                                      \
-        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
-    const int kStrideB =                                                      \
-        (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
-    align_buffer_page_end(src_argb,                                           \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
-    align_buffer_page_end(dst_argb_c,                                         \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
-    align_buffer_page_end(dst_argb_opt,                                       \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
-    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {     \
-      src_argb[i + OFF] = (fastrand() & 0xff);                                \
-    }                                                                         \
-    memcpy(dst_argb_c + OFF, src_argb,                                        \
-           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
-    memcpy(dst_argb_opt + OFF, src_argb,                                      \
-           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA,         \
-                     (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight);     \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA,     \
-                       (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
-    }                                                                         \
-    memcpy(dst_argb_opt + OFF, src_argb,                                      \
-           kStrideA * kHeightA * (int)sizeof(TYPE_A));                        \
-    FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA,       \
-                     (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight);   \
-    for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {     \
-      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                              \
-    }                                                                         \
-    free_aligned_buffer_page_end(src_argb);                                   \
-    free_aligned_buffer_page_end(dst_argb_c);                                 \
-    free_aligned_buffer_page_end(dst_argb_opt);                               \
-  }
-
-#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,   \
-                 EPP_B, STRIDE_B, HEIGHT_B)                                 \
-  TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
-            STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0)
-
-TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-#endif
-TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
-#endif
-TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-#endif
-TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
-#endif
-TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
-// TODO(fbarchard): Support in place for mirror.
-// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
-TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
-TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
-TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
-#endif
-TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
-TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
-// TODO(fbarchard): Support in place for conversions that increase bpp.
-// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
-// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
-// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
-// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
-// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
-TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
-// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
-// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
-TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
-#endif
-TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
-// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
-// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
-TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
-TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
-TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
-
-#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
-                   HEIGHT_B, W1280, N, NEG, OFF)                             \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) {                   \
-    const int kWidth = W1280;                                                \
-    const int kHeight = benchmark_height_;                                   \
-    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
-    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
-    const int kStrideA =                                                     \
-        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
-    const int kStrideB =                                                     \
-        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
-    align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF);               \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                   \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);                 \
-    for (int i = 0; i < kStrideA * kHeightA; ++i) {                          \
-      src_argb[i + OFF] = (fastrand() & 0xff);                               \
-    }                                                                        \
-    memset(dst_argb_c, 1, kStrideB* kHeightB);                               \
-    memset(dst_argb_opt, 101, kStrideB* kHeightB);                           \
-    MaskCpuFlags(disable_cpu_flags_);                                        \
-    FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
-                             NULL, kWidth, NEG kHeight);                     \
-    MaskCpuFlags(benchmark_cpu_info_);                                       \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                        \
-      FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt,       \
-                               kStrideB, NULL, kWidth, NEG kHeight);         \
-    }                                                                        \
-    for (int i = 0; i < kStrideB * kHeightB; ++i) {                          \
-      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
-    }                                                                        \
-    free_aligned_buffer_page_end(src_argb);                                  \
-    free_aligned_buffer_page_end(dst_argb_c);                                \
-    free_aligned_buffer_page_end(dst_argb_opt);                              \
-  }
-
-#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B,        \
-                        STRIDE_B, HEIGHT_B)                                    \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) {                 \
-    for (int times = 0; times < benchmark_iterations_; ++times) {              \
-      const int kWidth = (fastrand() & 63) + 1;                                \
-      const int kHeight = (fastrand() & 31) + 1;                               \
-      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
-      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
-      const int kStrideA =                                                     \
-          (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
-      const int kStrideB =                                                     \
-          (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
-      align_buffer_page_end(src_argb, kStrideA* kHeightA);                     \
-      align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                   \
-      align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);                 \
-      for (int i = 0; i < kStrideA * kHeightA; ++i) {                          \
-        src_argb[i] = (fastrand() & 0xff);                                     \
-      }                                                                        \
-      memset(dst_argb_c, 123, kStrideB* kHeightB);                             \
-      memset(dst_argb_opt, 123, kStrideB* kHeightB);                           \
-      MaskCpuFlags(disable_cpu_flags_);                                        \
-      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \
-                               kWidth, kHeight);                               \
-      MaskCpuFlags(benchmark_cpu_info_);                                       \
-      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB,     \
-                               NULL, kWidth, kHeight);                         \
-      for (int i = 0; i < kStrideB * kHeightB; ++i) {                          \
-        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
-      }                                                                        \
-      free_aligned_buffer_page_end(src_argb);                                  \
-      free_aligned_buffer_page_end(dst_argb_c);                                \
-      free_aligned_buffer_page_end(dst_argb_opt);                              \
-    }                                                                          \
-  }
-
-#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
-                  HEIGHT_B)                                                 \
-  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
-             HEIGHT_B, benchmark_width_ + 1, _Any, +, 0)                    \
-  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
-             HEIGHT_B, benchmark_width_, _Unaligned, +, 2)                  \
-  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
-             HEIGHT_B, benchmark_width_, _Invert, -, 0)                     \
-  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
-             HEIGHT_B, benchmark_width_, _Opt, +, 0)                        \
-  TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
-                  HEIGHT_B)
-
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
-#endif
-
-// These conversions called twice, produce the original result.
-// e.g. endian swap twice.
-#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG,   \
-                 OFF)                                                          \
-  TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) {                           \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;       \
-    const int kStrideA =                                                       \
-        (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
-    align_buffer_page_end(src_argb,                                            \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);       \
-    align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
-    align_buffer_page_end(dst_argb_opt,                                        \
-                          kStrideA* kHeightA*(int)sizeof(TYPE_A));             \
-    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
-      src_argb[i + OFF] = (fastrand() & 0xff);                                 \
-    }                                                                          \
-    memset(dst_argb_c, 1, kStrideA* kHeightA);                                 \
-    memset(dst_argb_opt, 101, kStrideA* kHeightA);                             \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c,         \
-             kStrideA, kWidth, NEG kHeight);                                   \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt,     \
-               kStrideA, kWidth, NEG kHeight);                                 \
-    }                                                                          \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA,     \
-             kWidth, NEG kHeight);                                             \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
-             kWidth, NEG kHeight);                                             \
-    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {      \
-      EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]);                           \
-      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                               \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_argb);                                    \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A)                  \
-  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \
-           _Any, +, 0)                                                        \
-  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_,     \
-           _Unaligned, +, 2)                                                  \
-  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_,     \
-           _Opt, +, 0)
-#else
-#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A)              \
-  TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
-           _Opt, +, 0)
-#endif
-
-TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1)
-TESTEND(ARGBToABGR, uint8_t, 4, 4, 1)
-TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
-TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
-TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
+#if !defined(LEAN_TESTS)
 
 #ifdef HAVE_JPEG
 TEST_F(LibYUVConvertTest, ValidateJpeg) {
@@ -2968,157 +1944,6 @@ TEST_F(LibYUVConvertTest, I420CropOddY) {
   free_aligned_buffer_page_end(src_y);
 }
 
-TEST_F(LibYUVConvertTest, TestYToARGB) {
-  uint8_t y[32];
-  uint8_t expectedg[32];
-  for (int i = 0; i < 32; ++i) {
-    y[i] = i * 5 + 17;
-    expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f);
-  }
-  uint8_t argb[32 * 4];
-  YToARGB(y, 0, argb, 0, 32, 1);
-
-  for (int i = 0; i < 32; ++i) {
-    printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i],
-           argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]);
-  }
-  for (int i = 0; i < 32; ++i) {
-    EXPECT_EQ(expectedg[i], argb[i * 4 + 0]);
-  }
-}
-
-static const uint8_t kNoDither4x4[16] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-TEST_F(LibYUVConvertTest, TestNoDither) {
-  align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4);
-  align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
-  align_buffer_page_end(dst_rgb565dither,
-                        benchmark_width_ * benchmark_height_ * 2);
-  MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4);
-  MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
-  MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
-  ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2,
-               benchmark_width_, benchmark_height_);
-  ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither,
-                     benchmark_width_ * 2, kNoDither4x4, benchmark_width_,
-                     benchmark_height_);
-  for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
-    EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
-  }
-
-  free_aligned_buffer_page_end(src_argb);
-  free_aligned_buffer_page_end(dst_rgb565);
-  free_aligned_buffer_page_end(dst_rgb565dither);
-}
-
-// Ordered 4x4 dither for 888 to 565.  Values from 0 to 7.
-static const uint8_t kDither565_4x4[16] = {
-    0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
-};
-
-TEST_F(LibYUVConvertTest, TestDither) {
-  align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4);
-  align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
-  align_buffer_page_end(dst_rgb565dither,
-                        benchmark_width_ * benchmark_height_ * 2);
-  align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4);
-  align_buffer_page_end(dst_argbdither,
-                        benchmark_width_ * benchmark_height_ * 4);
-  MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4);
-  MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
-  MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
-  MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4);
-  MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4);
-  ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2,
-               benchmark_width_, benchmark_height_);
-  ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither,
-                     benchmark_width_ * 2, kDither565_4x4, benchmark_width_,
-                     benchmark_height_);
-  RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, dst_argb, benchmark_width_ * 4,
-               benchmark_width_, benchmark_height_);
-  RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, dst_argbdither,
-               benchmark_width_ * 4, benchmark_width_, benchmark_height_);
-
-  for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
-    EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9);
-  }
-  free_aligned_buffer_page_end(src_argb);
-  free_aligned_buffer_page_end(dst_rgb565);
-  free_aligned_buffer_page_end(dst_rgb565dither);
-  free_aligned_buffer_page_end(dst_argb);
-  free_aligned_buffer_page_end(dst_argbdither);
-}
-
-#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                        YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C)              \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) {                \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      src_y[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      src_u[i + OFF] = (fastrand() & 0xff);                                    \
-      src_v[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
-    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
-                                  src_v + OFF, kStrideUV, dst_argb_c + OFF,    \
-                                  kStrideB, NULL, kWidth, NEG kHeight);        \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B##Dither(                                           \
-          src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \
-          dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight);            \
-    }                                                                          \
-    /* Convert to ARGB so 565 is expanded to bytes that can be compared. */    \
-    align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight);               \
-    align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight);             \
-    memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight);                           \
-    memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight);                       \
-    FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \
-                     kWidth, kHeight);                                         \
-    FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt,             \
-                     kWidth * BPP_C, kWidth, kHeight);                         \
-    for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) {                       \
-      EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]);                           \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_u);                                       \
-    free_aligned_buffer_page_end(src_v);                                       \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-    free_aligned_buffer_page_end(dst_argb32_c);                                \
-    free_aligned_buffer_page_end(dst_argb32_opt);                              \
-  }
-
-#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN, FMT_C, BPP_C)                                  \
-  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)     \
-  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C)   \
-  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)      \
-  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
-
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
-#endif
-
 #define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12)                                \
   TEST_F(LibYUVConvertTest, NAME) {                                           \
     const int kWidth = benchmark_width_;                                      \
@@ -3224,1370 +2049,6 @@ TEST_F(LibYUVConvertTest, MM21ToYUY2) {
   free_aligned_buffer_page_end(golden_yuyv);
 }
 
-// Transitive test.  A to B to C is same as A to C.
-// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
-#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                       W1280, N, NEG, OFF, FMT_C, BPP_C)                      \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) {            \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                    \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                              \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                              \
-    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);               \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      src_y[i + OFF] = (fastrand() & 0xff);                                   \
-    }                                                                         \
-    for (int i = 0; i < kSizeUV; ++i) {                                       \
-      src_u[i + OFF] = (fastrand() & 0xff);                                   \
-      src_v[i + OFF] = (fastrand() & 0xff);                                   \
-    }                                                                         \
-    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                          \
-    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,        \
-                          src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \
-                          kWidth, NEG kHeight);                               \
-    /* Convert to a 3rd format in 1 step and 2 steps and compare  */          \
-    const int kStrideC = kWidth * BPP_C;                                      \
-    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);               \
-    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);              \
-    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                          \
-    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV,      \
-                            src_v + OFF, kStrideUV, dst_argb_c + OFF,         \
-                            kStrideC, kWidth, NEG kHeight);                   \
-      /* Convert B to C */                                                    \
-      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,         \
-                       kStrideC, kWidth, kHeight);                            \
-    }                                                                         \
-    for (int i = 0; i < kStrideC * kHeight; ++i) {                            \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                   \
-    }                                                                         \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_u);                                      \
-    free_aligned_buffer_page_end(src_v);                                      \
-    free_aligned_buffer_page_end(dst_argb_b);                                 \
-    free_aligned_buffer_page_end(dst_argb_c);                                 \
-    free_aligned_buffer_page_end(dst_argb_bc);                                \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                      FMT_C, BPP_C)                                          \
-  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                 benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)             \
-  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                 benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C)           \
-  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                 benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)              \
-  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
-#else
-#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                      FMT_C, BPP_C)                                          \
-  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
-#endif
-
-#if defined(ENABLE_FULL_TESTS)
-TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
-TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
-TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
-TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3)
-TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
-TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
-TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
-TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
-#endif
-TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
-#else
-TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
-TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
-TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
-TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
-TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
-#endif
-
-// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB.
-// Benchmark 2 step conversion for comparison to 1 step conversion.
-#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                        W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN)               \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) {             \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                     \
-    const int kSizeUV =                                                        \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y);          \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);                \
-    const int kStrideC = kWidth * BPP_C;                                       \
-    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);               \
-    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                           \
-    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                           \
-    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                          \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      src_y[i + OFF] = (fastrand() & 0xff);                                    \
-      src_a[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      src_u[i + OFF] = (fastrand() & 0xff);                                    \
-      src_v[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      /* Convert A to B */                                                     \
-      FMT_PLANAR##To##FMT_B(                                                   \
-          src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),      \
-          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth,      \
-          dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN);             \
-      /* Convert B to C */                                                     \
-      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,          \
-                       kStrideC, kWidth, kHeight);                             \
-    }                                                                          \
-    /* Convert A to C */                                                       \
-    FMT_PLANAR##To##FMT_C(                                                     \
-        src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),        \
-        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth,        \
-        dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN);               \
-    for (int i = 0; i < kStrideC * kHeight; ++i) {                             \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                    \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_u);                                       \
-    free_aligned_buffer_page_end(src_v);                                       \
-    free_aligned_buffer_page_end(src_a);                                       \
-    free_aligned_buffer_page_end(dst_argb_b);                                  \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_bc);                                 \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                       FMT_C, BPP_C)                                          \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0)          \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0)        \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0)           \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)              \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1)
-#else
-#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
-                       FMT_C, BPP_C)                                          \
-  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
-                  benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)
-#endif
-
-#if defined(ENABLE_FULL_TESTS)
-TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
-TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-#else
-TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
-TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
-#endif
-
-#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \
-                      OFF, FMT_C, BPP_C)                                       \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) {                  \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A;                     \
-    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                     \
-    align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);                \
-    MemRandomize(src_argb_a + OFF, kStrideA * kHeight);                        \
-    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                           \
-    FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB,   \
-                     kWidth, NEG kHeight);                                     \
-    /* Convert to a 3rd format in 1 step and 2 steps and compare  */           \
-    const int kStrideC = kWidth * BPP_C;                                       \
-    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);               \
-    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                           \
-    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                          \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \
-                       kWidth, NEG kHeight);                                   \
-      /* Convert B to C */                                                     \
-      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,          \
-                       kStrideC, kWidth, kHeight);                             \
-    }                                                                          \
-    for (int i = 0; i < kStrideC * kHeight; i += 4) {                          \
-      EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]);            \
-      EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]);            \
-      EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]);            \
-      EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64);      \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_argb_a);                                  \
-    free_aligned_buffer_page_end(dst_argb_b);                                  \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_bc);                                 \
-  }
-
-#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
-  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B,                    \
-                benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)              \
-  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
-                _Unaligned, +, 4, FMT_C, BPP_C)                              \
-  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
-                _Invert, -, 0, FMT_C, BPP_C)                                 \
-  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
-                _Opt, +, 0, FMT_C, BPP_C)
-
-// Caveat: Destination needs to be 4 bytes
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
-TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
-TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
-TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4)
-TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4)
-TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4)
-TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4)
-TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4)
-#endif
-
-TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
-  // 2x2 frames
-  uint32_t src[4];
-  uint32_t dst[4];
-  // some random input
-  src[0] = 0x11000000;
-  src[1] = 0x00450000;
-  src[2] = 0x00009f00;
-  src[3] = 0x000000ff;
-  // zeros on destination
-  dst[0] = 0x00000000;
-  dst[1] = 0x00000000;
-  dst[2] = 0x00000000;
-  dst[3] = 0x00000000;
-
-  int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src),
-                        16,  // input size
-                        reinterpret_cast<uint8_t*>(dst),
-                        8,  // destination stride
-                        0,  // crop_x
-                        0,  // crop_y
-                        2,  // width
-                        2,  // height
-                        2,  // crop width
-                        2,  // crop height
-                        kRotate90, FOURCC_ARGB);
-
-  EXPECT_EQ(r, 0);
-  // 90 degrees rotation, no conversion
-  EXPECT_EQ(dst[0], src[2]);
-  EXPECT_EQ(dst[1], src[0]);
-  EXPECT_EQ(dst[2], src[3]);
-  EXPECT_EQ(dst[3], src[1]);
-}
-
-#ifdef HAS_ARGBTOAR30ROW_AVX2
-TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
-  // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels.
-  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
-  align_buffer_page_end(src, kPixels * 4);
-  align_buffer_page_end(dst_opt, kPixels * 4);
-  align_buffer_page_end(dst_c, kPixels * 4);
-  MemRandomize(src, kPixels * 4);
-  memset(dst_opt, 0, kPixels * 4);
-  memset(dst_c, 1, kPixels * 4);
-
-  ARGBToAR30Row_C(src, dst_c, kPixels);
-
-  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
-  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    if (has_avx2) {
-      ARGBToAR30Row_AVX2(src, dst_opt, kPixels);
-    } else if (has_ssse3) {
-      ARGBToAR30Row_SSSE3(src, dst_opt, kPixels);
-    } else {
-      ARGBToAR30Row_C(src, dst_opt, kPixels);
-    }
-  }
-  for (int i = 0; i < kPixels * 4; ++i) {
-    EXPECT_EQ(dst_opt[i], dst_c[i]);
-  }
-
-  free_aligned_buffer_page_end(src);
-  free_aligned_buffer_page_end(dst_opt);
-  free_aligned_buffer_page_end(dst_c);
-}
-#endif  // HAS_ARGBTOAR30ROW_AVX2
-
-#ifdef HAS_ABGRTOAR30ROW_AVX2
-TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
-  // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels.
-  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
-  align_buffer_page_end(src, kPixels * 4);
-  align_buffer_page_end(dst_opt, kPixels * 4);
-  align_buffer_page_end(dst_c, kPixels * 4);
-  MemRandomize(src, kPixels * 4);
-  memset(dst_opt, 0, kPixels * 4);
-  memset(dst_c, 1, kPixels * 4);
-
-  ABGRToAR30Row_C(src, dst_c, kPixels);
-
-  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
-  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    if (has_avx2) {
-      ABGRToAR30Row_AVX2(src, dst_opt, kPixels);
-    } else if (has_ssse3) {
-      ABGRToAR30Row_SSSE3(src, dst_opt, kPixels);
-    } else {
-      ABGRToAR30Row_C(src, dst_opt, kPixels);
-    }
-  }
-  for (int i = 0; i < kPixels * 4; ++i) {
-    EXPECT_EQ(dst_opt[i], dst_c[i]);
-  }
-
-  free_aligned_buffer_page_end(src);
-  free_aligned_buffer_page_end(dst_opt);
-  free_aligned_buffer_page_end(dst_c);
-}
-#endif  // HAS_ABGRTOAR30ROW_AVX2
-
-// Provide matrix wrappers for 12 bit YUV
-#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
-  I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \
-  I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-
-#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
-#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
-#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
-  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
-#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
-  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
-#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
-#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
-#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
-#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
-#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
-  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
-
-#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
-  I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j)                     \
-  I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j)                     \
-  I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j)                     \
-  I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
-                         kFilterBilinear)
-
-// TODO(fbarchard): Fix clamping issue affected by U channel.
-#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
-                         BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF)     \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                       \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                  \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                     \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
-    const int kBpc = 2;                                                       \
-    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF);               \
-    align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF);                       \
-    align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF);                       \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF);              \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF);            \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
-    }                                                                         \
-    for (int i = 0; i < kSizeUV; ++i) {                                       \
-      reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \
-      reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \
-    }                                                                         \
-    memset(dst_argb_c + DOFF, 1, kStrideB * kHeight);                         \
-    memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight);                     \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    FMT_PLANAR##To##FMT_B(                                                    \
-        reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                    \
-        reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,                 \
-        reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,                 \
-        dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight);                    \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      FMT_PLANAR##To##FMT_B(                                                  \
-          reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                  \
-          reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,               \
-          reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,               \
-          dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight);                \
-    }                                                                         \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                      \
-      EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]);                \
-    }                                                                         \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_u);                                      \
-    free_aligned_buffer_page_end(src_v);                                      \
-    free_aligned_buffer_page_end(dst_argb_c);                                 \
-    free_aligned_buffer_page_end(dst_argb_opt);                               \
-  }
-
-#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
-                        BPP_B, ALIGN, YALIGN)                                \
-  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
-                   ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)       \
-  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
-                   ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4)     \
-  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
-                   ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0)        \
-  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
-                   ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
-
-// These conversions are only optimized for x86
-#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
-TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1)
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1)
-
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
-TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1)
-TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
-TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
-#endif  // LITTLE_ENDIAN_ONLY_TEST
-#endif  // DISABLE_SLOW_TESTS
-
-#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,      \
-                          ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH)   \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
-    const int kBpc = 2;                                                        \
-    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF);                 \
-    align_buffer_page_end(src_u, kSizeUV* kBpc + OFF);                         \
-    align_buffer_page_end(src_v, kSizeUV* kBpc + OFF);                         \
-    align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF);                 \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      reinterpret_cast<uint16_t*>(src_y + OFF)[i] =                            \
-          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
-      reinterpret_cast<uint16_t*>(src_a + OFF)[i] =                            \
-          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      reinterpret_cast<uint16_t*>(src_u + OFF)[i] =                            \
-          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
-      reinterpret_cast<uint16_t*>(src_v + OFF)[i] =                            \
-          (fastrand() & ((1 << S_DEPTH) - 1));                                 \
-    }                                                                          \
-    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
-    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth,    \
-                          reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
-                          reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
-                          reinterpret_cast<uint16_t*>(src_a + OFF), kWidth,    \
-                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
-                          ATTEN);                                              \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B(                                                   \
-          reinterpret_cast<uint16_t*>(src_y + OFF), kWidth,                    \
-          reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV,                 \
-          reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV,                 \
-          reinterpret_cast<uint16_t*>(src_a + OFF), kWidth,                    \
-          dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN);           \
-    }                                                                          \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_u);                                       \
-    free_aligned_buffer_page_end(src_v);                                       \
-    free_aligned_buffer_page_end(src_a);                                       \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
-                         ALIGN, YALIGN, S_DEPTH)                            \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                    YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)   \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                    YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                    YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)       \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                    YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH)
-#else
-#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,   \
-                         ALIGN, YALIGN, S_DEPTH)                           \
-  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
-#endif
-
-#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
-                        l, m)
-#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-
-// These conversions are only optimized for x86
-#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
-TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
-#endif  // DISABLE_SLOW_TESTS
-
-#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
-                     YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH)               \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2;                    \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;         \
-    const int kBpc = 2;                                                        \
-    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF);                \
-    align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF);                       \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF);               \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF);             \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      reinterpret_cast<uint16_t*>(src_y + SOFF)[i] =                           \
-          (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH)));                 \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] =                          \
-          (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH)));                 \
-    }                                                                          \
-    memset(dst_argb_c + DOFF, 1, kStrideB * kHeight);                          \
-    memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight);                      \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,   \
-                          reinterpret_cast<uint16_t*>(src_uv + SOFF),          \
-                          kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth,      \
-                          NEG kHeight);                                        \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
-                            reinterpret_cast<uint16_t*>(src_uv + SOFF),        \
-                            kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth,  \
-                            NEG kHeight);                                      \
-    }                                                                          \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
-      EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]);                 \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_uv);                                      \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
-                    YALIGN, S_DEPTH)                                          \
-  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
-               benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)                  \
-  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
-               benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)                \
-  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
-               benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
-  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
-               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
-
-#define P010ToARGB(a, b, c, d, e, f, g, h) \
-  P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P210ToARGB(a, b, c, d, e, f, g, h) \
-  P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P010ToAR30(a, b, c, d, e, f, g, h) \
-  P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P210ToAR30(a, b, c, d, e, f, g, h) \
-  P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-
-#define P012ToARGB(a, b, c, d, e, f, g, h) \
-  P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P212ToARGB(a, b, c, d, e, f, g, h) \
-  P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P012ToAR30(a, b, c, d, e, f, g, h) \
-  P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P212ToAR30(a, b, c, d, e, f, g, h) \
-  P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-
-#define P016ToARGB(a, b, c, d, e, f, g, h) \
-  P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P216ToARGB(a, b, c, d, e, f, g, h) \
-  P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P016ToAR30(a, b, c, d, e, f, g, h) \
-  P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-#define P216ToAR30(a, b, c, d, e, f, g, h) \
-  P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
-
-#define P010ToARGBFilter(a, b, c, d, e, f, g, h)                     \
-  P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
-                         kFilterBilinear)
-#define P210ToARGBFilter(a, b, c, d, e, f, g, h)                     \
-  P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
-                         kFilterBilinear)
-#define P010ToAR30Filter(a, b, c, d, e, f, g, h)                     \
-  P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
-                         kFilterBilinear)
-#define P210ToAR30Filter(a, b, c, d, e, f, g, h)                     \
-  P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
-                         kFilterBilinear)
-
-#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
-TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
-TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
-TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
-TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
-TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
-TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
-TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
-TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
-TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
-TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
-TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
-TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
-TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
-#endif  // LITTLE_ENDIAN_ONLY_TEST
-#endif  // DISABLE_SLOW_TESTS
-
-static int Clamp(int y) {
-  if (y < 0) {
-    y = 0;
-  }
-  if (y > 255) {
-    y = 255;
-  }
-  return y;
-}
-
-static int Clamp10(int y) {
-  if (y < 0) {
-    y = 0;
-  }
-  if (y > 1023) {
-    y = 1023;
-  }
-  return y;
-}
-
-// Test 8 bit YUV to 8 bit RGB
-TEST_F(LibYUVConvertTest, TestH420ToARGB) {
-  const int kSize = 256;
-  int histogram_b[256];
-  int histogram_g[256];
-  int histogram_r[256];
-  memset(histogram_b, 0, sizeof(histogram_b));
-  memset(histogram_g, 0, sizeof(histogram_g));
-  memset(histogram_r, 0, sizeof(histogram_r));
-  align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
-  align_buffer_page_end(argb_pixels, kSize * 4);
-  uint8_t* orig_y = orig_yuv;
-  uint8_t* orig_u = orig_y + kSize;
-  uint8_t* orig_v = orig_u + kSize / 2;
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_y[i] = i;
-  }
-  for (int i = 0; i < kSize / 2; ++i) {
-    orig_u[i] = 128;  // 128 is 0.
-    orig_v[i] = 128;
-  }
-
-  H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
-
-  for (int i = 0; i < kSize; ++i) {
-    int b = argb_pixels[i * 4 + 0];
-    int g = argb_pixels[i * 4 + 1];
-    int r = argb_pixels[i * 4 + 2];
-    int a = argb_pixels[i * 4 + 3];
-    ++histogram_b[b];
-    ++histogram_g[g];
-    ++histogram_r[r];
-    // Reference formula for Y channel contribution in YUV to RGB conversions:
-    int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
-    EXPECT_EQ(b, expected_y);
-    EXPECT_EQ(g, expected_y);
-    EXPECT_EQ(r, expected_y);
-    EXPECT_EQ(a, 255);
-  }
-
-  int count_b = 0;
-  int count_g = 0;
-  int count_r = 0;
-  for (int i = 0; i < kSize; ++i) {
-    if (histogram_b[i]) {
-      ++count_b;
-    }
-    if (histogram_g[i]) {
-      ++count_g;
-    }
-    if (histogram_r[i]) {
-      ++count_r;
-    }
-  }
-  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
-
-  free_aligned_buffer_page_end(orig_yuv);
-  free_aligned_buffer_page_end(argb_pixels);
-}
-
-// Test 10 bit YUV to 8 bit RGB
-TEST_F(LibYUVConvertTest, TestH010ToARGB) {
-  const int kSize = 1024;
-  int histogram_b[1024];
-  int histogram_g[1024];
-  int histogram_r[1024];
-  memset(histogram_b, 0, sizeof(histogram_b));
-  memset(histogram_g, 0, sizeof(histogram_g));
-  memset(histogram_r, 0, sizeof(histogram_r));
-  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
-  align_buffer_page_end(argb_pixels, kSize * 4);
-  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
-  uint16_t* orig_u = orig_y + kSize;
-  uint16_t* orig_v = orig_u + kSize / 2;
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_y[i] = i;
-  }
-  for (int i = 0; i < kSize / 2; ++i) {
-    orig_u[i] = 512;  // 512 is 0.
-    orig_v[i] = 512;
-  }
-
-  H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
-
-  for (int i = 0; i < kSize; ++i) {
-    int b = argb_pixels[i * 4 + 0];
-    int g = argb_pixels[i * 4 + 1];
-    int r = argb_pixels[i * 4 + 2];
-    int a = argb_pixels[i * 4 + 3];
-    ++histogram_b[b];
-    ++histogram_g[g];
-    ++histogram_r[r];
-    int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4));
-    EXPECT_NEAR(b, expected_y, 1);
-    EXPECT_NEAR(g, expected_y, 1);
-    EXPECT_NEAR(r, expected_y, 1);
-    EXPECT_EQ(a, 255);
-  }
-
-  int count_b = 0;
-  int count_g = 0;
-  int count_r = 0;
-  for (int i = 0; i < kSize; ++i) {
-    if (histogram_b[i]) {
-      ++count_b;
-    }
-    if (histogram_g[i]) {
-      ++count_g;
-    }
-    if (histogram_r[i]) {
-      ++count_r;
-    }
-  }
-  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
-
-  free_aligned_buffer_page_end(orig_yuv);
-  free_aligned_buffer_page_end(argb_pixels);
-}
-
-// Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected
-// result.
-TEST_F(LibYUVConvertTest, TestH010ToAR30) {
-  const int kSize = 1024;
-  int histogram_b[1024];
-  int histogram_g[1024];
-  int histogram_r[1024];
-  memset(histogram_b, 0, sizeof(histogram_b));
-  memset(histogram_g, 0, sizeof(histogram_g));
-  memset(histogram_r, 0, sizeof(histogram_r));
-
-  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
-  align_buffer_page_end(ar30_pixels, kSize * 4);
-  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
-  uint16_t* orig_u = orig_y + kSize;
-  uint16_t* orig_v = orig_u + kSize / 2;
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_y[i] = i;
-  }
-  for (int i = 0; i < kSize / 2; ++i) {
-    orig_u[i] = 512;  // 512 is 0.
-    orig_v[i] = 512;
-  }
-
-  H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
-
-  for (int i = 0; i < kSize; ++i) {
-    int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
-    int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
-    int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
-    int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
-    ++histogram_b[b10];
-    ++histogram_g[g10];
-    ++histogram_r[r10];
-    int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
-    EXPECT_NEAR(b10, expected_y, 4);
-    EXPECT_NEAR(g10, expected_y, 4);
-    EXPECT_NEAR(r10, expected_y, 4);
-    EXPECT_EQ(a2, 3);
-  }
-
-  int count_b = 0;
-  int count_g = 0;
-  int count_r = 0;
-  for (int i = 0; i < kSize; ++i) {
-    if (histogram_b[i]) {
-      ++count_b;
-    }
-    if (histogram_g[i]) {
-      ++count_g;
-    }
-    if (histogram_r[i]) {
-      ++count_r;
-    }
-  }
-  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
-
-  free_aligned_buffer_page_end(orig_yuv);
-  free_aligned_buffer_page_end(ar30_pixels);
-}
-
-// Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected
-// result.
-TEST_F(LibYUVConvertTest, TestH010ToAB30) {
-  const int kSize = 1024;
-  int histogram_b[1024];
-  int histogram_g[1024];
-  int histogram_r[1024];
-  memset(histogram_b, 0, sizeof(histogram_b));
-  memset(histogram_g, 0, sizeof(histogram_g));
-  memset(histogram_r, 0, sizeof(histogram_r));
-
-  align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
-  align_buffer_page_end(ab30_pixels, kSize * 4);
-  uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
-  uint16_t* orig_u = orig_y + kSize;
-  uint16_t* orig_v = orig_u + kSize / 2;
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_y[i] = i;
-  }
-  for (int i = 0; i < kSize / 2; ++i) {
-    orig_u[i] = 512;  // 512 is 0.
-    orig_v[i] = 512;
-  }
-
-  H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1);
-
-  for (int i = 0; i < kSize; ++i) {
-    int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023;
-    int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023;
-    int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023;
-    int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3;
-    ++histogram_b[b10];
-    ++histogram_g[g10];
-    ++histogram_r[r10];
-    int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
-    EXPECT_NEAR(b10, expected_y, 4);
-    EXPECT_NEAR(g10, expected_y, 4);
-    EXPECT_NEAR(r10, expected_y, 4);
-    EXPECT_EQ(a2, 3);
-  }
-
-  int count_b = 0;
-  int count_g = 0;
-  int count_r = 0;
-  for (int i = 0; i < kSize; ++i) {
-    if (histogram_b[i]) {
-      ++count_b;
-    }
-    if (histogram_g[i]) {
-      ++count_g;
-    }
-    if (histogram_r[i]) {
-      ++count_r;
-    }
-  }
-  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
-
-  free_aligned_buffer_page_end(orig_yuv);
-  free_aligned_buffer_page_end(ab30_pixels);
-}
-
-// Test 8 bit YUV to 10 bit RGB
-TEST_F(LibYUVConvertTest, TestH420ToAR30) {
-  const int kSize = 256;
-  const int kHistSize = 1024;
-  int histogram_b[kHistSize];
-  int histogram_g[kHistSize];
-  int histogram_r[kHistSize];
-  memset(histogram_b, 0, sizeof(histogram_b));
-  memset(histogram_g, 0, sizeof(histogram_g));
-  memset(histogram_r, 0, sizeof(histogram_r));
-  align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
-  align_buffer_page_end(ar30_pixels, kSize * 4);
-  uint8_t* orig_y = orig_yuv;
-  uint8_t* orig_u = orig_y + kSize;
-  uint8_t* orig_v = orig_u + kSize / 2;
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_y[i] = i;
-  }
-  for (int i = 0; i < kSize / 2; ++i) {
-    orig_u[i] = 128;  // 128 is 0.
-    orig_v[i] = 128;
-  }
-
-  H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
-
-  for (int i = 0; i < kSize; ++i) {
-    int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
-    int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
-    int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
-    int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
-    ++histogram_b[b10];
-    ++histogram_g[g10];
-    ++histogram_r[r10];
-    int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f));
-    EXPECT_NEAR(b10, expected_y, 4);
-    EXPECT_NEAR(g10, expected_y, 4);
-    EXPECT_NEAR(r10, expected_y, 4);
-    EXPECT_EQ(a2, 3);
-  }
-
-  int count_b = 0;
-  int count_g = 0;
-  int count_r = 0;
-  for (int i = 0; i < kHistSize; ++i) {
-    if (histogram_b[i]) {
-      ++count_b;
-    }
-    if (histogram_g[i]) {
-      ++count_g;
-    }
-    if (histogram_r[i]) {
-      ++count_r;
-    }
-  }
-  printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
-
-  free_aligned_buffer_page_end(orig_yuv);
-  free_aligned_buffer_page_end(ar30_pixels);
-}
-
-// Test I400 with jpeg matrix is same as J400
-TEST_F(LibYUVConvertTest, TestI400) {
-  const int kSize = 256;
-  align_buffer_page_end(orig_i400, kSize);
-  align_buffer_page_end(argb_pixels_i400, kSize * 4);
-  align_buffer_page_end(argb_pixels_j400, kSize * 4);
-  align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4);
-  align_buffer_page_end(argb_pixels_h709_i400, kSize * 4);
-  align_buffer_page_end(argb_pixels_2020_i400, kSize * 4);
-
-  // Test grey scale
-  for (int i = 0; i < kSize; ++i) {
-    orig_i400[i] = i;
-  }
-
-  J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1);
-  I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1);
-  I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants,
-                   kSize, 1);
-  I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants,
-                   kSize, 1);
-  I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
-                   kSize, 1);
-
-  EXPECT_EQ(0, argb_pixels_i400[0]);
-  EXPECT_EQ(0, argb_pixels_j400[0]);
-  EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
-  EXPECT_EQ(0, argb_pixels_h709_i400[0]);
-  EXPECT_EQ(0, argb_pixels_2020_i400[0]);
-  EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
-  EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
-  EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
-  EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
-  EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
-  EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
-  EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
-  EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
-  EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
-  EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
-  EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
-  EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
-  EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
-  EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
-  EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
-
-  for (int i = 0; i < kSize * 4; ++i) {
-    if ((i & 3) == 3) {
-      EXPECT_EQ(255, argb_pixels_j400[i]);
-    } else {
-      EXPECT_EQ(i / 4, argb_pixels_j400[i]);
-    }
-    EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
-  }
-
-  free_aligned_buffer_page_end(orig_i400);
-  free_aligned_buffer_page_end(argb_pixels_i400);
-  free_aligned_buffer_page_end(argb_pixels_j400);
-  free_aligned_buffer_page_end(argb_pixels_jpeg_i400);
-  free_aligned_buffer_page_end(argb_pixels_h709_i400);
-  free_aligned_buffer_page_end(argb_pixels_2020_i400);
-}
-
-// Test RGB24 to ARGB and back to RGB24
-TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
-  const int kSize = 256;
-  align_buffer_page_end(orig_rgb24, kSize * 3);
-  align_buffer_page_end(argb_pixels, kSize * 4);
-  align_buffer_page_end(dest_rgb24, kSize * 3);
-
-  // Test grey scale
-  for (int i = 0; i < kSize * 3; ++i) {
-    orig_rgb24[i] = i;
-  }
-
-  RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1);
-  ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1);
-
-  for (int i = 0; i < kSize * 3; ++i) {
-    EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]);
-  }
-
-  free_aligned_buffer_page_end(orig_rgb24);
-  free_aligned_buffer_page_end(argb_pixels);
-  free_aligned_buffer_page_end(dest_rgb24);
-}
-
-TEST_F(LibYUVConvertTest, Test565) {
-  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
-  SIMD_ALIGNED(uint8_t pixels565[256][2]);
-
-  for (int i = 0; i < 256; ++i) {
-    for (int j = 0; j < 4; ++j) {
-      orig_pixels[i][j] = i;
-    }
-  }
-  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
-  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
-  EXPECT_EQ(610919429u, checksum);
-}
-
 // Test RGB24 to J420 is exact
 #if defined(LIBYUV_BIT_EXACT)
 TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
@@ -4644,4 +2105,6 @@ TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
 }
 #endif
 
+#endif  // !defined(LEAN_TESTS)
+
 }  // namespace libyuv
diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 431343e3..437b6632 100644
--- a/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -47,14 +47,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
   int has_erms = TestCpuFlag(kCpuHasERMS);
   int has_fma3 = TestCpuFlag(kCpuHasFMA3);
   int has_f16c = TestCpuFlag(kCpuHasF16C);
-  int has_gfni = TestCpuFlag(kCpuHasGFNI);
   int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
   int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
   int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
   int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
   int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
   int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
-  int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
+  int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+  int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+  int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
   printf("Has X86 0x%x\n", has_x86);
   printf("Has SSE2 0x%x\n", has_sse2);
   printf("Has SSSE3 0x%x\n", has_ssse3);
@@ -65,14 +66,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
   printf("Has ERMS 0x%x\n", has_erms);
   printf("Has FMA3 0x%x\n", has_fma3);
   printf("Has F16C 0x%x\n", has_f16c);
-  printf("Has GFNI 0x%x\n", has_gfni);
   printf("Has AVX512BW 0x%x\n", has_avx512bw);
   printf("Has AVX512VL 0x%x\n", has_avx512vl);
   printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
   printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
   printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
   printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
-  printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+  printf("Has AVX10 0x%x\n", has_avx10);
+  printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+  printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
 #endif
 #if defined(__mips__)
   int has_mips = TestCpuFlag(kCpuHasMIPS);
@@ -183,7 +185,7 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
   printf("__pnacl__ %d\n", __pnacl__);
 #endif
 #ifdef GG_LONGLONG
-  printf("GG_LONGLONG %d\n", GG_LONGLONG);
+  printf("GG_LONGLONG %lld\n", GG_LONGLONG(1));
 #endif
 #ifdef INT_TYPES_DEFINED
   printf("INT_TYPES_DEFINED\n");
diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc
new file mode 100644
index 00000000..9ce47a02
--- /dev/null
+++ b/unit_test/scale_plane_test.cc
@@ -0,0 +1,470 @@
+/*
+ *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
+#endif
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+namespace libyuv {
+
+#ifdef ENABLE_ROW_TESTS
+#ifdef HAS_SCALEROWDOWN2_SSSE3
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
+  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
+  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
+  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
+  memset(orig_pixels, 0, sizeof(orig_pixels));
+  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
+  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+
+  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+  if (!has_ssse3) {
+    printf("Warning SSSE3 not detected; Skipping test.\n");
+  } else {
+    // TL.
+    orig_pixels[0] = 255u;
+    orig_pixels[1] = 0u;
+    orig_pixels[128 + 0] = 0u;
+    orig_pixels[128 + 1] = 0u;
+    // TR.
+    orig_pixels[2] = 0u;
+    orig_pixels[3] = 100u;
+    orig_pixels[128 + 2] = 0u;
+    orig_pixels[128 + 3] = 0u;
+    // BL.
+    orig_pixels[4] = 0u;
+    orig_pixels[5] = 0u;
+    orig_pixels[128 + 4] = 50u;
+    orig_pixels[128 + 5] = 0u;
+    // BR.
+    orig_pixels[6] = 0u;
+    orig_pixels[7] = 0u;
+    orig_pixels[128 + 6] = 0u;
+    orig_pixels[128 + 7] = 20u;
+    // Odd.
+    orig_pixels[126] = 4u;
+    orig_pixels[127] = 255u;
+    orig_pixels[128 + 126] = 16u;
+    orig_pixels[128 + 127] = 255u;
+
+    // Test regular half size.
+    ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(133u, dst_pixels_c[63]);
+
+    // Test Odd width version - Last pixel is just 1 horizontal pixel.
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(10u, dst_pixels_c[63]);
+
+    // Test one pixel less, should skip the last pixel.
+    memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(0u, dst_pixels_c[63]);
+
+    // Test regular half size SSSE3.
+    ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+
+    EXPECT_EQ(64u, dst_pixels_opt[0]);
+    EXPECT_EQ(25u, dst_pixels_opt[1]);
+    EXPECT_EQ(13u, dst_pixels_opt[2]);
+    EXPECT_EQ(5u, dst_pixels_opt[3]);
+    EXPECT_EQ(0u, dst_pixels_opt[4]);
+    EXPECT_EQ(133u, dst_pixels_opt[63]);
+
+    // Compare C and SSSE3 match.
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+    ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+    for (int i = 0; i < 64; ++i) {
+      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+    }
+  }
+}
+#endif  // HAS_SCALEROWDOWN2_SSSE3
+
+extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
+                                         ptrdiff_t src_stride,
+                                         uint16_t* dst,
+                                         int dst_width);
+
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
+  SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
+  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
+
+  memset(orig_pixels, 0, sizeof(orig_pixels));
+  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
+
+  for (int i = 0; i < 2560 * 2; ++i) {
+    orig_pixels[i] = i;
+  }
+  ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
+  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+    int has_neon = TestCpuFlag(kCpuHasNEON);
+    if (has_neon) {
+      ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+    } else {
+      ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+    }
+#else
+    ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+#endif
+  }
+
+  for (int i = 0; i < 1280; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+
+  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
+  EXPECT_EQ(dst_pixels_c[1279], 3839);
+}
+#endif  // ENABLE_ROW_TESTS
+
+// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
+// difference.
+// 0 = exact.
+static int TestPlaneFilter_16(int src_width,
+                              int src_height,
+                              int dst_width,
+                              int dst_height,
+                              FilterMode f,
+                              int benchmark_iterations,
+                              int disable_cpu_flags,
+                              int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  int i;
+  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+  int src_stride_y = Abs(src_width);
+  int dst_y_plane_size = dst_width * dst_height;
+  int dst_stride_y = dst_width;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+  align_buffer_page_end(dst_y_8, dst_y_plane_size);
+  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+
+  MemRandomize(src_y, src_y_plane_size);
+  memset(dst_y_8, 0, dst_y_plane_size);
+  memset(dst_y_16, 1, dst_y_plane_size * 2);
+
+  for (i = 0; i < src_y_plane_size; ++i) {
+    p_src_y_16[i] = src_y[i] & 255;
+  }
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
+             dst_width, dst_height, f);
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+
+  for (i = 0; i < benchmark_iterations; ++i) {
+    ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
+                  dst_stride_y, dst_width, dst_height, f);
+  }
+
+  // Expect an exact match.
+  int max_diff = 0;
+  for (i = 0; i < dst_y_plane_size; ++i) {
+    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_8);
+  free_aligned_buffer_page_end(dst_y_16);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_y_16);
+
+  return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+// 2 is chroma subsample.
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
+  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
+    int diff = TestPlaneFilter_16(                                             \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
+        benchmark_cpu_info_);                                                  \
+    EXPECT_LE(diff, max_diff);                                                 \
+  }
+
+// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
+// filtering is different fixed point implementations for SSSE3, Neon and C.
+#define TEST_FACTOR(name, nom, denom, boxdiff)      \
+  TEST_FACTOR1(name, None, nom, denom, 0)           \
+  TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
+  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
+  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+
+TEST_FACTOR(2, 1, 2, 0)
+TEST_FACTOR(4, 1, 4, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
+TEST_FACTOR(3by4, 3, 4, 1)
+TEST_FACTOR(3by8, 3, 8, 1)
+TEST_FACTOR(3, 1, 3, 0)
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
+
+TEST_F(LibYUVScaleTest, PlaneTest3x) {
+  const int kSrcStride = 480;
+  const int kDstStride = 160;
+  const int kSize = kSrcStride * 3;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < 480 * 3; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_pixels, kDstStride);
+
+  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+                      benchmark_iterations_;
+  for (int i = 0; i < iterations160; ++i) {
+    ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+               kFilterBilinear);
+  }
+
+  EXPECT_EQ(225, dest_pixels[0]);
+
+  ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+             kFilterNone);
+
+  EXPECT_EQ(225, dest_pixels[0]);
+
+  free_aligned_buffer_page_end(dest_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest4x) {
+  const int kSrcStride = 640;
+  const int kDstStride = 160;
+  const int kSize = kSrcStride * 4;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < 640 * 4; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_pixels, kDstStride);
+
+  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+                      benchmark_iterations_;
+  for (int i = 0; i < iterations160; ++i) {
+    ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+               kFilterBilinear);
+  }
+
+  EXPECT_EQ(66, dest_pixels[0]);
+
+  ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+             kFilterNone);
+
+  EXPECT_EQ(2, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row
+
+  free_aligned_buffer_page_end(dest_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterNone);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterNone);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterBilinear);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterBilinear);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterBox);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterBox);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
+  align_buffer_page_end(orig_pixels, 3);
+  align_buffer_page_end(dst_pixels, 3);
+
+  // Pad the 1x1 byte image with invalid values before and after in case libyuv
+  // reads outside the memory boundaries.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 1;  // scale this pixel
+  orig_pixels[2] = 2;
+  dst_pixels[0] = 3;
+  dst_pixels[1] = 3;
+  dst_pixels[2] = 3;
+
+  libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+                     /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+                     /* dst_width= */ 1, /* dst_height= */ 2,
+                     libyuv::kFilterBox);
+
+  EXPECT_EQ(dst_pixels[0], 1);
+  EXPECT_EQ(dst_pixels[1], 1);
+  EXPECT_EQ(dst_pixels[2], 3);
+
+  free_aligned_buffer_page_end(dst_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
+  align_buffer_page_end(orig_pixels_alloc, 3 * 2);
+  align_buffer_page_end(dst_pixels_alloc, 3 * 2);
+  uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
+  uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
+
+  // Pad the 1x1 byte image with invalid values before and after in case libyuv
+  // reads outside the memory boundaries.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 1;  // scale this pixel
+  orig_pixels[2] = 2;
+  dst_pixels[0] = 3;
+  dst_pixels[1] = 3;
+  dst_pixels[2] = 3;
+
+  libyuv::ScalePlane_16(
+      orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+      /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+      /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
+
+  EXPECT_EQ(dst_pixels[0], 1);
+  EXPECT_EQ(dst_pixels[1], 1);
+  EXPECT_EQ(dst_pixels[2], 3);
+
+  free_aligned_buffer_page_end(dst_pixels_alloc);
+  free_aligned_buffer_page_end(orig_pixels_alloc);
+}
+}  // namespace libyuv
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index c2232e66..6e3b9271 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -22,6 +22,11 @@
 #define STRINGIZE(line) #line
 #define FILELINESTR(file, line) file ":" STRINGIZE(line)
 
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#endif
+
 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
 // SLOW TESTS are those that are unoptimized C code.
 // FULL TESTS are optimized but test many variations of the same code.
@@ -1123,437 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
 TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
 #endif
 #endif
-
 #undef TEST_SCALESWAPXY1
 
-#ifdef ENABLE_ROW_TESTS
-#ifdef HAS_SCALEROWDOWN2_SSSE3
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
-  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
-  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
-  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
-  memset(orig_pixels, 0, sizeof(orig_pixels));
-  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
-  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
-
-  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
-  if (!has_ssse3) {
-    printf("Warning SSSE3 not detected; Skipping test.\n");
-  } else {
-    // TL.
-    orig_pixels[0] = 255u;
-    orig_pixels[1] = 0u;
-    orig_pixels[128 + 0] = 0u;
-    orig_pixels[128 + 1] = 0u;
-    // TR.
-    orig_pixels[2] = 0u;
-    orig_pixels[3] = 100u;
-    orig_pixels[128 + 2] = 0u;
-    orig_pixels[128 + 3] = 0u;
-    // BL.
-    orig_pixels[4] = 0u;
-    orig_pixels[5] = 0u;
-    orig_pixels[128 + 4] = 50u;
-    orig_pixels[128 + 5] = 0u;
-    // BR.
-    orig_pixels[6] = 0u;
-    orig_pixels[7] = 0u;
-    orig_pixels[128 + 6] = 0u;
-    orig_pixels[128 + 7] = 20u;
-    // Odd.
-    orig_pixels[126] = 4u;
-    orig_pixels[127] = 255u;
-    orig_pixels[128 + 126] = 16u;
-    orig_pixels[128 + 127] = 255u;
-
-    // Test regular half size.
-    ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(133u, dst_pixels_c[63]);
-
-    // Test Odd width version - Last pixel is just 1 horizontal pixel.
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(10u, dst_pixels_c[63]);
-
-    // Test one pixel less, should skip the last pixel.
-    memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(0u, dst_pixels_c[63]);
-
-    // Test regular half size SSSE3.
-    ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-
-    EXPECT_EQ(64u, dst_pixels_opt[0]);
-    EXPECT_EQ(25u, dst_pixels_opt[1]);
-    EXPECT_EQ(13u, dst_pixels_opt[2]);
-    EXPECT_EQ(5u, dst_pixels_opt[3]);
-    EXPECT_EQ(0u, dst_pixels_opt[4]);
-    EXPECT_EQ(133u, dst_pixels_opt[63]);
-
-    // Compare C and SSSE3 match.
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-    ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-    for (int i = 0; i < 64; ++i) {
-      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-    }
-  }
-}
-#endif  // HAS_SCALEROWDOWN2_SSSE3
-
-extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
-                                         ptrdiff_t src_stride,
-                                         uint16_t* dst,
-                                         int dst_width);
-
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
-  SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
-  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
-  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
-
-  memset(orig_pixels, 0, sizeof(orig_pixels));
-  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
-  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
-
-  for (int i = 0; i < 2560 * 2; ++i) {
-    orig_pixels[i] = i;
-  }
-  ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
-  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-    int has_neon = TestCpuFlag(kCpuHasNEON);
-    if (has_neon) {
-      ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-    } else {
-      ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-    }
-#else
-    ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-#endif
-  }
-
-  for (int i = 0; i < 1280; ++i) {
-    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-  }
-
-  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
-  EXPECT_EQ(dst_pixels_c[1279], 3839);
-}
-#endif  // ENABLE_ROW_TESTS
-
-// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
-// difference.
-// 0 = exact.
-static int TestPlaneFilter_16(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              FilterMode f,
-                              int benchmark_iterations,
-                              int disable_cpu_flags,
-                              int benchmark_cpu_info) {
-  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
-    return 0;
-  }
-
-  int i;
-  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
-  int src_stride_y = Abs(src_width);
-  int dst_y_plane_size = dst_width * dst_height;
-  int dst_stride_y = dst_width;
-
-  align_buffer_page_end(src_y, src_y_plane_size);
-  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
-  align_buffer_page_end(dst_y_8, dst_y_plane_size);
-  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
-  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
-  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
-
-  MemRandomize(src_y, src_y_plane_size);
-  memset(dst_y_8, 0, dst_y_plane_size);
-  memset(dst_y_16, 1, dst_y_plane_size * 2);
-
-  for (i = 0; i < src_y_plane_size; ++i) {
-    p_src_y_16[i] = src_y[i] & 255;
-  }
-
-  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
-             dst_width, dst_height, f);
-  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
-
-  for (i = 0; i < benchmark_iterations; ++i) {
-    ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
-                  dst_stride_y, dst_width, dst_height, f);
-  }
-
-  // Expect an exact match.
-  int max_diff = 0;
-  for (i = 0; i < dst_y_plane_size; ++i) {
-    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
-    if (abs_diff > max_diff) {
-      max_diff = abs_diff;
-    }
-  }
-
-  free_aligned_buffer_page_end(dst_y_8);
-  free_aligned_buffer_page_end(dst_y_16);
-  free_aligned_buffer_page_end(src_y);
-  free_aligned_buffer_page_end(src_y_16);
-
-  return max_diff;
-}
-
-// The following adjustments in dimensions ensure the scale factor will be
-// exactly achieved.
-// 2 is chroma subsample.
-#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
-#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
-
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
-  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
-    int diff = TestPlaneFilter_16(                                             \
-        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
-        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
-        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
-        benchmark_cpu_info_);                                                  \
-    EXPECT_LE(diff, max_diff);                                                 \
-  }
-
-// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom, boxdiff)      \
-  TEST_FACTOR1(name, None, nom, denom, 0)           \
-  TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
-  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
-  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
-
-TEST_FACTOR(2, 1, 2, 0)
-TEST_FACTOR(4, 1, 4, 0)
-// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
-TEST_FACTOR(3by4, 3, 4, 1)
-TEST_FACTOR(3by8, 3, 8, 1)
-TEST_FACTOR(3, 1, 3, 0)
-#undef TEST_FACTOR1
-#undef TEST_FACTOR
-#undef SX
-#undef DX
-
-TEST_F(LibYUVScaleTest, PlaneTest3x) {
-  const int kSrcStride = 480;
-  const int kDstStride = 160;
-  const int kSize = kSrcStride * 3;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < 480 * 3; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_pixels, kDstStride);
-
-  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
-                      benchmark_iterations_;
-  for (int i = 0; i < iterations160; ++i) {
-    ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
-               kFilterBilinear);
-  }
-
-  EXPECT_EQ(225, dest_pixels[0]);
-
-  ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
-             kFilterNone);
-
-  EXPECT_EQ(225, dest_pixels[0]);
-
-  free_aligned_buffer_page_end(dest_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest4x) {
-  const int kSrcStride = 640;
-  const int kDstStride = 160;
-  const int kSize = kSrcStride * 4;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < 640 * 4; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_pixels, kDstStride);
-
-  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
-                      benchmark_iterations_;
-  for (int i = 0; i < iterations160; ++i) {
-    ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
-               kFilterBilinear);
-  }
-
-  EXPECT_EQ(66, dest_pixels[0]);
-
-  ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
-             kFilterNone);
-
-  EXPECT_EQ(2, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row
-
-  free_aligned_buffer_page_end(dest_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterNone);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterNone);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterBilinear);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterBilinear);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterBox);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterBox);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
-  align_buffer_page_end(orig_pixels, 3);
-  align_buffer_page_end(dst_pixels, 3);
-
-  // Pad the 1x1 byte image with invalid values before and after in case libyuv
-  // reads outside the memory boundaries.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 1;  // scale this pixel
-  orig_pixels[2] = 2;
-  dst_pixels[0] = 3;
-  dst_pixels[1] = 3;
-  dst_pixels[2] = 3;
-
-  libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
-                     /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
-                     /* dst_width= */ 1, /* dst_height= */ 2,
-                     libyuv::kFilterBox);
-
-  EXPECT_EQ(dst_pixels[0], 1);
-  EXPECT_EQ(dst_pixels[1], 1);
-  EXPECT_EQ(dst_pixels[2], 3);
-
-  free_aligned_buffer_page_end(dst_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
-  align_buffer_page_end(orig_pixels_alloc, 3 * 2);
-  align_buffer_page_end(dst_pixels_alloc, 3 * 2);
-  uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
-  uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
-
-  // Pad the 1x1 byte image with invalid values before and after in case libyuv
-  // reads outside the memory boundaries.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 1;  // scale this pixel
-  orig_pixels[2] = 2;
-  dst_pixels[0] = 3;
-  dst_pixels[1] = 3;
-  dst_pixels[2] = 3;
-
-  libyuv::ScalePlane_16(
-      orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
-      /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
-      /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
-
-  EXPECT_EQ(dst_pixels[0], 1);
-  EXPECT_EQ(dst_pixels[1], 1);
-  EXPECT_EQ(dst_pixels[2], 3);
-
-  free_aligned_buffer_page_end(dst_pixels_alloc);
-  free_aligned_buffer_page_end(orig_pixels_alloc);
-}
 }  // namespace libyuv
diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc
index b66ebfab..239d5b92 100644
--- a/unit_test/unit_test.cc
+++ b/unit_test/unit_test.cc
@@ -144,11 +144,14 @@ int TestCpuEnv(int cpu_info) {
   if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) {
     cpu_info &= ~libyuv::kCpuHasAVX512VBITALG;
   }
-  if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) {
-    cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ;
+  if (TestEnv("LIBYUV_DISABLE_AVX10")) {
+    cpu_info &= ~libyuv::kCpuHasAVX10;
   }
-  if (TestEnv("LIBYUV_DISABLE_GFNI")) {
-    cpu_info &= ~libyuv::kCpuHasGFNI;
+  if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) {
+    cpu_info &= ~libyuv::kCpuHasAVXVNNI;
+  }
+  if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) {
+    cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8;
   }
 #endif
   if (TestEnv("LIBYUV_DISABLE_ASM")) {
diff --git a/util/cpuid.c b/util/cpuid.c
index edc6a26e..c07e6e95 100644
--- a/util/cpuid.c
+++ b/util/cpuid.c
@@ -96,14 +96,15 @@ int main(int argc, const char* argv[]) {
     int has_erms = TestCpuFlag(kCpuHasERMS);
     int has_fma3 = TestCpuFlag(kCpuHasFMA3);
     int has_f16c = TestCpuFlag(kCpuHasF16C);
-    int has_gfni = TestCpuFlag(kCpuHasGFNI);
     int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
     int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
     int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
     int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
     int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
     int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
-    int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
+    int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+    int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+    int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
     printf("Has X86 0x%x\n", has_x86);
     printf("Has SSE2 0x%x\n", has_sse2);
     printf("Has SSSE3 0x%x\n", has_ssse3);
@@ -114,14 +115,15 @@ int main(int argc, const char* argv[]) {
     printf("Has ERMS 0x%x\n", has_erms);
     printf("Has FMA3 0x%x\n", has_fma3);
     printf("Has F16C 0x%x\n", has_f16c);
-    printf("Has GFNI 0x%x\n", has_gfni);
     printf("Has AVX512BW 0x%x\n", has_avx512bw);
     printf("Has AVX512VL 0x%x\n", has_avx512vl);
     printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
     printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
     printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
     printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
-    printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+    printf("Has AVX10 0x%x\n", has_avx10);
+    printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+    printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
   }
   return 0;
 }
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2024-04-19 18:27:08 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	2024-04-19 18:27:08 +0000
commit	f72df94f7894736b8115a1b43ecf6acde951efbc (patch)
tree	7aa355fd0b89ec0b2611e17ee84a14c6fa449e22
parent	60b8e179403ea8a6ae163ac1e62206766d75f137 (diff)
parent	e8ac7a941dd76f8ab24d01c9f1a0099672baf057 (diff)
download	libyuv-androidx-lifecycle-release.tar.gz