aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alan Kelly <alankelly@google.com> 2022-08-23 07:35:09 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-23 07:36:05 -0700
commit ddbb37a87d134781f23d35fed4ad8262241a50ad (patch)
tree f29f21ad65a094b32df3e7b0d94dcad3fa706e38
parent 0f51d3524b7d5ea4d40f24ab3aa9a5c0215bea50 (diff)
download XNNPACK-ddbb37a87d134781f23d35fed4ad8262241a50ad.tar.gz
Variable size transpose ukernels no longer assume that input and output element strides are equal to the element size.
PiperOrigin-RevId: 469454228
-rw-r--r-- bench/xx-transpose.cc 4
-rw-r--r-- src/operator-run.c 20
-rw-r--r-- src/xnnpack/microfnptr.h 6
-rw-r--r-- src/xnnpack/transpose.h 16
-rw-r--r-- src/xx-transpose/1x1-memcpy.c 18
-rw-r--r-- test/transpose-microkernel-tester.h 44
-rw-r--r-- test/x16-transpose.cc 1284
-rw-r--r-- test/x24-transpose.cc 416
-rw-r--r-- test/x32-transpose.cc 1774
-rw-r--r-- test/x64-transpose.cc 610
-rw-r--r-- test/x8-transpose.cc 850
-rw-r--r-- test/xx-transpose.cc 111
12 files changed, 5121 insertions, 32 deletions
diff --git a/bench/xx-transpose.cc b/bench/xx-transpose.cc
index a6441c4e1..ae596bbb6 100644
--- a/bench/xx-transpose.cc
+++ b/bench/xx-transpose.cc
@@ -41,8 +41,8 @@ void transpose(
std::fill(y.begin(), y.end(), 0);
for (auto _ : state) {
- transpose(x.data(), y.data(), tile_wbytes, tile_hbytes, element_size, width,
- height);
+ transpose(x.data(), y.data(), tile_wbytes, tile_hbytes, element_size,
+ element_size, element_size, width, height);
}
const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
diff --git a/src/operator-run.c b/src/operator-run.c
index ce3b4d362..ab4d5ace5 100644
--- a/src/operator-run.c
+++ b/src/operator-run.c
@@ -159,13 +159,15 @@ void xnn_compute_transposev_2d(
const size_t ld_output = context->output_stride[0];
const void* x = (const void*) ((uintptr_t) context->x +
i * context->input_stride[0] + j * ld_input);
- void* y = (void*) ((uintptr_t) context->y + element_size * j + i * context->output_stride[0]);
+ void* y = (void*) ((uintptr_t) context->y + context->output_stride[1] * j + i * context->output_stride[0]);
context->variable_size_ukernel(
x,
y,
ld_input,
ld_output,
+ context->input_stride[0],
+ context->output_stride[1],
element_size,
tile_i,
tile_j);
@@ -185,13 +187,15 @@ void xnn_compute_transposev_3d(
const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->input_stride[1] +
k * ld_input);
void* y = (void*)((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_stride[1] +
- k * element_size);
+ k * context->output_stride[2]);
context->variable_size_ukernel(
x,
y,
ld_input,
ld_output,
+ context->input_stride[1],
+ context->output_stride[2],
element_size,
tile_j,
tile_k);
@@ -211,7 +215,7 @@ void xnn_compute_transposev_4d(
const size_t ld_output = context->output_stride[2];
const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->input_stride[1] +
k * context->input_stride[2] + l * ld_input);
- void* y = (void*)((uintptr_t)context->y + element_size * l + i * context->output_stride[0] +
+ void* y = (void*)((uintptr_t)context->y + context->output_stride[3] * l + i * context->output_stride[0] +
j * context->output_stride[1] + k * context->output_stride[2]);
context->variable_size_ukernel(
@@ -219,6 +223,8 @@ void xnn_compute_transposev_4d(
y,
ld_input,
ld_output,
+ context->input_stride[2],
+ context->output_stride[3],
element_size,
tile_k,
tile_l);
@@ -239,7 +245,7 @@ void xnn_compute_transposev_5d(
const size_t ld_output = context->output_stride[3];
const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->input_stride[1] +
k * context->input_stride[2] + l * context->input_stride[3] + m * ld_input);
- void* y = (void*)((uintptr_t)context->y + element_size * m + i * context->output_stride[0] +
+ void* y = (void*)((uintptr_t)context->y + context->output_stride[4] * m + i * context->output_stride[0] +
j * context->output_stride[1] + k * context->output_stride[2] + l * context->output_stride[3]);
context->variable_size_ukernel(
@@ -247,6 +253,8 @@ void xnn_compute_transposev_5d(
y,
ld_input,
ld_output,
+ context->input_stride[3],
+ context->output_stride[4],
element_size,
tile_l,
tile_m);
@@ -269,7 +277,7 @@ void xnn_compute_transposev_6d(
const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->input_stride[1] +
k * context->input_stride[2] + l * context->input_stride[3] +
m * context->input_stride[4] + n * ld_input);
- void* y = (void*)((uintptr_t)context->y + element_size * n + i * context->output_stride[0] +
+ void* y = (void*)((uintptr_t)context->y + context->output_stride[5] * n + i * context->output_stride[0] +
j * context->output_stride[1] + k * context->output_stride[2] + l * context->output_stride[3] +
m * context->output_stride[4]);
@@ -278,6 +286,8 @@ void xnn_compute_transposev_6d(
y,
ld_input,
ld_output,
+ context->input_stride[4],
+ context->output_stride[5],  // output element stride: the same stride that scales n in the y offset above
element_size,
tile_m,
tile_n);
diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h
index 8555ea44c..93525b6fe 100644
--- a/src/xnnpack/microfnptr.h
+++ b/src/xnnpack/microfnptr.h
@@ -1009,8 +1009,10 @@ typedef void (*xnn_transposec_ukernel_function)(
typedef void (*xnn_transposev_ukernel_function)(
const void* input,
void* output,
- size_t input_stride,
- size_t output_stride,
+ size_t input_row_stride,
+ size_t output_row_stride,
+ size_t input_element_stride,
+ size_t output_element_stride,
size_t element_size,
size_t block_width,
size_t block_height);
diff --git a/src/xnnpack/transpose.h b/src/xnnpack/transpose.h
index 3634f6c8d..5e2a8afa9 100644
--- a/src/xnnpack/transpose.h
+++ b/src/xnnpack/transpose.h
@@ -14,13 +14,15 @@
extern "C" {
#endif
-#define DECLARE_XX_TRANSPOSEV_UKERNEL_FUNCTION(fn_name) \
- XNN_INTERNAL void fn_name(const void* input, \
- void* output, \
- size_t input_stride, \
- size_t output_stride, \
- size_t element_size, \
- size_t block_width, \
+#define DECLARE_XX_TRANSPOSEV_UKERNEL_FUNCTION(fn_name) \
+ XNN_INTERNAL void fn_name(const void* input, \
+ void* output, \
+ size_t input_row_stride, \
+ size_t output_row_stride, \
+ size_t input_element_stride, \
+ size_t output_element_stride, \
+ size_t element_size, \
+ size_t block_width, \
size_t block_height);
DECLARE_XX_TRANSPOSEV_UKERNEL_FUNCTION(xnn_xx_transposev_ukernel__1x1_memcpy)
diff --git a/src/xx-transpose/1x1-memcpy.c b/src/xx-transpose/1x1-memcpy.c
index e8e5cdb31..47a39beae 100644
--- a/src/xx-transpose/1x1-memcpy.c
+++ b/src/xx-transpose/1x1-memcpy.c
@@ -13,18 +13,16 @@
void xnn_xx_transposev_ukernel__1x1_memcpy(
const void* input,
void* output,
- size_t input_stride,
- size_t output_stride,
+ size_t input_row_stride,
+ size_t output_row_stride,
+ size_t input_element_stride,
+ size_t output_element_stride,
size_t element_size,
size_t block_width,
size_t block_height)
{
- const size_t tile_height = 1;
- const size_t tile_width = 1;
- const size_t tile_wbytes = tile_width * element_size;
- const size_t input_reset = tile_wbytes - block_height * input_stride;
- const size_t output_reset = tile_width * output_stride - block_height * element_size;
- const size_t input_offset = tile_height * input_stride;
+ const size_t input_reset = input_element_stride - block_height * input_row_stride;
+ const size_t output_reset = output_row_stride - block_height * output_element_stride;
const void* i = (const void*) input;
void* o = (void*) output;
@@ -33,8 +31,8 @@ void xnn_xx_transposev_ukernel__1x1_memcpy(
size_t bh = block_height;
for (; bh >= 1; bh -= 1) {
memcpy(o, i, element_size);
- i = (const void*) ((uintptr_t) i + input_offset);
- o = (void*) ((uintptr_t) o + element_size);
+ i = (const void*) ((uintptr_t) i + input_row_stride);
+ o = (void*) ((uintptr_t) o + output_element_stride);
}
i = (const void*) ((uintptr_t) i + input_reset);
diff --git a/test/transpose-microkernel-tester.h b/test/transpose-microkernel-tester.h
index 7fd79259d..c9c7f9f51 100644
--- a/test/transpose-microkernel-tester.h
+++ b/test/transpose-microkernel-tester.h
@@ -58,6 +58,34 @@ class TransposeMicrokernelTester {
inline size_t output_stride() const { return this->output_stride_; }
+ inline TransposeMicrokernelTester& input_element_stride(size_t input_element_stride) {
+ assert(input_element_stride >= element_size_);
+ this->input_element_stride_ = input_element_stride;
+ return *this;
+ }
+
+ inline size_t input_element_stride() const {
+ if (input_element_stride_ == 0) {
+ return element_size_;
+ } else {
+ return input_element_stride_;
+ }
+ }
+
+ inline TransposeMicrokernelTester& output_element_stride(size_t output_element_stride) {
+ assert(output_element_stride >= element_size_);
+ this->output_element_stride_ = output_element_stride;
+ return *this;
+ }
+
+ inline size_t output_element_stride() const {
+ if (output_element_stride_ == 0) {
+ return element_size_;
+ } else {
+ return output_element_stride_;
+ }
+ }
+
inline TransposeMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -66,16 +94,18 @@ class TransposeMicrokernelTester {
inline size_t iterations() const { return this->iterations_; }
void Test(xnn_transposev_ukernel_function transpose) const {
- std::vector<uint8_t> input(input_stride() * block_height() * element_size() + XNN_EXTRA_BYTES);
- std::vector<uint8_t> output(output_stride() * block_width() * element_size());
+ std::vector<uint8_t> input(input_stride() * block_height() * input_element_stride() + XNN_EXTRA_BYTES);
+ std::vector<uint8_t> output(output_stride() * block_width() * output_element_stride());
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), UINT8_C(0xA5));
// Call optimized micro-kernel.
transpose(input.data(),
output.data(),
- input_stride() * element_size(),
- output_stride() * element_size(),
+ input_stride() * input_element_stride(),
+ output_stride() * output_element_stride(),
+ input_element_stride(),
+ output_element_stride(),
element_size(),
block_width(),
block_height());
@@ -83,8 +113,8 @@ class TransposeMicrokernelTester {
// Verify results.
for (size_t c = 0; c < block_width(); c++) {
for (size_t r = 0; r < block_height(); r++) {
- ASSERT_EQ(std::memcmp(&input[element_size() * (c+ r * input_stride())],
- &output[element_size() * (r + c * output_stride())],
+ ASSERT_EQ(std::memcmp(&input[input_element_stride() * (c+ r * input_stride())],
+ &output[output_element_stride() * (r + c * output_stride())],
element_size()), 0)
<< "at row " << r << " / " << block_height()
<< ", at column " << c << " / " << block_width();
@@ -226,6 +256,8 @@ class TransposeMicrokernelTester {
size_t element_size_ = 1;
size_t input_stride_ = 1;
size_t output_stride_ = 1;
+ size_t input_element_stride_ = 0;
+ size_t output_element_stride_ = 0;
size_t block_height_ = 1;
size_t block_width_ = 1;
size_t iterations_ = 15;
diff --git a/test/x16-transpose.cc b/test/x16-transpose.cc
index 5098ce75c..a40285df4 100644
--- a/test/x16-transpose.cc
+++ b/test/x16-transpose.cc
@@ -165,6 +165,43 @@ TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
+TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_17_bw_38_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_3_bw_10_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_7_bw_46_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -313,6 +350,43 @@ TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_is_8_os_2) {
.Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
+TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_17_bw_76_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(17)
+ .block_width(76)
+ .block_height(17)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_3_bw_20_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(3)
+ .block_width(20)
+ .block_height(3)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_7_bw_92_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(13)
+ .block_width(92)
+ .block_height(7)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -461,6 +535,43 @@ TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
+TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_34_bw_19_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_6_bw_5_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_14_bw_23_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -609,6 +720,43 @@ TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
+TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_34_bw_38_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_6_bw_10_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_14_bw_46_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -757,6 +905,43 @@ TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_is_8_os_4) {
.Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
+TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_34_bw_76_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(34)
+ .block_width(76)
+ .block_height(34)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_6_bw_20_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(6)
+ .block_width(20)
+ .block_height(6)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_14_bw_92_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(20)
+ .block_width(92)
+ .block_height(14)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -905,6 +1090,43 @@ TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
+TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_68_bw_19_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_12_bw_5_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_28_bw_23_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1053,6 +1275,43 @@ TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
+TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_68_bw_38_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_12_bw_10_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_28_bw_46_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
+}
+
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -1201,6 +1460,43 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
+TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_68_bw_76_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_12_bw_20_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_28_bw_92_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
+}
+
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8) {
TEST_REQUIRES_X86_SSE2;
@@ -1361,6 +1657,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
+
+ TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_68_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(68)
+ .block_width(152)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x8_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_12_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(12)
+ .block_width(40)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x8_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_28_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(34)
+ .block_width(184)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x8_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1524,6 +1860,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1687,6 +2063,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1850,6 +2266,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2013,6 +2469,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2176,6 +2672,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2327,6 +2863,43 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -2478,6 +3051,43 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_136_bw_152_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_24_bw_40_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -2629,6 +3239,43 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -2780,6 +3427,43 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_136_bw_152_ies_13) {
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_24_bw_40_oes_13) {
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -2943,6 +3627,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3106,6 +3830,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3269,6 +4033,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3432,6 +4236,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3595,6 +4439,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3758,6 +4642,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3921,6 +4845,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4084,6 +5048,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4247,6 +5251,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4410,6 +5454,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4573,6 +5657,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4736,6 +5860,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4899,6 +6063,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5062,6 +6266,46 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5225,4 +6469,44 @@ TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(2)
+ .input_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(2)
+ .output_element_stride(13)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
+
+ TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(2)
+ .input_element_stride(19)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
diff --git a/test/x24-transpose.cc b/test/x24-transpose.cc
index f02c913e2..bf0a151d4 100644
--- a/test/x24-transpose.cc
+++ b/test/x24-transpose.cc
@@ -165,6 +165,43 @@ TEST(X24_TRANSPOSEC__1X2_SCALAR_3, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x24_transposec_ukernel__1x2_scalar);
}
+TEST(X24_TRANSPOSEC__1X2_SCALAR_3, bh_17_bw_38_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__1X2_SCALAR_3, bh_3_bw_10_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__1X2_SCALAR_3, bh_7_bw_46_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x2_scalar);
+}
+
TEST(X24_TRANSPOSEC__1X4_SCALAR_3, bh_1_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -313,6 +350,43 @@ TEST(X24_TRANSPOSEC__1X4_SCALAR_3, bh_1_bw_4_is_8_os_2) {
.Test(xnn_x24_transposec_ukernel__1x4_scalar);
}
+TEST(X24_TRANSPOSEC__1X4_SCALAR_3, bh_17_bw_76_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(17)
+ .block_width(76)
+ .block_height(17)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__1X4_SCALAR_3, bh_3_bw_20_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(3)
+ .block_width(20)
+ .block_height(3)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__1X4_SCALAR_3, bh_7_bw_92_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(13)
+ .block_width(92)
+ .block_height(7)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__1x4_scalar);
+}
+
TEST(X24_TRANSPOSEC__2X1_SCALAR_3, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -461,6 +535,43 @@ TEST(X24_TRANSPOSEC__2X1_SCALAR_3, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x24_transposec_ukernel__2x1_scalar);
}
+TEST(X24_TRANSPOSEC__2X1_SCALAR_3, bh_34_bw_19_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x1_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X1_SCALAR_3, bh_6_bw_5_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x1_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X1_SCALAR_3, bh_14_bw_23_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x1_scalar);
+}
+
TEST(X24_TRANSPOSEC__2X2_SCALAR_3, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -609,6 +720,43 @@ TEST(X24_TRANSPOSEC__2X2_SCALAR_3, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x24_transposec_ukernel__2x2_scalar);
}
+TEST(X24_TRANSPOSEC__2X2_SCALAR_3, bh_34_bw_38_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X2_SCALAR_3, bh_6_bw_10_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X2_SCALAR_3, bh_14_bw_46_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_scalar);
+}
+
TEST(X24_TRANSPOSEC__2X4_SCALAR_3, bh_2_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -757,6 +905,43 @@ TEST(X24_TRANSPOSEC__2X4_SCALAR_3, bh_2_bw_4_is_8_os_4) {
.Test(xnn_x24_transposec_ukernel__2x4_scalar);
}
+TEST(X24_TRANSPOSEC__2X4_SCALAR_3, bh_34_bw_76_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(34)
+ .block_width(76)
+ .block_height(34)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X4_SCALAR_3, bh_6_bw_20_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(6)
+ .block_width(20)
+ .block_height(6)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__2X4_SCALAR_3, bh_14_bw_92_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(20)
+ .block_width(92)
+ .block_height(14)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x4_scalar);
+}
+
TEST(X24_TRANSPOSEC__4X1_SCALAR_3, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -905,6 +1090,43 @@ TEST(X24_TRANSPOSEC__4X1_SCALAR_3, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x24_transposec_ukernel__4x1_scalar);
}
+TEST(X24_TRANSPOSEC__4X1_SCALAR_3, bh_68_bw_19_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x1_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X1_SCALAR_3, bh_12_bw_5_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x1_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X1_SCALAR_3, bh_28_bw_23_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x1_scalar);
+}
+
TEST(X24_TRANSPOSEC__4X2_SCALAR_3, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1053,6 +1275,43 @@ TEST(X24_TRANSPOSEC__4X2_SCALAR_3, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x24_transposec_ukernel__4x2_scalar);
}
+TEST(X24_TRANSPOSEC__4X2_SCALAR_3, bh_68_bw_38_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X2_SCALAR_3, bh_12_bw_10_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x2_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X2_SCALAR_3, bh_28_bw_46_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x2_scalar);
+}
+
TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_4_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -1201,6 +1460,43 @@ TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_4_bw_4_is_8_os_8) {
.Test(xnn_x24_transposec_ukernel__4x4_scalar);
}
+TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_68_bw_76_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_12_bw_20_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_scalar);
+}
+
+TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_28_bw_92_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_scalar);
+}
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X24_TRANSPOSEC__2X2_NEON_TBL_3, bh_2_bw_2) {
TEST_REQUIRES_ARM_NEON;
@@ -1361,6 +1657,46 @@ TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x24_transposec_ukernel__2x2_neon_tbl);
}
+
+ TEST(X24_TRANSPOSEC__2X2_NEON_TBL_3, bh_34_bw_38_ies_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_neon_tbl);
+ }
+
+ TEST(X24_TRANSPOSEC__2X2_NEON_TBL_3, bh_6_bw_10_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_neon_tbl);
+ }
+
+ TEST(X24_TRANSPOSEC__2X2_NEON_TBL_3, bh_14_bw_46_ies_20_oes_16) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__2x2_neon_tbl);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -1524,6 +1860,46 @@ TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x24_transposec_ukernel__4x4_aarch64_neon_tbl);
}
+
+ TEST(X24_TRANSPOSEC__4X4_AARCH64_NEON_TBL_3, bh_68_bw_76_ies_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
+
+ TEST(X24_TRANSPOSEC__4X4_AARCH64_NEON_TBL_3, bh_12_bw_20_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
+
+ TEST(X24_TRANSPOSEC__4X4_AARCH64_NEON_TBL_3, bh_28_bw_92_ies_20_oes_16) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
#endif // XNN_ARCH_ARM64
@@ -1687,4 +2063,44 @@ TEST(X24_TRANSPOSEC__4X4_SCALAR_3, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x24_transposec_ukernel__4x4_ssse3);
}
+
+ TEST(X24_TRANSPOSEC__4X4_SSSE3_3, bh_68_bw_76_ies_14) {
+ TEST_REQUIRES_X86_SSSE3;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_ssse3);
+ }
+
+ TEST(X24_TRANSPOSEC__4X4_SSSE3_3, bh_12_bw_20_oes_14) {
+ TEST_REQUIRES_X86_SSSE3;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_ssse3);
+ }
+
+ TEST(X24_TRANSPOSEC__4X4_SSSE3_3, bh_28_bw_92_ies_20_oes_16) {
+ TEST_REQUIRES_X86_SSSE3;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_x24_transposec_ukernel__4x4_ssse3);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/x32-transpose.cc b/test/x32-transpose.cc
index 16e859013..d22307b83 100644
--- a/test/x32-transpose.cc
+++ b/test/x32-transpose.cc
@@ -165,6 +165,43 @@ TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
+TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_17_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_3_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_7_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -313,6 +350,43 @@ TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
+TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_17_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_3_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_7_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -461,6 +535,43 @@ TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_is_8_os_2) {
.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
+TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_17_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(17)
+ .block_width(76)
+ .block_height(17)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_3_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(3)
+ .block_width(20)
+ .block_height(3)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_7_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(13)
+ .block_width(92)
+ .block_height(7)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -609,6 +720,43 @@ TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_is_8_os_2) {
.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
+TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_17_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(17)
+ .block_width(76)
+ .block_height(17)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_3_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(3)
+ .block_width(20)
+ .block_height(3)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_7_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(13)
+ .block_width(92)
+ .block_height(7)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -757,6 +905,43 @@ TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
+TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_34_bw_19_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_6_bw_5_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_14_bw_23_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -905,6 +1090,43 @@ TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
+TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_34_bw_19_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_6_bw_5_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_14_bw_23_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1053,6 +1275,43 @@ TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
+TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_34_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_6_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_14_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1201,6 +1460,43 @@ TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
+TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_34_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_6_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_14_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -1349,6 +1645,43 @@ TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_is_8_os_4) {
.Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
+TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_34_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(34)
+ .block_width(76)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_6_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(6)
+ .block_width(20)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_14_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(20)
+ .block_width(92)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -1497,6 +1830,43 @@ TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_is_8_os_4) {
.Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
+TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_34_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(34)
+ .block_width(76)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_6_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(6)
+ .block_width(20)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_14_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(20)
+ .block_width(92)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -1645,6 +2015,43 @@ TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
+TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_68_bw_19_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_12_bw_5_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_28_bw_23_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -1793,6 +2200,43 @@ TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
+TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_68_bw_19_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_12_bw_5_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_28_bw_23_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1941,6 +2385,43 @@ TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
+TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_68_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_12_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_28_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -2089,6 +2570,43 @@ TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
+TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_68_bw_38_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_12_bw_10_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_28_bw_46_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
+}
+
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -2237,6 +2755,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_is_8_os_8) {
.Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
+TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
+}
+
+TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
+}
+
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -2385,6 +2940,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
+TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
+}
+
#if XNN_ARCH_ARM64
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4) {
TEST_REQUIRES_ARM_NEON;
@@ -2545,6 +3137,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
+
+ TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
+ }
#endif // XNN_ARCH_ARM64
@@ -2708,6 +3340,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2871,6 +3543,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3034,6 +3746,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3197,6 +3949,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3360,6 +4152,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3523,6 +4355,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3686,6 +4558,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_sse);
}
+
+ TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_X86_SSE;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_sse);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_X86_SSE;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_sse);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_X86_SSE;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_sse);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -3837,6 +4749,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -3988,6 +4937,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -4139,6 +5125,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -4290,6 +5313,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -4441,6 +5501,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -4592,6 +5689,43 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -4755,6 +5889,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -4918,6 +6092,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5081,6 +6295,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5244,6 +6498,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5407,6 +6701,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5570,6 +6904,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5733,6 +7107,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5896,6 +7310,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6059,6 +7513,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6222,6 +7716,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6385,6 +7919,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6548,6 +8122,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6711,6 +8325,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -6874,6 +8528,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -7037,6 +8731,46 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -7200,4 +8934,44 @@ TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(4)
+ .input_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(4)
+ .output_element_stride(15)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
+
+ TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(4)
+ .input_element_stride(21)
+ .output_element_stride(17)
+ .iterations(1)
+ .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
diff --git a/test/x64-transpose.cc b/test/x64-transpose.cc
index cadc6f09a..e0363b728 100644
--- a/test/x64-transpose.cc
+++ b/test/x64-transpose.cc
@@ -165,6 +165,43 @@ TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
+TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_17_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_3_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_7_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
+}
+
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -313,6 +350,43 @@ TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
+TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_17_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_3_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_7_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
+}
+
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -461,6 +535,43 @@ TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
+TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_34_bw_19_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_6_bw_5_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_14_bw_23_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
+}
+
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -609,6 +720,43 @@ TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
+TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_34_bw_19_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_6_bw_5_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_14_bw_23_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
+}
+
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -757,6 +905,43 @@ TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
+TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_34_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_6_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_14_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
+}
+
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -905,6 +1090,43 @@ TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
+TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_34_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_6_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_14_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
+}
+
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -1053,6 +1275,43 @@ TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
+TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_68_bw_19_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_12_bw_5_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_28_bw_23_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
+}
+
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -1201,6 +1460,43 @@ TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
+TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_68_bw_19_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_12_bw_5_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_28_bw_23_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
+}
+
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1349,6 +1645,43 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
+TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_68_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_12_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
+}
+
+TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_28_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
+}
+
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1497,6 +1830,43 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
+TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_68_bw_38_ies_19) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_12_bw_10_oes_19) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_28_bw_46_ies_25_oes_21) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
+}
+
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2) {
TEST_REQUIRES_X86_SSE2;
@@ -1657,6 +2027,46 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1820,6 +2230,46 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1983,6 +2433,46 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2146,6 +2636,46 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2309,6 +2839,46 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -2472,4 +3042,44 @@ TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
.iterations(1)
.Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(8)
+ .input_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(8)
+ .output_element_stride(19)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
+ }
+
+ TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(8)
+ .input_element_stride(25)
+ .output_element_stride(21)
+ .iterations(1)
+ .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/x8-transpose.cc b/test/x8-transpose.cc
index 13cf9ce4d..5eca8cc54 100644
--- a/test/x8-transpose.cc
+++ b/test/x8-transpose.cc
@@ -165,6 +165,43 @@ TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_is_4_os_2) {
.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
+TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_17_bw_38_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(17)
+ .block_width(38)
+ .block_height(17)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_3_bw_10_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(3)
+ .block_width(10)
+ .block_height(3)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_7_bw_46_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(13)
+ .block_width(46)
+ .block_height(7)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -313,6 +350,43 @@ TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_is_8_os_2) {
.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
+TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_17_bw_76_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(17)
+ .block_width(76)
+ .block_height(17)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_3_bw_20_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(3)
+ .block_width(20)
+ .block_height(3)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_7_bw_92_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(13)
+ .block_width(92)
+ .block_height(7)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -461,6 +535,43 @@ TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_is_2_os_4) {
.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
+TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_34_bw_19_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(34)
+ .block_width(19)
+ .block_height(34)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_6_bw_5_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(6)
+ .block_width(5)
+ .block_height(6)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_14_bw_23_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(20)
+ .block_width(23)
+ .block_height(14)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -609,6 +720,43 @@ TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_is_4_os_4) {
.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
+TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_34_bw_38_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(34)
+ .block_width(38)
+ .block_height(34)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_6_bw_10_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(6)
+ .block_width(10)
+ .block_height(6)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_14_bw_46_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(20)
+ .block_width(46)
+ .block_height(14)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -757,6 +905,43 @@ TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_is_8_os_4) {
.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
+TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_34_bw_76_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(34)
+ .block_width(76)
+ .block_height(34)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_6_bw_20_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(6)
+ .block_width(20)
+ .block_height(6)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_14_bw_92_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(20)
+ .block_width(92)
+ .block_height(14)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -905,6 +1090,43 @@ TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_is_2_os_8) {
.Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
+TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_68_bw_19_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(68)
+ .block_width(19)
+ .block_height(68)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_12_bw_5_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(12)
+ .block_width(5)
+ .block_height(12)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_28_bw_23_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(34)
+ .block_width(23)
+ .block_height(28)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2) {
TransposeMicrokernelTester()
.input_stride(4)
@@ -1053,6 +1275,43 @@ TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_is_4_os_8) {
.Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
+TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_68_bw_38_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(38)
+ .output_stride(68)
+ .block_width(38)
+ .block_height(68)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_12_bw_10_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(10)
+ .output_stride(12)
+ .block_width(10)
+ .block_height(12)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_28_bw_46_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(51)
+ .output_stride(34)
+ .block_width(46)
+ .block_height(28)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
+}
+
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4) {
TransposeMicrokernelTester()
.input_stride(8)
@@ -1201,6 +1460,43 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
+TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_68_bw_76_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(76)
+ .output_stride(68)
+ .block_width(76)
+ .block_height(68)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_12_bw_20_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(20)
+ .output_stride(12)
+ .block_width(20)
+ .block_height(12)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
+}
+
+TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_28_bw_92_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(97)
+ .output_stride(34)
+ .block_width(92)
+ .block_height(28)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
+}
+
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16) {
TEST_REQUIRES_X86_SSE2;
@@ -1361,6 +1657,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_272_bw_304_ies_12) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_48_bw_80_oes_12) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1524,6 +1860,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_272_bw_304_ies_12) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_48_bw_80_oes_12) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
+ TEST_REQUIRES_X86_SSE2;
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
+ }
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -1675,6 +2051,43 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_272_bw_304_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_48_bw_80_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -1826,6 +2239,43 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_272_bw_304_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_48_bw_80_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
+ }
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
@@ -1989,6 +2439,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2152,6 +2642,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2315,6 +2845,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2478,6 +3048,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2641,6 +3251,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2804,6 +3454,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -2967,6 +3657,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(152)
+ .output_stride(136)
+ .block_width(152)
+ .block_height(136)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(40)
+ .output_stride(24)
+ .block_width(40)
+ .block_height(24)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(189)
+ .output_stride(62)
+ .block_width(184)
+ .block_height(56)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3130,6 +3860,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_272_bw_304_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_48_bw_80_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3293,6 +4063,46 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_272_bw_304_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_48_bw_80_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3456,4 +4266,44 @@ TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
.iterations(1)
.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_272_bw_304_ies_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(304)
+ .output_stride(272)
+ .block_width(304)
+ .block_height(272)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_48_bw_80_oes_12) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(80)
+ .output_stride(48)
+ .block_width(80)
+ .block_height(48)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
+ }
+
+ TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
+ TEST_REQUIRES_ARM_NEON;
+ TransposeMicrokernelTester()
+ .input_stride(373)
+ .output_stride(118)
+ .block_width(368)
+ .block_height(112)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
diff --git a/test/xx-transpose.cc b/test/xx-transpose.cc
index 8b52b72c3..6b26cc2d3 100644
--- a/test/xx-transpose.cc
+++ b/test/xx-transpose.cc
@@ -164,6 +164,43 @@ TEST(XX_TRANSPOSEV__1X1_MEMCPY_1, bh_1_bw_1_is_2_os_2) {
.iterations(1)
.Test(xnn_xx_transposev_ukernel__1x1_memcpy);
}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_1, bh_17_bw_19_ies_12) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(17)
+ .block_width(19)
+ .block_height(17)
+ .element_size(1)
+ .input_element_stride(12)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_1, bh_3_bw_5_oes_12) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(3)
+ .block_width(5)
+ .block_height(3)
+ .element_size(1)
+ .output_element_stride(12)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_1, bh_7_bw_23_ies_18_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(13)
+ .block_width(23)
+ .block_height(7)
+ .element_size(1)
+ .input_element_stride(18)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
TEST(XX_TRANSPOSEV__1X1_MEMCPY_3, bh_1_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -311,6 +348,43 @@ TEST(XX_TRANSPOSEV__1X1_MEMCPY_3, bh_1_bw_1_is_2_os_2) {
.iterations(1)
.Test(xnn_xx_transposev_ukernel__1x1_memcpy);
}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_3, bh_17_bw_19_ies_14) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(17)
+ .block_width(19)
+ .block_height(17)
+ .element_size(3)
+ .input_element_stride(14)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_3, bh_3_bw_5_oes_14) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(3)
+ .block_width(5)
+ .block_height(3)
+ .element_size(3)
+ .output_element_stride(14)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_3, bh_7_bw_23_ies_20_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(13)
+ .block_width(23)
+ .block_height(7)
+ .element_size(3)
+ .input_element_stride(20)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
TEST(XX_TRANSPOSEV__1X1_MEMCPY_5, bh_1_bw_1) {
TransposeMicrokernelTester()
.input_stride(2)
@@ -457,4 +531,41 @@ TEST(XX_TRANSPOSEV__1X1_MEMCPY_5, bh_1_bw_1_is_2_os_2) {
.element_size(5)
.iterations(1)
.Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_5, bh_17_bw_19_ies_16) {
+ TransposeMicrokernelTester()
+ .input_stride(19)
+ .output_stride(17)
+ .block_width(19)
+ .block_height(17)
+ .element_size(5)
+ .input_element_stride(16)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_5, bh_3_bw_5_oes_16) {
+ TransposeMicrokernelTester()
+ .input_stride(5)
+ .output_stride(3)
+ .block_width(5)
+ .block_height(3)
+ .element_size(5)
+ .output_element_stride(16)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
+}
+
+TEST(XX_TRANSPOSEV__1X1_MEMCPY_5, bh_7_bw_23_ies_22_oes_18) {
+ TransposeMicrokernelTester()
+ .input_stride(28)
+ .output_stride(13)
+ .block_width(23)
+ .block_height(7)
+ .element_size(5)
+ .input_element_stride(22)
+ .output_element_stride(18)
+ .iterations(1)
+ .Test(xnn_xx_transposev_ukernel__1x1_memcpy);
} \ No newline at end of file