author     Alan Kelly <alankelly@google.com>              2022-08-23 07:45:54 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-23 07:47:00 -0700
commit     3c3769df8dc93069b1464d6912aefa042059503a (patch)
tree       75e820f949e87626995099affba00bed334eb1bf
parent     ddbb37a87d134781f23d35fed4ad8262241a50ad (diff)
Space to Depth operator
PiperOrigin-RevId: 469456395
-rw-r--r--  BUILD.bazel                              9
-rwxr-xr-x  CMakeLists.txt                           5
-rw-r--r--  include/xnnpack.h                       51
-rw-r--r--  src/operator-run.c                       2
-rw-r--r--  src/operator-strings.c                   3
-rw-r--r--  src/operator-strings.yaml                6
-rw-r--r--  src/operators/transpose-nd.c           247
-rw-r--r--  src/xnnpack/operator-type.h              3
-rw-r--r--  test/depth-to-space-nhwc.cc              1
-rw-r--r--  test/space-to-depth-nhwc.cc            306
-rw-r--r--  test/space-to-depth-operator-tester.h  351
-rwxr-xr-x  tools/generate-transpose-test.py        43
12 files changed, 1025 insertions(+), 2 deletions(-)
diff --git a/BUILD.bazel b/BUILD.bazel
index 63feeb4ec..ecd7f482f 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -14889,6 +14889,15 @@ xnnpack_unit_test(
)
xnnpack_unit_test(
+ name = "space_to_depth_nhwc_test",
+ srcs = [
+ "test/space-to-depth-nhwc.cc",
+ "test/space-to-depth-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
name = "square_nc_test",
srcs = [
"test/square-nc.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 664c0e95e..27965112a 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7602,6 +7602,11 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(softmax-nc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators)
ADD_TEST(NAME softmax-nc-test COMMAND softmax-nc-test)
+ ADD_EXECUTABLE(space-to-depth-nhwc-test test/space-to-depth-nhwc.cc)
+ TARGET_INCLUDE_DIRECTORIES(space-to-depth-nhwc-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(space-to-depth-nhwc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators)
+ ADD_TEST(NAME space-to-depth-nhwc-test COMMAND space-to-depth-nhwc-test)
+
ADD_EXECUTABLE(square-nc-test test/square-nc.cc)
TARGET_INCLUDE_DIRECTORIES(square-nc-test PRIVATE src test)
TARGET_LINK_LIBRARIES(square-nc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators)
diff --git a/include/xnnpack.h b/include/xnnpack.h
index f6810decd..b183d32a9 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -2224,6 +2224,23 @@ enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
void* output,
pthreadpool_t threadpool);
+enum xnn_status xnn_create_space_to_depth_nhwc_x32(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
enum xnn_status xnn_create_transpose_nd_x32(
uint32_t flags,
xnn_operator_t* transpose_op_out);
@@ -2829,6 +2846,23 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
void* output,
pthreadpool_t threadpool);
+enum xnn_status xnn_create_space_to_depth_nhwc_x16(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
enum xnn_status xnn_create_transpose_nd_x16(
uint32_t flags,
xnn_operator_t* transpose_op_out);
@@ -3664,6 +3698,23 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
void* output,
pthreadpool_t threadpool);
+enum xnn_status xnn_create_space_to_depth_nhwc_x8(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool);
+
enum xnn_status xnn_create_transpose_nd_x8(
uint32_t flags,
xnn_operator_t* transpose_op_out);
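For reference, a minimal sketch of how the new space-to-depth API added to include/xnnpack.h above can be driven. The call sequence mirrors the unit tests added later in this change; the concrete sizes (1x6x6x3 input, block_size 2) and the helper name are illustrative assumptions, not part of the commit.

#include <stdint.h>
#include <xnnpack.h>

/* Hypothetical driver: 1x6x6x3 uint32 input, block_size 2 -> 1x3x3x12 output. */
int space_to_depth_example(const uint32_t input[108], uint32_t output[108]) {
  if (xnn_initialize(NULL /* allocator */) != xnn_status_success) {
    return -1;
  }

  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_create_space_to_depth_nhwc_x32(
      /*input_channels=*/3,
      /*input_channel_stride=*/3,    /* tightly packed input channels */
      /*output_channel_stride=*/12,  /* 3 * 2 * 2 output channels */
      /*block_size=*/2,
      /*flags=*/0,
      &op);
  if (status != xnn_status_success) {
    return -1;
  }

  status = xnn_setup_space_to_depth_nhwc_x32(
      op, /*batch_size=*/1, /*input_height=*/6, /*input_width=*/6,
      input, output, /*threadpool=*/NULL);
  if (status == xnn_status_success) {
    status = xnn_run_operator(op, /*threadpool=*/NULL);
  }
  xnn_delete_operator(op);
  return status == xnn_status_success ? 0 : -1;
}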
diff --git a/src/operator-run.c b/src/operator-run.c
index ab4d5ace5..67f27199c 100644
--- a/src/operator-run.c
+++ b/src/operator-run.c
@@ -287,7 +287,7 @@ void xnn_compute_transposev_6d(
ld_input,
ld_output,
context->input_stride[4],
- context->output_stride[4],
+ context->output_stride[5],
element_size,
tile_m,
tile_n);
diff --git a/src/operator-strings.c b/src/operator-strings.c
index d2edb053e..9407805e5 100644
--- a/src/operator-strings.c
+++ b/src/operator-strings.c
@@ -115,6 +115,9 @@ static const char *data =
"Softmax (NC, F16)\0"
"Softmax (NC, F32)\0"
"Softmax (NC, QU8)\0"
+ "Space To Depth (NHWC, X8)\0"
+ "Space To Depth (NHWC, X16)\0"
+ "Space To Depth (NHWC, X32)\0"
"Square (NC, F16)\0"
"Square (NC, F32)\0"
"Square Root (NC, F16)\0"
diff --git a/src/operator-strings.yaml b/src/operator-strings.yaml
index 14eb83eaf..d9abe6b0a 100644
--- a/src/operator-strings.yaml
+++ b/src/operator-strings.yaml
@@ -201,6 +201,12 @@
string: "Softmax (NC, F32)"
- name: xnn_operator_type_softmax_nc_qu8
string: "Softmax (NC, QU8)"
+- name: xnn_operator_type_space_to_depth_nhwc_x8
+ string: "Space To Depth (NHWC, X8)"
+- name: xnn_operator_type_space_to_depth_nhwc_x16
+ string: "Space To Depth (NHWC, X16)"
+- name: xnn_operator_type_space_to_depth_nhwc_x32
+ string: "Space To Depth (NHWC, X32)"
- name: xnn_operator_type_square_nc_f16
string: "Square (NC, F16)"
- name: xnn_operator_type_square_nc_f32
diff --git a/src/operators/transpose-nd.c b/src/operators/transpose-nd.c
index 41e4335c3..2d57791f7 100644
--- a/src/operators/transpose-nd.c
+++ b/src/operators/transpose-nd.c
@@ -883,3 +883,250 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
batch_size, input_height, input_width,
input, output, 4);
}
+
+static enum xnn_status create_space_to_depth_nhwc(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ enum xnn_operator_type operator_type,
+ xnn_operator_t* space_to_depth_op_out)
+{
+ xnn_operator_t space_to_depth_op = NULL;
+ enum xnn_status status = xnn_status_uninitialized;
+
+ if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
+ xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
+ xnn_operator_type_to_string(operator_type));
+ goto error;
+ }
+
+ status = xnn_status_invalid_parameter;
+
+ if (input_channels == 0) {
+ xnn_log_error("failed to create %s operator with %zu input channels: number of channels must be non-zero",
+ xnn_operator_type_to_string(operator_type), input_channels);
+ goto error;
+ }
+
+ if (input_channel_stride < input_channels) {
+ xnn_log_error(
+ "failed to create %s operator with input channel stride of %zu: "
+ "stride must be at least as large as the number of input channels (%zu)",
+ xnn_operator_type_to_string(operator_type),
+ input_channel_stride, input_channels);
+ goto error;
+ }
+
+ if (block_size <= 1) {
+ xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
+ xnn_operator_type_to_string(operator_type),
+ block_size);
+ goto error;
+ }
+
+ const size_t output_channels = input_channels * block_size * block_size;
+ if (output_channel_stride < output_channels) {
+ xnn_log_error(
+ "failed to create %s operator with output channel stride of %zu: "
+ "stride must be at least as large as the number of output channels (%" PRIu32 "x%" PRIu32 "x%zu)",
+ xnn_operator_type_to_string(operator_type),
+ output_channel_stride, block_size, block_size, input_channels);
+ goto error;
+ }
+
+ status = xnn_status_out_of_memory;
+
+ space_to_depth_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
+ if (space_to_depth_op == NULL) {
+ xnn_log_error(
+ "failed to allocate %zu bytes for %s operator descriptor",
+ sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
+ goto error;
+ }
+
+ space_to_depth_op->channels = input_channels;
+ space_to_depth_op->input_pixel_stride = input_channel_stride;
+ space_to_depth_op->output_pixel_stride = output_channel_stride;
+ space_to_depth_op->block_size = block_size;
+
+ space_to_depth_op->type = operator_type;
+ space_to_depth_op->flags = flags;
+
+ space_to_depth_op->state = xnn_run_state_invalid;
+
+ *space_to_depth_op_out = space_to_depth_op;
+ return xnn_status_success;
+
+error:
+ xnn_delete_operator(space_to_depth_op);
+ return status;
+}
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x8(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out)
+{
+ return create_space_to_depth_nhwc(
+ input_channels,
+ input_channel_stride,
+ output_channel_stride,
+ block_size,
+ flags,
+ xnn_operator_type_space_to_depth_nhwc_x8,
+ space_to_depth_op_out);
+}
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x16(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out)
+{
+ return create_space_to_depth_nhwc(
+ input_channels,
+ input_channel_stride,
+ output_channel_stride,
+ block_size,
+ flags,
+ xnn_operator_type_space_to_depth_nhwc_x16,
+ space_to_depth_op_out);
+}
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x32(
+ size_t input_channels,
+ size_t input_channel_stride,
+ size_t output_channel_stride,
+ uint32_t block_size,
+ uint32_t flags,
+ xnn_operator_t* space_to_depth_op_out)
+{
+ return create_space_to_depth_nhwc(
+ input_channels,
+ input_channel_stride,
+ output_channel_stride,
+ block_size,
+ flags,
+ xnn_operator_type_space_to_depth_nhwc_x32,
+ space_to_depth_op_out);
+}
+
+static enum xnn_status setup_space_to_depth_nhwc(
+ xnn_operator_t space_to_depth_op,
+ enum xnn_operator_type expected_operator_type,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ uint32_t element_size)
+{
+ if (space_to_depth_op->type != expected_operator_type) {
+ xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
+ xnn_operator_type_to_string(expected_operator_type),
+ xnn_operator_type_to_string(space_to_depth_op->type));
+ return xnn_status_invalid_parameter;
+ }
+ space_to_depth_op->state = xnn_run_state_invalid;
+
+ if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
+ xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
+ xnn_operator_type_to_string(expected_operator_type));
+ return xnn_status_uninitialized;
+ }
+
+ if (input_width == 0 || input_height == 0) {
+ xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
+ xnn_operator_type_to_string(expected_operator_type), input_width, input_height);
+ return xnn_status_invalid_parameter;
+ }
+
+ if (batch_size == 0) {
+ space_to_depth_op->state = xnn_run_state_skip;
+ return xnn_status_success;
+ }
+
+ const uint32_t block_size = space_to_depth_op->block_size;
+
+ const size_t input_shape[5] = {batch_size * (input_height / block_size), block_size, input_width / block_size, block_size, space_to_depth_op->channels};
+ const size_t perm[5] = {0, 2, 1, 3, 4};
+
+ const size_t input_stride[5] = {
+ block_size * input_width * space_to_depth_op->input_pixel_stride,
+ input_width * space_to_depth_op->input_pixel_stride,
+ block_size * space_to_depth_op->input_pixel_stride,
+ space_to_depth_op->input_pixel_stride,
+ 1};
+ const size_t output_stride[5] = {
+ (input_width/block_size) * space_to_depth_op->output_pixel_stride,
+ space_to_depth_op->output_pixel_stride,
+ block_size * space_to_depth_op->channels,
+ space_to_depth_op->channels,
+ 1};
+
+ return setup_transpose_nd(
+ space_to_depth_op,
+ input,
+ output,
+ 5,
+ input_shape,
+ perm,
+ input_stride,
+ output_stride,
+ element_size);
+}
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ return setup_space_to_depth_nhwc(
+ space_to_depth_op,
+ xnn_operator_type_space_to_depth_nhwc_x8,
+ batch_size, input_height, input_width,
+ input, output, sizeof(uint8_t));
+}
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ return setup_space_to_depth_nhwc(
+ space_to_depth_op,
+ xnn_operator_type_space_to_depth_nhwc_x16,
+ batch_size, input_height, input_width,
+ input, output, sizeof(uint16_t));
+}
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
+ xnn_operator_t space_to_depth_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const void* input,
+ void* output,
+ pthreadpool_t threadpool)
+{
+ return setup_space_to_depth_nhwc(
+ space_to_depth_op,
+ xnn_operator_type_space_to_depth_nhwc_x32,
+ batch_size, input_height, input_width,
+ input, output, sizeof(uint32_t));
+}
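The setup path above lowers space-to-depth onto the existing N-dimensional transpose instead of adding a dedicated kernel: the NHWC input is reinterpreted as a 5D tensor and the two block axes are swapped. A short worked illustration of that reinterpretation, with assumed sizes (N=1, H=W=6, C=3, block_size B=2) that are not taken from this change:

/* Illustration only; the sizes are assumptions, the mapping follows
 * setup_space_to_depth_nhwc above.
 *
 * Input (NHWC):  1 x 6 x 6 x 3, block_size B = 2
 * 5D view:       {N*H/B, B, W/B, B, C} = {3, 2, 3, 2, 3}
 * perm:          {0, 2, 1, 3, 4}
 * Transposed:    {N*H/B, W/B, B, B, C} = {3, 3, 2, 2, 3}
 * Output (NHWC): 1 x 3 x 3 x 12, i.e.
 *   output[n][oy][ox][(by*B + bx)*C + c] = input[n][oy*B + by][ox*B + bx][c]
 *
 * The input_stride/output_stride arrays passed to setup_transpose_nd are built
 * from the operator's channel strides, so padded channel layouts
 * (input_channel_stride > channels, output_channel_stride > B*B*channels)
 * are handled by the same transpose call.
 */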
diff --git a/src/xnnpack/operator-type.h b/src/xnnpack/operator-type.h
index 083aae3a4..d1a0b716f 100644
--- a/src/xnnpack/operator-type.h
+++ b/src/xnnpack/operator-type.h
@@ -109,6 +109,9 @@ enum xnn_operator_type {
xnn_operator_type_softmax_nc_f16,
xnn_operator_type_softmax_nc_f32,
xnn_operator_type_softmax_nc_qu8,
+ xnn_operator_type_space_to_depth_nhwc_x8,
+ xnn_operator_type_space_to_depth_nhwc_x16,
+ xnn_operator_type_space_to_depth_nhwc_x32,
xnn_operator_type_square_nc_f16,
xnn_operator_type_square_nc_f32,
xnn_operator_type_square_root_nc_f16,
diff --git a/test/depth-to-space-nhwc.cc b/test/depth-to-space-nhwc.cc
index 7a8a3ae7d..900b64f6b 100644
--- a/test/depth-to-space-nhwc.cc
+++ b/test/depth-to-space-nhwc.cc
@@ -99,7 +99,6 @@ TEST(DEPTH_TO_SPACE_NHWC_X8, output_channels_stride) {
.TestNHWCxX8();
}
-
TEST(DEPTH_TO_SPACE_NHWC_X16, one_pixel) {
DepthToSpaceOperatorTester()
.input_size(1, 1)
diff --git a/test/space-to-depth-nhwc.cc b/test/space-to-depth-nhwc.cc
new file mode 100644
index 000000000..b26674480
--- /dev/null
+++ b/test/space-to-depth-nhwc.cc
@@ -0,0 +1,306 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "space-to-depth-operator-tester.h"
+
+#include <gtest/gtest.h>
+
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, one_output_pixel) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, one_column) {
+ size_t block_size = 3;
+ for (size_t input_height = 2; input_height <= 7; input_height++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, one_row) {
+ size_t block_size = 3;
+ for (size_t input_width = 2; input_width <= 7; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, varying_input_size) {
+ size_t block_size = 3;
+ for (size_t input_height = 1; input_height <= 5; input_height++) {
+ for (size_t input_width = 1; input_width <= 5; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+ }
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, varying_block_size) {
+ for (uint32_t block_size = 2; block_size <= 5; block_size++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, varying_input_channels) {
+ size_t block_size = 3;
+ for (size_t input_channels = 1; input_channels <= 15; input_channels++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(input_channels)
+ .TestNHWCxX8();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, varying_batch_size) {
+ size_t block_size = 3;
+ for (size_t batch_size = 2; batch_size <= 3; batch_size++) {
+ SpaceToDepthOperatorTester()
+ .batch_size(batch_size)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX8();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, input_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(1)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels_stride(10)
+ .input_channels(9)
+ .TestNHWCxX8();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X8, output_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(2)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .output_channels_stride(171)
+ .input_channels(17)
+ .TestNHWCxX8();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, one_output_pixel) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, one_column) {
+ size_t block_size = 3;
+ for (size_t input_height = 2; input_height <= 7; input_height++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, one_row) {
+ size_t block_size = 3;
+ for (size_t input_width = 2; input_width <= 7; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, varying_input_size) {
+ size_t block_size = 3;
+ for (size_t input_height = 1; input_height <= 5; input_height++) {
+ for (size_t input_width = 1; input_width <= 5; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+ }
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, varying_block_size) {
+ for (uint32_t block_size = 2; block_size <= 5; block_size++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, varying_input_channels) {
+ size_t block_size = 3;
+ for (size_t input_channels = 1; input_channels <= 15; input_channels++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(input_channels)
+ .TestNHWCxX16();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, varying_batch_size) {
+ size_t block_size = 3;
+ for (size_t batch_size = 2; batch_size <= 3; batch_size++) {
+ SpaceToDepthOperatorTester()
+ .batch_size(batch_size)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX16();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, input_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(1)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels_stride(10)
+ .input_channels(9)
+ .TestNHWCxX16();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X16, output_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(2)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .output_channels_stride(171)
+ .input_channels(17)
+ .TestNHWCxX16();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, one_output_pixel) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, one_column) {
+ size_t block_size = 3;
+ for (size_t input_height = 2; input_height <= 7; input_height++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, one_row) {
+ size_t block_size = 3;
+ for (size_t input_width = 2; input_width <= 7; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, varying_input_size) {
+ size_t block_size = 3;
+ for (size_t input_height = 1; input_height <= 5; input_height++) {
+ for (size_t input_width = 1; input_width <= 5; input_width++) {
+ SpaceToDepthOperatorTester()
+ .input_size(input_height * block_size, input_width * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+ }
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, varying_block_size) {
+ for (uint32_t block_size = 2; block_size <= 5; block_size++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, varying_input_channels) {
+ size_t block_size = 3;
+ for (size_t input_channels = 1; input_channels <= 15; input_channels++) {
+ SpaceToDepthOperatorTester()
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(input_channels)
+ .TestNHWCxX32();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, varying_batch_size) {
+ size_t block_size = 3;
+ for (size_t batch_size = 2; batch_size <= 3; batch_size++) {
+ SpaceToDepthOperatorTester()
+ .batch_size(batch_size)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels(17)
+ .TestNHWCxX32();
+ }
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, input_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(1)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .input_channels_stride(10)
+ .input_channels(9)
+ .TestNHWCxX32();
+}
+
+TEST(SPACE_TO_DEPTH_NHWC_X32, output_channels_stride) {
+ size_t block_size = 3;
+ SpaceToDepthOperatorTester()
+ .batch_size(2)
+ .input_size(7 * block_size, 5 * block_size)
+ .block_size(block_size)
+ .output_channels_stride(171)
+ .input_channels(17)
+ .TestNHWCxX32();
+}
diff --git a/test/space-to-depth-operator-tester.h b/test/space-to-depth-operator-tester.h
new file mode 100644
index 000000000..6bc7eb78d
--- /dev/null
+++ b/test/space-to-depth-operator-tester.h
@@ -0,0 +1,351 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <xnnpack.h>
+
+
+class SpaceToDepthOperatorTester {
+ public:
+ inline SpaceToDepthOperatorTester& input_size(size_t input_height, size_t input_width) {
+ assert(input_height >= 1);
+ assert(input_width >= 1);
+ this->input_height_ = input_height;
+ this->input_width_ = input_width;
+ return *this;
+ }
+
+ inline SpaceToDepthOperatorTester& input_height(size_t input_height) {
+ assert(input_height >= 1);
+ this->input_height_ = input_height;
+ return *this;
+ }
+
+ inline size_t input_height() const {
+ return this->input_height_;
+ }
+
+ inline SpaceToDepthOperatorTester& input_width(size_t input_width) {
+ assert(input_width >= 1);
+ this->input_width_ = input_width;
+ return *this;
+ }
+
+ inline size_t input_width() const {
+ return this->input_width_;
+ }
+
+ inline size_t output_height() const {
+ assert(input_height() % block_size() == 0);
+ return input_height() / block_size();
+ }
+
+ inline size_t output_width() const {
+ assert(input_width() % block_size() == 0);
+ return input_width() / block_size();
+ }
+
+ inline SpaceToDepthOperatorTester& block_size(size_t block_size) {
+ assert(block_size >= 2);
+ this->block_size_ = block_size;
+ return *this;
+ }
+
+ inline size_t block_size() const {
+ return this->block_size_;
+ }
+
+ inline SpaceToDepthOperatorTester& input_channels(size_t input_channels) {
+ assert(input_channels != 0);
+ this->input_channels_ = input_channels;
+ return *this;
+ }
+
+ inline size_t input_channels() const {
+ return this->input_channels_;
+ }
+
+ inline size_t output_channels() const {
+ return input_channels() * block_size() * block_size();
+ }
+
+ inline SpaceToDepthOperatorTester& batch_size(size_t batch_size) {
+ assert(batch_size != 0);
+ this->batch_size_ = batch_size;
+ return *this;
+ }
+
+ inline size_t batch_size() const {
+ return this->batch_size_;
+ }
+
+ inline SpaceToDepthOperatorTester& input_channels_stride(size_t input_channels_stride) {
+ assert(input_channels_stride >= 1);
+ this->input_channels_stride_ = input_channels_stride;
+ return *this;
+ }
+
+ inline size_t input_channels_stride() const {
+ if (this->input_channels_stride_ == 0) {
+ return input_channels();
+ } else {
+ assert(this->input_channels_stride_ >= input_channels());
+ return this->input_channels_stride_;
+ }
+ }
+
+ inline SpaceToDepthOperatorTester& output_channels_stride(size_t output_channels_stride) {
+ assert(output_channels_stride >= 1);
+ this->output_channels_stride_ = output_channels_stride;
+ return *this;
+ }
+
+ inline size_t output_channels_stride() const {
+ if (this->output_channels_stride_ == 0) {
+ return output_channels();
+ } else {
+ assert(this->output_channels_stride_ >= output_channels());
+ return this->output_channels_stride_;
+ }
+ }
+
+ inline SpaceToDepthOperatorTester& iterations(size_t iterations) {
+ this->iterations_ = iterations;
+ return *this;
+ }
+
+ inline size_t iterations() const {
+ return this->iterations_;
+ }
+
+ void TestNHWCxX8() const {
+ std::vector<int8_t> input(
+ (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels());
+ std::vector<int8_t> output(
+ (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::iota(input.begin(), input.end(), 0);
+ std::fill(output.begin(), output.end(), INT8_C(0xAF));
+
+ // Create, setup, run, and destroy Space To Depth operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t space_to_depth_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_space_to_depth_nhwc_x8(
+ input_channels(), input_channels_stride(), output_channels_stride(),
+ block_size(), 0, &space_to_depth_op));
+ ASSERT_NE(nullptr, space_to_depth_op);
+
+ // Smart pointer to automatically delete space_to_depth_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_space_to_depth_nhwc_x8(
+ space_to_depth_op,
+ batch_size(), input_height(), input_width(),
+ input.data(), output.data(), nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(space_to_depth_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t iy = 0; iy < output_height(); iy++) {
+ for (size_t ix = 0; ix < output_width(); ix++) {
+ for (size_t by = 0; by < block_size(); by++) {
+ for (size_t bx = 0; bx < block_size(); bx++) {
+ for (size_t oc = 0; oc < input_channels(); oc++) {
+ const size_t input_index = oc
+ + bx * input_channels_stride()
+ + ix * block_size() * input_channels_stride()
+ + by * output_width() * block_size() * input_channels_stride()
+ + iy * block_size() * output_width() * block_size() * input_channels_stride()
+ + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride();
+ const size_t output_index = oc
+ + bx * input_channels()
+ + by * input_channels() * block_size()
+ + ix * output_channels_stride()
+ + iy * output_width() * output_channels_stride()
+ + i * output_height() * output_width() * output_channels_stride();
+
+ ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index]))
+ << "batch: " << i << " / " << batch_size()
+ << ", output x: " << ix << " / " << output_width()
+ << ", output y: " << iy << " / " << output_height()
+ << ", block x: " << bx << " / " << block_size()
+ << ", block y: " << by << " / " << block_size()
+ << ", input channel: " << oc << " / " << input_channels()
+ << ", input stride: " << input_channels_stride()
+ << ", output stride: " << output_channels_stride();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void TestNHWCxX16() const {
+ std::vector<int16_t> input(
+ (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels());
+ std::vector<int16_t> output(
+ (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::iota(input.begin(), input.end(), 0);
+ std::fill(output.begin(), output.end(), INT16_C(0xDEAD));
+
+ // Create, setup, run, and destroy Space To Depth operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t space_to_depth_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_space_to_depth_nhwc_x16(
+ input_channels(), input_channels_stride(), output_channels_stride(),
+ block_size(), 0, &space_to_depth_op));
+ ASSERT_NE(nullptr, space_to_depth_op);
+
+ // Smart pointer to automatically delete space_to_depth_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_space_to_depth_nhwc_x16(
+ space_to_depth_op,
+ batch_size(), input_height(), input_width(),
+ input.data(), output.data(), nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(space_to_depth_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t iy = 0; iy < output_height(); iy++) {
+ for (size_t ix = 0; ix < output_width(); ix++) {
+ for (size_t by = 0; by < block_size(); by++) {
+ for (size_t bx = 0; bx < block_size(); bx++) {
+ for (size_t oc = 0; oc < input_channels(); oc++) {
+ const size_t input_index = oc
+ + bx * input_channels_stride()
+ + ix * block_size() * input_channels_stride()
+ + by * output_width() * block_size() * input_channels_stride()
+ + iy * block_size() * output_width() * block_size() * input_channels_stride()
+ + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride();
+ const size_t output_index = oc
+ + bx * input_channels()
+ + by * input_channels() * block_size()
+ + ix * output_channels_stride()
+ + iy * output_width() * output_channels_stride()
+ + i * output_height() * output_width() * output_channels_stride();
+
+ ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index]))
+ << "batch: " << i << " / " << batch_size()
+ << ", output x: " << ix << " / " << output_width()
+ << ", output y: " << iy << " / " << output_height()
+ << ", block x: " << bx << " / " << block_size()
+ << ", block y: " << by << " / " << block_size()
+ << ", input channel: " << oc << " / " << input_channels()
+ << ", input stride: " << input_channels_stride()
+ << ", output stride: " << output_channels_stride();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void TestNHWCxX32() const {
+ std::vector<int32_t> input(
+ (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels());
+ std::vector<int32_t> output(
+ (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels());
+ for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::iota(input.begin(), input.end(), 0);
+ std::fill(output.begin(), output.end(), INT32_C(0xDEADBEEF));
+
+ // Create, setup, run, and destroy Space To Depth operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ xnn_operator_t space_to_depth_op = nullptr;
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_create_space_to_depth_nhwc_x32(
+ input_channels(), input_channels_stride(), output_channels_stride(),
+ block_size(), 0, &space_to_depth_op));
+ ASSERT_NE(nullptr, space_to_depth_op);
+
+ // Smart pointer to automatically delete space_to_depth_op.
+ std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator);
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_setup_space_to_depth_nhwc_x32(
+ space_to_depth_op,
+ batch_size(), input_height(), input_width(),
+ input.data(), output.data(), nullptr /* thread pool */));
+
+ ASSERT_EQ(xnn_status_success,
+ xnn_run_operator(space_to_depth_op, nullptr /* thread pool */));
+
+ // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t iy = 0; iy < output_height(); iy++) {
+ for (size_t ix = 0; ix < output_width(); ix++) {
+ for (size_t by = 0; by < block_size(); by++) {
+ for (size_t bx = 0; bx < block_size(); bx++) {
+ for (size_t oc = 0; oc < input_channels(); oc++) {
+ const size_t input_index = oc
+ + bx * input_channels_stride()
+ + ix * block_size() * input_channels_stride()
+ + by * output_width() * block_size() * input_channels_stride()
+ + iy * block_size() * output_width() * block_size() * input_channels_stride()
+ + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride();
+ const size_t output_index = oc
+ + bx * input_channels()
+ + by * input_channels() * block_size()
+ + ix * output_channels_stride()
+ + iy * output_width() * output_channels_stride()
+ + i * output_height() * output_width() * output_channels_stride();
+
+ ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index]))
+ << "batch: " << i << " / " << batch_size()
+ << ", output x: " << ix << " / " << output_width()
+ << ", output y: " << iy << " / " << output_height()
+ << ", block x: " << bx << " / " << block_size()
+ << ", block y: " << by << " / " << block_size()
+ << ", input channel: " << oc << " / " << input_channels()
+ << ", input stride: " << input_channels_stride()
+ << ", output stride: " << output_channels_stride();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private:
+ size_t input_height_{1};
+ size_t input_width_{1};
+ size_t input_channels_{1};
+ size_t block_size_{2};
+ size_t batch_size_{1};
+ size_t input_channels_stride_{0};
+ size_t output_channels_stride_{0};
+ size_t iterations_{1};
+};
diff --git a/tools/generate-transpose-test.py b/tools/generate-transpose-test.py
index a7d49768a..15a2e2bee 100755
--- a/tools/generate-transpose-test.py
+++ b/tools/generate-transpose-test.py
@@ -220,6 +220,49 @@ TEST(${TEST_NAME}, bh_${TILE_HEIGHT}_bw_${TILE_WIDTH}_is_${TILE_WIDTH * 2}_os_${
.iterations(1)
.Test(${KERNEL});
}
+
+TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 17}_bw_${TILE_WIDTH * 19}_ies_${ELEMENT_SIZE + 11}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ TransposeMicrokernelTester()
+ .input_stride(${TILE_WIDTH * 19})
+ .output_stride(${TILE_HEIGHT * 17})
+ .block_width(${TILE_WIDTH * 19})
+ .block_height(${TILE_HEIGHT * 17})
+ .element_size(${ELEMENT_SIZE})
+ .input_element_stride(${ELEMENT_SIZE + 11})
+ .iterations(1)
+ .Test(${KERNEL});
+}
+
+TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 3}_bw_${TILE_WIDTH * 5}_oes_${ELEMENT_SIZE + 11}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ TransposeMicrokernelTester()
+ .input_stride(${TILE_WIDTH * 5})
+ .output_stride(${TILE_HEIGHT * 3})
+ .block_width(${TILE_WIDTH * 5})
+ .block_height(${TILE_HEIGHT * 3})
+ .element_size(${ELEMENT_SIZE})
+ .output_element_stride(${ELEMENT_SIZE + 11})
+ .iterations(1)
+ .Test(${KERNEL});
+}
+
+TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 7}_bw_${TILE_WIDTH * 23}_ies_${ELEMENT_SIZE + 17}_oes_${ELEMENT_SIZE + 13}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ TransposeMicrokernelTester()
+ .input_stride(${TILE_WIDTH * 23 + 5})
+ .output_stride(${TILE_HEIGHT * 7 + 6})
+ .block_width(${TILE_WIDTH * 23})
+ .block_height(${TILE_HEIGHT * 7})
+ .element_size(${ELEMENT_SIZE})
+ .input_element_stride(${ELEMENT_SIZE + 17})
+ .output_element_stride(${ELEMENT_SIZE + 13})
+ .iterations(1)
+ .Test(${KERNEL});
+}
"""