diff options
author | Alan Kelly <alankelly@google.com> | 2022-08-23 07:45:54 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-08-23 07:47:00 -0700 |
commit | 3c3769df8dc93069b1464d6912aefa042059503a (patch) | |
tree | 75e820f949e87626995099affba00bed334eb1bf | |
parent | ddbb37a87d134781f23d35fed4ad8262241a50ad (diff) | |
download | XNNPACK-3c3769df8dc93069b1464d6912aefa042059503a.tar.gz |
Space to Depth operator
PiperOrigin-RevId: 469456395
-rw-r--r-- | BUILD.bazel | 9 | ||||
-rwxr-xr-x | CMakeLists.txt | 5 | ||||
-rw-r--r-- | include/xnnpack.h | 51 | ||||
-rw-r--r-- | src/operator-run.c | 2 | ||||
-rw-r--r-- | src/operator-strings.c | 3 | ||||
-rw-r--r-- | src/operator-strings.yaml | 6 | ||||
-rw-r--r-- | src/operators/transpose-nd.c | 247 | ||||
-rw-r--r-- | src/xnnpack/operator-type.h | 3 | ||||
-rw-r--r-- | test/depth-to-space-nhwc.cc | 1 | ||||
-rw-r--r-- | test/space-to-depth-nhwc.cc | 306 | ||||
-rw-r--r-- | test/space-to-depth-operator-tester.h | 351 | ||||
-rwxr-xr-x | tools/generate-transpose-test.py | 43 |
12 files changed, 1025 insertions, 2 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 63feeb4ec..ecd7f482f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -14889,6 +14889,15 @@ xnnpack_unit_test( ) xnnpack_unit_test( + name = "space_to_depth_nhwc_test", + srcs = [ + "test/space-to-depth-nhwc.cc", + "test/space-to-depth-operator-tester.h", + ], + deps = OPERATOR_TEST_DEPS, +) + +xnnpack_unit_test( name = "square_nc_test", srcs = [ "test/square-nc.cc", diff --git a/CMakeLists.txt b/CMakeLists.txt index 664c0e95e..27965112a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7602,6 +7602,11 @@ IF(XNNPACK_BUILD_TESTS) TARGET_LINK_LIBRARIES(softmax-nc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators) ADD_TEST(NAME softmax-nc-test COMMAND softmax-nc-test) + ADD_EXECUTABLE(space-to-depth-nhwc-test test/space-to-depth-nhwc.cc) + TARGET_INCLUDE_DIRECTORIES(space-to-depth-nhwc-test PRIVATE src test) + TARGET_LINK_LIBRARIES(space-to-depth-nhwc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators) + ADD_TEST(NAME space-to-depth-nhwc-test COMMAND space-to-depth-nhwc-test) + ADD_EXECUTABLE(square-nc-test test/square-nc.cc) TARGET_INCLUDE_DIRECTORIES(square-nc-test PRIVATE src test) TARGET_LINK_LIBRARIES(square-nc-test PRIVATE XNNPACK fp16 gtest gtest_main microparams_init logging operators) diff --git a/include/xnnpack.h b/include/xnnpack.h index f6810decd..b183d32a9 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -2224,6 +2224,23 @@ enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32( void* output, pthreadpool_t threadpool); +enum xnn_status xnn_create_space_to_depth_nhwc_x32( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* space_to_depth_op_out); + +enum xnn_status xnn_setup_space_to_depth_nhwc_x32( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t 
threadpool); + enum xnn_status xnn_create_transpose_nd_x32( uint32_t flags, xnn_operator_t* transpose_op_out); @@ -2829,6 +2846,23 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x16( void* output, pthreadpool_t threadpool); +enum xnn_status xnn_create_space_to_depth_nhwc_x16( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* space_to_depth_op_out); + +enum xnn_status xnn_setup_space_to_depth_nhwc_x16( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t threadpool); + enum xnn_status xnn_create_transpose_nd_x16( uint32_t flags, xnn_operator_t* transpose_op_out); @@ -3664,6 +3698,23 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x8( void* output, pthreadpool_t threadpool); +enum xnn_status xnn_create_space_to_depth_nhwc_x8( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* space_to_depth_op_out); + +enum xnn_status xnn_setup_space_to_depth_nhwc_x8( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t threadpool); + enum xnn_status xnn_create_transpose_nd_x8( uint32_t flags, xnn_operator_t* transpose_op_out); diff --git a/src/operator-run.c b/src/operator-run.c index ab4d5ace5..67f27199c 100644 --- a/src/operator-run.c +++ b/src/operator-run.c @@ -287,7 +287,7 @@ void xnn_compute_transposev_6d( ld_input, ld_output, context->input_stride[4], - context->output_stride[4], + context->output_stride[5], element_size, tile_m, tile_n); diff --git a/src/operator-strings.c b/src/operator-strings.c index d2edb053e..9407805e5 100644 --- a/src/operator-strings.c +++ b/src/operator-strings.c @@ -115,6 +115,9 @@ static const char *data = "Softmax (NC, F16)\0" "Softmax (NC, F32)\0" 
"Softmax (NC, QU8)\0" + "Space To Depth (NHWC, X8)\0" + "Space To Depth (NHWC, X16)\0" + "Space To Depth (NHWC, X32)\0" "Square (NC, F16)\0" "Square (NC, F32)\0" "Square Root (NC, F16)\0" diff --git a/src/operator-strings.yaml b/src/operator-strings.yaml index 14eb83eaf..d9abe6b0a 100644 --- a/src/operator-strings.yaml +++ b/src/operator-strings.yaml @@ -201,6 +201,12 @@ string: "Softmax (NC, F32)" - name: xnn_operator_type_softmax_nc_qu8 string: "Softmax (NC, QU8)" +- name: xnn_operator_type_space_to_depth_nhwc_x8 + string: "Space To Depth (NHWC, X8)" +- name: xnn_operator_type_space_to_depth_nhwc_x16 + string: "Space To Depth (NHWC, X16)" +- name: xnn_operator_type_space_to_depth_nhwc_x32 + string: "Space To Depth (NHWC, X32)" - name: xnn_operator_type_square_nc_f16 string: "Square (NC, F16)" - name: xnn_operator_type_square_nc_f32 diff --git a/src/operators/transpose-nd.c b/src/operators/transpose-nd.c index 41e4335c3..2d57791f7 100644 --- a/src/operators/transpose-nd.c +++ b/src/operators/transpose-nd.c @@ -883,3 +883,250 @@ enum xnn_status xnn_setup_depth_to_space_nhwc_x32( batch_size, input_height, input_width, input, output, 4); } + +static enum xnn_status create_space_to_depth_nhwc( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + enum xnn_operator_type operator_type, + xnn_operator_t* space_to_depth_op_out) +{ + xnn_operator_t space_to_depth_op = NULL; + enum xnn_status status = xnn_status_uninitialized; + + if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { + xnn_log_error("failed to create %s operator: XNNPACK is not initialized", + xnn_operator_type_to_string(operator_type)); + goto error; + } + + status = xnn_status_invalid_parameter; + + if (input_channels == 0) { + xnn_log_error("failed to create %s operator with %zu input channels: number of channels must be non-zero", + xnn_operator_type_to_string(operator_type), input_channels); + goto error; + } + + if 
(input_channel_stride < input_channels) { + xnn_log_error( + "failed to create %s operator with input channel stride of %zu: " + "stride must be at least as large as the number of input channels (%zu)", + xnn_operator_type_to_string(operator_type), + input_channel_stride, input_channels); + goto error; + } + + if (block_size <= 1) { + xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1", + xnn_operator_type_to_string(operator_type), + block_size); + goto error; + } + + const size_t output_channels = input_channels * block_size * block_size; + if (output_channel_stride < output_channels) { + xnn_log_error( + "failed to create %s operator with output channel stride of %zu: " + "stride must be at least as large as the number of output channels (%" PRIu32 "x%" PRIu32 "x%zu)", + xnn_operator_type_to_string(operator_type), + output_channel_stride, block_size, block_size, input_channels); + goto error; + } + + status = xnn_status_out_of_memory; + + space_to_depth_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator)); + if (space_to_depth_op == NULL) { + xnn_log_error( + "failed to allocate %zu bytes for %s operator descriptor", + sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type)); + goto error; + } + + space_to_depth_op->channels = input_channels; + space_to_depth_op->input_pixel_stride = input_channel_stride; + space_to_depth_op->output_pixel_stride = output_channel_stride; + space_to_depth_op->block_size = block_size; + + space_to_depth_op->type = operator_type; + space_to_depth_op->flags = flags; + + space_to_depth_op->state = xnn_run_state_invalid; + + *space_to_depth_op_out = space_to_depth_op; + return xnn_status_success; + +error: + xnn_delete_operator(space_to_depth_op); + return status; +} + +enum xnn_status xnn_create_space_to_depth_nhwc_x8( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* 
space_to_depth_op_out) +{ + return create_space_to_depth_nhwc( + input_channels, + input_channel_stride, + output_channel_stride, + block_size, + flags, + xnn_operator_type_space_to_depth_nhwc_x8, + space_to_depth_op_out); +} + +enum xnn_status xnn_create_space_to_depth_nhwc_x16( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* space_to_depth_op_out) +{ + return create_space_to_depth_nhwc( + input_channels, + input_channel_stride, + output_channel_stride, + block_size, + flags, + xnn_operator_type_space_to_depth_nhwc_x16, + space_to_depth_op_out); +} + +enum xnn_status xnn_create_space_to_depth_nhwc_x32( + size_t input_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint32_t block_size, + uint32_t flags, + xnn_operator_t* space_to_depth_op_out) +{ + return create_space_to_depth_nhwc( + input_channels, + input_channel_stride, + output_channel_stride, + block_size, + flags, + xnn_operator_type_space_to_depth_nhwc_x32, + space_to_depth_op_out); +} + +static enum xnn_status setup_space_to_depth_nhwc( + xnn_operator_t space_to_depth_op, + enum xnn_operator_type expected_operator_type, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + uint32_t element_size) +{ + if (space_to_depth_op->type != expected_operator_type) { + xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)", + xnn_operator_type_to_string(expected_operator_type), + xnn_operator_type_to_string(space_to_depth_op->type)); + return xnn_status_invalid_parameter; + } + space_to_depth_op->state = xnn_run_state_invalid; + + if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { + xnn_log_error("failed to setup %s operator: XNNPACK is not initialized", + xnn_operator_type_to_string(expected_operator_type)); + return xnn_status_uninitialized; + } + + if (input_width == 0 || input_height == 0) { + 
xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero", + xnn_operator_type_to_string(expected_operator_type), input_width, input_height); + return xnn_status_invalid_parameter; + } + + if (batch_size == 0) { + space_to_depth_op->state = xnn_run_state_skip; + return xnn_status_success; + } + + const uint32_t block_size = space_to_depth_op->block_size; + + // The shape/stride math below uses integer division by block_size: a non-multiple extent would silently drop + // trailing rows/columns and misalign every batch after the first, so reject it explicitly. + if (input_height % block_size != 0 || input_width % block_size != 0) { + xnn_log_error("failed to setup %s operator with %zux%zu input and %" PRIu32 " block size: input dimensions must be divisible by block size", + xnn_operator_type_to_string(expected_operator_type), input_width, input_height, block_size); + return xnn_status_invalid_parameter; + } + + const size_t input_shape[5] = {batch_size * (input_height / block_size), block_size, input_width / block_size, block_size, space_to_depth_op->channels}; + const size_t perm[5] = {0, 2, 1, 3, 4}; + + const size_t input_stride[5] = { + block_size * input_width * space_to_depth_op->input_pixel_stride, + input_width * space_to_depth_op->input_pixel_stride, + block_size * space_to_depth_op->input_pixel_stride, + space_to_depth_op->input_pixel_stride, + 1}; + const size_t output_stride[5] = { + (input_width/block_size) * space_to_depth_op->output_pixel_stride, + space_to_depth_op->output_pixel_stride, + block_size * space_to_depth_op->channels, + space_to_depth_op->channels, + 1}; + + return setup_transpose_nd( + space_to_depth_op, + input, + output, + 5, + input_shape, + perm, + input_stride, + output_stride, + element_size); +} + +enum xnn_status xnn_setup_space_to_depth_nhwc_x8( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + return setup_space_to_depth_nhwc( + space_to_depth_op, + xnn_operator_type_space_to_depth_nhwc_x8, + batch_size, input_height, input_width, + input, output, sizeof(uint8_t)); +} + +enum xnn_status xnn_setup_space_to_depth_nhwc_x16( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + return setup_space_to_depth_nhwc( + space_to_depth_op, + xnn_operator_type_space_to_depth_nhwc_x16, + batch_size, input_height,
input_width, + input, output, sizeof(uint16_t)); +} + +enum xnn_status xnn_setup_space_to_depth_nhwc_x32( + xnn_operator_t space_to_depth_op, + size_t batch_size, + size_t input_height, + size_t input_width, + const void* input, + void* output, + pthreadpool_t threadpool) +{ + return setup_space_to_depth_nhwc( + space_to_depth_op, + xnn_operator_type_space_to_depth_nhwc_x32, + batch_size, input_height, input_width, + input, output, sizeof(uint32_t)); +} diff --git a/src/xnnpack/operator-type.h b/src/xnnpack/operator-type.h index 083aae3a4..d1a0b716f 100644 --- a/src/xnnpack/operator-type.h +++ b/src/xnnpack/operator-type.h @@ -109,6 +109,9 @@ enum xnn_operator_type { xnn_operator_type_softmax_nc_f16, xnn_operator_type_softmax_nc_f32, xnn_operator_type_softmax_nc_qu8, + xnn_operator_type_space_to_depth_nhwc_x8, + xnn_operator_type_space_to_depth_nhwc_x16, + xnn_operator_type_space_to_depth_nhwc_x32, xnn_operator_type_square_nc_f16, xnn_operator_type_square_nc_f32, xnn_operator_type_square_root_nc_f16, diff --git a/test/depth-to-space-nhwc.cc b/test/depth-to-space-nhwc.cc index 7a8a3ae7d..900b64f6b 100644 --- a/test/depth-to-space-nhwc.cc +++ b/test/depth-to-space-nhwc.cc @@ -99,7 +99,6 @@ TEST(DEPTH_TO_SPACE_NHWC_X8, output_channels_stride) { .TestNHWCxX8(); } - TEST(DEPTH_TO_SPACE_NHWC_X16, one_pixel) { DepthToSpaceOperatorTester() .input_size(1, 1) diff --git a/test/space-to-depth-nhwc.cc b/test/space-to-depth-nhwc.cc new file mode 100644 index 000000000..b26674480 --- /dev/null +++ b/test/space-to-depth-nhwc.cc @@ -0,0 +1,306 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#include "space-to-depth-operator-tester.h" + +#include <gtest/gtest.h> + + +TEST(SPACE_TO_DEPTH_NHWC_X8, one_output_pixel) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .input_size(block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, one_column) { + size_t block_size = 3; + for (size_t input_height = 2; input_height <= 7; input_height++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, one_row) { + size_t block_size = 3; + for (size_t input_width = 2; input_width <= 7; input_width++) { + SpaceToDepthOperatorTester() + .input_size(block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, varying_input_size) { + size_t block_size = 3; + for (size_t input_height = 1; input_height <= 5; input_height++) { + for (size_t input_width = 1; input_width <= 5; input_width++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); + } + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, varying_block_size) { + for (uint32_t block_size = 2; block_size <= 5; block_size++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, varying_input_channels) { + size_t block_size = 3; + for (size_t input_channels = 1; input_channels <= 15; input_channels++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(input_channels) + .TestNHWCxX8(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, varying_batch_size) { + size_t block_size = 3; + for (size_t batch_size = 2; batch_size <= 3; batch_size++) { 
+ SpaceToDepthOperatorTester() + .batch_size(batch_size) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX8(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, input_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(1) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels_stride(10) + .input_channels(9) + .TestNHWCxX8(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X8, output_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(2) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .output_channels_stride(171) + .input_channels(17) + .TestNHWCxX8(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, one_output_pixel) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .input_size(block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, one_column) { + size_t block_size = 3; + for (size_t input_height = 2; input_height <= 7; input_height++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, one_row) { + size_t block_size = 3; + for (size_t input_width = 2; input_width <= 7; input_width++) { + SpaceToDepthOperatorTester() + .input_size(block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, varying_input_size) { + size_t block_size = 3; + for (size_t input_height = 1; input_height <= 5; input_height++) { + for (size_t input_width = 1; input_width <= 5; input_width++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); + } + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, varying_block_size) { + for (uint32_t 
block_size = 2; block_size <= 5; block_size++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, varying_input_channels) { + size_t block_size = 3; + for (size_t input_channels = 1; input_channels <= 15; input_channels++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(input_channels) + .TestNHWCxX16(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, varying_batch_size) { + size_t block_size = 3; + for (size_t batch_size = 2; batch_size <= 3; batch_size++) { + SpaceToDepthOperatorTester() + .batch_size(batch_size) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX16(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, input_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(1) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels_stride(10) + .input_channels(9) + .TestNHWCxX16(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X16, output_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(2) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .output_channels_stride(171) + .input_channels(17) + .TestNHWCxX16(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, one_output_pixel) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .input_size(block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, one_column) { + size_t block_size = 3; + for (size_t input_height = 2; input_height <= 7; input_height++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, one_row) { + size_t block_size = 3; + for (size_t
input_width = 2; input_width <= 7; input_width++) { + SpaceToDepthOperatorTester() + .input_size(block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, varying_input_size) { + size_t block_size = 3; + for (size_t input_height = 1; input_height <= 5; input_height++) { + for (size_t input_width = 1; input_width <= 5; input_width++) { + SpaceToDepthOperatorTester() + .input_size(input_height * block_size, input_width * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); + } + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, varying_block_size) { + for (uint32_t block_size = 2; block_size <= 5; block_size++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, varying_input_channels) { + size_t block_size = 3; + for (size_t input_channels = 1; input_channels <= 15; input_channels++) { + SpaceToDepthOperatorTester() + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(input_channels) + .TestNHWCxX32(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, varying_batch_size) { + size_t block_size = 3; + for (size_t batch_size = 2; batch_size <= 3; batch_size++) { + SpaceToDepthOperatorTester() + .batch_size(batch_size) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels(17) + .TestNHWCxX32(); + } +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, input_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(1) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + .input_channels_stride(10) + .input_channels(9) + .TestNHWCxX32(); +} + +TEST(SPACE_TO_DEPTH_NHWC_X32, output_channels_stride) { + size_t block_size = 3; + SpaceToDepthOperatorTester() + .batch_size(2) + .input_size(7 * block_size, 5 * block_size) + .block_size(block_size) + 
.output_channels_stride(171) + .input_channels(17) + .TestNHWCxX32(); +} diff --git a/test/space-to-depth-operator-tester.h b/test/space-to-depth-operator-tester.h new file mode 100644 index 000000000..6bc7eb78d --- /dev/null +++ b/test/space-to-depth-operator-tester.h @@ -0,0 +1,351 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include <gtest/gtest.h> + +#include <algorithm> +#include <cmath> +#include <cassert> +#include <cstddef> +#include <cstdlib> +#include <functional> +#include <memory> +#include <numeric> +#include <random> +#include <vector> + +#include <xnnpack.h> + + +class SpaceToDepthOperatorTester { + public: + inline SpaceToDepthOperatorTester& input_size(size_t input_height, size_t input_width) { + assert(input_height >= 1); + assert(input_width >= 1); + this->input_height_ = input_height; + this->input_width_ = input_width; + return *this; + } + + inline SpaceToDepthOperatorTester& input_height(size_t input_height) { + assert(input_height >= 1); + this->input_height_ = input_height; + return *this; + } + + inline size_t input_height() const { + return this->input_height_; + } + + inline SpaceToDepthOperatorTester& input_width(size_t input_width) { + assert(input_width >= 1); + this->input_width_ = input_width; + return *this; + } + + inline size_t input_width() const { + return this->input_width_; + } + + inline size_t output_height() const { + assert(input_height() % block_size() == 0); + return input_height() / block_size(); + } + + inline size_t output_width() const { + assert(input_width() % block_size() == 0); + return input_width() / block_size(); + } + + inline SpaceToDepthOperatorTester& block_size(size_t block_size) { + assert(block_size >= 2); + this->block_size_ = block_size; + return *this; + } + + inline size_t block_size() const { + return this->block_size_; + } + + inline SpaceToDepthOperatorTester& input_channels(size_t
input_channels) { + assert(input_channels != 0); + this->input_channels_ = input_channels; + return *this; + } + + inline size_t input_channels() const { + return this->input_channels_; + } + + inline size_t output_channels() const { + return input_channels() * block_size() * block_size(); + } + + inline SpaceToDepthOperatorTester& batch_size(size_t batch_size) { + assert(batch_size != 0); + this->batch_size_ = batch_size; + return *this; + } + + inline size_t batch_size() const { + return this->batch_size_; + } + + inline SpaceToDepthOperatorTester& input_channels_stride(size_t input_channels_stride) { + assert(input_channels_stride >= 1); + this->input_channels_stride_ = input_channels_stride; + return *this; + } + + inline size_t input_channels_stride() const { + if (this->input_channels_stride_ == 0) { + return input_channels(); + } else { + assert(this->input_channels_stride_ >= input_channels()); + return this->input_channels_stride_; + } + } + + inline SpaceToDepthOperatorTester& output_channels_stride(size_t output_channels_stride) { + assert(output_channels_stride >= 1); + this->output_channels_stride_ = output_channels_stride; + return *this; + } + + inline size_t output_channels_stride() const { + if (this->output_channels_stride_ == 0) { + return output_channels(); + } else { + assert(this->output_channels_stride_ >= output_channels()); + return this->output_channels_stride_; + } + } + + inline SpaceToDepthOperatorTester& iterations(size_t iterations) { + this->iterations_ = iterations; + return *this; + } + + inline size_t iterations() const { + return this->iterations_; + } + + void TestNHWCxX8() const { + std::vector<int8_t> input( + (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels()); + std::vector<int8_t> output( + (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + 
std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), INT8_C(0xAF)); + + // Create, setup, run, and destroy Space To Depth operator. + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t space_to_depth_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_space_to_depth_nhwc_x8( + input_channels(), input_channels_stride(), output_channels_stride(), + block_size(), 0, &space_to_depth_op)); + ASSERT_NE(nullptr, space_to_depth_op); + + // Smart pointer to automatically delete space_to_depth_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_space_to_depth_nhwc_x8( + space_to_depth_op, + batch_size(), input_height(), input_width(), + input.data(), output.data(), nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(space_to_depth_op, nullptr /* thread pool */)); + + // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) { + for (size_t iy = 0; iy < output_height(); iy++) { + for (size_t ix = 0; ix < output_width(); ix++) { + for (size_t by = 0; by < block_size(); by++) { + for (size_t bx = 0; bx < block_size(); bx++) { + for (size_t oc = 0; oc < input_channels(); oc++) { + const size_t input_index = oc + + bx * input_channels_stride() + + ix * block_size() * input_channels_stride() + + by * output_width() * block_size() * input_channels_stride() + + iy * block_size() * output_width() * block_size() * input_channels_stride() + + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride(); + const size_t output_index = oc + + bx * input_channels() + + by * input_channels() * block_size() + + ix * output_channels_stride() + + iy * output_width() * output_channels_stride() + + i * output_height() * output_width() * output_channels_stride(); + + ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index])) + << "batch: " << i << " / " << batch_size() + << ", output x: " << ix << " / " << output_width() + << ", output y: " << iy << " / " << output_height() + << ", block x: " << bx << " / " << block_size() + << ", block y: " << by << " / " << block_size() + << ", input channel: " << oc << " / " << input_channels() + << ", input stride: " << input_channels_stride() + << ", output stride: " << output_channels_stride(); + } + } + } + } + } + } + } + } + + void TestNHWCxX16() const { + std::vector<int16_t> input( + (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels()); + std::vector<int16_t> output( + (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), INT16_C(0xDEAD)); + + // Create, setup, run, and destroy Space To Depth operator.
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t space_to_depth_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_space_to_depth_nhwc_x16( + input_channels(), input_channels_stride(), output_channels_stride(), + block_size(), 0, &space_to_depth_op)); + ASSERT_NE(nullptr, space_to_depth_op); + + // Smart pointer to automatically delete space_to_depth_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_space_to_depth_nhwc_x16( + space_to_depth_op, + batch_size(), input_height(), input_width(), + input.data(), output.data(), nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(space_to_depth_op, nullptr /* thread pool */)); + + // Verify results. + for (size_t i = 0; i < batch_size(); i++) { + for (size_t iy = 0; iy < output_height(); iy++) { + for (size_t ix = 0; ix < output_width(); ix++) { + for (size_t by = 0; by < block_size(); by++) { + for (size_t bx = 0; bx < block_size(); bx++) { + for (size_t oc = 0; oc < input_channels(); oc++) { + const size_t input_index = oc + + bx * input_channels_stride() + + ix * block_size() * input_channels_stride() + + by * output_width() * block_size() * input_channels_stride() + + iy * block_size() * output_width() * block_size() * input_channels_stride() + + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride(); + const size_t output_index = oc + + bx * input_channels() + + by * input_channels() * block_size() + + ix * output_channels_stride() + + iy * output_width() * output_channels_stride() + + i * output_height() * output_width() * output_channels_stride(); + + ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index])) + << "batch: " << i << " / " << batch_size() + << ", output x: " << ix << " / " << output_width() + << ", output y: " << iy << " / " << output_height() + 
<< ", block x: " << bx << " / " << block_size() + << ", block y: " << by << " / " << block_size() + << ", input channel: " << oc << " / " << input_channels() + << ", input stride: " << input_channels_stride() + << ", output stride: " << output_channels_stride(); + } + } + } + } + } + } + } + } + + void TestNHWCxX32() const { + std::vector<int32_t> input( + (batch_size() * input_height() * input_width() - 1) * input_channels_stride() + input_channels()); + std::vector<int32_t> output( + (batch_size() * output_height() * output_width() - 1) * output_channels_stride() + output_channels()); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), INT32_C(0xDEADBEEF)); + + // Create, setup, run, and destroy Space To Depth operator. + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t space_to_depth_op = nullptr; + + ASSERT_EQ(xnn_status_success, + xnn_create_space_to_depth_nhwc_x32( + input_channels(), input_channels_stride(), output_channels_stride(), + block_size(), 0, &space_to_depth_op)); + ASSERT_NE(nullptr, space_to_depth_op); + + // Smart pointer to automatically delete space_to_depth_op. + std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_space_to_depth_op(space_to_depth_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_setup_space_to_depth_nhwc_x32( + space_to_depth_op, + batch_size(), input_height(), input_width(), + input.data(), output.data(), nullptr /* thread pool */)); + + ASSERT_EQ(xnn_status_success, + xnn_run_operator(space_to_depth_op, nullptr /* thread pool */)); + + // Verify results.
+ for (size_t i = 0; i < batch_size(); i++) { + for (size_t iy = 0; iy < output_height(); iy++) { + for (size_t ix = 0; ix < output_width(); ix++) { + for (size_t by = 0; by < block_size(); by++) { + for (size_t bx = 0; bx < block_size(); bx++) { + for (size_t oc = 0; oc < input_channels(); oc++) { + const size_t input_index = oc + + bx * input_channels_stride() + + ix * block_size() * input_channels_stride() + + by * output_width() * block_size() * input_channels_stride() + + iy * block_size() * output_width() * block_size() * input_channels_stride() + + i * output_height() * block_size() * output_width() * block_size() * input_channels_stride(); + const size_t output_index = oc + + bx * input_channels() + + by * input_channels() * block_size() + + ix * output_channels_stride() + + iy * output_width() * output_channels_stride() + + i * output_height() * output_width() * output_channels_stride(); + + ASSERT_EQ(int32_t(output[output_index]), int32_t(input[input_index])) + << "batch: " << i << " / " << batch_size() + << ", output x: " << ix << " / " << output_width() + << ", output y: " << iy << " / " << output_height() + << ", block x: " << bx << " / " << block_size() + << ", block y: " << by << " / " << block_size() + << ", input channel: " << oc << " / " << input_channels() + << ", input stride: " << input_channels_stride() + << ", output stride: " << output_channels_stride(); + } + } + } + } + } + } + } + } + + private: + size_t input_height_{1}; + size_t input_width_{1}; + size_t input_channels_{1}; + size_t block_size_{2}; + size_t batch_size_{1}; + size_t input_channels_stride_{0}; + size_t output_channels_stride_{0}; + size_t iterations_{1}; +}; diff --git a/tools/generate-transpose-test.py b/tools/generate-transpose-test.py index a7d49768a..15a2e2bee 100755 --- a/tools/generate-transpose-test.py +++ b/tools/generate-transpose-test.py @@ -220,6 +220,49 @@ TEST(${TEST_NAME}, bh_${TILE_HEIGHT}_bw_${TILE_WIDTH}_is_${TILE_WIDTH * 2}_os_${ .iterations(1) 
.Test(${KERNEL}); } + +TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 17}_bw_${TILE_WIDTH * 19}_ies_${ELEMENT_SIZE + 11}) { + $if ISA_CHECK: + ${ISA_CHECK}; + TransposeMicrokernelTester() + .input_stride(${TILE_WIDTH * 19}) + .output_stride(${TILE_HEIGHT * 17}) + .block_width(${TILE_WIDTH * 19}) + .block_height(${TILE_HEIGHT * 17}) + .element_size(${ELEMENT_SIZE}) + .input_element_stride(${ELEMENT_SIZE + 11}) + .iterations(1) + .Test(${KERNEL}); +} + +TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 3}_bw_${TILE_WIDTH * 5}_oes_${ELEMENT_SIZE + 11}) { + $if ISA_CHECK: + ${ISA_CHECK}; + TransposeMicrokernelTester() + .input_stride(${TILE_WIDTH * 5}) + .output_stride(${TILE_HEIGHT * 3}) + .block_width(${TILE_WIDTH * 5}) + .block_height(${TILE_HEIGHT * 3}) + .element_size(${ELEMENT_SIZE}) + .output_element_stride(${ELEMENT_SIZE + 11}) + .iterations(1) + .Test(${KERNEL}); +} + +TEST(${TEST_NAME}, bh_${TILE_HEIGHT * 7}_bw_${TILE_WIDTH * 23}_ies_${ELEMENT_SIZE + 17}_oes_${ELEMENT_SIZE + 13}) { + $if ISA_CHECK: + ${ISA_CHECK}; + TransposeMicrokernelTester() + .input_stride(${TILE_WIDTH * 23 + 5}) + .output_stride(${TILE_HEIGHT * 7 + 6}) + .block_width(${TILE_WIDTH * 23}) + .block_height(${TILE_HEIGHT * 7}) + .element_size(${ELEMENT_SIZE}) + .input_element_stride(${ELEMENT_SIZE + 17}) + .output_element_stride(${ELEMENT_SIZE + 13}) + .iterations(1) + .Test(${KERNEL}); +} """ |