diff options
author | Zhi An Ng <zhin@google.com> | 2022-09-01 10:59:41 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-09-01 11:00:39 -0700 |
commit | fb5abc8628079b870ac818044286083294e55f1d (patch) | |
tree | 69d07630d24e2cd4280bd791fa97ed31e67fdc65 | |
parent | 0eaea5648189017ea286d1df37c0ca34f8302e25 (diff) | |
download | XNNPACK-fb5abc8628079b870ac818044286083294e55f1d.tar.gz |
Add primary_tile argument to xnn_indirection_init_dwconv2d
This allows the indirection setup to write pointers to the zero buffer for the last (primary_tile - kernel_size) elements in the indirection buffer.
PiperOrigin-RevId: 471575796
-rw-r--r-- | bench/f16-dwconv.cc | 2 | ||||
-rw-r--r-- | bench/f32-dwconv.cc | 2 | ||||
-rw-r--r-- | bench/qs8-dwconv.cc | 2 | ||||
-rw-r--r-- | src/indirection.c | 7 | ||||
-rw-r--r-- | src/operators/average-pooling-nhwc.c | 2 | ||||
-rw-r--r-- | src/operators/convolution-nhwc.c | 7 | ||||
-rw-r--r-- | src/xnnpack/indirection.h | 1 |
7 files changed, 17 insertions, 6 deletions
diff --git a/bench/f16-dwconv.cc b/bench/f16-dwconv.cc index 3a730b360..cf90b7d37 100644 --- a/bench/f16-dwconv.cc +++ b/bench/f16-dwconv.cc @@ -118,7 +118,7 @@ static void f16_dwconv(benchmark::State& state, convolution_op.padding_top = padding_top; convolution_op.padding_left = padding_left; - xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */); + xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 1 /* log2(sizeof(uint16_t)) */); for (size_t n = 1; n < num_buffers; n++) { std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements); } diff --git a/bench/f32-dwconv.cc b/bench/f32-dwconv.cc index 64989fc4f..749ee8842 100644 --- a/bench/f32-dwconv.cc +++ b/bench/f32-dwconv.cc @@ -110,7 +110,7 @@ static void f32_dwconv(benchmark::State& state, convolution_op.padding_top = padding_top; convolution_op.padding_left = padding_left; - xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 2 /* log2(sizeof(float)) */); + xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 2 /* log2(sizeof(float)) */); for (size_t n = 1; n < num_buffers; n++) { std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements); } diff --git a/bench/qs8-dwconv.cc b/bench/qs8-dwconv.cc index 2764815f4..6837c9575 100644 --- a/bench/qs8-dwconv.cc +++ b/bench/qs8-dwconv.cc @@ -116,7 +116,7 @@ static void DWConvBenchmark(benchmark::State& state, convolution_op.padding_top = padding_top; convolution_op.padding_left = padding_left; - xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 0 /* log2(sizeof(int8_t)) */); + xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 0 /* log2(sizeof(int8_t)) */); for (size_t n = 1; n < num_buffers; n++) { std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements); } diff --git a/src/indirection.c b/src/indirection.c index 
50ad95980..359a5b80e 100644 --- a/src/indirection.c +++ b/src/indirection.c @@ -202,6 +202,7 @@ void xnn_indirection_init_dwconv2d( xnn_operator_t op, size_t step_height, size_t step_width, + size_t primary_tile, uint32_t log2_element_size) { const void** indirection_buffer = op->indirection_buffer; @@ -247,6 +248,12 @@ void xnn_indirection_init_dwconv2d( } } } + + const void* last_output_pixel = indirection_buffer[output_height * step_height - 1]; + const size_t last_kernel_index = output_height * step_height - (kernel_height * kernel_width); + for (size_t tile_index = kernel_height * kernel_width; tile_index < primary_tile; tile_index++) { + indirection_buffer[last_kernel_index + tile_index] = last_output_pixel; + } } void xnn_indirection_init_maxpool2d( diff --git a/src/operators/average-pooling-nhwc.c b/src/operators/average-pooling-nhwc.c index a43994fdc..bab46731d 100644 --- a/src/operators/average-pooling-nhwc.c +++ b/src/operators/average-pooling-nhwc.c @@ -740,7 +740,7 @@ static enum xnn_status setup_average_pooling2d( } average_pooling_op->indirection_buffer = indirection_buffer; - xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, log2_data_element_size); + xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, primary_tile, log2_data_element_size); average_pooling_op->last_input = input; average_pooling_op->last_input_height = input_height; diff --git a/src/operators/convolution-nhwc.c b/src/operators/convolution-nhwc.c index 5402c18bd..dfd1c9581 100644 --- a/src/operators/convolution-nhwc.c +++ b/src/operators/convolution-nhwc.c @@ -1711,8 +1711,11 @@ static enum xnn_status setup_convolution2d_nhwc( const size_t output_width = convolution_op->output_width; const size_t step_width = convolution_op->dilation_width == 1 ? 
convolution_op->stride_width : kernel_width; const size_t step_height = kernel_size + (output_width - 1) * step_width * kernel_height; + const size_t primary_tile = convolution_op->ukernel.dwconv.primary_tile; if (input_height != convolution_op->last_input_height || input_width != convolution_op->last_input_width) { - const size_t indirection_buffer_size = sizeof(void*) * output_height * step_height; + // Micro-kernel will read (primary_tile - kernel_size) elements after the end of indirection buffer. + const size_t indirection_buffer_size = + sizeof(void*) * (primary_tile - kernel_size + output_height * step_height); const void** indirection_buffer = (const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size); @@ -1723,7 +1726,7 @@ static enum xnn_status setup_convolution2d_nhwc( } convolution_op->indirection_buffer = indirection_buffer; - xnn_indirection_init_dwconv2d(convolution_op, step_height, step_width, log2_input_element_size); + xnn_indirection_init_dwconv2d(convolution_op, step_height, step_width, primary_tile, log2_input_element_size); convolution_op->last_input = input; convolution_op->last_input_height = input_height; diff --git a/src/xnnpack/indirection.h b/src/xnnpack/indirection.h index 7f4e664e6..2196b3b0c 100644 --- a/src/xnnpack/indirection.h +++ b/src/xnnpack/indirection.h @@ -28,6 +28,7 @@ XNN_INTERNAL void xnn_indirection_init_dwconv2d( xnn_operator_t op, size_t step_height, size_t step_width, + size_t primary_tile, uint32_t log2_element_size); XNN_INTERNAL void xnn_indirection_init_deconv2d( |