aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhi An Ng <zhin@google.com> 2022-09-01 10:59:41 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-09-01 11:00:39 -0700
commit fb5abc8628079b870ac818044286083294e55f1d (patch)
tree 69d07630d24e2cd4280bd791fa97ed31e67fdc65
parent 0eaea5648189017ea286d1df37c0ca34f8302e25 (diff)
download XNNPACK-fb5abc8628079b870ac818044286083294e55f1d.tar.gz
Add primary_tile argument to xnn_indirection_init_dwconv2d
This allows indirection to write pointers to the zero buffer for the last (primary_tile - kernel_size) elements in the indirection buffer.

PiperOrigin-RevId: 471575796
-rw-r--r-- bench/f16-dwconv.cc 2
-rw-r--r-- bench/f32-dwconv.cc 2
-rw-r--r-- bench/qs8-dwconv.cc 2
-rw-r--r-- src/indirection.c 7
-rw-r--r-- src/operators/average-pooling-nhwc.c 2
-rw-r--r-- src/operators/convolution-nhwc.c 7
-rw-r--r-- src/xnnpack/indirection.h 1
7 files changed, 17 insertions, 6 deletions
diff --git a/bench/f16-dwconv.cc b/bench/f16-dwconv.cc
index 3a730b360..cf90b7d37 100644
--- a/bench/f16-dwconv.cc
+++ b/bench/f16-dwconv.cc
@@ -118,7 +118,7 @@ static void f16_dwconv(benchmark::State& state,
convolution_op.padding_top = padding_top;
convolution_op.padding_left = padding_left;
- xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */);
+ xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 1 /* log2(sizeof(uint16_t)) */);
for (size_t n = 1; n < num_buffers; n++) {
std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
}
diff --git a/bench/f32-dwconv.cc b/bench/f32-dwconv.cc
index 64989fc4f..749ee8842 100644
--- a/bench/f32-dwconv.cc
+++ b/bench/f32-dwconv.cc
@@ -110,7 +110,7 @@ static void f32_dwconv(benchmark::State& state,
convolution_op.padding_top = padding_top;
convolution_op.padding_left = padding_left;
- xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 2 /* log2(sizeof(float)) */);
+ xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 2 /* log2(sizeof(float)) */);
for (size_t n = 1; n < num_buffers; n++) {
std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
}
diff --git a/bench/qs8-dwconv.cc b/bench/qs8-dwconv.cc
index 2764815f4..6837c9575 100644
--- a/bench/qs8-dwconv.cc
+++ b/bench/qs8-dwconv.cc
@@ -116,7 +116,7 @@ static void DWConvBenchmark(benchmark::State& state,
convolution_op.padding_top = padding_top;
convolution_op.padding_left = padding_left;
- xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 0 /* log2(sizeof(int8_t)) */);
+ xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, primary_tile, 0 /* log2(sizeof(int8_t)) */);
for (size_t n = 1; n < num_buffers; n++) {
std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
}
diff --git a/src/indirection.c b/src/indirection.c
index 50ad95980..359a5b80e 100644
--- a/src/indirection.c
+++ b/src/indirection.c
@@ -202,6 +202,7 @@ void xnn_indirection_init_dwconv2d(
xnn_operator_t op,
size_t step_height,
size_t step_width,
+ size_t primary_tile,
uint32_t log2_element_size)
{
const void** indirection_buffer = op->indirection_buffer;
@@ -247,6 +248,12 @@ void xnn_indirection_init_dwconv2d(
}
}
}
+
+ const void* last_output_pixel = indirection_buffer[output_height * step_height - 1];
+ const size_t last_kernel_index = output_height * step_height - (kernel_height * kernel_width);
+ for (size_t tile_index = kernel_height * kernel_width; tile_index < primary_tile; tile_index++) {
+ indirection_buffer[last_kernel_index + tile_index] = last_output_pixel;
+ }
}
void xnn_indirection_init_maxpool2d(
diff --git a/src/operators/average-pooling-nhwc.c b/src/operators/average-pooling-nhwc.c
index a43994fdc..bab46731d 100644
--- a/src/operators/average-pooling-nhwc.c
+++ b/src/operators/average-pooling-nhwc.c
@@ -740,7 +740,7 @@ static enum xnn_status setup_average_pooling2d(
}
average_pooling_op->indirection_buffer = indirection_buffer;
- xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, log2_data_element_size);
+ xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, primary_tile, log2_data_element_size);
average_pooling_op->last_input = input;
average_pooling_op->last_input_height = input_height;
diff --git a/src/operators/convolution-nhwc.c b/src/operators/convolution-nhwc.c
index 5402c18bd..dfd1c9581 100644
--- a/src/operators/convolution-nhwc.c
+++ b/src/operators/convolution-nhwc.c
@@ -1711,8 +1711,11 @@ static enum xnn_status setup_convolution2d_nhwc(
const size_t output_width = convolution_op->output_width;
const size_t step_width = convolution_op->dilation_width == 1 ? convolution_op->stride_width : kernel_width;
const size_t step_height = kernel_size + (output_width - 1) * step_width * kernel_height;
+ const size_t primary_tile = convolution_op->ukernel.dwconv.primary_tile;
if (input_height != convolution_op->last_input_height || input_width != convolution_op->last_input_width) {
- const size_t indirection_buffer_size = sizeof(void*) * output_height * step_height;
+ // Micro-kernel will read (primary_tile - kernel_size) elements after the end of indirection buffer.
+ const size_t indirection_buffer_size =
+ sizeof(void*) * (primary_tile - kernel_size + output_height * step_height);
const void** indirection_buffer =
(const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size);
@@ -1723,7 +1726,7 @@ static enum xnn_status setup_convolution2d_nhwc(
}
convolution_op->indirection_buffer = indirection_buffer;
- xnn_indirection_init_dwconv2d(convolution_op, step_height, step_width, log2_input_element_size);
+ xnn_indirection_init_dwconv2d(convolution_op, step_height, step_width, primary_tile, log2_input_element_size);
convolution_op->last_input = input;
convolution_op->last_input_height = input_height;
diff --git a/src/xnnpack/indirection.h b/src/xnnpack/indirection.h
index 7f4e664e6..2196b3b0c 100644
--- a/src/xnnpack/indirection.h
+++ b/src/xnnpack/indirection.h
@@ -28,6 +28,7 @@ XNN_INTERNAL void xnn_indirection_init_dwconv2d(
xnn_operator_t op,
size_t step_height,
size_t step_width,
+ size_t primary_tile,
uint32_t log2_element_size);
XNN_INTERNAL void xnn_indirection_init_deconv2d(