aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarat Dukhan <maratek@google.com>2022-08-22 23:46:05 -0700
committerXNNPACK Team <xnnpack-github-robot@google.com>2022-08-22 23:47:00 -0700
commit0f51d3524b7d5ea4d40f24ab3aa9a5c0215bea50 (patch)
tree5ccb81ccd296a1ccd19278be7bc3ecb4dbd1e8ce
parent4bd21df6ac8c62bae76a6cf9ddd5e2bd9513f373 (diff)
downloadXNNPACK-0f51d3524b7d5ea4d40f24ab3aa9a5c0215bea50.tar.gz
Remove batch argument for FILTERBANK-ACCUMULATE microkernels
PiperOrigin-RevId: 469378551
-rw-r--r--bench/u32-filterbank-accumulate.cc2
-rw-r--r--src/u32-filterbank-accumulate/gen/neon-x1.c2
-rw-r--r--src/u32-filterbank-accumulate/gen/neon-x2.c2
-rw-r--r--src/u32-filterbank-accumulate/gen/scalar-x1.c2
-rw-r--r--src/u32-filterbank-accumulate/neon.c.in2
-rw-r--r--src/u32-filterbank-accumulate/scalar.c.in2
-rw-r--r--src/xnnpack/filterbank.h1
-rw-r--r--src/xnnpack/microfnptr.h1
-rw-r--r--test/filterbank-accumulate-microkernel-tester.h42
-rw-r--r--test/u32-filterbank-accumulate.cc71
-rwxr-xr-xtools/generate-filterbank-accumulate-test.py40
11 files changed, 39 insertions, 128 deletions
diff --git a/bench/u32-filterbank-accumulate.cc b/bench/u32-filterbank-accumulate.cc
index c899f65ba..a5c0a9a52 100644
--- a/bench/u32-filterbank-accumulate.cc
+++ b/bench/u32-filterbank-accumulate.cc
@@ -40,7 +40,7 @@ void filterbank_accumulate(
std::iota(output.begin(), output.end(), 0);
for (auto _ : state) {
- filterbank_accumulate(rows, batch, input.data(), weight_widths.data(), weights.data(), output.data());
+ filterbank_accumulate(rows, input.data(), weight_widths.data(), weights.data(), output.data());
}
const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
diff --git a/src/u32-filterbank-accumulate/gen/neon-x1.c b/src/u32-filterbank-accumulate/gen/neon-x1.c
index 1181f3e3d..d57abef42 100644
--- a/src/u32-filterbank-accumulate/gen/neon-x1.c
+++ b/src/u32-filterbank-accumulate/gen/neon-x1.c
@@ -19,14 +19,12 @@
void xnn_u32_filterbank_accumulate_ukernel__neon_x1(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
uint64_t* output) {
assert(rows != 0);
- assert(batch_size != 0);
assert(input != NULL);
assert(weight_widths != NULL);
assert(weights != NULL);
diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c
index 375ba5f08..155516885 100644
--- a/src/u32-filterbank-accumulate/gen/neon-x2.c
+++ b/src/u32-filterbank-accumulate/gen/neon-x2.c
@@ -19,14 +19,12 @@
void xnn_u32_filterbank_accumulate_ukernel__neon_x2(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
uint64_t* output) {
assert(rows != 0);
- assert(batch_size != 0);
assert(input != NULL);
assert(weight_widths != NULL);
assert(weights != NULL);
diff --git a/src/u32-filterbank-accumulate/gen/scalar-x1.c b/src/u32-filterbank-accumulate/gen/scalar-x1.c
index fdca24be5..b0e8e5749 100644
--- a/src/u32-filterbank-accumulate/gen/scalar-x1.c
+++ b/src/u32-filterbank-accumulate/gen/scalar-x1.c
@@ -17,14 +17,12 @@
void xnn_u32_filterbank_accumulate_ukernel__scalar_x1(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
uint64_t* output) {
assert(rows != 0);
- assert(batch_size != 0);
assert(input != NULL);
assert(weight_widths != NULL);
assert(weights != NULL);
diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in
index ea7e67aac..b51ac5a65 100644
--- a/src/u32-filterbank-accumulate/neon.c.in
+++ b/src/u32-filterbank-accumulate/neon.c.in
@@ -15,14 +15,12 @@
void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
uint64_t* output) {
assert(rows != 0);
- assert(batch_size != 0);
assert(input != NULL);
assert(weight_widths != NULL);
assert(weights != NULL);
diff --git a/src/u32-filterbank-accumulate/scalar.c.in b/src/u32-filterbank-accumulate/scalar.c.in
index e2d30374e..cdcf3bb4c 100644
--- a/src/u32-filterbank-accumulate/scalar.c.in
+++ b/src/u32-filterbank-accumulate/scalar.c.in
@@ -14,14 +14,12 @@ $assert BATCH_TILE == 1
void xnn_u32_filterbank_accumulate_ukernel__scalar_x${BATCH_TILE}(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
uint64_t* output) {
assert(rows != 0);
- assert(batch_size != 0);
assert(input != NULL);
assert(weight_widths != NULL);
assert(weights != NULL);
diff --git a/src/xnnpack/filterbank.h b/src/xnnpack/filterbank.h
index 5349cf42c..90ce94051 100644
--- a/src/xnnpack/filterbank.h
+++ b/src/xnnpack/filterbank.h
@@ -18,7 +18,6 @@ extern "C" {
#define DECLARE_U32_FILTERBANK_ACCUMULATE_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
size_t rows, \
- size_t batch_size, \
const uint32_t* input, \
const uint8_t* weight_widths, \
const uint16_t* weights, \
diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h
index bc66c5e49..8555ea44c 100644
--- a/src/xnnpack/microfnptr.h
+++ b/src/xnnpack/microfnptr.h
@@ -1641,7 +1641,6 @@ typedef void (*xnn_s16_window_ukernel_function)(
typedef void (*xnn_u32_filterbank_accumulate_ukernel_function)(
size_t rows,
- size_t batch_size,
const uint32_t* input,
const uint8_t* weight_widths,
const uint16_t* weights,
diff --git a/test/filterbank-accumulate-microkernel-tester.h b/test/filterbank-accumulate-microkernel-tester.h
index cda264807..6a85e81df 100644
--- a/test/filterbank-accumulate-microkernel-tester.h
+++ b/test/filterbank-accumulate-microkernel-tester.h
@@ -32,16 +32,6 @@ class FilterbankAccumulateMicrokernelTester {
return this->rows_;
}
- inline FilterbankAccumulateMicrokernelTester& batch(size_t batch) {
- assert(batch != 0);
- this->batch_ = batch;
- return *this;
- }
-
- inline size_t batch() const {
- return this->batch_;
- }
-
inline FilterbankAccumulateMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -54,37 +44,42 @@ class FilterbankAccumulateMicrokernelTester {
void Test(xnn_u32_filterbank_accumulate_ukernel_function filterbank_accumulate) const {
std::random_device random_device;
auto rng = std::mt19937(random_device());
+ auto u8rng = std::bind(std::uniform_int_distribution<uint16_t>(1, 10), std::ref(rng));
auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), std::ref(rng));
auto u32rng = std::bind(std::uniform_int_distribution<uint32_t>(), std::ref(rng));
- std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> input(batch() + XNN_EXTRA_BYTES / sizeof(int16_t));
- std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> weight_widths(rows());
- std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> weights(batch() * 2 + XNN_EXTRA_BYTES / sizeof(uint16_t));
- std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> output(rows());
- std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> output_ref(rows());
+ std::vector<uint8_t> filterbank_widths(rows());
+ std::vector<uint64_t> output(rows());
+ std::vector<uint64_t> output_ref(rows());
for (size_t iteration = 0; iteration < iterations(); iteration++) {
+ std::generate(filterbank_widths.begin(), filterbank_widths.end(), std::ref(u8rng));
+ const size_t num_channels = std::accumulate(filterbank_widths.cbegin(), filterbank_widths.cend(), 0);
+
+ std::vector<uint32_t> input(num_channels + XNN_EXTRA_BYTES / sizeof(int16_t));
std::generate(input.begin(), input.end(), std::ref(u32rng));
- std::fill(weight_widths.begin(), weight_widths.end(), rows());
+
+ std::vector<uint16_t> weights(num_channels * 2 + XNN_EXTRA_BYTES / sizeof(uint16_t));
std::generate(weights.begin(), weights.end(), std::ref(u16rng));
- std::iota(output.begin(), output.end(), 0);
- std::iota(output_ref.begin(), output_ref.end(), 1);
+
+ std::fill(output.begin(), output.end(), UINT64_C(0xCAFEB0BADEADBEAF));
uint64_t weight_accumulator = 0;
uint64_t unweight_accumulator = 0;
size_t i = 0;
for (size_t m = 0; m < rows(); m++) {
- const size_t weight_width = (size_t) weight_widths[m];
- for (size_t n = 0; n < weight_width; n++, i++) {
- weight_accumulator += (uint64_t) input[i] * (uint64_t) weights[i * 2];
- unweight_accumulator += (uint64_t) input[i] * (uint64_t) weights[i * 2 + 1];
+ const size_t weight_width = filterbank_widths[m];
+ for (size_t n = 0; n < weight_width; n++) {
+ weight_accumulator += uint64_t(input[i]) * uint64_t(weights[i * 2]);
+ unweight_accumulator += uint64_t(input[i]) * uint64_t(weights[i * 2 + 1]);
+ i += 1;
}
output_ref[m] = weight_accumulator;
weight_accumulator = unweight_accumulator;
}
// Call optimized micro-kernel.
- filterbank_accumulate(rows(), batch(), input.data(), weight_widths.data(), weights.data(), output.data());
+ filterbank_accumulate(rows(), input.data(), filterbank_widths.data(), weights.data(), output.data());
// Verify results.
for (size_t m = 0; m < rows(); m++) {
@@ -96,6 +91,5 @@ class FilterbankAccumulateMicrokernelTester {
private:
size_t rows_{1};
- size_t batch_{1};
size_t iterations_{15};
};
diff --git a/test/u32-filterbank-accumulate.cc b/test/u32-filterbank-accumulate.cc
index 7a87cdae3..a1c0fd5d8 100644
--- a/test/u32-filterbank-accumulate.cc
+++ b/test/u32-filterbank-accumulate.cc
@@ -18,94 +18,53 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, batch_eq_1) {
+ TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_1) {
TEST_REQUIRES_ARM_NEON;
FilterbankAccumulateMicrokernelTester()
- .batch(1)
+ .rows(1)
.Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
}
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, batch_gt_1) {
+ TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_gt_1) {
TEST_REQUIRES_ARM_NEON;
- for (size_t batch = 2; batch < 10; batch++) {
+ for (size_t rows = 2; rows <= 10; rows++) {
FilterbankAccumulateMicrokernelTester()
- .batch(batch)
+ .rows(rows)
.Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
}
}
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .batch(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
- }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_eq_2) {
+ TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_1) {
TEST_REQUIRES_ARM_NEON;
FilterbankAccumulateMicrokernelTester()
- .batch(2)
+ .rows(1)
.Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
}
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_div_2) {
+ TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_gt_1) {
TEST_REQUIRES_ARM_NEON;
- for (size_t batch = 4; batch < 20; batch += 2) {
+ for (size_t rows = 2; rows <= 10; rows++) {
FilterbankAccumulateMicrokernelTester()
- .batch(batch)
+ .rows(rows)
.Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
}
}
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_lt_2) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t batch = 1; batch < 2; batch++) {
- FilterbankAccumulateMicrokernelTester()
- .batch(batch)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
- }
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_gt_2) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t batch = 3; batch < 4; batch++) {
- FilterbankAccumulateMicrokernelTester()
- .batch(batch)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
- }
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .batch(2)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
- }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, batch_eq_1) {
+TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_1) {
FilterbankAccumulateMicrokernelTester()
- .batch(1)
+ .rows(1)
.Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1);
}
-TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, batch_gt_1) {
- for (size_t batch = 2; batch < 10; batch++) {
+TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_gt_1) {
+ for (size_t rows = 2; rows <= 10; rows++) {
FilterbankAccumulateMicrokernelTester()
- .batch(batch)
+ .rows(rows)
.Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1);
}
}
-
-TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_2) {
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .batch(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1);
-}
diff --git a/tools/generate-filterbank-accumulate-test.py b/tools/generate-filterbank-accumulate-test.py
index 41bf97009..2a06a9396 100755
--- a/tools/generate-filterbank-accumulate-test.py
+++ b/tools/generate-filterbank-accumulate-test.py
@@ -37,54 +37,24 @@ def split_ukernel_name(name):
FILTERBANK_ACCUMULATE_TEST_TEMPLATE = """\
-TEST(${TEST_NAME}, batch_eq_${BATCH_TILE}) {
+TEST(${TEST_NAME}, rows_eq_1) {
$if ISA_CHECK:
${ISA_CHECK};
FilterbankAccumulateMicrokernelTester()
- .batch(${BATCH_TILE})
+ .rows(1)
.Test(${", ".join(TEST_ARGS)});
}
-$if BATCH_TILE > 1:
- TEST(${TEST_NAME}, batch_div_${BATCH_TILE}) {
- $if ISA_CHECK:
- ${ISA_CHECK};
- for (size_t batch = ${BATCH_TILE*2}; batch < ${BATCH_TILE*10}; batch += ${BATCH_TILE}) {
- FilterbankAccumulateMicrokernelTester()
- .batch(batch)
- .Test(${", ".join(TEST_ARGS)});
- }
- }
-
- TEST(${TEST_NAME}, batch_lt_${BATCH_TILE}) {
- $if ISA_CHECK:
- ${ISA_CHECK};
- for (size_t batch = 1; batch < ${BATCH_TILE}; batch++) {
- FilterbankAccumulateMicrokernelTester()
- .batch(batch)
- .Test(${", ".join(TEST_ARGS)});
- }
- }
-
-TEST(${TEST_NAME}, batch_gt_${BATCH_TILE}) {
+TEST(${TEST_NAME}, rows_gt_1) {
$if ISA_CHECK:
${ISA_CHECK};
- for (size_t batch = ${BATCH_TILE+1}; batch < ${10 if BATCH_TILE == 1 else BATCH_TILE*2}; batch++) {
+ for (size_t rows = 2; rows <= 10; rows++) {
FilterbankAccumulateMicrokernelTester()
- .batch(batch)
+ .rows(rows)
.Test(${", ".join(TEST_ARGS)});
}
}
-TEST(${TEST_NAME}, rows_eq_2) {
- $if ISA_CHECK:
- ${ISA_CHECK};
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .batch(${BATCH_TILE})
- .Test(${", ".join(TEST_ARGS)});
-}
-
"""