diff options
author | Marat Dukhan <maratek@google.com> | 2022-08-22 23:46:05 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-08-22 23:47:00 -0700 |
commit | 0f51d3524b7d5ea4d40f24ab3aa9a5c0215bea50 (patch) | |
tree | 5ccb81ccd296a1ccd19278be7bc3ecb4dbd1e8ce | |
parent | 4bd21df6ac8c62bae76a6cf9ddd5e2bd9513f373 (diff) | |
download | XNNPACK-0f51d3524b7d5ea4d40f24ab3aa9a5c0215bea50.tar.gz |
Remove batch argument for FILTERBANK-ACCUMULATE microkernels
PiperOrigin-RevId: 469378551
-rw-r--r-- | bench/u32-filterbank-accumulate.cc | 2 |
-rw-r--r-- | src/u32-filterbank-accumulate/gen/neon-x1.c | 2 |
-rw-r--r-- | src/u32-filterbank-accumulate/gen/neon-x2.c | 2 |
-rw-r--r-- | src/u32-filterbank-accumulate/gen/scalar-x1.c | 2 |
-rw-r--r-- | src/u32-filterbank-accumulate/neon.c.in | 2 |
-rw-r--r-- | src/u32-filterbank-accumulate/scalar.c.in | 2 |
-rw-r--r-- | src/xnnpack/filterbank.h | 1 |
-rw-r--r-- | src/xnnpack/microfnptr.h | 1 |
-rw-r--r-- | test/filterbank-accumulate-microkernel-tester.h | 42 |
-rw-r--r-- | test/u32-filterbank-accumulate.cc | 71 |
-rwxr-xr-x | tools/generate-filterbank-accumulate-test.py | 40 |
11 files changed, 39 insertions, 128 deletions
diff --git a/bench/u32-filterbank-accumulate.cc b/bench/u32-filterbank-accumulate.cc index c899f65ba..a5c0a9a52 100644 --- a/bench/u32-filterbank-accumulate.cc +++ b/bench/u32-filterbank-accumulate.cc @@ -40,7 +40,7 @@ void filterbank_accumulate( std::iota(output.begin(), output.end(), 0); for (auto _ : state) { - filterbank_accumulate(rows, batch, input.data(), weight_widths.data(), weights.data(), output.data()); + filterbank_accumulate(rows, input.data(), weight_widths.data(), weights.data(), output.data()); } const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); diff --git a/src/u32-filterbank-accumulate/gen/neon-x1.c b/src/u32-filterbank-accumulate/gen/neon-x1.c index 1181f3e3d..d57abef42 100644 --- a/src/u32-filterbank-accumulate/gen/neon-x1.c +++ b/src/u32-filterbank-accumulate/gen/neon-x1.c @@ -19,14 +19,12 @@ void xnn_u32_filterbank_accumulate_ukernel__neon_x1( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, uint64_t* output) { assert(rows != 0); - assert(batch_size != 0); assert(input != NULL); assert(weight_widths != NULL); assert(weights != NULL); diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c index 375ba5f08..155516885 100644 --- a/src/u32-filterbank-accumulate/gen/neon-x2.c +++ b/src/u32-filterbank-accumulate/gen/neon-x2.c @@ -19,14 +19,12 @@ void xnn_u32_filterbank_accumulate_ukernel__neon_x2( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, uint64_t* output) { assert(rows != 0); - assert(batch_size != 0); assert(input != NULL); assert(weight_widths != NULL); assert(weights != NULL); diff --git a/src/u32-filterbank-accumulate/gen/scalar-x1.c b/src/u32-filterbank-accumulate/gen/scalar-x1.c index fdca24be5..b0e8e5749 100644 --- a/src/u32-filterbank-accumulate/gen/scalar-x1.c +++ b/src/u32-filterbank-accumulate/gen/scalar-x1.c @@ -17,14 +17,12 @@ void 
xnn_u32_filterbank_accumulate_ukernel__scalar_x1( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, uint64_t* output) { assert(rows != 0); - assert(batch_size != 0); assert(input != NULL); assert(weight_widths != NULL); assert(weights != NULL); diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in index ea7e67aac..b51ac5a65 100644 --- a/src/u32-filterbank-accumulate/neon.c.in +++ b/src/u32-filterbank-accumulate/neon.c.in @@ -15,14 +15,12 @@ void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, uint64_t* output) { assert(rows != 0); - assert(batch_size != 0); assert(input != NULL); assert(weight_widths != NULL); assert(weights != NULL); diff --git a/src/u32-filterbank-accumulate/scalar.c.in b/src/u32-filterbank-accumulate/scalar.c.in index e2d30374e..cdcf3bb4c 100644 --- a/src/u32-filterbank-accumulate/scalar.c.in +++ b/src/u32-filterbank-accumulate/scalar.c.in @@ -14,14 +14,12 @@ $assert BATCH_TILE == 1 void xnn_u32_filterbank_accumulate_ukernel__scalar_x${BATCH_TILE}( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, uint64_t* output) { assert(rows != 0); - assert(batch_size != 0); assert(input != NULL); assert(weight_widths != NULL); assert(weights != NULL); diff --git a/src/xnnpack/filterbank.h b/src/xnnpack/filterbank.h index 5349cf42c..90ce94051 100644 --- a/src/xnnpack/filterbank.h +++ b/src/xnnpack/filterbank.h @@ -18,7 +18,6 @@ extern "C" { #define DECLARE_U32_FILTERBANK_ACCUMULATE_UKERNEL_FUNCTION(fn_name) \ XNN_INTERNAL void fn_name( \ size_t rows, \ - size_t batch_size, \ const uint32_t* input, \ const uint8_t* weight_widths, \ const uint16_t* weights, \ diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h index bc66c5e49..8555ea44c 100644 --- 
a/src/xnnpack/microfnptr.h +++ b/src/xnnpack/microfnptr.h @@ -1641,7 +1641,6 @@ typedef void (*xnn_s16_window_ukernel_function)( typedef void (*xnn_u32_filterbank_accumulate_ukernel_function)( size_t rows, - size_t batch_size, const uint32_t* input, const uint8_t* weight_widths, const uint16_t* weights, diff --git a/test/filterbank-accumulate-microkernel-tester.h b/test/filterbank-accumulate-microkernel-tester.h index cda264807..6a85e81df 100644 --- a/test/filterbank-accumulate-microkernel-tester.h +++ b/test/filterbank-accumulate-microkernel-tester.h @@ -32,16 +32,6 @@ class FilterbankAccumulateMicrokernelTester { return this->rows_; } - inline FilterbankAccumulateMicrokernelTester& batch(size_t batch) { - assert(batch != 0); - this->batch_ = batch; - return *this; - } - - inline size_t batch() const { - return this->batch_; - } - inline FilterbankAccumulateMicrokernelTester& iterations(size_t iterations) { this->iterations_ = iterations; return *this; @@ -54,37 +44,42 @@ class FilterbankAccumulateMicrokernelTester { void Test(xnn_u32_filterbank_accumulate_ukernel_function filterbank_accumulate) const { std::random_device random_device; auto rng = std::mt19937(random_device()); + auto u8rng = std::bind(std::uniform_int_distribution<uint16_t>(1, 10), std::ref(rng)); auto u16rng = std::bind(std::uniform_int_distribution<uint16_t>(), std::ref(rng)); auto u32rng = std::bind(std::uniform_int_distribution<uint32_t>(), std::ref(rng)); - std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> input(batch() + XNN_EXTRA_BYTES / sizeof(int16_t)); - std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> weight_widths(rows()); - std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> weights(batch() * 2 + XNN_EXTRA_BYTES / sizeof(uint16_t)); - std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> output(rows()); - std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> output_ref(rows()); + std::vector<uint8_t> filterbank_widths(rows()); + std::vector<uint64_t> output(rows()); + 
std::vector<uint64_t> output_ref(rows()); for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::generate(filterbank_widths.begin(), filterbank_widths.end(), std::ref(u8rng)); + const size_t num_channels = std::accumulate(filterbank_widths.cbegin(), filterbank_widths.cend(), 0); + + std::vector<uint32_t> input(num_channels + XNN_EXTRA_BYTES / sizeof(int16_t)); std::generate(input.begin(), input.end(), std::ref(u32rng)); - std::fill(weight_widths.begin(), weight_widths.end(), rows()); + + std::vector<uint16_t> weights(num_channels * 2 + XNN_EXTRA_BYTES / sizeof(uint16_t)); std::generate(weights.begin(), weights.end(), std::ref(u16rng)); - std::iota(output.begin(), output.end(), 0); - std::iota(output_ref.begin(), output_ref.end(), 1); + + std::fill(output.begin(), output.end(), UINT64_C(0xCAFEB0BADEADBEAF)); uint64_t weight_accumulator = 0; uint64_t unweight_accumulator = 0; size_t i = 0; for (size_t m = 0; m < rows(); m++) { - const size_t weight_width = (size_t) weight_widths[m]; - for (size_t n = 0; n < weight_width; n++, i++) { - weight_accumulator += (uint64_t) input[i] * (uint64_t) weights[i * 2]; - unweight_accumulator += (uint64_t) input[i] * (uint64_t) weights[i * 2 + 1]; + const size_t weight_width = filterbank_widths[m]; + for (size_t n = 0; n < weight_width; n++) { + weight_accumulator += uint64_t(input[i]) * uint64_t(weights[i * 2]); + unweight_accumulator += uint64_t(input[i]) * uint64_t(weights[i * 2 + 1]); + i += 1; } output_ref[m] = weight_accumulator; weight_accumulator = unweight_accumulator; } // Call optimized micro-kernel. - filterbank_accumulate(rows(), batch(), input.data(), weight_widths.data(), weights.data(), output.data()); + filterbank_accumulate(rows(), input.data(), filterbank_widths.data(), weights.data(), output.data()); // Verify results. 
for (size_t m = 0; m < rows(); m++) { @@ -96,6 +91,5 @@ class FilterbankAccumulateMicrokernelTester { private: size_t rows_{1}; - size_t batch_{1}; size_t iterations_{15}; }; diff --git a/test/u32-filterbank-accumulate.cc b/test/u32-filterbank-accumulate.cc index 7a87cdae3..a1c0fd5d8 100644 --- a/test/u32-filterbank-accumulate.cc +++ b/test/u32-filterbank-accumulate.cc @@ -18,94 +18,53 @@ #if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, batch_eq_1) { + TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_1) { TEST_REQUIRES_ARM_NEON; FilterbankAccumulateMicrokernelTester() - .batch(1) + .rows(1) .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1); } - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, batch_gt_1) { + TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_gt_1) { TEST_REQUIRES_ARM_NEON; - for (size_t batch = 2; batch < 10; batch++) { + for (size_t rows = 2; rows <= 10; rows++) { FilterbankAccumulateMicrokernelTester() - .batch(batch) + .rows(2) .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1); } } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .batch(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1); - } #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_eq_2) { + TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_1) { TEST_REQUIRES_ARM_NEON; FilterbankAccumulateMicrokernelTester() - .batch(2) + .rows(1) .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); } - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_div_2) { + TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_gt_1) { TEST_REQUIRES_ARM_NEON; - for (size_t batch = 4; batch < 20; batch += 2) { + for (size_t rows = 2; rows <= 10; rows++) { FilterbankAccumulateMicrokernelTester() - .batch(batch) + .rows(2) .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); } } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, 
batch_lt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t batch = 1; batch < 2; batch++) { - FilterbankAccumulateMicrokernelTester() - .batch(batch) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); - } - } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, batch_gt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t batch = 3; batch < 4; batch++) { - FilterbankAccumulateMicrokernelTester() - .batch(batch) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); - } - } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .batch(2) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); - } #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, batch_eq_1) { +TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_1) { FilterbankAccumulateMicrokernelTester() - .batch(1) + .rows(1) .Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1); } -TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, batch_gt_1) { - for (size_t batch = 2; batch < 10; batch++) { +TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_gt_1) { + for (size_t rows = 2; rows <= 10; rows++) { FilterbankAccumulateMicrokernelTester() - .batch(batch) + .rows(2) .Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1); } } - -TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_2) { - FilterbankAccumulateMicrokernelTester() - .rows(2) - .batch(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__scalar_x1); -} diff --git a/tools/generate-filterbank-accumulate-test.py b/tools/generate-filterbank-accumulate-test.py index 41bf97009..2a06a9396 100755 --- a/tools/generate-filterbank-accumulate-test.py +++ b/tools/generate-filterbank-accumulate-test.py @@ -37,54 +37,24 @@ def split_ukernel_name(name): FILTERBANK_ACCUMULATE_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, batch_eq_${BATCH_TILE}) { +TEST(${TEST_NAME}, rows_eq_1) { $if ISA_CHECK: ${ISA_CHECK}; FilterbankAccumulateMicrokernelTester() - .batch(${BATCH_TILE}) + .rows(1) 
.Test(${", ".join(TEST_ARGS)}); } -$if BATCH_TILE > 1: - TEST(${TEST_NAME}, batch_div_${BATCH_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t batch = ${BATCH_TILE*2}; batch < ${BATCH_TILE*10}; batch += ${BATCH_TILE}) { - FilterbankAccumulateMicrokernelTester() - .batch(batch) - .Test(${", ".join(TEST_ARGS)}); - } - } - - TEST(${TEST_NAME}, batch_lt_${BATCH_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t batch = 1; batch < ${BATCH_TILE}; batch++) { - FilterbankAccumulateMicrokernelTester() - .batch(batch) - .Test(${", ".join(TEST_ARGS)}); - } - } - -TEST(${TEST_NAME}, batch_gt_${BATCH_TILE}) { +TEST(${TEST_NAME}, rows_gt_1) { $if ISA_CHECK: ${ISA_CHECK}; - for (size_t batch = ${BATCH_TILE+1}; batch < ${10 if BATCH_TILE == 1 else BATCH_TILE*2}; batch++) { + for (size_t rows = 2; rows <= 10; rows++) { FilterbankAccumulateMicrokernelTester() - .batch(batch) + .rows(2) .Test(${", ".join(TEST_ARGS)}); } } -TEST(${TEST_NAME}, rows_eq_2) { - $if ISA_CHECK: - ${ISA_CHECK}; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .batch(${BATCH_TILE}) - .Test(${", ".join(TEST_ARGS)}); -} - """ |