aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marat Dukhan <maratek@google.com> 2022-08-23 22:05:36 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-23 22:06:35 -0700
commit 2c02fb77c4323339014390a5d377182419bacd7f (patch)
tree 52c7ec9c0f69e5c9341b06d91f3639c29b2c935c
parent f133344411a0fc6ebf1a3b1518321b61e2e759b9 (diff)
download XNNPACK-2c02fb77c4323339014390a5d377182419bacd7f.tar.gz
Fix FILTERBANK-ACCUMULATE microkernels
Make FILTERBANK-ACCUMULATE microkernels match TFLM audio_frontend semantics

PiperOrigin-RevId: 469635373
-rw-r--r-- BUILD.bazel 4
-rwxr-xr-x CMakeLists.txt 6
-rw-r--r-- bench/u32-filterbank-accumulate.cc 10
-rwxr-xr-x scripts/generate-u32-filterbank-accumulate.sh 4
-rw-r--r-- src/u32-filterbank-accumulate/aarch32-neon-x1.S 58
-rw-r--r-- src/u32-filterbank-accumulate/aarch32-neon-x2.S 69
-rw-r--r-- src/u32-filterbank-accumulate/gen/neon-x1.c 53
-rw-r--r-- src/u32-filterbank-accumulate/gen/neon-x2.c 61
-rw-r--r-- src/u32-filterbank-accumulate/gen/scalar-x1.c 1
-rw-r--r-- src/u32-filterbank-accumulate/neon.c.in 58
-rw-r--r-- src/u32-filterbank-accumulate/scalar.c.in 1
-rw-r--r-- test/filterbank-accumulate-microkernel-tester.h 1
-rw-r--r-- test/u32-filterbank-accumulate.cc 68
-rw-r--r-- test/u32-filterbank-accumulate.yaml 9
14 files changed, 4 insertions, 399 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index a11e9d439..35c7a89dc 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4214,8 +4214,6 @@ ALL_NEON_MICROKERNEL_SRCS = [
"src/u8-maxpool/9p8x-minmax-neon-c16.c",
"src/u8-rmax/neon.c",
"src/u8-vclamp/neon-x64.c",
- "src/u32-filterbank-accumulate/gen/neon-x1.c",
- "src/u32-filterbank-accumulate/gen/neon-x2.c",
"src/xx-fill/neon-x64.c",
"src/xx-pad/neon.c",
"src/x8-transposec/gen/8x8-multi-dec-zip-neon.c",
@@ -8309,8 +8307,6 @@ AARCH32_ASM_MICROKERNEL_SRCS = [
"src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S",
"src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S",
"src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S",
- "src/u32-filterbank-accumulate/aarch32-neon-x1.S",
- "src/u32-filterbank-accumulate/aarch32-neon-x2.S",
]
AARCH64_ASM_MICROKERNEL_SRCS = [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dce2f237..c7cfb7c4e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2708,8 +2708,6 @@ SET(ALL_NEON_MICROKERNEL_SRCS
src/u8-maxpool/9p8x-minmax-neon-c16.c
src/u8-rmax/neon.c
src/u8-vclamp/neon-x64.c
- src/u32-filterbank-accumulate/gen/neon-x1.c
- src/u32-filterbank-accumulate/gen/neon-x2.c
src/xx-fill/neon-x64.c
src/xx-pad/neon.c
src/x8-transposec/gen/8x8-multi-dec-zip-neon.c
@@ -6750,9 +6748,7 @@ SET(AARCH32_ASM_MICROKERNEL_SRCS
src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-ld64.S
src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S
src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S
- src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S
- src/u32-filterbank-accumulate/aarch32-neon-x1.S
- src/u32-filterbank-accumulate/aarch32-neon-x2.S)
+ src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S)
SET(AARCH64_ASM_MICROKERNEL_SRCS
src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S
diff --git a/bench/u32-filterbank-accumulate.cc b/bench/u32-filterbank-accumulate.cc
index ba064f945..948998339 100644
--- a/bench/u32-filterbank-accumulate.cc
+++ b/bench/u32-filterbank-accumulate.cc
@@ -67,16 +67,6 @@ static void BenchmarkKernelSize(benchmark::internal::Benchmark* b)
b->Args({1, 13});
}
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
BENCHMARK_CAPTURE(filterbank_accumulate, u32_scalar_x1, xnn_u32_filterbank_accumulate_ukernel__scalar_x1)->Apply(BenchmarkKernelSize)->UseRealTime();
#ifndef XNNPACK_BENCHMARK_NO_MAIN
diff --git a/scripts/generate-u32-filterbank-accumulate.sh b/scripts/generate-u32-filterbank-accumulate.sh
index 36f44f266..f2dee01d0 100755
--- a/scripts/generate-u32-filterbank-accumulate.sh
+++ b/scripts/generate-u32-filterbank-accumulate.sh
@@ -7,10 +7,6 @@
################################### SCALAR ###################################
tools/xngen src/u32-filterbank-accumulate/scalar.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/scalar-x1.c &
-################################### NEON ###################################
-tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/neon-x1.c &
-tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=2 -o src/u32-filterbank-accumulate/gen/neon-x2.c &
-
################################## Unit tests #################################
tools/generate-filterbank-accumulate-test.py --spec test/u32-filterbank-accumulate.yaml --output test/u32-filterbank-accumulate.cc &
diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x1.S b/src/u32-filterbank-accumulate/aarch32-neon-x1.S
deleted file mode 100644
index bd5ad932a..000000000
--- a/src/u32-filterbank-accumulate/aarch32-neon-x1.S
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <xnnpack/assembly.h>
-
-.syntax unified
-
-// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1(
-// size_t rows, r0
-// const uint32_t* input, r1
-// const uint8_t* weight_widths, r2
-// const uint16_t* weights, r3
-// uint64_t* output) sp -> r12
-
-// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved.
-
-// Register usage
-// input r1 d2
-// weights r3 d3 d4 d5
-// output r12 d0 d1
-
-// weight_widths r2 r4
-
-BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
- .arm
-#ifndef __APPLE__
- .arch armv7-a
- .fpu neon
-#endif
- LDR r12, [sp] // output
- PUSH {r4} // push 4 bytes
- VMOV.U8 q0, #0 // weight_accumulator
-0:
- LDRB r4, [r2], #1 // weight_widths
-
-1:
- VLD1.32 {d3[]}, [r3]! // weights
- VLD1.32 {d2[]}, [r1]! // input
- SUBS r4, r4, #1
- VMOVL.U16 q2, d3
- VMLAL.U32 q0, d4, d2
- BHI 1b
-
- VST1.64 {d0}, [r12]!
- SUBS r0, r0, #1
- VMOV d0, d1
- BNE 0b
-
- POP {r4}
- BX lr
-
-END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
-
-#ifdef __ELF__
-.section ".note.GNU-stack","",%progbits
-#endif
diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x2.S b/src/u32-filterbank-accumulate/aarch32-neon-x2.S
deleted file mode 100644
index 5c8cc3a00..000000000
--- a/src/u32-filterbank-accumulate/aarch32-neon-x2.S
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <xnnpack/assembly.h>
-
-.syntax unified
-
-// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2(
-// size_t rows, r0
-// const uint32_t* input, r1
-// const uint8_t* weight_widths, r2
-// const uint16_t* weights, r3
-// uint64_t* output) sp -> r12
-
-// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved.
-
-// Register usage
-// input r1 d2
-// weights r3 d3 d4 d5
-// output r12 d0 d1
-
-// weight_widths r2 r4
-
-BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
- .arm
-#ifndef __APPLE__
- .arch armv7-a
- .fpu neon
-#endif
- LDR r12, [sp] // output
- PUSH {r4} // push 4 bytes
- VMOV.U8 q0, #0 // weight_accumulator
-0:
- LDRB r4, [r2], #1 // weight_widths
- SUBS r4, r4, #1
- BLS 2f // less than 2 weights?
-
-1:
- VLD1.16 {d3}, [r3]! // weights
- VLD1.32 {d2}, [r1]! // input
- SUBS r4, r4, #2
- VMOVL.U16 q2, d3
- VMLAL.U32 q0, d4, d2[0]
- VMLAL.U32 q0, d5, d2[1]
- BHI 1b
-
- BLO 3f // is there a remainder?
-2:
- VLD1.32 {d3[]}, [r3]! // weights
- VLD1.32 {d2[]}, [r1]! // input
- VMOVL.U16 q2, d3
- VMLAL.U32 q0, d4, d2
-
-3:
- VST1.64 {d0}, [r12]!
- SUBS r0, r0, #1
- VMOV d0, d1
- BNE 0b
-
- POP {r4}
- BX lr
-
-END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
-
-#ifdef __ELF__
-.section ".note.GNU-stack","",%progbits
-#endif
diff --git a/src/u32-filterbank-accumulate/gen/neon-x1.c b/src/u32-filterbank-accumulate/gen/neon-x1.c
deleted file mode 100644
index d57abef42..000000000
--- a/src/u32-filterbank-accumulate/gen/neon-x1.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// Auto-generated file. Do not edit!
-// Template: src/u32-filterbank-accumulate/neon.c.in
-// Generator: tools/xngen
-//
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x1(
- size_t rows,
- const uint32_t* input,
- const uint8_t* weight_widths,
- const uint16_t* weights,
- uint64_t* output) {
-
- assert(rows != 0);
- assert(input != NULL);
- assert(weight_widths != NULL);
- assert(weights != NULL);
- assert(output != NULL);
-
- uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
- do {
- size_t n = (size_t) *weight_widths++;
- assert(n != 0);
-
- if (n != 0) {
- do {
- const uint32x2_t vi = vld1_dup_u32(input); input += 1;
- const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
- const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
- weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
- } while (--n != 0);
- }
-
- vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1;
- weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
- } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c
deleted file mode 100644
index ad7aab503..000000000
--- a/src/u32-filterbank-accumulate/gen/neon-x2.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// Auto-generated file. Do not edit!
-// Template: src/u32-filterbank-accumulate/neon.c.in
-// Generator: tools/xngen
-//
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x2(
- size_t rows,
- const uint32_t* input,
- const uint8_t* weight_widths,
- const uint16_t* weights,
- uint64_t* output) {
-
- assert(rows != 0);
- assert(input != NULL);
- assert(weight_widths != NULL);
- assert(weights != NULL);
- assert(output != NULL);
-
- uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
- do {
- size_t n = (size_t) *weight_widths++;
- assert(n != 0);
-
- for (;n >= 2; n -= 2) {
- const uint32x2_t vi = vld1_u32(input); input += 2;
- const uint16x4_t vw = vld1_u16(weights); weights += 4;
- const uint32x4_t vw32 = vmovl_u16(vw);
-
- weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
- weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
- }
- if (n != 0) {
- do {
- const uint32x2_t vi = vld1_dup_u32(input); input += 1;
- const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
- const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
- weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
- } while (--n != 0);
- }
-
- vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1;
- weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
- } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/gen/scalar-x1.c b/src/u32-filterbank-accumulate/gen/scalar-x1.c
index b0e8e5749..8ec5c0771 100644
--- a/src/u32-filterbank-accumulate/gen/scalar-x1.c
+++ b/src/u32-filterbank-accumulate/gen/scalar-x1.c
@@ -50,6 +50,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x1(
*output++ = weight_accumulator;
weight_accumulator = unweight_accumulator;
+ unweight_accumulator = 0;
} while (--rows != 0);
}
diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in
deleted file mode 100644
index 4038107f8..000000000
--- a/src/u32-filterbank-accumulate/neon.c.in
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}(
- size_t rows,
- const uint32_t* input,
- const uint8_t* weight_widths,
- const uint16_t* weights,
- uint64_t* output) {
-
- assert(rows != 0);
- assert(input != NULL);
- assert(weight_widths != NULL);
- assert(weights != NULL);
- assert(output != NULL);
-
- uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
- do {
- size_t n = (size_t) *weight_widths++;
- assert(n != 0);
-
- $if BATCH_TILE == 2:
- for (;n >= 2; n -= 2) {
- const uint32x2_t vi = vld1_u32(input); input += 2;
- const uint16x4_t vw = vld1_u16(weights); weights += 4;
- const uint32x4_t vw32 = vmovl_u16(vw);
-
- weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
- weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
- }
- if (n != 0) {
- do {
- const uint32x2_t vi = vld1_dup_u32(input); input += 1;
- const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
- const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
- weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
- } while (--n != 0);
- }
-
- vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1;
- weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
- } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/scalar.c.in b/src/u32-filterbank-accumulate/scalar.c.in
index cdcf3bb4c..b9dc80c1a 100644
--- a/src/u32-filterbank-accumulate/scalar.c.in
+++ b/src/u32-filterbank-accumulate/scalar.c.in
@@ -47,6 +47,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x${BATCH_TILE}(
*output++ = weight_accumulator;
weight_accumulator = unweight_accumulator;
+ unweight_accumulator = 0;
} while (--rows != 0);
}
diff --git a/test/filterbank-accumulate-microkernel-tester.h b/test/filterbank-accumulate-microkernel-tester.h
index 6a85e81df..4a1a705a0 100644
--- a/test/filterbank-accumulate-microkernel-tester.h
+++ b/test/filterbank-accumulate-microkernel-tester.h
@@ -76,6 +76,7 @@ class FilterbankAccumulateMicrokernelTester {
}
output_ref[m] = weight_accumulator;
weight_accumulator = unweight_accumulator;
+ unweight_accumulator = 0;
}
// Call optimized micro-kernel.
diff --git a/test/u32-filterbank-accumulate.cc b/test/u32-filterbank-accumulate.cc
index 4929af500..655c9e810 100644
--- a/test/u32-filterbank-accumulate.cc
+++ b/test/u32-filterbank-accumulate.cc
@@ -17,74 +17,6 @@
#include "filterbank-accumulate-microkernel-tester.h"
-#if XNN_ARCH_ARM
- TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_1) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1);
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1);
- }
-#endif // XNN_ARCH_ARM
-
-
-#if XNN_ARCH_ARM
- TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_1) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2);
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2);
- }
-#endif // XNN_ARCH_ARM
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_1) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_1) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(1)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
- }
-
- TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_2) {
- TEST_REQUIRES_ARM_NEON;
- FilterbankAccumulateMicrokernelTester()
- .rows(2)
- .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_1) {
FilterbankAccumulateMicrokernelTester()
.rows(1)
diff --git a/test/u32-filterbank-accumulate.yaml b/test/u32-filterbank-accumulate.yaml
index 54798504c..a44f9d45c 100644
--- a/test/u32-filterbank-accumulate.yaml
+++ b/test/u32-filterbank-accumulate.yaml
@@ -3,14 +3,5 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
-
-# AArch32 assembly
-- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
-- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
-
-# ARM NEON
-- name: xnn_u32_filterbank_accumulate_ukernel__neon_x1
-- name: xnn_u32_filterbank_accumulate_ukernel__neon_x2
-
# Scalar
- name: xnn_u32_filterbank_accumulate_ukernel__scalar_x1