about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jenkins <bsgcomp@arm.com> 2023-03-16 12:19:40 +0000
committer Jenkins <bsgcomp@arm.com> 2023-03-16 12:19:40 +0000
commit d8bf9b53752a4f573120cf51b31055de8b3c7d29 (patch)
tree 996b6ca8f2ec33efea40514d4707ab9dd6358125
parent cfb1c3035cbfc31a2fe8491c7df13e911698e2b6 (diff)
download ComputeLibrary-d8bf9b53752a4f573120cf51b31055de8b3c7d29.tar.gz
Compute Library v23.02.1
-rw-r--r-- .bazelrc 1
-rw-r--r-- BUILD.bazel 29
-rw-r--r-- CMakeLists.txt 22
-rw-r--r-- CONTRIBUTING.md 2
-rw-r--r-- README.md 30
-rw-r--r-- SConscript 4
-rw-r--r-- cmake/Options.cmake 13
-rw-r--r-- docs/Doxyfile 2
-rw-r--r-- docs/user_guide/how_to_build_and_run_examples.dox 2
-rw-r--r-- docs/user_guide/release_version_and_change_log.dox 4
-rw-r--r-- src/cpu/operators/CpuGemmDirectConv2d.cpp 7
-rw-r--r-- tests/BUILD.bazel 65
-rw-r--r-- tests/validation/NEON/ConvolutionLayer.cpp 10
13 files changed, 147 insertions, 44 deletions
diff --git a/.bazelrc b/.bazelrc
index 267e64898..8611db3d4 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -36,3 +36,4 @@ build --flag_alias=debug=//:debug
build --flag_alias=logging=//:logging
build --flag_alias=openmp=//:openmp
build --flag_alias=cppthreads=//:cppthreads
+build --flag_alias=enable_bf16_validation=//:enable_bf16_validation
diff --git a/BUILD.bazel b/BUILD.bazel
index d33cf6b8b..e3ad75abd 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -66,6 +66,12 @@ bool_flag(
visibility = ["//visibility:public"],
)
+bool_flag(
+ name = "enable_bf16_validation",
+ build_setting_default = False,
+ visibility = ["//visibility:public"],
+)
+
#---------------------------------------------------------------------
# Flag variables
config_setting(
@@ -103,6 +109,14 @@ config_setting(
},
)
+config_setting(
+ name = "bf16_validation_flag",
+ flag_values = {
+ ":enable_bf16_validation": "true",
+ },
+)
+
+
#---------------------------------------------------------------------
# Common defines used for all targets
cc_library(
@@ -112,7 +126,6 @@ cc_library(
"ARM_COMPUTE_CPU_ENABLED",
"ARM_COMPUTE_ENABLE_NEON",
"ARM_COMPUTE_ENABLE_FP16",
- "ARM_COMPUTE_ENABLE_BF16",
"ARM_COMPUTE_ENABLE_I8MM",
"ENABLE_FP16_KERNELS",
"ENABLE_FP32_KERNELS",
@@ -125,6 +138,9 @@ cc_library(
"DARM_COMPUTE_GRAPH_ENABLED",
"ARM_COMPUTE_ENABLE_SVEF32MM",
"ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS",
+ "ENABLE_SVE",
+ "ARM_COMPUTE_ENABLE_SVE",
+ "_GLIBCXX_USE_NANOSLEEP"
] + select({
"//:debug_flag": [
"ARM_COMPUTE_DEBUG_ENABLED",
@@ -227,9 +243,8 @@ cc_library(
"//conditions:default": [],
}),
local_defines = [
- "ENABLE_SVE",
- "ARM_COMPUTE_ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE2",
+ "ARM_COMPUTE_ENABLE_BF16"
],
deps = [
"//:common_defines",
@@ -277,8 +292,7 @@ cc_library(
"//conditions:default": [],
}),
local_defines = [
- "ENABLE_SVE",
- "ARM_COMPUTE_ENABLE_SVE",
+ "ARM_COMPUTE_ENABLE_BF16",
],
deps = [
"//:common_defines",
@@ -333,6 +347,9 @@ cc_library(
"//:openmp_flag": ["-fopenmp"],
"//conditions:default": [],
}),
+ local_defines = [
+ "ARM_COMPUTE_ENABLE_BF16",
+ ],
visibility = ["//visibility:public"],
deps = [
"//:common_defines",
@@ -342,6 +359,8 @@ cc_library(
"//include",
"//support",
"//utils",
+ "//:arm_compute_sve",
+ "//:arm_compute_sve2"
],
alwayslink = True,
)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ac0c722a..72992ed03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,7 +101,12 @@ set(COMMON_CXX_FLAGS
-Wsign-promo
-Weffc++
-Wno-overlength-strings
- -Wno-ignored-attributes)
+ -Wno-ignored-attributes
+ -Wlogical-op
+ -Wnoexcept
+ -Wstrict-null-sentinel
+ -Wno-misleading-indentation
+ -O3)
# Disable note popups on compiler ABI changes
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
@@ -143,9 +148,7 @@ endif()
add_library(arm_compute_sve "")
target_compile_options(arm_compute_sve
PRIVATE "-march=armv8.2-a+sve+fp16+dotprod")
-target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE)
-target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE)
-
+target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16)
target_include_directories(
arm_compute_sve
PUBLIC $<INSTALL_INTERFACE:include>
@@ -165,10 +168,8 @@ target_include_directories(
add_library(arm_compute_sve2 "")
target_compile_options(arm_compute_sve2
PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod")
-target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE)
-target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE)
target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2)
-
+target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16)
target_include_directories(
arm_compute_sve2
PUBLIC $<INSTALL_INTERFACE:include>
@@ -187,6 +188,7 @@ target_include_directories(
add_library(arm_compute_core "")
target_compile_options(arm_compute_core PRIVATE "-march=armv8.2-a+fp16")
+target_compile_definitions(arm_compute_core PRIVATE ARM_COMPUTE_ENABLE_BF16)
target_include_directories(
arm_compute_core
PUBLIC $<INSTALL_INTERFACE:include>
@@ -201,6 +203,8 @@ target_include_directories(
target_compile_options(arm_compute_core PUBLIC ${COMMON_CXX_FLAGS})
add_library(ArmCompute::Core ALIAS arm_compute_core)
+target_link_libraries(
+ arm_compute_core PUBLIC arm_compute_sve arm_compute_sve2)
# ---------------------------------------------------------------------
# Graph Library
@@ -255,7 +259,9 @@ if(BUILD_TESTING)
add_executable(arm_compute_validation "")
target_compile_options(arm_compute_validation PRIVATE "-march=armv8.2-a+fp16")
-
+ if(ENABLE_BF16_VALIDATION)
+ target_compile_definitions(arm_compute_validation PRIVATE ARM_COMPUTE_ENABLE_BF16)
+ endif()
add_subdirectory(tests/validation)
target_compile_options(arm_compute_validation PUBLIC ${COMMON_CXX_FLAGS})
set_target_properties(
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 171d101bd..b11239035 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-Please read https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml
+Please read https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml
Here on github we only publish a snapshot of the main development branch for each release, that's the reason why we don't accept pull requests.
diff --git a/README.md b/README.md
index fee9eafe8..3e6ccaaf0 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
> **⚠ Important**
-> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml#S5_0_inc_lang).
+> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml#S5_0_inc_lang).
> **⚠ Important**
> From release 22.08: armv7a with Android build will no longer be tested or maintained.
@@ -16,7 +16,7 @@
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
</div>
-# Compute Library ![](https://img.shields.io/badge/latest_release-23.02-green)
+# Compute Library ![](https://img.shields.io/badge/latest_release-23.02.1-green)
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br>
@@ -44,7 +44,7 @@ Key Features:
<br>
## Documentation
-[![Documentation](https://img.shields.io/badge/documentation-23.02-green)](https://arm-software.github.io/ComputeLibrary/latest)
+[![Documentation](https://img.shields.io/badge/documentation-23.02.1-green)](https://arm-software.github.io/ComputeLibrary/v23.02.1)
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
@@ -57,24 +57,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon.tar.gz) |
-| Raspberry Pi 4 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) |
-| Odroid N2 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| HiKey960 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| Raspberry Pi 4 | Linux 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon.tar.gz) |
+| Raspberry Pi 4 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) |
+| Odroid N2 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| HiKey960 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
<br>
| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
-| armv7 | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon-cl.tar.gz) |
-| arm64-v8a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
+| armv7 | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon-cl.tar.gz) |
+| arm64-v8a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
<br>
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v23.02-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.02)
+Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v23.02.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.02.1)
Pre-build binaries are generated with the following security / good coding practices related flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -116,13 +116,13 @@ Pre-build binaries are generated with the following security / good coding pract
## Experimental builds
-**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/latest/how_to_build.xhtml) for more details.
+**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/v23.02.1/how_to_build.xhtml) for more details.
<br>
## How to contribute
-Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml).
+Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml).
### Developer Certificate of Origin (DCO)
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)
diff --git a/SConscript b/SConscript
index b2218b512..e5263cf34 100644
--- a/SConscript
+++ b/SConscript
@@ -31,10 +31,10 @@ import zlib
import json
import codecs
-VERSION = "v23.02"
+VERSION = "v23.02.1"
LIBRARY_VERSION_MAJOR = 30
LIBRARY_VERSION_MINOR = 0
-LIBRARY_VERSION_PATCH = 0
+LIBRARY_VERSION_PATCH = 1
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
Import('env')
diff --git a/cmake/Options.cmake b/cmake/Options.cmake
index 20cf0e42c..170dad9f6 100644
--- a/cmake/Options.cmake
+++ b/cmake/Options.cmake
@@ -50,7 +50,6 @@ endif()
# ---------------------------------------------------------------------
# Backends
-# TODO Add help string for each setting (Should user be able to )
option(ENABLE_NEON "Enable Arm® Neon™ support" ON)
option(ARM_COMPUTE_CPU_ENABLED "" ON)
option(ARM_COMPUTE_ENABLE_NEON "" ON)
@@ -65,11 +64,12 @@ option(ENABLE_INTEGER_KERNELS "" ON)
option(ENABLE_NHWC_KERNELS "" ON)
option(ENABLE_NCHW_KERNELS "" ON)
option(ARM_COMPUTE_GRAPH_ENABLED "" ON)
-option(ARM_COMPUTE_ENABLE_BF16 "" ON)
option(ARM_COMPUTE_ENABLE_SVEF32MM "" ON)
option(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS "" ON)
+option(ENABLE_SVE "" ON)
+option(ARM_COMPUTE_ENABLE_SVE "" ON)
+option(ENABLE_BF16_VALIDATION "" OFF)
-# TODO Check if this is required
if(ENABLE_NEON)
add_definitions(-DENABLE_NEON)
endif()
@@ -121,3 +121,10 @@ endif()
if(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
add_definitions(-DARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
endif()
+if(ENABLE_SVE)
+ add_definitions(-DENABLE_SVE)
+endif()
+if(ARM_COMPUTE_ENABLE_SVE)
+ add_definitions(-DARM_COMPUTE_ENABLE_SVE)
+endif()
+add_definitions(-D_GLIBCXX_USE_NANOSLEEP)
\ No newline at end of file
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 33e313fb3..7b025a1de 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = "Compute Library"
# could be handy for archiving the generated documentation or if some version
# control system is used.
-PROJECT_NUMBER = 23.02
+PROJECT_NUMBER = 23.02.1
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
diff --git a/docs/user_guide/how_to_build_and_run_examples.dox b/docs/user_guide/how_to_build_and_run_examples.dox
index 2f61f938a..ab6777d2b 100644
--- a/docs/user_guide/how_to_build_and_run_examples.dox
+++ b/docs/user_guide/how_to_build_and_run_examples.dox
@@ -510,7 +510,7 @@ To build libraries, examples and tests:
mkdir build
cd build
- cmake .. -DOPENMP=1 -DCPPTHREADS=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=.
+ cmake .. -DOPENMP=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=.
cmake --build . -j32
*/
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 8bb2a3f30..11cb71628 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -40,6 +40,10 @@ If there is more than one release in a month then an extra sequential number is
@note Starting from release 22.05, 'master' branch is no longer being used, it has been replaced by 'main'. Please update your clone jobs accordingly.
@section S2_2_changelog Changelog
+v23.02.1 Public patch release
+ - Allow mismatching data layouts between the source tensor and weights for \link cpu::CpuGemmDirectConv2d CpuGemmDirectConv2d \endlink with fixed format kernels.
+ - Fixes for experimental CPU only Bazel and CMake builds.
+
v23.02 Public major release
- New features:
- Rework the experimental dynamic fusion interface by identifying auxiliary and intermediate tensors, and specifying an explicit output operator.
diff --git a/src/cpu/operators/CpuGemmDirectConv2d.cpp b/src/cpu/operators/CpuGemmDirectConv2d.cpp
index ee47a17d6..5ce285cb6 100644
--- a/src/cpu/operators/CpuGemmDirectConv2d.cpp
+++ b/src/cpu/operators/CpuGemmDirectConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -158,7 +158,10 @@ Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
+ if(!is_fixed_format(info.weights_info.weight_format()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
+ }
ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_layout() != DataLayout::NHWC, "Data layout supported is NHWC");
const DataType data_type = src->data_type();
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
index 57ea3f657..812265299 100644
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@@ -35,8 +35,23 @@ cc_library(
"validation/**/*.h",
]),
copts = [
- "-march=armv8.2-a+fp16",
- ],
+ "-march=armv8.2-a+fp16",
+ ] + select({
+ "//:debug_flag": [
+ "-O0",
+ "-g",
+ "-gdwarf-2",
+ ],
+ "//conditions:default": ["-O3"],
+ }) +
+ select({
+ "//:openmp_flag": ["-fopenmp"],
+ "//conditions:default": [],
+ }) +
+ select({
+ "//:Werror_flag": ["-Werror"],
+ "//conditions:default": [],
+ }),
linkstatic = True,
deps = [
"//:arm_compute",
@@ -60,9 +75,23 @@ cc_binary(
"instruments/*.h",
]),
copts = [
- "-march=armv8.2-a+fp16",
- "-ffp-contract=off",
- ],
+ "-march=armv8.2-a+fp16",
+ ] + select({
+ "//:debug_flag": [
+ "-O0",
+ "-g",
+ "-gdwarf-2",
+ ],
+ "//conditions:default": ["-O3"],
+ }) +
+ select({
+ "//:openmp_flag": ["-fopenmp"],
+ "//conditions:default": [],
+ }) +
+ select({
+ "//:Werror_flag": ["-Werror"],
+ "//conditions:default": [],
+ }),
linkstatic = True,
deps = [
":validation_framework",
@@ -71,6 +100,13 @@ cc_binary(
"//:common_defines",
"//tests/framework",
],
+ local_defines = [] +
+ select({
+ "//:bf16_validation_flag": [
+ "ARM_COMPUTE_ENABLE_BF16",
+ ],
+ "//conditions:default": [],
+ })
)
#---------------------------------------------------------------------
@@ -82,7 +118,24 @@ cc_binary(
"benchmark/NEON/*.cpp",
"*.cpp",
]),
- copts = ["-march=armv8.2-a+fp16"],
+ copts = [
+ "-march=armv8.2-a+fp16",
+ ] + select({
+ "//:debug_flag": [
+ "-O0",
+ "-g",
+ "-gdwarf-2",
+ ],
+ "//conditions:default": ["-O3"],
+ }) +
+ select({
+ "//:openmp_flag": ["-fopenmp"],
+ "//conditions:default": [],
+ }) +
+ select({
+ "//:Werror_flag": ["-Werror"],
+ "//conditions:default": [],
+ }),
linkstatic = True,
deps = [
":arm_compute_validation",
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 08b6a0237..ea161a11a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -530,6 +530,7 @@ TEST_SUITE_END() // FP16
TEST_SUITE_END() // WinogradLayer
#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+TEST_SUITE(FIXED_FORMAT_KERNELS)
TEST_SUITE(VariableWeightUtils)
// UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
@@ -588,6 +589,8 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath
ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
}
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+
FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
combine(framework::dataset::make("DataType", { DataType::F32 }),
framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
@@ -604,6 +607,8 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureF
ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
}
+#endif // ARM_COMPUTE_ENABLE_BF16
+
// UC3_1_* tests: the user queries for ANY fixed format, but there is
// no kernel that support the use case specified by the user (for
// example, there is no fixed format kernel for the datatype of the
@@ -664,6 +669,8 @@ FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath
ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
}
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+
FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
combine(framework::dataset::make("DataType", { DataType::F32 }),
framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
@@ -684,6 +691,8 @@ FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureF
ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
}
+#endif // ARM_COMPUTE_ENABLE_BF16
+
namespace
{
using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
@@ -839,6 +848,7 @@ FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float
#endif // ARM_COMPUTE_ENABLE_BF16
TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
+TEST_SUITE_END() // FIXED_FORMAT_KERNELS
#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS