diff options
author | Jenkins <bsgcomp@arm.com> | 2023-03-16 12:19:40 +0000 |
---|---|---|
committer | Jenkins <bsgcomp@arm.com> | 2023-03-16 12:19:40 +0000 |
commit | d8bf9b53752a4f573120cf51b31055de8b3c7d29 (patch) | |
tree | 996b6ca8f2ec33efea40514d4707ab9dd6358125 | |
parent | cfb1c3035cbfc31a2fe8491c7df13e911698e2b6 (diff) | |
download | ComputeLibrary-d8bf9b53752a4f573120cf51b31055de8b3c7d29.tar.gz |
Compute Library v23.02.1
-rw-r--r-- | .bazelrc | 1 | ||||
-rw-r--r-- | BUILD.bazel | 29 | ||||
-rw-r--r-- | CMakeLists.txt | 22 | ||||
-rw-r--r-- | CONTRIBUTING.md | 2 | ||||
-rw-r--r-- | README.md | 30 | ||||
-rw-r--r-- | SConscript | 4 | ||||
-rw-r--r-- | cmake/Options.cmake | 13 | ||||
-rw-r--r-- | docs/Doxyfile | 2 | ||||
-rw-r--r-- | docs/user_guide/how_to_build_and_run_examples.dox | 2 | ||||
-rw-r--r-- | docs/user_guide/release_version_and_change_log.dox | 4 | ||||
-rw-r--r-- | src/cpu/operators/CpuGemmDirectConv2d.cpp | 7 | ||||
-rw-r--r-- | tests/BUILD.bazel | 65 | ||||
-rw-r--r-- | tests/validation/NEON/ConvolutionLayer.cpp | 10 |
13 files changed, 147 insertions, 44 deletions
@@ -36,3 +36,4 @@ build --flag_alias=debug=//:debug build --flag_alias=logging=//:logging build --flag_alias=openmp=//:openmp build --flag_alias=cppthreads=//:cppthreads +build --flag_alias=enable_bf16_validation=//:enable_bf16_validation diff --git a/BUILD.bazel b/BUILD.bazel index d33cf6b8b..e3ad75abd 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -66,6 +66,12 @@ bool_flag( visibility = ["//visibility:public"], ) +bool_flag( + name = "enable_bf16_validation", + build_setting_default = False, + visibility = ["//visibility:public"], +) + #--------------------------------------------------------------------- # Flag variables config_setting( @@ -103,6 +109,14 @@ config_setting( }, ) +config_setting( + name = "bf16_validation_flag", + flag_values = { + ":enable_bf16_validation": "true", + }, +) + + #--------------------------------------------------------------------- # Common defines used for all targets cc_library( @@ -112,7 +126,6 @@ cc_library( "ARM_COMPUTE_CPU_ENABLED", "ARM_COMPUTE_ENABLE_NEON", "ARM_COMPUTE_ENABLE_FP16", - "ARM_COMPUTE_ENABLE_BF16", "ARM_COMPUTE_ENABLE_I8MM", "ENABLE_FP16_KERNELS", "ENABLE_FP32_KERNELS", @@ -125,6 +138,9 @@ cc_library( "DARM_COMPUTE_GRAPH_ENABLED", "ARM_COMPUTE_ENABLE_SVEF32MM", "ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS", + "ENABLE_SVE", + "ARM_COMPUTE_ENABLE_SVE", + "_GLIBCXX_USE_NANOSLEEP" ] + select({ "//:debug_flag": [ "ARM_COMPUTE_DEBUG_ENABLED", @@ -227,9 +243,8 @@ cc_library( "//conditions:default": [], }), local_defines = [ - "ENABLE_SVE", - "ARM_COMPUTE_ENABLE_SVE", "ARM_COMPUTE_ENABLE_SVE2", + "ARM_COMPUTE_ENABLE_BF16" ], deps = [ "//:common_defines", @@ -277,8 +292,7 @@ cc_library( "//conditions:default": [], }), local_defines = [ - "ENABLE_SVE", - "ARM_COMPUTE_ENABLE_SVE", + "ARM_COMPUTE_ENABLE_BF16", ], deps = [ "//:common_defines", @@ -333,6 +347,9 @@ cc_library( "//:openmp_flag": ["-fopenmp"], "//conditions:default": [], }), + local_defines = [ + "ARM_COMPUTE_ENABLE_BF16", + ], visibility = ["//visibility:public"], deps = [ "//:common_defines", @@ -342,6 +359,8 @@ cc_library( "//include", "//support", "//utils", + "//:arm_compute_sve", + "//:arm_compute_sve2" ], alwayslink = True, ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ac0c722a..72992ed03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,7 +101,12 @@ set(COMMON_CXX_FLAGS -Wsign-promo -Weffc++ -Wno-overlength-strings - -Wno-ignored-attributes) + -Wno-ignored-attributes + -Wlogical-op + -Wnoexcept + -Wstrict-null-sentinel + -Wno-misleading-indentation + -O3) # Disable note popups on compiler ABI changes if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -143,9 +148,7 @@ endif() add_library(arm_compute_sve "") target_compile_options(arm_compute_sve PRIVATE "-march=armv8.2-a+sve+fp16+dotprod") -target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE) -target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE) - +target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16) target_include_directories( arm_compute_sve PUBLIC $<INSTALL_INTERFACE:include> @@ -165,10 +168,8 @@ target_include_directories( add_library(arm_compute_sve2 "") target_compile_options(arm_compute_sve2 PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod") -target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE) -target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE) target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2) - +target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16) target_include_directories( arm_compute_sve2 PUBLIC $<INSTALL_INTERFACE:include> @@ -187,6 +188,7 @@ target_include_directories( add_library(arm_compute_core "") target_compile_options(arm_compute_core PRIVATE "-march=armv8.2-a+fp16") +target_compile_definitions(arm_compute_core PRIVATE ARM_COMPUTE_ENABLE_BF16) target_include_directories( arm_compute_core PUBLIC $<INSTALL_INTERFACE:include> @@ -201,6 +203,8 @@ target_include_directories( target_compile_options(arm_compute_core PUBLIC ${COMMON_CXX_FLAGS}) add_library(ArmCompute::Core ALIAS arm_compute_core) +target_link_libraries( + arm_compute_core PUBLIC arm_compute_sve arm_compute_sve2) # --------------------------------------------------------------------- # Graph Library @@ -255,7 +259,9 @@ if(BUILD_TESTING) add_executable(arm_compute_validation "") target_compile_options(arm_compute_validation PRIVATE "-march=armv8.2-a+fp16") - + if(ENABLE_BF16_VALIDATION) + target_compile_definitions(arm_compute_validation PRIVATE ARM_COMPUTE_ENABLE_BF16) + endif() add_subdirectory(tests/validation) target_compile_options(arm_compute_validation PUBLIC ${COMMON_CXX_FLAGS}) set_target_properties( diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 171d101bd..b11239035 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -Please read https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml +Please read https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml Here on github we only publish a snapshot of the main development branch for each release, that's the reason why we don't accept pull requests. @@ -1,6 +1,6 @@ > **⚠ Important** -> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml#S5_0_inc_lang). +> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml#S5_0_inc_lang). > **⚠ Important** > From release 22.08: armv7a with Android build will no longer be tested or maintained. @@ -16,7 +16,7 @@ <img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br> </div> -# Compute Library ![](https://img.shields.io/badge/latest_release-23.02-green) +# Compute Library ![](https://img.shields.io/badge/latest_release-23.02.1-green) The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br> @@ -44,7 +44,7 @@ Key Features: <br> ## Documentation -[![Documentation](https://img.shields.io/badge/documentation-23.02-green)](https://arm-software.github.io/ComputeLibrary/latest) +[![Documentation](https://img.shields.io/badge/documentation-23.02.1-green)](https://arm-software.github.io/ComputeLibrary/v23.02.1) > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc. @@ -57,24 +57,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C | Platform | Operating System | Release archive (Download) | | -------------- | ---------------- | -------------------------- | -| Raspberry Pi 4 | Linux 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon.tar.gz) | -| Raspberry Pi 4 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) | -| Odroid N2 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| HiKey960 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| Raspberry Pi 4 | Linux 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon.tar.gz) | +| Raspberry Pi 4 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) | +| Odroid N2 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| HiKey960 | Linux 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) | <br> | Architecture | Operating System | Release archive (Download) | | ------------ | ---------------- | -------------------------- | -| armv7 | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-armv7a-neon-cl.tar.gz) | -| arm64-v8a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8a-neon-cl.tar.gz) | -| arm64-v8.2-a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-android-arm64-v8.2-a-neon-cl.tar.gz) | -| arm64-v8.2-a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02/arm_compute-v23.02-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) | +| armv7 | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-armv7a-neon-cl.tar.gz) | +| arm64-v8a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8a-neon-cl.tar.gz) | +| arm64-v8a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8a-neon-cl.tar.gz) | +| arm64-v8.2-a | Android | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-android-arm64-v8.2-a-neon-cl.tar.gz) | +| arm64-v8.2-a | Linux | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.02.1/arm_compute-v23.02.1-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) | <br> -Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v23.02-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.02) +Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v23.02.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.02.1) Pre-build binaries are generated with the following security / good coding practices related flags: > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong @@ -116,13 +116,13 @@ Pre-build binaries are generated with the following security / good coding pract ## Experimental builds -**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/latest/how_to_build.xhtml) for more details. +**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/v23.02.1/how_to_build.xhtml) for more details. <br> ## How to contribute -Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml). +Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml). ### Developer Certificate of Origin (DCO) Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/) diff --git a/SConscript b/SConscript index b2218b512..e5263cf34 100644 --- a/SConscript +++ b/SConscript @@ -31,10 +31,10 @@ import zlib import json import codecs -VERSION = "v23.02" +VERSION = "v23.02.1" LIBRARY_VERSION_MAJOR = 30 LIBRARY_VERSION_MINOR = 0 -LIBRARY_VERSION_PATCH = 0 +LIBRARY_VERSION_PATCH = 1 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH) Import('env') diff --git a/cmake/Options.cmake b/cmake/Options.cmake index 20cf0e42c..170dad9f6 100644 --- a/cmake/Options.cmake +++ b/cmake/Options.cmake @@ -50,7 +50,6 @@ endif() # --------------------------------------------------------------------- # Backends -# TODO Add help string for each setting (Should user be able to ) option(ENABLE_NEON "Enable Arm® Neon™ support" ON) option(ARM_COMPUTE_CPU_ENABLED "" ON) option(ARM_COMPUTE_ENABLE_NEON "" ON) @@ -65,11 +64,12 @@ option(ENABLE_INTEGER_KERNELS "" ON) option(ENABLE_NHWC_KERNELS "" ON) option(ENABLE_NCHW_KERNELS "" ON) option(ARM_COMPUTE_GRAPH_ENABLED "" ON) -option(ARM_COMPUTE_ENABLE_BF16 "" ON) option(ARM_COMPUTE_ENABLE_SVEF32MM "" ON) option(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS "" ON) +option(ENABLE_SVE "" ON) +option(ARM_COMPUTE_ENABLE_SVE "" ON) +option(ENABLE_BF16_VALIDATION "" OFF) -# TODO Check if this is required if(ENABLE_NEON) add_definitions(-DENABLE_NEON) endif() @@ -121,3 +121,10 @@ endif() if(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS) add_definitions(-DARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS) endif() +if(ENABLE_SVE) + add_definitions(-DENABLE_SVE) +endif() +if(ARM_COMPUTE_ENABLE_SVE) + add_definitions(-DARM_COMPUTE_ENABLE_SVE) +endif() +add_definitions(-D_GLIBCXX_USE_NANOSLEEP)
\ No newline at end of file diff --git a/docs/Doxyfile b/docs/Doxyfile index 33e313fb3..7b025a1de 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "Compute Library" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 23.02 +PROJECT_NUMBER = 23.02.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/user_guide/how_to_build_and_run_examples.dox b/docs/user_guide/how_to_build_and_run_examples.dox index 2f61f938a..ab6777d2b 100644 --- a/docs/user_guide/how_to_build_and_run_examples.dox +++ b/docs/user_guide/how_to_build_and_run_examples.dox @@ -510,7 +510,7 @@ To build libraries, examples and tests: mkdir build cd build - cmake .. -DOPENMP=1 -DCPPTHREADS=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=. + cmake .. -DOPENMP=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=. cmake --build . -j32 */ diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index 8bb2a3f30..11cb71628 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -40,6 +40,10 @@ If there is more than one release in a month then an extra sequential number is @note Starting from release 22.05, 'master' branch is no longer being used, it has been replaced by 'main'. Please update your clone jobs accordingly. @section S2_2_changelog Changelog +v23.02.1 Public patch release + - Allow mismatching data layouts between the source tensor and weights for \link cpu::CpuGemmDirectConv2d CpuGemmDirectConv2d \endlink with fixed format kernels. + - Fixes for experimental CPU only Bazel and CMake builds. + v23.02 Public major release - New features: - Rework the experimental dynamic fusion interface by identifying auxiliary and intermediate tensors, and specifying an explicit output operator. diff --git a/src/cpu/operators/CpuGemmDirectConv2d.cpp b/src/cpu/operators/CpuGemmDirectConv2d.cpp index ee47a17d6..5ce285cb6 100644 --- a/src/cpu/operators/CpuGemmDirectConv2d.cpp +++ b/src/cpu/operators/CpuGemmDirectConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -158,7 +158,10 @@ Status CpuGemmDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo * ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights); + if(!is_fixed_format(info.weights_info.weight_format())) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights); + } ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_layout() != DataLayout::NHWC, "Data layout supported is NHWC"); const DataType data_type = src->data_type(); diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 57ea3f657..812265299 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -35,8 +35,23 @@ cc_library( "validation/**/*.h", ]), copts = [ - "-march=armv8.2-a+fp16", - ], + "-march=armv8.2-a+fp16", + ] + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), linkstatic = True, deps = [ "//:arm_compute", @@ -60,9 +75,23 @@ cc_binary( "instruments/*.h", ]), copts = [ - "-march=armv8.2-a+fp16", - "-ffp-contract=off", - ], + "-march=armv8.2-a+fp16", + ] + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), linkstatic = True, deps = [ ":validation_framework", @@ -71,6 +100,13 @@ cc_binary( "//:common_defines", "//tests/framework", ], + local_defines = [] + + select({ + "//:bf16_validation_flag": [ + "ARM_COMPUTE_ENABLE_BF16", + ], + "//conditions:default": [], + }) ) #--------------------------------------------------------------------- @@ -82,7 +118,24 @@ cc_binary( "benchmark/NEON/*.cpp", "*.cpp", ]), - copts = ["-march=armv8.2-a+fp16"], + copts = [ + "-march=armv8.2-a+fp16", + ] + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), linkstatic = True, deps = [ ":arm_compute_validation", diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 08b6a0237..ea161a11a 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -530,6 +530,7 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // WinogradLayer #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS +TEST_SUITE(FIXED_FORMAT_KERNELS) TEST_SUITE(VariableWeightUtils) // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it. @@ -588,6 +589,8 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS); } +#if defined(ARM_COMPUTE_ENABLE_BF16) + FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) @@ -604,6 +607,8 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureF ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); } +#endif // ARM_COMPUTE_ENABLE_BF16 + // UC3_1_* tests: the user queries for ANY fixed format, but there is // no kernel that support the use case specified by the user (for // example, there is no fixed format kernel for the datatype of the @@ -664,6 +669,8 @@ FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); } +#if defined(ARM_COMPUTE_ENABLE_BF16) + FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) @@ -684,6 +691,8 @@ FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureF ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); } +#endif // ARM_COMPUTE_ENABLE_BF16 + namespace { using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>; @@ -839,6 +848,7 @@ FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float #endif // ARM_COMPUTE_ENABLE_BF16 TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures +TEST_SUITE_END() // FIXED_FORMAT_KERNELS #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |