diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-10 07:12:53 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-05-10 07:12:53 +0000 |
commit | 99960e39c16f57c8ea9f71edb700aec8248e9aa7 (patch) | |
tree | 5e6961ce35f827aaf3c595903b56c3c05219c45c | |
parent | 96659df0a62fadb60eb3a5b9708626b82cb2e11b (diff) | |
parent | 0a3207f08f581da4c6f6520a7d69e7fda17eab2a (diff) | |
download | rs-android13-mainline-adbd-release.tar.gz |
Snap for 8564071 from 0a3207f08f581da4c6f6520a7d69e7fda17eab2a to mainline-adbd-release (refs: aml_adb_331610000, aml_adb_331314020, aml_adb_331113120, aml_adb_331011050, aml_adb_331011040, android13-mainline-adbd-release)
Change-Id: If72fd9a23a5977e07f51f9e9bc3b0c9aad2f2863
332 files changed, 24924 insertions, 20036 deletions
@@ -370,4 +370,5 @@ subdirs = [ "cpu_ref", "script_api", "support", + "toolkit", ] diff --git a/build_rs.py b/build_rs.py index 04161092..c2faa5be 100755 --- a/build_rs.py +++ b/build_rs.py @@ -56,14 +56,17 @@ def install_directory(src, dst): def build(out_dir): - products = ( - 'aosp_arm', - 'aosp_arm64', - # 'aosp_mips', - # 'aosp_mips64', - 'aosp_x86', - 'aosp_x86_64', - ) + if sys.platform == 'darwin': + products = ('aosp_arm',) + else: + products = ( + 'aosp_arm', + 'aosp_arm64', + # 'aosp_mips', + # 'aosp_mips64', + 'aosp_x86', + 'aosp_x86_64', + ) for product in products: build_product(out_dir, product) @@ -78,13 +81,19 @@ def build_product(out_dir, product): env['TARGET_BUILD_VARIANT'] = 'userdebug' env['TARGET_PRODUCT'] = product - targets = [ - # PHONY target specified in frameworks/rs/Android.mk. - 'rs-prebuilts-full', - # We have to explicitly specify the jar for JACK to build. - android_path('out/target/common/obj/JAVA_LIBRARIES/' + - 'android-support-v8-renderscript_intermediates/classes.jar') - ] + if sys.platform == 'darwin': + targets = [ + 'llvm-rs-cc', + 'bcc_compat', + ] + else: + targets = [ + # PHONY target specified in frameworks/rs/Android.mk. + 'rs-prebuilts-full', + # We have to explicitly specify the jar for JACK to build. + android_path('out/target/common/obj/JAVA_LIBRARIES/' + + 'android-support-v8-renderscript_intermediates/classes.jar') + ] subprocess.check_call( ['build/soong/soong_ui.bash', '--make-mode'] + targets, cwd=android_path(), env=env) @@ -113,7 +122,8 @@ def package_toolchain(build_dir, build_name, host, dist_dir): def install_toolchain(build_dir, install_dir, host): install_built_host_files(build_dir, install_dir, host) install_clang_headers(build_dir, install_dir, host) - install_built_device_files(build_dir, install_dir, host) + if not host.startswith('darwin'): + install_built_device_files(build_dir, install_dir, host) install_license_files(install_dir) # We need to package libwinpthread-1.dll for Windows. 
This is explicitly # linked whenever pthreads is used, and the build system doesn't allow diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp index ce300927..d1024884 100644 --- a/cpu_ref/rsCpuIntrinsicBlend.cpp +++ b/cpu_ref/rsCpuIntrinsicBlend.cpp @@ -109,6 +109,15 @@ extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8); namespace android { namespace renderscript { +// Convert vector to uchar4, clipping each value to 255. +template <typename TI> +static inline uchar4 convertClipped(TI amount) { + return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x), + static_cast<uchar>(amount.y > 255 ? 255 : amount.y), + static_cast<uchar>(amount.z > 255 ? 255 : amount.z), + static_cast<uchar>(amount.w > 255 ? 255 : amount.w)}; +} + void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart, uint32_t xend, uint32_t outstep) { @@ -120,8 +129,11 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, #if defined(ARCH_ARM_USE_INTRINSICS) if (gArchUseSIMD) { - if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0) + if (rsdIntrinsicBlend_K(out, in, info->slot, 0, x2 - x1) >= 0) { return; + } else { + ALOGW("Intrinsic Blend failed to use SIMD for %d", info->slot); + } } #endif switch (info->slot) { @@ -151,10 +163,10 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, } #endif for (;x1 < x2; x1++, out++, in++) { - short4 in_s = convert_short4(*in); - short4 out_s = convert_short4(*out); - in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8); - *out = convert_uchar4(in_s); + ushort4 in_s = convert_ushort4(*in); + ushort4 out_s = convert_ushort4(*out); + in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8); + *out = convertClipped(in_s); } break; case BLEND_DST_OVER: @@ -170,10 +182,10 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, } #endif for (;x1 < x2; x1++, out++, in++) 
{ - short4 in_s = convert_short4(*in); - short4 out_s = convert_short4(*out); - in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8); - *out = convert_uchar4(in_s); + ushort4 in_s = convert_ushort4(*in); + ushort4 out_s = convert_ushort4(*out); + in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8); + *out = convertClipped(in_s); } break; case BLEND_SRC_IN: @@ -189,8 +201,8 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, } #endif for (;x1 < x2; x1++, out++, in++) { - short4 in_s = convert_short4(*in); - in_s = (in_s * out->w) >> (short4)8; + ushort4 in_s = convert_ushort4(*in); + in_s = (in_s * out->w) >> (ushort4)8; *out = convert_uchar4(in_s); } break; @@ -261,11 +273,14 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, } #endif for (;x1 < x2; x1++, out++, in++) { - short4 in_s = convert_short4(*in); - short4 out_s = convert_short4(*out); + // The max value the operation could produce before the shift + // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02. + // That value does not fit in a ushort, so we use uint. 
+ uint4 in_s = convert_uint4(*in); + uint4 out_s = convert_uint4(*out); out_s.xyz = ((in_s.xyz * out_s.w) + - (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8; - *out = convert_uchar4(out_s); + (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8; + *out = convertClipped(out_s); } break; case BLEND_DST_ATOP: @@ -281,12 +296,12 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, } #endif for (;x1 < x2; x1++, out++, in++) { - short4 in_s = convert_short4(*in); - short4 out_s = convert_short4(*out); + uint4 in_s = convert_uint4(*in); + uint4 out_s = convert_uint4(*out); out_s.xyz = ((out_s.xyz * in_s.w) + - (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8; + (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8; out_s.w = in_s.w; - *out = convert_uchar4(out_s); + *out = convertClipped(out_s); } break; case BLEND_XOR: diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp index 8a3dd1ae..8afa2ede 100644 --- a/cpu_ref/rsCpuIntrinsicResize.cpp +++ b/cpu_ref/rsCpuIntrinsicResize.cpp @@ -353,7 +353,7 @@ void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info, const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); - uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart; + uchar4 *out = ((uchar4 *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; @@ -430,7 +430,7 @@ void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info, const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); - uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart; + uchar2 *out = ((uchar2 *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; @@ -508,7 +508,7 @@ void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info, const uchar *yp2 = pin + stride * ys2; const uchar *yp3 = pin + stride * ys3; - uchar *out = ((uchar 
*)info->outPtr[0]) + xstart; + uchar *out = ((uchar *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; @@ -586,7 +586,7 @@ void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info, const float4 *yp2 = (const float4 *)(pin + stride * ys2); const float4 *yp3 = (const float4 *)(pin + stride * ys3); - float4 *out = ((float4 *)info->outPtr[0]) + xstart; + float4 *out = ((float4 *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; @@ -638,7 +638,7 @@ void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info, const float2 *yp2 = (const float2 *)(pin + stride * ys2); const float2 *yp3 = (const float2 *)(pin + stride * ys3); - float2 *out = ((float2 *)info->outPtr[0]) + xstart; + float2 *out = ((float2 *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; @@ -690,7 +690,7 @@ void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info, const float *yp2 = (const float *)(pin + stride * ys2); const float *yp3 = (const float *)(pin + stride * ys3); - float *out = ((float *)info->outPtr[0]) + xstart; + float *out = ((float *)info->outPtr[0]); uint32_t x1 = xstart; uint32_t x2 = xend; diff --git a/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S b/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S index b4a2b7c4..1473336f 100644 --- a/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S +++ b/cpu_ref/rsCpuIntrinsics_advsimd_Blend.S @@ -593,7 +593,7 @@ ENTRY(rsdIntrinsicBlend_K) adrp x5, blendtable add x5, x5, :lo12:blendtable - cmp w2, tablesize >> 1 + cmp w2, tablesize bhs 1f ldrsh x6, [x5, w2, uxtw #1] add x0, x0, w3, uxtw #2 @@ -615,4 +615,3 @@ blendtable: #define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ; BLEND_LIST(BLEND_X) #undef BLEND_X - diff --git a/script_api/Android.bp b/script_api/Android.bp index 0f026bf0..8046bd0b 100644 --- a/script_api/Android.bp +++ b/script_api/Android.bp @@ -26,3 +26,9 @@ cc_binary_host { never: true, }, } + +filegroup { + name: 
"rs_script_api", + srcs: ["include/*.rsh"], + path: "include", +} diff --git a/tests/cpp_api/Android.bp b/tests/cpp_api/Android.bp new file mode 100644 index 00000000..9c3632ac --- /dev/null +++ b/tests/cpp_api/Android.bp @@ -0,0 +1,31 @@ +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +cc_defaults { + name: "frameworks_rs_tests_cpp-api-defaults", + shared_libs: ["liblog"], + cflags: [ + "-Werror", + "-Wall", + "-Wextra", + ], + header_libs: ["rs-headers"], +} + diff --git a/tests/cpp_api/Android.mk b/tests/cpp_api/Android.mk deleted file mode 100644 index 6145a3df..00000000 --- a/tests/cpp_api/Android.mk +++ /dev/null @@ -1,3 +0,0 @@ -LOCAL_PATH:=$(call my-dir) - -include $(call all-makefiles-under,$(LOCAL_PATH)) diff --git a/tests/cpp_api/common.mk b/tests/cpp_api/common.mk deleted file mode 100644 index 9bad7901..00000000 --- a/tests/cpp_api/common.mk +++ /dev/null @@ -1,7 +0,0 @@ -LOCAL_MODULE_TAGS := tests - -LOCAL_CFLAGS += -Werror -Wall -Wextra -LOCAL_LDFLAGS += -llog - -intermediates := $(call intermediates-dir-for,STATIC_LIBRARIES,libRS,TARGET,) -LOCAL_C_INCLUDES += $(intermediates) diff --git a/tests/cpp_api/cpp-globalguard/Android.bp b/tests/cpp_api/cpp-globalguard/Android.bp new file mode 100644 index 00000000..c0ac0f97 --- /dev/null +++ 
b/tests/cpp_api/cpp-globalguard/Android.bp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-cpp-globalguard", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "multiply.rscript", + "compute.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/cpp-globalguard/Android.mk b/tests/cpp_api/cpp-globalguard/Android.mk deleted file mode 100644 index 88a10c9e..00000000 --- a/tests/cpp_api/cpp-globalguard/Android.mk +++ /dev/null @@ -1,21 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-cpp-globalguard -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - multiply.rscript \ - compute.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -LOCAL_LDFLAGS += -llog - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppallocation/Android.bp b/tests/cpp_api/cppallocation/Android.bp new file mode 100644 index 00000000..210969b2 --- /dev/null +++ b/tests/cpp_api/cppallocation/Android.bp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2021 The Android Open Source 
Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +cc_binary { + name: "rstest-cppallocation", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "multiply.rscript", + "compute.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/cppallocation/Android.mk b/tests/cpp_api/cppallocation/Android.mk deleted file mode 100644 index d2c7cbcc..00000000 --- a/tests/cpp_api/cppallocation/Android.mk +++ /dev/null @@ -1,21 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-cppallocation -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - multiply.rscript \ - compute.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -LOCAL_LDFLAGS += -llog - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppbasic-getpointer/Android.bp b/tests/cpp_api/cppbasic-getpointer/Android.bp new file mode 100644 index 00000000..203a8c9d --- /dev/null +++ b/tests/cpp_api/cppbasic-getpointer/Android.bp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file 
except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-compute-getpointer", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + srcs: [ + "mono.rscript", + "compute.cpp", + ], + shared_libs: ["libRScpp"], +} diff --git a/tests/cpp_api/cppbasic-getpointer/Android.mk b/tests/cpp_api/cppbasic-getpointer/Android.mk deleted file mode 100644 index 963a3e4a..00000000 --- a/tests/cpp_api/cppbasic-getpointer/Android.mk +++ /dev/null @@ -1,18 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-compute-getpointer -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk - -LOCAL_SRC_FILES:= \ - mono.rscript \ - compute.cpp - -LOCAL_SHARED_LIBRARIES := \ - libRScpp - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppbasic-shared/Android.bp b/tests/cpp_api/cppbasic-shared/Android.bp new file mode 100644 index 00000000..0b49d864 --- /dev/null +++ b/tests/cpp_api/cppbasic-shared/Android.bp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-compute-shared", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + srcs: [ + "mono.rscript", + "compute.cpp", + ], + shared_libs: ["libRScpp"], +} diff --git a/tests/cpp_api/cppbasic-shared/Android.mk b/tests/cpp_api/cppbasic-shared/Android.mk deleted file mode 100644 index ca91745d..00000000 --- a/tests/cpp_api/cppbasic-shared/Android.mk +++ /dev/null @@ -1,18 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-compute-shared -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk - -LOCAL_SRC_FILES:= \ - mono.rscript \ - compute.cpp - -LOCAL_SHARED_LIBRARIES := \ - libRScpp - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppbasic/Android.bp b/tests/cpp_api/cppbasic/Android.bp new file mode 100644 index 00000000..d6723ca2 --- /dev/null +++ b/tests/cpp_api/cppbasic/Android.bp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-compute", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "mono.rscript", + "compute.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/cppbasic/Android.mk b/tests/cpp_api/cppbasic/Android.mk deleted file mode 100644 index a1f090f4..00000000 --- a/tests/cpp_api/cppbasic/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-compute -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - mono.rscript \ - compute.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppf16/Android.bp b/tests/cpp_api/cppf16/Android.bp new file mode 100644 index 00000000..4aaca318 --- /dev/null +++ b/tests/cpp_api/cppf16/Android.bp @@ -0,0 +1,33 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-cppf16", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + stl: "c++_static", + srcs: ["compute.cpp"], + static_libs: ["libRScpp_static"], + shared_libs: [ + "libdl", + ], +} diff --git a/tests/cpp_api/cppf16/Android.mk b/tests/cpp_api/cppf16/Android.mk deleted file mode 100644 index eca91db0..00000000 --- a/tests/cpp_api/cppf16/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-cppf16 -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - compute.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -LOCAL_LDFLAGS += -llog -ldl - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/cppstrided/Android.bp b/tests/cpp_api/cppstrided/Android.bp new file mode 100644 index 00000000..df3fc937 --- /dev/null +++ b/tests/cpp_api/cppstrided/Android.bp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-cppstrided", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "multiply.rscript", + "compute.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/cppstrided/Android.mk b/tests/cpp_api/cppstrided/Android.mk deleted file mode 100644 index e0e03b3a..00000000 --- a/tests/cpp_api/cppstrided/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-cppstrided -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - multiply.rscript \ - compute.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/latency/Android.bp b/tests/cpp_api/latency/Android.bp new file mode 100644 index 00000000..3eaf1eeb --- /dev/null +++ b/tests/cpp_api/latency/Android.bp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-latency", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "latency.rscript", + "latency.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/latency/Android.mk b/tests/cpp_api/latency/Android.mk deleted file mode 100644 index 16557f4c..00000000 --- a/tests/cpp_api/latency/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-latency -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - latency.rscript \ - latency.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/cpp_api/typecheck/Android.bp b/tests/cpp_api/typecheck/Android.bp new file mode 100644 index 00000000..793888c0 --- /dev/null +++ b/tests/cpp_api/typecheck/Android.bp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_binary { + name: "rstest-typecheck", + defaults: ["frameworks_rs_tests_cpp-api-defaults"], + sdk_version: "21", + stl: "c++_static", + srcs: [ + "kernels.rscript", + "typecheck.cpp", + ], + static_libs: ["libRScpp_static"], +} diff --git a/tests/cpp_api/typecheck/Android.mk b/tests/cpp_api/typecheck/Android.mk deleted file mode 100644 index 82f92ea8..00000000 --- a/tests/cpp_api/typecheck/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH:= $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE:= rstest-typecheck -LOCAL_LICENSE_KINDS:= SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS:= notice - -LOCAL_SDK_VERSION := 21 -LOCAL_NDK_STL_VARIANT := c++_static - -LOCAL_SRC_FILES:= \ - kernels.rscript \ - typecheck.cpp - -LOCAL_STATIC_LIBRARIES := \ - libRScpp_static - -include frameworks/rs/tests/cpp_api/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/java_api/Balls/Android.bp b/tests/java_api/Balls/Android.bp new file mode 100644 index 00000000..e931d3e9 --- /dev/null +++ b/tests/java_api/Balls/Android.bp @@ -0,0 +1,63 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +// This variable is used to match the 'LOCAL_SDK_VERSION' field in the former Android.mk file. +local_sdk_version = "14" +// This variable is used to set the value of the '-target-api' option for the 'llvm-rs-cc' command. +// Note: it may NOT always be the same as the 'local_sdk_version', due to the existing logic in the Make build system. +// For the Android.mk to Android.bp conversion, it is recommend to run the build before and after +// the conversion, to make sure the value of the '-target-api' option to be the same. 
+target_api_level = local_sdk_version + +android_test { + name: "RsBalls", + srcs: [ + "src/**/*.java", + ":RsBalls-rscript{RsBalls.srcjar}", + ], + resource_zips: [ + ":RsBalls-rscript{RsBalls.res.zip}", + ], + sdk_version: local_sdk_version, +} + +genrule { + name: "RsBalls-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsBalls.srcjar", + "RsBalls.res.zip", + ], + cmd: "$(location llvm-rs-cc) -target-api " + target_api_level + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $(locations src/**/*.rscript) &&" + + "$(location soong_zip) -srcjar -o $(location RsBalls.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsBalls.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/Balls/Android.mk b/tests/java_api/Balls/Android.mk deleted file mode 100644 index 409c7354..00000000 --- a/tests/java_api/Balls/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2008 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RsBalls -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := 14 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/CannyLive/Android.bp b/tests/java_api/CannyLive/Android.bp new file mode 100644 index 00000000..24d8a98e --- /dev/null +++ b/tests/java_api/CannyLive/Android.bp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "CannyLive", + static_libs: [ + "androidx.legacy_legacy-support-v4", + "androidx.appcompat_appcompat", + ], + aaptflags: [ + "--extra-packages", + "android.support.v7.appcompat", + ], + srcs: [ + "src/**/*.java", + ":CannyLive-rscript{CannyLive.srcjar}", + ], + resource_zips: [ + ":CannyLive-rscript{CannyLive.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "CannyLive-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "CannyLive.srcjar", + "CannyLive.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api 22 " + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location CannyLive.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location CannyLive.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/CannyLive/Android.mk b/tests/java_api/CannyLive/Android.mk deleted file mode 100644 index 54aeb00b..00000000 --- a/tests/java_api/CannyLive/Android.mk +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests -LOCAL_USE_AAPT2 :=true -LOCAL_STATIC_ANDROID_LIBRARIES += androidx.legacy_legacy-support-v4 -LOCAL_STATIC_ANDROID_LIBRARIES += androidx.appcompat_appcompat -LOCAL_AAPT_FLAGS += --extra-packages android.support.v7.appcompat -LOCAL_RENDERSCRIPT_TARGET_API := 22 - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) -LOCAL_SDK_VERSION := current - -LOCAL_PACKAGE_NAME := CannyLive -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/CannyLive/AndroidManifest.xml b/tests/java_api/CannyLive/AndroidManifest.xml index e7e32991..4764b8fb 100644 --- a/tests/java_api/CannyLive/AndroidManifest.xml +++ b/tests/java_api/CannyLive/AndroidManifest.xml @@ -16,7 +16,8 @@ android:theme="@style/AppTheme" >
<activity
android:name="com.android.example.cannylive.MainActivity"
- android:label="@string/app_name" >
+ android:label="@string/app_name"
+ android:exported="true" >
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
diff --git a/tests/java_api/ComputeBenchmark/Android.bp b/tests/java_api/ComputeBenchmark/Android.bp new file mode 100644 index 00000000..6a4e96d8 --- /dev/null +++ b/tests/java_api/ComputeBenchmark/Android.bp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2012 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsComputeBenchmark", + srcs: [ + "src/**/*.java", + ":RsComputeBenchmark-rscript{RsComputeBenchmark.srcjar}", + ], + resource_zips: [ + ":RsComputeBenchmark-rscript{RsComputeBenchmark.res.zip}", + ], + sdk_version: "current", + min_sdk_version: "17", +} + +genrule { + name: "RsComputeBenchmark-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsComputeBenchmark.srcjar", + "RsComputeBenchmark.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsComputeBenchmark.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsComputeBenchmark.res.zip) -C 
$(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ComputeBenchmark/Android.mk b/tests/java_api/ComputeBenchmark/Android.mk deleted file mode 100644 index a3ca7852..00000000 --- a/tests/java_api/ComputeBenchmark/Android.mk +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2012 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RsComputeBenchmark -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 17 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/ComputePerf/Android.bp b/tests/java_api/ComputePerf/Android.bp new file mode 100644 index 00000000..b60397d5 --- /dev/null +++ b/tests/java_api/ComputePerf/Android.bp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2011 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsComputePerf", + srcs: [ + "src/**/*.java", + ":RsComputePerf-rscript{RsComputePerf.srcjar}", + ], + resource_zips: [ + ":RsComputePerf-rscript{RsComputePerf.res.zip}", + ], + sdk_version: "current", + min_sdk_version: "19", +} + +genrule { + name: "RsComputePerf-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsComputePerf.srcjar", + "RsComputePerf.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsComputePerf.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsComputePerf.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ComputePerf/Android.mk b/tests/java_api/ComputePerf/Android.mk deleted file mode 100644 index 163e7dd6..00000000 --- a/tests/java_api/ComputePerf/Android.mk +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2011 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RsComputePerf -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 19 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/GenImages/Android.bp b/tests/java_api/GenImages/Android.bp new file mode 100644 index 00000000..520e698c --- /dev/null +++ b/tests/java_api/GenImages/Android.bp @@ -0,0 +1,29 @@ +// +// Copyright (C) 2013 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsGenImages", + srcs: [ + "src/**/*.java", + ], + sdk_version: "current", + min_sdk_version: "14", +} diff --git a/tests/java_api/GenImages/Android.mk b/tests/java_api/GenImages/Android.mk deleted file mode 100644 index d3e00ae6..00000000 --- a/tests/java_api/GenImages/Android.mk +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2013 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RsGenImages -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 14 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/HealingBrush/Android.bp b/tests/java_api/HealingBrush/Android.bp new file mode 100644 index 00000000..b8843d5b --- /dev/null +++ b/tests/java_api/HealingBrush/Android.bp @@ -0,0 +1,61 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "HealingBrush", + srcs: [ + "src/**/*.java", + ":HealingBrush-rscript{HealingBrush.srcjar}", + ], + resource_zips: [ + ":HealingBrush-rscript{HealingBrush.res.zip}", + ], + static_libs: ["android-support-v8-renderscript"], + sdk_version: "current", + jni_libs: ["librsjni"], +} + +genrule { + name: "HealingBrush-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "HealingBrush.srcjar", + "HealingBrush.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -target-api 21 -rs-package-name=androidx.renderscript " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location HealingBrush.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location HealingBrush.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/HealingBrush/Android.mk b/tests/java_api/HealingBrush/Android.mk deleted file mode 100644 index 4ca12512..00000000 --- a/tests/java_api/HealingBrush/Android.mk +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_STATIC_JAVA_LIBRARIES := android-support-v8-renderscript - -LOCAL_PACKAGE_NAME := HealingBrush -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_RENDERSCRIPT_TARGET_API := 19 -LOCAL_RENDERSCRIPT_COMPATIBILITY := 18 - -LOCAL_RENDERSCRIPT_CC := $(LLVM_RS_CC) -LOCAL_RENDERSCRIPT_INCLUDES_OVERRIDE := \ - $(TOPDIR)external/clang/lib/Headers \ - $(TOPDIR)frameworks/rs/script_api/include - -LOCAL_RENDERSCRIPT_FLAGS := -rs-package-name=androidx.renderscript -LOCAL_JNI_SHARED_LIBRARIES := librsjni - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/HelloComputeNDK/Android.bp b/tests/java_api/HelloComputeNDK/Android.bp new file mode 100644 index 00000000..7dd25d64 --- /dev/null +++ b/tests/java_api/HelloComputeNDK/Android.bp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2013 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "HelloComputeNDK", + + srcs: [ + "src/**/*.java", + ], + + sdk_version: "current", + + jni_libs: ["libhellocomputendk"], + +} diff --git a/tests/java_api/HelloComputeNDK/Android.mk b/tests/java_api/HelloComputeNDK/Android.mk deleted file mode 100644 index 5fe2ffda..00000000 --- a/tests/java_api/HelloComputeNDK/Android.mk +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (C) 2013 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_CFLAGS := -Werror -Wall -Wextra - -LOCAL_PACKAGE_NAME := HelloComputeNDK -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libhellocomputendk - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/libhellocomputendk/Android.mk diff --git a/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.bp b/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.bp new file mode 100644 index 00000000..00679ffd --- /dev/null +++ b/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.bp @@ -0,0 +1,45 @@ +// Copyright (C) 2013 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +// +// This is the shared library included by the JNI test app. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +cc_test_library { + name: "libhellocomputendk", + + srcs: [ + "helloComputeNDK.cpp", + "mono.rscript", + ], + ldflags: ["-Wl,-Bsymbolic"], + header_libs: ["jni_headers"], + shared_libs: [ + "libdl", + "liblog", + "libjnigraphics", + ], + static_libs: ["libRScpp_static"], + + sdk_version: "current", + + stl: "c++_static", + +} diff --git a/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.mk b/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.mk deleted file mode 100644 index d0a8d888..00000000 --- a/tests/java_api/HelloComputeNDK/libhellocomputendk/Android.mk +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2013 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -# This is the shared library included by the JNI test app. 
-# -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) -LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk - -LOCAL_MODULE := libhellocomputendk -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_MODULE_TAGS := tests -LOCAL_SRC_FILES := helloComputeNDK.cpp mono.rscript - -LOCAL_CFLAGS := -Wall -Werror -LOCAL_LDFLAGS := -Wl,-Bsymbolic -LOCAL_HEADER_LIBRARIES := jni_headers -LOCAL_SHARED_LIBRARIES := libdl liblog libjnigraphics -LOCAL_STATIC_LIBRARIES := libRScpp_static - -LOCAL_SDK_VERSION := current - -LOCAL_NDK_STL_VARIANT := c++_static - -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/java_api/ImageProcessing/Android.bp b/tests/java_api/ImageProcessing/Android.bp new file mode 100644 index 00000000..6ac21676 --- /dev/null +++ b/tests/java_api/ImageProcessing/Android.bp @@ -0,0 +1,62 @@ +// +// Copyright (C) 2009 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "ImageProcessing", + libs: [ + "android.test.runner.stubs", + "android.test.base.stubs", + ], + static_libs: ["junit"], + srcs: [ + "src/**/*.java", + ":ImageProcessing-rscript{ImageProcessing.srcjar}", + ], + resource_zips: [ + ":ImageProcessing-rscript{ImageProcessing.res.zip}", + ], + sdk_version: "current", + min_sdk_version: "23", +} + +genrule { + name: "ImageProcessing-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "ImageProcessing.srcjar", + "ImageProcessing.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location ImageProcessing.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location ImageProcessing.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ImageProcessing/Android.mk b/tests/java_api/ImageProcessing/Android.mk deleted file mode 100644 index 5ac0d15c..00000000 --- a/tests/java_api/ImageProcessing/Android.mk +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_JAVA_LIBRARIES := android.test.runner.stubs android.test.base.stubs - -LOCAL_STATIC_JAVA_LIBRARIES := junit - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := ImageProcessing -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 23 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/ImageProcessing2/Android.bp b/tests/java_api/ImageProcessing2/Android.bp new file mode 100644 index 00000000..85c3a14c --- /dev/null +++ b/tests/java_api/ImageProcessing2/Android.bp @@ -0,0 +1,61 @@ +// +// Copyright (C) 2009 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "ImageProcessing2", + srcs: [ + "src/**/*.java", + ":ImageProcessing2-rscript{ImageProcessing2.srcjar}", + ], + resource_zips: [ + ":ImageProcessing2-rscript{ImageProcessing2.res.zip}", + ], + static_libs: ["android-support-v8-renderscript"], + sdk_version: "current", + jni_libs: ["librsjni"], +} + +genrule { + name: "ImageProcessing2-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "ImageProcessing2.srcjar", + "ImageProcessing2.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -target-api 21 -rs-package-name=androidx.renderscript " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location ImageProcessing2.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location ImageProcessing2.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ImageProcessing2/Android.mk b/tests/java_api/ImageProcessing2/Android.mk deleted file mode 100644 index 7165cc35..00000000 --- a/tests/java_api/ImageProcessing2/Android.mk +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_STATIC_JAVA_LIBRARIES := android-support-v8-renderscript - -LOCAL_PACKAGE_NAME := ImageProcessing2 -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_RENDERSCRIPT_TARGET_API := 18 -LOCAL_RENDERSCRIPT_COMPATIBILITY := 18 -LOCAL_RENDERSCRIPT_INCLUDES_OVERRIDE := $(TOPDIR)external/clang/lib/Headers \ - $(TOPDIR)frameworks/rs/script_api/include - -LOCAL_RENDERSCRIPT_FLAGS := -rs-package-name=androidx.renderscript -LOCAL_JNI_SHARED_LIBRARIES := librsjni - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/ImageProcessing_jb/Android.bp b/tests/java_api/ImageProcessing_jb/Android.bp new file mode 100644 index 00000000..9ffa7edc --- /dev/null +++ b/tests/java_api/ImageProcessing_jb/Android.bp @@ -0,0 +1,62 @@ +// +// Copyright (C) 2009 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "ImageProcessingJB", + static_libs: ["androidx.test.rules"], + libs: [ + "android.test.runner.stubs", + "android.test.base.stubs", + ], + test_suites: ["device-tests"], + srcs: [ + "src/**/*.java", + ":ImageProcessingJB-rscript{ImageProcessingJB.srcjar}", + ], + resource_zips: [ + ":ImageProcessingJB-rscript{ImageProcessingJB.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "ImageProcessingJB-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "ImageProcessingJB.srcjar", + "ImageProcessingJB.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location ImageProcessingJB.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location ImageProcessingJB.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ImageProcessing_jb/Android.mk b/tests/java_api/ImageProcessing_jb/Android.mk deleted file mode 100644 index 71669e7b..00000000 --- a/tests/java_api/ImageProcessing_jb/Android.mk +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_STATIC_JAVA_LIBRARIES := androidx.test.rules -LOCAL_JAVA_LIBRARIES := android.test.runner.stubs android.test.base.stubs - -LOCAL_MODULE_TAGS := tests -LOCAL_COMPATIBILITY_SUITE += device-tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := ImageProcessingJB -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/LatencyBenchmark/Android.bp b/tests/java_api/LatencyBenchmark/Android.bp new file mode 100644 index 00000000..0f05493a --- /dev/null +++ b/tests/java_api/LatencyBenchmark/Android.bp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2012 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsLatencyBenchmark", + srcs: [ + "src/**/*.java", + ":RsLatencyBenchmark-rscript{RsLatencyBenchmark.srcjar}", + ], + resource_zips: [ + ":RsLatencyBenchmark-rscript{RsLatencyBenchmark.res.zip}" + ], + sdk_version: "current", + min_sdk_version: "23", +} + +genrule { + name: "RsLatencyBenchmark-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsLatencyBenchmark.srcjar", + "RsLatencyBenchmark.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api 23 " + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsLatencyBenchmark.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsLatencyBenchmark.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/LatencyBenchmark/Android.mk b/tests/java_api/LatencyBenchmark/Android.mk deleted file mode 100644 index 8df2d057..00000000 --- a/tests/java_api/LatencyBenchmark/Android.mk +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (C) 2012 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_RENDERSCRIPT_TARGET_API := 23 - -LOCAL_PACKAGE_NAME := RsLatencyBenchmark -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 23 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/LivePreview/Android.bp b/tests/java_api/LivePreview/Android.bp new file mode 100644 index 00000000..0b06972d --- /dev/null +++ b/tests/java_api/LivePreview/Android.bp @@ -0,0 +1,56 @@ +// +// Copyright (C) 2012 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "PreviewRS", + srcs: [ + "src/**/*.java", + ":PreviewRS-rscript{PreviewRS.srcjar}", + ], + resource_zips: [ + ":PreviewRS-rscript{PreviewRS.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "PreviewRS-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "PreviewRS.srcjar", + "PreviewRS.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location PreviewRS.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location PreviewRS.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/LivePreview/Android.mk b/tests/java_api/LivePreview/Android.mk deleted file mode 100644 index a1cbe5f8..00000000 --- a/tests/java_api/LivePreview/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2012 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := PreviewRS -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/MathErr/Android.bp b/tests/java_api/MathErr/Android.bp new file mode 100644 index 00000000..628c5c54 --- /dev/null +++ b/tests/java_api/MathErr/Android.bp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2013 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsMathErr", + srcs: [ + "src/**/*.java", + ":RsMathErr-rscript{RsMathErr.srcjar}", + ], + resource_zips: [ + ":RsMathErr-rscript{RsMathErr.res.zip}" + ], + sdk_version: "current", + min_sdk_version: "19", +} + +genrule { + name: "RsMathErr-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsMathErr.srcjar", + "RsMathErr.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsMathErr.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsMathErr.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/MathErr/Android.mk b/tests/java_api/MathErr/Android.mk deleted file mode 100644 index 1fae614b..00000000 --- a/tests/java_api/MathErr/Android.mk +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2013 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RsMathErr -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 19 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RSTestBackward/Android.bp b/tests/java_api/RSTestBackward/Android.bp new file mode 100644 index 00000000..c962be7b --- /dev/null +++ b/tests/java_api/RSTestBackward/Android.bp @@ -0,0 +1,38 @@ +// +// Copyright (C) 2017 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "RSTestBackward", + sdk_version: "current", + srcs: [ + "src/**/*.java", + ":RSUnitTests_java_srcs", + ":RSTest-rscript{RSTest.srcjar}", + ], + resource_zips: [ + ":RSTest-rscript{RSTest.res.zip}", + ], + static_libs: ["androidx.test.rules"], + test_suites: ["device-tests"], + min_sdk_version: "21", +} diff --git a/tests/java_api/RSTestBackward/Android.mk b/tests/java_api/RSTestBackward/Android.mk deleted file mode 100644 index b3111a65..00000000 --- a/tests/java_api/RSTestBackward/Android.mk +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (C) 2017 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_PACKAGE_NAME := RSTestBackward -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := \ - $(call all-java-files-under, src)\ - $(call all-java-files-under, ../RSUnitTests/src)\ - $(call all-renderscript-files-under, ../RSUnitTests/src)\ - -LOCAL_STATIC_JAVA_LIBRARIES := androidx.test.rules -LOCAL_COMPATIBILITY_SUITE := device-tests - -LOCAL_RENDERSCRIPT_TARGET_API := current -LOCAL_MIN_SDK_VERSION := 21 -LOCAL_SDK_VERSION := current - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RSTest_CompatLib/Android.bp b/tests/java_api/RSTest_CompatLib/Android.bp new file mode 100644 index 00000000..74dc9660 --- /dev/null +++ b/tests/java_api/RSTest_CompatLib/Android.bp @@ -0,0 +1,66 @@ +// +// Copyright (C) 2017 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "RSTest_Compat", + sdk_version: "current", + srcs: [ + "src/**/*.java", + ":RSUnitTests_java_gui", + ":RSUnitTests_java_supportlibsrc_gen", + ":RSTest_Compat-rscript{RSTest_Compat.srcjar}", + ], + resource_zips: [ + ":RSTest_Compat-rscript{RSTest_Compat.res.zip}", + ], + static_libs: [ + "androidx.test.rules", + "android-support-v8-renderscript", + ], + min_sdk_version: "8", +} + +genrule { + name: "RSTest_Compat-rscript", + srcs: [ + ":RSUnitTests_rscript_supportlibsrc_gen", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RSTest_Compat.srcjar", + "RSTest_Compat.res.zip", + ], + cmd: "for f in $(locations :RSUnitTests_rscript_supportlibsrc_gen); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -rs-package-name=androidx.renderscript " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RSTest_Compat.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RSTest_Compat.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RSTest_CompatLib/Android.mk b/tests/java_api/RSTest_CompatLib/Android.mk deleted file mode 100644 index 2df8904f..00000000 --- a/tests/java_api/RSTest_CompatLib/Android.mk +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (C) 2017 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_PACKAGE_NAME := RSTest_Compat -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := \ - $(call all-java-files-under,src) \ - $(call all-java-files-under,../RSUnitTests/gui) \ - $(call all-java-files-under,../RSUnitTests/supportlibsrc_gen) \ - $(call all-renderscript-files-under,../RSUnitTests/supportlibsrc_gen) \ - -LOCAL_STATIC_JAVA_LIBRARIES := \ - androidx.test.rules \ - android-support-v8-renderscript \ - -LOCAL_RENDERSCRIPT_TARGET_API := current -LOCAL_RENDERSCRIPT_COMPATIBILITY := true -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 8 - -LOCAL_RENDERSCRIPT_FLAGS := -rs-package-name=androidx.renderscript - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RSUnitTests/Android.bp b/tests/java_api/RSUnitTests/Android.bp new file mode 100644 index 00000000..7d35d9fa --- /dev/null +++ b/tests/java_api/RSUnitTests/Android.bp @@ -0,0 +1,55 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +filegroup { + name: "RSUnitTests_java_srcs", + srcs: [ + "src/**/*.java", + ], +} + +filegroup { + name: "RSUnitTests_rscript_srcs", + srcs: [ + "src/**/*.rscript", + ], +} + +filegroup { + name: "RSUnitTests_java_gui", + srcs: [ + "gui/**/*.java", + ], +} + +filegroup { + name: "RSUnitTests_java_supportlibsrc_gen", + srcs: [ + "supportlibsrc_gen/**/*.java", + ], +} + +filegroup { + name: "RSUnitTests_rscript_supportlibsrc_gen", + srcs: [ + "supportlibsrc_gen/**/*.rscript", + ], +} diff --git a/tests/java_api/Refocus/Android.bp b/tests/java_api/Refocus/Android.bp new file mode 100644 index 00000000..26af7aa1 --- /dev/null +++ b/tests/java_api/Refocus/Android.bp @@ -0,0 +1,64 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "Refocus", + static_libs: [ + "android-support-v8-renderscript", + "xmp_toolkit", + ], + srcs: [ + "src/**/*.java", + ":Refocus-rscript{Refocus.srcjar}", + ], + resource_zips: [ + ":Refocus-rscript{Refocus.res.zip}", + ], + sdk_version: "current", + jni_libs: ["librsjni"], +} + +genrule { + name: "Refocus-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "Refocus.srcjar", + "Refocus.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -target-api 21 -rs-package-name=androidx.renderscript " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location Refocus.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location Refocus.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/Refocus/Android.mk b/tests/java_api/Refocus/Android.mk deleted file mode 100644 index 96350056..00000000 --- a/tests/java_api/Refocus/Android.mk +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_STATIC_JAVA_LIBRARIES := android-support-v8-renderscript -LOCAL_STATIC_JAVA_LIBRARIES += xmp_toolkit - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := Refocus -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SDK_VERSION := current -LOCAL_RENDERSCRIPT_TARGET_API := 21 -LOCAL_RENDERSCRIPT_COMPATIBILITY := 21 - -LOCAL_RENDERSCRIPT_CC := $(LLVM_RS_CC) -LOCAL_RENDERSCRIPT_INCLUDES_OVERRIDE := \ - $(TOPDIR)external/clang/lib/Headers \ - $(TOPDIR)frameworks/rs/script_api/include - -LOCAL_RENDERSCRIPT_FLAGS := -rs-package-name=androidx.renderscript -LOCAL_JNI_SHARED_LIBRARIES := librsjni - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsCameraDemo/Android.bp b/tests/java_api/RsCameraDemo/Android.bp new file mode 100644 index 00000000..9e03305c --- /dev/null +++ b/tests/java_api/RsCameraDemo/Android.bp @@ -0,0 +1,56 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsCameraDemo", + srcs: [ + "src/**/*.java", + ":RsCameraDemo-rscript{RsCameraDemo.srcjar}", + ], + resource_zips: [ + ":RsCameraDemo-rscript{RsCameraDemo.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "RsCameraDemo-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsCameraDemo.srcjar", + "RsCameraDemo.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsCameraDemo.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsCameraDemo.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsCameraDemo/Android.mk b/tests/java_api/RsCameraDemo/Android.mk deleted file mode 100644 index fd9daf9f..00000000 --- a/tests/java_api/RsCameraDemo/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) -LOCAL_SDK_VERSION := current - -LOCAL_PACKAGE_NAME := RsCameraDemo -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsMinimalTest/Android.bp b/tests/java_api/RsMinimalTest/Android.bp new file mode 100644 index 00000000..96bc7bbb --- /dev/null +++ b/tests/java_api/RsMinimalTest/Android.bp @@ -0,0 +1,28 @@ +// +// Copyright (C) 2018 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsMinimalTest", + static_libs: ["androidx.test.rules"], + srcs: ["src/**/*.java"], + sdk_version: "30", + test_suites: ["device-tests"], +} diff --git a/tests/java_api/RsMinimalTest/Android.mk b/tests/java_api/RsMinimalTest/Android.mk deleted file mode 100644 index 65cba1b3..00000000 --- a/tests/java_api/RsMinimalTest/Android.mk +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (C) 2018 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests -LOCAL_STATIC_JAVA_LIBRARIES := androidx.test.rules - -LOCAL_SRC_FILES := $(call all-java-files-under,src) - -LOCAL_RENDERSCRIPT_TARGET_API := 21 -LOCAL_SDK_VERSION := 26 - -LOCAL_PACKAGE_NAME := RsMinimalTest -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_COMPATIBILITY_SUITE := device-tests - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsMinimalTest/AndroidManifest.xml b/tests/java_api/RsMinimalTest/AndroidManifest.xml index 006d320f..40b5acf0 100644 --- a/tests/java_api/RsMinimalTest/AndroidManifest.xml +++ b/tests/java_api/RsMinimalTest/AndroidManifest.xml @@ -17,7 +17,7 @@ package="com.android.rs.minimaltest"> <uses-sdk android:minSdkVersion="21" - android:targetSdkVersion="26" /> + android:targetSdkVersion="30" /> <application android:largeHeap="true" diff --git a/tests/java_api/RsNbody/Android.bp b/tests/java_api/RsNbody/Android.bp new file mode 100644 index 00000000..01183152 --- /dev/null +++ b/tests/java_api/RsNbody/Android.bp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "RsNbody", + srcs: [ + "src/**/*.java", + ":RsNbody-rscript{RsNbody.srcjar}", + ], + resource_zips: [ + ":RsNbody-rscript{RsNbody.res.zip}", + ], + static_libs: ["androidx.legacy_legacy-support-v4"], + sdk_version: "current", +} + +genrule { + name: "RsNbody-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RsNbody.srcjar", + "RsNbody.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api 23 " + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RsNbody.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RsNbody.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsNbody/Android.mk b/tests/java_api/RsNbody/Android.mk deleted file mode 100644 index 4c99c42a..00000000 --- a/tests/java_api/RsNbody/Android.mk +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests -LOCAL_USE_AAPT2 := true -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_STATIC_ANDROID_LIBRARIES += androidx.legacy_legacy-support-v4 - -LOCAL_PACKAGE_NAME := RsNbody -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_RENDERSCRIPT_TARGET_API := 23 - -LOCAL_RENDERSCRIPT_CC := $(LLVM_RS_CC) -LOCAL_RENDERSCRIPT_INCLUDES_OVERRIDE := \ - $(TOPDIR)external/clang/lib/Headers \ - $(TOPDIR)frameworks/rs/script_api/include - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsTest/Android.bp b/tests/java_api/RsTest/Android.bp new file mode 100644 index 00000000..36952348 --- /dev/null +++ b/tests/java_api/RsTest/Android.bp @@ -0,0 +1,62 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +android_test { + name: "RSTest", + static_libs: ["androidx.test.rules"], + srcs: [ + "src/**/*.java", + ":RSUnitTests_java_srcs", + ":RSUnitTests_java_gui", + ":RSTest-rscript{RSTest.srcjar}", + ], + resource_zips: [ + ":RSTest-rscript{RSTest.res.zip}", + ], + sdk_version: "current", + test_suites: ["device-tests"], +} + +genrule { + name: "RSTest-rscript", + srcs: [ + ":RSUnitTests_rscript_srcs", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RSTest.srcjar", + "RSTest.res.zip", + ], + cmd: "for f in $(locations :RSUnitTests_rscript_srcs); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RSTest.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RSTest.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsTest/Android.mk b/tests/java_api/RsTest/Android.mk deleted file mode 100644 index 4f231b5f..00000000 --- a/tests/java_api/RsTest/Android.mk +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (C) 2008 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests -LOCAL_STATIC_JAVA_LIBRARIES := androidx.test.rules - -LOCAL_SRC_FILES := \ - $(call all-java-files-under,src) \ - $(call all-java-files-under,../RSUnitTests/gui) \ - $(call all-java-files-under,../RSUnitTests/src) \ - $(call all-renderscript-files-under,../RSUnitTests/src) \ - -LOCAL_RENDERSCRIPT_TARGET_API := current - -LOCAL_PACKAGE_NAME := RSTest -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_COMPATIBILITY_SUITE := device-tests - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsTest_11/Android.bp b/tests/java_api/RsTest_11/Android.bp new file mode 100644 index 00000000..1e176d27 --- /dev/null +++ b/tests/java_api/RsTest_11/Android.bp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +// This variable is used to match the 'LOCAL_SDK_VERSION' field in the former Android.mk file. +local_sdk_version = "11" +// This variable is used to set the value of the '-target-api' option for the 'llvm-rs-cc' command. 
+// Note: it may NOT always be the same as the 'local_sdk_version', due to the existing logic in the Make build system. +// For the Android.mk to Android.bp conversion, it is recommend to run the build before and after +// the conversion, to make sure the value of the '-target-api' option to be the same. +target_api_level = local_sdk_version + +android_test { + name: "RSTest_v11", + srcs: [ + "src/**/*.java", + ":RSTest_v11-rscript{RSTest_v11.srcjar}", + ], + resource_zips: [ + ":RSTest_v11-rscript{RSTest_v11.res.zip}" + ], + sdk_version: local_sdk_version, +} + +genrule { + name: "RSTest_v11-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RSTest_v11.srcjar", + "RSTest_v11.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api " + target_api_level + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RSTest_v11.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RSTest_v11.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsTest_11/Android.mk b/tests/java_api/RsTest_11/Android.mk deleted file mode 100644 index 87414414..00000000 --- a/tests/java_api/RsTest_11/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2008 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RSTest_v11 -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := 11 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsTest_14/Android.bp b/tests/java_api/RsTest_14/Android.bp new file mode 100644 index 00000000..0f526497 --- /dev/null +++ b/tests/java_api/RsTest_14/Android.bp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +// This variable is used to match the 'LOCAL_SDK_VERSION' field in the former Android.mk file. +local_sdk_version = "14" +// This variable is used to set the value of the '-target-api' option for the 'llvm-rs-cc' command. 
+// Note: it may NOT always be the same as the 'local_sdk_version', due to the existing logic in the Make build system. +// For the Android.mk to Android.bp conversion, it is recommend to run the build before and after +// the conversion, to make sure the value of the '-target-api' option to be the same. +target_api_level = local_sdk_version + +android_test { + name: "RSTest_v14", + srcs: [ + "src/**/*.java", + ":RSTest_v14-rscript{RSTest_v14.srcjar}", + ], + resource_zips: [ + ":RSTest_v14-rscript{RSTest_v14.res.zip}", + ], + sdk_version: local_sdk_version, +} + +genrule { + name: "RSTest_v14-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RSTest_v14.srcjar", + "RSTest_v14.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api " + target_api_level + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RSTest_v14.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RSTest_v14.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsTest_14/Android.mk b/tests/java_api/RsTest_14/Android.mk deleted file mode 100644 index 3008e2e4..00000000 --- a/tests/java_api/RsTest_14/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2008 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RSTest_v14 -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := 14 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/RsTest_16/Android.bp b/tests/java_api/RsTest_16/Android.bp new file mode 100644 index 00000000..50fce593 --- /dev/null +++ b/tests/java_api/RsTest_16/Android.bp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2008 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +// This variable is used to match the 'LOCAL_SDK_VERSION' field in the former Android.mk file. +local_sdk_version = "16" +// This variable is used to set the value of the '-target-api' option for the 'llvm-rs-cc' command. 
+// Note: it may NOT always be the same as the 'local_sdk_version', due to the existing logic in the Make build system. +// For the Android.mk to Android.bp conversion, it is recommend to run the build before and after +// the conversion, to make sure the value of the '-target-api' option to be the same. +target_api_level = local_sdk_version + +android_test { + name: "RSTest_v16", + srcs: [ + "src/**/*.java", + ":RSTest_v16-rscript{RSTest_v16.srcjar}", + ], + resource_zips: [ + ":RSTest_v16-rscript{RSTest_v16.res.zip}", + ], + sdk_version: local_sdk_version, +} + +genrule { + name: "RSTest_v16-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "RSTest_v16.srcjar", + "RSTest_v16.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api " + target_api_level + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location RSTest_v16.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location RSTest_v16.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/RsTest_16/Android.mk b/tests/java_api/RsTest_16/Android.mk deleted file mode 100644 index 4bb59a28..00000000 --- a/tests/java_api/RsTest_16/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2008 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := RSTest_v16 -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := 16 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/SSHealingBrush/Android.bp b/tests/java_api/SSHealingBrush/Android.bp new file mode 100644 index 00000000..4e139e43 --- /dev/null +++ b/tests/java_api/SSHealingBrush/Android.bp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "SSHealingBrush", + srcs: [ + "src/**/*.java", + ":SSHealingBrush-rscript{SSHealingBrush.srcjar}", + ], + resource_zips: [ + ":SSHealingBrush-rscript{SSHealingBrush.res.zip}", + ], + static_libs: ["android-support-v8-renderscript"], + sdk_version: "current", +} + +genrule { + name: "SSHealingBrush-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "SSHealingBrush.srcjar", + "SSHealingBrush.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api 0" + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location SSHealingBrush.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location SSHealingBrush.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/SSHealingBrush/Android.mk b/tests/java_api/SSHealingBrush/Android.mk deleted file mode 100644 index 63154e50..00000000 --- a/tests/java_api/SSHealingBrush/Android.mk +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_STATIC_JAVA_LIBRARIES := android-support-v8-renderscript - -LOCAL_PACKAGE_NAME := SSHealingBrush -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_RENDERSCRIPT_FLAGS := -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/SampleTest/Android.bp b/tests/java_api/SampleTest/Android.bp new file mode 100644 index 00000000..03991773 --- /dev/null +++ b/tests/java_api/SampleTest/Android.bp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2012 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "SampleRS", + srcs: [ + "src/**/*.java", + ":SampleRS-rscript{SampleRS.srcjar}", + ], + resource_zips: [ + ":SampleRS-rscript{SampleRS.res.zip}", + ], + sdk_version: "current", + min_sdk_version: "17", +} + +genrule { + name: "SampleRS-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "SampleRS.srcjar", + "SampleRS.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location SampleRS.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location SampleRS.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/SampleTest/Android.mk b/tests/java_api/SampleTest/Android.mk deleted file mode 100644 index 81521812..00000000 --- a/tests/java_api/SampleTest/Android.mk +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (C) 2012 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := SampleRS -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current -LOCAL_MIN_SDK_VERSION := 17 - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/ScriptGroupTest/Android.bp b/tests/java_api/ScriptGroupTest/Android.bp new file mode 100644 index 00000000..2f45df12 --- /dev/null +++ b/tests/java_api/ScriptGroupTest/Android.bp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2009 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "ScriptGroupTest", + libs: ["android.test.runner.stubs"], + srcs: [ + "src/**/*.java", + ":ScriptGroupTest-rscript{ScriptGroupTest.srcjar}", + ], + resource_zips: [ + ":ScriptGroupTest-rscript{ScriptGroupTest.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "ScriptGroupTest-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "ScriptGroupTest.srcjar", + "ScriptGroupTest.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -target-api 0" + + " -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location ScriptGroupTest.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location ScriptGroupTest.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/ScriptGroupTest/Android.mk b/tests/java_api/ScriptGroupTest/Android.mk deleted file mode 100644 index 6fed8b9b..00000000 --- a/tests/java_api/ScriptGroupTest/Android.mk +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_JAVA_LIBRARIES := android.test.runner.stubs - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) -#LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript - -LOCAL_RENDERSCRIPT_FLAGS := -target-api 0 - -LOCAL_PACKAGE_NAME := ScriptGroupTest -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -include $(BUILD_PACKAGE) diff --git a/tests/java_api/VrDemo/Android.bp b/tests/java_api/VrDemo/Android.bp new file mode 100644 index 00000000..5072f2c5 --- /dev/null +++ b/tests/java_api/VrDemo/Android.bp @@ -0,0 +1,56 @@ +// +// Copyright (C) 2015 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + // See: http://go/android-license-faq + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_test { + name: "VrDemo", + srcs: [ + "src/**/*.java", + ":VrDemo-rscript{VrDemo.srcjar}", + ], + resource_zips: [ + ":VrDemo-rscript{VrDemo.res.zip}", + ], + sdk_version: "current", +} + +genrule { + name: "VrDemo-rscript", + srcs: [ + "src/**/*.rscript", + ":rs_script_api", + ":rs_clang_headers", + ], + tools: [ + "llvm-rs-cc", + "soong_zip", + ], + out: [ + "VrDemo.srcjar", + "VrDemo.res.zip", + ], + cmd: "for f in $(locations src/**/*.rscript); do " + + " $(location llvm-rs-cc) -o $(genDir)/res/raw -p $(genDir)/src " + + " -I $$(dirname $$(echo $(locations :rs_script_api) | awk '{ print $$1 }')) " + + " -I $$(dirname $$(echo $(locations :rs_clang_headers) | awk '{ print $$1 }')) $${f}; " + + "done && " + + "$(location soong_zip) -srcjar -o $(location VrDemo.srcjar) -C $(genDir)/src -D $(genDir)/src &&" + + "$(location soong_zip) -o $(location VrDemo.res.zip) -C $(genDir)/res -D $(genDir)/res", +} diff --git a/tests/java_api/VrDemo/Android.mk b/tests/java_api/VrDemo/Android.mk deleted file mode 100644 index e33fb77b..00000000 --- a/tests/java_api/VrDemo/Android.mk +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) -LOCAL_SDK_VERSION := current - -LOCAL_PACKAGE_NAME := VrDemo -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/.gitignore b/tests/lldb/.gitignore deleted file mode 100644 index b79973f1..00000000 --- a/tests/lldb/.gitignore +++ /dev/null @@ -1,65 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class -*.swp - -*~ - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover - -# Translations -*.mo -*.pot - -# Django stuff: -*.log - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - - -results.xml -LLDBTestsuiteLog.txt diff --git a/tests/lldb/Android.mk b/tests/lldb/Android.mk deleted file mode 100644 index 95904a8b..00000000 --- a/tests/lldb/Android.mk +++ /dev/null @@ -1,4 +0,0 @@ -LOCAL_PATH:=$(call my-dir) - -include $(call all-makefiles-under,$(LOCAL_PATH)) - diff --git a/tests/lldb/README.txt b/tests/lldb/README.txt deleted file mode 100644 index 70e93bc6..00000000 --- a/tests/lldb/README.txt +++ /dev/null @@ -1,307 +0,0 @@ -- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- -- LLDB for Renderscript Test Suite -- -- 16/03/2016 -- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- - 
-Overview: - - The LLDB for Renderscript test suite is written in python and relies on - LLDB's python bindings. The test suite will push several test app's onto - a target device, and makes coordinated use of LLDB and ADB to run automated - debug sessions. - -Set up the test suite: - - Check out the AOSP and build it for your target. Navigate to - /frameworks/rs/test/lldb and type mm. This should successfully build the - binaries that the testsuite uses. They will be placed in - <path to out folder>/target/product/<product code name>/data/app, system/lib - and system/bin. - -Prerequisite: - - An lldb-server executable must be present on your device/emulator. - LLDB must be compiled on your host machine along with its python interface. - lldb-server and lldb should be built from the same source revisions. - -Running the test suite: - - The test suite can be run via the following command: - - > python run_tests.py [-h] - [--config path] - [--device DEVICE] - [--test path] - [--install-only] - [--no-install] - [--no-uninstall] - [--print-to-stdout] - [--verbose] - [--wimpy] - [--run-emu] - [--adb-path ADB_PATH] - [--aosp-product-path AOSP_PRODUCT_PATH] - [--blocklist BLOCKLIST [BLOCKLIST ...]] - [--device-port DEVICE_PORT] - [--emu-cmd EMU_CMD] - [--host-port HOST_PORT] - [--lldb-path LLDB_PATH] - [--lldb-server-path-device LLDB_SERVER_PATH_DEVICE] - [--lldb-server-path-host LLDB_SERVER_PATH_HOST] - [--log-file-path LOG_FILE_PATH] - [--results-file-path RESULTS_FILE_PATH] - [--timeout TIMEOUT] - - optional arguments: - -h, --help show this help message and exit - --config path, -c path - Path to a custom config file. - --device DEVICE, -d DEVICE - Specify the device id of the device to test on. - --test path, -t path Specify a specific test to run. - --install-only It only runs the pre-run stage of the test suite. It - installs the required APKs but does not execute the - tests. - --no-install, -n Stop the test suite installing apks to device. 
- --no-uninstall Stop the test suite uninstalling apks after - completion. - --print-to-stdout Print all logging information to standard out. - --verbose, -v Store extra info in the log. - --wimpy, -w Test only a core subset of features. - --run-emu Spawn an emulator and run the test suite on that. - Specify the emulator command line in the config file - or with -emu-cmd. - --adb-path ADB_PATH Path to android debug bridge on the host. - --aosp-product-path AOSP_PRODUCT_PATH - The path to the "out" folder of the AOSP repository. - --blocklist BLOCKLIST [BLOCKLIST ...] - Provide a test blocklist for skipping specific tests. - To specify the blocklist from the command line the - following can be used: --blocklist test1.py test2.py - ... - --device-port DEVICE_PORT - Specify the port number that lldb-server (on the - device) listens on. When lldb-server is spawned on the - device it will listen on this port. Each successive - test will increment onwards from this port. - --emu-cmd EMU_CMD The command line for the emulator (if using -run-emu). - --host-port HOST_PORT - Specify host port which lldb-server will be forwarded - to. Specify the starting host port number that lldb- - server (on the target) will be forwarded to on the - host. Each successive test will increment onwards from - this initial port. - --lldb-path LLDB_PATH - The path to lldb executable on the host. - --lldb-server-path-device LLDB_SERVER_PATH_DEVICE - Path to the lldb-server executable on the device. - --lldb-server-path-host LLDB_SERVER_PATH_HOST - Path to the lldb-server executable on host (if using - -run-emu). - --log-file-path LOG_FILE_PATH - The path to the file where the log will be written. - --results-file-path RESULTS_FILE_PATH - The path to the file where junit results.xml will be - written. - --timeout TIMEOUT Timeout period for a single command, expressed in - seconds - - An optional config file can be passed to the test suite which will provide - details of your specific environment. 
The user file should define a custom - class inheriting from Config in config.py. The class Config presents the - default set of options, available to be overridden. - - All options in the config file can also be specified on the command line. - - If your config and command line do not specify a path to the host lldb, - the PYTHONPATH environment variable must be set. The appropriate value to - set this to can be obtained by running the following command: - - > lldb -P - - This will print out a path to the lldb python bindings on your local machine. - -Build Requirements: - - - The following revisions are from the llvm git mirror: - - llvm : 5786b73 - clang: b6d0b32 - lldb : 98712eb - - lldb has the following dependencies: - - Python2.7.6 - swig2.0 - lldb-server - -Building LLDB python bindings: - - Linux: - - Build instructions for Linux lldb can be found on the official lldb web - page: - - http://lldb.llvm.org/build.html - - The following CMake variables should be enabled when generating: - - LLDB_ENABLE_PYTHON_SCRIPTS_SWIG_API_GENERATION = True - - As a post build step, swig will generate the python bindings for lldb. - - Windows: - - Prerequisites: - - Visual Studio 2015 - Custom x64 Python2.7.10 package: - http://p-nand-q.com/python/2015.08.07-Python2710-x64-vs2015.7z - Swig Version 3.0.5 (Added to the $PATH) - Cmake 3.4.0 (So we can generate Visual Studio 2015 solutions) - - Build Python for Windows: - - http://p-nand-q.com/python/building-python-27-with-vs2010.html - - The important thing here is that the above python distribution - contains debug versions of the libraries and is built with the same - Visual Studio version we are using so the runtimes do not conflict. - - Build LLDB on Windows: - - Select the Cmake generator "Microsoft Visual Studio 2015 Win64". 
- The following CMake variables should be enabled when generating: - - LLDB_DISABLE_PYTHON=False - LLDB_RELOCATABLE_PYTHON=False - PYTHON_HOME=<path to the above python release> - - Using cmake-gui is a good idea and lets you make sure that swig has - been correctly detected. - - In the CMake configure step, you should see something similar to the - following: - -- Found Python version 2.7.9 - -- LLDB Found PythonExecutable: E:/Python27/python.exe and - E:/Python27/python_d.exe - -- LLDB Found PythonLibs: E:/Python27/libs/python27.lib and - E:/Python27/libs/python27_d.lib - -- LLDB Found PythonDLL: E:/Python27/python27.dll and - E:/Python27/python27_d.dll - -- LLDB Found PythonIncludeDirs: E:/Python27/Include - LLDB version: 3.8.0 - Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE) - Found PythonInterp: - $<$<CONFIG:Debug>:E:/Python27/python_d.exe>$<$<NOT:$<CONFIG:Debug>>:E:/Python27/python.exe> - (found version "1.4") - - Build LLDB as a RelWithDebInfo build, because debug builds of lldb - produce an lldb_d.pyd file, but the __init__.py still refers to - lldb.pyd. - - The LLDB python bindings should have built in - "llvm_build\RelWithDebInfo\lib\site-packages\lldb". Point the test - suite towards "llvm_build\RelWithDebInfo\bin\lldb.exe". - When running the test suite itself, make sure to use the python - executable from the custom package. - -A typical test transcript: - - Located ADB - Located device ZX1G427S2S - Pushing all tests... 
- Pushed all tests - Pre run complete - Found 86 tests - Running test_allocation_dump_2_cpp.py: PASS - Running test_breakpoint_fileline_multiple_rs_files_cpp.py: PASS - Running test_read_local_cpp.py: PASS - Running test_breakpoint_kernel_multiple_rs_files_cpp.py: PASS - Running test_multiple_rs_files_cpp.py: PASS - Running test_breakpoint_kernel_all_multiple_rs_files_cpp.py: PASS - Running test_dwarf_lang_cpp.py: PASS - Running test_write_global_element_cpp.py: PASS - Running test_allocation_list_cpp.py: PASS - Running test_breakpoint_coordinate_cpp.py: PASS - Running test_rs_consts_cpp.py: PASS - Running test_allocation_file_cpp.py: PASS - Running test_allocation_dump_1_cpp.py: PASS - Running test_source_step_cpp.py: PASS - Running test_breakpoint_kernel_2_cpp.py: PASS - Running test_backtrace_cpp.py: PASS - Running test_language_subcmds_no_debug_cpp.py: PASS - Running test_breakpoint_kernel_1_cpp.py: PASS - Running test_language_subcmds_cpp.py: PASS - Running test_write_global_cpp.py: PASS - Running test_invoke_fun_cpp.py: PASS - Running test_breakpoint_fileline_cpp.py: PASS - Running test_write_local_cpp.py: PASS - Running test_breakpoint_kernel_all_cpp.py: PASS - Running test_write_local_element_cpp.py: PASS - Running test_call_api_funs_cpp.py: PASS - Running test_coordinates_cpp.py: PASS - Running test_read_global_cpp.py: PASS - Running test_language_subcmds.py: PASS - Running test_coordinates.py: PASS - Running test_language_subcmds_no_debug.py: PASS - Running test_read_local.py: PASS - Running test_call_api_funs.py: PASS - Running test_breakpoint_kernel_1.py: PASS - Running test_breakpoint_fileline.py: PASS - Running test_breakpoint_fileline_multiple_rs_files.py: PASS - Running test_rs_consts.py: PASS - Running test_invoke_fun.py: PASS - Running test_write_local_element.py: PASS - Running test_source_step.py: PASS - Running test_allocation_file.py: PASS - Running test_allocation_list.py: PASS - Running test_breakpoint_kernel_multiple_rs_files.py: PASS - 
Running test_allocation_dump_1.py: PASS - Running test_breakpoint_kernel_all.py: PASS - Running test_allocation_dump_2.py: PASS - Running test_allocation_dump_struct.py: PASS - Running test_read_global.py: PASS - Running test_language.py: PASS - Running test_dwarf_lang.py: PASS - Running test_breakpoint_coordinate.py: PASS - Running test_write_global.py: PASS - Running test_multiple_rs_files.py: PASS - Running test_write_global_element.py: PASS - Running test_breakpoint_kernel_all_multiple_rs_files.py: PASS - Running test_breakpoint_kernel_2.py: PASS - Running test_write_local.py: PASS - Running test_backtrace.py: PASS - Running test_call_api_funs_jni.py: PASS - Running test_invoke_fun_jni.py: PASS - Running test_allocation_dump_1_jni.py: PASS - Running test_breakpoint_fileline_multiple_rs_files_jni.py: PASS - Running test_allocation_file_jni.py: PASS - Running test_breakpoint_fileline_jni.py: PASS - Running test_source_step_jni.py: PASS - Running test_coordinates_jni.py: PASS - Running test_rs_consts_jni.py: PASS - Running test_breakpoint_kernel_all_multiple_rs_files_jni.py: PASS - Running test_multiple_rs_files_jni.py: PASS - Running test_allocation_dump_2_jni.py: PASS - Running test_allocation_list_jni.py: PASS - Running test_write_local_element_jni.py: PASS - Running test_breakpoint_kernel_all_jni.py: PASS - Running test_breakpoint_coordinate_jni.py: PASS - Running test_language_subcmds_no_debug_jni.py: PASS - Running test_read_local_jni.py: PASS - Running test_dwarf_lang_jni.py: PASS - Running test_breakpoint_kernel_2_jni.py: PASS - Running test_breakpoint_kernel_multiple_rs_files_jni.py: PASS - Running test_write_global_element_jni.py: PASS - Running test_breakpoint_kernel_1_jni.py: PASS - Running test_read_global_jni.py: PASS - Running test_language_subcmds_jni.py: PASS - Running test_write_global_jni.py: PASS - Running test_backtrace_jni.py: PASS - Running test_write_local_jni.py: PASS - Uninstalled/Deleted all tests - 86 of 86 passed - 100% rate diff --git 
a/tests/lldb/config.py b/tests/lldb/config.py deleted file mode 100644 index bc478d03..00000000 --- a/tests/lldb/config.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''LLDB-Renderscript test suite configuration file. - -This file contains the default test suite config which will be used in the -case a developer did not supply a custom one.''' - -import os -from tests.harness.decorators import deprecated - - -class Config(object): - '''Test suite configuration object. - - The Config class is used by the test suite to abstract the specifics of a - user's local setup. This config can be overridden by specifying a custom - config on the command line.''' - # pylint: disable=no-self-use - - @property - def adb_path(self): - '''Path to android debug bridge on the host.''' - return 'adb' - - @property - def host_port(self): - '''Specify host port which lldb-server will be forwarded to. - - Specify the starting host port number that lldb-server (on the target) - will be forwarded to on the host. Each successive test will increment - onwards from this initial port.''' - return 1234 - - @property - def device_port(self): - '''Specify the port number that lldb-server (on the device) listens on. - - When lldb-server is spawned on the device it will listen on this port. 
- Each successive test will increment onwards from this port.''' - return 1234 - - @property - def lldb_server_path_device(self): - '''Path to the lldb-server executable on the device.''' - return '/data/lldb-server' - - @property - def lldb_server_path_host(self): - '''Path to the lldb-server executable on host (if using -run-emu).''' - return 'lldb-server' - - @property - def aosp_product_path(self): - '''The path to the "out" folder of the AOSP repository.''' - return os.getenv('ANDROID_PRODUCT_OUT') - - @property - def log_file_path(self): - '''The path to the file where the log will be written.''' - return os.path.join(os.getcwd(), 'LLDBTestsuiteLog.txt') - - @property - def results_file_path(self): - '''The path to the file where junit results.xml will be written.''' - return os.path.join(os.getcwd(), 'results.xml') - - @property - def lldb_path(self): - '''The path to lldb executable on the host.''' - return 'lldb' - - @property - def blocklist(self): - '''Provide a test blocklist for skipping specific tests. - - To specify the blocklist from the command line the following can be - used: --blocklist test1.py test2.py ...''' - return [] - - @property - def verbose(self): - '''Flag to indicate whether to store extra output in the logs.''' - return False - - @property - def device(self): - '''Specify the device id of the device to run on. 
- - When multiple devices or emulators are present, a specific device to - use while testing can be indicated here.''' - return os.environ.get('ANDROID_SERIAL') - - @property - def timeout(self): - '''Timeout period for a single command, expressed in seconds''' - return 60 * 15 - - @property - @deprecated() - def emu_cmd(self): - '''The command line for the emulator (if using -run-emu).''' - return os.path.join(os.path.dirname(__file__), '..', '..', '..', '..', - 'prebuilts', 'android-emulator', 'linux-x86_64', - 'emulator') diff --git a/tests/lldb/cpp/Allocations/Allocations.cpp b/tests/lldb/cpp/Allocations/Allocations.cpp deleted file mode 100644 index 4ad546d6..00000000 --- a/tests/lldb/cpp/Allocations/Allocations.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <RenderScript.h> - -#include "ScriptC_allocs.h" - -sp<RS> mRS; -sp<Allocation> mBoolAllocation; // boolean - -sp<Allocation> mCharAllocation; // char -sp<Allocation> mChar2Allocation; // char2 -sp<Allocation> mChar3Allocation; // char3 -sp<Allocation> mChar4Allocation; // char4 - -sp<Allocation> mUCharAllocation; // uchar -sp<Allocation> mUChar2Allocation; // uchar2 -sp<Allocation> mUChar3Allocation; // uchar3 -sp<Allocation> mUChar4Allocation; // uchar4 - -sp<Allocation> mShortAllocation; // short -sp<Allocation> mShort2Allocation; // short2 -sp<Allocation> mShort3Allocation; // short3 -sp<Allocation> mShort4Allocation; // short4 - -sp<Allocation> mUShortAllocation; // ushort -sp<Allocation> mUShort2Allocation; // ushort2 -sp<Allocation> mUShort3Allocation; // ushort3 -sp<Allocation> mUShort4Allocation; // ushort4 - -sp<Allocation> mIntAllocation; // int -sp<Allocation> mInt2Allocation; // int2 -sp<Allocation> mInt3Allocation; // int3 -sp<Allocation> mInt4Allocation; // int4 - -sp<Allocation> mUIntAllocation; // uint -sp<Allocation> mUInt2Allocation; // uint2 -sp<Allocation> mUInt3Allocation; // uint3 -sp<Allocation> mUInt4Allocation; // uint4 - -sp<Allocation> mLongAllocation; // long -sp<Allocation> mLong2Allocation; // long2 -sp<Allocation> mLong3Allocation; // long3 -sp<Allocation> mLong4Allocation; // long4 - -sp<Allocation> mULongAllocation; // ulong -sp<Allocation> mULong2Allocation; // ulong2 -sp<Allocation> mULong3Allocation; // ulong3 -sp<Allocation> mULong4Allocation; // ulong4 - -sp<Allocation> mHalfAllocation; // half -sp<Allocation> mHalf2Allocation; // half2 -sp<Allocation> mHalf3Allocation; // half3 -sp<Allocation> mHalf4Allocation; // half4 - -sp<Allocation> mFloatAllocation; // float -sp<Allocation> mFloat2Allocation; // float2 -sp<Allocation> mFloat3Allocation; // float3 -sp<Allocation> mFloat4Allocation; // float4 - -sp<Allocation> mDoubleAllocation; // double -sp<Allocation> mDouble2Allocation; // double2 -sp<Allocation> 
mDouble3Allocation; // double3 -sp<Allocation> mDouble4Allocation; // double4 - -const int mAllocSize = 24; // Needs to be < CHAR_MAX and divisible by 4. -const int mBitmapSize = 64; - -void createSignedAllocations() { - Type::Builder typeI8Builder(mRS, Element::I8(mRS)); - typeI8Builder.setX(1); // One element here to test 16 byte memory alignment - typeI8Builder.setY(3); - typeI8Builder.setZ(8); - - mCharAllocation = Allocation::createTyped(mRS, typeI8Builder.create()); - mChar2Allocation = Allocation::createSized(mRS, Element::I8_2(mRS), mAllocSize / 2); - mChar3Allocation = Allocation::createSized(mRS, Element::I8_3(mRS), mAllocSize / 4); - mChar4Allocation = Allocation::createSized(mRS, Element::I8_4(mRS), mAllocSize / 4); - - Type::Builder typeI16_2Builder(mRS, Element::I16_2(mRS)); - typeI16_2Builder.setX(6); - typeI16_2Builder.setY(1); - typeI16_2Builder.setZ(2); - - mShortAllocation = Allocation::createSized(mRS, Element::I16(mRS), mAllocSize); - mShort2Allocation = Allocation::createTyped(mRS, typeI16_2Builder.create()); - mShort3Allocation = Allocation::createSized(mRS, Element::I16_3(mRS), mAllocSize / 4); - mShort4Allocation = Allocation::createSized(mRS, Element::I16_4(mRS), mAllocSize / 4); - - Type::Builder typeI32_3Builder(mRS, Element::I32_3(mRS)); - typeI32_3Builder.setX(3); - typeI32_3Builder.setY(2); - - mIntAllocation = Allocation::createSized(mRS, Element::I32(mRS), mAllocSize); - mInt2Allocation = Allocation::createSized(mRS, Element::I32_2(mRS), mAllocSize / 2); - mInt3Allocation = Allocation::createTyped(mRS, typeI32_3Builder.create()); - mInt4Allocation = Allocation::createSized(mRS, Element::I32_4(mRS), mAllocSize / 4); - - Type::Builder typeI64_4Builder(mRS, Element::I64_4(mRS)); - typeI64_4Builder.setX(1); - typeI64_4Builder.setY(6); - - mLongAllocation = Allocation::createSized(mRS, Element::I64(mRS), mAllocSize); - mLong2Allocation = Allocation::createSized(mRS, Element::I64_2(mRS), mAllocSize / 2); - mLong3Allocation = 
Allocation::createSized(mRS, Element::I64_3(mRS), mAllocSize / 4); - mLong4Allocation = Allocation::createTyped(mRS, typeI64_4Builder.create()); - - mBoolAllocation = Allocation::createSized(mRS, Element::BOOLEAN(mRS), mAllocSize); -} - -void initSignedAllocations() { - char *buffer_char = new char[mAllocSize]; - short *buffer_short = new short[mAllocSize]; - int *buffer_int = new int[mAllocSize]; - int64_t *buffer_long = new int64_t[mAllocSize]; - char *buffer_bool = new char[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (char) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (int64_t) i; - buffer_bool[i] = (char) (0x01 & i); - } - - mCharAllocation->copy3DRangeFrom(0, 0, 0, 1, 3, 8, buffer_char); - mChar2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_char); - mChar3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - mChar4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - - delete [] buffer_char; - - mShortAllocation->copy1DRangeFrom(0, mAllocSize, buffer_short); - mShort2Allocation->copy3DRangeFrom(0, 0, 0, 6, 1, 2, buffer_short); - mShort3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - mShort4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - - delete [] buffer_short; - - mIntAllocation->copy1DRangeFrom(0, mAllocSize, buffer_int); - mInt2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_int); - mInt3Allocation->copy2DRangeFrom(0, 0, 3, 2, buffer_int); - mInt4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_int); - - delete [] buffer_int; - - mLongAllocation->copy1DRangeFrom(0, mAllocSize, buffer_long); - mLong2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_long); - mLong3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - mLong4Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_long); - - delete [] buffer_long; - - mBoolAllocation->copy1DRangeFrom(0, mAllocSize, buffer_bool); - - delete [] buffer_bool; -} - -void createUnsignedAllocations() 
{ - Type::Builder typeU8_2Builder(mRS, Element::U8_2(mRS)); - typeU8_2Builder.setX(2); - typeU8_2Builder.setY(6); - - mUCharAllocation = Allocation::createSized(mRS, Element::U8(mRS), mAllocSize); - mUChar2Allocation = Allocation::createTyped(mRS, typeU8_2Builder.create()); - mUChar3Allocation = Allocation::createSized(mRS, Element::U8_3(mRS), mAllocSize / 4); - mUChar4Allocation = Allocation::createSized(mRS, Element::U8_4(mRS), mAllocSize / 4); - - Type::Builder typeU16_3Builder(mRS, Element::U16_3(mRS)); - typeU16_3Builder.setX(1); - typeU16_3Builder.setY(6); - - mUShortAllocation = Allocation::createSized(mRS, Element::U16(mRS), mAllocSize); - mUShort2Allocation = Allocation::createSized(mRS, Element::U16_2(mRS), mAllocSize / 2); - mUShort3Allocation = Allocation::createTyped(mRS, typeU16_3Builder.create()); - mUShort4Allocation = Allocation::createSized(mRS, Element::U16_4(mRS), mAllocSize / 4); - - Type::Builder typeU32_4Builder(mRS, Element::U32_4(mRS)); - typeU32_4Builder.setX(1); - typeU32_4Builder.setY(1); - typeU32_4Builder.setZ(6); - - mUIntAllocation = Allocation::createSized(mRS, Element::U32(mRS), mAllocSize); - mUInt2Allocation = Allocation::createSized(mRS, Element::U32_2(mRS), mAllocSize / 2); - mUInt3Allocation = Allocation::createSized(mRS, Element::U32_3(mRS), mAllocSize / 4); - mUInt4Allocation = Allocation::createTyped(mRS, typeU32_4Builder.create()); - - Type::Builder typeU64Builder(mRS, Element::U64(mRS)); - typeU64Builder.setX(4); - typeU64Builder.setY(3); - typeU64Builder.setZ(2); - - mULongAllocation = Allocation::createTyped(mRS, typeU64Builder.create()); - mULong2Allocation = Allocation::createSized(mRS, Element::U64_2(mRS), mAllocSize / 2); - mULong3Allocation = Allocation::createSized(mRS, Element::U64_3(mRS), mAllocSize / 4); - mULong4Allocation = Allocation::createSized(mRS, Element::U64_4(mRS), mAllocSize / 4); -} - -void initUnsignedAllocations() { - char *buffer_char = new char[mAllocSize]; - short *buffer_short = new 
short[mAllocSize]; - int *buffer_int = new int[mAllocSize]; - uint64_t *buffer_long = new uint64_t[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (char) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (uint64_t) i; - } - - mUCharAllocation->copy1DRangeFrom(0, mAllocSize, buffer_char); - mUChar2Allocation->copy2DRangeFrom(0, 0, 2, 6, buffer_char); - mUChar3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - mUChar4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - - delete [] buffer_char; - - mUShortAllocation->copy1DRangeFrom(0, mAllocSize, buffer_short); - mUShort2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_short); - mUShort3Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_short); - mUShort4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - - delete [] buffer_short; - - mUIntAllocation->copy1DRangeFrom(0, mAllocSize, buffer_int); - mUInt2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_int); - mUInt3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_int); - mUInt4Allocation->copy3DRangeFrom(0, 0, 0, 1, 1, 6, buffer_int); - - delete [] buffer_int; - - mULongAllocation->copy3DRangeFrom(0, 0, 0, 4, 3, 2, buffer_long); - mULong2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_long); - mULong3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - mULong4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - - delete [] buffer_long; -} - -void createFloatAllocations() { - Type::Builder typeF16_3Builder(mRS, Element::F16_3(mRS)); - typeF16_3Builder.setX(1); - typeF16_3Builder.setY(6); - - mHalfAllocation = Allocation::createSized(mRS, Element::F16(mRS), mAllocSize); - mHalf2Allocation = Allocation::createSized(mRS, Element::F16_2(mRS), mAllocSize / 2); - mHalf3Allocation = Allocation::createTyped(mRS, typeF16_3Builder.create()); - mHalf4Allocation = Allocation::createSized(mRS, Element::F16_4(mRS), mAllocSize / 4); - - Type::Builder typeF32_4Builder(mRS, 
Element::F32_4(mRS)); - typeF32_4Builder.setX(3); - typeF32_4Builder.setY(2); - - mFloatAllocation = Allocation::createSized(mRS, Element::F32(mRS), mAllocSize); - mFloat2Allocation = Allocation::createSized(mRS, Element::F32_2(mRS), mAllocSize / 2); - mFloat3Allocation = Allocation::createSized(mRS, Element::F32_3(mRS), mAllocSize / 4); - mFloat4Allocation = Allocation::createTyped(mRS, typeF32_4Builder.create()); - - Type::Builder typeF64_2Builder(mRS, Element::F64_2(mRS)); - typeF64_2Builder.setX(4); - typeF64_2Builder.setY(1); - typeF64_2Builder.setZ(3); - - mDoubleAllocation = Allocation::createSized(mRS, Element::F64(mRS), mAllocSize); - mDouble2Allocation = Allocation::createTyped(mRS, typeF64_2Builder.create()); - - Type::Builder typeF64_3Builder(mRS, Element::F64_3(mRS)); - typeF64_3Builder.setX(1); - typeF64_3Builder.setY(2); - typeF64_3Builder.setZ(3); - - Type::Builder typeF64_4Builder(mRS, Element::F64_4(mRS)); - typeF64_4Builder.setX(1); - typeF64_4Builder.setY(2); - typeF64_4Builder.setZ(3); - - mDouble3Allocation = Allocation::createTyped(mRS, typeF64_3Builder.create()); - mDouble4Allocation = Allocation::createTyped(mRS, typeF64_4Builder.create()); -} - -void initFloatAllocations() { - __fp16 *buffer_half = new __fp16[mAllocSize]; - float *buffer_float = new float[mAllocSize]; - double *buffer_double = new double[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_half[i] = (__fp16) 1 / i; - buffer_float[i] = (float) 1 / i; - buffer_double[i] = (double) 1 / i; - } - - mHalfAllocation->copy1DRangeFrom(0, mAllocSize, buffer_half); - mHalf2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_half); - mHalf3Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_half); - mHalf4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_half); - - delete [] buffer_half; - - mFloatAllocation->copy1DRangeFrom(0, mAllocSize, buffer_float); - mFloat2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_float); - mFloat3Allocation->copy1DRangeFrom(0, mAllocSize/4, 
buffer_float); - mFloat4Allocation->copy2DRangeFrom(0, 0, 3, 2, buffer_float); - - delete [] buffer_float; - - mDoubleAllocation->copy1DRangeFrom(0, mAllocSize, buffer_double); - mDouble2Allocation->copy3DRangeFrom(0, 0, 0, 4, 1, 3, buffer_double); - mDouble3Allocation->copy3DRangeFrom(0, 0, 0, 1, 2, 3, buffer_double); - mDouble4Allocation->copy3DRangeFrom(0, 0, 0, 1, 2, 3, buffer_double); - - delete [] buffer_double; -} - -int main() -{ - mRS = new RS(); - - mRS->init("/data/rscache", RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - - sp<ScriptC_allocs> mScript = new ScriptC_allocs(mRS); - - Type::Builder typeRGBA_888Builder(mRS, Element::RGBA_8888(mRS)); - typeRGBA_888Builder.setX(mBitmapSize); - typeRGBA_888Builder.setY(mBitmapSize); - - sp<Allocation> mInAllocation = Allocation::createTyped(mRS, typeRGBA_888Builder.create()); - - const int image_area = mBitmapSize*mBitmapSize; - const int image_size = image_area*sizeof(int); - - char *zero_buffer = new char[image_size]; - memset(zero_buffer, 0, image_size); - mInAllocation->copy1DRangeFrom(0, image_area, zero_buffer); - delete [] zero_buffer; - - sp<Allocation> mOutAllocation = Allocation::createTyped(mRS, typeRGBA_888Builder.create()); - createSignedAllocations(); - initSignedAllocations(); - - mRS->finish(); - mScript->forEach_swizzle_kernel(mInAllocation, mOutAllocation); - mRS->finish(); - - mCharAllocation.clear(); - mChar2Allocation.clear(); - mChar3Allocation.clear(); - mChar4Allocation.clear(); - - mShort2Allocation.clear(); - mShort3Allocation.clear(); - mShort4Allocation.clear(); - - mIntAllocation.clear(); - mInt2Allocation.clear(); - mInt3Allocation.clear(); - mInt4Allocation.clear(); - - mLongAllocation.clear(); - mLong2Allocation.clear(); - mLong3Allocation.clear(); - mLong4Allocation.clear(); - - mBoolAllocation.clear(); - - createUnsignedAllocations(); - initUnsignedAllocations(); - - mInAllocation = mUShortAllocation; // Host side assignment - - mRS->finish(); - 
mScript->forEach_square_kernel(mInAllocation, mUIntAllocation); - mRS->finish(); - - mUCharAllocation.clear(); - mUChar2Allocation.clear(); - mUChar3Allocation.clear(); - mUChar4Allocation.clear(); - - mUShortAllocation.clear(); - mUShort2Allocation.clear(); - mUShort3Allocation.clear(); - mUShort4Allocation.clear(); - - mUInt2Allocation.clear(); - mUInt3Allocation.clear(); - mUInt4Allocation.clear(); - - mULongAllocation.clear(); - mULong2Allocation.clear(); - mULong3Allocation.clear(); - mULong4Allocation.clear(); - - createFloatAllocations(); - initFloatAllocations(); - - mRS->finish(); - mScript->forEach_add_half_kernel(mDouble4Allocation, mDouble3Allocation); - mRS->finish(); - - return 0; -} - diff --git a/tests/lldb/cpp/Allocations/Android.mk b/tests/lldb/cpp/Allocations/Android.mk deleted file mode 100644 index 9f72923b..00000000 --- a/tests/lldb/cpp/Allocations/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppAllocations -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - Allocations.cpp \ - allocs.rscript - -LOCAL_STATIC_LIBRARIES := libcompiler_rt - -include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/lldb/cpp/Allocations/allocs.rscript b/tests/lldb/cpp/Allocations/allocs.rscript deleted file mode 100644 index eff79777..00000000 --- a/tests/lldb/cpp/Allocations/allocs.rscript +++ /dev/null @@ -1,47 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppallocations) - -// Kernel performs basic vector swizzle -uchar4 __attribute__((kernel)) swizzle_kernel(uchar4 in) -{ - return in.wzyx; -} - -// Kernel squares every element in allocation -uint __attribute__((kernel)) square_kernel(ushort in) -{ - uint result = (uint)in * (uint)in; - return result; -} - -// Helper function adding 1/2 to passed in double -static double half_helper(double in) -{ - return (in + 0.5); -} - -// Kernel returns first 3 elements of a double4 plus 1/2 -double3 __attribute__((kernel)) add_half_kernel(double4 in) -{ - double3 result; - result.x = half_helper(in.x); - result.y = half_helper(in.y); - result.z = half_helper(in.z); - return result; -} diff --git a/tests/lldb/cpp/Android.mk b/tests/lldb/cpp/Android.mk deleted file mode 100644 index 83384322..00000000 --- a/tests/lldb/cpp/Android.mk +++ /dev/null @@ -1,2 +0,0 @@ -include $(call all-subdir-makefiles) - diff --git a/tests/lldb/cpp/BranchingFunCalls/Android.mk b/tests/lldb/cpp/BranchingFunCalls/Android.mk deleted file mode 100644 index c06e6af3..00000000 --- a/tests/lldb/cpp/BranchingFunCalls/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppBranchingFunCalls -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - BranchingFunCalls.cpp \ - scalars.rscript - -include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) diff --git 
a/tests/lldb/cpp/BranchingFunCalls/BranchingFunCalls.cpp b/tests/lldb/cpp/BranchingFunCalls/BranchingFunCalls.cpp deleted file mode 100644 index f0eee09c..00000000 --- a/tests/lldb/cpp/BranchingFunCalls/BranchingFunCalls.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include <RenderScript.h> - -#include "ScriptC_scalars.h" - -int main() -{ - static const int size = 64; - sp<RS> rs = new RS(); - - rs->init("/data/rscache", RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - - auto e = Element::I32(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - int * input = new int[size*size]; - for(int i = 0; i < size*size; ++i) { - input[i] = i - (size*size / 2); - } - a->copy2DRangeFrom(0, 0, size, size, input); - delete [] input; - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_scalars> s = new ScriptC_scalars(rs); - s->invoke_addToGlobal(234); - s->forEach_simple_kernel(a, b); - rs->finish(); - int32_t * output = new int32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - return 0; -} - diff --git a/tests/lldb/cpp/BranchingFunCalls/scalars.rscript b/tests/lldb/cpp/BranchingFunCalls/scalars.rscript deleted file mode 100644 index 279694d7..00000000 --- 
a/tests/lldb/cpp/BranchingFunCalls/scalars.rscript +++ /dev/null @@ -1,76 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppbranchingfuncalls) - -static bool is_neg(int a) -{ - if(a < 0) - return true; - else - return false; -} - -static bool is_pos(int a) -{ - if(a > 0) - return true; - else - return false; -} - -static void set_i(int * a, int b) -{ - int tmp = b; - *a = tmp; -} - -static void modify_f(float * f) -{ - *f *= 0.5f; -} - -static void modify_i(int * i) -{ - int j = *i; - int cutoff = 2 << 6; - if(j > cutoff) - j = cutoff; - if(is_neg(j)) - set_i(i, 0); - else if(is_pos(j)) - set_i(i, j); - else - set_i(i, cutoff); -} - -int __attribute__((kernel)) simple_kernel(int in) -{ - int i = in; - float f = (float) i; - modify_f(&f); - modify_i(&i); - int ret = (int) f; - return in * ret; -} - -int glob = 123; - -void addToGlobal(int arg) -{ - glob += arg; -} diff --git a/tests/lldb/cpp/InfiniteLoop/Android.mk b/tests/lldb/cpp/InfiniteLoop/Android.mk deleted file mode 100644 index 86a4ec55..00000000 --- a/tests/lldb/cpp/InfiniteLoop/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppInfiniteLoop -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - InfiniteLoop.cpp \ - infiniteloop.rscript - 
-include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/lldb/cpp/InfiniteLoop/InfiniteLoop.cpp b/tests/lldb/cpp/InfiniteLoop/InfiniteLoop.cpp deleted file mode 100644 index 2b55d4e5..00000000 --- a/tests/lldb/cpp/InfiniteLoop/InfiniteLoop.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include <thread> -#include <chrono> - -#include <RenderScript.h> - -#include "ScriptC_infiniteloop.h" - -int main() -{ - static const int size = 64; - sp<RS> rs = new RS(); - - rs->init("/data/rscache", RS_INIT_LOW_LATENCY); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - sp<ScriptC_infiniteloop> s = new ScriptC_infiniteloop(rs); - - // Test is designed to loop forever, waits for two seconds - // between each invocation of the kernel - bool forever = true; - while(forever) - { - s->forEach_simple_kernel(a, b); - std::this_thread::sleep_for(std::chrono::seconds(2)); - } - - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - return 0; -} - diff --git a/tests/lldb/cpp/InfiniteLoop/infiniteloop.rscript b/tests/lldb/cpp/InfiniteLoop/infiniteloop.rscript deleted file mode 100644 index 7eff95c7..00000000 --- 
a/tests/lldb/cpp/InfiniteLoop/infiniteloop.rscript +++ /dev/null @@ -1,55 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppinfiniteloop) - - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). 
- */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 out = rsUnpackColor8888(in); - - out.r = gColor.r; - out.g = gColor.g; - out.b = gColor.b; - out.a = gColor.a; - - uchar4 result = rsPackColorTo8888(out); - return result; -} - diff --git a/tests/lldb/cpp/KernelVariables/Android.mk b/tests/lldb/cpp/KernelVariables/Android.mk deleted file mode 100644 index 7a68c937..00000000 --- a/tests/lldb/cpp/KernelVariables/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppKernelVariables -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - KernelVariables.cpp \ - simple.rscript - -include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/lldb/cpp/KernelVariables/KernelVariables.cpp b/tests/lldb/cpp/KernelVariables/KernelVariables.cpp deleted file mode 100644 index e289005b..00000000 --- a/tests/lldb/cpp/KernelVariables/KernelVariables.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <RenderScript.h> - -#include "ScriptC_simple.h" - -int main() -{ - static const int size = 64; - sp<RS> rs = new RS(); - - rs->init("/data/rscache", RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - sp<ScriptC_simple> s = new ScriptC_simple(rs); - - static const int buffer_int[] = {1, 2, 3, 4}; - sp<Allocation> int_allocation = Allocation::createSized(rs, Element::I32(rs), 4); - int_allocation->copy1DRangeFrom(0, 4, buffer_int); - s->set_allocation_1D_global(int_allocation); - - static const int buffer_int2[] = {5, 6, 7, 8}; - - Type::Builder typeI32Builder2D(rs, Element::I32(rs)); - typeI32Builder2D.setX(2); - typeI32Builder2D.setY(2); - - sp<Allocation> int_allocation2 = Allocation::createTyped(rs, typeI32Builder2D.create()); - int_allocation2->copy2DRangeFrom(0, 0, 2, 2, buffer_int2); - s->set_allocation_1D_global2(int_allocation2); - - s->set_allocation_2D_global(a); - s->set_allocation_2D_global2(b); - - static const int buffer_int3[] = {9, 10, 11, 12, 13, 14, 15, 16}; - - Type::Builder typeI32Builder3D(rs, Element::I32(rs)); - typeI32Builder3D.setX(2); - typeI32Builder3D.setY(2); - typeI32Builder3D.setZ(2); - - sp<Allocation> int_allocation3 = Allocation::createTyped(rs, typeI32Builder3D.create()); - int_allocation3->copy3DRangeFrom(0, 0, 0, 2, 2, 2, buffer_int3); - s->set_allocation_3D_global(int_allocation3); - - Type::Builder yuvTypeBuilder(rs, Element::YUV(rs)); - yuvTypeBuilder.setX(4); - yuvTypeBuilder.setY(4); - yuvTypeBuilder.setYuvFormat(RS_YUV_YV12); - - sp<Allocation> yuv_allocation = Allocation::createTyped(rs, yuvTypeBuilder.create()); - s->set_allocation_YUV_2D_global(yuv_allocation); - - s->set_sampler_global(Sampler::CLAMP_LINEAR(rs)); - - // Script is executed once, then the data is copied back when finished - 
s->forEach_kernel(a, b); - rs->finish(); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - return 0; -} - diff --git a/tests/lldb/cpp/KernelVariables/simple.rscript b/tests/lldb/cpp/KernelVariables/simple.rscript deleted file mode 100644 index bad675eb..00000000 --- a/tests/lldb/cpp/KernelVariables/simple.rscript +++ /dev/null @@ -1,197 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppkernelvariables) - -char char_global = 12; -uchar uchar_global = 234; -short short_global = -321; -ushort ushort_global = 432; -int int_global = 1234; -uint uint_global = 2345; -float float_global = 4.5f; -long long_global = -77777; -ulong ulong_global = 8888; -double double_global = -456.5f; - -char2 char2_global = {11, -22}; -uchar2 uchar2_global = {33, 44}; -short2 short2_global = {-555, 666}; -ushort2 ushort2_global = {777, 888}; -int2 int2_global = {999, -1111}; -uint2 uint2_global = {2222, 3333}; -float2 float2_global = {4.5f, -5.0f}; -long2 long2_global = {-4444, 5555}; -ulong2 ulong2_global = {6666, 7777}; -double2 double2_global = {88.5f, -99.0f}; - -char3 char3_global = {11, -22, -33}; -uchar3 uchar3_global = {33, 44, 55}; -short3 short3_global = {-555, 666, 777}; -ushort3 ushort3_global = {777, 888, 999}; -int3 int3_global = {999, -1111, 2222}; -uint3 uint3_global = {2222, 3333, 4444}; -float3 float3_global = {4.5f, -5.0f, -6.5f}; -long3 long3_global = {-4444, 5555, 6666}; -ulong3 ulong3_global = {6666, 7777, 8888}; -double3 double3_global = {88.5f, -99.0f, 111.5f}; - -char4 char4_global = {55, 11, -22, -33}; -uchar4 uchar4_global = {222, 33, 44, 55}; -short4 short4_global = {-444, -555, 666, 777}; -ushort4 ushort4_global = {666, 777, 888, 999}; -int4 int4_global = {888, 999, -1111, 2222}; -uint4 uint4_global = {1111, 2222, 3333, 4444}; -float4 float4_global = {3.0f, 4.5f, -5.0f, -6.5f}; -long4 long4_global = {-3333, -4444, 5555, 6666}; -ulong4 ulong4_global = {5555, 6666, 7777, 8888}; -double4 double4_global = {-77.0f, 88.5f, -99.0f, 111.5f}; - -rs_matrix2x2 matrix2x2_global; -rs_matrix3x3 matrix3x3_global; -rs_matrix4x4 matrix4x4_global; - -rs_quaternion quaternion_global; - -rs_allocation allocation_1D_global; -rs_allocation allocation_1D_global2; -rs_allocation allocation_2D_global; -rs_allocation allocation_2D_global2; -rs_allocation allocation_3D_global; -rs_allocation 
allocation_YUV_2D_global; - -rs_allocation_cubemap_face cubemap_face_global; -rs_sampler sampler_global; - -uchar4 __attribute__((kernel)) kernel(uchar4 in) -{ - char char_local = 'a'; - uchar uchar_local = 'b'; - short short_local = -321; - ushort ushort_local = 432; - int int_local = 1234; - uint uint_local = 2345; - float float_local = 4.5f; - long long_local = -77777; - ulong ulong_local = 8888; - double double_local = -456.5f; - - char2 char2_local = {-11, -22}; - uchar2 uchar2_local = {33, 44}; - short2 short2_local = {-555, 666}; - ushort2 ushort2_local = {777, 888}; - int2 int2_local = {999, -1111}; - uint2 uint2_local = {2222, 3333}; - float2 float2_local = {4.5f, -5.0f}; - long2 long2_local = {-4444, 5555}; - ulong2 ulong2_local = {6666, 7777}; - double2 double2_local = {88.5f, -99.0f}; - - char3 char3_local = {11, -22, -33}; - uchar3 uchar3_local = {33, 44, 55}; - short3 short3_local = {-555, 666, 777}; - ushort3 ushort3_local = {777, 888, 999}; - int3 int3_local = {999, -1111, 2222}; - uint3 uint3_local = {2222, 3333, 4444}; - float3 float3_local = {4.5f, -5.0f, -6.5f}; - long3 long3_local = {-4444, 5555, 6666}; - ulong3 ulong3_local = {6666, 7777, 8888}; - double3 double3_local = {88.5f, -99.0f, 111.5f}; - - char4 char4_local = {55, 11, -22, -33}; - uchar4 uchar4_local = {22, 33, 44, 55}; - short4 short4_local = {-444, -555, 666, 777}; - ushort4 ushort4_local = {666, 777, 888, 999}; - int4 int4_local = {888, 999, -1111, 2222}; - uint4 uint4_local = {1111, 2222, 3333, 4444}; - float4 float4_local = {3.0f, 4.5f, -5.0f, -6.5f}; - long4 long4_local = {-3333, -4444, 5555, 6666}; - ulong4 ulong4_local = {5555, 6666, 7777, 8888}; - double4 double4_local = {-77.0f, 88.5f, -99.0f, 111.5f}; - - rs_matrix2x2 matrix2x2_local = {{1., 2.5, - 3., 4.5}}; - rs_matrix3x3 matrix3x3_local = {{5., 6.5, 7., - 8.5, 9., 1.5, - 2., 3.5, 4.}}; - rs_matrix4x4 matrix4x4_local = {{5.5, 6., 7.5, 8., - 9., 1.5, 2., 3.5, - 4.5, 5.5, 6.5, 7., - 8., 9.5, 1.5, 2.5}}; - - 
matrix2x2_global = matrix2x2_local; - matrix3x3_global = matrix3x3_local; - matrix4x4_global = matrix4x4_local; - - rsQuaternionSet(&quaternion_global, 3.0, 4.5, 5.5, 6.0); - - rs_quaternion quaternion_local; - rsQuaternionSet(&quaternion_local, 7.5, 8.0, 9.0, 0.5); - - char char_combined = char_local + (char)uchar_local + char2_local.x + - (char)uchar2_local.x + char3_local.x - (char)uchar3_local.x + - char4_local.x + (char)uchar4_local.x; - - short short_combined = short_local + (short)ushort_local + short2_local.x + - (short)ushort2_local.x + short3_local.x + (short)ushort3_local.x + - short4_local.x + (short)ushort4_local.x; - - int int_combined = int_local + (int)uint_local + int2_local.x + - (int)uint2_local.x + int3_local.x + (int)uint3_local.x + int4_local.x + - (int)uint4_local.x; - - float float_combined = float_local + float2_local.x + float3_local.x + - float4_local.x; - - long long_combined = long_local + (long)ulong_local + long2_local.x + - (long)ulong2_local.x + long3_local.x + (long)ulong3_local.x + - long4_local.x + (long)ulong4_local.x; - - double double_combined = double_local + double2_local.x + double3_local.x + - double4_local.x; - - char_global = char_combined; - short_global = short_combined; - int_global = int_combined; - float_global = float_combined; - long_global = long_combined; - double_global = double_combined; - - uchar4 result = {1,2,3,4}; - return result; -} - -float use_constants_global; - -void setup(void) -{ - use_constants_global = - M_1_PI + - M_2_PI + - M_2_PIl + - M_2_SQRTPI + - M_E + - M_LN10 + - M_LN2 + - M_LOG10E + - M_LOG2E + - M_PI + - M_PI_2 + - M_PI_4 + - M_SQRT1_2 + - M_SQRT2; -} diff --git a/tests/lldb/cpp/MultipleRSFiles/Android.mk b/tests/lldb/cpp/MultipleRSFiles/Android.mk deleted file mode 100644 index d4b5b35c..00000000 --- a/tests/lldb/cpp/MultipleRSFiles/Android.mk +++ /dev/null @@ -1,14 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppMultipleRSFiles -LOCAL_LICENSE_KINDS := 
SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - MultipleRSFiles.cpp \ - first.rscript \ - second.rscript - -include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) diff --git a/tests/lldb/cpp/MultipleRSFiles/MultipleRSFiles.cpp b/tests/lldb/cpp/MultipleRSFiles/MultipleRSFiles.cpp deleted file mode 100644 index 9d0d4e7b..00000000 --- a/tests/lldb/cpp/MultipleRSFiles/MultipleRSFiles.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <RenderScript.h> - -#include "ScriptC_first.h" -#include "ScriptC_second.h" - -int main() -{ - static const int size = 64; - sp<RS> rs = new RS(); - - rs->init("/data/rscache", RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_first> s1 = new ScriptC_first(rs); - sp<ScriptC_second> s2 = new ScriptC_second(rs); - - s1->forEach_first_kernel(a, b); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - s2->forEach_second_kernel(a, b); - - rs->finish(); - return 0; -} diff --git a/tests/lldb/cpp/MultipleRSFiles/first.rscript b/tests/lldb/cpp/MultipleRSFiles/first.rscript deleted file mode 100644 index 2f84c465..00000000 --- a/tests/lldb/cpp/MultipleRSFiles/first.rscript +++ /dev/null @@ -1,32 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppmultiplersfiles) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) first_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} diff --git a/tests/lldb/cpp/MultipleRSFiles/second.rscript b/tests/lldb/cpp/MultipleRSFiles/second.rscript deleted file mode 100644 index 0279d0ff..00000000 --- a/tests/lldb/cpp/MultipleRSFiles/second.rscript +++ /dev/null @@ -1,25 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppmultiplersfiles) - -/* RenderScript kernel that just returns the swizzled input. 
*/ -uchar4 __attribute__((kernel)) second_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/cpp/WaitAttach/Android.mk b/tests/lldb/cpp/WaitAttach/Android.mk deleted file mode 100644 index 00bf745d..00000000 --- a/tests/lldb/cpp/WaitAttach/Android.mk +++ /dev/null @@ -1,27 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := CppDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - WaitAttach.cpp \ - simple.rscript - -include frameworks/rs/tests/lldb/cpp/common.mk -include $(BUILD_EXECUTABLE) - -include $(CLEAR_VARS) - -LOCAL_MODULE := CppNoDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := \ - WaitAttach.cpp \ - simple.rscript - -include frameworks/rs/tests/lldb/cpp/common.mk -LOCAL_RENDERSCRIPT_FLAGS := $(filter-out -g,$(LOCAL_RENDERSCRIPT_FLAGS)) -include $(BUILD_EXECUTABLE) diff --git a/tests/lldb/cpp/WaitAttach/WaitAttach.cpp b/tests/lldb/cpp/WaitAttach/WaitAttach.cpp deleted file mode 100644 index c6c1980b..00000000 --- a/tests/lldb/cpp/WaitAttach/WaitAttach.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <RenderScript.h> - -#include "ScriptC_simple.h" - -int main() -{ - static const int size = 8; - sp<RS> rs = new RS(); - - rs->init("/data/rscache", RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_simple> s = new ScriptC_simple(rs); - s->forEach_simple_kernel(a, b); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - s->forEach_other_kernel(a, b); - - rs->finish(); - return 0; -} diff --git a/tests/lldb/cpp/WaitAttach/simple.rscript b/tests/lldb/cpp/WaitAttach/simple.rscript deleted file mode 100644 index 1e9780a2..00000000 --- a/tests/lldb/cpp/WaitAttach/simple.rscript +++ /dev/null @@ -1,39 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.cppwaitattach) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). 
- */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} - -// Extra kernel to test lldb setting breakpoints on all the RS kernels. -uchar4 __attribute__((kernel)) other_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/cpp/common.mk b/tests/lldb/cpp/common.mk deleted file mode 100644 index 3b657412..00000000 --- a/tests/lldb/cpp/common.mk +++ /dev/null @@ -1,11 +0,0 @@ -LOCAL_MODULE_TAGS := tests - -LOCAL_RENDERSCRIPT_FLAGS += -g -O0 -target-api 0 -LOCAL_CFLAGS := -Werror -Wall -Wextra -std=c++11 -LOCAL_LDFLAGS += -llog - -LOCAL_STATIC_LIBRARIES += libRScpp_static - -intermediates += $(call intermediates-dir-for,STATIC_LIBRARIES,libRS,TARGET,) - -LOCAL_C_INCLUDES += $(intermediates) diff --git a/tests/lldb/java/Allocations/Android.mk b/tests/lldb/java/Allocations/Android.mk deleted file mode 100644 index cff322f3..00000000 --- a/tests/lldb/java/Allocations/Android.mk +++ /dev/null @@ -1,17 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := \ - $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := Allocations -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -Wno-unused -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/Allocations/AndroidManifest.xml b/tests/lldb/java/Allocations/AndroidManifest.xml deleted file mode 100644 index 8650cb0c..00000000 --- a/tests/lldb/java/Allocations/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.allocations"> - <uses-sdk android:minSdkVersion="21" /> - <application 
android:label="Allocations" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/java/Allocations/res/layout/main_layout.xml b/tests/lldb/java/Allocations/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/java/Allocations/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/java/Allocations/src/com/android/rs/allocations/MainActivity.java b/tests/lldb/java/Allocations/src/com/android/rs/allocations/MainActivity.java deleted file mode 100644 index f06f2b97..00000000 --- a/tests/lldb/java/Allocations/src/com/android/rs/allocations/MainActivity.java +++ /dev/null @@ -1,552 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.allocations; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.Bitmap; -import android.renderscript.*; - -public class MainActivity extends Activity { - private RenderScript mRS; - - private Allocation mInAllocation; // script input - private Allocation mOutAllocation; // script output - - private Allocation mStructInAlloc; // complexStruct input - private Allocation mStructOutAlloc; // complexStruct output - - private Allocation mBoolAllocation; // boolean - - private Allocation mCharAllocation; // char - private Allocation mChar2Allocation; // char2 - private Allocation mChar3Allocation; // char3 - private Allocation mChar4Allocation; // char4 - - private Allocation mUCharAllocation; // uchar - private Allocation mUChar2Allocation; // uchar2 - private Allocation mUChar3Allocation; // uchar3 - private Allocation mUChar4Allocation; // uchar4 - - private Allocation mShortAllocation; // short - private Allocation mShort2Allocation; // short2 - private Allocation mShort3Allocation; // short3 - private Allocation mShort4Allocation; // short4 - - private Allocation mUShortAllocation; // ushort - private Allocation mUShort2Allocation; // ushort2 - private Allocation mUShort3Allocation; // ushort3 - private Allocation mUShort4Allocation; // ushort4 - - private Allocation mIntAllocation; // int - private Allocation mInt2Allocation; // int2 - private Allocation mInt3Allocation; // int3 - private Allocation mInt4Allocation; // int4 - - private Allocation mUIntAllocation; // uint - private Allocation mUInt2Allocation; // uint2 - private Allocation mUInt3Allocation; // uint3 - private Allocation mUInt4Allocation; // uint4 - - private Allocation mLongAllocation; // long - private Allocation mLong2Allocation; // long2 - private Allocation mLong3Allocation; // long3 - private Allocation mLong4Allocation; // long4 - - private Allocation mULongAllocation; // ulong - private Allocation mULong2Allocation; // ulong2 - 
private Allocation mULong3Allocation; // ulong3 - private Allocation mULong4Allocation; // ulong4 - - private Allocation mHalfAllocation; // half - private Allocation mHalf2Allocation; // half2 - private Allocation mHalf3Allocation; // half3 - private Allocation mHalf4Allocation; // half4 - - private Allocation mFloatAllocation; // float - private Allocation mFloat2Allocation; // float2 - private Allocation mFloat3Allocation; // float3 - private Allocation mFloat4Allocation; // float4 - - private Allocation mDoubleAllocation; // double - private Allocation mDouble2Allocation; // double2 - private Allocation mDouble3Allocation; // double3 - private Allocation mDouble4Allocation; // double4 - - private ScriptC_allocs mScript; - - private int mAllocSize = 24; // Chosen as allocation size since it's easily divisible - - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - mBitmapIn = Bitmap.createBitmap(64, 64, Bitmap.Config.ARGB_8888); - mBitmapOut = Bitmap.createBitmap(mBitmapIn.getWidth(), mBitmapIn.getHeight(), mBitmapIn.getConfig()); - - createScript(); - runScript(); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - mScript = new ScriptC_allocs(mRS); - } - - private void createSignedAllocations() { - Type.Builder typeI8Builder = new Type.Builder(mRS, Element.I8(mRS)); - typeI8Builder.setX(1); // One element here to test 16 byte memory alignment - typeI8Builder.setY(3); - typeI8Builder.setZ(8); - - mCharAllocation = Allocation.createTyped(mRS, typeI8Builder.create()); - mRS.finish(); - mChar2Allocation = Allocation.createSized(mRS, Element.I8_2(mRS), mAllocSize / 2); - mRS.finish(); - mChar3Allocation = Allocation.createSized(mRS, Element.I8_3(mRS), mAllocSize / 4); - 
mRS.finish(); - mChar4Allocation = Allocation.createSized(mRS, Element.I8_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeI16_2Builder = new Type.Builder(mRS, Element.I16_2(mRS)); - typeI16_2Builder.setX(6); - typeI16_2Builder.setY(1); - typeI16_2Builder.setZ(2); - - mShortAllocation = Allocation.createSized(mRS, Element.I16(mRS), mAllocSize); - mRS.finish(); - mShort2Allocation = Allocation.createTyped(mRS, typeI16_2Builder.create()); - mRS.finish(); - mShort3Allocation = Allocation.createSized(mRS, Element.I16_3(mRS), mAllocSize / 4); - mRS.finish(); - mShort4Allocation = Allocation.createSized(mRS, Element.I16_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeI32_3Builder = new Type.Builder(mRS, Element.I32_3(mRS)); - typeI32_3Builder.setX(3); - typeI32_3Builder.setY(2); - - mIntAllocation = Allocation.createSized(mRS, Element.I32(mRS), mAllocSize); - mRS.finish(); - mInt2Allocation = Allocation.createSized(mRS, Element.I32_2(mRS), mAllocSize / 2); - mRS.finish(); - mInt3Allocation = Allocation.createTyped(mRS, typeI32_3Builder.create()); - mRS.finish(); - mInt4Allocation = Allocation.createSized(mRS, Element.I32_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeI64_4Builder = new Type.Builder(mRS, Element.I64_4(mRS)); - typeI64_4Builder.setX(1); - typeI64_4Builder.setY(6); - - mLongAllocation = Allocation.createSized(mRS, Element.I64(mRS), mAllocSize); - mRS.finish(); - mLong2Allocation = Allocation.createSized(mRS, Element.I64_2(mRS), mAllocSize / 2); - mRS.finish(); - mLong3Allocation = Allocation.createSized(mRS, Element.I64_3(mRS), mAllocSize / 4); - mRS.finish(); - mLong4Allocation = Allocation.createTyped(mRS, typeI64_4Builder.create()); - mRS.finish(); - - mBoolAllocation = Allocation.createSized(mRS, Element.BOOLEAN(mRS), mAllocSize); - mRS.finish(); - } - - private void initSignedAllocations() { - byte[] buffer_char = new byte[mAllocSize]; - short[] buffer_short = new short[mAllocSize]; - int[] buffer_int = new 
int[mAllocSize]; - long[] buffer_long = new long[mAllocSize]; - byte[] buffer_bool = new byte[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (byte) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (long) i; - buffer_bool[i] = (byte) (0x01 & i); - } - - mCharAllocation.copyFrom(buffer_char); - mChar2Allocation.copyFrom(buffer_char); - mChar3Allocation.copyFrom(buffer_char); - mChar4Allocation.copyFrom(buffer_char); - - mShortAllocation.copyFrom(buffer_short); - mShort2Allocation.copyFrom(buffer_short); - mShort3Allocation.copyFrom(buffer_short); - mShort4Allocation.copyFrom(buffer_short); - - mIntAllocation.copyFrom(buffer_int); - mInt2Allocation.copyFrom(buffer_int); - mInt3Allocation.copyFrom(buffer_int); - mInt4Allocation.copyFrom(buffer_int); - - mLongAllocation.copyFrom(buffer_long); - mLong2Allocation.copyFrom(buffer_long); - mLong3Allocation.copyFrom(buffer_long); - mLong4Allocation.copyFrom(buffer_long); - - mBoolAllocation.copyFromUnchecked(buffer_bool); - } - - private void createUnsignedAllocations() { - Type.Builder typeU8_2Builder = new Type.Builder(mRS, Element.U8_2(mRS)); - typeU8_2Builder.setX(2); - typeU8_2Builder.setY(6); - - mUCharAllocation = Allocation.createSized(mRS, Element.U8(mRS), mAllocSize); - mRS.finish(); - mUChar2Allocation = Allocation.createTyped(mRS, typeU8_2Builder.create()); - mRS.finish(); - mUChar3Allocation = Allocation.createSized(mRS, Element.U8_3(mRS), mAllocSize / 4); - mRS.finish(); - mUChar4Allocation = Allocation.createSized(mRS, Element.U8_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeU16_3Builder = new Type.Builder(mRS, Element.U16_3(mRS)); - typeU16_3Builder.setX(1); - typeU16_3Builder.setY(6); - - mUShortAllocation = Allocation.createSized(mRS, Element.U16(mRS), mAllocSize); - mRS.finish(); - mUShort2Allocation = Allocation.createSized(mRS, Element.U16_2(mRS), mAllocSize / 2); - mRS.finish(); - mUShort3Allocation = Allocation.createTyped(mRS, 
typeU16_3Builder.create()); - mRS.finish(); - mUShort4Allocation = Allocation.createSized(mRS, Element.U16_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeU32_4Builder = new Type.Builder(mRS, Element.U32_4(mRS)); - typeU32_4Builder.setX(1); - typeU32_4Builder.setY(1); - typeU32_4Builder.setZ(6); - - mUIntAllocation = Allocation.createSized(mRS, Element.U32(mRS), mAllocSize); - mRS.finish(); - mUInt2Allocation = Allocation.createSized(mRS, Element.U32_2(mRS), mAllocSize / 2); - mRS.finish(); - mUInt3Allocation = Allocation.createSized(mRS, Element.U32_3(mRS), mAllocSize / 4); - mRS.finish(); - mUInt4Allocation = Allocation.createTyped(mRS, typeU32_4Builder.create()); - mRS.finish(); - - Type.Builder typeU64Builder = new Type.Builder(mRS, Element.U64(mRS)); - typeU64Builder.setX(4); - typeU64Builder.setY(3); - typeU64Builder.setZ(2); - - mULongAllocation = Allocation.createTyped(mRS, typeU64Builder.create()); - mRS.finish(); - mULong2Allocation = Allocation.createSized(mRS, Element.U64_2(mRS), mAllocSize / 2); - mRS.finish(); - mULong3Allocation = Allocation.createSized(mRS, Element.U64_3(mRS), mAllocSize / 4); - mRS.finish(); - mULong4Allocation = Allocation.createSized(mRS, Element.U64_4(mRS), mAllocSize / 4); - mRS.finish(); - } - - private void initUnsignedAllocations() { - byte[] buffer_char = new byte[mAllocSize]; - short[] buffer_short = new short[mAllocSize]; - int[] buffer_int = new int[mAllocSize]; - long[] buffer_long = new long[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (byte) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (long) i; - } - - mUCharAllocation.copyFrom(buffer_char); - mUChar2Allocation.copyFrom(buffer_char); - mUChar3Allocation.copyFrom(buffer_char); - mUChar4Allocation.copyFrom(buffer_char); - - mUShortAllocation.copyFrom(buffer_short); - mUShort2Allocation.copyFrom(buffer_short); - mUShort3Allocation.copyFrom(buffer_short); - 
mUShort4Allocation.copyFrom(buffer_short); - - mUIntAllocation.copyFrom(buffer_int); - mUInt2Allocation.copyFrom(buffer_int); - mUInt3Allocation.copyFrom(buffer_int); - mUInt4Allocation.copyFrom(buffer_int); - - mULongAllocation.copyFrom(buffer_long); - mULong2Allocation.copyFrom(buffer_long); - mULong3Allocation.copyFrom(buffer_long); - mULong4Allocation.copyFrom(buffer_long); - } - - private void createFloatAllocations() { - Type.Builder typeF16_3Builder = new Type.Builder(mRS, Element.F16_3(mRS)); - typeF16_3Builder.setX(1); - typeF16_3Builder.setY(6); - - mHalfAllocation = Allocation.createSized(mRS, Element.F16(mRS), mAllocSize); - mRS.finish(); - mHalf2Allocation = Allocation.createSized(mRS, Element.F16_2(mRS), mAllocSize / 2); - mRS.finish(); - mHalf3Allocation = Allocation.createTyped(mRS, typeF16_3Builder.create()); - mRS.finish(); - mHalf4Allocation = Allocation.createSized(mRS, Element.F16_4(mRS), mAllocSize / 4); - mRS.finish(); - - Type.Builder typeF32_4Builder = new Type.Builder(mRS, Element.F32_4(mRS)); - typeF32_4Builder.setX(3); - typeF32_4Builder.setY(2); - - mFloatAllocation = Allocation.createSized(mRS, Element.F32(mRS), mAllocSize); - mRS.finish(); - mFloat2Allocation = Allocation.createSized(mRS, Element.F32_2(mRS), mAllocSize / 2); - mRS.finish(); - mFloat3Allocation = Allocation.createSized(mRS, Element.F32_3(mRS), mAllocSize / 4); - mRS.finish(); - mFloat4Allocation = Allocation.createTyped(mRS, typeF32_4Builder.create()); - mRS.finish(); - - Type.Builder typeF64_2Builder = new Type.Builder(mRS, Element.F64_2(mRS)); - typeF64_2Builder.setX(4); - typeF64_2Builder.setY(1); - typeF64_2Builder.setZ(3); - - mDoubleAllocation = Allocation.createSized(mRS, Element.F64(mRS), mAllocSize); - mRS.finish(); - mDouble2Allocation = Allocation.createTyped(mRS, typeF64_2Builder.create()); - mRS.finish(); - - Type.Builder typeF64_3Builder = new Type.Builder(mRS, Element.F64_3(mRS)); - typeF64_3Builder.setX(1); - typeF64_3Builder.setY(2); - 
typeF64_3Builder.setZ(3); - - Type.Builder typeF64_4Builder = new Type.Builder(mRS, Element.F64_4(mRS)); - typeF64_4Builder.setX(1); - typeF64_4Builder.setY(2); - typeF64_4Builder.setZ(3); - - mDouble3Allocation = Allocation.createTyped(mRS, typeF64_3Builder.create()); - mRS.finish(); - mDouble4Allocation = Allocation.createTyped(mRS, typeF64_4Builder.create()); - mRS.finish(); - } - - private void initFloatAllocations() { - // No java type for half precision float, so bitcast 16-bit int - short[] buffer_half = new short[mAllocSize]; - float[] buffer_float = new float[mAllocSize]; - double[] buffer_double = new double[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - // Construct IEEE 754 half with increasing fraction. - byte mantissa = (byte)(i); - byte exponent = 0b00111100; // keep exponent constant at one - buffer_half[i] = (short)((exponent << 8) | mantissa); - - buffer_float[i] = (float) 1 / i; - buffer_double[i] = (double) 1 / i; - } - - mHalfAllocation.copyFromUnchecked(buffer_half); - mHalf2Allocation.copyFromUnchecked(buffer_half); - mHalf3Allocation.copyFromUnchecked(buffer_half); - mHalf4Allocation.copyFromUnchecked(buffer_half); - - mFloatAllocation.copyFrom(buffer_float); - mFloat2Allocation.copyFrom(buffer_float); - mFloat3Allocation.copyFrom(buffer_float); - mFloat4Allocation.copyFrom(buffer_float); - - mDoubleAllocation.copyFrom(buffer_double); - mDouble2Allocation.copyFrom(buffer_double); - mDouble3Allocation.copyFrom(buffer_double); - mDouble4Allocation.copyFrom(buffer_double); - } - - private void createStructAllocations() { - ScriptField_complexStruct complex_struct; - - complex_struct = new ScriptField_complexStruct(mRS, mAllocSize); - mRS.finish(); - mScript.bind_g_complexStruct_in(complex_struct); - mRS.finish(); - mStructInAlloc = complex_struct.getAllocation(); - mRS.finish(); - - complex_struct = new ScriptField_complexStruct(mRS, mAllocSize); - mRS.finish(); - mScript.bind_g_complexStruct_out(complex_struct); - mRS.finish(); - 
mStructOutAlloc = complex_struct.getAllocation(); - mRS.finish(); - } - - private void overwriteFloatAllocations() { - float[] buffer_float = new float[mAllocSize]; - - // Set float allocations to -1/n - for(int i = 0; i < mAllocSize; ++i) { - buffer_float[i] = -1f / i; - } - - mFloatAllocation.copyFrom(buffer_float); - mFloat2Allocation.copyFrom(buffer_float); - mFloat3Allocation.copyFrom(buffer_float); - mFloat4Allocation.copyFrom(buffer_float); - } - - private void runScript() { - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mRS.finish(); - mOutAllocation = Allocation.createFromBitmap(mRS, mBitmapOut); - mRS.finish(); - - - createSignedAllocations(); - initSignedAllocations(); - - mRS.finish(); - mScript.forEach_swizzle_kernel(mInAllocation, mOutAllocation); - mRS.finish(); - - mOutAllocation.copyTo(mBitmapOut); - - mCharAllocation.destroy(); - mRS.finish(); - mChar2Allocation.destroy(); - mRS.finish(); - mChar3Allocation.destroy(); - mRS.finish(); - mChar4Allocation.destroy(); - mRS.finish(); - - mShort2Allocation.destroy(); - mRS.finish(); - mShort3Allocation.destroy(); - mRS.finish(); - mShort4Allocation.destroy(); - mRS.finish(); - - mIntAllocation.destroy(); - mRS.finish(); - mInt2Allocation.destroy(); - mRS.finish(); - mInt3Allocation.destroy(); - mRS.finish(); - mInt4Allocation.destroy(); - mRS.finish(); - - mLongAllocation.destroy(); - mRS.finish(); - mLong2Allocation.destroy(); - mRS.finish(); - mLong3Allocation.destroy(); - mRS.finish(); - mLong4Allocation.destroy(); - mRS.finish(); - - mBoolAllocation.destroy(); - mRS.finish(); - - - createUnsignedAllocations(); - initUnsignedAllocations(); - - mInAllocation = mUShortAllocation; // Host side assignment - - mRS.finish(); - mScript.forEach_square_kernel(mInAllocation, mUIntAllocation); - mRS.finish(); - - mUCharAllocation.destroy(); - mRS.finish(); - mUChar2Allocation.destroy(); - mRS.finish(); - mUChar3Allocation.destroy(); - mRS.finish(); - mUChar4Allocation.destroy(); - 
mRS.finish(); - - mUShortAllocation.destroy(); - mRS.finish(); - mUShort2Allocation.destroy(); - mRS.finish(); - mUShort3Allocation.destroy(); - mRS.finish(); - mUShort4Allocation.destroy(); - mRS.finish(); - - mUInt2Allocation.destroy(); - mRS.finish(); - mUInt3Allocation.destroy(); - mRS.finish(); - mUInt4Allocation.destroy(); - mRS.finish(); - - mULongAllocation.destroy(); - mRS.finish(); - mULong2Allocation.destroy(); - mRS.finish(); - mULong3Allocation.destroy(); - mRS.finish(); - mULong4Allocation.destroy(); - mRS.finish(); - - - createFloatAllocations(); - initFloatAllocations(); - - mRS.finish(); - mScript.forEach_add_half_kernel(mDouble4Allocation, mDouble3Allocation); - mRS.finish(); - - mHalfAllocation.destroy(); - mRS.finish(); - mHalf2Allocation.destroy(); - mRS.finish(); - mHalf3Allocation.destroy(); - mRS.finish(); - mHalf4Allocation.destroy(); - mRS.finish(); - - mDoubleAllocation.destroy(); - mRS.finish(); - mDouble2Allocation.destroy(); - mRS.finish(); - mDouble4Allocation.destroy(); - mRS.finish(); - - overwriteFloatAllocations(); - - createStructAllocations(); - - mRS.finish(); - mScript.forEach_struct_kernel(mStructInAlloc, mStructOutAlloc); - mRS.finish(); - } -} diff --git a/tests/lldb/java/Allocations/src/rs/allocs.rscript b/tests/lldb/java/Allocations/src/rs/allocs.rscript deleted file mode 100644 index 0ec39c52..00000000 --- a/tests/lldb/java/Allocations/src/rs/allocs.rscript +++ /dev/null @@ -1,82 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.allocations) - -struct simpleStruct { - int i; - unsigned int j; -}; - -struct complexStruct { - struct simpleStruct s; - uchar4 c; - float f[2]; -}; - -struct complexStruct *g_complexStruct_in; -struct complexStruct *g_complexStruct_out; - -// Kernel performs basic vector swizzle -uchar4 __attribute__((kernel)) swizzle_kernel(uchar4 in) -{ - return in.wzyx; -} - -// Kernel squares every element in allocation -uint __attribute__((kernel)) square_kernel(ushort in) -{ - uint result = (uint)(in) * (uint)in; - return result; -} - -// Helper function adding 1/2 to passed in double -static double half_helper(double in) -{ - return (in + 0.5); -} - -// Kernel returns first 3 elements of a double4 plus 1/2 -double3 __attribute__((kernel)) add_half_kernel(double4 in) -{ - double3 result; - result.x = half_helper(in.x); - result.y = half_helper(in.y); - result.z = half_helper(in.z); - return result; -} - -// Kernel for testing structs -struct complexStruct __attribute__((kernel)) -struct_kernel(struct complexStruct in, uint32_t x) -{ - struct complexStruct complex_out; - struct simpleStruct simple_out; - simple_out.i = (int) x; - simple_out.j = x; - complex_out.s = simple_out; - - complex_out.f[0] = (float) x; - complex_out.f[1] = (float) x + 0.5; - - complex_out.c.x = (uchar) (x % 128); - complex_out.c.y = 'A'; - complex_out.c.z = 'B'; - complex_out.c.w = 'C'; - - return complex_out; -} diff --git a/tests/lldb/java/Android.mk b/tests/lldb/java/Android.mk deleted file mode 100644 index 5053e7d6..00000000 --- a/tests/lldb/java/Android.mk +++ /dev/null @@ -1 +0,0 @@ -include $(call all-subdir-makefiles) diff --git a/tests/lldb/java/BranchingFunCalls/Android.mk b/tests/lldb/java/BranchingFunCalls/Android.mk deleted file mode 100644 index 94f9d6fb..00000000 --- 
a/tests/lldb/java/BranchingFunCalls/Android.mk +++ /dev/null @@ -1,17 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := \ - $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := BranchingFunCalls -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/BranchingFunCalls/AndroidManifest.xml b/tests/lldb/java/BranchingFunCalls/AndroidManifest.xml deleted file mode 100644 index fb83a04e..00000000 --- a/tests/lldb/java/BranchingFunCalls/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.branchingfuncalls"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="BranchingFunCalls" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/java/BranchingFunCalls/res/layout/main_layout.xml b/tests/lldb/java/BranchingFunCalls/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/java/BranchingFunCalls/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - 
android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/java/BranchingFunCalls/src/com/android/rs/branchingfuncalls/MainActivity.java b/tests/lldb/java/BranchingFunCalls/src/com/android/rs/branchingfuncalls/MainActivity.java deleted file mode 100644 index a2c1f824..00000000 --- a/tests/lldb/java/BranchingFunCalls/src/com/android/rs/branchingfuncalls/MainActivity.java +++ /dev/null @@ -1,63 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.branchingfuncalls; - -import android.app.Activity; -import android.os.Bundle; -import android.renderscript.*; - -public class MainActivity extends Activity { - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation mOutAllocation; - private ScriptC_scalars mScript; - private int mAllocSize = 256; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - createScript(); - runScript(); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - Element e = Element.I32(mRS); - mInAllocation = Allocation.createSized(mRS, e, mAllocSize); - mOutAllocation = Allocation.createSized(mRS, e, mAllocSize); - - mScript = new ScriptC_scalars(mRS); - } - - private void runScript() { - mScript.invoke_addToGlobal(234); - - int[] init = new int[mAllocSize]; - for(int i = 0; i < mAllocSize; ++i) { - init[i] = i - (mAllocSize / 2); - } - mInAllocation.copy1DRangeFrom(0, mAllocSize, init); - mScript.forEach_simple_kernel(mInAllocation, mOutAllocation); - } -} - diff --git a/tests/lldb/java/BranchingFunCalls/src/rs/scalars.rscript b/tests/lldb/java/BranchingFunCalls/src/rs/scalars.rscript deleted file mode 100644 index 7206330d..00000000 --- a/tests/lldb/java/BranchingFunCalls/src/rs/scalars.rscript +++ /dev/null @@ -1,76 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.branchingfuncalls) - -static bool is_neg(int a) -{ - if(a < 0) - return true; - else - return false; -} - -static bool is_pos(int a) -{ - if(a > 0) - return true; - else - return false; -} - -static void set_i(int * a, int b) -{ - int tmp = b; - *a = tmp; -} - -static void modify_f(float * f) -{ - *f *= 0.5f; -} - -static void modify_i(int * i) -{ - int j = *i; - int cutoff = 2 << 6; - if(j > cutoff) - j = cutoff; - if(is_neg(j)) - set_i(i, 0); - else if(is_pos(j)) - set_i(i, j); - else - set_i(i, cutoff); -} - -int __attribute__((kernel)) simple_kernel(int in) -{ - int i = in; - float f = (float) i; - modify_f(&f); - modify_i(&i); - int ret = (int) f; - return in * ret; -} - -int glob = 123; - -void addToGlobal(int arg) -{ - glob += arg; -} diff --git a/tests/lldb/java/DebugWaitAttach/Android.mk b/tests/lldb/java/DebugWaitAttach/Android.mk deleted file mode 100644 index 3486b780..00000000 --- a/tests/lldb/java/DebugWaitAttach/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JavaDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/DebugWaitAttach/AndroidManifest.xml 
b/tests/lldb/java/DebugWaitAttach/AndroidManifest.xml deleted file mode 100644 index 862af289..00000000 --- a/tests/lldb/java/DebugWaitAttach/AndroidManifest.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.waitattachdebug"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JavaDebugWaitAttach" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/java/DebugWaitAttach/res/layout/main_layout.xml b/tests/lldb/java/DebugWaitAttach/res/layout/main_layout.xml deleted file mode 100644 index 4ef172f3..00000000 --- a/tests/lldb/java/DebugWaitAttach/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/DebugWaitAttach/src/com/android/rs/waitattachdebug/MainActivity.java b/tests/lldb/java/DebugWaitAttach/src/com/android/rs/waitattachdebug/MainActivity.java deleted file mode 100644 index 3aeefd3b..00000000 --- a/tests/lldb/java/DebugWaitAttach/src/com/android/rs/waitattachdebug/MainActivity.java +++ /dev/null @@ -1,72 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.waitattachdebug; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - private ImageView mImageView; - - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation mOutAllocation; - private ScriptC_simple mScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - mBitmapIn = Bitmap.createBitmap(8, 8, Bitmap.Config.ARGB_8888); - mBitmapOut = Bitmap.createBitmap(mBitmapIn.getWidth(), - mBitmapIn.getHeight(), mBitmapIn.getConfig()); - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmapOut); - - createScript(); - updateImage(1.0f); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mOutAllocation = Allocation.createFromBitmap(mRS, mBitmapOut); - - mScript = new ScriptC_simple(mRS); - } - - - private void updateImage(final float f) { - mScript.set_gColor(new Float4(0.9f, 0.8f, 0.5f, 1.0f)); - mScript.forEach_simple_kernel(mInAllocation, mOutAllocation); - mOutAllocation.copyTo(mBitmapOut); - mScript.forEach_other_kernel(mInAllocation, mOutAllocation); - } -} - diff 
--git a/tests/lldb/java/DebugWaitAttach/src/rs/simple.rscript b/tests/lldb/java/DebugWaitAttach/src/rs/simple.rscript deleted file mode 100644 index 6b564c0f..00000000 --- a/tests/lldb/java/DebugWaitAttach/src/rs/simple.rscript +++ /dev/null @@ -1,39 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.waitattachdebug) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} - -// Extra kernel to test lldb setting breakpoints on all the RS kernels. 
-uchar4 __attribute__((kernel)) other_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/java/InfiniteLoop/Android.mk b/tests/lldb/java/InfiniteLoop/Android.mk deleted file mode 100644 index 02e27908..00000000 --- a/tests/lldb/java/InfiniteLoop/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JavaInfiniteLoop -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/InfiniteLoop/AndroidManifest.xml b/tests/lldb/java/InfiniteLoop/AndroidManifest.xml deleted file mode 100644 index ba846cde..00000000 --- a/tests/lldb/java/InfiniteLoop/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.infiniteloop"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="InfiniteLoop" - android:hardwareAccelerated="true"> - - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/java/InfiniteLoop/res/layout/main_layout.xml b/tests/lldb/java/InfiniteLoop/res/layout/main_layout.xml deleted file mode 100755 index 4ef172f3..00000000 --- a/tests/lldb/java/InfiniteLoop/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - 
android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/MainActivity.java b/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/MainActivity.java deleted file mode 100644 index 7243cc50..00000000 --- a/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/MainActivity.java +++ /dev/null @@ -1,174 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.infiniteloop; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.AsyncTask; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -import java.util.Random; -import java.util.Timer; -import java.util.TimerTask; - -public class MainActivity extends Activity { - /* Number of bitmaps that are used for renderScript thread and UI thread synchronization. - Ideally, this can be reduced to 2, however in some devices, 2 buffers still showing tearing on UI. 
- */ - private final int NUM_BITMAPS = 3; - private int mCurrentBitmap = 0; - private Bitmap mBitmapIn; - private Bitmap[] mBitmapsOut; - private ImageView mImageView; - private Random mRand; - - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation[] mOutAllocations; - private ScriptC_infiniteloop mScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - mRand = new Random(); - - /* - * Initialize UI - */ - mBitmapIn = Bitmap.createBitmap(500, 500, Bitmap.Config.ARGB_8888); - mBitmapsOut = new Bitmap[NUM_BITMAPS]; - for (int i = 0; i < NUM_BITMAPS; ++i) { - mBitmapsOut[i] = Bitmap.createBitmap(mBitmapIn.getWidth(), - mBitmapIn.getHeight(), mBitmapIn.getConfig()); - } - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmapsOut[mCurrentBitmap]); - mCurrentBitmap += (mCurrentBitmap + 1) % NUM_BITMAPS; - - /* - * Create renderScript - */ - createScript(); - - /* - * Invoke renderScript kernel and update imageView - */ - updateImage(1.0f); - - Timer t = new Timer(); - t.schedule(new TimerTask() { - @Override - public void run() { - updateImage(1.f); - } - }, 2000, 2000); - } - - /* - * Initialize RenderScript - * In the sample, it creates RenderScript kernel that performs saturation manipulation. - */ - private void createScript() { - // Initialize RS - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY); - - // Allocate buffers - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mOutAllocations = new Allocation[NUM_BITMAPS]; - for (int i = 0; i < NUM_BITMAPS; ++i) { - mOutAllocations[i] = Allocation.createFromBitmap(mRS, mBitmapsOut[i]); - } - - // Load script - mScript = new ScriptC_infiniteloop(mRS); - } - - /* - * In the AsyncTask, it invokes a simple RenderScript kernel. 
- * After the kernel is done, an operation blocks at Allocation.copyTo() in AsyncTask thread. - * Once operations are finished and we reach onPostExecute() in the UI thread, - * it can invalidate and update the ImageView UI. - */ - private class RenderScriptTask extends AsyncTask<Float, Integer, Integer> { - Boolean issued = false; - - protected Integer doInBackground(Float... values) { - int index = -1; - if (isCancelled() == false) { - issued = true; - index = mCurrentBitmap; - - /* - * Set global variable in RS - */ - mScript.set_gColour(new Float4(mRand.nextFloat(), mRand.nextFloat(), - mRand.nextFloat(), 1.f)); - - /* - * Invoke saturation filter kernel - */ - mScript.forEach_simple_kernel(mInAllocation, mOutAllocations[index]); - - /* - * Copy to bitmap and invalidate image view - */ - mOutAllocations[index].copyTo(mBitmapsOut[index]); - mCurrentBitmap = (mCurrentBitmap + 1) % NUM_BITMAPS; - } - return index; - } - - void updateView(Integer result) { - if (result != -1) { - // Request UI update - mImageView.setImageBitmap(mBitmapsOut[result]); - mImageView.invalidate(); - } - } - - protected void onPostExecute(Integer result) { - updateView(result); - } - - protected void onCancelled(Integer result) { - if (issued) { - updateView(result); - } - } - } - - RenderScriptTask currentTask = null; - - /* - * Invoke AsyncTask and cancel the previous task. - * When AsyncTasks are piled up (typically in slow device with heavy kernel), - * only the latest (and already started) task invokes RenderScript operation. 
- */ - private void updateImage(final float f) { - if (currentTask != null) - currentTask.cancel(false); - currentTask = new RenderScriptTask(); - currentTask.execute(f); - } -} diff --git a/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/infiniteloop.rscript b/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/infiniteloop.rscript deleted file mode 100644 index 5042ebfa..00000000 --- a/tests/lldb/java/InfiniteLoop/src/com/android/rs/infiniteloop/infiniteloop.rscript +++ /dev/null @@ -1,49 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.infiniteloop) -#pragma rs_fp_relaxed - -float4 gColour = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColour; - uchar4 result = rsPackColorTo8888(temp); - return result; -} diff --git a/tests/lldb/java/KernelVariables/Android.mk b/tests/lldb/java/KernelVariables/Android.mk deleted file mode 100644 index c642a9d0..00000000 --- a/tests/lldb/java/KernelVariables/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := KernelVariables -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/KernelVariables/AndroidManifest.xml b/tests/lldb/java/KernelVariables/AndroidManifest.xml deleted file mode 100644 index a1e2a746..00000000 --- a/tests/lldb/java/KernelVariables/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.kernelvariables"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="KernelVariables" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git 
a/tests/lldb/java/KernelVariables/res/layout/main_layout.xml b/tests/lldb/java/KernelVariables/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/java/KernelVariables/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/java/KernelVariables/src/com/android/rs/kernelvariables/MainActivity.java b/tests/lldb/java/KernelVariables/src/com/android/rs/kernelvariables/MainActivity.java deleted file mode 100644 index 728b872d..00000000 --- a/tests/lldb/java/KernelVariables/src/com/android/rs/kernelvariables/MainActivity.java +++ /dev/null @@ -1,110 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.kernelvariables; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.graphics.ImageFormat; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - private ImageView mImageView; - - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation mOutAllocation; - private ScriptC_simple mScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - mBitmapIn = Bitmap.createBitmap(500, 500, Bitmap.Config.ARGB_8888); - mBitmapOut = Bitmap.createBitmap(mBitmapIn.getWidth(), - mBitmapIn.getHeight(), mBitmapIn.getConfig()); - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmapOut); - - createScript(); - updateImage(); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mOutAllocation = Allocation.createFromBitmap(mRS, mBitmapOut); - - mScript = new ScriptC_simple(mRS); - } - - private void updateImage() { - int[] buffer_int = {1, 2, 3, 4}; - Allocation int_allocation = Allocation.createSized(mRS, Element.I32(mRS), 4); - int_allocation.copyFrom(buffer_int); - mScript.set_allocation_1D_global(int_allocation); - - int[] buffer_int2 = {5, 6, 7, 8}; - - Type.Builder typeI32Builder2D = new Type.Builder(mRS, Element.I32(mRS)); - typeI32Builder2D.setX(2); - typeI32Builder2D.setY(2); - - Allocation int_allocation2 = Allocation.createTyped(mRS, typeI32Builder2D.create()); - int_allocation2.copyFrom(buffer_int2); - mScript.set_allocation_1D_global2(int_allocation2); - - mScript.set_allocation_2D_global(mInAllocation); - 
mScript.set_allocation_2D_global2(mOutAllocation); - - int[] buffer_int3 = new int[64]; - - for (int i=0; i<4*4*4; ++i) - buffer_int3[i] = 9 + i; - - Type.Builder typeI32Builder3D = new Type.Builder(mRS, Element.I32(mRS)); - typeI32Builder3D.setX(4); - typeI32Builder3D.setY(4); - typeI32Builder3D.setZ(4); - - Allocation int_allocation3 = Allocation.createTyped(mRS, typeI32Builder3D.create()); - int_allocation3.copyFrom(buffer_int3); - mScript.set_allocation_3D_global(int_allocation3); - - Type.Builder yuvTypeBuilder = new Type.Builder(mRS, Element.YUV(mRS)); - yuvTypeBuilder.setX(4); - yuvTypeBuilder.setY(4); - yuvTypeBuilder.setYuvFormat(ImageFormat.YV12); - Allocation yuv_allocation = Allocation.createTyped(mRS, yuvTypeBuilder.create()); - mScript.set_allocation_YUV_2D_global(yuv_allocation); - - mScript.set_sampler_global(Sampler.CLAMP_LINEAR(mRS)); - - mScript.forEach_kernel(mInAllocation, mOutAllocation); - mOutAllocation.copyTo(mBitmapOut); - } -} diff --git a/tests/lldb/java/KernelVariables/src/rs/simple.rscript b/tests/lldb/java/KernelVariables/src/rs/simple.rscript deleted file mode 100644 index 26b6affc..00000000 --- a/tests/lldb/java/KernelVariables/src/rs/simple.rscript +++ /dev/null @@ -1,197 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.kernelvariables) - -char char_global = 12; -uchar uchar_global = 234; -short short_global = -321; -ushort ushort_global = 432; -int int_global = 1234; -uint uint_global = 2345; -float float_global = 4.5f; -long long_global = -77777; -ulong ulong_global = 8888; -double double_global = -456.5f; - -char2 char2_global = {11, -22}; -uchar2 uchar2_global = {33, 44}; -short2 short2_global = {-555, 666}; -ushort2 ushort2_global = {777, 888}; -int2 int2_global = {999, -1111}; -uint2 uint2_global = {2222, 3333}; -float2 float2_global = {4.5f, -5.0f}; -long2 long2_global = {-4444, 5555}; -ulong2 ulong2_global = {6666, 7777}; -double2 double2_global = {88.5f, -99.0f}; - -char3 char3_global = {11, -22, -33}; -uchar3 uchar3_global = {33, 44, 55}; -short3 short3_global = {-555, 666, 777}; -ushort3 ushort3_global = {777, 888, 999}; -int3 int3_global = {999, -1111, 2222}; -uint3 uint3_global = {2222, 3333, 4444}; -float3 float3_global = {4.5f, -5.0f, -6.5f}; -long3 long3_global = {-4444, 5555, 6666}; -ulong3 ulong3_global = {6666, 7777, 8888}; -double3 double3_global = {88.5f, -99.0f, 111.5f}; - -char4 char4_global = {55, 11, -22, -33}; -uchar4 uchar4_global = {222, 33, 44, 55}; -short4 short4_global = {-444, -555, 666, 777}; -ushort4 ushort4_global = {666, 777, 888, 999}; -int4 int4_global = {888, 999, -1111, 2222}; -uint4 uint4_global = {1111, 2222, 3333, 4444}; -float4 float4_global = {3.0f, 4.5f, -5.0f, -6.5f}; -long4 long4_global = {-3333, -4444, 5555, 6666}; -ulong4 ulong4_global = {5555, 6666, 7777, 8888}; -double4 double4_global = {-77.0f, 88.5f, -99.0f, 111.5f}; - -rs_matrix2x2 matrix2x2_global; -rs_matrix3x3 matrix3x3_global; -rs_matrix4x4 matrix4x4_global; - -rs_quaternion quaternion_global; - -rs_allocation allocation_1D_global; -rs_allocation allocation_1D_global2; -rs_allocation allocation_2D_global; -rs_allocation allocation_2D_global2; -rs_allocation allocation_3D_global; -rs_allocation 
allocation_YUV_2D_global; - -rs_allocation_cubemap_face cubemap_face_global; -rs_sampler sampler_global; - -uchar4 __attribute__((kernel)) kernel(uchar4 in) -{ - char char_local = 'a'; - uchar uchar_local = 'b'; - short short_local = -321; - ushort ushort_local = 432; - int int_local = 1234; - uint uint_local = 2345; - float float_local = 4.5f; - long long_local = -77777; - ulong ulong_local = 8888; - double double_local = -456.5f; - - char2 char2_local = {-11, -22}; - uchar2 uchar2_local = {33, 44}; - short2 short2_local = {-555, 666}; - ushort2 ushort2_local = {777, 888}; - int2 int2_local = {999, -1111}; - uint2 uint2_local = {2222, 3333}; - float2 float2_local = {4.5f, -5.0f}; - long2 long2_local = {-4444, 5555}; - ulong2 ulong2_local = {6666, 7777}; - double2 double2_local = {88.5f, -99.0f}; - - char3 char3_local = {11, -22, -33}; - uchar3 uchar3_local = {33, 44, 55}; - short3 short3_local = {-555, 666, 777}; - ushort3 ushort3_local = {777, 888, 999}; - int3 int3_local = {999, -1111, 2222}; - uint3 uint3_local = {2222, 3333, 4444}; - float3 float3_local = {4.5f, -5.0f, -6.5f}; - long3 long3_local = {-4444, 5555, 6666}; - ulong3 ulong3_local = {6666, 7777, 8888}; - double3 double3_local = {88.5f, -99.0f, 111.5f}; - - char4 char4_local = {55, 11, -22, -33}; - uchar4 uchar4_local = {22, 33, 44, 55}; - short4 short4_local = {-444, -555, 666, 777}; - ushort4 ushort4_local = {666, 777, 888, 999}; - int4 int4_local = {888, 999, -1111, 2222}; - uint4 uint4_local = {1111, 2222, 3333, 4444}; - float4 float4_local = {3.0f, 4.5f, -5.0f, -6.5f}; - long4 long4_local = {-3333, -4444, 5555, 6666}; - ulong4 ulong4_local = {5555, 6666, 7777, 8888}; - double4 double4_local = {-77.0f, 88.5f, -99.0f, 111.5f}; - - rs_matrix2x2 matrix2x2_local = {{1., 2.5, - 3., 4.5}}; - rs_matrix3x3 matrix3x3_local = {{5., 6.5, 7., - 8.5, 9., 1.5, - 2., 3.5, 4.}}; - rs_matrix4x4 matrix4x4_local = {{5.5, 6., 7.5, 8., - 9., 1.5, 2., 3.5, - 4.5, 5.5, 6.5, 7., - 8., 9.5, 1.5, 2.5}}; - - 
matrix2x2_global = matrix2x2_local; - matrix3x3_global = matrix3x3_local; - matrix4x4_global = matrix4x4_local; - - rsQuaternionSet(&quaternion_global, 3.0, 4.5, 5.5, 6.0); - - rs_quaternion quaternion_local; - rsQuaternionSet(&quaternion_local, 7.5, 8.0, 9.0, 0.5); - - char char_combined = char_local + (char)uchar_local + char2_local.x + - (char)uchar2_local.x + char3_local.x - (char)uchar3_local.x + - char4_local.x + (char)uchar4_local.x; - - short short_combined = short_local + (short)ushort_local + short2_local.x + - (short)ushort2_local.x + short3_local.x + (short)ushort3_local.x + - short4_local.x + (short)ushort4_local.x; - - int int_combined = int_local + (int)uint_local + int2_local.x + - (int)uint2_local.x + int3_local.x + (int)uint3_local.x + int4_local.x + - (int)uint4_local.x; - - float float_combined = float_local + float2_local.x + float3_local.x + - float4_local.x; - - long long_combined = long_local + (long)ulong_local + long2_local.x + - (long)ulong2_local.x + long3_local.x + (long)ulong3_local.x + - long4_local.x + (long)ulong4_local.x; - - double double_combined = double_local + double2_local.x + double3_local.x + - double4_local.x; - - char_global = char_combined; - short_global = short_combined; - int_global = int_combined; - float_global = float_combined; - long_global = long_combined; - double_global = double_combined; - - uchar4 result = {1,2,3,4}; - return result; -} - -float use_constants_global; - -void setup(void) -{ - use_constants_global = - M_1_PI + - M_2_PI + - M_2_PIl + - M_2_SQRTPI + - M_E + - M_LN10 + - M_LN2 + - M_LOG10E + - M_LOG2E + - M_PI + - M_PI_2 + - M_PI_4 + - M_SQRT1_2 + - M_SQRT2; -} diff --git a/tests/lldb/java/MultipleRSFiles/Android.mk b/tests/lldb/java/MultipleRSFiles/Android.mk deleted file mode 100644 index b45cc517..00000000 --- a/tests/lldb/java/MultipleRSFiles/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call 
all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := MultipleRSFiles -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/MultipleRSFiles/AndroidManifest.xml b/tests/lldb/java/MultipleRSFiles/AndroidManifest.xml deleted file mode 100644 index 5a839012..00000000 --- a/tests/lldb/java/MultipleRSFiles/AndroidManifest.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.multiplersfiles"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="MultipleRSFiles" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/java/MultipleRSFiles/res/layout/main_layout.xml b/tests/lldb/java/MultipleRSFiles/res/layout/main_layout.xml deleted file mode 100644 index 4ef172f3..00000000 --- a/tests/lldb/java/MultipleRSFiles/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/MultipleRSFiles/src/com/android/rs/multiplersfiles/MainActivity.java 
b/tests/lldb/java/MultipleRSFiles/src/com/android/rs/multiplersfiles/MainActivity.java deleted file mode 100644 index 78954840..00000000 --- a/tests/lldb/java/MultipleRSFiles/src/com/android/rs/multiplersfiles/MainActivity.java +++ /dev/null @@ -1,74 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.multiplersfiles; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - private ImageView mImageView; - - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation mOutAllocation; - private ScriptC_first mFirstScript; - private ScriptC_second mSecondScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - mBitmapIn = Bitmap.createBitmap(500, 500, Bitmap.Config.ARGB_8888); - mBitmapOut = Bitmap.createBitmap(mBitmapIn.getWidth(), - mBitmapIn.getHeight(), mBitmapIn.getConfig()); - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmapOut); - - createScript(); - updateImage(1.0f); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - 
RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mOutAllocation = Allocation.createFromBitmap(mRS, mBitmapOut); - - mFirstScript = new ScriptC_first(mRS); - mSecondScript = new ScriptC_second(mRS); - } - - - private void updateImage(final float f) { - mFirstScript.set_gColor(new Float4(0.9f, 0.8f, 0.5f, 1.0f)); - mFirstScript.forEach_first_kernel(mInAllocation, mOutAllocation); - mOutAllocation.copyTo(mBitmapOut); - mSecondScript.forEach_second_kernel(mInAllocation, mOutAllocation); - } -} - diff --git a/tests/lldb/java/MultipleRSFiles/src/rs/first.rscript b/tests/lldb/java/MultipleRSFiles/src/rs/first.rscript deleted file mode 100644 index cbed426f..00000000 --- a/tests/lldb/java/MultipleRSFiles/src/rs/first.rscript +++ /dev/null @@ -1,32 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.multiplersfiles) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). 
- */ -uchar4 __attribute__((kernel)) first_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} diff --git a/tests/lldb/java/MultipleRSFiles/src/rs/second.rscript b/tests/lldb/java/MultipleRSFiles/src/rs/second.rscript deleted file mode 100644 index c87d7ae0..00000000 --- a/tests/lldb/java/MultipleRSFiles/src/rs/second.rscript +++ /dev/null @@ -1,25 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.multiplersfiles) - -/* RenderScript kernel that just returns the swizzled input. 
*/ -uchar4 __attribute__((kernel)) second_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/java/NoDebugWaitAttach/Android.mk b/tests/lldb/java/NoDebugWaitAttach/Android.mk deleted file mode 100644 index 355ffb74..00000000 --- a/tests/lldb/java/NoDebugWaitAttach/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JavaNoDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/NoDebugWaitAttach/AndroidManifest.xml b/tests/lldb/java/NoDebugWaitAttach/AndroidManifest.xml deleted file mode 100644 index dbc31a54..00000000 --- a/tests/lldb/java/NoDebugWaitAttach/AndroidManifest.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.waitattachnodebug"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JavaNoDebugWaitAttach" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/java/NoDebugWaitAttach/res/layout/main_layout.xml b/tests/lldb/java/NoDebugWaitAttach/res/layout/main_layout.xml deleted file mode 100644 index 4ef172f3..00000000 --- a/tests/lldb/java/NoDebugWaitAttach/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - 
android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/NoDebugWaitAttach/src/com/android/rs/waitattachnodebug/MainActivity.java b/tests/lldb/java/NoDebugWaitAttach/src/com/android/rs/waitattachnodebug/MainActivity.java deleted file mode 100644 index a43b387e..00000000 --- a/tests/lldb/java/NoDebugWaitAttach/src/com/android/rs/waitattachnodebug/MainActivity.java +++ /dev/null @@ -1,71 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.waitattachnodebug; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - private ImageView mImageView; - - private RenderScript mRS; - private Allocation mInAllocation; - private Allocation mOutAllocation; - private ScriptC_simple mScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - mBitmapIn = Bitmap.createBitmap(8, 8, Bitmap.Config.ARGB_8888); - mBitmapOut = Bitmap.createBitmap(mBitmapIn.getWidth(), - mBitmapIn.getHeight(), mBitmapIn.getConfig()); - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmapOut); - - createScript(); - updateImage(1.0f); - } - - private void createScript() { - mRS = RenderScript.create(this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - - mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn); - mOutAllocation = Allocation.createFromBitmap(mRS, mBitmapOut); - - mScript = new ScriptC_simple(mRS); - } - - - private void updateImage(final float f) { - mScript.set_gColor(new Float4(0.9f, 0.8f, 0.5f, 1.0f)); - mScript.forEach_simple_kernel(mInAllocation, mOutAllocation); - mOutAllocation.copyTo(mBitmapOut); - } -} - diff --git a/tests/lldb/java/NoDebugWaitAttach/src/rs/simple.rscript b/tests/lldb/java/NoDebugWaitAttach/src/rs/simple.rscript deleted file mode 100644 index 6652675c..00000000 --- a/tests/lldb/java/NoDebugWaitAttach/src/rs/simple.rscript +++ /dev/null @@ -1,33 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.waitattachnodebug) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} - diff --git a/tests/lldb/java/Reduction/Android.mk b/tests/lldb/java/Reduction/Android.mk deleted file mode 100644 index 6e71659d..00000000 --- a/tests/lldb/java/Reduction/Android.mk +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_RENDERSCRIPT_FLAGS := -target-api 0 -O0 -g - -LOCAL_PACKAGE_NAME := Reduction -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/MainActivity.java b/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/MainActivity.java deleted file mode 100644 index b10d5d28..00000000 --- a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/MainActivity.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This application is a simple scheduler for testing the lldb debugger - * implementation for general reduction kernels. 
- * - * It launches one of two simple reductions in a loop - */ - -package com.android.rs.lldbreductiontest; - -import android.app.Activity; -import android.content.Context; -import android.content.res.Resources; -import android.os.Handler; -import android.os.Bundle; -import android.util.Log; -import android.renderscript.*; - -import java.lang.Float; -import java.lang.Math; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Random; - -public class MainActivity extends Activity { - static private int idxOffset = 10; - static private int mX = 128; - static private int mY = 2; - static private int mZ = 2; - static private float mMultiplier = 2.f; - private RenderScript mRS; - private ScriptC_reduce mScript; - private ScriptC_reduce_auto_comb mScript_auto_comb; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - initRS(); - - int loopDelayMillis = 1000; - Handler loopHandler = new Handler(); - loopHandler.postDelayed(new Runnable() { - @Override - public void run() { - runRS(); - loopHandler.postDelayed(this, loopDelayMillis); - } - }, loopDelayMillis); - } - - private float findMinUserTypeAutoComb( - RenderScript rs, ScriptC_reduce_auto_comb s, Allocation alloc) { - s.set_a_startval(mX); - s.set_b_startval(mY); - s.set_multiplier(mMultiplier); - - return s.reduce_find_min_user_type_auto_comb(alloc).get(); - } - - private float findMinUserType(RenderScript rs, ScriptC_reduce s, Allocation alloc) { - s.set_a_startval(mX); - s.set_b_startval(mY); - s.set_multiplier(mMultiplier); - - return s.reduce_find_min_user_type(alloc).get(); - } - - private float findMinUserType1DAutoComb(RenderScript rs, ScriptC_reduce_auto_comb s, int xCount) { - ScriptField_MinUserType minUserType = new ScriptField_MinUserType(rs, xCount); - for (int i = 0; i < xCount; i++) { - ScriptField_MinUserType.Item val = new ScriptField_MinUserType.Item(); - val.a = i + idxOffset; - val.b = i + idxOffset; - 
minUserType.set(val, i, true); - } - - Allocation alloc = minUserType.getAllocation(); - - return findMinUserTypeAutoComb(rs, s, alloc); - } - - private float findMinUserType1D(RenderScript rs, ScriptC_reduce s, int xCount) { - ScriptField_MinUserType minUserType = new ScriptField_MinUserType(rs, xCount); - for (int i = 0; i < xCount; i++) { - ScriptField_MinUserType.Item val = new ScriptField_MinUserType.Item(); - val.a = i + idxOffset; - val.b = i + idxOffset; - minUserType.set(val, i, true); - } - - Allocation alloc = minUserType.getAllocation(); - - return findMinUserType(rs, s, alloc); - } - - public void initRS() { - mRS = RenderScript.create(this, RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_LOW_LATENCY | RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH); - mScript = new ScriptC_reduce(mRS); - mScript_auto_comb = new ScriptC_reduce_auto_comb(mRS); - } - - public void runRS() { - findMinUserType1D(mRS, mScript, mX); - findMinUserType1DAutoComb(mRS, mScript_auto_comb, mX); - } - - public void onDestroy() { - mRS.finish(); - mRS.destroy(); - } -} diff --git a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce.rscript b/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce.rscript deleted file mode 100644 index c8fb088a..00000000 --- a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce.rscript +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This application is a simple scheduler for testing the lldb debugger - * implementation for general reduction kernels. - * - * It launches one of two simple reductions in a loop - */ - -#pragma rs java_package_name(com.android.rs.lldbreductiontest) -#pragma version(1) -#pragma rs reduce(find_min_user_type) initializer(find_min_user_type_init) \ - accumulator(find_min_user_type_accum) \ - outconverter(find_min_user_type_outc) \ - combiner(find_min_user_type_comb) - -#define RSTESTS_USER_COMBINER -#include "reduce_common.rsh" diff --git a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_auto_comb.rscript b/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_auto_comb.rscript deleted file mode 100644 index 5a9de904..00000000 --- a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_auto_comb.rscript +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This application is a simple scheduler for testing the lldb debugger - * implementation for general reduction kernels. 
- * - * It launches one of two simple reductions in a loop - */ - -#pragma rs java_package_name(com.android.rs.lldbreductiontest) -#pragma version(1) -#pragma rs reduce(find_min_user_type_auto_comb) \ - initializer(find_min_user_type_init) accumulator(find_min_user_type_accum) \ - outconverter(find_min_user_type_outc) \ - -#include "reduce_common.rsh" diff --git a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_common.rsh b/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_common.rsh deleted file mode 100644 index 846d5302..00000000 --- a/tests/lldb/java/Reduction/src/com/android/rs/lldbreductiontest/reduce_common.rsh +++ /dev/null @@ -1,59 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -int __attribute__((kernel)) my_foreach_kernel(int a) { - // This kernel is unused, but we want to make sure it is not listed as a - // reduction kernel by the debugger - return a + 1; -} - -typedef struct MinUserType { - int32_t a; - int32_t b; -} user_t; - -int32_t b_startval; -int32_t a_startval; -float multiplier; - -static void find_min_user_type_init(user_t *alloc) { - alloc->a = a_startval; - alloc->b = b_startval; -} - -static void find_min_user_type_accum(user_t *accum, const user_t val) { - if (val.a + val.b * multiplier < accum->a + accum->b * multiplier) { - accum->a = val.a; - accum->b = val.b; - } -} - -// Combiners are autogenerated if the user has not defined the combiner. -// We specialise the tests for lldb's handling of this behaviour as well, -// generating two test apps from the same source. -// This combiner is equivalent to the accumulator. -#if defined(RSTESTS_USER_COMBINER) -static void find_min_user_type_comb(user_t *accum, const user_t *val) { - if (val->a + val->b * multiplier < accum->a + accum->b * multiplier) { - accum->a = val->a; - accum->b = val->b; - } -} -#endif - -static void find_min_user_type_outc(float *output, const user_t *val) { - *output = val->a + val->b * multiplier; -} diff --git a/tests/lldb/java/ScriptGroup/Android.mk b/tests/lldb/java/ScriptGroup/Android.mk deleted file mode 100644 index 18a1cd5e..00000000 --- a/tests/lldb/java/ScriptGroup/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := ScriptGroup -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/ScriptGroup/AndroidManifest.xml 
b/tests/lldb/java/ScriptGroup/AndroidManifest.xml deleted file mode 100644 index 5288c746..00000000 --- a/tests/lldb/java/ScriptGroup/AndroidManifest.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.scriptgroup"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="scriptgroup" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/java/ScriptGroup/res/layout/main_layout.xml b/tests/lldb/java/ScriptGroup/res/layout/main_layout.xml deleted file mode 100644 index 4ef172f3..00000000 --- a/tests/lldb/java/ScriptGroup/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/ScriptGroup/src/com/android/rs/scriptgroup/MainActivity.java b/tests/lldb/java/ScriptGroup/src/com/android/rs/scriptgroup/MainActivity.java deleted file mode 100644 index 01e8a13d..00000000 --- a/tests/lldb/java/ScriptGroup/src/com/android/rs/scriptgroup/MainActivity.java +++ /dev/null @@ -1,65 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.scriptgroup; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - private static final int ARRAY_SIZE = 8; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - - // create renderscript context - RenderScript pRS = RenderScript.create(this, RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH | RenderScript.CREATE_FLAG_LOW_LATENCY); - - ScriptC_scriptgroup script = new ScriptC_scriptgroup(pRS); - - // create and initalize a simple input allocation - int[] array = new int[ARRAY_SIZE]; - for (int i = 0; i < ARRAY_SIZE; i++) { - array[i] = i; - } - Allocation input = Allocation.createSized(pRS, Element.I32(pRS), ARRAY_SIZE); - input.copyFrom(array); - - ScriptGroup.Builder2 builder = new ScriptGroup.Builder2(pRS); - - ScriptGroup.Input unbound = builder.addInput(); - - ScriptGroup.Closure c0 = builder.addKernel( - script.getKernelID_foo(), Type.createX(pRS, Element.I32(pRS), ARRAY_SIZE), unbound); - - ScriptGroup.Closure c1 = builder.addKernel(script.getKernelID_goo(), - Type.createX(pRS, Element.I32(pRS), ARRAY_SIZE), c0.getReturn()); - - ScriptGroup group = builder.create("scriptgroup_test", c1.getReturn()); - - int[] a = new int[ARRAY_SIZE]; - ((Allocation) group.execute(input)[0]).copyTo(a); - - pRS.finish(); - pRS.destroy(); - } -} diff --git 
a/tests/lldb/java/ScriptGroup/src/rs/scriptgroup.rscript b/tests/lldb/java/ScriptGroup/src/rs/scriptgroup.rscript deleted file mode 100644 index 29089e10..00000000 --- a/tests/lldb/java/ScriptGroup/src/rs/scriptgroup.rscript +++ /dev/null @@ -1,27 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.scriptgroup) -#pragma rs_fp_full - -int __attribute__((kernel)) foo(int a) { - return a * a; -} - -int __attribute__((kernel)) goo(int a) { - return a + a; -} diff --git a/tests/lldb/java/SingleSource/Android.mk b/tests/lldb/java/SingleSource/Android.mk deleted file mode 100644 index 202c2a79..00000000 --- a/tests/lldb/java/SingleSource/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := SingleSource -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_PRIVATE_PLATFORM_APIS := true - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) diff --git a/tests/lldb/java/SingleSource/AndroidManifest.xml b/tests/lldb/java/SingleSource/AndroidManifest.xml deleted file mode 100644 index 8820c93c..00000000 --- a/tests/lldb/java/SingleSource/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ 
-<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.singlesource"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="SingleSource" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/java/SingleSource/res/layout/main_layout.xml b/tests/lldb/java/SingleSource/res/layout/main_layout.xml deleted file mode 100644 index 4ef172f3..00000000 --- a/tests/lldb/java/SingleSource/res/layout/main_layout.xml +++ /dev/null @@ -1,14 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> diff --git a/tests/lldb/java/SingleSource/src/com/android/rs/singlesource/MainActivity.java b/tests/lldb/java/SingleSource/src/com/android/rs/singlesource/MainActivity.java deleted file mode 100644 index 83080432..00000000 --- a/tests/lldb/java/SingleSource/src/com/android/rs/singlesource/MainActivity.java +++ /dev/null @@ -1,77 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.singlesource; - -import android.app.Activity; -import android.graphics.Bitmap; -import android.os.Bundle; -import android.widget.ImageView; -import android.renderscript.*; - -public class MainActivity extends Activity { - - private RenderScript mRS; - private Allocation mAllocIn1; - private Allocation mAllocIn2; - private Allocation mAllocOut; - private ScriptC_rs_single_source mScript; - - @Override - protected void onCreate(Bundle savedInstanceState) { - - super.onCreate(savedInstanceState); - - setContentView(R.layout.main_layout); - - // create renderscript context - mRS = RenderScript.create( - this, - RenderScript.ContextType.NORMAL, - RenderScript.CREATE_FLAG_WAIT_FOR_ATTACH | - RenderScript.CREATE_FLAG_LOW_LATENCY); - - // create a new instance of the script - mScript = new ScriptC_rs_single_source(mRS); - - // create the first input allocation - mAllocIn1 = Allocation.createSized(mRS, Element.F32(mRS), 4); - float [] in1 = new float[]{ 1.f, 2.f, 3.f, 4.f }; - mAllocIn1.copyFrom(in1); - - // create second input allocation - mAllocIn2 = Allocation.createSized(mRS, Element.F32(mRS), 4); - float [] in2 = new float[]{ 5.f, 6.f, 7.f, 8.f }; - mAllocIn2.copyFrom(in2); - - // create output allocation - mAllocOut = Allocation.createSized(mRS, Element.F32(mRS), 4); - - // setup the global output allocation - mScript.set_global_alloc(Allocation.createSized(mRS, Element.F32(mRS), 4)); - - // invoke static function 1 - mScript.invoke_script_invoke_1(mAllocOut, mAllocIn1, mAllocIn2); - - // invoke static function 2 - 
mScript.invoke_script_invoke_2(); - - // invoke void kernel - Script.LaunchOptions options = new Script.LaunchOptions(); - options.setX(0, 4); - mScript.forEach_void_kernel_1(options); - } -} diff --git a/tests/lldb/java/SingleSource/src/rs/rs_single_source.rscript b/tests/lldb/java/SingleSource/src/rs/rs_single_source.rscript deleted file mode 100644 index 15c35ef2..00000000 --- a/tests/lldb/java/SingleSource/src/rs/rs_single_source.rscript +++ /dev/null @@ -1,72 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.singlesource) -#pragma rs_fp_full - -// global allocation used for void kernel -rs_allocation global_alloc; - -static void check_in() -{ - // debugger check point - return; -} - -float __attribute__((kernel)) kernel_1(float a) -{ - // square - return a * a; -} - -float __attribute__((kernel)) kernel_2(float a, float b) -{ - // product - return a * b; -} - -void __attribute__((kernel)) void_kernel_1(uint32_t x) -{ - // allocation[x] = x - rsSetElementAt_float(global_alloc, (float)x, x); -} - -void script_invoke_1(rs_allocation out, rs_allocation in1, rs_allocation in2) -{ - // invoke kernel taking one argument - rsForEach(kernel_1, out, in1); - - check_in(); - - // invoke kernel taking two arguments - rsForEach(kernel_2, out, in1, in2); - - check_in(); -} - -void script_invoke_2() -{ - // invoke kernel that takes no arguments and no return type - rs_script_call_t options = { - .strategy=RS_FOR_EACH_STRATEGY_DONT_CARE, - .xStart=0, - .xEnd=4 - }; - rsForEachWithOptions(void_kernel_1, &options); - - check_in(); -} diff --git a/tests/lldb/jni/Allocations/Android.mk b/tests/lldb/jni/Allocations/Android.mk deleted file mode 100644 index 67ef2d9a..00000000 --- a/tests/lldb/jni/Allocations/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIAllocations -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjniallocations - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jniallocations/Android.mk diff --git a/tests/lldb/jni/Allocations/AndroidManifest.xml b/tests/lldb/jni/Allocations/AndroidManifest.xml deleted file mode 100644 index 
e73799fc..00000000 --- a/tests/lldb/jni/Allocations/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jniallocations"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIAllocations" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/jni/Allocations/jniallocations/Android.mk b/tests/lldb/jni/Allocations/jniallocations/Android.mk deleted file mode 100644 index e52e1a35..00000000 --- a/tests/lldb/jni/Allocations/jniallocations/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjniallocations -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jniallocations.cpp allocs.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -LOCAL_STATIC_LIBRARIES := libcompiler_rt - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/Allocations/jniallocations/allocs.rscript b/tests/lldb/jni/Allocations/jniallocations/allocs.rscript deleted file mode 100644 index c5ebcbfc..00000000 --- a/tests/lldb/jni/Allocations/jniallocations/allocs.rscript +++ /dev/null @@ -1,47 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jniallocations) - -// Kernel performs basic vector swizzle -uchar4 __attribute__((kernel)) swizzle_kernel(uchar4 in) -{ - return in.wzyx; -} - -// Kernel squares every element in allocation -uint __attribute__((kernel)) square_kernel(ushort in) -{ - uint result = (uint)(in) * (uint)in; - return result; -} - -// Helper function adding 1/2 to passed in double -static double half_helper(double in) -{ - return (in + 0.5); -} - -// Kernel returns first 3 elements of a double4 plus 1/2 -double3 __attribute__((kernel)) add_half_kernel(double4 in) -{ - double3 result; - result.x = half_helper(in.x); - result.y = half_helper(in.y); - result.z = half_helper(in.z); - return result; -} diff --git a/tests/lldb/jni/Allocations/jniallocations/jniallocations.cpp b/tests/lldb/jni/Allocations/jniallocations/jniallocations.cpp deleted file mode 100644 index f86b64ce..00000000 --- a/tests/lldb/jni/Allocations/jniallocations/jniallocations.cpp +++ /dev/null @@ -1,424 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_allocs.h" - -sp<RS> mRS; - -sp<Allocation> mBoolAllocation; // boolean - -sp<Allocation> mCharAllocation; // char -sp<Allocation> mChar2Allocation; // char2 -sp<Allocation> mChar3Allocation; // char3 -sp<Allocation> mChar4Allocation; // char4 - -sp<Allocation> mUCharAllocation; // uchar -sp<Allocation> mUChar2Allocation; // uchar2 -sp<Allocation> mUChar3Allocation; // uchar3 -sp<Allocation> mUChar4Allocation; // uchar4 - -sp<Allocation> mShortAllocation; // short -sp<Allocation> mShort2Allocation; // short2 -sp<Allocation> mShort3Allocation; // short3 -sp<Allocation> mShort4Allocation; // short4 - -sp<Allocation> mUShortAllocation; // ushort -sp<Allocation> mUShort2Allocation; // ushort2 -sp<Allocation> mUShort3Allocation; // ushort3 -sp<Allocation> mUShort4Allocation; // ushort4 - -sp<Allocation> mIntAllocation; // int -sp<Allocation> mInt2Allocation; // int2 -sp<Allocation> mInt3Allocation; // int3 -sp<Allocation> mInt4Allocation; // int4 - -sp<Allocation> mUIntAllocation; // uint -sp<Allocation> mUInt2Allocation; // uint2 -sp<Allocation> mUInt3Allocation; // uint3 -sp<Allocation> mUInt4Allocation; // uint4 - -sp<Allocation> mLongAllocation; // long -sp<Allocation> mLong2Allocation; // long2 -sp<Allocation> mLong3Allocation; // long3 -sp<Allocation> mLong4Allocation; // long4 - -sp<Allocation> mULongAllocation; // ulong -sp<Allocation> mULong2Allocation; // ulong2 -sp<Allocation> mULong3Allocation; // ulong3 -sp<Allocation> mULong4Allocation; // ulong4 - -sp<Allocation> mHalfAllocation; // half -sp<Allocation> mHalf2Allocation; // half2 -sp<Allocation> mHalf3Allocation; // half3 -sp<Allocation> mHalf4Allocation; // half4 - -sp<Allocation> mFloatAllocation; // float -sp<Allocation> mFloat2Allocation; // float2 -sp<Allocation> mFloat3Allocation; // float3 -sp<Allocation> 
mFloat4Allocation; // float4 - -sp<Allocation> mDoubleAllocation; // double -sp<Allocation> mDouble2Allocation; // double2 -sp<Allocation> mDouble3Allocation; // double3 -sp<Allocation> mDouble4Allocation; // double4 - -const int mAllocSize = 24; // Needs to be < CHAR_MAX and divisible by 4. -const int mBitmapSize = 64; - -void createSignedAllocations() { - Type::Builder typeI8Builder(mRS, Element::I8(mRS)); - typeI8Builder.setX(1); // One element here to test 16 byte memory alignment - typeI8Builder.setY(3); - typeI8Builder.setZ(8); - - mCharAllocation = Allocation::createTyped(mRS, typeI8Builder.create()); - mChar2Allocation = Allocation::createSized(mRS, Element::I8_2(mRS), mAllocSize / 2); - mChar3Allocation = Allocation::createSized(mRS, Element::I8_3(mRS), mAllocSize / 4); - mChar4Allocation = Allocation::createSized(mRS, Element::I8_4(mRS), mAllocSize / 4); - - Type::Builder typeI16_2Builder(mRS, Element::I16_2(mRS)); - typeI16_2Builder.setX(6); - typeI16_2Builder.setY(1); - typeI16_2Builder.setZ(2); - - mShortAllocation = Allocation::createSized(mRS, Element::I16(mRS), mAllocSize); - mShort2Allocation = Allocation::createTyped(mRS, typeI16_2Builder.create()); - mShort3Allocation = Allocation::createSized(mRS, Element::I16_3(mRS), mAllocSize / 4); - mShort4Allocation = Allocation::createSized(mRS, Element::I16_4(mRS), mAllocSize / 4); - - Type::Builder typeI32_3Builder(mRS, Element::I32_3(mRS)); - typeI32_3Builder.setX(3); - typeI32_3Builder.setY(2); - - mIntAllocation = Allocation::createSized(mRS, Element::I32(mRS), mAllocSize); - mInt2Allocation = Allocation::createSized(mRS, Element::I32_2(mRS), mAllocSize / 2); - mInt3Allocation = Allocation::createTyped(mRS, typeI32_3Builder.create()); - mInt4Allocation = Allocation::createSized(mRS, Element::I32_4(mRS), mAllocSize / 4); - - Type::Builder typeI64_4Builder(mRS, Element::I64_4(mRS)); - typeI64_4Builder.setX(1); - typeI64_4Builder.setY(6); - - mLongAllocation = Allocation::createSized(mRS, 
Element::I64(mRS), mAllocSize); - mLong2Allocation = Allocation::createSized(mRS, Element::I64_2(mRS), mAllocSize / 2); - mLong3Allocation = Allocation::createSized(mRS, Element::I64_3(mRS), mAllocSize / 4); - mLong4Allocation = Allocation::createTyped(mRS, typeI64_4Builder.create()); - - mBoolAllocation = Allocation::createSized(mRS, Element::BOOLEAN(mRS), mAllocSize); -} - -void initSignedAllocations() { - char *buffer_char = new char[mAllocSize]; - short *buffer_short = new short[mAllocSize]; - int *buffer_int = new int[mAllocSize]; - int64_t *buffer_long = new int64_t[mAllocSize]; - char *buffer_bool = new char[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (char) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (int64_t) i; - buffer_bool[i] = (char) (0x01 & i); - } - - mCharAllocation->copy3DRangeFrom(0, 0, 0, 1, 3, 8, buffer_char); - mChar2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_char); - mChar3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - mChar4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - - delete [] buffer_char; - - mShortAllocation->copy1DRangeFrom(0, mAllocSize, buffer_short); - mShort2Allocation->copy3DRangeFrom(0, 0, 0, 6, 1, 2, buffer_short); - mShort3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - mShort4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - - delete [] buffer_short; - - mIntAllocation->copy1DRangeFrom(0, mAllocSize, buffer_int); - mInt2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_int); - mInt3Allocation->copy2DRangeFrom(0, 0, 3, 2, buffer_int); - mInt4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_int); - - delete [] buffer_int; - - mLongAllocation->copy1DRangeFrom(0, mAllocSize, buffer_long); - mLong2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_long); - mLong3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - mLong4Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_long); - - delete [] 
buffer_long; - - mBoolAllocation->copy1DRangeFrom(0, mAllocSize, buffer_bool); - - delete [] buffer_bool; -} - -void createUnsignedAllocations() { - Type::Builder typeU8_2Builder(mRS, Element::U8_2(mRS)); - typeU8_2Builder.setX(2); - typeU8_2Builder.setY(6); - - mUCharAllocation = Allocation::createSized(mRS, Element::U8(mRS), mAllocSize); - mUChar2Allocation = Allocation::createTyped(mRS, typeU8_2Builder.create()); - mUChar3Allocation = Allocation::createSized(mRS, Element::U8_3(mRS), mAllocSize / 4); - mUChar4Allocation = Allocation::createSized(mRS, Element::U8_4(mRS), mAllocSize / 4); - - Type::Builder typeU16_3Builder(mRS, Element::U16_3(mRS)); - typeU16_3Builder.setX(1); - typeU16_3Builder.setY(6); - - mUShortAllocation = Allocation::createSized(mRS, Element::U16(mRS), mAllocSize); - mUShort2Allocation = Allocation::createSized(mRS, Element::U16_2(mRS), mAllocSize / 2); - mUShort3Allocation = Allocation::createTyped(mRS, typeU16_3Builder.create()); - mUShort4Allocation = Allocation::createSized(mRS, Element::U16_4(mRS), mAllocSize / 4); - - Type::Builder typeU32_4Builder(mRS, Element::U32_4(mRS)); - typeU32_4Builder.setX(1); - typeU32_4Builder.setY(1); - typeU32_4Builder.setZ(6); - - mUIntAllocation = Allocation::createSized(mRS, Element::U32(mRS), mAllocSize); - mUInt2Allocation = Allocation::createSized(mRS, Element::U32_2(mRS), mAllocSize / 2); - mUInt3Allocation = Allocation::createSized(mRS, Element::U32_3(mRS), mAllocSize / 4); - mUInt4Allocation = Allocation::createTyped(mRS, typeU32_4Builder.create()); - - Type::Builder typeU64Builder(mRS, Element::U64(mRS)); - typeU64Builder.setX(4); - typeU64Builder.setY(3); - typeU64Builder.setZ(2); - - mULongAllocation = Allocation::createTyped(mRS, typeU64Builder.create()); - mULong2Allocation = Allocation::createSized(mRS, Element::U64_2(mRS), mAllocSize / 2); - mULong3Allocation = Allocation::createSized(mRS, Element::U64_3(mRS), mAllocSize / 4); - mULong4Allocation = Allocation::createSized(mRS, 
Element::U64_4(mRS), mAllocSize / 4); -} - -void initUnsignedAllocations() { - char *buffer_char = new char[mAllocSize]; - short *buffer_short = new short[mAllocSize]; - int *buffer_int = new int[mAllocSize]; - uint64_t *buffer_long = new uint64_t[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_char[i] = (char) i; - buffer_short[i] = (short) i; - buffer_int[i] = (int) i; - buffer_long[i] = (uint64_t) i; - } - - mUCharAllocation->copy1DRangeFrom(0, mAllocSize, buffer_char); - mUChar2Allocation->copy2DRangeFrom(0, 0, 2, 6, buffer_char); - mUChar3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - mUChar4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_char); - - delete [] buffer_char; - - mUShortAllocation->copy1DRangeFrom(0, mAllocSize, buffer_short); - mUShort2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_short); - mUShort3Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_short); - mUShort4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_short); - - delete [] buffer_short; - - mUIntAllocation->copy1DRangeFrom(0, mAllocSize, buffer_int); - mUInt2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_int); - mUInt3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_int); - mUInt4Allocation->copy3DRangeFrom(0, 0, 0, 1, 1, 6, buffer_int); - - delete [] buffer_int; - - mULongAllocation->copy3DRangeFrom(0, 0, 0, 4, 3, 2, buffer_long); - mULong2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_long); - mULong3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - mULong4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_long); - - delete [] buffer_long; -} - -void createFloatAllocations() { - Type::Builder typeF16_3Builder(mRS, Element::F16_3(mRS)); - typeF16_3Builder.setX(1); - typeF16_3Builder.setY(6); - - mHalfAllocation = Allocation::createSized(mRS, Element::F16(mRS), mAllocSize); - mHalf2Allocation = Allocation::createSized(mRS, Element::F16_2(mRS), mAllocSize / 2); - mHalf3Allocation = Allocation::createTyped(mRS, 
typeF16_3Builder.create()); - mHalf4Allocation = Allocation::createSized(mRS, Element::F16_4(mRS), mAllocSize / 4); - - Type::Builder typeF32_4Builder(mRS, Element::F32_4(mRS)); - typeF32_4Builder.setX(3); - typeF32_4Builder.setY(2); - - mFloatAllocation = Allocation::createSized(mRS, Element::F32(mRS), mAllocSize); - mFloat2Allocation = Allocation::createSized(mRS, Element::F32_2(mRS), mAllocSize / 2); - mFloat3Allocation = Allocation::createSized(mRS, Element::F32_3(mRS), mAllocSize / 4); - mFloat4Allocation = Allocation::createTyped(mRS, typeF32_4Builder.create()); - - Type::Builder typeF64_2Builder(mRS, Element::F64_2(mRS)); - typeF64_2Builder.setX(4); - typeF64_2Builder.setY(1); - typeF64_2Builder.setZ(3); - - mDoubleAllocation = Allocation::createSized(mRS, Element::F64(mRS), mAllocSize); - mDouble2Allocation = Allocation::createTyped(mRS, typeF64_2Builder.create()); - - Type::Builder typeF64_3Builder(mRS, Element::F64_3(mRS)); - typeF64_3Builder.setX(1); - typeF64_3Builder.setY(2); - typeF64_3Builder.setZ(3); - - Type::Builder typeF64_4Builder(mRS, Element::F64_4(mRS)); - typeF64_4Builder.setX(1); - typeF64_4Builder.setY(2); - typeF64_4Builder.setZ(3); - - mDouble3Allocation = Allocation::createTyped(mRS, typeF64_3Builder.create()); - mDouble4Allocation = Allocation::createTyped(mRS, typeF64_4Builder.create()); -} - -void initFloatAllocations() { - __fp16 *buffer_half = new __fp16[mAllocSize]; - float *buffer_float = new float[mAllocSize]; - double *buffer_double = new double[mAllocSize]; - - for(int i = 0; i < mAllocSize; ++i) { - buffer_half[i] = (__fp16) 1 / i; - buffer_float[i] = (float) 1 / i; - buffer_double[i] = (double) 1 / i; - } - - mHalfAllocation->copy1DRangeFrom(0, mAllocSize, buffer_half); - mHalf2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_half); - mHalf3Allocation->copy2DRangeFrom(0, 0, 1, 6, buffer_half); - mHalf4Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_half); - - delete [] buffer_half; - - 
mFloatAllocation->copy1DRangeFrom(0, mAllocSize, buffer_float); - mFloat2Allocation->copy1DRangeFrom(0, mAllocSize/2, buffer_float); - mFloat3Allocation->copy1DRangeFrom(0, mAllocSize/4, buffer_float); - mFloat4Allocation->copy2DRangeFrom(0, 0, 3, 2, buffer_float); - - delete [] buffer_float; - - mDoubleAllocation->copy1DRangeFrom(0, mAllocSize, buffer_double); - mDouble2Allocation->copy3DRangeFrom(0, 0, 0, 4, 1, 3, buffer_double); - mDouble3Allocation->copy3DRangeFrom(0, 0, 0, 1, 2, 3, buffer_double); - mDouble4Allocation->copy3DRangeFrom(0, 0, 0, 1, 2, 3, buffer_double); - - delete [] buffer_double; -} - -extern "C" void JNICALL -Java_com_android_rs_jniallocations_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - mRS = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - mRS->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - sp<ScriptC_allocs> mScript = new ScriptC_allocs(mRS); - - Type::Builder typeRGBA_888Builder(mRS, Element::RGBA_8888(mRS)); - typeRGBA_888Builder.setX(mBitmapSize); - typeRGBA_888Builder.setY(mBitmapSize); - - sp<Allocation> mInAllocation = Allocation::createTyped(mRS, typeRGBA_888Builder.create()); - - const int image_area = mBitmapSize*mBitmapSize; - const int image_size = image_area*sizeof(int); - - char *zero_buffer = new char[image_size]; - memset(zero_buffer, 0, image_size); - mInAllocation->copy1DRangeFrom(0, image_area, zero_buffer); - delete [] zero_buffer; - - sp<Allocation> mOutAllocation = Allocation::createTyped(mRS, typeRGBA_888Builder.create()); - createSignedAllocations(); - initSignedAllocations(); - - mRS->finish(); - mScript->forEach_swizzle_kernel(mInAllocation, mOutAllocation); - mRS->finish(); - - mCharAllocation.clear(); - mChar2Allocation.clear(); - mChar3Allocation.clear(); - mChar4Allocation.clear(); - - mShort2Allocation.clear(); - mShort3Allocation.clear(); - mShort4Allocation.clear(); - - 
mIntAllocation.clear(); - mInt2Allocation.clear(); - mInt3Allocation.clear(); - mInt4Allocation.clear(); - - mLongAllocation.clear(); - mLong2Allocation.clear(); - mLong3Allocation.clear(); - mLong4Allocation.clear(); - - mBoolAllocation.clear(); - - createUnsignedAllocations(); - initUnsignedAllocations(); - - mInAllocation = mUShortAllocation; // Host side assignment - - mRS->finish(); - mScript->forEach_square_kernel(mInAllocation, mUIntAllocation); - mRS->finish(); - - mUCharAllocation.clear(); - mUChar2Allocation.clear(); - mUChar3Allocation.clear(); - mUChar4Allocation.clear(); - - mUShortAllocation.clear(); - mUShort2Allocation.clear(); - mUShort3Allocation.clear(); - mUShort4Allocation.clear(); - - mUInt2Allocation.clear(); - mUInt3Allocation.clear(); - mUInt4Allocation.clear(); - - mULongAllocation.clear(); - mULong2Allocation.clear(); - mULong3Allocation.clear(); - mULong4Allocation.clear(); - - createFloatAllocations(); - initFloatAllocations(); - - mRS->finish(); - mScript->forEach_add_half_kernel(mDouble4Allocation, mDouble3Allocation); - mRS->finish(); -} - diff --git a/tests/lldb/jni/Allocations/res/layout/main_layout.xml b/tests/lldb/jni/Allocations/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/Allocations/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/Allocations/src/com/android/rs/jniallocations/MainActivity.java b/tests/lldb/jni/Allocations/src/com/android/rs/jniallocations/MainActivity.java deleted file mode 100644 index 
f13682f2..00000000 --- a/tests/lldb/jni/Allocations/src/com/android/rs/jniallocations/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.jniallocations; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jniallocations"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/Android.mk b/tests/lldb/jni/Android.mk deleted file mode 100644 index 5053e7d6..00000000 --- a/tests/lldb/jni/Android.mk +++ /dev/null @@ -1 +0,0 @@ -include $(call all-subdir-makefiles) diff --git a/tests/lldb/jni/BranchingFunCalls/Android.mk b/tests/lldb/jni/BranchingFunCalls/Android.mk deleted file mode 100644 index a5ee3b42..00000000 --- a/tests/lldb/jni/BranchingFunCalls/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call 
all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIBranchingFunCalls -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjnibranchingfuncalls - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jnibranchingfuncalls/Android.mk diff --git a/tests/lldb/jni/BranchingFunCalls/AndroidManifest.xml b/tests/lldb/jni/BranchingFunCalls/AndroidManifest.xml deleted file mode 100644 index 3b616fa6..00000000 --- a/tests/lldb/jni/BranchingFunCalls/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jnibranchingfuncalls"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIBranchingFunCalls" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/Android.mk b/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/Android.mk deleted file mode 100644 index 69f9162c..00000000 --- a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjnibranchingfuncalls -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jnibranchingfuncalls.cpp scalars.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/jnibranchingfuncalls.cpp 
b/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/jnibranchingfuncalls.cpp deleted file mode 100644 index 4e2c4cfb..00000000 --- a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/jnibranchingfuncalls.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_scalars.h" - -extern "C" void JNICALL -Java_com_android_rs_jnibranchingfuncalls_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 64; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::I32(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - int * input = new int[size*size]; - for(int i = 0; i < size*size; ++i) { - input[i] = i - (size*size / 2); - } - a->copy2DRangeFrom(0, 0, size, size, input); - delete [] input; - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_scalars> s = new ScriptC_scalars(rs); - s->invoke_addToGlobal(234); - s->forEach_simple_kernel(a, b); - rs->finish(); - int32_t * output = new int32_t[size*size]; - b->copy2DRangeTo(0, 0, size, 
size, output); - delete [] output; -} - diff --git a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/scalars.rscript b/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/scalars.rscript deleted file mode 100644 index b98df28b..00000000 --- a/tests/lldb/jni/BranchingFunCalls/jnibranchingfuncalls/scalars.rscript +++ /dev/null @@ -1,76 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jnibranchingfuncalls) - -static bool is_neg(int a) -{ - if(a < 0) - return true; - else - return false; -} - -static bool is_pos(int a) -{ - if(a > 0) - return true; - else - return false; -} - -static void set_i(int * a, int b) -{ - int tmp = b; - *a = tmp; -} - -static void modify_f(float * f) -{ - *f *= 0.5f; -} - -static void modify_i(int * i) -{ - int j = *i; - int cutoff = 2 << 6; - if(j > cutoff) - j = cutoff; - if(is_neg(j)) - set_i(i, 0); - else if(is_pos(j)) - set_i(i, j); - else - set_i(i, cutoff); -} - -int __attribute__((kernel)) simple_kernel(int in) -{ - int i = in; - float f = (float) i; - modify_f(&f); - modify_i(&i); - int ret = (int) f; - return in * ret; -} - -int glob = 123; - -void addToGlobal(int arg) -{ - glob += arg; -} diff --git a/tests/lldb/jni/BranchingFunCalls/res/layout/main_layout.xml b/tests/lldb/jni/BranchingFunCalls/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- 
a/tests/lldb/jni/BranchingFunCalls/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/BranchingFunCalls/src/com/android/rs/jnibranchingfuncalls/MainActivity.java b/tests/lldb/jni/BranchingFunCalls/src/com/android/rs/jnibranchingfuncalls/MainActivity.java deleted file mode 100644 index b0ac283d..00000000 --- a/tests/lldb/jni/BranchingFunCalls/src/com/android/rs/jnibranchingfuncalls/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.jnibranchingfuncalls; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jnibranchingfuncalls"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/DebugWaitAttach/Android.mk b/tests/lldb/jni/DebugWaitAttach/Android.mk deleted file mode 100644 index 14cf20d2..00000000 --- a/tests/lldb/jni/DebugWaitAttach/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjnidebugwaitattach - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jnidebugwaitattach/Android.mk diff --git a/tests/lldb/jni/DebugWaitAttach/AndroidManifest.xml b/tests/lldb/jni/DebugWaitAttach/AndroidManifest.xml deleted file mode 100644 index 12e544b2..00000000 --- a/tests/lldb/jni/DebugWaitAttach/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jnidebugwaitattach"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIDebugWaitAttach" - android:hardwareAccelerated="true"> - - <activity 
android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/Android.mk b/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/Android.mk deleted file mode 100644 index af7d578a..00000000 --- a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjnidebugwaitattach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jnidebugwaitattach.cpp simple.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/jnidebugwaitattach.cpp b/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/jnidebugwaitattach.cpp deleted file mode 100644 index f8151f4f..00000000 --- a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/jnidebugwaitattach.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_simple.h" - -extern "C" void JNICALL -Java_com_android_rs_jnidebugwaitattach_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 8; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_simple> s = new ScriptC_simple(rs); - s->forEach_simple_kernel(a, b); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - s->forEach_other_kernel(a, b); - - rs->finish(); -} - diff --git a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/simple.rscript b/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/simple.rscript deleted file mode 100644 index a89c1f21..00000000 --- a/tests/lldb/jni/DebugWaitAttach/jnidebugwaitattach/simple.rscript +++ /dev/null @@ -1,39 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jnidebugwaitattach) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} - -// Extra kernel to test lldb setting breakpoints on all the RS kernels. -uchar4 __attribute__((kernel)) other_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/jni/DebugWaitAttach/res/layout/main_layout.xml b/tests/lldb/jni/DebugWaitAttach/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/DebugWaitAttach/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/DebugWaitAttach/src/com/android/rs/jniwaitattachdebug/MainActivity.java b/tests/lldb/jni/DebugWaitAttach/src/com/android/rs/jniwaitattachdebug/MainActivity.java deleted file mode 100644 index b858cf7d..00000000 --- a/tests/lldb/jni/DebugWaitAttach/src/com/android/rs/jniwaitattachdebug/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.jnidebugwaitattach; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jnidebugwaitattach"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/InfiniteLoop/Android.mk b/tests/lldb/jni/InfiniteLoop/Android.mk deleted file mode 100644 index 892e1e94..00000000 --- a/tests/lldb/jni/InfiniteLoop/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIInfiniteLoop -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjniinfiniteloop - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jniinfiniteloop/Android.mk diff --git a/tests/lldb/jni/InfiniteLoop/AndroidManifest.xml b/tests/lldb/jni/InfiniteLoop/AndroidManifest.xml deleted file mode 100644 index 23031f0c..00000000 --- 
a/tests/lldb/jni/InfiniteLoop/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jniinfiniteloop"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIInfiniteLoop" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/Android.mk b/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/Android.mk deleted file mode 100644 index 21717224..00000000 --- a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjniinfiniteloop -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jniinfiniteloop.cpp infiniteloop.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/infiniteloop.rscript b/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/infiniteloop.rscript deleted file mode 100644 index 142b27ba..00000000 --- a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/infiniteloop.rscript +++ /dev/null @@ -1,49 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jniinfiniteloop) -#pragma rs_fp_relaxed - -float4 gColour = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). 
- */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColour; - uchar4 result = rsPackColorTo8888(temp); - return result; -} diff --git a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/jniinfiniteloop.cpp b/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/jniinfiniteloop.cpp deleted file mode 100644 index 73d1cbba..00000000 --- a/tests/lldb/jni/InfiniteLoop/jniinfiniteloop/jniinfiniteloop.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <memory> -#include <unistd.h> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_infiniteloop.h" - -extern "C" void JNICALL -Java_com_android_rs_jniinfiniteloop_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 64; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - sp<ScriptC_infiniteloop> s = new ScriptC_infiniteloop(rs); - - // Test is designed to loop forever, waits for two seconds - // between each invocation of the kernel - bool forever = true; - while(forever) - { - s->forEach_simple_kernel(a, b); - sleep(2); - } - - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; -} - diff --git a/tests/lldb/jni/InfiniteLoop/res/layout/main_layout.xml b/tests/lldb/jni/InfiniteLoop/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/InfiniteLoop/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/InfiniteLoop/src/com/android/rs/jniinfiniteloop/MainActivity.java b/tests/lldb/jni/InfiniteLoop/src/com/android/rs/jniinfiniteloop/MainActivity.java deleted file mode 100644 index a18c4200..00000000 --- 
a/tests/lldb/jni/InfiniteLoop/src/com/android/rs/jniinfiniteloop/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package com.android.rs.jniinfiniteloop; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jniinfiniteloop"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/KernelVariables/Android.mk b/tests/lldb/jni/KernelVariables/Android.mk deleted file mode 100644 index 12017d5a..00000000 --- a/tests/lldb/jni/KernelVariables/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIKernelVariables -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := 
libjnikernelvariables - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jnikernelvariables/Android.mk diff --git a/tests/lldb/jni/KernelVariables/AndroidManifest.xml b/tests/lldb/jni/KernelVariables/AndroidManifest.xml deleted file mode 100644 index b7198921..00000000 --- a/tests/lldb/jni/KernelVariables/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jnikernelvariables"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIKernelVariables" - android:hardwareAccelerated="true"> - <activity android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> - diff --git a/tests/lldb/jni/KernelVariables/jnikernelvariables/Android.mk b/tests/lldb/jni/KernelVariables/jnikernelvariables/Android.mk deleted file mode 100644 index 10afc4bf..00000000 --- a/tests/lldb/jni/KernelVariables/jnikernelvariables/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjnikernelvariables -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jnikernelvariables.cpp simple.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/KernelVariables/jnikernelvariables/jnikernelvariables.cpp b/tests/lldb/jni/KernelVariables/jnikernelvariables/jnikernelvariables.cpp deleted file mode 100644 index 94917bf2..00000000 --- a/tests/lldb/jni/KernelVariables/jnikernelvariables/jnikernelvariables.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the 
Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_simple.h" - -extern "C" void JNICALL -Java_com_android_rs_jnikernelvariables_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 64; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - sp<ScriptC_simple> s = new ScriptC_simple(rs); - - static const int buffer_int[] = {1, 2, 3, 4}; - sp<Allocation> int_allocation = Allocation::createSized(rs, Element::I32(rs), 4); - int_allocation->copy1DRangeFrom(0, 4, buffer_int); - s->set_allocation_1D_global(int_allocation); - - static const int buffer_int2[] = {5, 6, 7, 8}; - - Type::Builder typeI32Builder2D(rs, Element::I32(rs)); - typeI32Builder2D.setX(2); - typeI32Builder2D.setY(2); - - sp<Allocation> int_allocation2 = Allocation::createTyped(rs, typeI32Builder2D.create()); - int_allocation2->copy2DRangeFrom(0, 0, 2, 2, buffer_int2); - s->set_allocation_1D_global2(int_allocation2); - - s->set_allocation_2D_global(a); - s->set_allocation_2D_global2(b); - - static const int buffer_int3[] = {9, 10, 11, 12, 13, 14, 15, 
16}; - - Type::Builder typeI32Builder3D(rs, Element::I32(rs)); - typeI32Builder3D.setX(2); - typeI32Builder3D.setY(2); - typeI32Builder3D.setZ(2); - - sp<Allocation> int_allocation3 = Allocation::createTyped(rs, typeI32Builder3D.create()); - int_allocation3->copy3DRangeFrom(0, 0, 0, 2, 2, 2, buffer_int3); - s->set_allocation_3D_global(int_allocation3); - - Type::Builder yuvTypeBuilder(rs, Element::YUV(rs)); - yuvTypeBuilder.setX(4); - yuvTypeBuilder.setY(4); - yuvTypeBuilder.setYuvFormat(RS_YUV_YV12); - - sp<Allocation> yuv_allocation = Allocation::createTyped(rs, yuvTypeBuilder.create()); - s->set_allocation_YUV_2D_global(yuv_allocation); - - s->set_sampler_global(Sampler::CLAMP_LINEAR(rs)); - - // Script is executed once, then the data is copied back when finished - s->forEach_kernel(a, b); - rs->finish(); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; -} - diff --git a/tests/lldb/jni/KernelVariables/jnikernelvariables/simple.rscript b/tests/lldb/jni/KernelVariables/jnikernelvariables/simple.rscript deleted file mode 100644 index 30feb005..00000000 --- a/tests/lldb/jni/KernelVariables/jnikernelvariables/simple.rscript +++ /dev/null @@ -1,197 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jnikernelvariables) - -char char_global = 12; -uchar uchar_global = 234; -short short_global = -321; -ushort ushort_global = 432; -int int_global = 1234; -uint uint_global = 2345; -float float_global = 4.5f; -long long_global = -77777; -ulong ulong_global = 8888; -double double_global = -456.5f; - -char2 char2_global = {11, -22}; -uchar2 uchar2_global = {33, 44}; -short2 short2_global = {-555, 666}; -ushort2 ushort2_global = {777, 888}; -int2 int2_global = {999, -1111}; -uint2 uint2_global = {2222, 3333}; -float2 float2_global = {4.5f, -5.0f}; -long2 long2_global = {-4444, 5555}; -ulong2 ulong2_global = {6666, 7777}; -double2 double2_global = {88.5f, -99.0f}; - -char3 char3_global = {11, -22, -33}; -uchar3 uchar3_global = {33, 44, 55}; -short3 short3_global = {-555, 666, 777}; -ushort3 ushort3_global = {777, 888, 999}; -int3 int3_global = {999, -1111, 2222}; -uint3 uint3_global = {2222, 3333, 4444}; -float3 float3_global = {4.5f, -5.0f, -6.5f}; -long3 long3_global = {-4444, 5555, 6666}; -ulong3 ulong3_global = {6666, 7777, 8888}; -double3 double3_global = {88.5f, -99.0f, 111.5f}; - -char4 char4_global = {55, 11, -22, -33}; -uchar4 uchar4_global = {222, 33, 44, 55}; -short4 short4_global = {-444, -555, 666, 777}; -ushort4 ushort4_global = {666, 777, 888, 999}; -int4 int4_global = {888, 999, -1111, 2222}; -uint4 uint4_global = {1111, 2222, 3333, 4444}; -float4 float4_global = {3.0f, 4.5f, -5.0f, -6.5f}; -long4 long4_global = {-3333, -4444, 5555, 6666}; -ulong4 ulong4_global = {5555, 6666, 7777, 8888}; -double4 double4_global = {-77.0f, 88.5f, -99.0f, 111.5f}; - -rs_matrix2x2 matrix2x2_global; -rs_matrix3x3 matrix3x3_global; -rs_matrix4x4 matrix4x4_global; - -rs_quaternion quaternion_global; - -rs_allocation allocation_1D_global; -rs_allocation allocation_1D_global2; -rs_allocation allocation_2D_global; -rs_allocation allocation_2D_global2; -rs_allocation allocation_3D_global; -rs_allocation 
allocation_YUV_2D_global; - -rs_allocation_cubemap_face cubemap_face_global; -rs_sampler sampler_global; - -uchar4 __attribute__((kernel)) kernel(uchar4 in) -{ - char char_local = 'a'; - uchar uchar_local = 'b'; - short short_local = -321; - ushort ushort_local = 432; - int int_local = 1234; - uint uint_local = 2345; - float float_local = 4.5f; - long long_local = -77777; - ulong ulong_local = 8888; - double double_local = -456.5f; - - char2 char2_local = {-11, -22}; - uchar2 uchar2_local = {33, 44}; - short2 short2_local = {-555, 666}; - ushort2 ushort2_local = {777, 888}; - int2 int2_local = {999, -1111}; - uint2 uint2_local = {2222, 3333}; - float2 float2_local = {4.5f, -5.0f}; - long2 long2_local = {-4444, 5555}; - ulong2 ulong2_local = {6666, 7777}; - double2 double2_local = {88.5f, -99.0f}; - - char3 char3_local = {11, -22, -33}; - uchar3 uchar3_local = {33, 44, 55}; - short3 short3_local = {-555, 666, 777}; - ushort3 ushort3_local = {777, 888, 999}; - int3 int3_local = {999, -1111, 2222}; - uint3 uint3_local = {2222, 3333, 4444}; - float3 float3_local = {4.5f, -5.0f, -6.5f}; - long3 long3_local = {-4444, 5555, 6666}; - ulong3 ulong3_local = {6666, 7777, 8888}; - double3 double3_local = {88.5f, -99.0f, 111.5f}; - - char4 char4_local = {55, 11, -22, -33}; - uchar4 uchar4_local = {22, 33, 44, 55}; - short4 short4_local = {-444, -555, 666, 777}; - ushort4 ushort4_local = {666, 777, 888, 999}; - int4 int4_local = {888, 999, -1111, 2222}; - uint4 uint4_local = {1111, 2222, 3333, 4444}; - float4 float4_local = {3.0f, 4.5f, -5.0f, -6.5f}; - long4 long4_local = {-3333, -4444, 5555, 6666}; - ulong4 ulong4_local = {5555, 6666, 7777, 8888}; - double4 double4_local = {-77.0f, 88.5f, -99.0f, 111.5f}; - - rs_matrix2x2 matrix2x2_local = {{1., 2.5, - 3., 4.5}}; - rs_matrix3x3 matrix3x3_local = {{5., 6.5, 7., - 8.5, 9., 1.5, - 2., 3.5, 4.}}; - rs_matrix4x4 matrix4x4_local = {{5.5, 6., 7.5, 8., - 9., 1.5, 2., 3.5, - 4.5, 5.5, 6.5, 7., - 8., 9.5, 1.5, 2.5}}; - - 
matrix2x2_global = matrix2x2_local; - matrix3x3_global = matrix3x3_local; - matrix4x4_global = matrix4x4_local; - - rsQuaternionSet(&quaternion_global, 3.0, 4.5, 5.5, 6.0); - - rs_quaternion quaternion_local; - rsQuaternionSet(&quaternion_local, 7.5, 8.0, 9.0, 0.5); - - char char_combined = char_local + (char)uchar_local + char2_local.x + - (char)uchar2_local.x + char3_local.x - (char)uchar3_local.x + - char4_local.x + (char)uchar4_local.x; - - short short_combined = short_local + (short)ushort_local + short2_local.x + - (short)ushort2_local.x + short3_local.x + (short)ushort3_local.x + - short4_local.x + (short)ushort4_local.x; - - int int_combined = int_local + (int)uint_local + int2_local.x + - (int)uint2_local.x + int3_local.x + (int)uint3_local.x + int4_local.x + - (int)uint4_local.x; - - float float_combined = float_local + float2_local.x + float3_local.x + - float4_local.x; - - long long_combined = long_local + (long)ulong_local + long2_local.x + - (long)ulong2_local.x + long3_local.x + (long)ulong3_local.x + - long4_local.x + (long)ulong4_local.x; - - double double_combined = double_local + double2_local.x + double3_local.x + - double4_local.x; - - char_global = char_combined; - short_global = short_combined; - int_global = int_combined; - float_global = float_combined; - long_global = long_combined; - double_global = double_combined; - - uchar4 result = {1,2,3,4}; - return result; -} - -float use_constants_global; - -void setup(void) -{ - use_constants_global = - M_1_PI + - M_2_PI + - M_2_PIl + - M_2_SQRTPI + - M_E + - M_LN10 + - M_LN2 + - M_LOG10E + - M_LOG2E + - M_PI + - M_PI_2 + - M_PI_4 + - M_SQRT1_2 + - M_SQRT2; -} diff --git a/tests/lldb/jni/KernelVariables/res/layout/main_layout.xml b/tests/lldb/jni/KernelVariables/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/KernelVariables/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout 
xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/KernelVariables/src/com/android/rs/jnikernelvariables/MainActivity.java b/tests/lldb/jni/KernelVariables/src/com/android/rs/jnikernelvariables/MainActivity.java deleted file mode 100644 index 11e41f2c..00000000 --- a/tests/lldb/jni/KernelVariables/src/com/android/rs/jnikernelvariables/MainActivity.java +++ /dev/null @@ -1,42 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.jnikernelvariables; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jnikernelvariables"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} diff --git a/tests/lldb/jni/MultipleRSFiles/Android.mk b/tests/lldb/jni/MultipleRSFiles/Android.mk deleted file mode 100644 index ff24154d..00000000 --- a/tests/lldb/jni/MultipleRSFiles/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNIMultipleRSFiles -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjnimultiplersfiles - -LOCAL_RENDERSCRIPT_FLAGS := -g -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jnimultiplersfiles/Android.mk diff --git a/tests/lldb/jni/MultipleRSFiles/AndroidManifest.xml b/tests/lldb/jni/MultipleRSFiles/AndroidManifest.xml deleted file mode 100644 index 19bb65c4..00000000 --- a/tests/lldb/jni/MultipleRSFiles/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jnimultiplersfiles"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNIMultipleRSFiles" - android:hardwareAccelerated="true"> - - <activity 
android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/Android.mk b/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/Android.mk deleted file mode 100644 index b3c335fa..00000000 --- a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/Android.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjnimultiplersfiles -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jnimultiplersfiles.cpp first.rscript second.rscript - -LOCAL_RENDERSCRIPT_FLAGS := -g - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/first.rscript b/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/first.rscript deleted file mode 100644 index 7c4a8520..00000000 --- a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/first.rscript +++ /dev/null @@ -1,32 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jnimultiplersfiles) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) first_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -} diff --git a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/jnimultiplersfiles.cpp b/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/jnimultiplersfiles.cpp deleted file mode 100644 index 6d28a4b5..00000000 --- a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/jnimultiplersfiles.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_first.h" -#include "ScriptC_second.h" - -extern "C" void JNICALL -Java_com_android_rs_jnimultiplersfiles_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 64; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_first> s1 = new ScriptC_first(rs); - sp<ScriptC_second> s2 = new ScriptC_second(rs); - - s1->forEach_first_kernel(a, b); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - s2->forEach_second_kernel(a, b); - - rs->finish(); -} - diff --git a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/second.rscript b/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/second.rscript deleted file mode 100644 index ab898f90..00000000 --- a/tests/lldb/jni/MultipleRSFiles/jnimultiplersfiles/second.rscript +++ /dev/null @@ -1,25 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jnimultiplersfiles) - -/* RenderScript kernel that just returns the swizzled input. */ -uchar4 __attribute__((kernel)) second_kernel(uchar4 in) -{ - uchar4 result = in.wzyx; - return result; -} diff --git a/tests/lldb/jni/MultipleRSFiles/res/layout/main_layout.xml b/tests/lldb/jni/MultipleRSFiles/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/MultipleRSFiles/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/MultipleRSFiles/src/com/android/rs/jnimultiplersfiles/MainActivity.java b/tests/lldb/jni/MultipleRSFiles/src/com/android/rs/jnimultiplersfiles/MainActivity.java deleted file mode 100644 index ea743a62..00000000 --- a/tests/lldb/jni/MultipleRSFiles/src/com/android/rs/jnimultiplersfiles/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.jnimultiplersfiles; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jnimultiplersfiles"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/NoDebugWaitAttach/Android.mk b/tests/lldb/jni/NoDebugWaitAttach/Android.mk deleted file mode 100644 index 887b1996..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/Android.mk +++ /dev/null @@ -1,19 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := tests - -LOCAL_SRC_FILES := $(call all-java-files-under, src) \ - $(call all-renderscript-files-under, src) - -LOCAL_PACKAGE_NAME := JNINoDebugWaitAttach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice -LOCAL_SDK_VERSION := current - -LOCAL_JNI_SHARED_LIBRARIES := libjninodebugwaitattach - -LOCAL_RENDERSCRIPT_FLAGS := -O0 -target-api 0 - -include $(BUILD_PACKAGE) -include $(LOCAL_PATH)/jninodebugwaitattach/Android.mk diff --git a/tests/lldb/jni/NoDebugWaitAttach/AndroidManifest.xml b/tests/lldb/jni/NoDebugWaitAttach/AndroidManifest.xml deleted file mode 100644 index 846eddd4..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/AndroidManifest.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.jninodebugwaitattach"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="JNINoDebugWaitAttach" - android:hardwareAccelerated="true"> - - <activity 
android:name="MainActivity"> - <intent-filter> - <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER" /> - </intent-filter> - </activity> - </application> -</manifest> diff --git a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/Android.mk b/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/Android.mk deleted file mode 100644 index 19b08075..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/Android.mk +++ /dev/null @@ -1,11 +0,0 @@ -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE := libjninodebugwaitattach -LOCAL_LICENSE_KINDS := SPDX-license-identifier-Apache-2.0 -LOCAL_LICENSE_CONDITIONS := notice - -LOCAL_SRC_FILES := jninodebugwaitattach.cpp simple.rscript - -include frameworks/rs/tests/lldb/jni/common.mk -include $(BUILD_SHARED_LIBRARY) diff --git a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/jninodebugwaitattach.cpp b/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/jninodebugwaitattach.cpp deleted file mode 100644 index 72ec36ec..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/jninodebugwaitattach.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include <memory> - -#include <jni.h> -#include <RenderScript.h> - -#include "ScriptC_simple.h" - -extern "C" void JNICALL -Java_com_android_rs_jninodebugwaitattach_MainActivity_nativeRS( - JNIEnv * env, - jclass, - jstring pathObj) -{ - static const int size = 8; - sp<RS> rs = new RS(); - - const char * path = env->GetStringUTFChars(pathObj, nullptr); - rs->init(path, RS_INIT_LOW_LATENCY | RS_INIT_WAIT_FOR_ATTACH); - env->ReleaseStringUTFChars(pathObj, path); - - auto e = Element::RGBA_8888(rs); - Type::Builder tb(rs, e); - tb.setX(size); - tb.setY(size); - auto t = tb.create(); - - auto a = Allocation::createTyped(rs, t); - auto b = Allocation::createTyped(rs, t); - - // Script is executed once, then the data is copied back when finished - sp<ScriptC_simple> s = new ScriptC_simple(rs); - s->forEach_simple_kernel(a, b); - uint32_t * output = new uint32_t[size*size]; - b->copy2DRangeTo(0, 0, size, size, output); - delete [] output; - - rs->finish(); -} - diff --git a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/simple.rscript b/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/simple.rscript deleted file mode 100644 index c55e0b51..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/jninodebugwaitattach/simple.rscript +++ /dev/null @@ -1,32 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#pragma version(1) -#pragma rs java_package_name(com.android.rs.jninodebugwaitattach) - -float4 gColor = {0.299f, 0.587f, 0.114f, 1.f}; - -/* RenderScript kernel that just sets the colour of the screen and does some - * simple operations so it is not completely empty - * (and can therefore be debugged). - */ -uchar4 __attribute__((kernel)) simple_kernel(uchar4 in) -{ - float4 temp = rsUnpackColor8888(in); - temp = gColor; - uchar4 result = rsPackColorTo8888(temp); - return result; -}
\ No newline at end of file diff --git a/tests/lldb/jni/NoDebugWaitAttach/res/layout/main_layout.xml b/tests/lldb/jni/NoDebugWaitAttach/res/layout/main_layout.xml deleted file mode 100644 index 131c3b57..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/res/layout/main_layout.xml +++ /dev/null @@ -1,15 +0,0 @@ -<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android" - xmlns:tools="http://schemas.android.com/tools" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:background="#0099cc" - tools:context=".MainActivity"> - - <ImageView - android:id="@+id/imageView" - android:layout_width="match_parent" - android:layout_height="match_parent" - android:scaleType="fitCenter" /> - -</FrameLayout> - diff --git a/tests/lldb/jni/NoDebugWaitAttach/src/com/android/rs/jninodebugwaitattach/MainActivity.java b/tests/lldb/jni/NoDebugWaitAttach/src/com/android/rs/jninodebugwaitattach/MainActivity.java deleted file mode 100644 index 937e4d01..00000000 --- a/tests/lldb/jni/NoDebugWaitAttach/src/com/android/rs/jninodebugwaitattach/MainActivity.java +++ /dev/null @@ -1,43 +0,0 @@ -/* -* Copyright (C) 2016 The Android Open Source Project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package com.android.rs.jninodebugwaitattach; - -import android.app.Activity; -import android.os.Bundle; -import android.graphics.BitmapFactory; -import android.graphics.Bitmap; -import android.widget.ImageView; - -public class MainActivity extends Activity { - private Bitmap mBitmapIn; - private Bitmap mBitmapOut; - - static { - System.loadLibrary("RS"); - System.loadLibrary("jninodebugwaitattach"); - } - - native void nativeRS(String cacheDir); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.main_layout); - nativeRS(this.getCacheDir().toString()); - } -} - diff --git a/tests/lldb/jni/common.mk b/tests/lldb/jni/common.mk deleted file mode 100644 index 74413077..00000000 --- a/tests/lldb/jni/common.mk +++ /dev/null @@ -1,13 +0,0 @@ -LOCAL_MODULE_TAGS := tests - -LOCAL_CPP_FEATURES += exceptions - -LOCAL_CFLAGS := -Werror -Wall -Wextra -std=c++11 -LOCAL_RENDERSCRIPT_FLAGS += -O0 -target-api 0 - -LOCAL_HEADER_LIBRARIES := jni_headers -LOCAL_SHARED_LIBRARIES += libdl liblog -LOCAL_STATIC_LIBRARIES += libRScpp_static - -LOCAL_SDK_VERSION := 23 -LOCAL_NDK_STL_VARIANT := c++_static diff --git a/tests/lldb/run_tests.py b/tests/lldb/run_tests.py deleted file mode 100755 index d1700bbd..00000000 --- a/tests/lldb/run_tests.py +++ /dev/null @@ -1,839 +0,0 @@ -#!/usr/bin/env python - -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Main test suite execution script.''' -import argparse -import inspect -import logging -import os -import signal -import subprocess -import sys -import time -import collections -import xml.etree.ElementTree as ET - -from config import Config -from tests.harness import util_constants -from tests.harness.exception import TestSuiteException, FailFastException -from tests.harness import UtilAndroid -from tests.harness import UtilBundle -from tests.harness import util_log -from tests.harness.util_functions import load_py_module -from tests.harness.decorators import deprecated - -# For some reason pylint is not able to understand the class returned by -# from util_log.get_logger() and generates a lot of false warnings -#pylint: disable=maybe-no-member - -EMU_PROC = None - -def _parse_args(): - '''Parse the command line arguments. - - Returns: - A namespace object that contains the options specified to run_tests on - the command line. - ''' - - parser = argparse.ArgumentParser(description='Run the test suite.') - - parser.add_argument('--config', '-c', - metavar='path', - help='Path to a custom config file.') - parser.add_argument('--device', '-d', - help='Specify the device id of the device to test on.') - parser.add_argument('--test', '-t', - metavar='path', - help='Specify a specific test to run.') - group = parser.add_mutually_exclusive_group() - group.add_argument('--wimpy', '-w', - action='store_true', - default=None, - help='Test only a core subset of features.') - group.add_argument('--app-types', - default=['java', 'cpp', 'jni'], - nargs='*', - help='Specify a list of Android app types against which' - ' to run the tests', - dest='bundle_types') - parser.add_argument('--install-only', - action='store_true', - default=False, - help='It only runs the pre-run stage of the test suite.' 
- ' It installs the required APKs but does not ' - 'execute the tests.', - dest='install_only') - parser.add_argument('--no-install', '-n', - action='store_true', - default=False, - help='Stop the test suite installing apks to device.', - dest='noinstall') - parser.add_argument('--no-uninstall', - action='store_true', - default=False, - help='Stop the test suite uninstalling apks after ' - 'completion.', - dest='nouninstall') - parser.add_argument('--print-to-stdout', - action='store_true', - default=False, - help='Print all logging information to standard out.', - dest='print_to_stdout') - parser.add_argument('--verbose', '-v', - action='store_true', - default=None, - help='Store extra info in the log.') - parser.add_argument('--fail-fast', - action='store_true', - default=False, - help='Exit the test suite immediately on the first failure.') - parser.add_argument('--run-emu', - action='store_true', - default=None, - help='Spawn an emulator and run the test suite on that.' - ' Specify the emulator command line in the config' - ' file or with -emu-cmd.', - dest='run_emu') - - # Get the properties of the Config class and add a command line argument - # for each. - this_module = sys.modules[__name__] - for member_name, member_obj in inspect.getmembers(Config): - if (inspect.isdatadescriptor(member_obj) and - member_name not in ['__weakref__', 'device', 'verbose']): - - # List type properties can take one or more arguments - num_args = None - if (isinstance(member_obj, property) - and isinstance(member_obj.fget(Config), list)): - num_args = '+' - - opt_name = member_name.replace('_', '-') - - setattr(this_module, opt_name, '') - - parser.add_argument('--' + opt_name, - nargs=num_args, - help=member_obj.__doc__, - dest=member_name) - - return parser.parse_args() - - -def _choice(first_choice, second_choice): - '''Return first_choice if it is not None otherwise return second_choice. - - Args: - first_choice: The first choice value. 
- second_choice: The alternative value. - - Returns: - The first argument if it is not None, and the second otherwise. - ''' - return first_choice if first_choice else second_choice - - -class State(object): - '''This class manages all objects required by the test suite.''' - - # pylint: disable=too-many-instance-attributes - # Since this is a state class many attributes are expected. - - def __init__(self): - '''State constructor. - - Raises: - TestSuiteException: When unable to load config file. - - AssertionError: When assertions fail. - ''' - - # Parse the command line options - args = _parse_args() - - # create a config instance - if args.config: - # use the user supplied - config = State.load_user_configuration(args.config) - else: - # use the default configuration - config = Config() - - # save the test denylist - self.blocklist = _choice(args.blocklist, config.blocklist) - - # Allow any of the command line arguments to override the - # values in the config file. - self.adb_path = _choice(args.adb_path, config.adb_path) - - self.host_port = int(_choice(args.host_port, config.host_port)) - - self.device = _choice(args.device, config.device) - - self.user_specified_device = self.device - - self.device_port = int(_choice(args.device_port, config.device_port)) - - self.lldb_server_path_device = _choice(args.lldb_server_path_device, - config.lldb_server_path_device) - - self.lldb_server_path_host = _choice(args.lldb_server_path_host, - config.lldb_server_path_host) - - self.aosp_product_path = _choice(args.aosp_product_path, - config.aosp_product_path) - - self.log_file_path = _choice(args.log_file_path, config.log_file_path) - - self.results_file_path = _choice(args.results_file_path, - config.results_file_path) - - self.lldb_path = _choice(args.lldb_path, config.lldb_path) - self.print_to_stdout = args.print_to_stdout - self.verbose = _choice(args.verbose, config.verbose) - self.timeout = int(_choice(args.timeout, config.timeout)) - self.emu_cmd = 
_choice(args.emu_cmd, config.emu_cmd) - self.run_emu = args.run_emu - self.wimpy = args.wimpy - self.bundle_types = args.bundle_types if not self.wimpy else ['java'] - self.fail_fast = args.fail_fast - - # validate the param "verbose" - if not isinstance(self.verbose, bool): - raise TestSuiteException('The parameter "verbose" should be a ' - 'boolean: {0}'.format(self.verbose)) - - # create result array - self.results = dict() - self.single_test = args.test - - # initialise the logging facility - log_level = logging.INFO if not self.verbose else logging.DEBUG - util_log.initialise("driver", - print_to_stdout=self.print_to_stdout, - level=log_level, - file_mode='w', # open for write - file_path=self.log_file_path - ) - log = util_log.get_logger() - - if self.run_emu and not self.emu_cmd: - log.TestSuiteException('Need to specify --emu-cmd (or specify a' - ' value in the config file) if using --run-emu.') - - # create a results file - self.results_file = open(self.results_file_path, 'w') - - # create an android helper object - self.android = UtilAndroid(self.adb_path, - self.lldb_server_path_device, - self.device) - assert self.android - - # create a test bundle - self.bundle = UtilBundle(self.android, - self.aosp_product_path) - assert self.bundle - - # save the no pushing option - assert isinstance(args.noinstall, bool) - self.noinstall = args.noinstall - - assert isinstance(args.nouninstall, bool) - self.nouninstall = args.nouninstall - - # install only option - assert type(args.install_only) is bool - self.install_only = args.install_only - if self.install_only: - log.log_and_print('Option --install-only set. The test APKs will ' - 'be installed on the device but the tests will ' - 'not be executed.') - if self.noinstall: - raise TestSuiteException('Conflicting options given: ' - '--install-only and --no-install') - - # TCP port modifier which is used to increment the port number used for - # each test case to avoid collisions. 
- self.port_mod = 0 - - # total number of test files that have been executed - self.test_count = 0 - - def get_android(self): - '''Return the android ADB helper instance. - - Returns: - The android ADB helper, instance of UtilAndroid. - ''' - assert self.android - return self.android - - def get_bundle(self): - '''Return the test executable bundle. - - Returns: - The test exectable collection, instance of UtilBundle. - ''' - return self.bundle - - def add_result(self, name, app_type, result): - '''Add a test result to the collection. - - Args: - name: String name of the test that has executed. - app_type: type of app i.e. java, jni, or cpp - result: String result of the test, "pass", "fail", "error". - ''' - key = (name, app_type) - assert key not in self.results - self.results[key] = result - - def get_single_test(self): - '''Get the name of the single test to run. - - Returns: - A string that is the name of the python file containing the test to - be run. If all tests are to be run this returns None. - ''' - return self.single_test - - @staticmethod - def load_user_configuration(path): - '''Load the test suite config from the give path. - - Instantiate the Config class found in the module at the given path. - If no suitable class is available, it raises a TestSuiteException. - - Args: - path: String location of the module. - - Returns: - an instance of the Config class, defined in the module. - - Raises: - TestSuiteException: when unable to import the module or when a - subclass of Config is not found inside it. 
- ''' - - # load the module - config_module = load_py_module(path) - if not config_module: - raise TestSuiteException('Unable to import the module from "%s"' - % (path)) - - # look for a subclass of Config - for name, value in inspect.getmembers(config_module): - if (inspect.isclass(value) - and name != 'Config' - and issubclass(value, Config)): - # that's our candidate - return value() - - # otherwise there are no valid candidates - raise TestSuiteException('The provided user configuration is not ' - 'valid. The module must define a subclass ' - 'of Config') - - -def _kill_emulator(): - ''' Kill the emulator process. ''' - global EMU_PROC - if EMU_PROC: - try: - EMU_PROC.terminate() - except OSError: - # can't kill a dead proc - log = util_log.get_logger() - log.debug('Trying to kill an emulator but it is already dead.') - - -def _check_emulator_terminated(): - ''' Throw an exception if the emulator process has ended. - - Raises: - TestSuiteException: If the emulator process has ended. - ''' - global EMU_PROC - assert EMU_PROC - if EMU_PROC.poll(): - stdout, stderr = EMU_PROC.communicate() - raise TestSuiteException('The emulator terminated with output:' - '\nstderr: {0}\nstdout: {1}.'.format(stderr, stdout)) - - -@deprecated() -def _launch_emulator(state): - '''Launch the emulator and wait for it to boot. - - Args: - emu_cmd: The command line to run the emulator. - - Raises: - TestSuiteException: If an emulator already exists or the emulator - process terminated before we could connect to it, or - we failed to copy lldb-server to the emulator. 
- ''' - global EMU_PROC - android = state.android - if state.user_specified_device: - if android.device_with_substring_exists(state.user_specified_device): - raise TestSuiteException( - 'A device with name {0} already exists.', - state.user_specified_device) - else: - if android.device_with_substring_exists('emulator'): - raise TestSuiteException('An emulator already exists.') - - assert state.emu_cmd - EMU_PROC = subprocess.Popen(state.emu_cmd.split(), - stdout=None, - stderr=subprocess.STDOUT) - - log = util_log.get_logger() - log.info('Launching emulator with command line {0}'.format(state.emu_cmd)) - - tries_number = 180 - tries = tries_number - found_device = False - while not found_device: - try: - android.validate_device(False, 'emulator') - found_device = True - except TestSuiteException as ex: - tries -= 1 - if tries == 0: - # Avoid infinitely looping if the emulator won't boot - log.warning( - 'Giving up trying to validate device after {0} tries.' - .format(tries_number)) - raise ex - _check_emulator_terminated() - # wait a bit and try again, maybe it has now booted - time.sleep(10) - - tries = 500 - while not android.is_booted(): - tries -= 1 - if tries == 0: - # Avoid infinitely looping if the emulator won't boot - raise TestSuiteException('The emulator has failed to boot.') - _check_emulator_terminated() - time.sleep(5) - - # Need to be root before we can push lldb-server - android.adb_root() - android.wait_for_device() - - # Push the lldb-server executable to the device. - output = android.adb('push {0} {1}'.format(state.lldb_server_path_host, - state.lldb_server_path_device)) - - if 'failed to copy' in output or 'No such file or directory' in output: - raise TestSuiteException( - 'unable to push lldb-server to the emulator: {0}.' 
- .format(output)) - - output = android.shell('chmod a+x {0}' - .format(state.lldb_server_path_device)) - - if 'No such file or directory' in output: - raise TestSuiteException('Failed to copy lldb-server to the emulator.') - - -def _restart_emulator(state): - '''Kill the emulator and start a new instance. - - Args: - state: Test suite state collection, instance of State. - ''' - _kill_emulator() - _launch_emulator(state) - - -def _run_test(state, name, bundle_type): - '''Execute a single test case. - - Args: - state: Test suite state collection, instance of State. - name: String file name of the test to execute. - bundle_type: string for the installed app type (cpp|jni|java) - - Raises: - AssertionError: When assertion fails. - ''' - assert isinstance(name, str) - - try: - state.android.check_adb_alive() - except TestSuiteException as expt: - global EMU_PROC - if EMU_PROC: - _restart_emulator(state) - else: - raise expt - - log = util_log.get_logger() - sys.stdout.write('Running {0}\r'.format(name)) - sys.stdout.flush() - log.info('Running {0}'.format(name)) - - run_tests_dir = os.path.dirname(os.path.realpath(__file__)) - run_test_path = os.path.join(run_tests_dir, 'tests', 'run_test.py') - - # Forward port for lldb-server on the device to our host - hport = int(state.host_port) + state.port_mod - dport = int(state.device_port) + state.port_mod - state.android.forward_port(hport, dport) - state.port_mod += 1 - - log.debug('Giving up control to {0}...'.format(name)) - - params = map(str, [ - sys.executable, - run_test_path, - name, - state.log_file_path, - state.adb_path, - state.lldb_server_path_device, - state.aosp_product_path, - dport, - state.android.get_device_id(), - state.print_to_stdout, - state.verbose, - state.wimpy, - state.timeout, - bundle_type - ]) - - return_code = subprocess.call(params) - state.test_count += 1 - state.android.remove_port_forwarding() - log.seek_to_end() - - # report in sys.stdout the result - success = return_code == 
util_constants.RC_TEST_OK - status_handlers = collections.defaultdict(lambda: ('error', log.error), ( - (util_constants.RC_TEST_OK, ('pass', log.info)), - (util_constants.RC_TEST_TIMEOUT, ('timeout', log.error)), - (util_constants.RC_TEST_IGNORED, ('ignored', log.info)), - (util_constants.RC_TEST_FAIL, ('fail', log.critical)) - ) - ) - status_name, status_logger = status_handlers[return_code] - log.info('Running %s: %s', name, status_name.upper()) - status_logger("Test %r: %s", name, status_name) - - # Special case for ignored tests - just return now - if return_code == util_constants.RC_TEST_IGNORED: - return - - state.add_result(name, bundle_type, status_name) - - if state.fail_fast and not success: - raise FailFastException(name) - - # print a running total pass rate - passes = sum(1 for key, value in state.results.items() if value == 'pass') - log.info('Current pass rate: %s of %s executed.', passes, len(state.results)) - - -def _check_lldbserver_exists(state): - '''Check lldb-server exists on the target device and it is executable. - - Raises: - TestSuiteError: If lldb-server does not exist on the target. - ''' - assert state - - message = 'Unable to verify valid lldb-server on target' - - android = state.get_android() - assert android - - cmd = state.lldb_server_path_device - out = android.shell(cmd, False) - if not isinstance(out, str): - raise TestSuiteException(message) - if out.find('Usage:') < 0: - raise TestSuiteException(message) - - -def _suite_pre_run(state): - '''This function is executed before the test cases are run (setup). - - Args: - state: Test suite state collection, instance of State. - - Return: - True if the pre_run step completes without error. - Checks made: - - Validating that adb exists and runs. - - Validating that a device is attached. - - We have root access to the device. - - All test binaries were pushed to the device. - - The port for lldb-server was forwarded correctly. - - Raises: - AssertionError: When assertions fail. 
- ''' - assert state - log = util_log.get_logger() - - try: - android = state.get_android() - bundle = state.get_bundle() - assert android - assert bundle - - # validate ADB helper class - android.validate_adb() - log.log_and_print('Located ADB') - - if state.run_emu: - log.log_and_print('Launching emulator...') - _launch_emulator(state) - log.log_and_print('Started emulator ' + android.device) - else: - android.validate_device() - log.log_and_print('Located device ' + android.device) - - if state.noinstall and not state.single_test: - bundle.check_apps_installed(state.wimpy) - - # elevate to root user - android.adb_root() - android.wait_for_device() - # check that lldb-server exists on device - android.kill_servers() - _check_lldbserver_exists(state) - - if not state.noinstall: - # push all tests to the device - log.log_and_print('Pushing all tests...') - bundle.push_all() - log.log_and_print('Pushed all tests') - log.log_and_print('Pre run complete') - - except TestSuiteException as expt: - log.exception('Test suite pre run failure') - - # Even if we are logging the error, it may be helpful and more - # immediate to find out the error into the terminal - log.log_and_print('ERROR: Unable to set up the test suite: %s\n' - % expt.message, logging.ERROR) - - return False - return True - - -def _suite_post_run(state): - '''This function is executed after the test cases have run (teardown). - - Args: - state: Test suite state collection, instance of State. 
- Returns: - Number of failures - ''' - log = util_log.get_logger() - - if not state.noinstall and not state.nouninstall: - if state.wimpy: - state.bundle.uninstall_all_apk() - else: - state.bundle.uninstall_all() - log.log_and_print('Uninstalled/Deleted all tests') - - total = 0 - passes = 0 - failures = 0 - - results = ET.Element('testsuite') - results.attrib['name'] = 'LLDB RS Test Suite' - - for key, value in state.results.items(): - total += 1 - if value == 'pass': - passes += 1 - else: - failures += 1 - - # test case name, followed by pass, failure or error elements - testcase = ET.Element('testcase') - testcase.attrib['name'] = "%s:%s" % key - result_element = ET.Element(value) - result_element.text = "%s:%s" % key - testcase.append(result_element) - results.append(testcase) - - assert passes + failures == total, 'Invalid test results status' - if failures: - log.log_and_print( - 'The following failures occurred:\n%s\n' % - '\n'.join('failed: %s:%s' % test_spec - for test_spec, result in state.results.items() if result != 'pass' - )) - - log.log_and_print('{0} of {1} passed'.format(passes, total)) - if total: - log.log_and_print('{0}% rate'.format((passes*100)/total)) - - results.attrib['tests'] = str(total) - state.results_file.write(ET.tostring(results, encoding='iso-8859-1')) - - return failures - - -def _discover_tests(state): - '''Discover all tests in the tests directory. - - Returns: - List of strings, test file names from the 'tests' directory. 
- ''' - tests = [] - - single_test = state.get_single_test() - if single_test is None: - file_dir = os.path.dirname(os.path.realpath(__file__)) - tests_dir = os.path.join(file_dir, 'tests') - - for sub_dir in os.listdir(tests_dir): - current_test_dir = os.path.join(tests_dir, sub_dir) - if os.path.isdir(current_test_dir): - dir_name = os.path.basename(current_test_dir) - - if dir_name == 'harness': - continue - - for item in os.listdir(current_test_dir): - if (item.startswith('test') - and item.endswith('.py') - and not item in state.blocklist): - tests.append(item) - else: - if single_test.endswith('.py'): - tests.append(single_test) - else: - tests.append(single_test + '.py') - - return tests - - -def _deduce_python_path(state): - '''Try to deduce the PYTHONPATH environment variable via the LLDB binary. - - Args: - state: Test suite state collection, instance of State. - - Returns: - True if PYTHONPATH has been updated, False otherwise. - - Raises: - TestSuiteException: If lldb path provided in the config or command line - is incorrect. - AssertionError: If an assertion fails. 
- ''' - - lldb_path = state.lldb_path - if not lldb_path: - # lldb may not be provided in preference of a manual $PYTHONPATH - return False - - params = [lldb_path, '-P'] - - try: - proc = subprocess.Popen(params, stdout=subprocess.PIPE) - except OSError as err: - error_string = 'Could not run lldb at %s: %s' % (lldb_path, str(err)) - raise TestSuiteException(error_string) - - stdout = proc.communicate()[0] - if stdout: - os.environ['PYTHONPATH'] = stdout.strip() - return True - - return False - - -def main(): - '''The lldb-renderscript test suite entry point.''' - log = None - - try: - # parse the command line - state = State() - assert state - - # logging is initialised in State() - log = util_log.get_logger() - - # if we can, set PYTHONPATH for lldb bindings - if not _deduce_python_path(state): - log.log_and_print('Unable to deduce PYTHONPATH', logging.WARN) - - # pre run step - if not _suite_pre_run(state): - raise TestSuiteException('Test suite pre-run step failed') - # discover all tests and execute them - tests = _discover_tests(state) - log.log_and_print('Found {0} tests'.format(len(tests))) - if state.install_only: - log.log_and_print('Test applications installed. 
Terminating due to ' - '--install-only option') - else: - # run the tests - for bundle_type in state.bundle_types: - log.info("Running bundle type '%s'", bundle_type) - for item in tests: - _run_test(state, item, bundle_type) - # post run step - quit(0 if _suite_post_run(state) == 0 else 1) - - except AssertionError: - if log: - log.exception('Internal test suite error') - - print('Internal test suite error') - quit(1) - - except FailFastException: - log.exception('Early exit after first test failure') - quit(1) - - except TestSuiteException as error: - if log: - log.exception('Test suite exception') - - print('{0}'.format(str(error))) - quit(2) - - finally: - _kill_emulator() - logging.shutdown() - -def signal_handler(_, _unused): - '''Signal handler for SIGINT, caused by the user typing Ctrl-C.''' - # pylint: disable=unused-argument - # pylint: disable=protected-access - print('Ctrl+C!') - os._exit(1) - - -# execution trampoline -if __name__ == '__main__': - signal.signal(signal.SIGINT, signal_handler) - main() diff --git a/tests/lldb/tests/__init__.py b/tests/lldb/tests/__init__.py deleted file mode 100644 index d91549fa..00000000 --- a/tests/lldb/tests/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''This module contains a test runner, tests and utility code''' - - - -from . 
import harness diff --git a/tests/lldb/tests/harness/RS_funs.py b/tests/lldb/tests/harness/RS_funs.py deleted file mode 100644 index a5a0539c..00000000 --- a/tests/lldb/tests/harness/RS_funs.py +++ /dev/null @@ -1,1401 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''A list of signatures for RS builtin functions and util functions for them. - -from __future__ import absolute_import - -Function signature syntax is usually C-like, however, fixed values can also be -specified for those functions where the input range is restricted. -Lines in the function table beginning with - are comments. -Also contains utility functions to build an LLDB expression from a single -function line. -''' - -import re - -# Remove blank and comment lines using a lambda. 
-FUNC_LIST = filter(lambda line: line.strip() - and not line.strip().startswith('-'), ''' -- math functions - -uchar abs(char v); -uchar2 abs(char2 v); -uchar3 abs(char3 v); -uchar4 abs(char4 v); -uint abs(int v); -uint2 abs(int2 v); -uint3 abs(int3 v); -uint4 abs(int4 v); -ushort abs(short v); -ushort2 abs(short2 v); -ushort3 abs(short3 v); -ushort4 abs(short4 v); - -float acos(float v); -float2 acos(float2 v); -float3 acos(float3 v); -float4 acos(float4 v); - -float acosh(float v); -float2 acosh(float2 v); -float3 acosh(float3 v); -float4 acosh(float4 v); - -float acospi(float v); -float2 acospi(float2 v); -float3 acospi(float3 v); -float4 acospi(float4 v); - -float asin(float v); -float2 asin(float2 v); -float3 asin(float3 v); -float4 asin(float4 v); - -float asinh(float v); -float2 asinh(float2 v); -float3 asinh(float3 v); -float4 asinh(float4 v); - -float asinpi(float v); -float2 asinpi(float2 v); -float3 asinpi(float3 v); -float4 asinpi(float4 v); - -float atan(float v); -float2 atan(float2 v); -float3 atan(float3 v); -float4 atan(float4 v); - -float atan2(float numerator, float denominator); -float2 atan2(float2 numerator, float2 denominator); -float3 atan2(float3 numerator, float3 denominator); -float4 atan2(float4 numerator, float4 denominator); - -float atan2pi(float numerator, float denominator); -float2 atan2pi(float2 numerator, float2 denominator); -float3 atan2pi(float3 numerator, float3 denominator); -float4 atan2pi(float4 numerator, float4 denominator); - -float atanh(float v); -float2 atanh(float2 v); -float3 atanh(float3 v); -float4 atanh(float4 v); - -float atanpi(float v); -float2 atanpi(float2 v); -float3 atanpi(float3 v); -float4 atanpi(float4 v); - -float cbrt(float v); -float2 cbrt(float2 v); -float3 cbrt(float3 v); -float4 cbrt(float4 v); - -float ceil(float v); -float2 ceil(float2 v); -float3 ceil(float3 v); -float4 ceil(float4 v); - -char clamp(char value, char min_value, char max_value); -char2 clamp(char2 value, char min_value, char 
max_value); -char2 clamp(char2 value, char2 min_value, char2 max_value); -char3 clamp(char3 value, char min_value, char max_value); -char3 clamp(char3 value, char3 min_value, char3 max_value); -char4 clamp(char4 value, char min_value, char max_value); -char4 clamp(char4 value, char4 min_value, char4 max_value); -float clamp(float value, float min_value, float max_value); -float2 clamp(float2 value, float min_value, float max_value); -float2 clamp(float2 value, float2 min_value, float2 max_value); -float3 clamp(float3 value, float min_value, float max_value); -float3 clamp(float3 value, float3 min_value, float3 max_value); -float4 clamp(float4 value, float min_value, float max_value); -float4 clamp(float4 value, float4 min_value, float4 max_value); -int clamp(int value, int min_value, int max_value); -int2 clamp(int2 value, int min_value, int max_value); -int2 clamp(int2 value, int2 min_value, int2 max_value); -int3 clamp(int3 value, int min_value, int max_value); -int3 clamp(int3 value, int3 min_value, int3 max_value); -int4 clamp(int4 value, int min_value, int max_value); -int4 clamp(int4 value, int4 min_value, int4 max_value); -long clamp(long value, long min_value, long max_value); -long2 clamp(long2 value, long min_value, long max_value); -long2 clamp(long2 value, long2 min_value, long2 max_value); -long3 clamp(long3 value, long min_value, long max_value); -long3 clamp(long3 value, long3 min_value, long3 max_value); -long4 clamp(long4 value, long min_value, long max_value); -long4 clamp(long4 value, long4 min_value, long4 max_value); -short clamp(short value, short min_value, short max_value); -short2 clamp(short2 value, short min_value, short max_value); -short2 clamp(short2 value, short2 min_value, short2 max_value); -short3 clamp(short3 value, short min_value, short max_value); -short3 clamp(short3 value, short3 min_value, short3 max_value); -short4 clamp(short4 value, short min_value, short max_value); -short4 clamp(short4 value, short4 min_value, short4 
max_value); -uchar clamp(uchar value, uchar min_value, uchar max_value); -uchar2 clamp(uchar2 value, uchar min_value, uchar max_value); -uchar2 clamp(uchar2 value, uchar2 min_value, uchar2 max_value); -uchar3 clamp(uchar3 value, uchar min_value, uchar max_value); -uchar3 clamp(uchar3 value, uchar3 min_value, uchar3 max_value); -uchar4 clamp(uchar4 value, uchar min_value, uchar max_value); -uchar4 clamp(uchar4 value, uchar4 min_value, uchar4 max_value); -uint clamp(uint value, uint min_value, uint max_value); -uint2 clamp(uint2 value, uint min_value, uint max_value); -uint2 clamp(uint2 value, uint2 min_value, uint2 max_value); -uint3 clamp(uint3 value, uint min_value, uint max_value); -uint3 clamp(uint3 value, uint3 min_value, uint3 max_value); -uint4 clamp(uint4 value, uint min_value, uint max_value); -uint4 clamp(uint4 value, uint4 min_value, uint4 max_value); -ulong clamp(ulong value, ulong min_value, ulong max_value); -ulong2 clamp(ulong2 value, ulong min_value, ulong max_value); -ulong2 clamp(ulong2 value, ulong2 min_value, ulong2 max_value); -ulong3 clamp(ulong3 value, ulong min_value, ulong max_value); -ulong3 clamp(ulong3 value, ulong3 min_value, ulong3 max_value); -ulong4 clamp(ulong4 value, ulong min_value, ulong max_value); -ulong4 clamp(ulong4 value, ulong4 min_value, ulong4 max_value); -ushort clamp(ushort value, ushort min_value, ushort max_value); -ushort2 clamp(ushort2 value, ushort min_value, ushort max_value); -ushort2 clamp(ushort2 value, ushort2 min_value, ushort2 max_value); -ushort3 clamp(ushort3 value, ushort min_value, ushort max_value); -ushort3 clamp(ushort3 value, ushort3 min_value, ushort3 max_value); -ushort4 clamp(ushort4 value, ushort min_value, ushort max_value); -ushort4 clamp(ushort4 value, ushort4 min_value, ushort4 max_value); - -char clz(char value); -char2 clz(char2 value); -char3 clz(char3 value); -char4 clz(char4 value); -int clz(int value); -int2 clz(int2 value); -int3 clz(int3 value); -int4 clz(int4 value); -short clz(short 
value); -short2 clz(short2 value); -short3 clz(short3 value); -short4 clz(short4 value); -uchar clz(uchar value); -uchar2 clz(uchar2 value); -uchar3 clz(uchar3 value); -uchar4 clz(uchar4 value); -uint clz(uint value); -uint2 clz(uint2 value); -uint3 clz(uint3 value); -uint4 clz(uint4 value); -ushort clz(ushort value); -ushort2 clz(ushort2 value); -ushort3 clz(ushort3 value); -ushort4 clz(ushort4 value); - -float copysign(float magnitude_value, float sign_value); -float2 copysign(float2 magnitude_value, float2 sign_value); -float3 copysign(float3 magnitude_value, float3 sign_value); -float4 copysign(float4 magnitude_value, float4 sign_value); - -float cos(float v); -float2 cos(float2 v); -float3 cos(float3 v); -float4 cos(float4 v); - -float cosh(float v); -float2 cosh(float2 v); -float3 cosh(float3 v); -float4 cosh(float4 v); - -float cospi(float v); -float2 cospi(float2 v); -float3 cospi(float3 v); -float4 cospi(float4 v); - -float erf(float v); -float2 erf(float2 v); -float3 erf(float3 v); -float4 erf(float4 v); - -float erfc(float v); -float2 erfc(float2 v); -float3 erfc(float3 v); -float4 erfc(float4 v); - -float exp(float v); -float2 exp(float2 v); -float3 exp(float3 v); -float4 exp(float4 v); - -float exp10(float v); -float2 exp10(float2 v); -float3 exp10(float3 v); -float4 exp10(float4 v); - -float exp2(float v); -float2 exp2(float2 v); -float3 exp2(float3 v); -float4 exp2(float4 v); - -float expm1(float v); -float2 expm1(float2 v); -float3 expm1(float3 v); -float4 expm1(float4 v); - -float fabs(float v); -float2 fabs(float2 v); -float3 fabs(float3 v); -float4 fabs(float4 v); - -float fdim(float a, float b); -float2 fdim(float2 a, float2 b); -float3 fdim(float3 a, float3 b); -float4 fdim(float4 a, float4 b); - -float floor(float v); -float2 floor(float2 v); -float3 floor(float3 v); -float4 floor(float4 v); - -float fma(float multiplicand1, float multiplicand2, float offset); -float2 fma(float2 multiplicand1, float2 multiplicand2, float2 offset); -float3 
fma(float3 multiplicand1, float3 multiplicand2, float3 offset); -float4 fma(float4 multiplicand1, float4 multiplicand2, float4 offset); - -float fmax(float a, float b); -float2 fmax(float2 a, float b); -float2 fmax(float2 a, float2 b); -float3 fmax(float3 a, float b); -float3 fmax(float3 a, float3 b); -float4 fmax(float4 a, float b); -float4 fmax(float4 a, float4 b); - -float fmin(float a, float b); -float2 fmin(float2 a, float b); -float2 fmin(float2 a, float2 b); -float3 fmin(float3 a, float b); -float3 fmin(float3 a, float3 b); -float4 fmin(float4 a, float b); -float4 fmin(float4 a, float4 b); - -float fmod(float numerator, float denominator); -float2 fmod(float2 numerator, float2 denominator); -float3 fmod(float3 numerator, float3 denominator); -float4 fmod(float4 numerator, float4 denominator); - -float fract(float v, float* floor); -float2 fract(float2 v, float2* floor); -float3 fract(float3 v, float3* floor); -float4 fract(float4 v, float4* floor); - -float frexp(float v, int* exponent); -float2 frexp(float2 v, int2* exponent); -float3 frexp(float3 v, int3* exponent); -float4 frexp(float4 v, int4* exponent); - -float half_recip(float v); -float2 half_recip(float2 v); -float3 half_recip(float3 v); -float4 half_recip(float4 v); - -float half_rsqrt(float v); -float2 half_rsqrt(float2 v); -float3 half_rsqrt(float3 v); -float4 half_rsqrt(float4 v); - -float half_sqrt(float v); -float2 half_sqrt(float2 v); -float3 half_sqrt(float3 v); -float4 half_sqrt(float4 v); - -float hypot(float a, float b); -float2 hypot(float2 a, float2 b); -float3 hypot(float3 a, float3 b); -float4 hypot(float4 a, float4 b); - -int ilogb(float v); -int2 ilogb(float2 v); -int3 ilogb(float3 v); -int4 ilogb(float4 v); - -float ldexp(float mantissa, int exponent); -float2 ldexp(float2 mantissa, int exponent); -float2 ldexp(float2 mantissa, int2 exponent); -float3 ldexp(float3 mantissa, int exponent); -float3 ldexp(float3 mantissa, int3 exponent); -float4 ldexp(float4 mantissa, int exponent); 
-float4 ldexp(float4 mantissa, int4 exponent); - -float lgamma(float v); -float lgamma(float v, int* sign_of_gamma); -float2 lgamma(float2 v); -float2 lgamma(float2 v, int2* sign_of_gamma); -float3 lgamma(float3 v); -float3 lgamma(float3 v, int3* sign_of_gamma); -float4 lgamma(float4 v); -float4 lgamma(float4 v, int4* sign_of_gamma); - -float log(float v); -float2 log(float2 v); -float3 log(float3 v); -float4 log(float4 v); - -float log10(float v); -float2 log10(float2 v); -float3 log10(float3 v); -float4 log10(float4 v); - -float log1p(float v); -float2 log1p(float2 v); -float3 log1p(float3 v); -float4 log1p(float4 v); - -float log2(float v); -float2 log2(float2 v); -float3 log2(float3 v); -float4 log2(float4 v); - -float logb(float v); -float2 logb(float2 v); -float3 logb(float3 v); -float4 logb(float4 v); - -float mad(float multiplicand1, float multiplicand2, float offset); -float2 mad(float2 multiplicand1, float2 multiplicand2, float2 offset); -float3 mad(float3 multiplicand1, float3 multiplicand2, float3 offset); -float4 mad(float4 multiplicand1, float4 multiplicand2, float4 offset); - -char max(char a, char b); -char2 max(char2 a, char2 b); -char3 max(char3 a, char3 b); -char4 max(char4 a, char4 b); -float max(float a, float b); -float2 max(float2 a, float2 b); -float3 max(float3 a, float3 b); -float4 max(float4 a, float4 b); -int max(int a, int b); -int2 max(int2 a, int2 b); -int3 max(int3 a, int3 b); -int4 max(int4 a, int4 b); -long max(long a, long b); -long2 max(long2 a, long2 b); -long3 max(long3 a, long3 b); -long4 max(long4 a, long4 b); -short max(short a, short b); -short2 max(short2 a, short2 b); -short3 max(short3 a, short3 b); -short4 max(short4 a, short4 b); -uchar max(uchar a, uchar b); -uchar2 max(uchar2 a, uchar2 b); -uchar3 max(uchar3 a, uchar3 b); -uchar4 max(uchar4 a, uchar4 b); -uint max(uint a, uint b); -uint2 max(uint2 a, uint2 b); -uint3 max(uint3 a, uint3 b); -uint4 max(uint4 a, uint4 b); -ulong max(ulong a, ulong b); -ulong2 max(ulong2 
a, ulong2 b); -ulong3 max(ulong3 a, ulong3 b); -ulong4 max(ulong4 a, ulong4 b); -ushort max(ushort a, ushort b); -ushort2 max(ushort2 a, ushort2 b); -ushort3 max(ushort3 a, ushort3 b); -ushort4 max(ushort4 a, ushort4 b); - -char min(char a, char b); -char2 min(char2 a, char2 b); -char3 min(char3 a, char3 b); -char4 min(char4 a, char4 b); -float min(float a, float b); -float2 min(float2 a, float2 b); -float3 min(float3 a, float3 b); -float4 min(float4 a, float4 b); -int min(int a, int b); -int2 min(int2 a, int2 b); -int3 min(int3 a, int3 b); -int4 min(int4 a, int4 b); -long min(long a, long b); -long2 min(long2 a, long2 b); -long3 min(long3 a, long3 b); -long4 min(long4 a, long4 b); -short min(short a, short b); -short2 min(short2 a, short2 b); -short3 min(short3 a, short3 b); -short4 min(short4 a, short4 b); -uchar min(uchar a, uchar b); -uchar2 min(uchar2 a, uchar2 b); -uchar3 min(uchar3 a, uchar3 b); -uchar4 min(uchar4 a, uchar4 b); -uint min(uint a, uint b); -uint2 min(uint2 a, uint2 b); -uint3 min(uint3 a, uint3 b); -uint4 min(uint4 a, uint4 b); -ulong min(ulong a, ulong b); -ulong2 min(ulong2 a, ulong2 b); -ulong3 min(ulong3 a, ulong3 b); -ulong4 min(ulong4 a, ulong4 b); -ushort min(ushort a, ushort b); -ushort2 min(ushort2 a, ushort2 b); -ushort3 min(ushort3 a, ushort3 b); -ushort4 min(ushort4 a, ushort4 b); - -float mix(float start, float stop, float fraction); -float2 mix(float2 start, float2 stop, float fraction); -float2 mix(float2 start, float2 stop, float2 fraction); -float3 mix(float3 start, float3 stop, float fraction); -float3 mix(float3 start, float3 stop, float3 fraction); -float4 mix(float4 start, float4 stop, float fraction); -float4 mix(float4 start, float4 stop, float4 fraction); - -float modf(float v, float* integral_part); -float2 modf(float2 v, float2* integral_part); -float3 modf(float3 v, float3* integral_part); -float4 modf(float4 v, float4* integral_part); - -float nan(uint v); - -float native_acos(float v); -float2 native_acos(float2 
v); -float3 native_acos(float3 v); -float4 native_acos(float4 v); - -float native_acosh(float v); -float2 native_acosh(float2 v); -float3 native_acosh(float3 v); -float4 native_acosh(float4 v); - -float native_acospi(float v); -float2 native_acospi(float2 v); -float3 native_acospi(float3 v); -float4 native_acospi(float4 v); - -float native_asin(float v); -float2 native_asin(float2 v); -float3 native_asin(float3 v); -float4 native_asin(float4 v); - -float native_asinh(float v); -float2 native_asinh(float2 v); -float3 native_asinh(float3 v); -float4 native_asinh(float4 v); - -float native_asinpi(float v); -float2 native_asinpi(float2 v); -float3 native_asinpi(float3 v); -float4 native_asinpi(float4 v); - -float native_atan(float v); -float2 native_atan(float2 v); -float3 native_atan(float3 v); -float4 native_atan(float4 v); - -float native_atan2(float numerator, float denominator); -float2 native_atan2(float2 numerator, float2 denominator); -float3 native_atan2(float3 numerator, float3 denominator); -float4 native_atan2(float4 numerator, float4 denominator); - -float native_atan2pi(float numerator, float denominator); -float2 native_atan2pi(float2 numerator, float2 denominator); -float3 native_atan2pi(float3 numerator, float3 denominator); -float4 native_atan2pi(float4 numerator, float4 denominator); - -float native_atanh(float v); -float2 native_atanh(float2 v); -float3 native_atanh(float3 v); -float4 native_atanh(float4 v); - -float native_atanpi(float v); -float2 native_atanpi(float2 v); -float3 native_atanpi(float3 v); -float4 native_atanpi(float4 v); - -float native_cbrt(float v); -float2 native_cbrt(float2 v); -float3 native_cbrt(float3 v); -float4 native_cbrt(float4 v); - -float native_cos(float v); -float2 native_cos(float2 v); -float3 native_cos(float3 v); -float4 native_cos(float4 v); - -float native_cosh(float v); -float2 native_cosh(float2 v); -float3 native_cosh(float3 v); -float4 native_cosh(float4 v); - -float native_cospi(float v); -float2 
native_cospi(float2 v); -float3 native_cospi(float3 v); -float4 native_cospi(float4 v); - -float native_divide(float left_vector, float right_vector); -float2 native_divide(float2 left_vector, float2 right_vector); -float3 native_divide(float3 left_vector, float3 right_vector); -float4 native_divide(float4 left_vector, float4 right_vector); - -float native_exp(float v); -float2 native_exp(float2 v); -float3 native_exp(float3 v); -float4 native_exp(float4 v); - -float native_exp10(float v); -float2 native_exp10(float2 v); -float3 native_exp10(float3 v); -float4 native_exp10(float4 v); - -float native_exp2(float v); -float2 native_exp2(float2 v); -float3 native_exp2(float3 v); -float4 native_exp2(float4 v); - -float native_expm1(float v); -float2 native_expm1(float2 v); -float3 native_expm1(float3 v); -float4 native_expm1(float4 v); - -float native_hypot(float a, float b); -float2 native_hypot(float2 a, float2 b); -float3 native_hypot(float3 a, float3 b); -float4 native_hypot(float4 a, float4 b); - -float native_log(float v); -float2 native_log(float2 v); -float3 native_log(float3 v); -float4 native_log(float4 v); - -float native_log10(float v); -float2 native_log10(float2 v); -float3 native_log10(float3 v); -float4 native_log10(float4 v); - -float native_log1p(float v); -float2 native_log1p(float2 v); -float3 native_log1p(float3 v); -float4 native_log1p(float4 v); - -float native_log2(float v); -float2 native_log2(float2 v); -float3 native_log2(float3 v); -float4 native_log2(float4 v); - -float native_powr(float base, float exponent); -float2 native_powr(float2 base, float2 exponent); -float3 native_powr(float3 base, float3 exponent); -float4 native_powr(float4 base, float4 exponent); - -float native_recip(float v); -float2 native_recip(float2 v); -float3 native_recip(float3 v); -float4 native_recip(float4 v); - -float native_rootn(float v, int n); -float2 native_rootn(float2 v, int2 n); -float3 native_rootn(float3 v, int3 n); -float4 native_rootn(float4 v, int4 n); 
- -float native_rsqrt(float v); -float2 native_rsqrt(float2 v); -float3 native_rsqrt(float3 v); -float4 native_rsqrt(float4 v); - -float native_sin(float v); -float2 native_sin(float2 v); -float3 native_sin(float3 v); -float4 native_sin(float4 v); - -float native_sincos(float v, float* cos); -float2 native_sincos(float2 v, float2* cos); -float3 native_sincos(float3 v, float3* cos); -float4 native_sincos(float4 v, float4* cos); - -float native_sinh(float v); -float2 native_sinh(float2 v); -float3 native_sinh(float3 v); -float4 native_sinh(float4 v); - -float native_sinpi(float v); -float2 native_sinpi(float2 v); -float3 native_sinpi(float3 v); -float4 native_sinpi(float4 v); - -float native_sqrt(float v); -float2 native_sqrt(float2 v); -float3 native_sqrt(float3 v); -float4 native_sqrt(float4 v); - -float native_tan(float v); -float2 native_tan(float2 v); -float3 native_tan(float3 v); -float4 native_tan(float4 v); - -float native_tanh(float v); -float2 native_tanh(float2 v); -float3 native_tanh(float3 v); -float4 native_tanh(float4 v); - -float native_tanpi(float v); -float2 native_tanpi(float2 v); -float3 native_tanpi(float3 v); -float4 native_tanpi(float4 v); - -float nextafter(float v, float target); -float2 nextafter(float2 v, float2 target); -float3 nextafter(float3 v, float3 target); -float4 nextafter(float4 v, float4 target); - -float pow(float base, float exponent); -float2 pow(float2 base, float2 exponent); -float3 pow(float3 base, float3 exponent); -float4 pow(float4 base, float4 exponent); - -float pown(float base, int exponent); -float2 pown(float2 base, int2 exponent); -float3 pown(float3 base, int3 exponent); -float4 pown(float4 base, int4 exponent); - -float powr(float base, float exponent); -float2 powr(float2 base, float2 exponent); -float3 powr(float3 base, float3 exponent); -float4 powr(float4 base, float4 exponent); - -float radians(float v); -float2 radians(float2 v); -float3 radians(float3 v); -float4 radians(float4 v); - -float remainder(float 
numerator, float denominator); -float2 remainder(float2 numerator, float2 denominator); -float3 remainder(float3 numerator, float3 denominator); -float4 remainder(float4 numerator, float4 denominator); - -float remquo(float numerator, float denominator, int* quotient); -float2 remquo(float2 numerator, float2 denominator, int2* quotient); -float3 remquo(float3 numerator, float3 denominator, int3* quotient); -float4 remquo(float4 numerator, float4 denominator, int4* quotient); - -float rint(float v); -float2 rint(float2 v); -float3 rint(float3 v); -float4 rint(float4 v); - -float rootn(float v, int n); -float2 rootn(float2 v, int2 n); -float3 rootn(float3 v, int3 n); -float4 rootn(float4 v, int4 n); - -float round(float v); -float2 round(float2 v); -float3 round(float3 v); -float4 round(float4 v); - -char rsClamp(char amount, char low, char high); -int rsClamp(int amount, int low, int high); -short rsClamp(short amount, short low, short high); -uchar rsClamp(uchar amount, uchar low, uchar high); -uint rsClamp(uint amount, uint low, uint high); -ushort rsClamp(ushort amount, ushort low, ushort high); - -float rsFrac(float v); - -float rsRand(float max_value); -float rsRand(float min_value, float max_value); -int rsRand(int max_value); -int rsRand(int min_value, int max_value); - -float rsqrt(float v); -float2 rsqrt(float2 v); -float3 rsqrt(float3 v); -float4 rsqrt(float4 v); - -float sign(float v); -float2 sign(float2 v); -float3 sign(float3 v); -float4 sign(float4 v); - -float sin(float v); -float2 sin(float2 v); -float3 sin(float3 v); -float4 sin(float4 v); - -float sincos(float v, float* cos); -float2 sincos(float2 v, float2* cos); -float3 sincos(float3 v, float3* cos); -float4 sincos(float4 v, float4* cos); - -float sinh(float v); -float2 sinh(float2 v); -float3 sinh(float3 v); -float4 sinh(float4 v); - -float sinpi(float v); -float2 sinpi(float2 v); -float3 sinpi(float3 v); -float4 sinpi(float4 v); - -float sqrt(float v); -float2 sqrt(float2 v); -float3 
sqrt(float3 v); -float4 sqrt(float4 v); - -float step(float edge, float v); -float2 step(float edge, float2 v); -float2 step(float2 edge, float v); -float2 step(float2 edge, float2 v); -float3 step(float edge, float3 v); -float3 step(float3 edge, float v); -float3 step(float3 edge, float3 v); -float4 step(float edge, float4 v); -float4 step(float4 edge, float v); -float4 step(float4 edge, float4 v); - -float tan(float v); -float2 tan(float2 v); -float3 tan(float3 v); -float4 tan(float4 v); - -float tanh(float v); -float2 tanh(float2 v); -float3 tanh(float3 v); -float4 tanh(float4 v); - -float tanpi(float v); -float2 tanpi(float2 v); -float3 tanpi(float3 v); -float4 tanpi(float4 v); - -float tgamma(float v); -float2 tgamma(float2 v); -float3 tgamma(float3 v); -float4 tgamma(float4 v); - -float trunc(float v); -float2 trunc(float2 v); -float3 trunc(float3 v); -float4 trunc(float4 v); - -uchar4 rsPackColorTo8888(float r, float g, float b); -uchar4 rsPackColorTo8888(float r, float g, float b, float a); -uchar4 rsPackColorTo8888(float3 color); -uchar4 rsPackColorTo8888(float4 color); -float4 rsUnpackColor8888(uchar4 c); - -float4 rsYuvToRGBA_float4(uchar y, uchar u, uchar v); -uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v); - -- vector functions - -float3 cross(float3 left_vector, float3 right_vector); -float4 cross(float4 left_vector, float4 right_vector); - -float distance(float left_vector, float right_vector); -float distance(float2 left_vector, float2 right_vector); -float distance(float3 left_vector, float3 right_vector); -float distance(float4 left_vector, float4 right_vector); - -float dot(float left_vector, float right_vector); -float dot(float2 left_vector, float2 right_vector); -float dot(float3 left_vector, float3 right_vector); -float dot(float4 left_vector, float4 right_vector); - -float fast_distance(float left_vector, float right_vector); -float fast_distance(float2 left_vector, float2 right_vector); -float fast_distance(float3 left_vector, float3 
right_vector); -float fast_distance(float4 left_vector, float4 right_vector); - -float fast_length(float v); -float fast_length(float2 v); -float fast_length(float3 v); -float fast_length(float4 v); - -float fast_normalize(float v); -float2 fast_normalize(float2 v); -float3 fast_normalize(float3 v); -float4 fast_normalize(float4 v); - -float length(float v); -float length(float2 v); -float length(float3 v); -float length(float4 v); - -float native_distance(float left_vector, float right_vector); -float native_distance(float2 left_vector, float2 right_vector); -float native_distance(float3 left_vector, float3 right_vector); -float native_distance(float4 left_vector, float4 right_vector); - -float native_length(float v); -float native_length(float2 v); -float native_length(float3 v); -float native_length(float4 v); - -float native_normalize(float v); -float2 native_normalize(float2 v); -float3 native_normalize(float3 v); -float4 native_normalize(float4 v); - -float normalize(float v); -float2 normalize(float2 v); -float3 normalize(float3 v); -float4 normalize(float4 v); - -- conversion functions - -char2 convert_char2(char2 v); -char2 convert_char2(double2 v); -char2 convert_char2(float2 v); -char2 convert_char2(int2 v); -char2 convert_char2(long2 v); -char2 convert_char2(short2 v); -char2 convert_char2(uchar2 v); -char2 convert_char2(uint2 v); -char2 convert_char2(ulong2 v); -char2 convert_char2(ushort2 v); - -char3 convert_char3(char3 v); -char3 convert_char3(double3 v); -char3 convert_char3(float3 v); -char3 convert_char3(int3 v); -char3 convert_char3(long3 v); -char3 convert_char3(short3 v); -char3 convert_char3(uchar3 v); -char3 convert_char3(uint3 v); -char3 convert_char3(ulong3 v); -char3 convert_char3(ushort3 v); - -char4 convert_char4(char4 v); -char4 convert_char4(double4 v); -char4 convert_char4(float4 v); -char4 convert_char4(int4 v); -char4 convert_char4(long4 v); -char4 convert_char4(short4 v); -char4 convert_char4(uchar4 v); -char4 convert_char4(uint4 
v); -char4 convert_char4(ulong4 v); -char4 convert_char4(ushort4 v); - -double2 convert_double2(char2 v); -double2 convert_double2(double2 v); -double2 convert_double2(float2 v); -double2 convert_double2(int2 v); -double2 convert_double2(long2 v); -double2 convert_double2(short2 v); -double2 convert_double2(uchar2 v); -double2 convert_double2(uint2 v); -double2 convert_double2(ulong2 v); -double2 convert_double2(ushort2 v); - -double3 convert_double3(char3 v); -double3 convert_double3(double3 v); -double3 convert_double3(float3 v); -double3 convert_double3(int3 v); -double3 convert_double3(long3 v); -double3 convert_double3(short3 v); -double3 convert_double3(uchar3 v); -double3 convert_double3(uint3 v); -double3 convert_double3(ulong3 v); -double3 convert_double3(ushort3 v); - -double4 convert_double4(char4 v); -double4 convert_double4(double4 v); -double4 convert_double4(float4 v); -double4 convert_double4(int4 v); -double4 convert_double4(long4 v); -double4 convert_double4(short4 v); -double4 convert_double4(uchar4 v); -double4 convert_double4(uint4 v); -double4 convert_double4(ulong4 v); -double4 convert_double4(ushort4 v); - -float2 convert_float2(char2 v); -float2 convert_float2(double2 v); -float2 convert_float2(float2 v); -float2 convert_float2(int2 v); -float2 convert_float2(long2 v); -float2 convert_float2(short2 v); -float2 convert_float2(uchar2 v); -float2 convert_float2(uint2 v); -float2 convert_float2(ulong2 v); -float2 convert_float2(ushort2 v); - -float3 convert_float3(char3 v); -float3 convert_float3(double3 v); -float3 convert_float3(float3 v); -float3 convert_float3(int3 v); -float3 convert_float3(long3 v); -float3 convert_float3(short3 v); -float3 convert_float3(uchar3 v); -float3 convert_float3(uint3 v); -float3 convert_float3(ulong3 v); -float3 convert_float3(ushort3 v); - -float4 convert_float4(char4 v); -float4 convert_float4(double4 v); -float4 convert_float4(float4 v); -float4 convert_float4(int4 v); -float4 convert_float4(long4 v); 
-float4 convert_float4(short4 v); -float4 convert_float4(uchar4 v); -float4 convert_float4(uint4 v); -float4 convert_float4(ulong4 v); -float4 convert_float4(ushort4 v); - -int2 convert_int2(char2 v); -int2 convert_int2(double2 v); -int2 convert_int2(float2 v); -int2 convert_int2(int2 v); -int2 convert_int2(long2 v); -int2 convert_int2(short2 v); -int2 convert_int2(uchar2 v); -int2 convert_int2(uint2 v); -int2 convert_int2(ulong2 v); -int2 convert_int2(ushort2 v); - -int3 convert_int3(char3 v); -int3 convert_int3(double3 v); -int3 convert_int3(float3 v); -int3 convert_int3(int3 v); -int3 convert_int3(long3 v); -int3 convert_int3(short3 v); -int3 convert_int3(uchar3 v); -int3 convert_int3(uint3 v); -int3 convert_int3(ulong3 v); -int3 convert_int3(ushort3 v); - -int4 convert_int4(char4 v); -int4 convert_int4(double4 v); -int4 convert_int4(float4 v); -int4 convert_int4(int4 v); -int4 convert_int4(long4 v); -int4 convert_int4(short4 v); -int4 convert_int4(uchar4 v); -int4 convert_int4(uint4 v); -int4 convert_int4(ulong4 v); -int4 convert_int4(ushort4 v); - -long2 convert_long2(char2 v); -long2 convert_long2(double2 v); -long2 convert_long2(float2 v); -long2 convert_long2(int2 v); -long2 convert_long2(long2 v); -long2 convert_long2(short2 v); -long2 convert_long2(uchar2 v); -long2 convert_long2(uint2 v); -long2 convert_long2(ulong2 v); -long2 convert_long2(ushort2 v); - -long3 convert_long3(char3 v); -long3 convert_long3(double3 v); -long3 convert_long3(float3 v); -long3 convert_long3(int3 v); -long3 convert_long3(long3 v); -long3 convert_long3(short3 v); -long3 convert_long3(uchar3 v); -long3 convert_long3(uint3 v); -long3 convert_long3(ulong3 v); -long3 convert_long3(ushort3 v); - -long4 convert_long4(char4 v); -long4 convert_long4(double4 v); -long4 convert_long4(float4 v); -long4 convert_long4(int4 v); -long4 convert_long4(long4 v); -long4 convert_long4(short4 v); -long4 convert_long4(uchar4 v); -long4 convert_long4(uint4 v); -long4 convert_long4(ulong4 v); -long4 
convert_long4(ushort4 v); - -short2 convert_short2(char2 v); -short2 convert_short2(double2 v); -short2 convert_short2(float2 v); -short2 convert_short2(int2 v); -short2 convert_short2(long2 v); -short2 convert_short2(short2 v); -short2 convert_short2(uchar2 v); -short2 convert_short2(uint2 v); -short2 convert_short2(ulong2 v); -short2 convert_short2(ushort2 v); - -short3 convert_short3(char3 v); -short3 convert_short3(double3 v); -short3 convert_short3(float3 v); -short3 convert_short3(int3 v); -short3 convert_short3(long3 v); -short3 convert_short3(short3 v); -short3 convert_short3(uchar3 v); -short3 convert_short3(uint3 v); -short3 convert_short3(ulong3 v); -short3 convert_short3(ushort3 v); - -short4 convert_short4(char4 v); -short4 convert_short4(double4 v); -short4 convert_short4(float4 v); -short4 convert_short4(int4 v); -short4 convert_short4(long4 v); -short4 convert_short4(short4 v); -short4 convert_short4(uchar4 v); -short4 convert_short4(uint4 v); -short4 convert_short4(ulong4 v); -short4 convert_short4(ushort4 v); - -uchar2 convert_uchar2(char2 v); -uchar2 convert_uchar2(double2 v); -uchar2 convert_uchar2(float2 v); -uchar2 convert_uchar2(int2 v); -uchar2 convert_uchar2(long2 v); -uchar2 convert_uchar2(short2 v); -uchar2 convert_uchar2(uchar2 v); -uchar2 convert_uchar2(uint2 v); -uchar2 convert_uchar2(ulong2 v); -uchar2 convert_uchar2(ushort2 v); - -uchar3 convert_uchar3(char3 v); -uchar3 convert_uchar3(double3 v); -uchar3 convert_uchar3(float3 v); -uchar3 convert_uchar3(int3 v); -uchar3 convert_uchar3(long3 v); -uchar3 convert_uchar3(short3 v); -uchar3 convert_uchar3(uchar3 v); -uchar3 convert_uchar3(uint3 v); -uchar3 convert_uchar3(ulong3 v); -uchar3 convert_uchar3(ushort3 v); - -uchar4 convert_uchar4(char4 v); -uchar4 convert_uchar4(double4 v); -uchar4 convert_uchar4(float4 v); -uchar4 convert_uchar4(int4 v); -uchar4 convert_uchar4(long4 v); -uchar4 convert_uchar4(short4 v); -uchar4 convert_uchar4(uchar4 v); -uchar4 convert_uchar4(uint4 v); -uchar4 
convert_uchar4(ulong4 v); -uchar4 convert_uchar4(ushort4 v); - -uint2 convert_uint2(char2 v); -uint2 convert_uint2(double2 v); -uint2 convert_uint2(float2 v); -uint2 convert_uint2(int2 v); -uint2 convert_uint2(long2 v); -uint2 convert_uint2(short2 v); -uint2 convert_uint2(uchar2 v); -uint2 convert_uint2(uint2 v); -uint2 convert_uint2(ulong2 v); -uint2 convert_uint2(ushort2 v); - -uint3 convert_uint3(char3 v); -uint3 convert_uint3(double3 v); -uint3 convert_uint3(float3 v); -uint3 convert_uint3(int3 v); -uint3 convert_uint3(long3 v); -uint3 convert_uint3(short3 v); -uint3 convert_uint3(uchar3 v); -uint3 convert_uint3(uint3 v); -uint3 convert_uint3(ulong3 v); -uint3 convert_uint3(ushort3 v); - -uint4 convert_uint4(char4 v); -uint4 convert_uint4(double4 v); -uint4 convert_uint4(float4 v); -uint4 convert_uint4(int4 v); -uint4 convert_uint4(long4 v); -uint4 convert_uint4(short4 v); -uint4 convert_uint4(uchar4 v); -uint4 convert_uint4(uint4 v); -uint4 convert_uint4(ulong4 v); -uint4 convert_uint4(ushort4 v); - -ulong2 convert_ulong2(char2 v); -ulong2 convert_ulong2(double2 v); -ulong2 convert_ulong2(float2 v); -ulong2 convert_ulong2(int2 v); -ulong2 convert_ulong2(long2 v); -ulong2 convert_ulong2(short2 v); -ulong2 convert_ulong2(uchar2 v); -ulong2 convert_ulong2(uint2 v); -ulong2 convert_ulong2(ulong2 v); -ulong2 convert_ulong2(ushort2 v); - -ulong3 convert_ulong3(char3 v); -ulong3 convert_ulong3(double3 v); -ulong3 convert_ulong3(float3 v); -ulong3 convert_ulong3(int3 v); -ulong3 convert_ulong3(long3 v); -ulong3 convert_ulong3(short3 v); -ulong3 convert_ulong3(uchar3 v); -ulong3 convert_ulong3(uint3 v); -ulong3 convert_ulong3(ulong3 v); -ulong3 convert_ulong3(ushort3 v); - -ulong4 convert_ulong4(char4 v); -ulong4 convert_ulong4(double4 v); -ulong4 convert_ulong4(float4 v); -ulong4 convert_ulong4(int4 v); -ulong4 convert_ulong4(long4 v); -ulong4 convert_ulong4(short4 v); -ulong4 convert_ulong4(uchar4 v); -ulong4 convert_ulong4(uint4 v); -ulong4 convert_ulong4(ulong4 v); 
-ulong4 convert_ulong4(ushort4 v); - -ushort2 convert_ushort2(char2 v); -ushort2 convert_ushort2(double2 v); -ushort2 convert_ushort2(float2 v); -ushort2 convert_ushort2(int2 v); -ushort2 convert_ushort2(long2 v); -ushort2 convert_ushort2(short2 v); -ushort2 convert_ushort2(uchar2 v); -ushort2 convert_ushort2(uint2 v); -ushort2 convert_ushort2(ulong2 v); -ushort2 convert_ushort2(ushort2 v); - -ushort3 convert_ushort3(char3 v); -ushort3 convert_ushort3(double3 v); -ushort3 convert_ushort3(float3 v); -ushort3 convert_ushort3(int3 v); -ushort3 convert_ushort3(long3 v); -ushort3 convert_ushort3(short3 v); -ushort3 convert_ushort3(uchar3 v); -ushort3 convert_ushort3(uint3 v); -ushort3 convert_ushort3(ulong3 v); -ushort3 convert_ushort3(ushort3 v); - -ushort4 convert_ushort4(char4 v); -ushort4 convert_ushort4(double4 v); -ushort4 convert_ushort4(float4 v); -ushort4 convert_ushort4(int4 v); -ushort4 convert_ushort4(long4 v); -ushort4 convert_ushort4(short4 v); -ushort4 convert_ushort4(uchar4 v); -ushort4 convert_ushort4(uint4 v); -ushort4 convert_ushort4(ulong4 v); -ushort4 convert_ushort4(ushort4 v); - -uchar4 rsPackColorTo8888(float r, float g, float b); -uchar4 rsPackColorTo8888(float r, float g, float b, float a); -uchar4 rsPackColorTo8888(float3 color); -uchar4 rsPackColorTo8888(float4 color); - -float4 rsUnpackColor8888(uchar4 c); - -float4 rsYuvToRGBA_float4(uchar y, uchar u, uchar v); - -uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v); - -- matrix functions, some of these are not supported yet - --bool rsMatrixInverse(rs_matrix4x4* m); - --bool rsMatrixInverseTranspose(rs_matrix4x4* m); - --void rsMatrixLoad(rs_matrix2x2* destination, float* array); --void rsMatrixLoad(rs_matrix2x2* destination, rs_matrix2x2* source); --void rsMatrixLoad(rs_matrix3x3* destination, float* array); --void rsMatrixLoad(rs_matrix3x3* destination, rs_matrix3x3* source); --void rsMatrixLoad(rs_matrix4x4* destination, float* array); --void rsMatrixLoad(rs_matrix4x4* destination, 
rs_matrix2x2* source); --void rsMatrixLoad(rs_matrix4x4* destination, rs_matrix3x3* source); --void rsMatrixLoad(rs_matrix4x4* destination, rs_matrix4x4* source); - --void rsMatrixLoadFrustum(rs_matrix4x4* m, float left, float right, float bottom, float top, float near, float far); - --void rsMatrixLoadIdentity(rs_matrix2x2* m); --void rsMatrixLoadIdentity(rs_matrix3x3* m); --void rsMatrixLoadIdentity(rs_matrix4x4* m); - --void rsMatrixLoadMultiply(rs_matrix2x2* m, rs_matrix2x2* lhs, rs_matrix2x2* rhs); --void rsMatrixLoadMultiply(rs_matrix3x3* m, rs_matrix3x3* lhs, rs_matrix3x3* rhs); --void rsMatrixLoadMultiply(rs_matrix4x4* m, rs_matrix4x4* lhs, rs_matrix4x4* rhs); - --void rsMatrixLoadOrtho(rs_matrix4x4* m, float left, float right, float bottom, float top, float near, float far); - --void rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far); - --void rsMatrixLoadRotate(rs_matrix4x4* m, float rot, float x, float y, float z); - --void rsMatrixLoadScale(rs_matrix4x4* m, float x, float y, float z); - --void rsMatrixLoadTranslate(rs_matrix4x4* m, float x, float y, float z); - -float2 rsMatrixMultiply(rs_matrix2x2* m, float2 in); -float3 rsMatrixMultiply(rs_matrix3x3* m, float2 in); -float3 rsMatrixMultiply(rs_matrix3x3* m, float3 in); -float4 rsMatrixMultiply(rs_matrix4x4* m, float2 in); -float4 rsMatrixMultiply(rs_matrix4x4* m, float3 in); -float4 rsMatrixMultiply(rs_matrix4x4* m, float4 in); -void rsMatrixMultiply(rs_matrix2x2* m, rs_matrix2x2* rhs); -void rsMatrixMultiply(rs_matrix3x3* m, rs_matrix3x3* rhs); -void rsMatrixMultiply(rs_matrix4x4* m, rs_matrix4x4* rhs); - --void rsMatrixRotate(rs_matrix4x4* m, float rot, float x, float y, float z); - --void rsMatrixScale(rs_matrix4x4* m, float x, float y, float z); - -void rsMatrixSet(rs_matrix2x2* m, 0, 1, float v); -void rsMatrixSet(rs_matrix3x3* m, 2, 0, float v); -void rsMatrixSet(rs_matrix4x4* m, 1, 3, float v); - --void rsMatrixTranslate(rs_matrix4x4* m, float x, float y, 
float z); - --void rsMatrixTranspose(rs_matrix2x2* m); --void rsMatrixTranspose(rs_matrix3x3* m); --void rsMatrixTranspose(rs_matrix4x4* m); - -- quaternion functions - -void rsQuaternionAdd(rs_quaternion* q, rs_quaternion* rhs); - -void rsQuaternionConjugate(rs_quaternion* q); - -float rsQuaternionDot(rs_quaternion* q0, rs_quaternion* q1); - -void rsQuaternionGetMatrixUnit(rs_matrix4x4* m, rs_quaternion* q); - -void rsQuaternionLoadRotate(rs_quaternion* q, float rot, float x, float y, float z); - -void rsQuaternionLoadRotateUnit(rs_quaternion* q, float rot, float x, float y, float z); - -void rsQuaternionMultiply(rs_quaternion* q, rs_quaternion* rhs); -void rsQuaternionMultiply(rs_quaternion* q, float scalar); - -void rsQuaternionNormalize(rs_quaternion* q); - -void rsQuaternionSet(rs_quaternion* q, rs_quaternion* rhs); -void rsQuaternionSet(rs_quaternion* q, float w, float x, float y, float z); - -void rsQuaternionSlerp(rs_quaternion* q, rs_quaternion* q0, rs_quaternion* q1, float t); - -- allocation data access functions, this is a subset, since we don't have all types of allocations - --void rsAllocationCopy1DRange(allocation_1D_global, uint dstOff, uint dstMip, uint count, allocation_1D_global2, uint srcOff, uint srcMip); - --void rsAllocationCopy2DRange(allocation_2D_global, uint dstXoff, uint dstYoff, uint dstMip, rs_allocation_cubemap_face dstFace, uint width, uint height, allocation_2D_global2, uint srcXoff, uint srcYoff, uint srcMip, rs_allocation_cubemap_face srcFace); - -int2 rsAllocationVLoadX_int2(allocation_1D_global, 0); -int2 rsAllocationVLoadX_int2(allocation_2D_global, 24, 25); -int2 rsAllocationVLoadX_int2(allocation_3D_global, 0, 1, 0); -int3 rsAllocationVLoadX_int3(allocation_1D_global, 1); -int3 rsAllocationVLoadX_int3(allocation_2D_global, 27, 28); -int3 rsAllocationVLoadX_int3(allocation_3D_global, 1, 0, 1); -int4 rsAllocationVLoadX_int4(allocation_1D_global, 0); -int4 rsAllocationVLoadX_int4(allocation_2D_global, 29, 30); -int4 
rsAllocationVLoadX_int4(allocation_3D_global, 0, 1, 0); - -void rsAllocationVStoreX_int2(allocation_1D_global, int2 val, 2); -void rsAllocationVStoreX_int2(allocation_2D_global, int2 val, 6, 7); -void rsAllocationVStoreX_int2(allocation_3D_global, int2 val, 0, 1, 0); -void rsAllocationVStoreX_int3(allocation_1D_global, int3 val, 1); -void rsAllocationVStoreX_int3(allocation_2D_global, int3 val, 12, 13); -void rsAllocationVStoreX_int3(allocation_3D_global, int3 val, 1, 0, 1); -void rsAllocationVStoreX_int4(allocation_1D_global, int4 val, 0); -void rsAllocationVStoreX_int4(allocation_2D_global, int4 val, 18, 19); -void rsAllocationVStoreX_int4(allocation_3D_global, int4 val, 0, 1, 0); - -void* rsGetElementAt(allocation_1D_global, 0); -void* rsGetElementAt(allocation_2D_global, 20, 21); -void* rsGetElementAt(allocation_3D_global, 1, 0, 1); -int rsGetElementAt_int(allocation_1D_global, 1); -int rsGetElementAt_int(allocation_2D_global, 22, 23); -int rsGetElementAt_int(allocation_3D_global, 0, 1, 0); - -uchar rsGetElementAtYuv_uchar_U(allocation_YUV_2D_global, 0, 1); - -uchar rsGetElementAtYuv_uchar_V(allocation_YUV_2D_global, 2, 3); - -uchar rsGetElementAtYuv_uchar_Y(allocation_YUV_2D_global, 0, 1); - -float4 rsSample(allocation_1D_global, rs_sampler s, float location); -float4 rsSample(allocation_1D_global, rs_sampler s, float location, float lod); -float4 rsSample(allocation_1D_global, rs_sampler s, float2 location); -float4 rsSample(allocation_1D_global, rs_sampler s, float2 location, float lod); - -void rsSetElementAt(allocation_1D_global, int* ptr, 2); -void rsSetElementAt(allocation_2D_global, int* ptr, 24, 25); -void rsSetElementAt_int(allocation_1D_global, int val, 0); -void rsSetElementAt_int(allocation_2D_global, int val, 26, 27); -void rsSetElementAt_int(allocation_3D_global, int val, 1, 0, 1); -'''.splitlines()) - - -TYPE_MAP = { - 'void' : '', - 'char' : r'\((signed )?char\)', - 'uchar' : r'\(uchar\)', - 'short' : r'\(short\)', - 'ushort' : r'\(ushort\)', - 
'int' : r'\(int\)', - 'uint' : r'\(uint\)', - 'long' : r'\((long )?long\)', - 'ulong' : r'\(ulong\)', - 'float' : r'\(float\)', - 'float2' : r'\(float2\)', - 'float3' : r'\(float3\)', - 'float4' : r'\(float4\)' - } - - -def _build_arg(token): - '''Given a C argument construct an lldb expression for the argument. - - Given a token, which represents a single argument of a C function - declaration, construct an lldb expression for the argument. - - Args: - token: A string representing a single argument to a function. This - can be either [type][name] (e.g. int arg) or [value] (e.g. 5). - - Returns: - The string that is the lldb expression for that argument, e.g. - int_global or 5. - ''' - if len(token.split()) == 1: - # This is a fixed constant. Just take that as output. - return token - - result = token.split()[0] - - # Remove the rs_ prefix, because our globals' names don't have them - if result[:3] == 'rs_': - result = result[3:] - - # If the function expects a pointer, take the address of the global - if result[-1] == '*': - result = '&' + result - result = result[:-1] - - result += '_global' - return result - - -def build_expr(line): - '''Build an lldb expression given a function prototype. - - Given a function declaration, this function will construct an lldb - expression to call it. - - Args: - line: A string representing a function declaration. - - Returns: - The string that is the lldb expression. 
- ''' - tokens = re.findall(r"[^(),;]+", line) - assert len(tokens) > 0 - ret_name = tokens[0].split() - ret = ret_name[0] - name = ret_name[1] - expr = 'expr {0}('.format(name) - - first = True - for tok in tokens[1:]: - if not first: - expr += ', ' - expr += _build_arg(tok) - first = False - - expr += ')' - return ret, expr diff --git a/tests/lldb/tests/harness/__init__.py b/tests/lldb/tests/harness/__init__.py deleted file mode 100644 index 863ac226..00000000 --- a/tests/lldb/tests/harness/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''This module contains utility code used by the test suite''' - -from __future__ import absolute_import - -from . import exception -from . import util_constants -from .exception import TestSuiteException -from .test_base import TestBase -from .util_android import UtilAndroid -from .util_bundle import UtilBundle -from . import RS_funs
\ No newline at end of file diff --git a/tests/lldb/tests/harness/assert_mixins.py b/tests/lldb/tests/harness/assert_mixins.py deleted file mode 100644 index 94a9e229..00000000 --- a/tests/lldb/tests/harness/assert_mixins.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Set of mixins for asserting common RenderScript lldb interactions -That should cut down boilerplate -To use these assertions simply inherit from them along with your -`TestBase`: - - >>> class MyLLDBRenderScriptTest(TestBaseRemote, CoordinateAssertionsMixin): - >>> pass - -This will give you access to the useful assertion methods related to Coordinates - -NOTE: These are strictly clean mixins for `TestBase`. All classes here should -strictly inherit only from `object` -""" - - -class CoordinateAssertionsMixin(object): - def assert_coord_bp_set( - self, breakpoint_expr, x, y=None, z=None, kernel_type='kernel' - ): - ''' - Assert that a breakpoint conditional on a given coordinate is confirmed - by the renderscript breakpoint resolver. - This does not assert test the breakpoint is hit, only registered. - breakpoint_expr: the expression (e.g. the name of a function, or a - file and line). 
- kernel_type: The breakpoint resolver to use: - (reduction|kernel|scriptgroup) - default='kernel' - x: x coordinate: required - y, z: optional y, and z coordinates - ''' - - y = 0 if z is not None and y is None else y - coord_text = ','.join(map(str, filter(lambda p: p is not None, (x, y, z)))) - self.try_command( - 'language renderscript %s breakpoint set %s -c %s' % ( - kernel_type, breakpoint_expr, coord_text - ), - [r'Breakpoint(s) created'], - expected_regex=[ - r'Conditional kernel breakpoint on coordinate.+%d,\s*%d,\s*%d' % ( - x or 0, y or 0, z or 0 - ) - ] - ) - - def assert_coord_stop( - self, soname, func_name, x, y=None, z=None, stopped=True - ): - '''Run lldb commands to check that coordinates match expected values. - - Args: - (x, y, z): The expected coordinates. - soname: The name of the renderscript script module e.g. 'allocs' - for librs.allocs.so - func_name: String that is the name of the kernel function - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - - if stopped: - self.try_command( - 'process continue', - expected_regex=[ - r'resuming', - r'Process \d+ stopped', - r'stop reason = breakpoint', - r'frame #0: (0x[0-9a-fA-F]+ )?librs.%s.so`%s' % ( - soname, func_name) - ] - ) - else: - self.try_command( - 'bt', - expected_regex=[ - 'stop reason = breakpoint', - 'frame #0:', - 'librs.*\.so`%s' % kernel - ] - ) - - self.try_command( - 'language renderscript kernel coordinate', - '(%d, %d, %d)' % (x, y or 0, z or 0) - ) diff --git a/tests/lldb/tests/harness/decorators.py b/tests/lldb/tests/harness/decorators.py deleted file mode 100644 index e4a49b3a..00000000 --- a/tests/lldb/tests/harness/decorators.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function, absolute_import - -import functools -import warnings -import inspect - - -class skip_conditional(object): - ''' - Test method decorator that marks a test method as ignorable if the given - arguments evaluate as Truthy. If the argument is callable, then it is called - and the return value is used as the predicate. - - >>> class MyTestClass(TestBase): - ... def test_something(self): - ... pass - ... - ... @skip_conditional(not sys.platform.startswith("linux")) - ... def test_some_linux_behaviour(self): - ... assert "vmlinuz" in open("/proc/cmdline").read() - ... - ... @skip_conditional(lambda : True): - ... def test_that_never_runs(self): - ... 
pass - ''' - def __init__(self, skip_condition, message="skipped"): - self._skip_condition = skip_condition - self._message = message - - def __call__(self, func): - @functools.wraps(func) - def inner(*args, **kwargs): - skip_condition = self._skip_condition - if callable(skip_condition): - # args[0] is ``self`` - skip_condition = skip_condition(args[0]) - - if skip_condition: - print("skipping %r - %s" % (func, self._message)) - return True - return func(args[0]) - - return inner - - -class skip_test(skip_conditional): - '''' - Unconditionally skip a test - ''' - def __init__(self, skip_condition, *args, **kwargs): - super(skip_test, self).__init__(True, *args, **kwargs) - - -java_only_test = lambda: skip_conditional(lambda self: not self.app_type == 'java') - -cpp_only_test = lambda: skip_conditional(lambda self: not self.app_type == 'cpp') - -jni_only_test = lambda: skip_conditional(lambda self: not self.app_type == 'jni') - - -def wimpy(func): - ''' - Mark a test as 'wimpy' that is - a function specifically known to be quick-running. - This implementation simply adds the `.wimpy` attribute to the decorated function - and returns it, otherwise unmodified - ''' - func.wimpy = True - - return func - - -class ordered_test(object): - '''Set the ordered attribute on function''' - def __init__(self, order): - self._order = order - - def __call__(self, func): - func.test_order = self._order - return func - - -class deprecated(object): - """ - method or function decorator used to warn of pending feature removal: - - >>> @deprecated() - ... def myfunc(): - ... return 'hello' - ... - >>> myfunc() - DeprecationWarning: `__main__.myfunc()` is deprecated and will be removed soon. - 'hello' - >>> class MyClass(object): - ... @deprecated(alternative_feature='print') - ... def myprint(self, *args, **kwargs): - ... print(*args, **kwargs) - ... 
- >>> obj = MyClass() - >>> obj.myprint("hello") - DeprecationWarning: `__main__.MyClass.myfunc()` is deprecated and will be removed soon. Use 'print' instead. - hello - """ - - def __init__( - self, - alternative_feature=None, - removal_date='soon', - exception=UserWarning - ): - self.alternative_feature_message = ( - alternative_feature and 'use %r instead' % alternative_feature or '' - ) - self.exception = exception - self.removal_date = removal_date - - def __call__(self, func): - class_name = '' - if getattr(func, 'im_class', None): - class_name = '%s.' % func.im_class.__name__ - - if getattr(func, 'im_func', None): - func_name = func.im_func.func_name - else: - func_name = func.func_name - - module_name = getattr(func, '__module__') - - warning = "`%s.%s%s()` is deprecated and will be removed %s. %s" % ( - module_name, - class_name, - func_name, - self.removal_date, - self.alternative_feature_message - ) - - @functools.wraps(func) - def inner(*args, **kwargs): - if not getattr(func, 'deprecation_warned', False): - warnings.warn(warning, self.exception, 2) - func.deprecation_warned = True - return func(*args, **kwargs) - - return inner diff --git a/tests/lldb/tests/harness/exception.py b/tests/lldb/tests/harness/exception.py deleted file mode 100644 index 2cfc99f9..00000000 --- a/tests/lldb/tests/harness/exception.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains TestSuiteException.''' - -from __future__ import absolute_import - -class TestSuiteException(Exception): - '''Exception that is thrown whenever an internal error is encountered. - - Just contains a message. - ''' - pass - -class DisconnectedException(Exception): - '''Exception that is thrown if lldb-server unexpectedly disconnected. - - Just contains a message. - ''' - pass - - -class FailFastException(TestSuiteException): - '''Quick Bailout''' - pass - - -class TestIgnoredException(TestSuiteException): - '''Raised when a testcase is ignored.''' - pass diff --git a/tests/lldb/tests/harness/test_base.py b/tests/lldb/tests/harness/test_base.py deleted file mode 100644 index 99029898..00000000 --- a/tests/lldb/tests/harness/test_base.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains TestBase, the base class of all tests.''' - -from __future__ import absolute_import - -import logging -import os -import re -import tempfile -import inspect -import traceback - -from .exception import DisconnectedException, TestSuiteException - -from . import util_log - - -class TestBase(object): - '''Base class for all tests. Provides some common functionality.''' - - bundle_target = {} - - class TestFail(Exception): - '''Exception that is thrown when a line in a test fails. 
- - This exception is thrown if a lldb command does not return the expected - string. - ''' - pass - - def __init__(self, device_port, device, timer, app_type, wimpy=False, **kwargs): - # Keep argument names for documentation purposes. This method is - # overwritten by test_base_remote. - # pylint: disable=unused-argument - self._lldb = None # handle to the lldb module - self._ci = None # instance of the lldb command interpreter for this test - self._timer = timer # timer instance, to check whether the test froze - self.app_type = app_type # The type of bundle that is being executed - self.wimpy = wimpy - - def setup(self, android): - '''Set up environment for the test. - - Override to specify commands to be run before the test APK launch. - Useful for setting Android properties or environment variables. See also - the teardown method. - - Args: - android: Handler to the android device, see the UtilAndroid class. - ''' - pass - - def teardown(self, android): - '''Clean up environment after test. - - Override this procedure to specify commands to be run after the test has - finished. This method is run regardless the outcome of the test. - - Args: - android: Handler to the android device, see the UtilAndroid class. - ''' - pass - - def run(self, dbg, remote_pid, lldb): - '''Execute the actual test suite. - - Args: - dbg: The instance of the SBDebugger that is used to test commands. - remote_pid: The integer that is the process id of the binary that - the debugger is attached to. - lldb: A handle to the lldb module. - - Returns: - A list of (test, failure) tuples. 
- ''' - log = util_log.get_logger() - - def predicate(obj): - '''check whether we're interested in the function''' - if not callable(obj): - return False - if self.wimpy and not getattr(obj, 'wimpy', False): - log.debug("skipping non-wimpy test in wimpy mode:%r", obj) - return False - return True - - test_methods = [ - method for name, method in inspect.getmembers(self, predicate) - if name.startswith('test_') - ] - log.debug("Found the following tests %r", test_methods) - test_errors = [] - - for test in sorted( - test_methods, - key=lambda item: getattr(item, 'test_order', float('Inf')) - ): - try: - log.info("running test %r", test.__name__) - result = test() - except (self.TestFail, TestSuiteException) as e: - test_errors.append((method, e)) - - return test_errors - - def post_run(self): - '''Clean up after test execution.''' - pass - - def assert_true(self, cond): - '''Check a given condition and raise TestFail if it is False. - - Args: - cond: The boolean condition to check. - - Raises: - TestFail: The condition was false. - ''' - if not cond: - raise self.TestFail() - - def assert_lang_renderscript(self): - '''Check that LLDB is stopped in a RenderScript frame - - Use the LLDB API to check that the language of the current frame - is RenderScript, fail otherwise. - - Raises: - TestFail: Detected language not RenderScript. - ''' - assert self._lldb - assert self._ci - - proc = self._ci.GetProcess() - frame = proc.GetSelectedThread().GetSelectedFrame() - lang = frame.GetCompileUnit().GetLanguage() - - if lang != self._lldb.eLanguageTypeExtRenderScript: - raise self.TestFail('Frame language not RenderScript, instead {0}' - .format(lang)) - - def do_command(self, cmd): - '''Run an lldb command and return the output. - - Args: - cmd: The string representing the lldb command to run. - - Raises: - TestFail: The lldb command failed. 
- ''' - assert self._lldb - assert self._ci - - log = util_log.get_logger() - res = self._lldb.SBCommandReturnObject() - - log.info('[Command] {0}'.format(cmd)) - - # before issuing the command, restart the current timer to check - # whether the command is going to freeze the test - if self._timer: - self._timer.reset() - - self._ci.HandleCommand(cmd, res) - - if not res.Succeeded(): - error = res.GetError() - error = error if error else res.GetOutput() - raise self.TestFail('The command "{0}" failed with the error: {1}' - .format(cmd, error if error else '<N/a>')) - - output = res.GetOutput() or '' - log.debug('[Output] {0}'.format(output.rstrip())) - - return output - - def try_command(self, cmd, expected=None, expected_regex=None): - '''Run an lldb command and match the expected response. - - Args: - cmd: The string representing the lldb command to run. - expected: A list of strings that should be present in lldb's - output. - expected_regex: A list of regular expressions that should - match lldb's output. - - Raises: - TestFail: One of the expected strings were not found in the lldb - output. - - Returns: - str: raw lldb command output. 
- ''' - assert self._lldb - assert self._ci - log = util_log.get_logger() - output = '' - try: - output = self.do_command(cmd) - - if 'lost connection' in output: - raise DisconnectedException('Lost connection to lldb-server.') - - # check the expected strings - if expected: - self._match_literals(output, expected) - - # check the regexp patterns - if expected_regex: - self._match_regexp_patterns(output, expected_regex) - - except self.TestFail as exception: - # if the command failed, ensure the output retrieved from the - # command is printed even in verbose mode - if log.getEffectiveLevel() > logging.DEBUG: - log.error('[Output] {0}'.format(output.rstrip() if output - else '<empty>')) - - # print the back trace, it should help to identify the error in - # the test - backtrace = ['[Back trace]'] - for (filename, line, function, text) in \ - traceback.extract_stack()[:-1]: - backtrace.append(' [{0} line: {2} fn: {1}] {3}'.format( - filename, function, line, text - ) - ) - log.error('\n'.join(backtrace)) - log.error('[TEST ERROR] {0}'.format(exception.message)) - raise # pass through - - return output - - def _match_literals(self, text, literals): - '''Checks the text against the array of literals. - - Raises a TestFail exception in case one of the literals is not contained - in the text. - - Args: - text: String, it represents the text to match. - literals: an array of string literals to match in the output. - - Throws: self.TestFail: if it cannot match one of the literals in - the output. - ''' - for string in literals: - if string not in text: - raise self.TestFail('Cannot find "{0}" in the output' - .format(string)) - - def _match_regexp_patterns(self, text, patterns): - '''Checks the text against the array of regular expression patterns. - - Raises a TestFail exception in case one of the patterns is not matched - in the given text. - - Args: - text: String, it represents the text to match. 
- patterns: an array of strings, each of them representing a regular - expression to match in text. - - Throws: self.TestFail: if it cannot match one of the literals in - the output. - ''' - log = util_log.get_logger() - - for regex in patterns: - match = re.search(regex, text) - if not match: - raise self.TestFail('Cannot match the regexp "{0}" in ' - 'the output'.format(regex)) - else: - msg = 'Found match to regex {0}: {1}'.format(regex, - match.group()) - log.debug(msg) - - @staticmethod - def get_tmp_file_path(): - '''Get the path of a temporary file that is then deleted. - - Returns: - A string that is the path to a temporary file. - ''' - file_desc, name = tempfile.mkstemp() - os.close(file_desc) - os.remove(name) - return name - - -class TestBaseNoTargetProcess(TestBase): - '''lldb target that doesn't require a binary to be running.''' - - def get_bundle_target(self): - '''Get bundle executable to run. - - Returns: None - ''' - return None - - @property - def bundle_target(self): - return self.get_bundle_target() - - def run(self, dbg, remote_pid, lldb): - '''Execute the test case. - - Args: - dbg: The instance of the SBDebugger that is used to test commands. - lldb: A handle to the lldb module. - - Returns: - True: test passed, False: test failed. - ''' - self._lldb = lldb - self._dbg = dbg - self._ci = dbg.GetCommandInterpreter() - assert self._ci.IsValid() - return super(TestBaseNoTargetProcess, self).run(self, dbg, remote_pid) diff --git a/tests/lldb/tests/harness/test_base_remote.py b/tests/lldb/tests/harness/test_base_remote.py deleted file mode 100644 index c430edff..00000000 --- a/tests/lldb/tests/harness/test_base_remote.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the base class TestBaseRemote''' - -from __future__ import absolute_import - -import os -import re - -from .test_base import TestBase -from . import util_log - - -class TestBaseRemote(TestBase): - '''Base class for all tests that connect to a remote device. - - Provides common functionality to set up the connection and tear it down. - ''' - - def __init__(self, device_port, device, timer, *args, **kwargs): - super(TestBaseRemote, self).__init__(device_port, device, timer, *args, **kwargs) - # port used by lldb-server on the device. - self._device_port = device_port - self._platform = None - # id of the device that adb will communicate with. - self._device = device - - def set_src_map(self, file_name, new_src_path): - '''Call lldb to set the source mapping of a given file. - - Set lldb's source mapping of a given file to a given path. This can be - used to make the test suite independent of where an APK was compiled. - - Args: - file_name: String, which is the name of the file whose mapping is - to be changed - new_src_path: String which is the new absolute path to the source - file. 
- ''' - line_table = self.do_command('target modules dump line-table ' - + file_name) - - lines = line_table.split('\n') - if 'Line table for' not in lines[0]: - raise self.TestFail('Could not determine source path of ' - + file_name) - - # Expecting output like: - # (lldb) target modules dump line-table scalars.rs - # Line table for /home/jenkins/workspace/grd-aosp-parameterised-build/ - # merge_151216/frameworks/rs/tests/lldb/java/BranchingFunCalls/src/rs/ - # frameworks/rs/tests/lldb/java/BranchingFunCalls/src/rs/scalars.rs in - # `librs.scalars.so - # 0xb30f2374: /home/jenkins/workspace/grd-aosp-parameterised-build/ - # merge_151216/frameworks/rs/tests/lldb/java/BranchingFunCalls/src/rs/ - # scalars.rs:46 - # ... - # For some reason the first line contains a mangled path? - old_path = re.findall(r"[^ :]+", lines[1])[1] - old_dir = os.path.dirname(old_path) - - self.try_command('settings set target.source-map %s %s' - % (old_dir, new_src_path), ['']) - - def post_run(self): - '''Clean up after execution.''' - if self._platform: - self._platform.DisconnectRemote() - - def _connect_to_platform(self, lldb_module, dbg, remote_pid): - '''Connect to an lldb platform that has been started elsewhere. - - Args: - lldb_module: A handle to the lldb module. - dbg: The instance of the SBDebugger that should connect to the - server. - remote_pid: The integer that is the process id of the binary that - the debugger should attach to. - - Returns: - True if the debugger successfully attached to the server and - process. 
- ''' - # pylint: disable=too-many-return-statements - remote_pid = str(remote_pid) - - log = util_log.get_logger() - - err1 = dbg.SetCurrentPlatform('remote-android') - if err1.Fail(): - log.fatal(err1.GetCString()) - return False - - self._platform = dbg.GetSelectedPlatform() - if not self._platform: - return False - - connect_string = \ - 'adb://{0}:{1}'.format(self._device, self._device_port) - opts = lldb_module.SBPlatformConnectOptions(connect_string) - - for _ in range(2): - err2 = self._platform.ConnectRemote(opts) - if err2.Fail(): - log.error(err2.GetCString()) - - if 'Connection refused' in err2.GetCString(): - log.warning('Connection to lldb server was refused. ' - 'Trying again.') - else: - # Unknown error. Don't try again. - return False - else: - # Success - break - else: - log.fatal('Not trying again, maximum retries exceeded.') - return False - - target = dbg.CreateTarget(None) - if not target: - return False - - dbg.SetSelectedTarget(target) - listener = lldb_module.SBListener() - err3 = lldb_module.SBError() - process = target.AttachToProcessWithID(listener, int(remote_pid), err3) - if err3.Fail() or not process: - log.fatal(err3.GetCString()) - return False - - return True - - def run(self, dbg, remote_pid, lldb): - '''Execute the actual testsuite. - - Args: - dbg: The instance of the SBDebugger that is used to test commands. - remote_pid: The integer that is the process id of the binary that - the debugger is attached to. - lldb: A handle to the lldb module. - - Returns: list of (test, failure) tuples. 
- - ''' - assert dbg - assert remote_pid - assert lldb - - self._lldb = lldb - - self.assert_true(self._connect_to_platform(lldb, dbg, remote_pid)) - self._ci = dbg.GetCommandInterpreter() - assert self._ci - - self.assert_true(self._ci.IsValid()) - self.assert_true(self._ci.HasCommands()) - - return super(TestBaseRemote, self).run(dbg, remote_pid, lldb) - diff --git a/tests/lldb/tests/harness/util_android.py b/tests/lldb/tests/harness/util_android.py deleted file mode 100644 index a0cf700b..00000000 --- a/tests/lldb/tests/harness/util_android.py +++ /dev/null @@ -1,736 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the class UtilAndroid, providing utility method to -interface with Android ADB.''' - -from __future__ import absolute_import - -import logging -import re -import subprocess -import time -import collections -import multiprocessing -try: - # Python 3 - import queue -except ImportError: - import Queue as queue - -from .exception import TestSuiteException -from . 
import util_log - - -class UtilAndroid(object): - '''Provides some utility methods that interface with Android using adb.''' - # pylint: disable=too-many-public-methods - - def __init__(self, adb_path, lldb_server_path_device, device): - # The path to the adb binary on the local machine - self._path_adb = adb_path - # The path to the lldb server binary on the device - self._path_lldbserver = lldb_server_path_device - self._log = util_log.get_logger() - self.device = device - self._prop_stacks = collections.defaultdict(list) - return - - @staticmethod - def _validate_string(string): - '''Check that a string is valid and not empty. - - Args: - string: The string to be checked. - ''' - assert isinstance(string, str) - assert len(string) > 0 - - def adb(self, args, async=False, device=True, timeout=None): - '''Run an adb command (async optional). - - Args: - args: The command (including arguments) to run in adb. - async: Boolean to specify whether adb should run the command - asynchronously. - device: boolean to specify whether the serial id of the android - device should be inserted in the adb command. - timeout: it specifies the number of seconds to wait for - a synchronous invocation before aborting. If unspecified or - None it waits indefinitely for the command to complete. - - Raises: - ValueError: it can be caused by any of the following situations: - - when both the combination async=True and timeout are - given. - - when a timeout <= 0 is specified. - - Returns: - If adb was synchronously run and the command completed by the - specified timeout, a string which is the output (standard out and - error) from adb. Otherwise it returns None. 
- ''' - - # Form the command - if device: - cmd = '{0} -s {1} {2}'.format(self._path_adb, self.device, args) - else: - cmd = '{0} {1}'.format(self._path_adb, args) - - self._log.debug('Execute ADB: %s', cmd) - - if timeout is None: - # local invocation - return_code, output = UtilAndroid._execute_command_local(cmd, async) - - else: - # remote invocation - if async: - raise ValueError('Invalid combination: asynchronous invocation ' - 'with timeout specified') - - return_code, output = UtilAndroid._execute_command_remote(cmd, - timeout) - - if return_code is None: - self._log.warn('[ADB] The command timed out: %s', cmd) - - # log the output message - if output is not None: - self._adb_log_output(cmd, output, return_code) - - return output - - def adb_retry(self, args, max_num_attempts, timeout): - '''Attempt to execute the given adb command a certain number of times. - - The function executes the given command through adb, waiting for its - completion up to 'timeout' seconds. If the command completes then it - returns its output. Otherwise it aborts the execution of the adb - command and re-issues it anew with the same parameters. In case of - timeout this process is repeated up to 'max_num_attempts'. - - The purpose of this function is to handle the cases when, for some - reason, a command sent to 'adb' freezes, blocking the whole test suite - indefinitely. - - Args: - args: The command (including arguments) to run in adb. - max_num_attempts: the max number of attempts to repeat the command - in case of timeout. - timeout: it specifies the number of seconds to wait for the adb - command to complete. - - Raises: - ValueError: when the parameter timeout is invalid (None or <= 0). - - Returns: - If adb was synchronously run and the command completes by the - specified timeout, a string which is the output (standard out and - error) from adb. Otherwise it returns None. 
- ''' - if timeout is None or timeout <= 0: - raise ValueError('Invalid value for timeout') - - output = None - - for attempt in range(max_num_attempts): - self._log.debug('[ADB] Attempt #%d: %s', attempt + 1, args) - output = self.adb(args, False, True, timeout) - if output: - break - - return output - - def _adb_log_output(self, cmd, output, return_code): - '''Save in the log the command & output from `adb`. - - Internal function, helper to record in the log the issued adb command - together with its output and return code. - - Params: - cmd: string, the command issued to `adb`. - output: string, the output retrieved from `adb`. - return_code: int, the return code from `adb`. - ''' - - message = output.strip() - - # if return_code != 0, we wish to also record the command executed - # (which occurs if and only if we are in verbose mode) - is_warning = return_code != 0 - threshold = self._log.getEffectiveLevel() - if is_warning and threshold > logging.DEBUG: - self._log.warn("[ADB] Command executed: {0}".format(cmd)) - - level = logging.WARNING if is_warning else logging.DEBUG - if message: - # if message is composed by multiple lines, then print it after - # the log preamble - if re.search('\n', message): - message = '\n' + message - else: - message = '<empty>' - - self._log.log(level, 'RC: {0}, Output: {1}'.format(return_code, - message)) - - def check_adb_alive(self): - '''Ping the device and raise an exception in case of timeout. - - It sends a ping message through 'adb shell'. The emulator/device should - echo the same message back by one minute. If it does not, it raises - a TestSuiteException. - - Purpose of this method is to check whether 'adb' became frozen or - stuck. - - Raises: - TestSuiteException: in case the device/emulator does not reply by - one minute or the `ping' message is not echoed - back. 
- ''' - token = 'PING' - log = util_log.get_logger() - cmd = "echo {0}".format(token) - - tries = 10 - try_number = tries - while try_number > 0: - log.debug('Sending a ping through "adb shell" (try #%s)...', - try_number) - output = self.shell(cmd, False, 60) - - if output is None: - raise TestSuiteException( - 'Timeout when pinging the device/emulator through ' - '"adb shell". Is "adb" stuck or dead?') - elif token not in output: - log.debug('Ping failed. Cannot match the token "%s" in "adb ' - 'shell %s"', token, cmd) - else: - log.debug('Pong message received') - return - - try_number -= 1 - time.sleep(5) - - raise TestSuiteException('Cannot ping the device/emulator through ' - '"adb shell". Tried %s times. Is "adb" stuck ' - 'or dead?' % tries) - - def shell(self, cmd, async=False, timeout=None): - '''Run a command via the adb shell. - - Args: - cmd: The command (including arguments) to run in the adb shell. - async: Boolean to specify whether adb should run the command - asynchronously. - timeout: it specifies the number of seconds to wait for - a synchronous invocation before aborting. If unspecified or - None it waits indefinitely for the command to complete - - Returns: - If adb was synchronously run, a string which is the output (standard - out and error) from adb. Otherwise None. - ''' - return self.adb('shell "{0}"'.format(cmd), async, True, timeout) - - def find_app_pid(self, process_name): - '''Find the process ID of a process with a given name. - - If more than one instance of the process is running return the first pid - it finds. - - Args: - process_name: A string representing the name of the package or - binary for which the id should be found. I.e. the - string or part of the string that shows up in the "ps" - command. - - Returns: - An integer representing the id of the process, or None if it was not - found. 
- ''' - self._validate_string(process_name) - - pid_output = self.shell('pidof ' + process_name) - pid_output = re.sub(r'\*.+\*', '', pid_output) - pids = pid_output.split() - - if len(pids) < 1: - self._log.warn('Unable to find pid of: {0}'.format(process_name)) - return None - - if len(pids) > 1: - self._log.warn('Found multiple instances of {0} running: {1}' - .format(process_name, pids)) - - try: - pid = int(pids[0]) - self._log.info('App pid found: {0}'.format(pids[0])) - return pid - except ValueError: - return None - - def adb_root(self): - '''Set adb to be in root mode.''' - self.adb('root') - - def _adb_remount(self): - '''Remount the filesystem of the device.''' - self.adb('remount') - - def validate_adb(self): - '''Validate adb that it can be run. - - Raises: - TestSuiteException: Unable to validate that adb exists and runs - successfully. - ''' - out = self.adb('version', False, False) - if out and 'Android' in out and 'version' in out: - self._log.info('adb found: {0}'.format(out)) - return None - raise TestSuiteException('unable to validate adb') - - def is_booted(self): - ''' Check if the device/emulator has finished booting. - - Returns: True if the property sys.boot_completed is true, False - otherwise. - ''' - return self._get_prop('sys.boot_completed').strip() == '1' - - def validate_device(self, check_boot=True, device_substring=''): - '''Validate that there is at least one device. - - Args: - check_boot: Boolean to specify whether to check whether the device - has finished booting as well as being present. - device_substring: String that needs to be part of the name of the - device. - - Raises: - TestSuiteException: There was a failure to run adb to list the - devices or there is no device connected or - multiple devices connected without the user - having specified the device to use. 
- ''' - - out = self.adb('devices', False, False) - if not 'List of devices attached' in out: - raise TestSuiteException('Unable to list devices') - - lines = out.split('\n') - found_device = False # True if the specified device is found - devices = [] - - for line in lines[1:]: - if '\tdevice' in line and device_substring in line: - device = line.split()[0] - devices.append(device) - if self.device: - if self.device == device: - found_device = True - - if len(devices) == 0: - raise TestSuiteException('adb is unable to find a connected ' - 'device/emulator to test.') - - if not self.device: - if len(devices) == 1: - self.device = devices[0] - else: - raise TestSuiteException('Multiple devices connected,' - 'specify -d device id.') - else: - if not found_device: - raise TestSuiteException('Couldn\'t find the device {0} that ' - 'was specified, please check -d ' - 'argument'.format(self.device)) - - if check_boot and not self.is_booted(): - raise TestSuiteException( - 'The device {0} has not yet finished booting.' - .format(self.device)) - - def device_with_substring_exists(self, device_substring): - '''Check whether a device exists whose name contains a given string. - - Args: - device_substring: String that is part of the name of the device to - look for. - - Raises: - TestSuiteException: There was a failure to run adb to list the - devices. - ''' - out = self.adb('devices', False, False) - if not 'List of devices attached' in out: - raise TestSuiteException('Unable to list devices') - - lines = out.split('\n') - - for line in lines[1:]: - if '\tdevice' in line: - device = line.split()[0] - if device.find(device_substring) != -1: - return True - - return False - - def get_device_id(self): - '''Return ID of the device that will be used for running the tests on. - - Returns: - String representing device ID. - ''' - return self.device - - def _kill_pid(self, pid): - '''Kill a process identified by its pid by issuing a "kill" command. 
- - Args: - pid: The integer that is the process id of the process to be killed. - ''' - self.shell('kill -9 ' + str(pid)) - - def stop_app(self, package_name): - '''Terminate an app by calling am force-stop. - - Args: - package_name: The string representing the name of the package of the - app that is to be stopped. - ''' - self._validate_string(package_name) - self.shell('am force-stop ' + package_name) - - def kill_process(self, name): - '''Kill a process identified by its name (package name in case of apk). - - Issues the "kill" command. - - Args: - name: The string representing the name of the binary of the process - that is to be killed. - - Returns: - True if the kill command was executed, False if it could not be - found. - ''' - pid = self.find_app_pid(name) - if pid: - self._kill_pid(pid) - return True - return False - - def kill_all_processes(self, name): - '''Repeatedly try to call "kill" on a process to ensure it is gone. - - If the process is still there after 5 attempts reboot the device. - - Args: - name: The string representing the name of the binary of the process - that is to be killed. - - Raises: - TestSuiteException: If the process could not be killed after 5 - attempts and the device then failed to boot - after rebooting. - ''' - - # try 5 times to kill this process - for _ in range(1, 5): - if not self.kill_process(name): - return - # stalled process must reboot - self._reboot_device() - - def kill_servers(self): - '''Kill all gdbserver and lldb-server instances. - - Raises: - TestSuiteException: If gdbserver or lldb-server could not be killed - after 5 attempts and the device then failed to - boot after rebooting. - ''' - self.kill_all_processes('gdbserver') - self.kill_all_processes('lldb-server') - - def launch_elf(self, binary_name): - '''Launch a binary (compiled with the NDK). - - Args: - binary_name: The string representing the name of the binary that is - to be launched. 
- - Returns: - Boolean, failure if the app is not installed, success otherwise. - ''' - # Ensure the apk is actually installed. - output = self.shell('ls /data/ | grep ' + binary_name) - if binary_name not in output: - return False - - stdout = self.shell('exec /data/' + binary_name, True) - self._log.info(str(stdout)) - - return True - - def wait_for_device(self): - '''Ask ADB to wait for a device to become ready.''' - self.adb('wait-for-device') - - def _reboot_device(self): - '''Reboot the remote device. - - Raises: - TestSuiteException: If the device failed to boot after rebooting. - ''' - self.adb('reboot') - self.wait_for_device() - # Allow 20 mins boot time to give emulators such as MIPS enough time - sleeping_countdown = 60*20 - while not self.is_booted(): - time.sleep(1) - sleeping_countdown -= 1 - if sleeping_countdown == 0: - raise TestSuiteException('Failed to reboot. Terminating.') - - self.adb_root() - self.wait_for_device() - self._adb_remount() - self.wait_for_device() - - def launch_app(self, name, activity): - '''Launch a Renderscript application. - - Args: - name: The string representing the name of the app that is to be - launched. - activity: The string representing the activity of the app that is to - be started. - - Returns: - Boolean, failure if the apk is not installed, success otherwise. - ''' - assert name and activity - - # Ensure the apk is actually installed. - output = self.shell('pm list packages ' + name) - if not output: - return False - - cmd = 'am start -S -W {0}/{0}.{1}'.format(name, activity) - stdout = self.shell(cmd) - - self._log.info(str(stdout)) - - return True - - def launch_lldb_platform(self, port): - '''Launch lldb server and attach to target app. - - Args: - port: The integer that is the port on which lldb should listen. 
- ''' - cmd = "export LLDB_DEBUGSERVER_PATH='{0}';{0} p --listen *:{1}"\ - .format(self._path_lldbserver, port) - self.shell(cmd, True) - time.sleep(5) - - def forward_port(self, local, remote): - '''Use adb to forward a device port onto the local machine. - - Args: - local: The integer that is the local port to forward. - remote: The integer that is the remote port to which to forward. - ''' - cmd = 'forward tcp:%s tcp:%s' % (str(local), str(remote)) - self.adb(cmd) - - def remove_port_forwarding(self): - '''Remove all of the forward socket connections open in adb. - - Avoids a windows adb error where we can't bind to a listener - because too many files are open. - ''' - self.adb('forward --remove-all') - - def _get_prop(self, name): - '''Get the value of an Android system property. - - Args: - name: Name of the property of interest [string]. - - Returns: - Current value of the property [string]. - ''' - return self.shell('getprop %s' % str(name)) - - def _set_prop(self, name, value): - '''Set the value of an Android system property. - - Args: - name: Name of the property of interest [string]. - value: Desired new value for the property [string or integer]. - ''' - self.shell("setprop %s '%s'" % (str(name), str(value))) - - def push_prop(self, name, new_value): - '''Save the value of an Android system property and set a new value. - - Saves the old value onto a stack so it can be restored later. - - Args: - name: Name of the property of interest [string]. - new_value: Desired new value for the property [string or integer]. - ''' - old_value = self._get_prop(name) - self._set_prop(name, new_value) - self._prop_stacks[name].append(old_value.strip()) - - def pop_prop(self, name): - '''Restore the value of an Android system property previously set by - push_prop. - - Args: - name: Name of the property of interest [string]. - - Returns: - Current value of the property [string]. 
- ''' - old_value = self._prop_stacks[name].pop() - self._set_prop(name, old_value) - - def reset_all_props(self): - '''Restore all the android properties to the state before the first push - - This is equivalent to popping each property the number of times it has - been pushed. - ''' - for name in self._prop_stacks: - if self._prop_stacks[name] != []: - self._set_prop(name, self._prop_stacks[name][0]) - self._prop_stacks[name] = [] - - def make_device_writeable(self): - ''' Ensure the device is full writable, in particular the system folder. - - This disables verity and remounts. - ''' - output = self.adb('disable-verity') - - # if the remote is an emulator do not even try to reboot - # otherwise check whether a reboot is advised - if (self._get_prop('ro.boot.qemu') != '1' and output and - 'Now reboot your device for settings to take effect' in output): - self._reboot_device() - - self._adb_remount() - self.wait_for_device() - self.adb_root() - self.wait_for_device() - - @staticmethod - def _execute_command_local(command, async=False): - '''Execute the given shell command in the same process. - - Args: - command: String, the command to execute - async: Boolean to specify whether adb should run the command - asynchronously. - - Returns: - if async == False, it returns a tuple with the return code and - the output from the executed command. Otherwise the tuple - (None, None). - ''' - proc = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) - if async: - return None, None - - # read the whole output from the command - with proc.stdout as file_proc: - output = ''.join(line for line in file_proc) - - # release the process state - proc.terminate() - return_code = proc.wait() - - return return_code, output - - @staticmethod - def _execute_command_remote(command, timeout): - '''Execute the given shell command remotely, in a separate process. - - It spawns an ad hoc process to execute the given command. 
It waits up - to timeout for the command to complete, otherwise it aborts the - execution and returns None. - - Args: - command: String, the command to execute. - timeout: the number of seconds to wait for the command to complete. - - Returns: - a pair with the return code and the output from the command, if it - completed by the specified 'timeout' seconds. Otherwise the tuple - (None, None). - ''' - - channel = multiprocessing.Queue() - proc = multiprocessing.Process( - target=_handle_remote_request, - name="Executor of `{0}'".format(command), - args=(command, channel) - ) - - # execute the command - proc.start() - return_code = None - output = None - - # wait for the result - try: - return_code, output = channel.get(True, timeout) - except queue.Empty: - # timeout hit, the remote process has not fulfilled our request by - # the given time. We are going to return <None, None>, nothing to - # do here as it already holds return_code = output = None. - pass - - # terminate the helper process - proc.terminate() - - return return_code, output - - -def _handle_remote_request(command, channel): - '''Entry point for the remote process. - - It executes the given command and reports the result into the channel. - This function is supposed to be only called by - UtilAndroid._execute_command_remote to handle the inter-process - communication. - - Args: - command: the command to execute. - channel: the channel to communicate with the caller process. - ''' - channel.put(UtilAndroid._execute_command_local(command)) - diff --git a/tests/lldb/tests/harness/util_bundle.py b/tests/lldb/tests/harness/util_bundle.py deleted file mode 100644 index 68954cb2..00000000 --- a/tests/lldb/tests/harness/util_bundle.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the class UtilBundle, representing a collection of RS -binaries.''' - -from __future__ import absolute_import - -import os -import time -from . import util_constants -from . import util_log -from .exception import TestSuiteException - - -class UtilBundle(object): - '''Represents the collection of RS binaries that are debugged.''' - - # Map of binary name to package name of all Java apps debugged - _tests_apk = { - 'JavaInfiniteLoop': 'com.android.rs.infiniteloop', - 'JavaDebugWaitAttach': 'com.android.rs.waitattachdebug', - 'JavaNoDebugWaitAttach': 'com.android.rs.waitattachnodebug', - 'BranchingFunCalls': 'com.android.rs.branchingfuncalls', - 'KernelVariables': 'com.android.rs.kernelvariables', - 'Allocations': 'com.android.rs.allocations', - 'MultipleRSFiles': 'com.android.rs.multiplersfiles', - 'SingleSource': 'com.android.rs.singlesource', - 'ScriptGroup': 'com.android.rs.scriptgroup', - 'Reduction': 'com.android.rs.lldbreductiontest', - } - - _tests_jni = { - 'JNIInfiniteLoop': 'com.android.rs.jniinfiniteloop', - 'JNIDebugWaitAttach': 'com.android.rs.jnidebugwaitattach', - 'JNINoDebugWaitAttach': 'com.android.rs.jninodebugwaitattach', - 'JNIBranchingFunCalls': 'com.android.rs.jnibranchingfuncalls', - 'JNIKernelVariables': 'com.android.rs.jnikernelvariables', - 'JNIAllocations': 'com.android.rs.jniallocations', - 'JNIMultipleRSFiles': 'com.android.rs.jnimultiplersfiles' - } - - _tests_ndk = {'CppInfiniteLoop', 'CppNoDebugWaitAttach', - 'CppDebugWaitAttach', 'CppBranchingFunCalls', - 'CppKernelVariables', 'CppAllocations', 
'CppMultipleRSFiles'} - - _missing_path_msg = ( - 'No product path has been provided. If using `lunch` ensure ' - 'the `ANDROID_PRODUCT_OUT` environment variable has been set correctly. ' - 'Alternatively, include it in the config file or specify it explicitly ' - 'on the command line (`--aosp-product-path`)' - ) - - def __init__(self, android, aosp_product_path): - assert android - self._android = android # Link to the android module - self._aosp_product_path = aosp_product_path - self._log = util_log.get_logger() - - def is_apk(self, name): - '''Checks if a binary of a given name is an apk. - - Checks whether the name of the apk is in the dictionary of apks. - - Args: - name: The string that is the name of the binary to check. - - Returns: - True if the binary is an apk, False if it is not. - - Raises: - TestSuiteException: The string does not match any item in the list - of APK or NDK binaries. - ''' - if name in self._tests_apk: - return True - if name not in self._tests_ndk and name not in self._tests_jni: - raise TestSuiteException('test not apk or ndk') - return False - - def uninstall_all(self): - '''Uninstall/Delete all the testsuite's apks and binaries on the device. - - Raises: - TestSuiteException: One or more apks could not be uninstalled. - ''' - self.uninstall_all_apk() - self._delete_all_ndk() - self._uninstall_all_jni() - - def uninstall_all_apk(self): - '''Uninstall all apks used by the test suite from the device. - - Raises: - TestSuiteException: An apk could not be uninstalled. 
- ''' - max_num_attempts = 3 - timeout = 180 - - for app, package in self._tests_apk.items(): - self._log.info('Uninstalling the application: %s', app) - output = self._android.adb_retry('uninstall ' + package, - max_num_attempts, timeout) - - if output is None: - raise TestSuiteException('Repeated timeouts when uninstalling ' - 'the application: ' + app) - elif 'Success' not in output: - outmsg = '\n' + output.rstrip() if output else '<empty>' - self._log.error('Cannot match the string "Success" in the ' - 'output: %s', outmsg) - raise TestSuiteException('Unable to uninstall app ' + app) - else: - self._log.debug('Application uninstalled: %r', app) - - if 'Success' not in output: - self._log.warning('unable to uninstall app ' + app) - - def _uninstall_all_jni(self): - '''Uninstall all apks used by the test suite from the device. - - Raises: - TestSuiteException: An apk could not be uninstalled. - ''' - for app, package in self._tests_jni.items(): - output = self._android.adb('uninstall ' + package) - - if 'Success' not in output: - raise TestSuiteException('unable to uninstall app ' + app) - - def _delete_all_ndk(self): - '''Delete all ndk binaries that were pushed to the device. - - Raises: - TestSuiteException: A binary could not be deleted from the device. - ''' - for app in self._tests_ndk: - output = self._android.shell('rm /data/' + app) - if 'No such file or directory' in output: - self._log.warning('unable to uninstall app ' + app) - - - def push_all(self): - '''Push all apk and ndk binaries required by the testsuite to the device - - Raises: - TestSuiteException: One or more apks could not be installed or - previously running processes thereof could not - be killed. - ''' - self._push_all_java() - self._push_all_ndk() - self._push_all_jni() - - def _install_apk(self, app, package): - '''Push an apk files to the device. - - This involves uninstalling any old installation and installing again. - - Args: - app: A string that is the name of the apk. 
- package: A string that is the name of the package of the apk. - - Raises: - TestSuiteException: The apk could not be installed. - ''' - self._log.info('pushing {0}'.format(app)) - - self._android.stop_app(package) - - self._android.adb('uninstall ' + package) - # Ignore the output of uninstall. - # The app may not have been installed in the first place. That's ok. - - flags = '' - - product_folder = self._aosp_product_path - if not product_folder: - raise TestSuiteException(self._missing_path_msg) - - app_folder = os.path.join(product_folder, 'data/app') - - cmd = 'install {0} {1}/{2}/{2}.apk'.format(flags, app_folder, app) - output = self._android.adb(cmd, False, True, - util_constants.PUSH_TIMEOUT) - if ('Success' not in output) or ("can't find" in output): - raise TestSuiteException('unable to install app {}: {}'.format( - app, output)) - - def _push_all_java(self): - '''Push all apk files to the device. - - This involves uninstalling any old installations and installing again. - - Raises: - TestSuiteException: An apk could not be installed. - ''' - for app, package in self._tests_apk.items(): - self._install_apk(app, package) - - def _push_all_ndk(self): - '''Push all ndk binaries to the device. - - Raises: - TestSuiteException: A binary could not be pushed to the device or - a previous process could not be killed. 
- ''' - product_folder = self._aosp_product_path - if not product_folder: - raise TestSuiteException(self._missing_path_msg) - - bin_folder = os.path.join(product_folder, 'system/bin') - - for app in self._tests_ndk: - self._log.info('pushing {0}'.format(app)) - - self._android.kill_all_processes(app) - - cmd = 'push %s/%s /data' % (bin_folder, app) - output = self._android.adb(cmd, False, True, - util_constants.PUSH_TIMEOUT) - if ('failed to copy' in output or - 'No such file or directory' in output): - raise TestSuiteException('unable to push binary ' + app) - - # be sure to set the execute bit for NDK binaries - self._android.shell('chmod 777 /data/{0}'.format(app)) - - def _push_all_jni(self): - '''Push all JNI apk files to the device. - - This involves uninstalling any old installations and installing again. - - Raises: - TestSuiteException: An apk could not be installed. - ''' - product_folder = self._aosp_product_path - if not product_folder: - raise TestSuiteException(self._missing_path_msg) - - app_folder = os.path.join(product_folder, 'system/lib') - - # Ensure the system/lib directory is writable - self._android.make_device_writeable() - - for app, package in self._tests_jni.items(): - self._install_apk(app, package) - - def delete_ndk_cache(self): - '''Deletes NDK cached scripts from the device. - - The NDK caches compiled scripts as shared libraries in - the folder specified when calling `rs->init()`. - - For all out tests this is set to '/data/rscache'. - ''' - self._android.shell('rm -r /data/rscache') - - def get_package(self, app_name): - '''From a given apk name get the name of its package. - - Args: - app_name: The string that is the name of the apk. - - Returns: - A string representing the name of the package of the app. - - Raises: - TestSuiteException: The app name is not in the list of apks. 
- ''' - if app_name in self._tests_apk: - return self._tests_apk[app_name] - elif app_name in self._tests_jni: - return self._tests_jni[app_name] - else: - msg = ('unknown app %s. (Do you need to add an ' - 'entry to bundle.py :: test_apps_?)' % app_name) - raise TestSuiteException(msg) - return self._tests_apk[app_name] - - def launch(self, app_name): - '''Launch an apk/ndk app on a remote device. - - Args: - app_name: The string that is the name of the APK or NDK executable. - - Returns: - The Process ID of the launched executable, otherwise None - - Raises: - TestSuiteException: Previous processes of this apk could not be - killed. - ''' - process_name = '' - success = False - if app_name in self._tests_apk: - process_name = self._tests_apk[app_name] - - self._android.kill_all_processes(process_name) - - success = self._android.launch_app(process_name, 'MainActivity') - elif app_name in self._tests_ndk: - process_name = app_name - self._android.kill_all_processes(process_name) - success = self._android.launch_elf(process_name) - elif app_name in self._tests_jni: - package = self._tests_jni[app_name] - - self._android.kill_process(package) - - success = self._android.launch_app(package, 'MainActivity') - if not success: - self._log.log_and_print(app_name + - ' is not installed. Try removing the --no-install option?') - return None - - return self._android.find_app_pid(package) - else: - self._log.error('Executable {0} neither Java nor NDK.' - .format(app_name)) - - self._log.fatal('Failed to launch test executable {0}' - .format(app_name)) - return None - - if not success: - self._log.log_and_print(app_name + - ' is not installed. Try removing the --no-install option?') - return None - - return self._android.find_app_pid(process_name) - - def check_apps_installed(self, java_only): - ''' Check whether all Java/JNI/NDK apps are installed on the device. 
- - Args: - java_only: Boolean to specify whether only the Java apks should be - checked (in case of --wimpy mode for example). - - Raises: - TestSuiteException: Not all apps are installed. - ''' - java_and_jni_apks = self._tests_apk.copy() - - if not java_only: - java_and_jni_apks.update(self._tests_jni) - - installed = self._android.shell('pm list packages -f') - - for app, package in java_and_jni_apks.items(): - if package not in installed: - raise TestSuiteException('apk %s is not installed.' % app) - - if not java_only: - ls_data = self._android.shell('ls /data') - for app in self._tests_ndk: - if app not in ls_data: - raise TestSuiteException('app %s is not installed.' % app) diff --git a/tests/lldb/tests/harness/util_constants.py b/tests/lldb/tests/harness/util_constants.py deleted file mode 100644 index 9c7b18c4..00000000 --- a/tests/lldb/tests/harness/util_constants.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''This file contains constants shared between the test suite runner and the -individual test runner.''' - -from __future__ import absolute_import - -RC_TEST_OK = 0 -RC_TEST_TIMEOUT = 64 -RC_TEST_FAIL = 65 -RC_TEST_FATAL = 66 -RC_TEST_IGNORED = 67 -PUSH_TIMEOUT = 60*5 - diff --git a/tests/lldb/tests/harness/util_functions.py b/tests/lldb/tests/harness/util_functions.py deleted file mode 100644 index 32dca1c4..00000000 --- a/tests/lldb/tests/harness/util_functions.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''This file contains utility functions used by both the test suite and the -single test executor.''' - -from __future__ import absolute_import - -import os -import importlib -import sys - - -def load_py_module(path): - '''Load a python file from disk. - - Args: - path: String path to python file. - - Returns: - python module if success, None otherwise. 
- ''' - assert isinstance(path, str) - try: - if not os.path.exists(path): - print('Path does not exist: ' + path) - return None - path = os.path.abspath(path) - module_dir, module_file = os.path.split(path) - module_name, _ = os.path.splitext(module_file) - # adjust sys.path, runtime counterpart of PYTHONPATH, to temporarily - # include the folder containing the user configuration module - sys.path.append(module_dir) - module_obj = importlib.import_module(module_name) - sys.path.pop(0) - return module_obj - except ImportError as err: - print(str(err)) - print("Looking in directory ") - print(module_dir) - return None diff --git a/tests/lldb/tests/harness/util_lldb.py b/tests/lldb/tests/harness/util_lldb.py deleted file mode 100644 index 2d1adcac..00000000 --- a/tests/lldb/tests/harness/util_lldb.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the class UtilLLDB, which provides lldb utility -methods.''' - -from __future__ import absolute_import - -from . 
import util_constants - -try: - import lldb -except ImportError: - print('unable to import lldb') - print('please run "lldb -P" and add to $PYTHONPATH') - quit(util_constants.RC_TEST_FATAL) - - -class UtilLLDB(object): - '''Provides utility methods to interface with lldb's python bindings.''' - - @staticmethod - def start(): - '''Initialise the lldb debugger framework.''' - lldb.SBDebugger_Initialize() - - @staticmethod - def stop(): - '''Terminate the lldb debugger framework. - - Raises: - AssertionError: If an assertion fails. - ''' - assert lldb - lldb.SBDebugger_Terminate() - - @staticmethod - def create_debugger(): - '''Create an lldb debugger instance. - - Returns: - The SBDebugger instance that was created. - - Raises: - AssertionError: If an assertion fails. - ''' - assert lldb - inst = lldb.SBDebugger_Create() - inst.SetAsync(False) - return inst - - @staticmethod - def destroy_debugger(dbg): - '''Destroy the lldb debugger instance. - - Args: - dbg: Instance of SBDebugger that is to be destroyed. - - Raises: - AssertionError: If an assertion fails. - ''' - assert lldb - lldb.SBDebugger_Destroy(dbg) - - @staticmethod - def get_module(): - '''Get the lldb module. - - Returns: - The lldb module. - - Raises: - AssertionError: If an assertion fails. - ''' - assert lldb - return lldb diff --git a/tests/lldb/tests/harness/util_log.py b/tests/lldb/tests/harness/util_log.py deleted file mode 100644 index fec07036..00000000 --- a/tests/lldb/tests/harness/util_log.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Initialise the Python logging facility for the test suite. - -from __future__ import absolute_import - -It provides the function to initialise the logging facility and retrieve an -instance of the logger class. It also contains the definition of the internal -logger class. -''' -from __future__ import print_function - -import io -import sys -import logging - - -INITIALISED = False -NAMESPACE = 'RS_LLDB_TESTSUITE' - -def initialise(identifier, level=logging.INFO, print_to_stdout=False, - file_path=None, file_mode='a'): - '''Initialise the logging facility for the test suite. - - This function should be invoked only once, at the start of the program, and - before emitting any log. - - Args: - identifier: String, a label that will be part of each record. It is - usually the test case name. - level: Integer, all messages above this log level will be discarded. - Valid values are those recognised by the python logging module: - https://docs.python.org/2/library/logging.html#levels . - print_to_stdout: Boolean, whether the logs should be redirected to - sys.stdout (true) or stored into a text file (false). - file_path: String, path to the text file in which to store the logs. - This option is only meaningful when print_to_stdout = False. - file_mode: String, the mode to open the text file. Valid modes are - those recognised by the standard Python `open' function. - This option is only meaningful when print_to_stdout = False. 
- - Raises: - RuntimeError: If the logging has already been initialised - ValueError: If the argument "file_path" has not been provided when - print_to_stdout=False - ''' - # pylint: disable=global-statement - global INITIALISED - if INITIALISED: - raise RuntimeError('Already initialised') - - # set the logging class - old_logger_class = logging.getLoggerClass() - logging.setLoggerClass(RsLogger) - - # initialise the Logger - log = logging.getLogger(NAMESPACE) - log.setLevel(level) # reject all logs below - - # don't propagate the log records to the logging root - log.propagate = False - - # restore the previous class - logging.setLoggerClass(old_logger_class) - - # handler - if print_to_stdout: - handler_default = logging.StreamHandler(sys.stdout) - else: - if file_path is None: - raise ValueError('Missing mandatory argument "file_path"') - - handler_default = logging.FileHandler(file_path, file_mode) - - # Do not filter records in the handler because of the level - handler_default.setLevel(logging.NOTSET) - - # format the message - handler_default.setFormatter( - logging.Formatter( - '%(asctime)s [{0}] [%(levelname)s] %(message)s' - .format(identifier) - )) - - log.addHandler(handler_default) - - INITIALISED = True - - -class RsLogger(logging.getLoggerClass()): - '''Internal logging class. - - This is an internal class to enhance the logging facility with the methods - "log_and_print" and "seek_to_end". - ''' - # pylint: disable=too-many-public-methods - - def log_and_print(self, msg, level=logging.INFO): - '''Print "msg" to stdout and emit a log record. - - Args: - msg: The message to emit. - level: The level to use. By default it is logging.INFO. 
- ''' - print(msg) - self.log(level, msg) - - def seek_to_end(self): - '''Reset the cursor position to the end for all handlers that are - Text File managers.''' - for hndlr in self.handlers: - if isinstance(hndlr, logging.FileHandler): - hndlr.stream.seek(0, io.SEEK_END) - - -def get_logger(): - '''Retrieves the Logger instance related to the testsuite. - - Throws: - RuntimeError: If the logging facility has not been initialised with - "initialise" beforehand. - - Returns: - An instance of logging.Logger to write the logs. - ''' - if not INITIALISED: - raise RuntimeError('Logging facility not initialised') - - return logging.getLogger(NAMESPACE) diff --git a/tests/lldb/tests/harness/util_timer.py b/tests/lldb/tests/harness/util_timer.py deleted file mode 100644 index b83a76f1..00000000 --- a/tests/lldb/tests/harness/util_timer.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Timer utility''' - -from __future__ import absolute_import - -import threading - - -class Timer(object): - '''A Timer utility to execute a callback after a certain interval.''' - - def __init__(self, interval, callback): - '''Initialise the Timer without starting it. - - Args: - interval: int or float, interval in seconds to count, before - invoking the callback - callback: function, it handles the function to call once - the timeout expires. 
- ''' - - # validate input parameters - if not isinstance(interval, (int, float)): - raise TypeError('Argument "interval" is not a number: ' - '{0}'.format(type(interval))) - if not callable(callback): - raise TypeError('Argument "callback" is not a function: ' - '{0}'.format(type(callback))) - - self._timer = None - self._callback = callback - self._interval = interval - - def _is_running(self): - '''Checks whether the timer is executing. - - Returns: - boolean, true if the timer is currently running, false otherwise - ''' - return self._timer is not None - - def start(self): - '''Starts the timer. - - Returns: - self, the Timer instance - - Throws: - RuntimeError: if the timer is already running - ''' - if self._is_running(): - raise RuntimeError('Timer already running') - - self._timer = threading.Timer(self._interval, self._callback) - self._timer.start() - return self # so that we can perform Timer(...).start() - - def stop(self): - '''Stops the timer if it's executing. - - Returns: - self, the Timer instance - ''' - - if self._is_running(): - self._timer.cancel() - self._timer = None - return self - - def reset(self): - '''Restart the timer. - - Returns: - self, the Timer instance - ''' - - self.stop() - self.start() - return self diff --git a/tests/lldb/tests/harness/util_warnings.py b/tests/lldb/tests/harness/util_warnings.py deleted file mode 100644 index dd527404..00000000 --- a/tests/lldb/tests/harness/util_warnings.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -'''Redirect the Python warnings into the log.''' - -from __future__ import absolute_import - -import warnings - -from . import util_log - -_OLD_WARNINGS_HANDLER = None - - -def redirect_warnings(): - '''Redirect all warnings issued by warnings::warn to the log. - - By default all python warnings are printed into sys.stderr. This method - will force to redirect them into the test suite logger. - ''' - - # pylint: disable=global-statement - global _OLD_WARNINGS_HANDLER - - # Already redirecting? - if _OLD_WARNINGS_HANDLER: - return None - - _OLD_WARNINGS_HANDLER = warnings.showwarning - - log = util_log.get_logger() - - def _redirect_warnings_to_log(*args): - '''Redirect the warnings to the Logger.''' - log.warn(warnings.formatwarning(*args).rstrip()) - - warnings.showwarning = _redirect_warnings_to_log - - -def restore_warnings(): - '''Restore the reporting of warnings::warn as before.''' - - # pylint: disable=global-statement - global _OLD_WARNINGS_HANDLER - - if _OLD_WARNINGS_HANDLER: - warnings.showwarning = _OLD_WARNINGS_HANDLER - _OLD_WARNINGS_HANDLER = None - diff --git a/tests/lldb/tests/run_test.py b/tests/lldb/tests/run_test.py deleted file mode 100644 index 50a0530b..00000000 --- a/tests/lldb/tests/run_test.py +++ /dev/null @@ -1,422 +0,0 @@ -#!/usr/bin/env python - -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -'''This script will run one specific test.''' -from __future__ import print_function, absolute_import - -import os -import sys -import atexit -import inspect -import logging -import argparse -import warnings - -import harness -from harness import util_constants -from harness import util_log -from harness import util_warnings -from harness.util_functions import load_py_module -from harness.util_lldb import UtilLLDB -from harness.exception import DisconnectedException -from harness.exception import TestSuiteException, TestIgnoredException -from harness.util_timer import Timer - - -class TestState(object): - '''Simple mutable mapping (like namedtuple)''' - def __init__(self, **kwargs): - for key, val in kwargs.items(): - setattr(self, key, val) - - -def _test_pre_run(state): - '''This function is called before a test is executed (setup). - - Args: - state: Test suite state collection, instance of TestState. - - Returns: - True if the pre_run step completed without error. Currently the pre-run - will launch the target test binary on the device and attach an - lldb-server to it in platform mode. - - Raises: - AssertionError: If an assertion fails. - TestSuiteException: Previous processes of this apk required for this - test could not be killed. 
- ''' - assert state.test - assert state.bundle - - log = util_log.get_logger() - log.info('running: {0}'.format(state.name)) - - # Remove any cached NDK scripts between tests - state.bundle.delete_ndk_cache() - - # query our test case for the remote target app it needs - # First try the legacy behaviour - try: - target_name = state.test.get_bundle_target() - warnings.warn("get_bundle_target() is deprecated and will be removed soon" - " - use the `bundle_target` dictionary attribute instead") - except AttributeError: - try: - target_name = state.test.bundle_target[state.bundle_type] - except KeyError: - raise TestIgnoredException() - - if target_name is None: - # test case doesn't require a remote process to debug - return True - else: - # find the pid of our remote test process - state.pid = state.bundle.launch(target_name) - if not state.pid: - log.error('unable to get pid of target') - return False - state.android.kill_servers() - # spawn lldb platform on the target device - state.android.launch_lldb_platform(state.device_port) - return True - - -def _test_post_run(state): - '''This function is called after a test is executed (cleanup). - - Args: - state: Test suite state collection, instance of TestState. - - Raises: - AssertionError: If an assertion fails. - ''' - assert state.test - assert state.bundle - - try: - target_name = state.test.get_bundle_target() - warnings.warn("get_bundle_target() is deprecated and will be removed soon" - " - use the `bundle_target` dictionary attribute instead") - except AttributeError: - try: - target_name = state.test.bundle_target[state.bundle_type] - except KeyError: - raise TestIgnoredException() - - - if target_name: - if state.bundle.is_apk(target_name): - state.android.stop_app(state.bundle.get_package(target_name)) - else: - state.android.kill_process(target_name) - - -def _test_run(state): - '''Execute a single test suite. - - Args: - state: test suite state collection, instance of TestState. 
- - Returns: - True: if the test case ran successfully and passed. - False: if the test case failed or suffered an error. - - Raises: - AssertionError: If an assertion fails. - ''' - assert state.lldb - assert state.lldb_module - assert state.test - - test_failures = state.test.run(state.lldb, state.pid, state.lldb_module) - - if test_failures: - log = util_log.get_logger() - for test, err in test_failures: - log.error('test %s:%s failed: %r' % (state.name, test, err)) - - return False - - return True - - -def _initialise_timer(android, interval): - '''Start a 'timeout' timer, to catch stalled execution. - - This function will start a timer that will act as a timeout killing this - test session if a test becomes un-responsive. - - Args: - android: current instance of harness.UtilAndroid - interval: the interval for the timeout, in seconds - - Returns: - The instance of the Timer class that was created. - ''' - - def on_timeout(): - '''This is a callback function that will fire if a test takes longer - then a threshold time to complete.''' - # Clean up the android properties - android.reset_all_props() - # pylint: disable=protected-access - sys.stdout.flush() - # hard exit to force kill all threads that may block our exit - os._exit(util_constants.RC_TEST_TIMEOUT) - - timer = Timer(interval, on_timeout) - timer.start() - atexit.register(Timer.stop, timer) - return timer - - -def _quit_test(num, timer): - '''This function will exit making sure the timeout thread is killed. - - Args: - num: An integer specifying the exit status, 0 meaning "successful - termination". - timer: The current Timer instance. - ''' - if timer: - timer.stop() - sys.stdout.flush() - sys.exit(num) - - -def _execute_test(state): - '''Execute a test suite. - - Args: - state: The current TestState object. 
- ''' - log = util_log.get_logger() - - state.test.setup(state.android) - try: - if not _test_pre_run(state): - raise TestSuiteException('test_pre_run() failed') - if not _test_run(state): - raise TestSuiteException('test_run() failed') - _test_post_run(state) - log.info('Test passed') - - finally: - state.test.post_run() - state.test.teardown(state.android) - - -def _get_test_case_class(module): - '''Inspect a test case module and return the test case class. - - Args: - module: A loaded test case module. - ''' - # We consider only subclasses of TestCase that have `test_` methods` - log = util_log.get_logger() - log.debug("loading test suites from %r", module) - for name, klass in inspect.getmembers(module, inspect.isclass): - for attr in dir(klass): - if attr.startswith('test_'): - log.info("Found test class %r", name) - return klass - else: - log.debug("class %r has no test_ methods", name) - return None - - -def get_test_dir(test_name): - ''' Get the directory that contains a test with a given name. - - Returns: - A string that is the directory containing the test. - - Raises: - TestSuiteException: If a test with this name does not exist. 
- ''' - tests_dir = os.path.dirname(os.path.realpath(__file__)) - for sub_dir in os.listdir(tests_dir): - current_test_dir = os.path.join(tests_dir, sub_dir) - if (os.path.isdir(current_test_dir) and - test_name in os.listdir(current_test_dir)): - return current_test_dir - - raise TestSuiteException( - 'unable to find test: {0}'.format(test_name)) - - -def main(): - '''Test runner entry point.''' - - # re-open stdout with no buffering - sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) - - android = None - timer = None - log = None - - # parse the command line (positional arguments only) - truthy = lambda x: x.lower() in ('true', '1') - parser = argparse.ArgumentParser("Run a single RenderScript TestSuite against lldb") - for name, formatter in ( - ('test_name', str), - ('log_file_path', str), - ('adb_path', str), - ('lldb_server_path_device', str), - ('aosp_product_path', str), - ('device_port', int), - ('device', str), - ('print_to_stdout', truthy), - ('verbose', truthy), - ('wimpy', truthy), - ('timeout', int), - ('bundle_type', str), - ): - parser.add_argument(name, type=formatter) - - args = parser.parse_args() - - try: - # create utility classes - harness.util_log.initialise( - '%s(%s)' % (args.test_name, args.bundle_type), - print_to_stdout=args.print_to_stdout, - level=logging.INFO if not args.verbose else logging.DEBUG, - file_path=args.log_file_path, - file_mode='a' - ) - log = util_log.get_logger() - log.debug('Logger initialised') - - android = harness.UtilAndroid(args.adb_path, - args.lldb_server_path_device, - args.device) - - # start the timeout counter - timer = _initialise_timer(android, args.timeout) - - # startup lldb and register teardown handler - atexit.register(UtilLLDB.stop) - UtilLLDB.start() - - current_test_dir = get_test_dir(args.test_name) - - # load a test case module - test_module = load_py_module(os.path.join(current_test_dir, - args.test_name)) - - - # inspect the test module and locate our test case class - test_class = 
_get_test_case_class(test_module) - - # if our test inherits from TestBaseRemote, check we have a valid device - if (hasattr(test_module, "TestBaseRemote") and - issubclass(test_class, test_module.TestBaseRemote)): - android.validate_device() - - # create an instance of our test case - test_inst = test_class( - args.device_port, - args.device, - timer, - args.bundle_type, - wimpy=args.wimpy - ) - - # instantiate a test target bundle - bundle = harness.UtilBundle(android, args.aosp_product_path) - - # execute the test case - try: - for _ in range(2): - try: - # create an lldb instance - lldb = UtilLLDB.create_debugger() - - # create state object to encapsulate instances - - state = TestState( - android=android, - bundle=bundle, - lldb=lldb, - lldb_module=UtilLLDB.get_module(), - test=test_inst, - pid=None, - name=args.test_name, - device_port=args.device_port, - bundle_type=args.bundle_type - ) - - util_warnings.redirect_warnings() - - _execute_test(state) - - # tear down the lldb instance - UtilLLDB.destroy_debugger(lldb) - break - except DisconnectedException as error: - log.warning(error) - log.warning('Trying again.') - else: - log.fatal('Not trying again, maximum retries exceeded.') - raise TestSuiteException('Lost connection to lldb-server') - - finally: - util_warnings.restore_warnings() - - _quit_test(util_constants.RC_TEST_OK, timer) - - except AssertionError: - if log: - log.critical('Internal test suite error', exc_info=1) - print('Internal test suite error', file=sys.stderr) - _quit_test(util_constants.RC_TEST_FATAL, timer) - - except TestIgnoredException: - if log: - log.warn("test ignored") - _quit_test(util_constants.RC_TEST_IGNORED, timer) - - except TestSuiteException as error: - if log: - log.exception(str(error)) - else: - print(error, file=sys.stderr) - _quit_test(util_constants.RC_TEST_FAIL, timer) - - # use a global exception handler to be sure that we will - # exit safely and correctly - except Exception: - if log: - log.exception('INTERNAL 
ERROR') - else: - import traceback - print('Exception {0}'.format(traceback.format_exc()), - file=sys.stderr) - _quit_test(util_constants.RC_TEST_FATAL, timer) - - finally: - if android: - android.reset_all_props() - if timer: - timer.stop() - - -# execution trampoline -if __name__ == '__main__': - print(' '.join(sys.argv)) - main() diff --git a/tests/lldb/tests/testcases/reduce_common.py b/tests/lldb/tests/testcases/reduce_common.py deleted file mode 100644 index 462d0b39..00000000 --- a/tests/lldb/tests/testcases/reduce_common.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - -REDUCE_ITERATIONS = 128 # This is in MainActivity.java -REDUCE_STARTVAL = 10 # This is in MainActivity.java -REDUCE_AUTO_COMB_SCRIPT = "reduce_common.rsh" -REDUCE_SCRIPT = "reduce_common.rsh" -X_TESTS = 100 -Y_TESTS = 2 -Z_TESTS = 2 - - -class ReductionMixin(object): - def _test_func_role_combinations(self, func_role_combinations): - """ - Assert that when a reduction breakpoint is conditional on a function - role, that breakpoints are only set on the the given functions. 
- We do this by setting breakpoints on all possible pairs of functions - and check that the resolved breakpoints are on functions that are part - of the given pair - """ - for combination in func_role_combinations: - self._delete_breakpoints() - self.try_command( - 'language renderscript reduction breakpoint set ' - 'find_min_user_type --function-role %s' % ( - ','.join(combination) - ), - [r'Breakpoint(s) created'] - ) - func_suffixes = [combination[0][:4], combination[1][:4]] - # just match the first 4 chars of the roles prefix - funcs_match = 'find_min_user_type_((%s|%s))' % tuple(func_suffixes) - # now check we stop on both functions for each coordinate in the - # allocation - for x in range(REDUCE_ITERATIONS): - output = self.try_command( - 'process continue', - expected_regex=[ - r'resuming', - r'Process \d+ stopped', - r'frame #0: (0x[0-9a-fA-F]+ )?librs.reduce.so`%s' % funcs_match - ] - ) - for line in output.splitlines(): - match = re.search(funcs_match, line) - if match: - try: - func_suffixes.remove(match.group(1)) - except ValueError: - # The outconverter may only be called in the final - # step but the accumulator will be called for every - # input index - continue - break - if len(func_suffixes) == 0: - # We've popped the functions we're interested in off the list - break - else: - raise self.TestFail( - "unable to match function roles for " + repr(combination)) - - def _reduction_breakpoint_set_single_type( - self, script_soname, script_basename, reduce_name, funcname_types): - """ - Assert - for each function role - that the correct symbol is resolved - and trapped by the debugger. 
- """ - for func, typename in funcname_types: - self._delete_breakpoints() - breakpoint_match = r'Breakpoint \d+: where = librs.%s.so`%s' - # Autogenerated combiners don't have a filename in the debugger - if not func.endswith(".combiner"): - breakpoint_match = r'%s (\+ \d+ )?at %s' % ( - breakpoint_match, script_basename) - self.try_command( - 'language renderscript reduction breakpoint set %s' - ' --function-role %s' % (reduce_name, typename), - expected_regex=[breakpoint_match % (script_soname, func)] - ) - self.try_command( - 'process continue', - expected_regex=[ - r'resuming', - r'Process \d+ stopped', - r'frame #0: (0x[0-9a-fA-F]+ )?librs.%s.so`%s' % ( - script_soname, func) - ] - ) diff --git a/tests/lldb/tests/testcases/test_allocation_dump_1.py b/tests/lldb/tests/testcases/test_allocation_dump_1.py deleted file mode 100644 index 53e77fc2..00000000 --- a/tests/lldb/tests/testcases/test_allocation_dump_1.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestAllocationDump1.''' -from __future__ import absolute_import - -import os - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy, - cpp_only_test, -) - - -class TestAllocationDump1(TestBaseRemote): - '''Tests printing the contents of allocations.''' - - bundle_target = { - 'java': 'Allocations', - 'jni': 'JNIAllocations', - 'cpp': 'CppAllocations' - } - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_dump_to_file1(self): - # Test dumping large allocations to file - output_file_1 = self.get_tmp_file_path() - - self.try_command('language renderscript allocation dump 1 -f ' + - output_file_1, - ["Results written to '%s'" % output_file_1]) - - # Check the file was created - self.assert_true(os.path.isfile(output_file_1)) - os.remove(output_file_1) - - def test_dump_to_file2(self): - output_file_2 = self.get_tmp_file_path() - - self.try_command('language renderscript allocation dump 2 -f ' + - output_file_2, - ["Results written to '%s'" % output_file_2]) - - self.assert_true(os.path.isfile(output_file_2)) - os.remove(output_file_2) - - @wimpy - def test_dump_char(self): - self.try_command('language renderscript allocation dump 3', - ['(0, 0, 0) = 0', - '(0, 1, 0) = 1', - '(0, 2, 0) = 2', - '(0, 0, 1) = 3', - '(0, 1, 1) = 4', - '(0, 2, 1) = 5', - '(0, 0, 2) = 6', - '(0, 1, 2) = 7', - '(0, 2, 2) = 8', - '(0, 0, 3) = 9', - '(0, 1, 3) = 10', - '(0, 2, 3) = 11', - '(0, 0, 4) = 12', - '(0, 1, 4) = 13', - '(0, 2, 4) = 14', - '(0, 0, 5) = 15', - '(0, 1, 5) = 16', - '(0, 2, 5) = 17', - '(0, 0, 6) = 18', - '(0, 1, 6) = 19', - '(0, 2, 6) = 20', - '(0, 0, 7) = 21', - '(0, 1, 7) = 22', - '(0, 2, 7) = 23']) - - def test_dump_char2(self): - 
self.try_command('language renderscript allocation dump 4', - ['(0, 0, 0) = {0 1}', - '(1, 0, 0) = {2 3}', - '(2, 0, 0) = {4 5}', - '(3, 0, 0) = {6 7}', - '(4, 0, 0) = {8 9}', - '(5, 0, 0) = {10 11}', - '(6, 0, 0) = {12 13}', - '(7, 0, 0) = {14 15}', - '(8, 0, 0) = {16 17}', - '(9, 0, 0) = {18 19}', - '(10, 0, 0) = {20 21}', - '(11, 0, 0) = {22 23}']) - - def test_dump_char3(self): - self.try_command('language renderscript allocation dump 5', - ['(0, 0, 0) = {0 1 2}', - '(1, 0, 0) = {4 5 6}', - '(2, 0, 0) = {8 9 10}', - '(3, 0, 0) = {12 13 14}', - '(4, 0, 0) = {16 17 18}', - '(5, 0, 0) = {20 21 22}']) - - def test_dump_char4(self): - self.try_command('language renderscript allocation dump 6', - ['(0, 0, 0) = {0 1 2 3}', - '(1, 0, 0) = {4 5 6 7}', - '(2, 0, 0) = {8 9 10 11}', - '(3, 0, 0) = {12 13 14 15}', - '(4, 0, 0) = {16 17 18 19}', - '(5, 0, 0) = {20 21 22 23}']) - - def test_dump_short(self): - self.try_command('language renderscript allocation dump 7', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - def test_dump_short2(self): - self.try_command('language renderscript allocation dump 8', - ['(0, 0, 0) = {0 1}', - '(1, 0, 0) = {2 3}', - '(2, 0, 0) = {4 5}', - '(3, 0, 0) = {6 7}', - '(4, 0, 0) = {8 9}', - '(5, 0, 0) = {10 11}', - '(0, 0, 1) = {12 13}', - '(1, 0, 1) = {14 15}', - '(2, 0, 1) = {16 17}', - '(3, 0, 1) = {18 19}', - '(4, 0, 1) = {20 21}', - '(5, 0, 1) = {22 23}']) - - def test_dump_short3(self): - self.try_command('language renderscript allocation dump 9', - ['(0, 0, 0) = {0 1 2}', - '(1, 0, 0) = {4 5 6}', - '(2, 0, 
0) = {8 9 10}', - '(3, 0, 0) = {12 13 14}', - '(4, 0, 0) = {16 17 18}', - '(5, 0, 0) = {20 21 22}']) - - def test_dump_short4(self): - self.try_command('language renderscript allocation dump 10', - ['(0, 0, 0) = {0 1 2 3}', - '(1, 0, 0) = {4 5 6 7}', - '(2, 0, 0) = {8 9 10 11}', - '(3, 0, 0) = {12 13 14 15}', - '(4, 0, 0) = {16 17 18 19}', - '(5, 0, 0) = {20 21 22 23}']) - - def test_dump_int(self): - self.try_command('language renderscript allocation dump 11', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - def test_dump_int2(self): - self.try_command('language renderscript allocation dump 12', - ['(0, 0, 0) = {0 1}', - '(1, 0, 0) = {2 3}', - '(2, 0, 0) = {4 5}', - '(3, 0, 0) = {6 7}', - '(4, 0, 0) = {8 9}', - '(5, 0, 0) = {10 11}', - '(6, 0, 0) = {12 13}', - '(7, 0, 0) = {14 15}', - '(8, 0, 0) = {16 17}', - '(9, 0, 0) = {18 19}', - '(10, 0, 0) = {20 21}', - '(11, 0, 0) = {22 23}']) - - def test_dump_int3(self): - self.try_command('language renderscript allocation dump 13', - ['(0, 0, 0) = {0 1 2}', - '(1, 0, 0) = {4 5 6}', - '(2, 0, 0) = {8 9 10}', - '(0, 1, 0) = {12 13 14}', - '(1, 1, 0) = {16 17 18}', - '(2, 1, 0) = {20 21 22}']) - - def test_dump_int4(self): - self.try_command('language renderscript allocation dump 14', - ['(0, 0, 0) = {0 1 2 3}', - '(1, 0, 0) = {4 5 6 7}', - '(2, 0, 0) = {8 9 10 11}', - '(3, 0, 0) = {12 13 14 15}', - '(4, 0, 0) = {16 17 18 19}', - '(5, 0, 0) = {20 21 22 23}']) - - def test_dump_int5(self): - self.try_command('language renderscript allocation dump 15', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - 
'(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - def test_dump_long2(self): - self.try_command('language renderscript allocation dump 16', - ['(0, 0, 0) = {0 1}', - '(1, 0, 0) = {2 3}', - '(2, 0, 0) = {4 5}', - '(3, 0, 0) = {6 7}', - '(4, 0, 0) = {8 9}', - '(5, 0, 0) = {10 11}', - '(6, 0, 0) = {12 13}', - '(7, 0, 0) = {14 15}', - '(8, 0, 0) = {16 17}', - '(9, 0, 0) = {18 19}', - '(10, 0, 0) = {20 21}', - '(11, 0, 0) = {22 23}']) - - def test_dump_long3(self): - self.try_command('language renderscript allocation dump 17', - ['(0, 0, 0) = {0 1 2}', - '(1, 0, 0) = {4 5 6}', - '(2, 0, 0) = {8 9 10}', - '(3, 0, 0) = {12 13 14}', - '(4, 0, 0) = {16 17 18}', - '(5, 0, 0) = {20 21 22}']) - - def test_dump_long4(self): - self.try_command('language renderscript allocation dump 18', - ['(0, 0, 0) = {0 1 2 3}', - '(0, 1, 0) = {4 5 6 7}', - '(0, 2, 0) = {8 9 10 11}', - '(0, 3, 0) = {12 13 14 15}', - '(0, 4, 0) = {16 17 18 19}', - '(0, 5, 0) = {20 21 22 23}']) - - def test_dump_bool(self): - self.try_command('language renderscript allocation dump 19', - ['(0, 0, 0) = false', - '(1, 0, 0) = true', - '(2, 0, 0) = false', - '(3, 0, 0) = true', - '(4, 0, 0) = false', - '(5, 0, 0) = true', - '(6, 0, 0) = false', - '(7, 0, 0) = true', - '(8, 0, 0) = false', - '(9, 0, 0) = true', - '(10, 0, 0) = false', - '(11, 0, 0) = true', - '(12, 0, 0) = false', - '(13, 0, 0) = true', - '(14, 0, 0) = false', - '(15, 0, 0) = true', - '(16, 0, 0) = false', - '(17, 0, 0) = true', - '(18, 0, 0) = false', - '(19, 0, 0) = true', - '(20, 0, 0) = false', - '(21, 0, 0) = true', - '(22, 0, 0) = false', - '(23, 0, 0) 
= true']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup_breakpoints(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 2', ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 3', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_allocation_dump_2.py b/tests/lldb/tests/testcases/test_allocation_dump_2.py deleted file mode 100644 index 13123ec1..00000000 --- a/tests/lldb/tests/testcases/test_allocation_dump_2.py +++ /dev/null @@ -1,604 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestAllocationDump2''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test -) - - -class TestAllocationDump2(TestBaseRemote): - '''Tests printing the contents of allocations.''' - - bundle_target = { - 'java': 'Allocations' - } - - @wimpy - @ordered_test(0) - def test_allocation_dump1(self): - # pylint: disable=line-too-long - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint del 1', - ['1 breakpoints deleted']) - - # Hit second kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # uchar - self.try_command('language renderscript allocation dump 20', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - @ordered_test(1) - def test_allocation_dump_unsigned_types(self): - # uchar2 - self.try_command('language renderscript allocation dump 21', - ['(0, 0, 0) = {0x00 0x01}', - '(1, 0, 0) = {0x02 0x03}', - '(0, 1, 0) = {0x04 0x05}', - '(1, 1, 0) = {0x06 0x07}', - '(0, 2, 0) = {0x08 0x09}', - '(1, 2, 0) = {0x0a 0x0b}', - '(0, 3, 0) = {0x0c 0x0d}', - '(1, 3, 0) = {0x0e 0x0f}', - '(0, 4, 0) = {0x10 0x11}', - '(1, 4, 0) = {0x12 0x13}', - '(0, 5, 0) = {0x14 0x15}', - '(1, 5, 0) = {0x16 0x17}']) - - # uchar3 - self.try_command('language renderscript allocation 
dump 22', - ['(0, 0, 0) = {0x00 0x01 0x02}', - '(1, 0, 0) = {0x04 0x05 0x06}', - '(2, 0, 0) = {0x08 0x09 0x0a}', - '(3, 0, 0) = {0x0c 0x0d 0x0e}', - '(4, 0, 0) = {0x10 0x11 0x12}', - '(5, 0, 0) = {0x14 0x15 0x16}']) - - # uchar4 - self.try_command('language renderscript allocation dump 23', - ['(0, 0, 0) = {0x00 0x01 0x02 0x03}', - '(1, 0, 0) = {0x04 0x05 0x06 0x07}', - '(2, 0, 0) = {0x08 0x09 0x0a 0x0b}', - '(3, 0, 0) = {0x0c 0x0d 0x0e 0x0f}', - '(4, 0, 0) = {0x10 0x11 0x12 0x13}', - '(5, 0, 0) = {0x14 0x15 0x16 0x17}']) - - # ushort - self.try_command('language renderscript allocation dump 24', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # ushort2 - self.try_command('language renderscript allocation dump 25', - ['(0, 0, 0) = {0x0000 0x0001}', - '(1, 0, 0) = {0x0002 0x0003}', - '(2, 0, 0) = {0x0004 0x0005}', - '(3, 0, 0) = {0x0006 0x0007}', - '(4, 0, 0) = {0x0008 0x0009}', - '(5, 0, 0) = {0x000a 0x000b}', - '(6, 0, 0) = {0x000c 0x000d}', - '(7, 0, 0) = {0x000e 0x000f}', - '(8, 0, 0) = {0x0010 0x0011}', - '(9, 0, 0) = {0x0012 0x0013}', - '(10, 0, 0) = {0x0014 0x0015}', - '(11, 0, 0) = {0x0016 0x0017}']) - - # ushort3 - self.try_command('language renderscript allocation dump 26', - ['(0, 0, 0) = {0x0000 0x0001 0x0002}', - '(0, 1, 0) = {0x0004 0x0005 0x0006}', - '(0, 2, 0) = {0x0008 0x0009 0x000a}', - '(0, 3, 0) = {0x000c 0x000d 0x000e}', - '(0, 4, 0) = {0x0010 0x0011 0x0012}', - '(0, 5, 0) = {0x0014 0x0015 0x0016}']) - - # ushort4 - self.try_command('language renderscript allocation dump 27', - ['(0, 0, 0) = {0x0000 0x0001 
0x0002 0x0003}', - '(1, 0, 0) = {0x0004 0x0005 0x0006 0x0007}', - '(2, 0, 0) = {0x0008 0x0009 0x000a 0x000b}', - '(3, 0, 0) = {0x000c 0x000d 0x000e 0x000f}', - '(4, 0, 0) = {0x0010 0x0011 0x0012 0x0013}', - '(5, 0, 0) = {0x0014 0x0015 0x0016 0x0017}']) - - # uint - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # uint2 - self.try_command('language renderscript allocation dump 29', - ['(0, 0, 0) = {0x00000000 0x00000001}', - '(1, 0, 0) = {0x00000002 0x00000003}', - '(2, 0, 0) = {0x00000004 0x00000005}', - '(3, 0, 0) = {0x00000006 0x00000007}', - '(4, 0, 0) = {0x00000008 0x00000009}', - '(5, 0, 0) = {0x0000000a 0x0000000b}', - '(6, 0, 0) = {0x0000000c 0x0000000d}', - '(7, 0, 0) = {0x0000000e 0x0000000f}', - '(8, 0, 0) = {0x00000010 0x00000011}', - '(9, 0, 0) = {0x00000012 0x00000013}', - '(10, 0, 0) = {0x00000014 0x00000015}', - '(11, 0, 0) = {0x00000016 0x00000017}']) - - # uint3 - self.try_command('language renderscript allocation dump 30', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002}', - '(1, 0, 0) = {0x00000004 0x00000005 0x00000006}', - '(2, 0, 0) = {0x00000008 0x00000009 0x0000000a}', - '(3, 0, 0) = {0x0000000c 0x0000000d 0x0000000e}', - '(4, 0, 0) = {0x00000010 0x00000011 0x00000012}', - '(5, 0, 0) = {0x00000014 0x00000015 0x00000016}']) - - # uint4 - self.try_command('language renderscript allocation dump 31', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002 0x00000003}', - '(0, 0, 1) = {0x00000004 0x00000005 0x00000006 0x00000007}', - '(0, 0, 2) = 
{0x00000008 0x00000009 0x0000000a 0x0000000b}', - '(0, 0, 3) = {0x0000000c 0x0000000d 0x0000000e 0x0000000f}', - '(0, 0, 4) = {0x00000010 0x00000011 0x00000012 0x00000013}', - '(0, 0, 5) = {0x00000014 0x00000015 0x00000016 0x00000017}']) - - # ulong - self.try_command('language renderscript allocation dump 32', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(0, 1, 0) = 4', - '(1, 1, 0) = 5', - '(2, 1, 0) = 6', - '(3, 1, 0) = 7', - '(0, 2, 0) = 8', - '(1, 2, 0) = 9', - '(2, 2, 0) = 10', - '(3, 2, 0) = 11', - '(0, 0, 1) = 12', - '(1, 0, 1) = 13', - '(2, 0, 1) = 14', - '(3, 0, 1) = 15', - '(0, 1, 1) = 16', - '(1, 1, 1) = 17', - '(2, 1, 1) = 18', - '(3, 1, 1) = 19', - '(0, 2, 1) = 20', - '(1, 2, 1) = 21', - '(2, 2, 1) = 22', - '(3, 2, 1) = 23']) - - # ulong2 - self.try_command('language renderscript allocation dump 33', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001}', - '(1, 0, 0) = {0x0000000000000002 0x0000000000000003}', - '(2, 0, 0) = {0x0000000000000004 0x0000000000000005}', - '(3, 0, 0) = {0x0000000000000006 0x0000000000000007}', - '(4, 0, 0) = {0x0000000000000008 0x0000000000000009}', - '(5, 0, 0) = {0x000000000000000a 0x000000000000000b}', - '(6, 0, 0) = {0x000000000000000c 0x000000000000000d}', - '(7, 0, 0) = {0x000000000000000e 0x000000000000000f}', - '(8, 0, 0) = {0x0000000000000010 0x0000000000000011}', - '(9, 0, 0) = {0x0000000000000012 0x0000000000000013}', - '(10, 0, 0) = {0x0000000000000014 0x0000000000000015}', - '(11, 0, 0) = {0x0000000000000016 0x0000000000000017}']) - - # ulong3 - self.try_command('language renderscript allocation dump 34', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 0x0000000000000002}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 0x0000000000000006}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 0x000000000000000a}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d 0x000000000000000e}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 
0x0000000000000012}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 0x0000000000000016}']) - - # ulong4 - self.try_command('language renderscript allocation dump 35', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 ' - '0x0000000000000002 0x0000000000000003}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 ' - '0x0000000000000006 0x0000000000000007}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 ' - '0x000000000000000a 0x000000000000000b}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d ' - '0x000000000000000e 0x000000000000000f}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 ' - '0x0000000000000012 0x0000000000000013}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 ' - '0x0000000000000016 0x0000000000000017}']) - - @wimpy - @ordered_test(3) - def test_dump_square_kernel(self): - self.try_command('breakpoint del 2', - ['1 breakpoints deleted']) - - # Hit third kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Test that uint allocation has been squared by square_kernel - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 4', - '(3, 0, 0) = 9', - '(4, 0, 0) = 16', - '(5, 0, 0) = 25', - '(6, 0, 0) = 36', - '(7, 0, 0) = 49', - '(8, 0, 0) = 64', - '(9, 0, 0) = 81', - '(10, 0, 0) = 100', - '(11, 0, 0) = 121', - '(12, 0, 0) = 144', - '(13, 0, 0) = 169', - '(14, 0, 0) = 196', - '(15, 0, 0) = 225', - '(16, 0, 0) = 256', - '(17, 0, 0) = 289', - '(18, 0, 0) = 324', - '(19, 0, 0) = 361', - '(20, 0, 0) = 400', - '(21, 0, 0) = 441', - '(22, 0, 0) = 484', - '(23, 0, 0) = 529']) - - @ordered_test(4) - def test_alloction_dump_floating_types(self): - # half - self.try_command('language renderscript allocation dump 36', - ['(0, 0, 0) = 1', - '(1, 0, 0) = 1.00098', - '(2, 0, 0) = 1.00195', - '(3, 0, 0) = 1.00293', - '(4, 0, 0) = 1.00391', - '(5, 0, 0) = 1.00488', - '(6, 0, 0) = 1.00586', - '(7, 0, 0) = 
1.00684', - '(8, 0, 0) = 1.00781', - '(9, 0, 0) = 1.00879', - '(10, 0, 0) = 1.00977', - '(11, 0, 0) = 1.01074', - '(12, 0, 0) = 1.01172', - '(13, 0, 0) = 1.0127', - '(14, 0, 0) = 1.01367', - '(15, 0, 0) = 1.01465', - '(16, 0, 0) = 1.0156', - '(17, 0, 0) = 1.0166', - '(18, 0, 0) = 1.01758', - '(19, 0, 0) = 1.01855', - '(20, 0, 0) = 1.01953', - '(21, 0, 0) = 1.02051', - '(22, 0, 0) = 1.02148', - '(23, 0, 0) = 1.02246']) - - # half2 - self.try_command('language renderscript allocation dump 37', - ['(0, 0, 0) = {1 1.00098}', - '(1, 0, 0) = {1.00195 1.00293}', - '(2, 0, 0) = {1.00391 1.00488}', - '(3, 0, 0) = {1.00586 1.00684}', - '(4, 0, 0) = {1.00781 1.00879}', - '(5, 0, 0) = {1.00977 1.01074}', - '(6, 0, 0) = {1.01172 1.0127}', - '(7, 0, 0) = {1.01367 1.01465}', - '(9, 0, 0) = {1.01758 1.01855}', - '(10, 0, 0) = {1.01953 1.02051}', - '(11, 0, 0) = {1.02148 1.02246}'], - [r'\(8, 0, 0\) = \{1\.0156[23] 1\.0166\}']) - - # half3 - self.try_command('language renderscript allocation dump 38', - ['(0, 0, 0) = {1 1.00098 1.00195}', - '(0, 1, 0) = {1.00391 1.00488 1.00586}', - '(0, 2, 0) = {1.00781 1.00879 1.00977}', - '(0, 3, 0) = {1.01172 1.0127 1.01367}', - '(0, 5, 0) = {1.01953 1.02051 1.02148}'], - [r'\(0, 4, 0\) = \{1\.0156[23] 1\.0166 1\.01758\}']) - - # half4 - self.try_command('language renderscript allocation dump 39', - ['(0, 0, 0) = {1 1.00098 1.00195 1.00293}', - '(1, 0, 0) = {1.00391 1.00488 1.00586 1.00684}', - '(2, 0, 0) = {1.00781 1.00879 1.00977 1.01074}', - '(3, 0, 0) = {1.01172 1.0127 1.01367 1.01465}', - '(5, 0, 0) = {1.01953 1.02051 1.02148 1.02246}'], - [r'\(4, 0, 0\) = \{1\.0156[23] 1\.0166 1\.01758 1\.01855\}']) - - # float - self.try_command('language renderscript allocation dump 40', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166667', - '(7, 0, 0) = 0.142857', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 
0.0909091', - '(12, 0, 0) = 0.0833333', - '(13, 0, 0) = 0.0769231', - '(14, 0, 0) = 0.0714286', - '(15, 0, 0) = 0.0666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235', - '(18, 0, 0) = 0.0555556', - '(19, 0, 0) = 0.0526316', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.047619', - '(22, 0, 0) = 0.0454545', - '(23, 0, 0) = 0.0434783']) - - # float2 - self.try_command('language renderscript allocation dump 41', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = {0.166667 0.142857}', - '(4, 0, 0) = {0.125 0.111111}', - '(5, 0, 0) = {0.1 0.0909091}', - '(6, 0, 0) = {0.0833333 0.0769231}', - '(7, 0, 0) = {0.0714286 0.0666667}', - '(8, 0, 0) = {0.0625 0.0588235}', - '(9, 0, 0) = {0.0555556 0.0526316}', - '(10, 0, 0) = {0.05 0.047619}', - '(11, 0, 0) = {0.0454545 0.0434783}']) - - # float3 - self.try_command('language renderscript allocation dump 42', - ['(0, 0, 0) = {inf 1 0.5}', - '(1, 0, 0) = {0.25 0.2 0.166667}', - '(2, 0, 0) = {0.125 0.111111 0.1}', - '(3, 0, 0) = {0.0833333 0.0769231 0.0714286}', - '(4, 0, 0) = {0.0625 0.0588235 0.0555556}', - '(5, 0, 0) = {0.05 0.047619 0.0454545}']) - - # float4 - self.try_command('language renderscript allocation dump 43', - ['(0, 0, 0) = {inf 1 0.5 0.333333}', - '(1, 0, 0) = {0.25 0.2 0.166667 0.142857}', - '(2, 0, 0) = {0.125 0.111111 0.1 0.0909091}', - '(0, 1, 0) = {0.0833333 0.0769231 0.0714286 0.0666667}', - '(1, 1, 0) = {0.0625 0.0588235 0.0555556 0.0526316}', - '(2, 1, 0) = {0.05 0.047619 0.0454545 0.0434783}']) - - # double - self.try_command('language renderscript allocation dump 44', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333333333333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166666666666667', - '(7, 0, 0) = 0.142857142857143', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111111111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 0.0909090909090909', - '(12, 0, 0) = 0.0833333333333333', - '(13, 0, 0) = 
0.0769230769230769', - '(14, 0, 0) = 0.0714285714285714', - '(15, 0, 0) = 0.0666666666666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235294117647', - '(18, 0, 0) = 0.0555555555555556', - '(19, 0, 0) = 0.0526315789473684', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.0476190476190476', - '(22, 0, 0) = 0.0454545454545455', - '(23, 0, 0) = 0.0434782608695652']) - - # double2 - self.try_command('language renderscript allocation dump 45', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333333333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = {0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111}', - '(1, 0, 1) = {0.1 0.0909090909090909}', - '(2, 0, 1) = {0.0833333333333333 0.0769230769230769}', - '(3, 0, 1) = {0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647}', - '(1, 0, 2) = {0.0555555555555556 0.0526315789473684}', - '(2, 0, 2) = {0.05 0.0476190476190476}', - '(3, 0, 2) = {0.0454545454545455 0.0434782608695652}']) - - # double3 - self.try_command('language renderscript allocation dump 46', - ['(0, 0, 0) = {inf 1 0.5}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1}', - '(0, 1, 1) = {0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714}', - '(0, 0, 2) = {0.0625 0.0588235294117647 0.0555555555555556}', - '(0, 1, 2) = {0.05 0.0476190476190476 0.0454545454545455}']) - - # double4 - self.try_command('language renderscript allocation dump 47', - ['(0, 0, 0) = {inf 1 0.5 0.333333333333333}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1 0.0909090909090909}', - '(0, 1, 1) = {0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647 ' - '0.0555555555555556 0.0526315789473684}', - '(0, 1, 2) = {0.05 0.0476190476190476 ' - '0.0454545454545455 0.0434782608695652}']) - - @wimpy - @ordered_test(5) - def 
test_allocation_dump_half_kernel(self): - # Delete kernel breakpoint on add_half_kernel - self.try_command('breakpoint del 3', - ['1 breakpoints deleted']) - - # Hit struct_kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Double 3 has been modified by add_half_kernel - self.try_command('language renderscript allocation dump 46', - ['(0, 0, 0) = {inf 1.5 1}', - '(0, 1, 0) = {0.75 0.7 0.666666666666667}', - '(0, 0, 1) = {0.625 0.611111111111111 0.6}', - '(0, 1, 1) = {0.583333333333333 0.576923076923077 0.571428571428571}', - '(0, 0, 2) = {0.5625 0.558823529411765 0.555555555555556}', - '(0, 1, 2) = {0.55 0.547619047619048 0.545454545454545}']) - - # Floating point allocation data should have been overwritten - self.try_command('language renderscript allocation dump 40', - ['(0, 0, 0) = -inf', - '(1, 0, 0) = -1', - '(2, 0, 0) = -0.5', - '(3, 0, 0) = -0.333333', - '(4, 0, 0) = -0.25', - '(5, 0, 0) = -0.2', - '(6, 0, 0) = -0.166667', - '(7, 0, 0) = -0.142857', - '(8, 0, 0) = -0.125', - '(9, 0, 0) = -0.111111', - '(10, 0, 0) = -0.1', - '(11, 0, 0) = -0.0909091', - '(12, 0, 0) = -0.0833333', - '(13, 0, 0) = -0.0769231', - '(14, 0, 0) = -0.0714286', - '(15, 0, 0) = -0.0666667', - '(16, 0, 0) = -0.0625', - '(17, 0, 0) = -0.0588235', - '(18, 0, 0) = -0.0555556', - '(19, 0, 0) = -0.0526316', - '(20, 0, 0) = -0.05', - '(21, 0, 0) = -0.047619', - '(22, 0, 0) = -0.0454545', - '(23, 0, 0) = -0.0434783']) - - self.try_command('language renderscript allocation dump 41', - ['(0, 0, 0) = {-inf -1}', - '(1, 0, 0) = {-0.5 -0.333333}', - '(2, 0, 0) = {-0.25 -0.2}', - '(3, 0, 0) = {-0.166667 -0.142857}', - '(4, 0, 0) = {-0.125 -0.111111}', - '(5, 0, 0) = {-0.1 -0.0909091}', - '(6, 0, 0) = {-0.0833333 -0.0769231}', - '(7, 0, 0) = {-0.0714286 -0.0666667}', - '(8, 0, 0) = {-0.0625 -0.0588235}', - '(9, 0, 0) = {-0.0555556 -0.0526316}', - '(10, 0, 0) = {-0.05 -0.047619}', - '(11, 0, 0) = {-0.0454545 -0.0434783}']) - - 
self.try_command('language renderscript allocation dump 42', - ['(0, 0, 0) = {-inf -1 -0.5}', - '(1, 0, 0) = {-0.25 -0.2 -0.166667}', - '(2, 0, 0) = {-0.125 -0.111111 -0.1}', - '(3, 0, 0) = {-0.0833333 -0.0769231 -0.0714286}', - '(4, 0, 0) = {-0.0625 -0.0588235 -0.0555556}', - '(5, 0, 0) = {-0.05 -0.047619 -0.0454545}']) - - self.try_command('language renderscript allocation dump 43', - ['(0, 0, 0) = {-inf -1 -0.5 -0.333333}', - '(1, 0, 0) = {-0.25 -0.2 -0.166667 -0.142857}', - '(2, 0, 0) = {-0.125 -0.111111 -0.1 -0.0909091}', - '(0, 1, 0) = {-0.0833333 -0.0769231 -0.0714286 -0.0666667}', - '(1, 1, 0) = {-0.0625 -0.0588235 -0.0555556 -0.0526316}', - '(2, 1, 0) = {-0.05 -0.047619 -0.0454545 -0.0434783}']) diff --git a/tests/lldb/tests/testcases/test_allocation_dump_2_cpp.py b/tests/lldb/tests/testcases/test_allocation_dump_2_cpp.py deleted file mode 100644 index b103ccbb..00000000 --- a/tests/lldb/tests/testcases/test_allocation_dump_2_cpp.py +++ /dev/null @@ -1,525 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestAllocationDump2Cpp.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote - - -class TestAllocationDump2Cpp(TestBaseRemote): - '''Tests printing the contents of allocations in an NDK app.''' - - bundle_target = { - 'cpp': 'CppAllocations' - } - - def test_case(self): - '''Run the lldb commands that are being tested. 
- - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - # pylint: disable=line-too-long - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint del 1', - ['1 breakpoints deleted']) - - # Hit second kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # uchar - self.try_command('language renderscript allocation dump 20', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # uchar2 - self.try_command('language renderscript allocation dump 21', - ['(0, 0, 0) = {0x00 0x01}', - '(1, 0, 0) = {0x02 0x03}', - '(0, 1, 0) = {0x04 0x05}', - '(1, 1, 0) = {0x06 0x07}', - '(0, 2, 0) = {0x08 0x09}', - '(1, 2, 0) = {0x0a 0x0b}', - '(0, 3, 0) = {0x0c 0x0d}', - '(1, 3, 0) = {0x0e 0x0f}', - '(0, 4, 0) = {0x10 0x11}', - '(1, 4, 0) = {0x12 0x13}', - '(0, 5, 0) = {0x14 0x15}', - '(1, 5, 0) = {0x16 0x17}']) - - # uchar3 - self.try_command('language renderscript allocation dump 22', - ['(0, 0, 0) = {0x00 0x01 0x02}', - '(1, 0, 0) = {0x04 0x05 0x06}', - '(2, 0, 0) = {0x08 0x09 0x0a}', - '(3, 0, 0) = {0x0c 0x0d 0x0e}', - '(4, 0, 0) = {0x10 0x11 0x12}', - '(5, 0, 0) = {0x14 0x15 0x16}']) - - # uchar4 - self.try_command('language renderscript allocation dump 23', - ['(0, 0, 0) = {0x00 0x01 0x02 0x03}', - '(1, 0, 0) = {0x04 0x05 0x06 0x07}', - '(2, 0, 0) = {0x08 0x09 0x0a 
0x0b}', - '(3, 0, 0) = {0x0c 0x0d 0x0e 0x0f}', - '(4, 0, 0) = {0x10 0x11 0x12 0x13}', - '(5, 0, 0) = {0x14 0x15 0x16 0x17}']) - - # ushort - self.try_command('language renderscript allocation dump 24', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # ushort2 - self.try_command('language renderscript allocation dump 25', - ['(0, 0, 0) = {0x0000 0x0001}', - '(1, 0, 0) = {0x0002 0x0003}', - '(2, 0, 0) = {0x0004 0x0005}', - '(3, 0, 0) = {0x0006 0x0007}', - '(4, 0, 0) = {0x0008 0x0009}', - '(5, 0, 0) = {0x000a 0x000b}', - '(6, 0, 0) = {0x000c 0x000d}', - '(7, 0, 0) = {0x000e 0x000f}', - '(8, 0, 0) = {0x0010 0x0011}', - '(9, 0, 0) = {0x0012 0x0013}', - '(10, 0, 0) = {0x0014 0x0015}', - '(11, 0, 0) = {0x0016 0x0017}']) - - # ushort3 - self.try_command('language renderscript allocation dump 26', - ['(0, 0, 0) = {0x0000 0x0001 0x0002}', - '(0, 1, 0) = {0x0004 0x0005 0x0006}', - '(0, 2, 0) = {0x0008 0x0009 0x000a}', - '(0, 3, 0) = {0x000c 0x000d 0x000e}', - '(0, 4, 0) = {0x0010 0x0011 0x0012}', - '(0, 5, 0) = {0x0014 0x0015 0x0016}']) - - # ushort4 - self.try_command('language renderscript allocation dump 27', - ['(0, 0, 0) = {0x0000 0x0001 0x0002 0x0003}', - '(1, 0, 0) = {0x0004 0x0005 0x0006 0x0007}', - '(2, 0, 0) = {0x0008 0x0009 0x000a 0x000b}', - '(3, 0, 0) = {0x000c 0x000d 0x000e 0x000f}', - '(4, 0, 0) = {0x0010 0x0011 0x0012 0x0013}', - '(5, 0, 0) = {0x0014 0x0015 0x0016 0x0017}']) - - # uint - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - 
'(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # uint2 - self.try_command('language renderscript allocation dump 29', - ['(0, 0, 0) = {0x00000000 0x00000001}', - '(1, 0, 0) = {0x00000002 0x00000003}', - '(2, 0, 0) = {0x00000004 0x00000005}', - '(3, 0, 0) = {0x00000006 0x00000007}', - '(4, 0, 0) = {0x00000008 0x00000009}', - '(5, 0, 0) = {0x0000000a 0x0000000b}', - '(6, 0, 0) = {0x0000000c 0x0000000d}', - '(7, 0, 0) = {0x0000000e 0x0000000f}', - '(8, 0, 0) = {0x00000010 0x00000011}', - '(9, 0, 0) = {0x00000012 0x00000013}', - '(10, 0, 0) = {0x00000014 0x00000015}', - '(11, 0, 0) = {0x00000016 0x00000017}']) - - # uint3 - self.try_command('language renderscript allocation dump 30', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002}', - '(1, 0, 0) = {0x00000004 0x00000005 0x00000006}', - '(2, 0, 0) = {0x00000008 0x00000009 0x0000000a}', - '(3, 0, 0) = {0x0000000c 0x0000000d 0x0000000e}', - '(4, 0, 0) = {0x00000010 0x00000011 0x00000012}', - '(5, 0, 0) = {0x00000014 0x00000015 0x00000016}']) - - # uint4 - self.try_command('language renderscript allocation dump 31', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002 0x00000003}', - '(0, 0, 1) = {0x00000004 0x00000005 0x00000006 0x00000007}', - '(0, 0, 2) = {0x00000008 0x00000009 0x0000000a 0x0000000b}', - '(0, 0, 3) = {0x0000000c 0x0000000d 0x0000000e 0x0000000f}', - '(0, 0, 4) = {0x00000010 0x00000011 0x00000012 0x00000013}', - '(0, 0, 5) = {0x00000014 0x00000015 0x00000016 0x00000017}']) - - # ulong - self.try_command('language renderscript allocation dump 32', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(0, 1, 0) = 4', - '(1, 
1, 0) = 5', - '(2, 1, 0) = 6', - '(3, 1, 0) = 7', - '(0, 2, 0) = 8', - '(1, 2, 0) = 9', - '(2, 2, 0) = 10', - '(3, 2, 0) = 11', - '(0, 0, 1) = 12', - '(1, 0, 1) = 13', - '(2, 0, 1) = 14', - '(3, 0, 1) = 15', - '(0, 1, 1) = 16', - '(1, 1, 1) = 17', - '(2, 1, 1) = 18', - '(3, 1, 1) = 19', - '(0, 2, 1) = 20', - '(1, 2, 1) = 21', - '(2, 2, 1) = 22', - '(3, 2, 1) = 23']) - - # ulong2 - self.try_command('language renderscript allocation dump 33', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001}', - '(1, 0, 0) = {0x0000000000000002 0x0000000000000003}', - '(2, 0, 0) = {0x0000000000000004 0x0000000000000005}', - '(3, 0, 0) = {0x0000000000000006 0x0000000000000007}', - '(4, 0, 0) = {0x0000000000000008 0x0000000000000009}', - '(5, 0, 0) = {0x000000000000000a 0x000000000000000b}', - '(6, 0, 0) = {0x000000000000000c 0x000000000000000d}', - '(7, 0, 0) = {0x000000000000000e 0x000000000000000f}', - '(8, 0, 0) = {0x0000000000000010 0x0000000000000011}', - '(9, 0, 0) = {0x0000000000000012 0x0000000000000013}', - '(10, 0, 0) = {0x0000000000000014 0x0000000000000015}', - '(11, 0, 0) = {0x0000000000000016 0x0000000000000017}']) - - # ulong3 - self.try_command('language renderscript allocation dump 34', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 0x0000000000000002}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 0x0000000000000006}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 0x000000000000000a}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d 0x000000000000000e}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 0x0000000000000012}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 0x0000000000000016}']) - - # ulong4 - self.try_command('language renderscript allocation dump 35', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 ' - '0x0000000000000002 0x0000000000000003}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 ' - '0x0000000000000006 0x0000000000000007}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 ' 
- '0x000000000000000a 0x000000000000000b}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d ' - '0x000000000000000e 0x000000000000000f}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 ' - '0x0000000000000012 0x0000000000000013}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 ' - '0x0000000000000016 0x0000000000000017}']) - - self.try_command('breakpoint del 2', - ['1 breakpoints deleted']) - - # Hit third kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Test that uint allocation has been squared by square_kernel - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 4', - '(3, 0, 0) = 9', - '(4, 0, 0) = 16', - '(5, 0, 0) = 25', - '(6, 0, 0) = 36', - '(7, 0, 0) = 49', - '(8, 0, 0) = 64', - '(9, 0, 0) = 81', - '(10, 0, 0) = 100', - '(11, 0, 0) = 121', - '(12, 0, 0) = 144', - '(13, 0, 0) = 169', - '(14, 0, 0) = 196', - '(15, 0, 0) = 225', - '(16, 0, 0) = 256', - '(17, 0, 0) = 289', - '(18, 0, 0) = 324', - '(19, 0, 0) = 361', - '(20, 0, 0) = 400', - '(21, 0, 0) = 441', - '(22, 0, 0) = 484', - '(23, 0, 0) = 529']) - - # half - self.try_command('language renderscript allocation dump 36', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333252', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.199951', - '(6, 0, 0) = 0.166626', - '(7, 0, 0) = 0.142822', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111084', - '(10, 0, 0) = 0.0999756', - '(11, 0, 0) = 0.0908813', - '(12, 0, 0) = 0.083313', - '(13, 0, 0) = 0.0769043', - '(14, 0, 0) = 0.0714111', - '(15, 0, 0) = 0.0666504', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588379', - '(18, 0, 0) = 0.055542', - '(19, 0, 0) = 0.0526428', - '(20, 0, 0) = 0.0499878', - '(21, 0, 0) = 0.0476074', - '(22, 0, 0) = 0.0454407', - '(23, 0, 0) = 0.0434875']) - - # half2 - self.try_command('language renderscript allocation dump 37', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 
0.333252}', - '(2, 0, 0) = {0.25 0.199951}', - '(3, 0, 0) = {0.166626 0.142822}', - '(4, 0, 0) = {0.125 0.111084}', - '(5, 0, 0) = {0.0999756 0.0908813}', - '(6, 0, 0) = {0.083313 0.0769043}', - '(7, 0, 0) = {0.0714111 0.0666504}', - '(8, 0, 0) = {0.0625 0.0588379}', - '(9, 0, 0) = {0.055542 0.0526428}', - '(10, 0, 0) = {0.0499878 0.0476074}', - '(11, 0, 0) = {0.0454407 0.0434875}']) - - # half3 - self.try_command('language renderscript allocation dump 38', - ['(0, 0, 0) = {inf 1 0.5}', - '(0, 1, 0) = {0.25 0.199951 0.166626}', - '(0, 2, 0) = {0.125 0.111084 0.0999756}', - '(0, 3, 0) = {0.083313 0.0769043 0.0714111}', - '(0, 4, 0) = {0.0625 0.0588379 0.055542}', - '(0, 5, 0) = {0.0499878 0.0476074 0.0454407}']) - - # half4 - self.try_command('language renderscript allocation dump 39', - ['(0, 0, 0) = {inf 1 0.5 0.333252}', - '(1, 0, 0) = {0.25 0.199951 0.166626 0.142822}', - '(2, 0, 0) = {0.125 0.111084 0.0999756 0.0908813}', - '(3, 0, 0) = {0.083313 0.0769043 0.0714111 0.0666504}', - '(4, 0, 0) = {0.0625 0.0588379 0.055542 0.0526428}', - '(5, 0, 0) = {0.0499878 0.0476074 0.0454407 0.0434875}']) - - # float - self.try_command('language renderscript allocation dump 40', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166667', - '(7, 0, 0) = 0.142857', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 0.0909091', - '(12, 0, 0) = 0.0833333', - '(13, 0, 0) = 0.0769231', - '(14, 0, 0) = 0.0714286', - '(15, 0, 0) = 0.0666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235', - '(18, 0, 0) = 0.0555556', - '(19, 0, 0) = 0.0526316', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.047619', - '(22, 0, 0) = 0.0454545', - '(23, 0, 0) = 0.0434783']) - - # float2 - self.try_command('language renderscript allocation dump 41', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = {0.166667 0.142857}', - 
'(4, 0, 0) = {0.125 0.111111}', - '(5, 0, 0) = {0.1 0.0909091}', - '(6, 0, 0) = {0.0833333 0.0769231}', - '(7, 0, 0) = {0.0714286 0.0666667}', - '(8, 0, 0) = {0.0625 0.0588235}', - '(9, 0, 0) = {0.0555556 0.0526316}', - '(10, 0, 0) = {0.05 0.047619}', - '(11, 0, 0) = {0.0454545 0.0434783}']) - - # float3 - self.try_command('language renderscript allocation dump 42', - ['(0, 0, 0) = {inf 1 0.5}', - '(1, 0, 0) = {0.25 0.2 0.166667}', - '(2, 0, 0) = {0.125 0.111111 0.1}', - '(3, 0, 0) = {0.0833333 0.0769231 0.0714286}', - '(4, 0, 0) = {0.0625 0.0588235 0.0555556}', - '(5, 0, 0) = {0.05 0.047619 0.0454545}']) - - # float4 - self.try_command('language renderscript allocation dump 43', - ['(0, 0, 0) = {inf 1 0.5 0.333333}', - '(1, 0, 0) = {0.25 0.2 0.166667 0.142857}', - '(2, 0, 0) = {0.125 0.111111 0.1 0.0909091}', - '(0, 1, 0) = {0.0833333 0.0769231 0.0714286 0.0666667}', - '(1, 1, 0) = {0.0625 0.0588235 0.0555556 0.0526316}', - '(2, 1, 0) = {0.05 0.047619 0.0454545 0.0434783}']) - - # double - self.try_command('language renderscript allocation dump 44', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333333333333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166666666666667', - '(7, 0, 0) = 0.142857142857143', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111111111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 0.0909090909090909', - '(12, 0, 0) = 0.0833333333333333', - '(13, 0, 0) = 0.0769230769230769', - '(14, 0, 0) = 0.0714285714285714', - '(15, 0, 0) = 0.0666666666666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235294117647', - '(18, 0, 0) = 0.0555555555555556', - '(19, 0, 0) = 0.0526315789473684', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.0476190476190476', - '(22, 0, 0) = 0.0454545454545455', - '(23, 0, 0) = 0.0434782608695652']) - - # double2 - self.try_command('language renderscript allocation dump 45', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333333333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = 
{0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111}', - '(1, 0, 1) = {0.1 0.0909090909090909}', - '(2, 0, 1) = {0.0833333333333333 0.0769230769230769}', - '(3, 0, 1) = {0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647}', - '(1, 0, 2) = {0.0555555555555556 0.0526315789473684}', - '(2, 0, 2) = {0.05 0.0476190476190476}', - '(3, 0, 2) = {0.0454545454545455 0.0434782608695652}']) - - # double3 - self.try_command('language renderscript allocation dump 46', - ['(0, 0, 0) = {inf 1 0.5}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1}', - '(0, 1, 1) = {0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714}', - '(0, 0, 2) = {0.0625 0.0588235294117647 0.0555555555555556}', - '(0, 1, 2) = {0.05 0.0476190476190476 0.0454545454545455}']) - - # double4 - self.try_command('language renderscript allocation dump 47', - ['(0, 0, 0) = {inf 1 0.5 0.333333333333333}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1 0.0909090909090909}', - '(0, 1, 1) = {0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647 ' - '0.0555555555555556 0.0526315789473684}', - '(0, 1, 2) = {0.05 0.0476190476190476 ' - '0.0454545454545455 0.0434782608695652}']) - - # Delete kernel breakpoint on add_half_kernel - self.try_command('breakpoint del 3', - ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_allocation_dump_2_jni.py b/tests/lldb/tests/testcases/test_allocation_dump_2_jni.py deleted file mode 100644 index 41e33d39..00000000 --- a/tests/lldb/tests/testcases/test_allocation_dump_2_jni.py +++ /dev/null @@ -1,518 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestAllocationDump2JNI.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote - - -class TestAllocationDump2JNI(TestBaseRemote): - '''Tests printing the contents of allocations of a JNI apk.''' - - bundle_target = { - 'jni': 'JNIAllocations' - } - - def test_case(self): - '''Run the lldb commands that are being tested. - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - # pylint: disable=line-too-long - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint del 1', - ['1 breakpoints deleted']) - - # Hit second kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # uchar - self.try_command('language renderscript allocation dump 20', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # uchar2 - self.try_command('language 
renderscript allocation dump 21', - ['(0, 0, 0) = {0x00 0x01}', - '(1, 0, 0) = {0x02 0x03}', - '(0, 1, 0) = {0x04 0x05}', - '(1, 1, 0) = {0x06 0x07}', - '(0, 2, 0) = {0x08 0x09}', - '(1, 2, 0) = {0x0a 0x0b}', - '(0, 3, 0) = {0x0c 0x0d}', - '(1, 3, 0) = {0x0e 0x0f}', - '(0, 4, 0) = {0x10 0x11}', - '(1, 4, 0) = {0x12 0x13}', - '(0, 5, 0) = {0x14 0x15}', - '(1, 5, 0) = {0x16 0x17}']) - - # uchar3 - self.try_command('language renderscript allocation dump 22', - ['(0, 0, 0) = {0x00 0x01 0x02}', - '(1, 0, 0) = {0x04 0x05 0x06}', - '(2, 0, 0) = {0x08 0x09 0x0a}', - '(3, 0, 0) = {0x0c 0x0d 0x0e}', - '(4, 0, 0) = {0x10 0x11 0x12}', - '(5, 0, 0) = {0x14 0x15 0x16}']) - - # uchar4 - self.try_command('language renderscript allocation dump 23', - ['(0, 0, 0) = {0x00 0x01 0x02 0x03}', - '(1, 0, 0) = {0x04 0x05 0x06 0x07}', - '(2, 0, 0) = {0x08 0x09 0x0a 0x0b}', - '(3, 0, 0) = {0x0c 0x0d 0x0e 0x0f}', - '(4, 0, 0) = {0x10 0x11 0x12 0x13}', - '(5, 0, 0) = {0x14 0x15 0x16 0x17}']) - - # ushort - self.try_command('language renderscript allocation dump 24', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # ushort2 - self.try_command('language renderscript allocation dump 25', - ['(0, 0, 0) = {0x0000 0x0001}', - '(1, 0, 0) = {0x0002 0x0003}', - '(2, 0, 0) = {0x0004 0x0005}', - '(3, 0, 0) = {0x0006 0x0007}', - '(4, 0, 0) = {0x0008 0x0009}', - '(5, 0, 0) = {0x000a 0x000b}', - '(6, 0, 0) = {0x000c 0x000d}', - '(7, 0, 0) = {0x000e 0x000f}', - '(8, 0, 0) = {0x0010 0x0011}', - '(9, 0, 0) = {0x0012 0x0013}', - '(10, 0, 0) = {0x0014 0x0015}', - '(11, 0, 0) = 
{0x0016 0x0017}']) - - # ushort3 - self.try_command('language renderscript allocation dump 26', - ['(0, 0, 0) = {0x0000 0x0001 0x0002}', - '(0, 1, 0) = {0x0004 0x0005 0x0006}', - '(0, 2, 0) = {0x0008 0x0009 0x000a}', - '(0, 3, 0) = {0x000c 0x000d 0x000e}', - '(0, 4, 0) = {0x0010 0x0011 0x0012}', - '(0, 5, 0) = {0x0014 0x0015 0x0016}']) - - # ushort4 - self.try_command('language renderscript allocation dump 27', - ['(0, 0, 0) = {0x0000 0x0001 0x0002 0x0003}', - '(1, 0, 0) = {0x0004 0x0005 0x0006 0x0007}', - '(2, 0, 0) = {0x0008 0x0009 0x000a 0x000b}', - '(3, 0, 0) = {0x000c 0x000d 0x000e 0x000f}', - '(4, 0, 0) = {0x0010 0x0011 0x0012 0x0013}', - '(5, 0, 0) = {0x0014 0x0015 0x0016 0x0017}']) - - # uint - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - # uint2 - self.try_command('language renderscript allocation dump 29', - ['(0, 0, 0) = {0x00000000 0x00000001}', - '(1, 0, 0) = {0x00000002 0x00000003}', - '(2, 0, 0) = {0x00000004 0x00000005}', - '(3, 0, 0) = {0x00000006 0x00000007}', - '(4, 0, 0) = {0x00000008 0x00000009}', - '(5, 0, 0) = {0x0000000a 0x0000000b}', - '(6, 0, 0) = {0x0000000c 0x0000000d}', - '(7, 0, 0) = {0x0000000e 0x0000000f}', - '(8, 0, 0) = {0x00000010 0x00000011}', - '(9, 0, 0) = {0x00000012 0x00000013}', - '(10, 0, 0) = {0x00000014 0x00000015}', - '(11, 0, 0) = {0x00000016 0x00000017}']) - - # uint3 - self.try_command('language renderscript allocation dump 30', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002}', - '(1, 0, 0) = {0x00000004 0x00000005 
0x00000006}', - '(2, 0, 0) = {0x00000008 0x00000009 0x0000000a}', - '(3, 0, 0) = {0x0000000c 0x0000000d 0x0000000e}', - '(4, 0, 0) = {0x00000010 0x00000011 0x00000012}', - '(5, 0, 0) = {0x00000014 0x00000015 0x00000016}']) - - # uint4 - self.try_command('language renderscript allocation dump 31', - ['(0, 0, 0) = {0x00000000 0x00000001 0x00000002 0x00000003}', - '(0, 0, 1) = {0x00000004 0x00000005 0x00000006 0x00000007}', - '(0, 0, 2) = {0x00000008 0x00000009 0x0000000a 0x0000000b}', - '(0, 0, 3) = {0x0000000c 0x0000000d 0x0000000e 0x0000000f}', - '(0, 0, 4) = {0x00000010 0x00000011 0x00000012 0x00000013}', - '(0, 0, 5) = {0x00000014 0x00000015 0x00000016 0x00000017}']) - - # ulong - self.try_command('language renderscript allocation dump 32', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(0, 1, 0) = 4', - '(1, 1, 0) = 5', - '(2, 1, 0) = 6', - '(3, 1, 0) = 7', - '(0, 2, 0) = 8', - '(1, 2, 0) = 9', - '(2, 2, 0) = 10', - '(3, 2, 0) = 11', - '(0, 0, 1) = 12', - '(1, 0, 1) = 13', - '(2, 0, 1) = 14', - '(3, 0, 1) = 15', - '(0, 1, 1) = 16', - '(1, 1, 1) = 17', - '(2, 1, 1) = 18', - '(3, 1, 1) = 19', - '(0, 2, 1) = 20', - '(1, 2, 1) = 21', - '(2, 2, 1) = 22', - '(3, 2, 1) = 23']) - - # ulong2 - self.try_command('language renderscript allocation dump 33', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001}', - '(1, 0, 0) = {0x0000000000000002 0x0000000000000003}', - '(2, 0, 0) = {0x0000000000000004 0x0000000000000005}', - '(3, 0, 0) = {0x0000000000000006 0x0000000000000007}', - '(4, 0, 0) = {0x0000000000000008 0x0000000000000009}', - '(5, 0, 0) = {0x000000000000000a 0x000000000000000b}', - '(6, 0, 0) = {0x000000000000000c 0x000000000000000d}', - '(7, 0, 0) = {0x000000000000000e 0x000000000000000f}', - '(8, 0, 0) = {0x0000000000000010 0x0000000000000011}', - '(9, 0, 0) = {0x0000000000000012 0x0000000000000013}', - '(10, 0, 0) = {0x0000000000000014 0x0000000000000015}', - '(11, 0, 0) = {0x0000000000000016 0x0000000000000017}']) - - # 
ulong3 - self.try_command('language renderscript allocation dump 34', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 0x0000000000000002}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 0x0000000000000006}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 0x000000000000000a}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d 0x000000000000000e}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 0x0000000000000012}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 0x0000000000000016}']) - - # ulong4 - self.try_command('language renderscript allocation dump 35', - ['(0, 0, 0) = {0x0000000000000000 0x0000000000000001 ' - '0x0000000000000002 0x0000000000000003}', - '(1, 0, 0) = {0x0000000000000004 0x0000000000000005 ' - '0x0000000000000006 0x0000000000000007}', - '(2, 0, 0) = {0x0000000000000008 0x0000000000000009 ' - '0x000000000000000a 0x000000000000000b}', - '(3, 0, 0) = {0x000000000000000c 0x000000000000000d ' - '0x000000000000000e 0x000000000000000f}', - '(4, 0, 0) = {0x0000000000000010 0x0000000000000011 ' - '0x0000000000000012 0x0000000000000013}', - '(5, 0, 0) = {0x0000000000000014 0x0000000000000015 ' - '0x0000000000000016 0x0000000000000017}']) - - self.try_command('breakpoint del 2', - ['1 breakpoints deleted']) - - # Hit third kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Test that uint allocation has been squared by square_kernel - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 4', - '(3, 0, 0) = 9', - '(4, 0, 0) = 16', - '(5, 0, 0) = 25', - '(6, 0, 0) = 36', - '(7, 0, 0) = 49', - '(8, 0, 0) = 64', - '(9, 0, 0) = 81', - '(10, 0, 0) = 100', - '(11, 0, 0) = 121', - '(12, 0, 0) = 144', - '(13, 0, 0) = 169', - '(14, 0, 0) = 196', - '(15, 0, 0) = 225', - '(16, 0, 0) = 256', - '(17, 0, 0) = 289', - '(18, 0, 0) = 324', - '(19, 0, 0) = 361', - '(20, 0, 0) = 400', - '(21, 0, 0) = 441', - '(22, 0, 
0) = 484', - '(23, 0, 0) = 529']) - - # half - self.try_command('language renderscript allocation dump 36', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333252', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.199951', - '(6, 0, 0) = 0.166626', - '(7, 0, 0) = 0.142822', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111084', - '(10, 0, 0) = 0.0999756', - '(11, 0, 0) = 0.0908813', - '(12, 0, 0) = 0.083313', - '(13, 0, 0) = 0.0769043', - '(14, 0, 0) = 0.0714111', - '(15, 0, 0) = 0.0666504', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588379', - '(18, 0, 0) = 0.055542', - '(19, 0, 0) = 0.0526428', - '(20, 0, 0) = 0.0499878', - '(21, 0, 0) = 0.0476074', - '(22, 0, 0) = 0.0454407', - '(23, 0, 0) = 0.0434875']) - - # half2 - self.try_command('language renderscript allocation dump 37', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333252}', - '(2, 0, 0) = {0.25 0.199951}', - '(3, 0, 0) = {0.166626 0.142822}', - '(4, 0, 0) = {0.125 0.111084}', - '(5, 0, 0) = {0.0999756 0.0908813}', - '(6, 0, 0) = {0.083313 0.0769043}', - '(7, 0, 0) = {0.0714111 0.0666504}', - '(8, 0, 0) = {0.0625 0.0588379}', - '(9, 0, 0) = {0.055542 0.0526428}', - '(10, 0, 0) = {0.0499878 0.0476074}', - '(11, 0, 0) = {0.0454407 0.0434875}']) - - # half3 - self.try_command('language renderscript allocation dump 38', - ['(0, 0, 0) = {inf 1 0.5}', - '(0, 1, 0) = {0.25 0.199951 0.166626}', - '(0, 2, 0) = {0.125 0.111084 0.0999756}', - '(0, 3, 0) = {0.083313 0.0769043 0.0714111}', - '(0, 4, 0) = {0.0625 0.0588379 0.055542}', - '(0, 5, 0) = {0.0499878 0.0476074 0.0454407}']) - - # half4 - self.try_command('language renderscript allocation dump 39', - ['(0, 0, 0) = {inf 1 0.5 0.333252}', - '(1, 0, 0) = {0.25 0.199951 0.166626 0.142822}', - '(2, 0, 0) = {0.125 0.111084 0.0999756 0.0908813}', - '(3, 0, 0) = {0.083313 0.0769043 0.0714111 0.0666504}', - '(4, 0, 0) = {0.0625 0.0588379 0.055542 0.0526428}', - '(5, 0, 0) = {0.0499878 0.0476074 0.0454407 0.0434875}']) - - # float - 
self.try_command('language renderscript allocation dump 40', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 0.5', - '(3, 0, 0) = 0.333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166667', - '(7, 0, 0) = 0.142857', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 0.0909091', - '(12, 0, 0) = 0.0833333', - '(13, 0, 0) = 0.0769231', - '(14, 0, 0) = 0.0714286', - '(15, 0, 0) = 0.0666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235', - '(18, 0, 0) = 0.0555556', - '(19, 0, 0) = 0.0526316', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.047619', - '(22, 0, 0) = 0.0454545', - '(23, 0, 0) = 0.0434783']) - - # float2 - self.try_command('language renderscript allocation dump 41', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = {0.166667 0.142857}', - '(4, 0, 0) = {0.125 0.111111}', - '(5, 0, 0) = {0.1 0.0909091}', - '(6, 0, 0) = {0.0833333 0.0769231}', - '(7, 0, 0) = {0.0714286 0.0666667}', - '(8, 0, 0) = {0.0625 0.0588235}', - '(9, 0, 0) = {0.0555556 0.0526316}', - '(10, 0, 0) = {0.05 0.047619}', - '(11, 0, 0) = {0.0454545 0.0434783}']) - - # float3 - self.try_command('language renderscript allocation dump 42', - ['(0, 0, 0) = {inf 1 0.5}', - '(1, 0, 0) = {0.25 0.2 0.166667}', - '(2, 0, 0) = {0.125 0.111111 0.1}', - '(3, 0, 0) = {0.0833333 0.0769231 0.0714286}', - '(4, 0, 0) = {0.0625 0.0588235 0.0555556}', - '(5, 0, 0) = {0.05 0.047619 0.0454545}']) - - # float4 - self.try_command('language renderscript allocation dump 43', - ['(0, 0, 0) = {inf 1 0.5 0.333333}', - '(1, 0, 0) = {0.25 0.2 0.166667 0.142857}', - '(2, 0, 0) = {0.125 0.111111 0.1 0.0909091}', - '(0, 1, 0) = {0.0833333 0.0769231 0.0714286 0.0666667}', - '(1, 1, 0) = {0.0625 0.0588235 0.0555556 0.0526316}', - '(2, 1, 0) = {0.05 0.047619 0.0454545 0.0434783}']) - - # double - self.try_command('language renderscript allocation dump 44', - ['(0, 0, 0) = inf', - '(1, 0, 0) = 1', - '(2, 0, 0) = 
0.5', - '(3, 0, 0) = 0.333333333333333', - '(4, 0, 0) = 0.25', - '(5, 0, 0) = 0.2', - '(6, 0, 0) = 0.166666666666667', - '(7, 0, 0) = 0.142857142857143', - '(8, 0, 0) = 0.125', - '(9, 0, 0) = 0.111111111111111', - '(10, 0, 0) = 0.1', - '(11, 0, 0) = 0.0909090909090909', - '(12, 0, 0) = 0.0833333333333333', - '(13, 0, 0) = 0.0769230769230769', - '(14, 0, 0) = 0.0714285714285714', - '(15, 0, 0) = 0.0666666666666667', - '(16, 0, 0) = 0.0625', - '(17, 0, 0) = 0.0588235294117647', - '(18, 0, 0) = 0.0555555555555556', - '(19, 0, 0) = 0.0526315789473684', - '(20, 0, 0) = 0.05', - '(21, 0, 0) = 0.0476190476190476', - '(22, 0, 0) = 0.0454545454545455', - '(23, 0, 0) = 0.0434782608695652']) - - # double2 - self.try_command('language renderscript allocation dump 45', - ['(0, 0, 0) = {inf 1}', - '(1, 0, 0) = {0.5 0.333333333333333}', - '(2, 0, 0) = {0.25 0.2}', - '(3, 0, 0) = {0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111}', - '(1, 0, 1) = {0.1 0.0909090909090909}', - '(2, 0, 1) = {0.0833333333333333 0.0769230769230769}', - '(3, 0, 1) = {0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647}', - '(1, 0, 2) = {0.0555555555555556 0.0526315789473684}', - '(2, 0, 2) = {0.05 0.0476190476190476}', - '(3, 0, 2) = {0.0454545454545455 0.0434782608695652}']) - - # double3 - self.try_command('language renderscript allocation dump 46', - ['(0, 0, 0) = {inf 1 0.5}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1}', - '(0, 1, 1) = {0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714}', - '(0, 0, 2) = {0.0625 0.0588235294117647 0.0555555555555556}', - '(0, 1, 2) = {0.05 0.0476190476190476 0.0454545454545455}']) - - # double4 - self.try_command('language renderscript allocation dump 47', - ['(0, 0, 0) = {inf 1 0.5 0.333333333333333}', - '(0, 1, 0) = {0.25 0.2 0.166666666666667 0.142857142857143}', - '(0, 0, 1) = {0.125 0.111111111111111 0.1 0.0909090909090909}', - '(0, 1, 1) = 
{0.0833333333333333 0.0769230769230769 ' - '0.0714285714285714 0.0666666666666667}', - '(0, 0, 2) = {0.0625 0.0588235294117647 ' - '0.0555555555555556 0.0526315789473684}', - '(0, 1, 2) = {0.05 0.0476190476190476 ' - '0.0454545454545455 0.0434782608695652}']) diff --git a/tests/lldb/tests/testcases/test_allocation_dump_struct.py b/tests/lldb/tests/testcases/test_allocation_dump_struct.py deleted file mode 100644 index f161131f..00000000 --- a/tests/lldb/tests/testcases/test_allocation_dump_struct.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestAllocationDumpStruct.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote - - -class TestAllocationDumpStruct(TestBaseRemote): - '''Tests printing the contents of a struct allocation.''' - - bundle_target = { - 'java': 'Allocations' - } - - def setup(self, android): - '''This test requires to be run on one thread.''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value.''' - android.pop_prop('debug.rs.max-threads') - - def test_dump_complex_struct_allocation(self): - # Hit struct_kernel on last coordinate, so almost all elements have been initalised - self.try_command( - 'language renderscript kernel breakpoint set struct_kernel -c 23', - ['Conditional kernel breakpoint on coordinate (23, 0, 0)', - 'Breakpoint(s) created']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # complex_struct output allocation - self.try_command('language renderscript allocation dump 49', - ['(0, 0, 0) = (complexStruct) {\n' - ' (i = 0, j = 0)\n' - ' (0x00, 0x41, 0x42, 0x43)\n' - ' ([0] = 0, [1] = 0.5)\n' - '}', - '(1, 0, 0) = (complexStruct) {\n' - ' (i = 1, j = 1)\n' - ' (0x01, 0x41, 0x42, 0x43)\n' - ' ([0] = 1, [1] = 1.5)\n' - '}', - '(2, 0, 0) = (complexStruct) {\n' - ' (i = 2, j = 2)\n' - ' (0x02, 0x41, 0x42, 0x43)\n' - ' ([0] = 2, [1] = 2.5)\n' - '}', - '(3, 0, 0) = (complexStruct) {\n' - ' (i = 3, j = 3)\n' - ' (0x03, 0x41, 0x42, 0x43)\n' - ' ([0] = 3, [1] = 3.5)\n' - '}', - '(4, 0, 0) = (complexStruct) {\n' - ' (i = 4, j = 4)\n' - ' (0x04, 0x41, 0x42, 0x43)\n' - ' ([0] = 4, [1] = 4.5)\n' - '}', - '(5, 0, 0) = (complexStruct) {\n' - ' (i = 5, j = 5)\n' - ' (0x05, 0x41, 0x42, 0x43)\n' - ' ([0] = 5, [1] = 5.5)\n' - '}', - '(6, 0, 0) = (complexStruct) {\n' - ' (i = 6, j = 6)\n' - ' (0x06, 0x41, 0x42, 0x43)\n' - ' ([0] = 6, [1] = 6.5)\n' - '}', - '(7, 0, 0) = 
(complexStruct) {\n' - ' (i = 7, j = 7)\n' - ' (0x07, 0x41, 0x42, 0x43)\n' - ' ([0] = 7, [1] = 7.5)\n' - '}', - '(8, 0, 0) = (complexStruct) {\n' - ' (i = 8, j = 8)\n' - ' (0x08, 0x41, 0x42, 0x43)\n' - ' ([0] = 8, [1] = 8.5)\n' - '}', - '(9, 0, 0) = (complexStruct) {\n' - ' (i = 9, j = 9)\n' - ' (0x09, 0x41, 0x42, 0x43)\n' - ' ([0] = 9, [1] = 9.5)\n' - '}', - '(10, 0, 0) = (complexStruct) {\n' - ' (i = 10, j = 10)\n' - ' (0x0a, 0x41, 0x42, 0x43)\n' - ' ([0] = 10, [1] = 10.5)\n' - '}', - '(11, 0, 0) = (complexStruct) {\n' - ' (i = 11, j = 11)\n' - ' (0x0b, 0x41, 0x42, 0x43)\n' - ' ([0] = 11, [1] = 11.5)\n' - '}', - '(12, 0, 0) = (complexStruct) {\n' - ' (i = 12, j = 12)\n' - ' (0x0c, 0x41, 0x42, 0x43)\n' - ' ([0] = 12, [1] = 12.5)\n' - '}', - '(13, 0, 0) = (complexStruct) {\n' - ' (i = 13, j = 13)\n' - ' (0x0d, 0x41, 0x42, 0x43)\n' - ' ([0] = 13, [1] = 13.5)\n' - '}', - '(14, 0, 0) = (complexStruct) {\n' - ' (i = 14, j = 14)\n' - ' (0x0e, 0x41, 0x42, 0x43)\n' - ' ([0] = 14, [1] = 14.5)\n' - '}', - '(15, 0, 0) = (complexStruct) {\n' - ' (i = 15, j = 15)\n' - ' (0x0f, 0x41, 0x42, 0x43)\n' - ' ([0] = 15, [1] = 15.5)\n' - '}', - '(16, 0, 0) = (complexStruct) {\n' - ' (i = 16, j = 16)\n' - ' (0x10, 0x41, 0x42, 0x43)\n' - ' ([0] = 16, [1] = 16.5)\n' - '}', - '(17, 0, 0) = (complexStruct) {\n' - ' (i = 17, j = 17)\n' - ' (0x11, 0x41, 0x42, 0x43)\n' - ' ([0] = 17, [1] = 17.5)\n' - '}', - '(18, 0, 0) = (complexStruct) {\n' - ' (i = 18, j = 18)\n' - ' (0x12, 0x41, 0x42, 0x43)\n' - ' ([0] = 18, [1] = 18.5)\n' - '}', - '(19, 0, 0) = (complexStruct) {\n' - ' (i = 19, j = 19)\n' - ' (0x13, 0x41, 0x42, 0x43)\n' - ' ([0] = 19, [1] = 19.5)\n' - '}', - '(20, 0, 0) = (complexStruct) {\n' - ' (i = 20, j = 20)\n' - ' (0x14, 0x41, 0x42, 0x43)\n' - ' ([0] = 20, [1] = 20.5)\n' - '}', - '(21, 0, 0) = (complexStruct) {\n' - ' (i = 21, j = 21)\n' - ' (0x15, 0x41, 0x42, 0x43)\n' - ' ([0] = 21, [1] = 21.5)\n' - '}', - '(22, 0, 0) = (complexStruct) {\n' - ' (i = 22, j = 22)\n' - ' (0x16, 0x41, 
0x42, 0x43)\n' - ' ([0] = 22, [1] = 22.5)\n' - '}']) diff --git a/tests/lldb/tests/testcases/test_allocation_file.py b/tests/lldb/tests/testcases/test_allocation_file.py deleted file mode 100644 index 680ae4a3..00000000 --- a/tests/lldb/tests/testcases/test_allocation_file.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestAllocationFile.''' - -from __future__ import absolute_import - -import os - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - cpp_only_test, - ordered_test -) - - -class TestAllocationFile(TestBaseRemote): - '''Tests saving the contents of allocations to disk and reloading them.''' - - bundle_target = { - 'java': 'Allocations', - 'cpp': 'CppAllocations', - 'jni': 'JNIAllocations' - } - - @ordered_test(0) - def test_allocation_file_roundtrip(self): - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Binary file of int2 allocation - file_int2 = self.get_tmp_file_path() - - self.try_command('language renderscript allocation save 12 ' + - file_int2, - ["Allocation written to file '%s'" % file_int2]) - - # Check file was created - self.assert_true(os.path.isfile(file_int2)) - - # Load the file we just created, to assert the 
allocation contents are - # the same - self.try_command('language renderscript allocation load 12 ' + - file_int2, - ["Contents of file '%s' read into allocation 12" % - file_int2]) - os.remove(file_int2) - - self.try_command('language renderscript allocation dump 12', - ['(0, 0, 0) = {0 1}', - '(1, 0, 0) = {2 3}', - '(2, 0, 0) = {4 5}', - '(3, 0, 0) = {6 7}', - '(4, 0, 0) = {8 9}', - '(5, 0, 0) = {10 11}', - '(6, 0, 0) = {12 13}', - '(7, 0, 0) = {14 15}', - '(8, 0, 0) = {16 17}', - '(9, 0, 0) = {18 19}', - '(10, 0, 0) = {20 21}', - '(11, 0, 0) = {22 23}']) - - self.try_command('breakpoint del 1', - ['1 breakpoints deleted']) - - # Hit second kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Binary file of uint allocation - file_uint = self.get_tmp_file_path() - - self.try_command('language renderscript allocation save 28 ' + - file_uint, - ["Allocation written to file '%s'" % file_uint]) - - # Check file was created - self.assert_true(os.path.isfile(file_uint)) - - # Test loading file into allocation with an incompatible type 'short' - self.try_command('language renderscript allocation load 7 ' + file_uint, - ["Contents of file '%s' read into allocation 7" % - file_uint, - "Warning: Mismatched Element sizes", - "Warning: Mismatched Types", - "Warning: Mismatched allocation sizes"]) - - # Check result of size inconsistency, mapping 4-byte unsigned to 2-byte - # int - self.try_command('language renderscript allocation dump 7', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 0', - '(2, 0, 0) = 1', - '(3, 0, 0) = 0', - '(4, 0, 0) = 2', - '(5, 0, 0) = 0', - '(6, 0, 0) = 3', - '(7, 0, 0) = 0', - '(8, 0, 0) = 4', - '(9, 0, 0) = 0', - '(10, 0, 0) = 5', - '(11, 0, 0) = 0', - '(12, 0, 0) = 6', - '(13, 0, 0) = 0', - '(14, 0, 0) = 7', - '(15, 0, 0) = 0', - '(16, 0, 0) = 8', - '(17, 0, 0) = 0', - '(18, 0, 0) = 9', - '(19, 0, 0) = 0', - '(20, 0, 0) = 10', - '(21, 0, 0) = 0', - '(22, 0, 0) = 11', - '(23, 0, 0) = 0']) - - 
self.try_command('breakpoint del 2', - ['1 breakpoints deleted']) - - # Hit third kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Test that uint allocation has been squared by square_kernel - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 4', - '(3, 0, 0) = 9', - '(4, 0, 0) = 16', - '(5, 0, 0) = 25', - '(6, 0, 0) = 36', - '(7, 0, 0) = 49', - '(8, 0, 0) = 64', - '(9, 0, 0) = 81', - '(10, 0, 0) = 100', - '(11, 0, 0) = 121', - '(12, 0, 0) = 144', - '(13, 0, 0) = 169', - '(14, 0, 0) = 196', - '(15, 0, 0) = 225', - '(16, 0, 0) = 256', - '(17, 0, 0) = 289', - '(18, 0, 0) = 324', - '(19, 0, 0) = 361', - '(20, 0, 0) = 400', - '(21, 0, 0) = 441', - '(22, 0, 0) = 484', - '(23, 0, 0) = 529']) - - # Load uint allocation from save before square_kernel had been run - self.try_command('language renderscript allocation load 28 ' + - file_uint, - ["Contents of file '%s' read into allocation 28" % - file_uint]) - os.remove(file_uint) - - # Check contents are back to original - self.try_command('language renderscript allocation dump 28', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3', - '(4, 0, 0) = 4', - '(5, 0, 0) = 5', - '(6, 0, 0) = 6', - '(7, 0, 0) = 7', - '(8, 0, 0) = 8', - '(9, 0, 0) = 9', - '(10, 0, 0) = 10', - '(11, 0, 0) = 11', - '(12, 0, 0) = 12', - '(13, 0, 0) = 13', - '(14, 0, 0) = 14', - '(15, 0, 0) = 15', - '(16, 0, 0) = 16', - '(17, 0, 0) = 17', - '(18, 0, 0) = 18', - '(19, 0, 0) = 19', - '(20, 0, 0) = 20', - '(21, 0, 0) = 21', - '(22, 0, 0) = 22', - '(23, 0, 0) = 23']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 3', ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_allocation_list.py b/tests/lldb/tests/testcases/test_allocation_list.py deleted file 
mode 100644 index ca492f2a..00000000 --- a/tests/lldb/tests/testcases/test_allocation_list.py +++ /dev/null @@ -1,547 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestAllocationList.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy, - cpp_only_test, - java_only_test, -) - - -class TestAllocationList(TestBaseRemote): - '''Tests printing the details of all allocations.''' - - bundle_target = { - 'java': 'Allocations', - 'jni': 'JNIAllocations', - 'cpp': 'CppAllocations' - } - - @wimpy - @ordered_test(0) - def test_allocation_list_single(self): - # pylint: disable=anomalous-backslash-in-string - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Test command line flag for single allocation - self.try_command('language renderscript allocation list -i 3', - [], - ['3:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 3, 8\)\n' - ' Data Type: char\n' - ' Data Kind: User']) - - @ordered_test(1) - def test_allocation_list_all(self): - self.try_command('language renderscript allocation list', - 
[], - ['1:\n' - # Regex for non zero hex number - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel', - '2:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel', - '3:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 3, 8\)\n' - ' Data Type: char\n' - ' Data Kind: User', - '4:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: char2\n' - ' Data Kind: User', - '5:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: char3\n' - ' Data Kind: User', - '6:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: char4\n' - ' Data Kind: User', - '7:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: short\n' - ' Data Kind: User', - '8:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 1, 2\)\n' - ' Data Type: short2\n' - ' Data Kind: User', - '9:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' 
Dimensions: \(6, 0, 0\)\n' - ' Data Type: short3\n' - ' Data Kind: User', - '10:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: short4\n' - ' Data Kind: User', - '11:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: int\n' - ' Data Kind: User', - '12:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: int2\n' - ' Data Kind: User', - '13:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(3, 2, 0\)\n' - ' Data Type: int3\n' - ' Data Kind: User', - '14:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: int4\n' - ' Data Kind: User', - '15:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: long\n' - ' Data Kind: User', - '16:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: long2\n' - ' Data Kind: User', - '17:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: long3\n' - ' Data Kind: User', - '18:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 
0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 6, 0\)\n' - ' Data Type: long4\n' - ' Data Kind: User', - '19:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: bool\n' - ' Data Kind: User' - ]) - - @wimpy - @ordered_test(2) - def test_continue_1(self): - self.try_command('breakpoint del 1', - ['1 breakpoints deleted']) - - # Hit second kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @ordered_test(3) - def test_allocation_list_all2_java(self): - # TODO investigate why java tests show extra allocations - if self.app_type == 'java': - allocation_1_re = [ - '1:\n' - # Regex for non zero hex number - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel' - ] - else: - allocation_1_re = [] - - self.try_command('language renderscript allocation list', - [], - allocation_1_re + - ['2:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel', - '7:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: short\n' - ' Data Kind: User', - '20:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: uchar\n' - ' Data Kind: User', - '21:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(2, 
6, 0\)\n' - ' Data Type: uchar2\n' - ' Data Kind: User', - '22:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: uchar3\n' - ' Data Kind: User', - '23:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: User', - '24:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: ushort\n' - ' Data Kind: User', - '25:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: ushort2\n' - ' Data Kind: User', - '26:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 6, 0\)\n' - ' Data Type: ushort3\n' - ' Data Kind: User', - '27:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: ushort4\n' - ' Data Kind: User', - '28:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: uint\n' - ' Data Kind: User', - '29:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: uint2\n' - ' Data Kind: User', - '30:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 
0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: uint3\n' - ' Data Kind: User', - '31:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 1, 6\)\n' - ' Data Type: uint4\n' - ' Data Kind: User', - '32:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(4, 3, 2\)\n' - ' Data Type: ulong\n' - ' Data Kind: User', - '33:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: ulong2\n' - ' Data Kind: User', - '34:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: ulong3\n' - ' Data Kind: User', - '35:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: ulong4\n' - ' Data Kind: User' - ]) - - @wimpy - @ordered_test(4) - def test_continue_2(self): - self.try_command('breakpoint del 2', - ['1 breakpoints deleted']) - - # Hit third kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @ordered_test(5) - def test_allocation_list_all3(self): - self.try_command('language renderscript allocation list', - [], - ['2:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel', - '7:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' 
Dimensions: \(24, 0, 0\)\n' - ' Data Type: short\n' - ' Data Kind: User', - '28:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: uint\n' - ' Data Kind: User', - '36:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: half\n' - ' Data Kind: User', - '37:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: half2\n' - ' Data Kind: User', - '38:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 6, 0\)\n' - ' Data Type: half3\n' - ' Data Kind: User', - '39:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: half4\n' - ' Data Kind: User', - '40:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: float\n' - ' Data Kind: User', - '41:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(12, 0, 0\)\n' - ' Data Type: float2\n' - ' Data Kind: User', - '42:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(6, 0, 0\)\n' - ' Data Type: float3\n' - ' Data Kind: User', - '43:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 
0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(3, 2, 0\)\n' - ' Data Type: float4\n' - ' Data Kind: User', - '44:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: double\n' - ' Data Kind: User', - '45:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(4, 1, 3\)\n' - ' Data Type: double2\n' - ' Data Kind: User', - '46:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 2, 3\)\n' - ' Data Type: double3\n' - ' Data Kind: User', - '47:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 2, 3\)\n' - ' Data Type: double4\n' - ' Data Kind: User']) - - @wimpy - @ordered_test(6) - @java_only_test() - def test_allocation_list_all4(self): - self.try_command('breakpoint del 3', - ['1 breakpoints deleted']) - - # Hit last kernel - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('language renderscript allocation list', - [], - ['2:\n' - # Regex for non zero hex number - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(64, 64, 0\)\n' - ' Data Type: uchar4\n' - ' Data Kind: RGBA Pixel', - '7:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: short\n' - ' Data Kind: User', - '28:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - 
' Dimensions: \(24, 0, 0\)\n' - ' Data Type: uint\n' - ' Data Kind: User', - '46:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(1, 2, 3\)\n' - ' Data Type: double3\n' - ' Data Kind: User', - '48:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: complexStruct\n' - ' Data Kind: User', - '49:\n' - ' Context: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Address: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Data pointer: 0x0*[1-9a-fA-F][0-9a-fA-F]*\n' - ' Dimensions: \(24, 0, 0\)\n' - ' Data Type: complexStruct\n' - ' Data Kind: User']) - - @ordered_test(7) - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 3', ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_backtrace.py b/tests/lldb/tests/testcases/test_backtrace.py deleted file mode 100644 index 1e5e79f7..00000000 --- a/tests/lldb/tests/testcases/test_backtrace.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBacktrace.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test, -) - - -class TestBacktrace(TestBaseRemote): - '''Tests breaking on a kernel and a function, and viewing the call stack.''' - - bundle_target = { - 'java': 'BranchingFunCalls', - 'jni': 'JNIBranchingFunCalls', - 'cpp': 'CppBranchingFunCalls' - } - - def test_kernel_backtrace(self): - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('language renderscript kernel breakpoint set simple_kernel', - ['Breakpoint(s) created', - '(pending)']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('bt', - ['stop reason = breakpoint', - # We should be able to see three functions in bt: - # libRSCpuRef, kernel.expand and the kernel - 'frame #2:', - 'librs.scalars.so', - 'simple_kernel'], - [r'scalars\.rs:6[123]']) - - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - self.try_command('b set_i', - ['Breakpoint 2', - 'set_i'], - [r'scalars\.rs:3[678]']) - - self.try_command('breakpoint list', - ['set_i', 'resolved']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('bt', - ['stop reason = breakpoint', - # We should be able to see five functions in bt: - # libRSCpuRef, kernel.expand, kernel and two functions - 'frame #4:', - 'librs.scalars.so', - 'modify_i', - 'set_i'], - [r'scalars\.rs:3[678]']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 2', - ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_coordinate.py 
b/tests/lldb/tests/testcases/test_breakpoint_coordinate.py deleted file mode 100644 index 319d4f20..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_coordinate.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestBreakpointCoordinate.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test, - cpp_only_test, -) -from harness.assert_mixins import CoordinateAssertionsMixin - - -class TestBreakpointCoordinate(TestBaseRemote, CoordinateAssertionsMixin): - '''Tests breaking on a specific kernel invocation. - - Uses the -c option to specify the coordinate. - ''' - - bundle_target = { - 'java': 'Allocations', - 'jni': 'JNIAllocations', - 'cpp': 'CppAllocations' - } - - def setup(self, android): - '''This test requires to be run on one thread. - - Args: - android: The android_util module. - ''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value. - - Args: - android: The android_util module. 
- ''' - android.pop_prop('debug.rs.max-threads') - - @wimpy - @ordered_test(0) - def test_breakpoint_coordinate_2d_swizzle_kernel(self): - # pylint: disable=line-too-long - - # test conditional coordinate in two dimensions - # breakpoint 1 - self.assert_coord_bp_set('swizzle_kernel', 3, 7) - - # we will delete this breakpoint before we hit it. - # breakpoint 2 - self.assert_coord_bp_set('swizzle_kernel', 199, 190) - - self.assert_coord_stop('allocs', 'swizzle_kernel', x=3, y=7) - - # check breakpoints that have been hit are disabled - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "2: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1" - ] - ) - - # delete breakpoint on 199,199,0 - self.try_command('breakpoint delete 2', ['1 breakpoints deleted']) - - # check breakpoints that have been hit are disabled - self.try_command( - 'breakpoint list', - ["1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled"] - ) - - # test conditional coordinate in a single dimension - # breakpoint 3 - self.assert_coord_bp_set('square_kernel', 8) - - # check breakpoints that have been hit are disabled - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "3: RenderScript kernel breakpoint for 'square_kernel', locations = 1" - ] - ) - - self.assert_coord_stop('allocs', 'square_kernel', x=8) - - # check breakpoints that have been hit are disabled - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "3: RenderScript kernel breakpoint for 'square_kernel', locations = 1 Options: disabled" - ] - ) - - @wimpy - @ordered_test(1) - def test_breakpoint_coordinate_3d_add_half_kernel(self): - # test conditional coordinate in three dimensions - # breakpoint 4 - 
self.assert_coord_bp_set('add_half_kernel', 0, 0, 1) - # test we can set more than one conditional kernel breakpoint - # and both will be hit; - # breakpoint 5 - self.assert_coord_bp_set('add_half_kernel', 0, 1, 2) - - # Now assert that the next two continue/stop cycles hit our conditionals - self.assert_coord_stop('allocs', 'add_half_kernel', x=0, y=0, z=1) - self.assert_coord_stop('allocs', 'add_half_kernel', x=0, y=1, z=2) - - # check we can see the coordinate from a function invoked by the kernel - # breakpoint 6 - self.try_command( - 'break set -n half_helper', - ['librs.allocs.so`half_helper'] - ) - - # continue till we hit breakpoint 6 - self.assert_coord_stop('allocs', 'half_helper', x=0, y=1, z=2) - - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "3: RenderScript kernel breakpoint for 'square_kernel', locations = 1 Options: disabled", - "4: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled", - "5: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled", - "6: name = 'half_helper', locations = 1, resolved = 1, hit count = 1" - ] - ) - - self.try_command('breakpoint delete 3', ['1 breakpoints deleted']) - - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "4: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled", - "5: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled", - "6: name = 'half_helper', locations = 1, resolved = 1, hit count = 1" - ] - ) - - self.try_command('breakpoint delete 6', ['1 breakpoints deleted']) - - self.try_command( - 'breakpoint list', - [ - "1: RenderScript kernel breakpoint for 'swizzle_kernel', locations = 1 Options: disabled", - "4: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled", 
- "5: RenderScript kernel breakpoint for 'add_half_kernel', locations = 1 Options: disabled" - ] - ) - - @cpp_only_test() - @ordered_test('last') - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 4', ['1 breakpoints deleted']) - self.try_command('breakpoint delete 5', ['1 breakpoints deleted']) - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_fileline.py b/tests/lldb/tests/testcases/test_breakpoint_fileline.py deleted file mode 100644 index be89ca94..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_fileline.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointFileLine.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - cpp_only_test, - ordered_test -) - - -class TestBreakpointFileLine(TestBaseRemote): - '''Tests the setting of a breakpoint on a specific line of a RS file.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - @ordered_test(0) - def test_breakpoint_fileline(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('breakpoint set --file simple.rs --line 28', - ['(pending)']) - - self.try_command('process continue', - []) - - self.try_command('bt', - ['librs.simple.so', - 'simple_kernel', - 'stop reason = breakpoint']) - - self.try_command('breakpoint list', - ['simple.rs', - 'resolved = 1']) - - self.try_command('process status', - ['stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_fileline_multiple_rs_files.py b/tests/lldb/tests/testcases/test_breakpoint_fileline_multiple_rs_files.py deleted file mode 100644 index 9ba4283a..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_fileline_multiple_rs_files.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestBreakpointFileLineMultipleRSFiles.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - cpp_only_test, - ordered_test -) - - -class TestBreakpointFileLineMultipleRSFiles(TestBaseRemote): - '''Tests the setting of a breakpoint on one of multiple RS files.''' - - bundle_target = { - 'java': 'MultipleRSFiles', - 'jni': 'JNIMultipleRSFiles', - 'cpp': 'CppMultipleRSFiles' - } - - def _binary_name(self): - return { - 'java': 'multiplersfiles', - 'jni': 'multiplersfiles', - 'cpp': 'CppMultipleRSFi' - }[self.app_type] - - @ordered_test(0) - def test_breakpoint_fileline_multiple_files(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('breakpoint set --file first.rs --line 28', - ['(pending)']) - - self.try_command('process continue', - ['stopped', - 'librs.first.so`first_kernel', - 'at first.rs:28', - "name = '%s'" % self._binary_name(), - 'stop reason = breakpoint 1']) - - self.try_command('breakpoint set --file second.rs --line 23', - ['Breakpoint 2', - 'librs.second.so`second_kernel', - 'second.rs:23']) - - self.try_command('breakpoint list', - ['first.rs', - 'second.rs', - 'resolved = 1', - 'first.rs:28', - 'second.rs:23']) - - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - self.try_command('process continue', - ['stopped', - 'librs.second.so`second_kernel', - 'at second.rs:23', - "name = '%s'" % self._binary_name(), - 
'stop reason = breakpoint 2']) - - self.try_command('process status', - ['stopped', - 'stop reason = breakpoint']) - - @cpp_only_test() - @ordered_test('last') - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 2', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_kernel_1.py b/tests/lldb/tests/testcases/test_breakpoint_kernel_1.py deleted file mode 100644 index a501b66c..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_kernel_1.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointKernel1.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test -) - - -class TestBreakpointKernel1(TestBaseRemote): - '''Tests the setting of a breakpoint on a RS kernel.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - @ordered_test(0) - def test_breakpoint_set_nonexistent_kernel(self): - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('language renderscript kernel breakpoint set simple_kernel', - ['Breakpoint(s) created', - '(pending)']) - - # Try set a breakpoint on a kernel which doesn't exist - self.try_command('language renderscript kernel breakpoint set imaginary_kernel', - ['Breakpoint(s) created', - '(pending)']) - - self.try_command('breakpoint list', - ["'simple_kernel', locations = 0 (pending)", - "'imaginary_kernel', locations = 0 (pending)"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('bt', - ['stop reason = breakpoint', - 'frame #0:', - 'librs.simple.so', - 'simple_kernel']) - - self.try_command('breakpoint list', - ["'imaginary_kernel', locations = 0 (pending)", - "'simple_kernel', locations = 1, resolved = 1"]) - - @ordered_test(1) - def test_breakpoint_delete_nonexistent_kernel(self): - # Delete breakpoint on kernel which doesn't exist - self.try_command('breakpoint delete 2', - ['1 breakpoints deleted']) - - self.try_command('breakpoint list', - ["'simple_kernel', locations = 1, resolved = 1"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint list', - ["'simple_kernel', locations = 1, resolved = 1"]) - - self.try_command('breakpoint 
delete 1', - ['1 breakpoints deleted']) - - self.try_command('breakpoint list', - ['No breakpoints currently set']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_kernel_2.py b/tests/lldb/tests/testcases/test_breakpoint_kernel_2.py deleted file mode 100644 index 6dea13fb..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_kernel_2.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointKernel2.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote - - -class TestBreakpointKernel2(TestBaseRemote): - '''Tests the setting of a breakpoint on a RS kernel.''' - - bundle_target = { - 'java': 'JavaInfiniteLoop', - 'jni': 'JNIInfiniteLoop', - 'cpp': 'CppInfiniteLoop' - } - - def test_breakpoint_resolution_simple_kernel(self): - # pylint: disable=line-too-long - self.try_command('language renderscript kernel breakpoint set simple_kernel', - ['Breakpoint(s) created']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('bt', - ['stop reason = breakpoint', - 'frame #0:', - 'librs.infiniteloop.so', - 'simple_kernel'], - [r'infiniteloop\.rs:4[34]']) - - self.try_command('breakpoint list', - ['simple_kernel', - 'resolved = 1']) - - self.try_command('process status', - ['stopped', - '.so`simple_kernel', - 'stop reason = breakpoint']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_kernel_all.py b/tests/lldb/tests/testcases/test_breakpoint_kernel_all.py deleted file mode 100644 index ea988e4d..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_kernel_all.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointKernelAll.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test, -) - - -class TestBreakpointKernelAll(TestBaseRemote): - '''Tests setting breakpoints on every RS kernel.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - @ordered_test(0) - def test_kernel_breakpoint_all_unloaded_kernels(self): - # Test command works with no kernels currently loaded - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint list', - ["'simple_kernel', locations = 1, resolved = 1", - "'other_kernel', locations = 1, resolved = 1"]) - - # Check disable doesn't delete breakpoints - self.try_command('language renderscript kernel breakpoint all disable', - ['Breakpoints will not be set on any new kernels']) - - # Delete all breakpoints manually - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 2', - ['1 breakpoints deleted']) - - self.try_command('breakpoint list', - ["No breakpoints currently set"]) - - # Test command works when kernels are loaded - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('breakpoint list', - ["'simple_kernel', locations = 1, resolved = 1", - "'other_kernel', locations = 1, resolved = 1"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint delete 3', - ['1 breakpoints deleted']) - - # Check other_kernel breakpoint gets hit - self.try_command('breakpoint list', - ["'other_kernel', locations = 1, resolved = 1"]) 
- - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 4', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_breakpoint_kernel_all_multiple_rs_files.py b/tests/lldb/tests/testcases/test_breakpoint_kernel_all_multiple_rs_files.py deleted file mode 100644 index 675b5a33..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_kernel_all_multiple_rs_files.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointKernelAllMultipleRSFiles.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test, - cpp_only_test, -) - - -class TestBreakpointKernelAllMultipleRSFiles(TestBaseRemote): - '''Tests setting breakpoints on every RS kernel in multiple kernel files.''' - - bundle_target = { - 'java': 'MultipleRSFiles', - 'jni': 'JNIMultipleRSFiles', - 'cpp': 'CppMultipleRSFiles' - } - - @ordered_test(0) - def test_deferred_breakpoint_resolution(self): - # Test command works with no kernels currently loaded - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - self.try_command('breakpoint list', - ["'first_kernel', locations = 1, resolved = 1", - "'second_kernel', locations = 1, resolved = 1"]) - - @ordered_test(1) - def test_disable_all_kernel_breakpoint_doesnt_delete_breakpoints(self): - # Check disable doesn't delete breakpoints - self.try_command('language renderscript kernel breakpoint all disable', - ['Breakpoints will not be set on any new kernels']) - - # Delete all breakpoints manually - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 2', - ['1 breakpoints deleted']) - - self.try_command('breakpoint list', - ["No breakpoints currently set"]) - - @ordered_test(2) - def test_enable_breakpoint_on_loaded_kernels(self): - # Test command works when kernels are loaded - self.try_command('language renderscript kernel breakpoint all enable', - ['Breakpoints will be set on all kernels']) - - self.try_command('breakpoint list', - ["'first_kernel', locations = 1, resolved = 1", - "'second_kernel', locations = 1, resolved = 1"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = 
breakpoint']) - - self.try_command('breakpoint delete 3', - ['1 breakpoints deleted']) - - # Check other_kernel breakpoint gets hit - self.try_command('breakpoint list', - ["'second_kernel', locations = 1, resolved = 1"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 4', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) - diff --git a/tests/lldb/tests/testcases/test_breakpoint_kernel_multiple_rs_files.py b/tests/lldb/tests/testcases/test_breakpoint_kernel_multiple_rs_files.py deleted file mode 100644 index 5cb29d0b..00000000 --- a/tests/lldb/tests/testcases/test_breakpoint_kernel_multiple_rs_files.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestBreakpointKernelMultipleRSFiles.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test -) - - -class TestBreakpointKernelMultipleRSFiles(TestBaseRemote): - '''Tests the setting of a breakpoint on RS kernels in multiple files.''' - - bundle_target = { - 'java': 'MultipleRSFiles', - 'jni': 'JNIMultipleRSFiles', - 'cpp': 'CppMultipleRSFiles' - } - - def _binary_name(self): - return { - 'java': 'multiplersfiles', - 'jni': 'multiplersfiles', - 'cpp': 'CppMultipleRSFi' - }[self.app_type] - - def test_kernel_breakpoint_multiple_rs_files(self): - # pylint: disable=line-too-long - self.try_command('language renderscript kernel breakpoint set first_kernel', - ['Breakpoint(s) created', - '(pending)']) - - self.try_command('breakpoint list', - ["'first_kernel', locations = 0 (pending)"]) - - self.try_command('process continue', - ['stopped', - 'librs.first.so`first_kernel', - "name = '%s'" % self._binary_name(), - 'stop reason = breakpoint 1'], - [r'at first\.rs:2[678]']) - - self.try_command('breakpoint list', - ["'first_kernel', locations = 1, resolved = 1"]) - - self.try_command('language renderscript kernel breakpoint set second_kernel', - ['Breakpoint(s) created', - 'Breakpoint 2', - 'Breakpoint(s) created'], - [r"librs\.second\.so`second_kernel at second\.rs:2[012]",]) - - self.try_command('breakpoint list', - ["'first_kernel', locations = 1, resolved = 1", - "'second_kernel', locations = 1, resolved = 1"]) - - self.try_command('breakpoint delete 1', - ['1 breakpoints deleted']) - - self.try_command('breakpoint list', - ["'second_kernel', locations = 1, resolved = 1"]) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint', - "librs.second.so`second_kernel"], - [r'second\.rs:2[012]']) - - self.try_command('breakpoint delete 2', - ['1 breakpoints deleted']) - - 
self.try_command('breakpoint list', - ['No breakpoints currently set']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_call_api_funs.py b/tests/lldb/tests/testcases/test_call_api_funs.py deleted file mode 100644 index d94df3a9..00000000 --- a/tests/lldb/tests/testcases/test_call_api_funs.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestCallApiFuns.''' - -from __future__ import absolute_import - -import re -import string - -from harness.test_base_remote import TestBaseRemote -from harness import RS_funs -from harness.decorators import ( - wimpy, - ordered_test, - cpp_only_test, -) - - -class _APIFunsExprTestsMeta(type): - """ - Generate unique, standalone test methods from a list of lldb expressions. - The lldb expression evaluation engine for calling RenderScript - builtins need to be tested thoroughly; rather than manually - write the 1000s of individual test cases, we automatically generate them - and their variants to add to the test class. This is done from a list - of expressions that are all tested in the same way. 
- """ - def __new__(self, name, bases, class_dict): - func_name_sub = re.compile(r'[%s\s]+' % string.punctuation) - - for count, line in enumerate(RS_funs.FUNC_LIST): - def make_test(line): - """ - We use an extra level of indirection here to properly - close over the *value* of the loop variable, `line` - """ - @ordered_test(count) - def test(self): - # build the expression - ret, expr = RS_funs.build_expr(line) - try: - # evaluate the expression with expected return value - self.try_command(expr, [], [RS_funs.TYPE_MAP[ret]]) - except KeyError: - # or just check the return type if no return value - # specified - self.try_command(expr, '(%s)' % ret) - return test - - # Make a pretty python method that adheres to the testcase standard - # Use the `count` parameter to ensure the name is unique in the class - test_name = 'test_%s_%s' % (re.sub(func_name_sub, '_', line), count) - test = make_test(line) - test.func_name = test_name - # We mark every 10th test case as runnable in wimpy mode - class_dict[test_name] = wimpy(test) if count % 10 == 0 else test - - return type(name, bases, class_dict) - - -class TestCallApiFuns(TestBaseRemote): - '''Tests calling of some RS API functions. 
This tests that JITing works.''' - - __metaclass__ = _APIFunsExprTestsMeta - - bundle_target = { - 'java': "KernelVariables", - 'jni': "JNIKernelVariables", - 'cpp': "CppKernelVariables" - } - - @wimpy - @ordered_test(-2) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - @ordered_test(-1) - def test_call_api_funs_atomic(self): - # Test the atomics separately because we want to check the output - # AtomicAdd(1234, 2) - self.try_command('expr rsAtomicAdd(&int_global, 2)', - ['1234'], - [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '1236']) - - # AtomicAnd(2345, 333) - self.try_command('expr rsAtomicAnd(&uint_global, 333)', - ['2345'], - [r'\(int(32_t)?\)']) - - self.try_command('expr uint_global', - ['(uint)', - '265']) - - # AtomicCas(1236, 1236, 2345) - self.try_command('expr rsAtomicCas(&int_global, 1236, 2345)', - ['1236'], - [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '2345']) - - # AtomicDec(265) - self.try_command('expr rsAtomicDec(&uint_global)', - ['265'], - [r'\(int(32_t)?\)']) - - self.try_command('expr uint_global', - ['(uint)', - '264']) - - # AtomicInc(2345) - self.try_command('expr rsAtomicInc(&int_global)', - ['2345'], - [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '2346']) - - # AtomicMax(264, 3456) - self.try_command('expr rsAtomicMax(&uint_global, 3456)', - ['264'], - [r'\(uint(32_t)?\)']) - - self.try_command('expr uint_global', - ['(uint)', - '3456']) - - # AtomicMin(2346, 3) - self.try_command('expr rsAtomicMin(&int_global, 3)', - ['2346'], - [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '3']) - - # AtomicOr(3, 456) - self.try_command('expr rsAtomicOr(&int_global, 456)', - ['3'], 
- [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '459']) - - # AtomicSub(3456, 7) - self.try_command('expr rsAtomicSub(&uint_global, 7)', - ['3456'], - [r'\(int(32_t)?\)']) - - self.try_command('expr uint_global', - ['(uint)', - '3449']) - - # AtomicXor(459, 89) - self.try_command('expr rsAtomicXor(&int_global, 89)', - ['459'], - [r'\(int(32_t)?\)']) - - self.try_command('expr int_global', - ['(int)', - '402']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_coordinates.py b/tests/lldb/tests/testcases/test_coordinates.py deleted file mode 100644 index 86807952..00000000 --- a/tests/lldb/tests/testcases/test_coordinates.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestCoordinates.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test -) - - -class TestCoordinates(TestBaseRemote): - '''Tests the inspection of coordinates. 
- - Tests the inspection of the range and dimension of coordinates as well - as the current coordinates.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - def setup(self, android): - '''This test requires to be run on one thread. - - Args: - android: The android_util module. - ''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value. - - Args: - android: The android_util module. - ''' - android.pop_prop('debug.rs.max-threads') - - @ordered_test(0) - def test_inspect_coordinates(self): - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('language renderscript kernel breakpoint set simple_kernel', - ['Breakpoint(s) created', - '(pending)']) - - # Check the initial conditions. - self._lldb_continue() - self._inspect_coordinates(0, 0, 0) - - # Check two more steps. - self._lldb_continue() - self._inspect_coordinates(1, 0, 0) - self._lldb_continue() - self._inspect_coordinates(2, 0, 0) - - # After eight more steps we should have advanced one step in the y dimension. - for _ in range(8): - self._lldb_continue() - self._inspect_coordinates(2, 1, 0) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) - - def _lldb_continue(self): - '''Try 'continue' lldb command. Expect to hit a breakpoint.''' - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - def _inspect_coordinates(self, x_coord, y_coord, z_coord): - '''Run lldb commands to inspect kernel size and coordinates - and match against expected values. 
- - Args: - (x_coord, y_coord, z_coord): The expected coordinates (int triple) - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - self.try_command('language renderscript kernel coordinate', - ['Coordinate: (%d, %d, %d)' - % (x_coord, y_coord, z_coord)]) - - self.try_command('frame select 1', - ['librs.simple.so`simple_kernel.expand', - 'at generated.rs:1']) - - # Inspect the invocation length, should be the same every time. - self.try_command('expr p->dim', - ['x = 8', - 'y = 8', - 'z = 0']) - - # The X coordinate is in the rsIndex variable. - self.try_command('expr rsIndex', - ['= ' + str(x_coord)]) - - # Inspect the Y and Z coordinates. - self.try_command('expr p->current', - ['x = ' + str(0), - 'y = ' + str(y_coord), - 'z = ' + str(z_coord)]) diff --git a/tests/lldb/tests/testcases/test_dwarf_lang.py b/tests/lldb/tests/testcases/test_dwarf_lang.py deleted file mode 100644 index 08cf8599..00000000 --- a/tests/lldb/tests/testcases/test_dwarf_lang.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test DWARF language attribute test.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote - - -class TestDWARFLang(TestBaseRemote): - '''Tests the DWARF language attribute is present in RenderScript kernels.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - def test_renderscript_kernel_frame_dwarf_language(self): - self.try_command('language renderscript status', []) - self.try_command('b simple_kernel', []) - self.try_command('process continue', []) - - self.assert_lang_renderscript() diff --git a/tests/lldb/tests/testcases/test_invoke_fun.py b/tests/lldb/tests/testcases/test_invoke_fun.py deleted file mode 100644 index 26485582..00000000 --- a/tests/lldb/tests/testcases/test_invoke_fun.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestInvokeFun.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test -) - - -class TestInvokeFun(TestBaseRemote): - '''Tests debugging a function executed from Java using invoke_*.''' - - bundle_target = { - 'java': 'BranchingFunCalls', - 'jni': 'JNIBranchingFunCalls', - 'cpp': 'CppBranchingFunCalls' - } - - def test_invoke_fun(self): - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('breakpoint set --name addToGlobal', - ['Breakpoint 1', '(pending)']) - - self.try_command('process continue', - ['stopped', - 'stop reason = breakpoint'], - [r'scalars\.rs:7[345]']) - - self.try_command('language renderscript kernel breakpoint set simple_kernel', - ['Breakpoint 2', 'Breakpoint(s) created']) - - self.try_command('process continue', - ['stopped', - 'stop reason = breakpoint', - 'simple_kernel'], - [r'scalars\.rs:6[123]']) - - self.try_command('expr glob', - ['(int)', - '357']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 2', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_language.py b/tests/lldb/tests/testcases/test_language.py deleted file mode 100644 index 84327810..00000000 --- a/tests/lldb/tests/testcases/test_language.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestLanguage.''' - -from __future__ import absolute_import - -from harness.test_base import TestBaseNoTargetProcess - - -class TestLanguage(TestBaseNoTargetProcess): - ''' - Tests the "language" command and "language renderscript" subcommand. - ''' - - def test_lldb_has_language_commands(self): - ci = self._ci - self.assert_true( - ci.HasCommands() and - ci.CommandExists('language') - ) - - self.try_command('language', ['renderscript']) - self.try_command('language renderscript', ['kernel', - 'context', - 'module', - 'status']) - diff --git a/tests/lldb/tests/testcases/test_language_subcmds.py b/tests/lldb/tests/testcases/test_language_subcmds.py deleted file mode 100644 index 67c8bd17..00000000 --- a/tests/lldb/tests/testcases/test_language_subcmds.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestLanguageSubcmds.''' - -from __future__ import absolute_import - -import os - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - cpp_only_test, - ordered_test, -) - - -class TestLanguageSubcmds(TestBaseRemote): - '''Tests the 'language renderscript' subcommands.''' - - bundle_target = { - 'java': 'JavaDebugWaitAttach', - 'jni': 'JNIDebugWaitAttach', - 'cpp': 'CppDebugWaitAttach' - } - - def setup(self, android): - '''This test requires to be run on one thread.''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value.''' - android.pop_prop('debug.rs.max-threads') - - def _pkg_name(self): - return { - 'java': 'com.android.rs.waitattachdebug', - 'jni': 'com.android.rs.jnidebugwaitattach', - 'cpp': 'com.android.rs.cppwaitattach' - }[self.app_type] - - def test_language_subcommands(self): - self.try_command('language', - []) - - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered', - 'Runtime functions hooked', - 'rsdAllocationInit', - 'rsdAllocationRead2D', - 'rsdScriptInit', - 'rsdScriptInvokeForEach', - 'rsdScriptInvokeForEachMulti', - 'rsdScriptSetGlobalVar']) - - self.try_command('breakpoint set --file simple.rs --line 28', - ['(pending)']) - - self.try_command('process continue', - []) - - self.try_command('language renderscript kernel', - ['breakpoint', - 'coordinate', - 'list']) - - self.try_command('language renderscript kernel breakpoint', - ['all', - 'set']) - - self.try_command('language renderscript kernel list', - ['RenderScript Kernels', - "Resource 'simple'", - 'root', - 'simple_kernel']) - - self.try_command('language renderscript kernel coordinate', - ['Coordinate: (0, 0, 0)']) - - self.try_command('language renderscript context', - ['dump']) - - self.try_command('language renderscript context dump', - ['Inferred RenderScript Contexts', 
- '1 script instances']) - - self.try_command('language renderscript allocation', - ['list', - 'load', - 'save', - 'dump', - 'refresh']) - - self.try_command('language renderscript allocation list', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation list -i 0', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation list --id 0', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation dump 1', - ['Data (X, Y, Z):']) - - output_file = self.get_tmp_file_path() - self.try_command('language renderscript allocation dump 1 -f ' + - output_file, - ["Results written to '%s'" % output_file]) - - if os.path.isfile(output_file): - os.remove(output_file) - - self.try_command('language renderscript allocation dump 1 --file ' + - output_file, - ["Results written to '%s'" % output_file]) - - self.try_command('language renderscript allocation save 1 ' + - output_file, - ["Allocation written to file '%s'" % output_file]) - - self.try_command('language renderscript allocation load 1 ' + - output_file, - ["Contents of file '%s' read into allocation 1" % - output_file]) - - self.try_command('language renderscript allocation refresh', - ['All allocations successfully recomputed']) - - self.try_command('language renderscript module', - ['dump']) - - self.try_command('language renderscript module dump', - ['RenderScript Modules:', - 'librs.simple.so', - 'Debug info loaded', - 'Globals: 1', - 'gColor - float4', - 'Kernels: 3', - 'root', - 'simple_kernel', - 'other_kernel', - 'java_package_name: %s' % self._pkg_name(), - 'version:']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_language_subcmds_no_debug.py b/tests/lldb/tests/testcases/test_language_subcmds_no_debug.py deleted file 
mode 100644 index c57343de..00000000 --- a/tests/lldb/tests/testcases/test_language_subcmds_no_debug.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestLanguageSubcmdsNoDebug.''' - -from __future__ import absolute_import - -import os - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - cpp_only_test, - ordered_test, -) - - -class TestLanguageSubcmdsNoDebug(TestBaseRemote): - '''Tests the 'language renderscript' subcommands without debug info. - - In particular, module dump should report missing debug info. 
- ''' - - bundle_target = { - 'java': 'JavaNoDebugWaitAttach', - 'jni': 'JNINoDebugWaitAttach', - 'cpp': 'CppNoDebugWaitAttach' - } - - def _pkg_name(self): - return { - 'java': 'com.android.rs.waitattachnodebug', - 'jni': 'com.android.rs.jninodebugwaitattach', - 'cpp': 'com.android.rs.cppwaitattach' - }[self.app_type] - - @ordered_test(0) - def test_language_subcommands_no_debug(self): - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('language renderscript kernel breakpoint set simple_kernel' - '', - ['(pending)']) - - self.try_command('process continue', - []) - - self.try_command('language renderscript kernel', - ['breakpoint', - 'coordinate', - 'list']) - - self.try_command('language renderscript kernel list', - ['RenderScript Kernels', - "Resource 'simple'", - 'root', - 'simple_kernel']) - - self.try_command('language renderscript context', - ['dump']) - - self.try_command('language renderscript context dump', - ['Inferred RenderScript Contexts', - '1 script instances']) - - self.try_command('language renderscript allocation', - ['list', - 'load', - 'save', - 'dump', - 'refresh']) - - self.try_command('language renderscript allocation list', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation list -i 0', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation list --id 0', - ['RenderScript Allocations:']) - - self.try_command('language renderscript allocation dump 1', - ['Data (X, Y, Z):']) - - output_file = self.get_tmp_file_path() - self.try_command('language renderscript allocation dump 1 -f ' + - output_file, - ["Results written to '%s'" % output_file]) - - if os.path.isfile(output_file): - os.remove(output_file) - - self.try_command('language renderscript allocation dump 1 --file ' + - output_file, - ["Results written to '%s'" % output_file]) - - 
self.try_command('language renderscript allocation save 1 ' + - output_file, - ["Allocation written to file '%s'" % output_file]) - - self.try_command('language renderscript allocation load 1 ' + - output_file, - ["Contents of file '%s' read into allocation 1" % - output_file]) - - self.try_command('language renderscript allocation refresh', - ['All allocations successfully recomputed']) - - # C++ tests have an additional kernel `other_kernel` - kernel_count = 3 if self.app_type == 'cpp' else 2 - self.try_command('language renderscript module', - ['dump']) - - self.try_command('language renderscript module dump', - ['RenderScript Modules:', - 'librs.simple.so', - 'Debug info does not exist.', - 'Globals: 1', - 'gColor - variable identified, but not found in ' - 'binary (symbol exists)', - 'Kernels: %s' % kernel_count, - 'root', - 'simple_kernel', - '', - 'java_package_name: %s' % self._pkg_name(), - 'version']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_multiple_rs_files.py b/tests/lldb/tests/testcases/test_multiple_rs_files.py deleted file mode 100644 index e0497f02..00000000 --- a/tests/lldb/tests/testcases/test_multiple_rs_files.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestMultipleRSFiles.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test, -) - -class TestMultipleRSFiles(TestBaseRemote): - '''Tests some commands on an apk which has two rs files.''' - - bundle_target = { - 'java': 'MultipleRSFiles', - 'jni': 'JNIMultipleRSFiles', - 'cpp': 'CppMultipleRSFiles' - } - - def _binary_name(self): - return { - 'java': 'multiplersfiles', - 'jni': 'multiplersfiles', - 'cpp': 'CppMultipleRSFi' - }[self.app_type] - - def _pkg_name(self): - return { - 'java': 'com.android.rs.multiplersfiles', - 'jni': 'com.android.rs.jnimultiplersfiles', - 'cpp': 'com.android.rs.cppmultiplersfiles' - }[self.app_type] - - def test_multiple_rs_files(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered', - 'Runtime functions hooked']) - - self.try_command('breakpoint set --file first.rs --line 28', - ['(pending)']) - - self.try_command('process continue', - ['stopped', - 'librs.first.so`first_kernel', - 'at first.rs:28', - "name = '%s'" % self._binary_name(), - 'stop reason = breakpoint 1']) - - self.try_command('language renderscript kernel list', - ['RenderScript Kernels', - "Resource 'first'", - "Resource 'second'", - 'root', - 'first_kernel', - 'second_kernel']) - - self.try_command('language renderscript context dump', - ['Inferred RenderScript Contexts', - '2 script instances']) - - self.try_command('language renderscript module dump', - ['RenderScript Modules:', - 'librs.first.so', - 'librs.second.so', - 'Debug info loaded', - 'Globals: 1', - 'gColor - float4', - 'Kernels: 2', - 'root', - 'first_kernel', - 'second_kernel', - 'java_package_name: %s' % self._pkg_name(), - 'version:']) - - @ordered_test('last') - @cpp_only_test() - def 
test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_read_global.py b/tests/lldb/tests/testcases/test_read_global.py deleted file mode 100644 index 0a337e02..00000000 --- a/tests/lldb/tests/testcases/test_read_global.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestReadGlobal.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test, - cpp_only_test, -) - - -class TestReadGlobal(TestBaseRemote): - '''Tests inspecting global variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_inspecting_global(self, global_name, expected_output, - expected_regex=None): - '''Inspect a global and check for the output. - - Run the "expr" and "target variable" commands on a given global and - with a given output. (The commands should be equivalent.) - - Args: - global_name: String which is the name of the global to inspect. - expected_output: List of strings that should be found in the output. - expected_regex: List of regular expressions that should match lldb's - output. 
- - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - self.try_command('expr ' + global_name, - expected_output, - expected_regex) - - self.try_command('target variable ' + global_name, - expected_output, - expected_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_list_script_globals(self): - # pylint: disable=line-too-long - - self.try_command('target variable', - ['Global variables for', - 'librs.simple.so', - "(uchar) uchar_global = '\\xea'", - '(short) short_global = -321', - '(ushort) ushort_global = 432', - '(int) int_global = 1234', - '(uint) uint_global = 2345', - '(float) float_global = 4.5', - '(ulong) ulong_global = 8888', - '(double) double_global = -456.5', - '(char2) char2_global = (11, -22)', - '(uchar2) uchar2_global = (0x21, 0x2c)', - '(short2) short2_global = (-555, 666)', - '(ushort2) ushort2_global = (777, 888)', - '(int2) int2_global = (999, -1111)', - '(uint2) uint2_global = (2222, 3333)', - '(float2) float2_global = (4.5, -5)', - '(long2) long2_global = (-4444, 5555)', - '(ulong2) ulong2_global = (6666, 7777)', - '(double2) double2_global = (88.5, -99)', - '(char3) char3_global = (11, -22, -33,', - '(uchar3) uchar3_global = (0x21, 0x2c, 0x37,', - '(short3) short3_global = (-555, 666, 777,', - '(ushort3) ushort3_global = (777, 888, 999,', - '(int3) int3_global = (999, -1111, 2222,', - '(uint3) uint3_global = (2222, 3333, 4444,', - '(float3) float3_global = (4.5, -5, -6.5,', - '(long3) long3_global = (-4444, 5555, 6666,', - '(ulong3) ulong3_global = (6666, 7777, 8888,', - '(double3) double3_global = (88.5, -99, 111.5,', - '(char4) char4_global = (55, 11, -22, -33)', - '(uchar4) uchar4_global = (0xde, 0x21, 
0x2c, 0x37)', - '(short4) short4_global = (-444, -555, 666, 777)', - '(ushort4) ushort4_global = (666, 777, 888, 999)', - '(int4) int4_global = (888, 999, -1111, 2222)', - '(uint4) uint4_global = (1111, 2222, 3333, 4444)', - '(float4) float4_global = (3, 4.5, -5, -6.5)', - '(long4) long4_global = (-3333, -4444, 5555, 6666)', - '(ulong4) ulong4_global = (5555, 6666, 7777, 8888)', - '(double4) double4_global = (-77, 88.5, -99, 111.5)', - '(rs_matrix2x2) matrix2x2_global = (m = (1, 2.5, 3, 4.5))', - '(rs_matrix3x3) matrix3x3_global = {\n' - ' m = ([0] = 5, [1] = 6.5, [2] = 7, [3] = 8.5, [4] = 9, [5] = 1.5, [6] = 2, [7] = 3.5, [8] = 4)', - '(rs_matrix4x4) matrix4x4_global = {\n' - ' m = {\n' - ' [0] = 5.5\n' - ' [1] = 6\n' - ' [2] = 7.5\n' - ' [3] = 8\n' - ' [4] = 9\n' - ' [5] = 1.5\n' - ' [6] = 2\n' - ' [7] = 3.5\n' - ' [8] = 4.5\n' - ' [9] = 5.5\n' - ' [10] = 6.5\n' - ' [11] = 7\n' - ' [12] = 8\n' - ' [13] = 9.5\n' - ' [14] = 1.5\n' - ' [15] = 2.5\n' - ' }\n', - '(rs_quaternion) quaternion_global = (4.5, 5.5, 6, 3)'], - [r"\((signed )?char\) char_global = '\\f'", - r'\((long )?long\) long_global = -77777']) - - @wimpy - def test_read_char_global(self): - # Use expr to inspect locals - self._try_inspecting_global('char_global', - ["'\\f'"], - [r'\((signed )?char\)']) - - def test_read_primitive_global(self): - self._try_inspecting_global('uchar_global', - ['(uchar)', "'\\xea'"]) - - self._try_inspecting_global('short_global', - ['(short)', '-321']) - - self._try_inspecting_global('ushort_global', - ['(ushort)', '432']) - - self._try_inspecting_global('int_global', - ['(int)', '1234']) - - self._try_inspecting_global('uint_global', - ['(uint)', '2345']) - - self._try_inspecting_global('float_global', - ['(float)', '4.5']) - - self._try_inspecting_global('long_global', - ['-77777'], - [r'\((long )?long\)']) - - self._try_inspecting_global('ulong_global', - ['(ulong)', '8888']) - - self._try_inspecting_global('double_global', - ['(double)', '-456.5']) - - 
self._try_inspecting_global('char2_global', - ['(char2)', '(11, -22)']) - - @wimpy - def test_write_global2(self): - self._try_inspecting_global('uchar2_global', - ['(uchar2)', '(0x21, 0x2c)']) - - def test_write_global3(self): - self._try_inspecting_global('short2_global', - ['(short2)', '(-555, 666)']) - - self._try_inspecting_global('ushort2_global', - ['(ushort2)', '(777, 888)']) - - self._try_inspecting_global('int2_global', - ['(int2)', '(999, -1111)']) - - self._try_inspecting_global('uint2_global', - ['(uint2)', '(2222, 3333)']) - - self._try_inspecting_global('float2_global', - ['(float2)', '(4.5, -5)']) - - self._try_inspecting_global('long2_global', - ['(long2)', '(-4444, 5555)']) - - self._try_inspecting_global('ulong2_global', - ['(ulong2)', '(6666, 7777)']) - - self._try_inspecting_global('double2_global', - ['(double2)', '(88.5, -99)']) - - self._try_inspecting_global('char3_global', - ['(char3)', - '(11, -22, -33,']) - - self._try_inspecting_global('uchar3_global', - ['(uchar3)', - '(0x21, 0x2c, 0x37,']) - - @wimpy - def test_global_write_short3(self): - self._try_inspecting_global('short3_global', - ['(short3)', - '(-555, 666, 777,']) - - def test_read_vec3(self): - self._try_inspecting_global('ushort3_global', - ['(ushort3)', - '(777, 888, 999,']) - - self._try_inspecting_global('int3_global', - ['(int3)', - '(999, -1111, 2222,']) - - self._try_inspecting_global('uint3_global', - ['(uint3)', - '(2222, 3333, 4444,']) - - self._try_inspecting_global('float3_global', - ['(float3)', - '(4.5, -5, -6.5,']) - - self._try_inspecting_global('long3_global', - ['(long3)', - '(-4444, 5555, 6666,']) - - self._try_inspecting_global('ulong3_global', - ['(ulong3)', - '(6666, 7777, 8888,']) - - self._try_inspecting_global('double3_global', - ['(double3)', - '(88.5, -99, 111.5,']) - - self._try_inspecting_global('char4_global', - ['(char4)', - '(55, 11, -22, -33)']) - - self._try_inspecting_global('uchar4_global', - ['(uchar4)', - '(0xde, 0x21, 0x2c, 0x37)']) - - 
self._try_inspecting_global('short4_global', - ['(short4)', - '(-444, -555, 666, 777)']) - - @wimpy - def test_read_ushort4(self): - self._try_inspecting_global('ushort4_global', - ['(ushort4)', - '(666, 777, 888, 999)']) - - def test_read_vec4(self): - self._try_inspecting_global('int4_global', - ['(int4)', - '(888, 999, -1111, 2222)']) - - self._try_inspecting_global('uint4_global', - ['(uint4)', - '(1111, 2222, 3333, 4444)']) - - self._try_inspecting_global('float4_global', - ['(float4)', - '(3, 4.5, -5, -6.5)']) - - self._try_inspecting_global('long4_global', - ['(long4)', - '(-3333, -4444, 5555, 6666)']) - - self._try_inspecting_global('ulong4_global', - ['(ulong4)', - '(5555, 6666, 7777, 8888)']) - - self._try_inspecting_global('double4_global', - ['(double4)', - '(-77, 88.5, -99, 111.5)']) - - self._try_inspecting_global('matrix2x2_global', - ['(rs_matrix2x2)', - '= (m = (1, 2.5, 3, 4.5))']) - - self._try_inspecting_global('matrix3x3_global', - ['(rs_matrix3x3)', - '= {\n' - ' m = ([0] = 5, [1] = 6.5, [2] = 7, [3] = 8.5, [4] = 9, [5] = 1.5, [6] = 2, [7] = 3.5, [8] = 4)']) - @wimpy - def test_read_matrix(self): - self._try_inspecting_global('matrix4x4_global', - ['(rs_matrix4x4)', - '= {\n' - ' m = {\n' - ' [0] = 5.5\n' - ' [1] = 6\n' - ' [2] = 7.5\n' - ' [3] = 8\n' - ' [4] = 9\n' - ' [5] = 1.5\n' - ' [6] = 2\n' - ' [7] = 3.5\n' - ' [8] = 4.5\n' - ' [9] = 5.5\n' - ' [10] = 6.5\n' - ' [11] = 7\n' - ' [12] = 8\n' - ' [13] = 9.5\n' - ' [14] = 1.5\n' - ' [15] = 2.5\n' - ' }\n']) - - @wimpy - def test_read_quaternion(self): - self._try_inspecting_global('quaternion_global', - ['(rs_quaternion)', - '(4.5, 5.5, 6, 3)']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_read_local.py b/tests/lldb/tests/testcases/test_read_local.py deleted file mode 100644 index 
9a6a80fd..00000000 --- a/tests/lldb/tests/testcases/test_read_local.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestReadLocal.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test, - skip_conditional, - cpp_only_test -) - - -class TestReadLocal(TestBaseRemote): - '''Tests inspecting local variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_inspecting_local(self, local_name, expected_output, - expected_regex=None): - '''Inspect a local and check for the output. - - Run the "expr" and "frame variable" commands on a given local and - with a given output. (The commands should be equivalent.) - - Args: - local_name: String which is the name of the global to inspect. - expected_output: List of strings that should be found in the output. - expected_regex: List of regular expressions that should match lldb's - output. - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. 
- ''' - self.try_command('expr ' + local_name, - expected_output, - expected_regex) - - self.try_command('frame variable ' + local_name, - expected_output, - expected_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('breakpoint set --file simple.rs --line 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_list_rs_kernel_frame_variables(self): - # pylint: disable=line-too-long - - self.try_command('frame variable', - ["(uchar) uchar_local = 'b'", - '(short) short_local = -321', - '(ushort) ushort_local = 432', - '(int) int_local = 1234', - '(uint) uint_local = 2345', - '(float) float_local = 4.5', - '(ulong) ulong_local = 8888', - '(double) double_local = -456.5', - '(char2) char2_local = (-11, -22)', - '(uchar2) uchar2_local = (0x21, 0x2c)', - '(short2) short2_local = (-555, 666)', - '(ushort2) ushort2_local = (777, 888)', - '(int2) int2_local = (999, -1111)', - '(uint2) uint2_local = (2222, 3333)', - '(float2) float2_local = (4.5, -5)', - '(long2) long2_local = (-4444, 5555)', - '(ulong2) ulong2_local = (6666, 7777)', - '(double2) double2_local = (88.5, -99)', - '(char3) char3_local = (11, -22, -33,', - '(uchar3) uchar3_local = (0x21, 0x2c, 0x37,', - '(short3) short3_local = (-555, 666, 777,', - '(ushort3) ushort3_local = (777, 888, 999,', - '(int3) int3_local = (999, -1111, 2222,', - '(uint3) uint3_local = (2222, 3333, 4444,', - '(float3) float3_local = (4.5, -5, -6.5,', - '(long3) long3_local = (-4444, 5555, 6666,', - '(ulong3) ulong3_local = (6666, 7777, 8888,', - '(double3) double3_local = (88.5, -99, 111.5,', - '(char4) char4_local = (55, 11, -22, -33)', - '(uchar4) uchar4_local = (0x16, 0x21, 0x2c, 0x37)', - '(short4) short4_local = (-444, -555, 666, 777)', - '(ushort4) ushort4_local = (666, 777, 888, 999)', - '(int4) int4_local = 
(888, 999, -1111, 2222)', - '(uint4) uint4_local = (1111, 2222, 3333, 4444)', - '(float4) float4_local = (3, 4.5, -5, -6.5)', - '(long4) long4_local = (-3333, -4444, 5555, 6666)', - '(ulong4) ulong4_local = (5555, 6666, 7777, 8888)', - '(double4) double4_local = (-77, 88.5, -99, 111.5)', - '(rs_matrix2x2) matrix2x2_local = (m = (1, 2.5, 3, 4.5))', - '(rs_matrix3x3) matrix3x3_local = {\n' - ' m = ([0] = 5, [1] = 6.5, [2] = 7, [3] = 8.5, [4] = 9, [5] = 1.5, [6] = 2, [7] = 3.5, [8] = 4)', - '(rs_matrix4x4) matrix4x4_local = {\n' - ' m = {\n' - ' [0] = 5.5\n' - ' [1] = 6\n' - ' [2] = 7.5\n' - ' [3] = 8\n' - ' [4] = 9\n' - ' [5] = 1.5\n' - ' [6] = 2\n' - ' [7] = 3.5\n' - ' [8] = 4.5\n' - ' [9] = 5.5\n' - ' [10] = 6.5\n' - ' [11] = 7\n' - ' [12] = 8\n' - ' [13] = 9.5\n' - ' [14] = 1.5\n' - ' [15] = 2.5\n' - ' }\n', - '(rs_quaternion) quaternion_local = (8, 9, 0.5, 7.5)'], - [r"\((signed )?char\) char_local = 'a'", - r'\((long )?long\) long_local = -77777']) - - - @wimpy - def test_inspect_primitive_types(self): - # Use expr to inspect locals - self._try_inspecting_local('char_local', - ["'a'"], - [r'\((signed )?char\)']) - - self._try_inspecting_local('uchar_local', - ['(uchar)', "'b'"]) - - self._try_inspecting_local('short_local', - ['(short)', '-321']) - - self._try_inspecting_local('ushort_local', - ['(ushort)', '432']) - - self._try_inspecting_local('int_local', - ['(int)', '1234']) - - self._try_inspecting_local('uint_local', - ['(uint)', '2345']) - - self._try_inspecting_local('float_local', - ['(float)', '4.5']) - - self._try_inspecting_local('long_local', - ['-77777'], [r'\((long )?long\)']) - - self._try_inspecting_local('ulong_local', - ['(ulong)', '8888']) - - self._try_inspecting_local('double_local', - ['(double)', '-456.5']) - - - @wimpy - def test_inspect_uchar2(self): - self._try_inspecting_local('uchar2_local', - ['(uchar2)', '(0x21, 0x2c)']) - - def test_inspect_vec2_types(self): - self._try_inspecting_local('char2_local', - ['(char2)', '(-11, -22)']) 
- - self._try_inspecting_local('short2_local', - ['(short2)', '(-555, 666)']) - - self._try_inspecting_local('ushort2_local', - ['(ushort2)', '(777, 888)']) - - self._try_inspecting_local('int2_local', - ['(int2)', '(999, -1111)']) - - self._try_inspecting_local('uint2_local', - ['(uint2)', '(2222, 3333)']) - - self._try_inspecting_local('float2_local', - ['(float2)', '(4.5, -5)']) - - self._try_inspecting_local('long2_local', - ['(long2)', '(-4444, 5555)']) - - self._try_inspecting_local('ulong2_local', - ['(ulong2)', '(6666, 7777)']) - - self._try_inspecting_local('double2_local', - ['(double2)', '(88.5, -99)']) - - self._try_inspecting_local('char3_local', - ['(char3)', - '(11, -22, -33,']) - - self._try_inspecting_local('uchar3_local', - ['(uchar3)', - '(0x21, 0x2c, 0x37,']) - - @wimpy - def test_inspect_short3(self): - self._try_inspecting_local('short3_local', - ['(short3)', - '(-555, 666, 777,']) - - def test_inspect_vec3_types(self): - self._try_inspecting_local('ushort3_local', - ['(ushort3)', - '(777, 888, 999,']) - - self._try_inspecting_local('int3_local', - ['(int3)', - '(999, -1111, 2222,']) - - self._try_inspecting_local('uint3_local', - ['(uint3)', - '(2222, 3333, 4444,']) - - self._try_inspecting_local('float3_local', - ['(float3)', - '(4.5, -5, -6.5,']) - - self._try_inspecting_local('long3_local', - ['(long3)', - '(-4444, 5555, 6666,']) - - self._try_inspecting_local('ulong3_local', - ['(ulong3)', - '(6666, 7777, 8888,']) - - self._try_inspecting_local('double3_local', - ['(double3)', - '(88.5, -99, 111.5,']) - - self._try_inspecting_local('char4_local', - ['(char4)', - '(55, 11, -22, -33)']) - - self._try_inspecting_local('uchar4_local', - ['(uchar4)', - '(0x16, 0x21, 0x2c, 0x37)']) - - self._try_inspecting_local('short4_local', - ['(short4)', - '(-444, -555, 666, 777)']) - - @wimpy - def test_inspect_ushort4(self): - self._try_inspecting_local('ushort4_local', - ['(ushort4)', - '(666, 777, 888, 999)']) - - def test_inspect_vec4_types(self): - 
self._try_inspecting_local('int4_local', - ['(int4)', - '(888, 999, -1111, 2222)']) - - self._try_inspecting_local('uint4_local', - ['(uint4)', - '(1111, 2222, 3333, 4444)']) - - self._try_inspecting_local('float4_local', - ['(float4)', - '(3, 4.5, -5, -6.5)']) - - self._try_inspecting_local('long4_local', - ['(long4)', - '(-3333, -4444, 5555, 6666)']) - - self._try_inspecting_local('ulong4_local', - ['(ulong4)', - '(5555, 6666, 7777, 8888)']) - - self._try_inspecting_local('double4_local', - ['(double4)', - '(-77, 88.5, -99, 111.5)']) - def test_inspect_matrix_types(self): - self._try_inspecting_local('matrix2x2_local', - ['(rs_matrix2x2)', - '= (m = (1, 2.5, 3, 4.5))']) - - self._try_inspecting_local('matrix3x3_local', - ['(rs_matrix3x3)', - '= {\n' - ' m = ([0] = 5, [1] = 6.5, [2] = 7, [3] = 8.5, [4] = 9, [5] = 1.5, [6] = 2, [7] = 3.5, [8] = 4)']) - - @wimpy - def test_inspect_matrix_4x4_local(self): - self._try_inspecting_local('matrix4x4_local', - ['(rs_matrix4x4)', - '= {\n' - ' m = {\n' - ' [0] = 5.5\n' - ' [1] = 6\n' - ' [2] = 7.5\n' - ' [3] = 8\n' - ' [4] = 9\n' - ' [5] = 1.5\n' - ' [6] = 2\n' - ' [7] = 3.5\n' - ' [8] = 4.5\n' - ' [9] = 5.5\n' - ' [10] = 6.5\n' - ' [11] = 7\n' - ' [12] = 8\n' - ' [13] = 9.5\n' - ' [14] = 1.5\n' - ' [15] = 2.5\n' - ' }\n']) - - @wimpy - def test_inspect_quaternion_local(self): - self._try_inspecting_local('quaternion_local', - ['(rs_quaternion)', - '(8, 9, 0.5, 7.5)']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_reduction.py b/tests/lldb/tests/testcases/test_reduction.py deleted file mode 100644 index 9653c020..00000000 --- a/tests/lldb/tests/testcases/test_reduction.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -import itertools - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy, -) -from harness.assert_mixins import CoordinateAssertionsMixin - -from reduce_common import ( - REDUCE_ITERATIONS, - REDUCE_STARTVAL, - REDUCE_SCRIPT, - X_TESTS, - Y_TESTS, - Z_TESTS, - ReductionMixin, -) - - -def coords_range_3d(x_range, y_range, z_range): - count = max((x_range, y_range, z_range)) - x = itertools.cycle(range(x_range)) - y = itertools.cycle(range(y_range)) - z = itertools.cycle(range(z_range)) - return itertools.islice( - itertools.izip(x, y, z), - count - ) - - -class TestReduce1DSingleThreaded( - TestBaseRemote, CoordinateAssertionsMixin, ReductionMixin): - """ - Reduction kernels for RenderScript are launched using - a different `.expand` function than regular `ForEach` kernels and reflect a - different API to the invoking program - - Although the debugger implementation for accessing these features tracks - this slightly differently for reduction kernels, the user interface should - still offer the basic functionality: - - breakpoints on a coordinate - - tracking, viewing and dumping allocations - - listing modules and constituent kernels and types - """ - - bundle_target = { - 'java': 'Reduction', - } - - def _delete_breakpoints(self): - try: - self.do_command('breakpoint delete -f') - except self.TestFail: - pass - - def setup(self, android): - """This test requires to be run on 
one thread.""" - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - """Reset the number of RS threads to the previous value.""" - android.pop_prop('debug.rs.max-threads') - - @ordered_test(0) - @wimpy - def test_setup(self): - self.try_command('language renderscript status', []) - self.try_command('b find_min_user_type_accum', []) - self.try_command('c', []) - - @ordered_test(1) - @wimpy - def test_renderscript_module_dump(self): - """ - Generalised Reduction kernels for RenderScript are not tracked in the - same way as `ForEach` kernels, and do not have `__attribute__((kernel))` - so we need to make sure that when a module contains reduction kernels, - `language renderscript module dump` in lldb prints the correct kernels. - """ - self.try_command( - 'language renderscript module dump', - [ - 'Reductions: 1', - 'find_min_user_type', - 'accumulator: find_min_user_type_accum', - 'combiner: find_min_user_type_comb', - 'outconverter: find_min_user_type_outc' - ] - ) - - @ordered_test(2) - @wimpy - def test_module_dump_with_foreach_kernel_separate(self): - """ - The reduction breakpoint is separate from that of a standard kernel - function breakpoint, so we need to make sure that when we dump a module, - reductions are properly collected and displayed alongside the standard - __attribute__((kernel)) functions. - Assert that `... module dump` can correctly distinguish between `reduce` - kernels and `ForEach` kernels. 
- """ - self.try_command( - 'language renderscript module dump', - [ - 'Kernels: 2', - 'Reductions: 1', - 'accumulator: find_min_user_type_accum', - 'initializer: find_min_user_type_init', - 'combiner: find_min_user_type_comb', - 'outconverter: find_min_user_type_outc' - ] - ) - - @wimpy - @ordered_test(3) - def test_reduction_breakpoint_set_all_roles_resolved(self): - """ - Assert that a reduction breakpoint successfully resolves all the - functions that make up the reduction kernel - """ - self.try_command( - 'language renderscript reduction breakpoint set find_min_user_type', - ['Breakpoint(s) created'] - ) - - self.try_command( - 'process continue', - expected_regex=[ - r'Process \d+ stopped', - r'librs.reduce.so`find_min_user_type', - r'stop reason = breakpoint' - ] - ) - name = REDUCE_SCRIPT - self.try_command( - 'breakpoint list', - expected_regex=[ - "RenderScript reduce breakpoint for 'find_min_user_type', locations = 4, resolved = 4", - 'where = librs.reduce.so`find_min_user_type_init (\+ \d+ )?at %s(.+, resolved,)' % name, - 'where = librs.reduce.so`find_min_user_type_accum (\+ \d+ )?at %s(.+, resolved,)' % name, - 'where = librs.reduce.so`find_min_user_type_comb (\+ \d+ )?at %s(.+, resolved,)' % name, - 'where = librs.reduce.so`find_min_user_type_outc (\+ \d+ )?at %s(.+, resolved,)' % name, - ] - ) - - @ordered_test(4) - def test_reduce_iterations(self): - """ - Given a reduction, we want to make sure that we break on - every accumulator invocation before seeing the outconverter called. 
- This requires the tests to be run single threaded - """ - self._delete_breakpoints() - self.try_command( - 'language renderscript reduction breakpoint set find_min_user_type -t initializer', - ) - self.try_command( - 'process continue', - expected_regex=[ - r'Process \d+ stopped', - r'librs.reduce.so`find_min_user_type_init', - r'stop reason = breakpoint', - ] - ) - self._delete_breakpoints() - - self.try_command(( - 'language renderscript reduction breakpoint ' - 'set find_min_user_type --function-role accumulator,outconverter'), - ['Breakpoint(s) created'] - ) - for i in range(REDUCE_ITERATIONS): - self.try_command( - 'process continue', - expected_regex=[ - r'Process \d+ resuming', - r'Process \d+ stopped', - r'librs.reduce.so`find_min_user_type_accum', - r'stop reason = breakpoint' - ] - ) - self.try_command('p val') - self.try_command( - 'p val.b', - expected_regex=[ - r'^\((const )?int32_t\)\s*\$\d+ = %s\s*$' % ( - i + REDUCE_STARTVAL) - ] - ) - # We should then finally break on the outconverter - self.try_command( - 'process continue', - expected_regex=[ - r'Process \d+ resuming', - r'Process \d+ stopped', - r'librs.reduce.so`find_min_user_type_outc', - r'stop reason = breakpoint' - ] - ) - - @ordered_test(5) - def test_function_role_breakpoints_combinations(self): - func_role_combinations = itertools.combinations( - ('accumulator', 'initializer'), - r=2 - ) - self._test_func_role_combinations(func_role_combinations) - - @wimpy - @ordered_test(6) - def test_resolve_function_role_all_reduce_functions(self): - """ - Assert that a reduction breakpoint successfully resolves all the - functions that make up the reduction kernel when the parameter `all` is - passed to `--function-role` for the breakpoint command - """ - self._delete_breakpoints() - self.try_command( - 'language renderscript reduction breakpoint set find_min_user_type -t all', - [r'Breakpoint(s) created'] - ) - self.try_command('c', []) - breakpoints_match = [ - r"where = librs.reduce.so`%s (\+ 
\d+ )?at %s:\d+, address = 0x[0-9a-fA-F]+, resolved" % ( - 'find_min_user_type_%s' % func_match, - REDUCE_SCRIPT - ) - for func_match in ('accum', 'init', 'comb', 'outc') - ] - self.try_command( - 'breakpoint list', - expected_regex=[ - r"Current breakpoints:", - r"RenderScript reduce breakpoint for 'find_min_user_type', locations = 4, resolved = 4", - r"Names:", - r"RenderScriptReduction", - ] + breakpoints_match - ) - - @ordered_test(8) - def test_reduce_breakpoint_conditional_1d_coordinate(self): - """ - Assert that breakpoints conditional on an allocation coordinate - are only triggered on that coordinate - """ - for x, _, __ in sorted(coords_range_3d(X_TESTS, Y_TESTS, Z_TESTS)): - self._delete_breakpoints() - self.assert_coord_bp_set( - 'find_min_user_type -t accumulator', - x, - kernel_type='reduction' - ) - self.assert_coord_stop('reduce', 'find_min_user_type', x) - # Step *into* the function so locals are available - # FIXME remove the need for `next` here; skip the function prologue - self.try_command('n') - self.try_command('p accum->a') - self.try_command('p accum->b') - - @ordered_test('last') - def test_exit(self): - self.try_command('process kill', []) diff --git a/tests/lldb/tests/testcases/test_reduction_combiner.py b/tests/lldb/tests/testcases/test_reduction_combiner.py deleted file mode 100644 index c070f3b3..00000000 --- a/tests/lldb/tests/testcases/test_reduction_combiner.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -import re -import itertools - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy, - skip_conditional, -) -from harness.assert_mixins import CoordinateAssertionsMixin - -from reduce_common import ( - REDUCE_SCRIPT, - REDUCE_AUTO_COMB_SCRIPT, - ReductionMixin, -) - - -multithreaded = lambda: skip_conditional( - lambda self: self.cpu_count == 1, - "skipping multithreaded test (1 CPU present)" -) - - -class TestReduceCombinerMultithreaded( - TestBaseRemote, CoordinateAssertionsMixin, ReductionMixin): - """ - RenderScript reduction combiners are currently run only on the output of a - parallel reduction step for the CPU reference implementation. These - testcases test LLDB's handling of breakpoints for the combiner function. - """ - - bundle_target = { - 'java': 'Reduction', - } - - def _delete_breakpoints(self): - try: - self.do_command('breakpoint delete -f') - except self.TestFail: - pass - - def setup(self, android): - """ - This test *must* be run on multiple threads, and is skipped if the - device does not support multiple threads - """ - cpu_spec = android.shell("cat /sys/devices/system/cpu/online").strip() - match = re.search(r'(^0(-\d+)?(,\d+([-]\d*)?)*)$', cpu_spec) - if not match or not match.groups(): - raise self.TestFail( - "unable to parse number of available CPUs in %r" % cpu_spec) - - def parse_range(s): - r = s.split('-') - if len(r) == 1: - return 1 - return int(r[1]) - int(r[0]) - - self.cpu_count = sum(map(parse_range, cpu_spec.split(','))) - android.push_prop('debug.rs.max-threads', self.cpu_count + 1) - - def teardown(self, android): - """Reset the number of RS threads to the previous value.""" - android.pop_prop('debug.rs.max-threads') - - @multithreaded() - @ordered_test(0) - @wimpy - def test_setup(self): - self.try_command('language 
renderscript status', []) - # first point of order: make sure the compiled script is properly - # loaded and that we can set a breakpoint on the named reduction - self.try_command( - 'language renderscript reduction breakpoint set find_min_user_type_auto_comb') - self.try_command( - 'process continue', - expected_regex=[ - r'Process \d+ stopped', - r'frame #0: (0x[0-9a-fA-F]+ )?librs.reduce_auto_comb.so`' - ] - ) - - @multithreaded() - def test_function_role_breakpoints_combinations(self): - func_role_combinations = itertools.combinations( - ('accumulator', 'outconverter', 'initializer', 'combiner'), - r=2 - ) - self._test_func_role_combinations(func_role_combinations) - - @multithreaded() - def test_reduction_breakpoint_set_single_type_user_comb(self): - return self._reduction_breakpoint_set_single_type( - 'reduce', - REDUCE_SCRIPT, - 'find_min_user_type', - ( - ('find_min_user_type_init', 'initializer'), - ('find_min_user_type_accum', 'accumulator'), - ('find_min_user_type_comb', 'combiner'), - ('find_min_user_type_outc', 'outconverter') - ) - ) - - @multithreaded() - def test_reduction_breakpoint_set_single_type_auto_comb(self): - return self._reduction_breakpoint_set_single_type( - 'reduce_auto_comb', - REDUCE_AUTO_COMB_SCRIPT, - 'find_min_user_type_auto_comb', - ( - ('find_min_user_type_init', 'initializer'), - ('find_min_user_type_accum', 'accumulator'), - ('find_min_user_type_accum.combiner', 'combiner'), - ('find_min_user_type_outc', 'outconverter') - ) - ) diff --git a/tests/lldb/tests/testcases/test_rs_consts.py b/tests/lldb/tests/testcases/test_rs_consts.py deleted file mode 100644 index 9a7ba70a..00000000 --- a/tests/lldb/tests/testcases/test_rs_consts.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestRSConsts.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test, -) - - -class TestRSConsts(TestBaseRemote): - '''Tests examining the RenderScript constants.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def test_rs_consts(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('language renderscript kernel breakpoint set kernel', - []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - # Constants - self.try_command('expr M_1_PI', - ['0.318309']) - - self.try_command('expr M_2_PI', - ['0.636619']) - - self.try_command('expr M_2_SQRTPI', - ['1.128379']) - - self.try_command('expr M_E', - ['2.718281']) - - self.try_command('expr M_LN10', - ['2.302585']) - - self.try_command('expr M_LN2', - ['0.693147']) - - self.try_command('expr M_LOG10E', - ['0.434294']) - - self.try_command('expr M_LOG2E', - ['1.442695']) - - self.try_command('expr M_PI', - ['3.141592']) - - self.try_command('expr M_PI_2', - ['1.570796']) - - self.try_command('expr M_PI_4', - ['0.785398']) - - self.try_command('expr M_SQRT1_2', - ['0.707106']) - - self.try_command('expr M_SQRT2', - ['1.414213']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints 
deleted']) - - self.try_command('process continue', ['exited with status = 0']) - diff --git a/tests/lldb/tests/testcases/test_script_group.py b/tests/lldb/tests/testcases/test_script_group.py deleted file mode 100644 index b3b31864..00000000 --- a/tests/lldb/tests/testcases/test_script_group.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestScriptGroup.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import wimpy - - -class TestScriptGroup(TestBaseRemote): - bundle_target = { - 'java': 'ScriptGroup' - } - - def setup(self, android): - '''This test requires to be run on one thread.''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value.''' - android.pop_prop('debug.rs.max-threads') - - @wimpy - def test_kernel_backtrace(self): - # number of allocation elements - array_size = 8 - - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered', - 'rsdDebugHintScriptGroup2']) - - self.try_command('language renderscript scriptgroup breakpoint set scriptgroup_test', - ['Breakpoint 1: no locations (pending)']) - - self.try_command('language renderscript scriptgroup list', - ['0 script groups']) - - self.try_command('process continue', - 
['resuming', - 'stopped', - 'stop reason = breakpoint', - 'librs.scriptgroup.so`foo', - 'scriptgroup.rs']) - - self.try_command('breakpoint list', - ['scriptgroup_test', - 'locations = 1']) - - self.try_command('language renderscript scriptgroup list', - ['1 script group', - 'scriptgroup_test', - 'foo', - 'goo']) - - self.try_command('language renderscript scriptgroup breakpoint set --stop-on-all scriptgroup_test', - ['Breakpoint 2: 2 locations']) - - self.try_command('breakpoint list', - ['scriptgroup_test', - 'librs.scriptgroup.so`foo', - 'librs.scriptgroup.so`goo']) - - # iterate over foo kernels - self.try_command('bt', - ['scriptgroup.rs:', - 'frame #0', 'librs.scriptgroup.so`foo', - 'frame #1', 'librs.scriptgroup.so`foo.expand']) - - for x in range(array_size): - self.try_command('frame var', - ['(int) a = {0}'.format(x)]) - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint', - 'librs.scriptgroup.so`{0}'.format( - 'foo' if x < 7 else 'goo')]) - - # iterate over goo kernels - self.try_command('bt', - ['stop reason = breakpoint', - 'scriptgroup.rs:', - 'frame #0', 'librs.scriptgroup.so`goo', - 'frame #1', 'librs.scriptgroup.so`goo.expand']) - - for x in range(array_size): - self.try_command('frame var', - ['(int) a = {0}'.format(x * x)]) - - if x < 7: - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint', - 'librs.scriptgroup.so`goo']) diff --git a/tests/lldb/tests/testcases/test_single_source.py b/tests/lldb/tests/testcases/test_single_source.py deleted file mode 100644 index 5da1d97b..00000000 --- a/tests/lldb/tests/testcases/test_single_source.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestInvokeFun.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import (ordered_test, wimpy) -from harness.exception import TestSuiteException - - -class TestSingleSource(TestBaseRemote): - '''Tests debugging a function executed from Java using invoke_*.''' - - bundle_target = { - 'java': "SingleSource" - } - - def setup(self, android): - - '''This test requires to be run on one thread.''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - - '''Reset the number of RS threads to the previous value.''' - android.pop_prop('debug.rs.max-threads') - - @ordered_test(-1) - @wimpy - def test_startup(self): - - # pylint: disable=line-too-long - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('breakpoint set --name check_in', - ['(pending)']) - - @ordered_test(0) - @wimpy - def test_invoke_1(self): - - # enter script_invoke_1 - self.try_command('breakpoint set --name script_invoke_1', - ['(pending)']) - - self.try_command('process continue', - ['stopped', - 'stop reason = breakpoint'], - [r'librs.rs_single_source.so`script_invoke_1']) - - self.try_command( - 'language renderscript allocation dump 1', - ['(0, 0, 0) = 1', - '(1, 0, 0) = 2', - '(2, 0, 0) = 3', - '(3, 0, 0) = 4']) - - self.try_command( - 'language renderscript allocation dump 2', - ['(0, 0, 0) = 5', - '(1, 0, 0) = 6', - '(2, 0, 0) = 7', - '(3, 0, 0) = 8']) - - 
self.try_command('breakpoint set --name `kernel_1', - ['address']) - - self.try_command('breakpoint set --name `kernel_2', - ['address']) - - # check our global allocation is visible - self.try_command('p global_alloc', - ['(rs_allocation)', - 'p = 0x']) - - # test kernel_1 - for _ in range(10): - # continue as long as there are threads hitting kernel_1 - out = self.do_command('process continue') - if 'librs.rs_single_source.so`kernel_1' in out: - continue - # if we hit check_in we have finished with kernel_1 - if 'librs.rs_single_source.so`check_in' in out: - self.try_command( - 'language renderscript allocation dump 1', - ['(0, 0, 0) = 25', - '(1, 0, 0) = 36', - '(2, 0, 0) = 49', - '(3, 0, 0) = 64']) - break - TestSuiteException('unexpected breakpoint') - else: - TestSuiteException('loop quota exceeded') - - # test kernel_2 - for _ in range(10): - # continue as long as there are threads hitting kernel_2 - out = self.do_command('process continue') - if 'librs.rs_single_source.so`kernel_2' in out: - continue - # if we hit check_in we have finished with kernel_2 - if 'librs.rs_single_source.so`check_in' in out: - self.try_command( - 'language renderscript allocation dump 2', - ['(0, 0, 0) = 125', - '(1, 0, 0) = 216', - '(2, 0, 0) = 343', - '(3, 0, 0) = 512']) - break - TestSuiteException('unexpected breakpoint') - else: - TestSuiteException('loop quota exceeded') - - @ordered_test(1) - @wimpy - def test_invoke_2(self): - - # enter script_invoke_2 - self.try_command('breakpoint set --name script_invoke_2', - ['address']) - - self.try_command('process continue', - ['stopped', - 'stop reason = breakpoint'], - [r'librs.rs_single_source.so`script_invoke_2']) - - # test void_kernel_1 - self.try_command('breakpoint set --name void_kernel_1', - ['address']) - - for _ in range(10): - out = self.do_command('process continue') - - # continue as long as there are threads hitting void_kernel_1 - if 'librs.rs_single_source.so`void_kernel_1' in out: - continue - - # if we hit 
check_in we have finished with void_kernel_1 - if 'librs.rs_single_source.so`check_in' in out: - self.try_command( - 'language renderscript allocation dump 4', - ['(0, 0, 0) = 0', - '(1, 0, 0) = 1', - '(2, 0, 0) = 2', - '(3, 0, 0) = 3']) - break - - TestSuiteException('unexpected breakpoint') - else: - TestSuiteException('loop quota exceeded') diff --git a/tests/lldb/tests/testcases/test_source_step.py b/tests/lldb/tests/testcases/test_source_step.py deleted file mode 100644 index c780b8a5..00000000 --- a/tests/lldb/tests/testcases/test_source_step.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestSourceStep.''' - -from __future__ import absolute_import - -import os -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - cpp_only_test, -) - - -class TestSourceStep(TestBaseRemote): - '''Test stepping through the source using step-in, -over and -out.''' - - bundle_target = { - 'java': 'BranchingFunCalls', - 'jni': 'JNIBranchingFunCalls', - 'cpp': 'CppBranchingFunCalls' - - } - - def script_dir(self): - file_dir = os.path.dirname(os.path.realpath(__file__)) - app_root = os.path.join(file_dir, '..', '..') - - return { - 'java': os.path.join(app_root, 'java', 'BranchingFunCalls', 'src', 'rs'), - 'cpp': os.path.join(app_root, 'cpp', 'BranchingFunCalls'), - 'jni': os.path.join(app_root, 'jni', 'BranchingFunCalls', 'jnibranchingfuncalls') - }[self.app_type] - - def setup(self, android): - '''This test requires to be run on one thread.''' - android.push_prop('debug.rs.max-threads', 1) - - def teardown(self, android): - '''Reset the number of RS threads to the previous value.''' - android.pop_prop('debug.rs.max-threads') - - def test_source_thread_step_in_out(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f scalars.rs -l 63', - ['(pending)']) - - self.try_command('process continue', - ['stopped', - 'stop reason = breakpoint', - 'scalars.rs:63']) - - # set the source mapping - self.set_src_map('scalars.rs', self.script_dir()) - - self.try_command('process status', - ['-> 63', - 'int i = in;']) - - #63 int i = in; - self.try_command('thread step-in', - ['-> 64']) - #64 float f = (float) i; - self.try_command('thread step-in', - ['-> 65']) - #49 modify_f(&f); - self.try_command('thread step-over', - ['-> 66']) - #50 modify_i(&i); - self.try_command('thread step-in', - ['-> 49']) - #49 int j = *i; - self.try_command('b -f scalars.rs -l 54', - ['librs.scalars.so`modify_i', - 
'scalars.rs:54']) - self.try_command('c', - ['stop reason = breakpoint', - 'scalars.rs:54', - '-> 54']) - #54 set_i(i, 0); - # For the line number anything between #37 and #38 is fine - self.try_command('thread step-in', - [], - [r'-> 3[678]']) - #38 int tmp = b; - self.try_command('thread step-out', - ['-> 54']) - - @cpp_only_test() - @ordered_test('last') - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('breakpoint delete 2', ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) - diff --git a/tests/lldb/tests/testcases/test_write_global.py b/tests/lldb/tests/testcases/test_write_global.py deleted file mode 100644 index 1d8d3017..00000000 --- a/tests/lldb/tests/testcases/test_write_global.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -'''Module that contains the test TestWriteGlobal.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test, - cpp_only_test, -) - - -class TestWriteGlobal(TestBaseRemote): - '''Tests modifying global variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_modifying_global(self, global_name, new_value, data_type_in, - expected_output, expected_output_regex=None): - '''Modify and then inspect a global and check for the output. - - Run the "expr" command to set a given global to a new value and - check that it is set afterwards by running the "target variable" - command. - - Args: - global_name: String which is the name of the global to modify. - new_value: A string that is the new value of the global. - data_type_in: A string containing a c-style parenthesised data type - representing the type of the global. - expected_output: List of strings that should be found in the output - of both commands. - expected_output_regex: List of regular expressions that should be - found in the output of the target variable - command. - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. 
- ''' - self.try_command('expr %s = %s%s' % - (global_name, data_type_in, new_value), - expected_output, - expected_output_regex) - self.try_command('target variable ' + global_name, - expected_output, - expected_output_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_char_global(self): - self._try_modifying_global('char_global', '-2', - '(signed char)', ['\'\\xfe\''], - [r'\((signed )?char\)']) - - def test_write_primitive_types(self): - self._try_modifying_global('uchar_global', '22', - '(uchar)', ['(uchar)', '\'\\x16\'']) - - self._try_modifying_global('short_global', '-33', - '(short)', ['(short)', '-33']) - - self._try_modifying_global('ushort_global', '44', - '(ushort)', ['(ushort)', '44']) - - self._try_modifying_global('int_global', '-55', - '(int)', ['(int)', '-55']) - - self._try_modifying_global('uint_global', '66', - '(uint)', ['(uint)', '66']) - - self._try_modifying_global('float_global', '-7.5', - '(float)', ['(float)', '-7.5']) - - self._try_modifying_global('long_global', '-888888', - '(long long)', ['-888888'], - [r'\((long )?long\)']) - - self._try_modifying_global('ulong_global', '99999999', - '(ulong)', ['(ulong)', '99999999']) - - self._try_modifying_global('double_global', '-10101.5', - '(double)', ['(double)', '-10101.5']) - - self._try_modifying_global('char2_global', '{22, 4}', - '(char2)', ['(char2)', '(22, 4)']) - - @wimpy - def test_write_uchar2(self): - self._try_modifying_global('uchar2_global', '{44, 55}', - '(uchar2)', ['(uchar2)', '(0x2c, 0x37)']) - - def test_write_vec2(self): - self._try_modifying_global('short2_global', '{-66, 77}', - '(short2)', ['(short2)', '(-66, 77)']) - - self._try_modifying_global('ushort2_global', '{88, 99}', - 
'(ushort2)', ['(ushort2)', '(88, 99)']) - - self._try_modifying_global('int2_global', '{111, -222}', - '(int2)', ['(int2)', '(111, -222)']) - - self._try_modifying_global('uint2_global', '{333, 444}', - '(uint2)', ['(uint2)', '(333, 444)']) - - self._try_modifying_global('float2_global', '{-55.5f, 6.0}', - '(float2)', ['(float2)', '(-55.5, 6)']) - - self._try_modifying_global('long2_global', '{666666, -777777}', - '(long2)', ['(long2)', '(666666, -777777)']) - - self._try_modifying_global('ulong2_global', '{888888, 999999}', - '(ulong2)', ['(ulong2)', '(888888, 999999)']) - - self._try_modifying_global('double2_global', '{11.0000000, -0.0l}', - '(double2)', ['(double2)', '(11, -0)']) - - self._try_modifying_global('char3_global', '{2, -3, 4}', - '(char3)', ['(char3)', '(2, -3, 4,']) - - self._try_modifying_global('uchar3_global', '{\'a\', \'b\', \'c\'}', - '(uchar3)', ['(uchar3)', '(0x61, 0x62, 0x63,']) - - @wimpy - def test_write_short3(self): - self._try_modifying_global('short3_global', '{44, -55, 66}', - '(short3)', ['(short3)', '(44, -55, 66,']) - - def test_write_vec3(self): - self._try_modifying_global('ushort3_global', '{88, 99, 111}', - '(ushort3)', ['(ushort3)', '(88, 99, 111,']) - - self._try_modifying_global('int3_global', '{-111, 222, -333}', - '(int3)', ['(int3)', '(-111, 222, -333,']) - - self._try_modifying_global('uint3_global', '{444, 555, 666}', - '(uint3)', ['(uint3)', '(444, 555, 666,']) - - self._try_modifying_global('float3_global', '{7.5F, 0008.000, 9}', - '(float3)', ['(float3)', '(7.5, 8, 9,']) - - self._try_modifying_global('long3_global', '{111111, -22222222, 3333333}', - '(long3)', ['(long3)', '(111111, -22222222, 3333333,']) - - self._try_modifying_global('ulong3_global', '{4444444, 5555555, 66666666}', - '(ulong3)', ['(ulong3)', '(4444444, 5555555, 66666666,']) - - self._try_modifying_global('double3_global', '{7.5L, -0, 8.9e1}', - '(double3)', ['(double3)', '(7.5, 0, 89,']) - - self._try_modifying_global('char4_global', '{0x1, 0x2, 
0x3, 0x4}', - '(char4)', - ['(char4)', '(1, 2, 3, 4)']) - - self._try_modifying_global('uchar4_global', '{0x5, 0x6, 0x7, 0x8}', - '(uchar4)', - ['(uchar4)', '(0x05, 0x06, 0x07, 0x08)']) - - self._try_modifying_global('short4_global', '{0x9, 0xa, 0xb, 0xc}', - '(short4)', - ['(short4)', '(9, 10, 11, 12)']) - - @wimpy - def test_write_ushort4(self): - self._try_modifying_global('ushort4_global', '{0xd, 0xe, 0xf, 0x10}', - '(ushort4)', - ['(ushort4)', '(13, 14, 15, 16)']) - - def test_write_vec4_global(self): - self._try_modifying_global('int4_global', '{0x11, 0x12, 0x13, 0x14}', - '(int4)', - ['(int4)', '(17, 18, 19, 20)']) - - self._try_modifying_global('uint4_global', '{0x15, 0x16, 0x17, 0x18}', - '(uint4)', - ['(uint4)', '(21, 22, 23, 24)']) - - self._try_modifying_global('float4_global', '{19.0, 20.5, -21, -22.5}', - '(float4)', - ['(float4)', '(19, 20.5, -21, -22.5)']) - - self._try_modifying_global('long4_global', '{0x1d, 0x1e, 0x1f, 0x20}', - '(long4)', - ['(long4)', '(29, 30, 31, 32)']) - - self._try_modifying_global('ulong4_global', '{0x21, 0x22, 0x23, 0x24}', - '(ulong4)', - ['(ulong4)', '(33, 34, 35, 36)']) - - self._try_modifying_global('double4_global', '{25.000, -26, -27.5, 28.0}', - '(double4)', - ['(double4)', '(25, -26, -27.5, 28)']) - - @ordered_test('last') - @cpp_only_test() - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_write_global_element.py b/tests/lldb/tests/testcases/test_write_global_element.py deleted file mode 100644 index 28882bdc..00000000 --- a/tests/lldb/tests/testcases/test_write_global_element.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestWriteGlobalElement.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy, - cpp_only_test -) - - -class TestWriteGlobalElement(TestBaseRemote): - '''Tests modifying elements of global variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_inspecting_global(self, global_name, expected_output): - '''Run the "expr" command on a given global and with a given output. - - Args: - global_name: String which is the name of the global to inspect. - expected_output: List of strings that should be found in the output. - - Raises: - TestFail: The lldb command did not provide the expected output. - ''' - self.try_command('expr ' + global_name, expected_output) - - def _try_modifying_global(self, global_name, new_value, expected_output, - expected_output_regex=None): - '''Modify and then inspect a global and check for the output. - - Run the "expr" command to set a given global to a new value and - check that it is set afterwards by running the "target variable" - command. - - Args: - global_name: String which is the name of the global to modify. - new_value: A string that is the new value of the global. - expected_output: List of strings that should be found in the output - of both commands. - expected_output_regex: List of regular expressions that should be - found in the output of the target variable - command. 
- - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - self.try_command('expr %s = %s' % (global_name, new_value), - expected_output, - expected_output_regex) - self.try_command('target variable ' + global_name, - expected_output, - expected_output_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_modify_global(self): - self._try_modifying_global('char2_global[0]', '2', - ['\'\\x02\''], - [r'\((signed )?char\)']) - self._try_inspecting_global('char2_global', ['(char2)', '(2, -22)']) - - def test_vec2(self): - self._try_modifying_global('uchar2_global[1]', '3', - ['\'\\x03\''], - [r'\(u(nsigned )?char\)']) - self._try_inspecting_global('uchar2_global', - ['(uchar2)', '(0x21, 0x03)']) - - self._try_modifying_global('short2_global[0]', '-44', - ['(short)', '-44']) - self._try_inspecting_global('short2_global', - ['(short2)', '(-44, 666)']) - - self._try_modifying_global('ushort2_global[1]', '55', - ['55'], - [r'\(u(nsigned )?short\)']) - self._try_inspecting_global('ushort2_global', - ['(ushort2)', '(777, 55)']) - - self._try_modifying_global('int2_global[0]', '666', - ['(int)', '666']) - self._try_inspecting_global('int2_global', - ['(int2)', '(666, -1111)']) - - self._try_modifying_global('uint2_global[1]', '777', - ['777'], - [r'\(u(nsigned )?int\)']) - self._try_inspecting_global('uint2_global', - ['(uint2)', '(2222, 777)']) - - self._try_modifying_global('float2_global[0]', '-8.5', - ['(float)', '-8.5']) - self._try_inspecting_global('float2_global', - ['(float2)', '(-8.5, -5)']) - - self._try_modifying_global('long2_global[1]', '999999', - ['999999'], - [r'\((long )?long\)']) - self._try_inspecting_global('long2_global', - 
['(long2)', '(-4444, 999999)']) - - self._try_modifying_global('ulong2_global[0]', '10101010101', - ['10101010101'], - [r'\(u(nsigned )?(long )?long\)']) - self._try_inspecting_global('ulong2_global', - ['(ulong2)', '(10101010101, 7777)']) - - self._try_modifying_global('double2_global[1]', '-11.000', - ['(double)', '-11']) - self._try_inspecting_global('double2_global', - ['(double2)', '(88.5, -11)']) - - self._try_modifying_global('char3_global[0]', '12', - ['\'\\f\''], - [r'\((signed )?char\)']) - self._try_inspecting_global('char3_global', - ['(char3)', - '(12, -22, -33,']) - - @wimpy - def test_uchar3(self): - self._try_modifying_global('uchar3_global[1]', '\'d\'', - ['\'d\''], - [r'\(u(nsigned )?char\)']) - self._try_inspecting_global('uchar3_global', - ['(uchar3)', - '(0x21, 0x64, 0x37,']) - - def test_vec3(self): - self._try_modifying_global('short3_global[2]', '-131', - ['(short)', '-131']) - self._try_inspecting_global('short3_global', - ['(short3)', - '(-555, 666, -131,']) - - self._try_modifying_global('ushort3_global[0]', '1414', - ['1414'], - [r'\(u(nsigned )?short\)']) - self._try_inspecting_global('ushort3_global', - ['(ushort3)', - '(1414, 888, 999,']) - - self._try_modifying_global('int3_global[0]', '151515', - ['(int)', '151515']) - self._try_inspecting_global('int3_global', - ['(int3)', - '(151515, -1111, 2222,']) - - self._try_modifying_global('uint3_global[1]', '161616', - ['161616'], - [r'\(u(nsigned )?int\)']) - self._try_inspecting_global('uint3_global', - ['(uint3)', - '(2222, 161616, 4444,']) - - self._try_modifying_global('float3_global[2]', '17.5', - ['(float)', '17.5']) - self._try_inspecting_global('float3_global', - ['(float3)', - '(4.5, -5, 17.5,']) - - self._try_modifying_global('long3_global[0]', '-181818181818', - ['-181818181818'], - [r'\((long )?long\)']) - self._try_inspecting_global('long3_global', - ['(long3)', - '(-181818181818, 5555, 6666,']) - - self._try_modifying_global('ulong3_global[1]', '191919191919', - 
['191919191919'], - [r'\(u(nsigned )?(long )?long\)']) - self._try_inspecting_global('ulong3_global', - ['(ulong3)', - '(6666, 191919191919, 8888,']) - - self._try_modifying_global('double3_global[2]', '20.5', - ['(double)', '20.5']) - self._try_inspecting_global('double3_global', - ['(double3)', - '(88.5, -99, 20.5,']) - - self._try_modifying_global('char4_global[0]', '-21', - ['\'\\xeb\''], - [r'\((signed )?char\)']) - self._try_inspecting_global('char4_global', - ['(char4)', - '(-21, 11, -22, -33)']) - - self._try_modifying_global('uchar4_global[1]', '22', - ['\'\\x16\''], - [r'\(u(nsigned )?char\)']) - self._try_inspecting_global('uchar4_global', - ['(uchar4)', - '(0xde, 0x16, 0x2c, 0x37)']) - - @wimpy - def test_short4(self): - self._try_modifying_global('short4_global[2]', '23', - ['(short)', '23']) - self._try_inspecting_global('short4_global', - ['(short4)', - '(-444, -555, 23, 777)']) - - def test_vec4(self): - self._try_modifying_global('ushort4_global[3]', '24', - ['24'], - [r'\(u(nsigned )?short\)']) - self._try_inspecting_global('ushort4_global', - ['(ushort4)', - '(666, 777, 888, 24)']) - - self._try_modifying_global('int4_global[0]', '-2525', - ['(int)', '-2525']) - self._try_inspecting_global('int4_global', - ['(int4)', - '(-2525, 999, -1111, 2222)']) - - self._try_modifying_global('uint4_global[1]', '26262', - ['26262'], - [r'\(u(nsigned )?int\)']) - self._try_inspecting_global('uint4_global', - ['(uint4)', - '(1111, 26262, 3333, 4444)']) - - self._try_modifying_global('float4_global[2]', '27.0f', - ['(float)', '27']) - self._try_inspecting_global('float4_global', - ['(float4)', - '(3, 4.5, 27, -6.5)']) - - self._try_modifying_global('long4_global[3]', '-28282828282', - ['-28282828282'], - [r'\((long )?long\)']) - self._try_inspecting_global('long4_global', - ['(long4)', - '(-3333, -4444, 5555, -28282828282)']) - - self._try_modifying_global('ulong4_global[0]', '2929292929', - ['2929292929'], - [r'\(u(nsigned )?(long )?long\)']) - 
self._try_inspecting_global('ulong4_global', - ['(ulong4)', - '(2929292929, 6666, 7777, 8888)']) - - self._try_modifying_global('double4_global[1]', '30.5', - ['(double)', '30.5']) - self._try_inspecting_global('double4_global', - ['(double4)', - '(-77, 30.5, -99, 111.5)']) - - @cpp_only_test() - @ordered_test('last') - def test_cpp_cleanup(self): - self.try_command('breakpoint delete 1', ['1 breakpoints deleted']) - - self.try_command('process continue', - ['exited with status = 0']) diff --git a/tests/lldb/tests/testcases/test_write_local.py b/tests/lldb/tests/testcases/test_write_local.py deleted file mode 100644 index cd32ecc5..00000000 --- a/tests/lldb/tests/testcases/test_write_local.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestWriteLocal.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - ordered_test, - wimpy -) - - -class TestWriteLocal(TestBaseRemote): - '''Tests modifying local variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_modifying_local(self, local_name, new_value, data_type_in, - expected_output, expected_output_regex=None): - '''Try getting lldb to modify a local and check the output. 
- - Run the "expr" command to set a given local to a new value and - check that it is set afterwards by running the "target variable" - command. - - Args: - local_name: String which is the name of the local to modify. - new_value: A string that is the new value of the local. - data_type_in: A string containing a c-style parenthesised data type - representing the type of the local. - expected_output: List of strings that should be found in the output - of both commands. - expected_output_regex: List of regular expressions that should be - found in the output of the target variable - command. - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - # pylint: disable=too-many-arguments - self.try_command('expr %s = %s%s' - % (local_name, data_type_in, new_value), - expected_output, - expected_output_regex) - self.try_command('frame variable ' + local_name, - expected_output, - expected_output_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_modify_char(self): - self._try_modifying_local('char_local', '-2', - '(signed char)', ['\'\\xfe\''], - [r'\((signed )?char\)']) - - def test_modify_primitive(self): - self._try_modifying_local('uchar_local', '22', - '(uchar)', ['(uchar)', '\'\\x16\'']) - - self._try_modifying_local('short_local', '-33', - '(short)', ['(short)', '-33']) - - self._try_modifying_local('ushort_local', '44', - '(ushort)', ['(ushort)', '44']) - - self._try_modifying_local('int_local', '-55', - '(int)', ['(int)', '-55']) - - self._try_modifying_local('uint_local', '66', - '(uint)', ['(uint)', '66']) - - self._try_modifying_local('float_local', '-7.5', - '(float)', ['(float)', '-7.5']) - - 
self._try_modifying_local('long_local', '-888888', - '(long long)', ['-888888'], - [r'\((long )?long\)']) - - self._try_modifying_local('ulong_local', '99999999', - '(ulong)', ['(ulong)', '99999999']) - - self._try_modifying_local('double_local', '-10101.5', - '(double)', ['(double)', '-10101.5']) - - self._try_modifying_local('char2_local', '{22, 4}', - '(char2)', ['(char2)', '(22, 4)']) - - @wimpy - def test_modify_uchar2(self): - self._try_modifying_local('uchar2_local', '{44, 55}', - '(uchar2)', ['(uchar2)', '(0x2c, 0x37)']) - - def test_modify_vec2(self): - self._try_modifying_local('short2_local', '{-66, 77}', - '(short2)', ['(short2)', '(-66, 77)']) - - self._try_modifying_local('ushort2_local', '{88, 99}', - '(ushort2)', ['(ushort2)', '(88, 99)']) - - self._try_modifying_local('int2_local', '{111, -222}', - '(int2)', ['(int2)', '(111, -222)']) - - self._try_modifying_local('uint2_local', '{333, 444}', - '(uint2)', ['(uint2)', '(333, 444)']) - - self._try_modifying_local('float2_local', '{-55.5f, 6.0}', - '(float2)', ['(float2)', '(-55.5, 6)']) - - self._try_modifying_local('long2_local', '{666666, -777777}', - '(long2)', ['(long2)', '(666666, -777777)']) - - self._try_modifying_local('ulong2_local', '{888888, 999999}', - '(ulong2)', ['(ulong2)', '(888888, 999999)']) - - self._try_modifying_local('double2_local', '{11.0000000, -0.0l}', - '(double2)', ['(double2)', '(11, -0)']) - - self._try_modifying_local('char3_local', '{2, -3, 4}', - '(char3)', ['(char3)', '(2, -3, 4,']) - - self._try_modifying_local('uchar3_local', '{\'a\', \'b\', \'c\'}', - '(uchar3)', ['(uchar3)', '(0x61, 0x62, 0x63,']) - - @wimpy - def test_modify_short3(self): - self._try_modifying_local('short3_local', '{44, -55, 66}', - '(short3)', ['(short3)', '(44, -55, 66,']) - - def test_modify_vec3(self): - self._try_modifying_local('ushort3_local', '{88, 99, 111}', - '(ushort3)', ['(ushort3)', '(88, 99, 111,']) - - self._try_modifying_local('int3_local', '{-111, 222, -333}', - '(int3)', 
['(int3)', '(-111, 222, -333,']) - - self._try_modifying_local('uint3_local', '{444, 555, 666}', - '(uint3)', ['(uint3)', '(444, 555, 666,']) - - self._try_modifying_local('float3_local', '{7.5F, 0008.000, 9}', - '(float3)', ['(float3)', '(7.5, 8, 9,']) - - self._try_modifying_local('long3_local', '{111111, -22222222, 3333333}', - '(long3)', ['(long3)', '(111111, -22222222, 3333333,']) - - self._try_modifying_local('ulong3_local', '{4444444, 5555555, 66666666}', - '(ulong3)', ['(ulong3)', '(4444444, 5555555, 66666666,']) - - self._try_modifying_local('double3_local', '{7.5L, -0, 8.9e1}', - '(double3)', ['(double3)', '(7.5, 0, 89,']) - - self._try_modifying_local('char4_local', '{0x1, 0x2, 0x3, 0x4}', - '(char4)', - ['(char4)', '(1, 2, 3, 4)']) - - self._try_modifying_local('uchar4_local', '{0x5, 0x6, 0x7, 0x8}', - '(uchar4)', - ['(uchar4)', '(0x05, 0x06, 0x07, 0x08)']) - - self._try_modifying_local('short4_local', '{0x9, 0xa, 0xb, 0xc}', - '(short4)', - ['(short4)', '(9, 10, 11, 12)']) - - @wimpy - def test_modify_ushort4(self): - self._try_modifying_local('ushort4_local', '{0xd, 0xe, 0xf, 0x10}', - '(ushort4)', - ['(ushort4)', '(13, 14, 15, 16)']) - - def test_modify_vec4(self): - self._try_modifying_local('int4_local', '{0x11, 0x12, 0x13, 0x14}', - '(int4)', - ['(int4)', '(17, 18, 19, 20)']) - - self._try_modifying_local('uint4_local', '{0x15, 0x16, 0x17, 0x18}', - '(uint4)', - ['(uint4)', '(21, 22, 23, 24)']) - - self._try_modifying_local('float4_local', '{19.0, 20.5, -21, -22.5}', - '(float4)', - ['(float4)', '(19, 20.5, -21, -22.5)']) - - self._try_modifying_local('long4_local', '{0x1d, 0x1e, 0x1f, 0x20}', - '(long4)', - ['(long4)', '(29, 30, 31, 32)']) - - self._try_modifying_local('ulong4_local', '{0x21, 0x22, 0x23, 0x24}', - '(ulong4)', - ['(ulong4)', '(33, 34, 35, 36)']) - - self._try_modifying_local('double4_local', '{25.000, -26, -27.5, 28.0}', - '(double4)', - ['(double4)', '(25, -26, -27.5, 28)']) diff --git 
a/tests/lldb/tests/testcases/test_write_local_element.py b/tests/lldb/tests/testcases/test_write_local_element.py deleted file mode 100644 index 17e61216..00000000 --- a/tests/lldb/tests/testcases/test_write_local_element.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''Module that contains the test TestWriteLocalElement.''' - -from __future__ import absolute_import - -from harness.test_base_remote import TestBaseRemote -from harness.decorators import ( - wimpy, - ordered_test -) - - -class TestWriteLocalElement(TestBaseRemote): - '''Tests modifying elements of local variables of all types.''' - - bundle_target = { - 'java': 'KernelVariables', - 'jni': 'JNIKernelVariables', - 'cpp': 'CppKernelVariables' - } - - def _try_inspecting_local(self, local_name, expected_output): - '''Run the "expr" command on a given local and with a given output. - - Args: - local_name: String which is the name of the local to inspect. - expected_output: List of strings that should be found in the output. - - Raises: - TestFail: The lldb command did not provide the expected output. - ''' - self.try_command('expr ' + local_name, expected_output) - - def _try_modifying_local(self, local_name, new_value, expected_output, - expected_output_regex=None): - '''Modify and then inspect a local and check for the output. 
- - Run the "expr" command to set a given local to a new value and - check that it is set afterwards by running the "target variable" - command. - - Args: - local_name: String which is the name of the local to modify. - new_value: A string that is the new value of the local. - expected_output: List of strings that should be found in the output - of both commands. - expected_output_regex: List of regular expressions that should be - found in the output of the target variable - command. - - Raises: - TestFail: One of the lldb commands did not provide the expected - output. - ''' - self.try_command('expr %s = %s' % (local_name, new_value), - expected_output, - expected_output_regex) - self.try_command('frame variable ' + local_name, - expected_output, - expected_output_regex) - - @wimpy - @ordered_test(0) - def test_setup(self): - self.try_command('language renderscript status', - ['Runtime Library discovered', - 'Runtime Driver discovered']) - - self.try_command('b -f simple.rs -l 145', []) - - self.try_command('process continue', - ['resuming', - 'stopped', - 'stop reason = breakpoint']) - - @wimpy - def test_modify_char2(self): - self._try_modifying_local('char2_local[0]', '2', - ['2'], [r'\((signed )?char\)']) - self._try_inspecting_local('char2_local', - ['(char2)', '(2, -22)']) - - def test_modify_vec2(self): - self._try_modifying_local('uchar2_local[1]', '3', - ['3'], [r'\(u(nsigned )?char\)']) - self._try_inspecting_local('uchar2_local', - ['(uchar2)', '(0x21, 0x03)']) - - self._try_modifying_local('short2_local[0]', '-44', - ['(short)', '-44']) - self._try_inspecting_local('short2_local', - ['(short2)', '(-44, 666)']) - - self._try_modifying_local('ushort2_local[1]', '55', - ['55'], [r'\(u(nsigned )?short\)']) - self._try_inspecting_local('ushort2_local', - ['(ushort2)', '(777, 55)']) - - self._try_modifying_local('int2_local[0]', '666', - ['(int)', '666']) - self._try_inspecting_local('int2_local', - ['(int2)', '(666, -1111)']) - - 
self._try_modifying_local('uint2_local[1]', '777', - ['777'], [r'\(u(nsigned )?int\)']) - self._try_inspecting_local('uint2_local', - ['(uint2)', '(2222, 777)']) - - self._try_modifying_local('float2_local[0]', '-8.5', - ['(float)', '-8.5']) - self._try_inspecting_local('float2_local', - ['(float2)', '(-8.5, -5)']) - - self._try_modifying_local('long2_local[1]', '999999', - ['999999'], - [r'\((long )?long\)']) - self._try_inspecting_local('long2_local', - ['(long2)', '(-4444, 999999)']) - - self._try_modifying_local('ulong2_local[0]', '10101010101', - ['10101010101'], - [r'\(u(nsigned )?(long )?long\)']) - self._try_inspecting_local('ulong2_local', - ['(ulong2)', '(10101010101, 7777)']) - - self._try_modifying_local('double2_local[1]', '-11.000', - ['(double)', '-11']) - self._try_inspecting_local('double2_local', - ['(double2)', '(88.5, -11)']) - - # For some reason the result of some char and uchar expr is in hex - # and that of frame variable in decimal, so calling - # try_modifying_local doesn't work, because it reuses the expected - # output for both commands. 
- self.try_command('expr char3_local[0] = 12', - ['\'\\f\''], - [r'\((signed )?char\)']) - self.try_command('frame variable char3_local[0]', - ['12'], - [r'\((signed )?char\)']) - - self._try_inspecting_local('char3_local', - ['(char3)', - '(12, -22, -33,']) - - @wimpy - def test_modify_uchar3(self): - self.try_command('expr uchar3_local[1] = \'d\'', - ['\'d\''], - [r'\(u(nsigned )?char\)']) - self.try_command('frame variable uchar3_local[1]', - ['0x64'], - [r'\(u(nsigned )?char\)']) - - - def test_modify_vec3(self): - self._try_inspecting_local('uchar3_local', - ['(uchar3)', - '(0x21, 0x64, 0x37,']) - - self._try_modifying_local('short3_local[2]', '-131', - ['(short)', '-131']) - self._try_inspecting_local('short3_local', - ['(short3)', - '(-555, 666, -131,']) - - self._try_modifying_local('ushort3_local[0]', '1414', - ['1414'], [r'\(u(nsigned )?short\)']) - self._try_inspecting_local('ushort3_local', - ['(ushort3)', - '(1414, 888, 999,']) - - self._try_modifying_local('int3_local[0]', '151515', - ['(int)', '151515']) - self._try_inspecting_local('int3_local', - ['(int3)', - '(151515, -1111, 2222,']) - - self._try_modifying_local('uint3_local[1]', '161616', - ['161616'], [r'\(u(nsigned )?int\)']) - self._try_inspecting_local('uint3_local', - ['(uint3)', - '(2222, 161616, 4444,']) - - self._try_modifying_local('float3_local[2]', '17.5', - ['(float)', '17.5']) - self._try_inspecting_local('float3_local', - ['(float3)', - '(4.5, -5, 17.5,']) - - self._try_modifying_local('long3_local[0]', '-181818181818', - ['-181818181818'], [r'\((long )?long\)']) - self._try_inspecting_local('long3_local', - ['(long3)', - '(-181818181818, 5555, 6666,']) - - self._try_modifying_local('ulong3_local[1]', '191919191919', - ['191919191919'], - [r'\(u(nsigned )?(long )?long\)']) - self._try_inspecting_local('ulong3_local', - ['(ulong3)', - '(6666, 191919191919, 8888,']) - - self._try_modifying_local('double3_local[2]', '20.5', - ['(double)', '20.5']) - 
self._try_inspecting_local('double3_local', - ['(double3)', - '(88.5, -99, 20.5,']) - - self.try_command('expr char4_local[0] = -21', - ['\'\\xeb\''], - [r'\((signed )?char\)']) - self.try_command('frame variable char4_local[0]', - ['-21'], - [r'\((signed )?char\)']) - - self._try_inspecting_local('char4_local', - ['(char4)', - '(-21, 11, -22, -33)']) - - self.try_command('expr uchar4_local[1] = 22', - ['\'\\x16\''], - [r'\(u(nsigned )?char\)']) - self.try_command('frame variable uchar4_local[1]', - ['0x16'], - [r'\(u(nsigned )?char\)']) - - self._try_inspecting_local('uchar4_local', - ['(uchar4)', - '(0x16, 0x16, 0x2c, 0x37)']) - - @wimpy - def test_modify_short4(self): - self._try_modifying_local('short4_local[2]', '23', - ['(short)', '23']) - self._try_inspecting_local('short4_local', - ['(short4)', - '(-444, -555, 23, 777)']) - - def test_modify_vec4(self): - self._try_modifying_local('ushort4_local[3]', '24', - ['24'], [r'\(u(nsigned )?short\)']) - self._try_inspecting_local('ushort4_local', - ['(ushort4)', - '(666, 777, 888, 24)']) - - self._try_modifying_local('int4_local[0]', '-2525', - ['(int)', '-2525']) - self._try_inspecting_local('int4_local', - ['(int4)', - '(-2525, 999, -1111, 2222)']) - - self._try_modifying_local('uint4_local[1]', '26262', - ['26262'], [r'\(u(nsigned )?int\)']) - self._try_inspecting_local('uint4_local', - ['(uint4)', - '(1111, 26262, 3333, 4444)']) - - self._try_modifying_local('float4_local[2]', '27.0f', - ['(float)', '27']) - self._try_inspecting_local('float4_local', - ['(float4)', - '(3, 4.5, 27, -6.5)']) - - self._try_modifying_local('long4_local[3]', '-28282828282', - ['-28282828282'], [r'\((long )?long\)']) - self._try_inspecting_local('long4_local', - ['(long4)', - '(-3333, -4444, 5555, -28282828282)']) - - self._try_modifying_local('ulong4_local[0]', '2929292929', - ['2929292929'], - [r'\(u(nsigned )?(long )?long\)']) - self._try_inspecting_local('ulong4_local', - ['(ulong4)', - '(2929292929, 6666, 7777, 8888)']) - - 
self._try_modifying_local('double4_local[1]', '30.5', - ['(double)', '30.5']) - self._try_inspecting_local('double4_local', - ['(double4)', - '(-77, 30.5, -99, 111.5)']) diff --git a/toolkit/Android.bp b/toolkit/Android.bp new file mode 100644 index 00000000..d3fa21e1 --- /dev/null +++ b/toolkit/Android.bp @@ -0,0 +1,135 @@ +package { + default_applicable_licenses: ["Android-Apache-2.0"], +} + +// TODO: In later CLs, this build file will be replaced by a stand alone build that's not part of Android. + +cc_binary { + name: "renderscripttoolkittest", + srcs: [ + "TestTaskProcessor.cpp" + ], + shared_libs: [ + "libbase", + "librenderscripttoolkit", + ], +} + +cc_library_shared { + name: "librenderscripttoolkit", + defaults: [], + vendor_available: false, + native_bridge_supported: false, + vndk: { + enabled: false, + support_system_process: false, + }, + + srcs: [ + "Blend.cpp", + "Blur.cpp", + "ColorMatrix.cpp", + "Convolve3x3.cpp", + "Convolve5x5.cpp", + "Histogram.cpp", + "Lut.cpp", + "Lut3d.cpp", + "RenderScriptToolkit.cpp", + "Resize.cpp", + "TaskProcessor.cpp", + "Utils.cpp", + "YuvToRgb.cpp", + ], + + static_libs: [ "cpufeatures" ], + + arch: { + arm64: { + cflags: [ + "-DARCH_ARM_USE_INTRINSICS", + "-DARCH_ARM64_USE_INTRINSICS", + "-DARCH_ARM64_HAVE_NEON", + ], + + srcs: [ + "Blend_advsimd.S", + "Blur_advsimd.S", + "ColorMatrix_advsimd.S", + "Convolve_advsimd.S", + "Lut3d_advsimd.S", + "Resize_advsimd.S", + "YuvToRgb_advsimd.S", + ], + }, + + arm: { + cflags: [ + "-DARCH_ARM_HAVE_VFP", + "-DARCH_ARM_USE_INTRINSICS", + ], + + srcs: [ + "Blend_neon.S", + "Blur_neon.S", + "ColorMatrix_neon.S", + "Convolve_neon.S", + "Lut3d_neon.S", + "Resize_neon.S", + "YuvToRgb_neon.S", + ], + + asflags: ["-mfpu=neon"], + + neon: { + cflags: [ + "-DARCH_ARM_HAVE_NEON", + ], + }, + }, + + x86: { + cflags: ["-DARCH_X86_HAVE_SSSE3"], + srcs: ["x86.cpp"], + }, + x86_64: { + cflags: ["-DARCH_X86_HAVE_SSSE3"], + srcs: ["x86.cpp"], + avx2: { + cflags: ["-DARCH_X86_HAVE_AVX2", "-mavx2", 
"-mfma"], + }, + }, + }, + + shared_libs: [ + "libbase", + "liblog", + "libnativehelper", + "libjnigraphics", + ], + header_libs: [ + // TODO Once we compile in the .cpp files, check if any of these libraries are needed. + //"libutils_headers", + //"libhardware_headers", + ], + + include_dirs: [ + ], + + cflags: [ + "-Wthread-safety", + "-Werror", + "-Wall", + "-Wextra", + "-Wno-unused-parameter", + "-Wno-unused-variable", + ], + + // TODO: Is this needed? + product_variables: { + pdk: { + // Not building RenderScript modules in PDK builds, as libmediandk + // is not available in PDK. + enabled: false, + }, + }, +} diff --git a/toolkit/Blend.cpp b/toolkit/Blend.cpp new file mode 100644 index 00000000..1f6319e7 --- /dev/null +++ b/toolkit/Blend.cpp @@ -0,0 +1,370 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <assert.h> + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +namespace android { +namespace renderscript { + +#define LOG_TAG "renderscript.toolkit.Blend" + +/** + * Blends a source into a destination, based on the mode. + */ +class BlendTask : public Task { + // The type of blending to do. + RenderScriptToolkit::BlendingMode mMode; + // The input we're blending. + const uchar4* mIn; + // The destination, used both for input and output. 
+ uchar4* mOut; + + void blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out, + uint32_t length); + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. + virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + BlendTask(RenderScriptToolkit::BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX, + size_t sizeY, const Restriction* restriction) + : Task{sizeX, sizeY, 4, true, restriction}, + mMode{mode}, + mIn{reinterpret_cast<const uchar4*>(in)}, + mOut{reinterpret_cast<uchar4*>(out)} {} +}; + +#if defined(ARCH_ARM_USE_INTRINSICS) +extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot, + uint32_t xstart, uint32_t xend); +#endif + +#if defined(ARCH_X86_HAVE_SSSE3) +extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8); +extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8); +#endif + +// Convert vector to uchar4, clipping each value to 255. 
+template <typename TI> +static inline uchar4 convertClipped(TI amount) { + return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x), + static_cast<uchar>(amount.y > 255 ? 255 : amount.y), + static_cast<uchar>(amount.z > 255 ? 255 : amount.z), + static_cast<uchar>(amount.w > 255 ? 255 : amount.w)}; +} + +/** + * Blends length pixels from in into out, in place, according to mode. + * + * With ARCH_ARM_USE_INTRINSICS and mUsesSimd set, the assembly routine is tried first; a negative + * return means it did not handle the request, so the scalar loops below process the whole span. + * With ARCH_X86_HAVE_SSSE3, modes that have an intrinsic pass groups of 8 pixels to it when more + * than 8 pixels remain, and the scalar loop finishes whatever is left. + */ +void BlendTask::blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out, + uint32_t length) { + uint32_t x1 = 0; + uint32_t x2 = length; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd) { + if (rsdIntrinsicBlend_K(out, in, static_cast<int>(mode), x1, x2) >= 0) { + return; + } else { + /* A scoped enum is not promoted to int when passed through "...", so convert it explicitly to match the %d conversion. */ + ALOGW("Intrinsic Blend failed to use SIMD for %d", static_cast<int>(mode)); + } + } +#endif + switch (mode) { + case RenderScriptToolkit::BlendingMode::CLEAR: + for (;x1 < x2; x1++, out++) { + *out = 0; + } + break; + case RenderScriptToolkit::BlendingMode::SRC: + for (;x1 < x2; x1++, out++, in++) { + *out = *in; + } + break; + //RenderScriptToolkit::BlendingMode::DST is a NOP + case RenderScriptToolkit::BlendingMode::DST: + break; + case RenderScriptToolkit::BlendingMode::SRC_OVER: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendSrcOver_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 in_s = convert<ushort4>(*in); + ushort4 out_s = convert<ushort4>(*out); + in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8); + *out = convertClipped(in_s); + } + break; + case RenderScriptToolkit::BlendingMode::DST_OVER: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendDstOver_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 in_s = convert<ushort4>(*in); + ushort4 out_s = convert<ushort4>(*out); + in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >>
(ushort4)8); + *out = convertClipped(in_s); + } + break; + case RenderScriptToolkit::BlendingMode::SRC_IN: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendSrcIn_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } +#endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 in_s = convert<ushort4>(*in); + in_s = (in_s * out->w) >> (ushort4)8; + *out = convert<uchar4>(in_s); + } + break; + case RenderScriptToolkit::BlendingMode::DST_IN: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendDstIn_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 out_s = convert<ushort4>(*out); + out_s = (out_s * in->w) >> (ushort4)8; + *out = convert<uchar4>(out_s); + } + break; + case RenderScriptToolkit::BlendingMode::SRC_OUT: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendSrcOut_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 in_s = convert<ushort4>(*in); + in_s = (in_s * (ushort4)(255 - out->w)) >> (ushort4)8; + *out = convert<uchar4>(in_s); + } + break; + case RenderScriptToolkit::BlendingMode::DST_OUT: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendDstOut_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + ushort4 out_s = convert<ushort4>(*out); + out_s = (out_s * (ushort4)(255 - in->w)) >> (ushort4)8; + *out = convert<uchar4>(out_s); + } + break; + case RenderScriptToolkit::BlendingMode::SRC_ATOP: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 
- x1) >> 3; + rsdIntrinsicBlendSrcAtop_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + // The max value the operation could produce before the shift + // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02. + // That value does not fit in a ushort, so we use uint. + uint4 in_s = convert<uint4>(*in); + uint4 out_s = convert<uint4>(*out); + out_s.xyz = ((in_s.xyz * out_s.w) + + (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8; + *out = convertClipped(out_s); + } + break; + case RenderScriptToolkit::BlendingMode::DST_ATOP: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendDstAtop_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + uint4 in_s = convert<uint4>(*in); + uint4 out_s = convert<uint4>(*out); + out_s.xyz = ((out_s.xyz * in_s.w) + + (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8; + out_s.w = in_s.w; + *out = convertClipped(out_s); + } + break; + case RenderScriptToolkit::BlendingMode::XOR: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendXor_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + *out = *in ^ *out; + } + break; + case RenderScriptToolkit::BlendingMode::MULTIPLY: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendMultiply_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + *out = convert<uchar4>((convert<ushort4>(*in) * convert<ushort4>(*out)) + >> (ushort4)8); + } + break; + case RenderScriptToolkit::BlendingMode::ADD: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if((x1 + 8) < x2) { 
+ uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendAdd_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, + oR = out->x, oG = out->y, oB = out->z, oA = out->w; + out->x = (oR + iR) > 255 ? 255 : oR + iR; + out->y = (oG + iG) > 255 ? 255 : oG + iG; + out->z = (oB + iB) > 255 ? 255 : oB + iB; + out->w = (oA + iA) > 255 ? 255 : oA + iA; + } + break; + case RenderScriptToolkit::BlendingMode::SUBTRACT: + #if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if((x1 + 8) < x2) { + uint32_t len = (x2 - x1) >> 3; + rsdIntrinsicBlendSub_K(out, in, len); + x1 += len << 3; + out += len << 3; + in += len << 3; + } + } + #endif + for (;x1 < x2; x1++, out++, in++) { + int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, + oR = out->x, oG = out->y, oB = out->z, oA = out->w; + out->x = (oR - iR) < 0 ? 0 : oR - iR; + out->y = (oG - iG) < 0 ? 0 : oG - iG; + out->z = (oB - iB) < 0 ? 0 : oB - iB; + out->w = (oA - iA) < 0 ? 
0 : oA - iA; + } + break; + + default: + /* Convert explicitly: a scoped enum is not promoted to int when passed through "...". */ + ALOGE("Called unimplemented value %d", static_cast<int>(mode)); + assert(false); + } +} + +/** + * Blends one tile of the image, one row at a time. + * + * Rows startY..endY-1 are processed; in each, pixels startX..endX-1 are blended. The pixel + * offset of a row's first pixel is y * mSizeX + startX in both mIn and mOut. + */ +void BlendTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + /* The span width is the same for every row; blend() takes it as a uint32_t. */ + const uint32_t width = static_cast<uint32_t>(endX - startX); + for (size_t y = startY; y < endY; y++) { + size_t offset = y * mSizeX + startX; + blend(mMode, mIn + offset, mOut + offset, width); + } +} + +/** + * Public entry point: wraps the buffers in a BlendTask and runs it through the processor. + * + * When ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE is defined, returns without blending if + * validRestriction() rejects the restriction. + */ +void RenderScriptToolkit::blend(BlendingMode mode, const uint8_t* in, uint8_t* out, size_t sizeX, + size_t sizeY, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } +#endif + + BlendTask task(mode, in, out, sizeX, sizeY, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Blend_advsimd.S b/toolkit/Blend_advsimd.S new file mode 100644 index 00000000..e5cb29b6 --- /dev/null +++ b/toolkit/Blend_advsimd.S @@ -0,0 +1,622 @@ +/* + * Copyright (C) 2013-2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: +#define END(f) .size f, .-f; + +#define BLEND_LIST(X) \ + X(0, CLEAR) \ + X(1, SRC) \ + X(2, DST) \ + X(3, SRC_OVER) \ + X(4, DST_OVER) \ + X(5, SRC_IN) \ + X(6, DST_IN) \ + X(7, SRC_OUT) \ + X(8, DST_OUT) \ + X(9, SRC_ATOP) \ + X(10, DST_ATOP) \ + X(11, XOR) \ + X(12, MULTIPLY) \ + X(13, ADD) \ + X(14, SUBTRACT) + +/* This operation was not enabled in the original RenderScript. We could + * enable it. + * + * X(15, DIFFERENCE) \ + */ + +/* For every blend operation supported, define a macro with just the arithmetic + * component. The rest can be handled later on. + * + * At entry q0-q3 contain the RGBA data from the destination buffer, and q8-q11 + * contain the data from the source buffer. Both have already been split out + * into one colour component per register (if necessary). q3 and q11 contain + * the alpha components. + * + * At the same time as defining the assembly macro, define a corresponding + * preprocessor macro indicating any other requirements. + * zipped=0 -- The macro does not require the RGBA components to be + * separated. + * lddst=0 -- The macro does not require data from the destination buffer. + * ldsrc=0 -- The macro does not require data from the source buffer. + * nowrap=1 -- The macro requires no wrapper at all, and should simply be + * inserted without any surrounding load/store or loop code. 
+ */ + +#define params_CLEAR zipped=0, lddst=0, ldsrc=0 +.macro blend_kernel_CLEAR + movi v0.16b, #0 + movi v1.16b, #0 + movi v2.16b, #0 + movi v3.16b, #0 +.endm + +#define params_SRC zipped=0, lddst=0 +.macro blend_kernel_SRC + mov v0.16b, v8.16b + mov v1.16b, v9.16b + mov v2.16b, v10.16b + mov v3.16b, v11.16b +.endm + +#define params_DST nowrap=1 +.macro blend_kernel_DST + /* nop */ +.endm + +#define params_SRC_OVER zipped=1 +.macro blend_kernel_SRC_OVER + mvn v7.16b, v11.16b + + umull2 v12.8h, v7.16b, v0.16b + umull v0.8h, v7.8b, v0.8b + umull2 v13.8h, v7.16b, v1.16b + umull v1.8h, v7.8b, v1.8b + umull2 v14.8h, v7.16b, v2.16b + umull v2.8h, v7.8b, v2.8b + umull2 v15.8h, v7.16b, v3.16b + umull v3.8h, v7.8b, v3.8b + + rshrn v4.8b, v0.8h, #8 + rshrn2 v4.16b, v12.8h, #8 + rshrn v5.8b, v1.8h, #8 + rshrn2 v5.16b, v13.8h, #8 + rshrn v6.8b, v2.8h, #8 + rshrn2 v6.16b, v14.8h, #8 + rshrn v7.8b, v3.8h, #8 + rshrn2 v7.16b, v15.8h, #8 + + uaddw v0.8h, v0.8h, v4.8b + uaddw2 v12.8h, v12.8h, v4.16b + uaddw v1.8h, v1.8h, v5.8b + uaddw2 v13.8h, v13.8h, v5.16b + uaddw v2.8h, v2.8h, v6.8b + uaddw2 v14.8h, v14.8h, v6.16b + uaddw v3.8h, v3.8h, v7.8b + uaddw2 v15.8h, v15.8h, v7.16b + + rshrn v0.8b, v0.8h, #8 + rshrn2 v0.16b, v12.8h, #8 + rshrn v1.8b, v1.8h, #8 + rshrn2 v1.16b, v13.8h, #8 + rshrn v2.8b, v2.8h, #8 + rshrn2 v2.16b, v14.8h, #8 + rshrn v3.8b, v3.8h, #8 + rshrn2 v3.16b, v15.8h, #8 + + uqadd v0.16b, v0.16b, v8.16b + uqadd v1.16b, v1.16b, v9.16b + uqadd v2.16b, v2.16b, v10.16b + uqadd v3.16b, v3.16b, v11.16b +.endm + +#define params_DST_OVER zipped=1 +.macro blend_kernel_DST_OVER + mvn v7.16b, v3.16b + + umull2 v12.8h, v7.16b, v8.16b + umull v8.8h, v7.8b, v8.8b + umull2 v13.8h, v7.16b, v9.16b + umull v9.8h, v7.8b, v9.8b + umull2 v14.8h, v7.16b, v10.16b + umull v10.8h, v7.8b, v10.8b + umull2 v15.8h, v7.16b, v11.16b + umull v11.8h, v7.8b, v11.8b + + rshrn v4.8b, v8.8h, #8 + rshrn2 v4.16b, v12.8h, #8 + rshrn v5.8b, v9.8h, #8 + rshrn2 v5.16b, v13.8h, #8 + rshrn v6.8b, v10.8h, #8 
+ rshrn2 v6.16b, v14.8h, #8 + rshrn v7.8b, v11.8h, #8 + rshrn2 v7.16b, v15.8h, #8 + + uaddw v8.8h, v8.8h, v4.8b + uaddw2 v12.8h, v12.8h, v4.16b + uaddw v9.8h, v9.8h, v5.8b + uaddw2 v13.8h, v13.8h, v5.16b + uaddw v10.8h, v10.8h, v6.8b + uaddw2 v14.8h, v14.8h, v6.16b + uaddw v11.8h, v11.8h, v7.8b + uaddw2 v15.8h, v15.8h, v7.16b + + rshrn v8.8b, v8.8h, #8 + rshrn2 v8.16b, v12.8h, #8 + rshrn v9.8b, v9.8h, #8 + rshrn2 v9.16b, v13.8h, #8 + rshrn v10.8b, v10.8h, #8 + rshrn2 v10.16b, v14.8h, #8 + rshrn v11.8b, v11.8h, #8 + rshrn2 v11.16b, v15.8h, #8 + + uqadd v0.16b, v0.16b, v8.16b + uqadd v1.16b, v1.16b, v9.16b + uqadd v2.16b, v2.16b, v10.16b + uqadd v3.16b, v3.16b, v11.16b +.endm + +#define params_SRC_IN zipped=1 +.macro blend_kernel_SRC_IN + umull2 v12.8h, v3.16b, v8.16b + umull v0.8h, v3.8b, v8.8b + umull2 v13.8h, v3.16b, v9.16b + umull v1.8h, v3.8b, v9.8b + umull2 v14.8h, v3.16b, v10.16b + umull v2.8h, v3.8b, v10.8b + umull2 v15.8h, v3.16b, v11.16b + umull v3.8h, v3.8b, v11.8b + + rshrn v4.8b, v0.8h, #8 + rshrn2 v4.16b, v12.8h, #8 + rshrn v5.8b, v1.8h, #8 + rshrn2 v5.16b, v13.8h, #8 + rshrn v6.8b, v2.8h, #8 + rshrn2 v6.16b, v14.8h, #8 + rshrn v7.8b, v3.8h, #8 + rshrn2 v7.16b, v15.8h, #8 + + uaddw v0.8h, v0.8h, v4.8b + uaddw2 v12.8h, v12.8h, v4.16b + uaddw v1.8h, v1.8h, v5.8b + uaddw2 v13.8h, v13.8h, v5.16b + uaddw v2.8h, v2.8h, v6.8b + uaddw2 v14.8h, v14.8h, v6.16b + uaddw v3.8h, v3.8h, v7.8b + uaddw2 v15.8h, v15.8h, v7.16b + + rshrn v0.8b, v0.8h, #8 + rshrn2 v0.16b, v12.8h, #8 + rshrn v1.8b, v1.8h, #8 + rshrn2 v1.16b, v13.8h, #8 + rshrn v2.8b, v2.8h, #8 + rshrn2 v2.16b, v14.8h, #8 + rshrn v3.8b, v3.8h, #8 + rshrn2 v3.16b, v15.8h, #8 +.endm + +#define params_DST_IN zipped=1 +.macro blend_kernel_DST_IN + umull2 v12.8h, v0.16b, v11.16b + umull v0.8h, v0.8b, v11.8b + umull2 v13.8h, v1.16b, v11.16b + umull v1.8h, v1.8b, v11.8b + umull2 v14.8h, v2.16b, v11.16b + umull v2.8h, v2.8b, v11.8b + umull2 v15.8h, v3.16b, v11.16b + umull v3.8h, v3.8b, v11.8b + + rshrn v4.8b, v0.8h, 
#8 + rshrn2 v4.16b, v12.8h, #8 + rshrn v5.8b, v1.8h, #8 + rshrn2 v5.16b, v13.8h, #8 + rshrn v6.8b, v2.8h, #8 + rshrn2 v6.16b, v14.8h, #8 + rshrn v7.8b, v3.8h, #8 + rshrn2 v7.16b, v15.8h, #8 + + uaddw v0.8h, v0.8h, v4.8b + uaddw2 v12.8h, v12.8h, v4.16b + uaddw v1.8h, v1.8h, v5.8b + uaddw2 v13.8h, v13.8h, v5.16b + uaddw v2.8h, v2.8h, v6.8b + uaddw2 v14.8h, v14.8h, v6.16b + uaddw v3.8h, v3.8h, v7.8b + uaddw2 v15.8h, v15.8h, v7.16b + + rshrn v0.8b, v0.8h, #8 + rshrn2 v0.16b, v12.8h, #8 + rshrn v1.8b, v1.8h, #8 + rshrn2 v1.16b, v13.8h, #8 + rshrn v2.8b, v2.8h, #8 + rshrn2 v2.16b, v14.8h, #8 + rshrn v3.8b, v3.8h, #8 + rshrn2 v3.16b, v15.8h, #8 +.endm + +#define params_SRC_OUT zipped=1 +.macro blend_kernel_SRC_OUT + mvn v3.16b, v3.16b + blend_kernel_SRC_IN +.endm + + +#define params_DST_OUT zipped=1 +.macro blend_kernel_DST_OUT + mvn v11.16b, v11.16b + blend_kernel_DST_IN +.endm + +#define params_SRC_ATOP zipped=1 +.macro blend_kernel_SRC_ATOP + mvn v11.16b, v11.16b + + umull2 v12.8h, v11.16b, v0.16b + umull v0.8h, v11.8b, v0.8b + umull2 v13.8h, v11.16b, v1.16b + umull v1.8h, v11.8b, v1.8b + umull2 v14.8h, v11.16b, v2.16b + umull v2.8h, v11.8b, v2.8b + + umull2 v4.8h, v3.16b, v8.16b + umull v8.8h, v3.8b, v8.8b + umull2 v5.8h, v3.16b, v9.16b + umull v9.8h, v3.8b, v9.8b + umull2 v6.8h, v3.16b, v10.16b + umull v10.8h, v3.8b, v10.8b + + uqadd v12.8h, v12.8h, v4.8h + uqadd v0.8h, v0.8h, v8.8h + uqadd v13.8h, v13.8h, v5.8h + uqadd v1.8h, v1.8h, v9.8h + uqadd v14.8h, v14.8h, v6.8h + uqadd v2.8h, v2.8h, v10.8h + + urshr v8.8h, v0.8h, #8 + urshr v4.8h, v12.8h, #8 + urshr v9.8h, v1.8h, #8 + urshr v5.8h, v13.8h, #8 + urshr v10.8h, v2.8h, #8 + urshr v6.8h, v14.8h, #8 + + uqadd v0.8h, v0.8h, v8.8h + uqadd v12.8h, v12.8h, v4.8h + uqadd v1.8h, v1.8h, v9.8h + uqadd v13.8h, v13.8h, v5.8h + uqadd v2.8h, v2.8h, v10.8h + uqadd v14.8h, v14.8h, v6.8h + + uqrshrn v0.8b, v0.8h, #8 + uqrshrn2 v0.16b, v12.8h, #8 + uqrshrn v1.8b, v1.8h, #8 + uqrshrn2 v1.16b, v13.8h, #8 + uqrshrn v2.8b, v2.8h, #8 + 
uqrshrn2 v2.16b, v14.8h, #8 +.endm + +#define params_DST_ATOP zipped=1 +.macro blend_kernel_DST_ATOP + mvn v3.16b, v3.16b + + umull2 v12.8h, v11.16b, v0.16b + umull v0.8h, v11.8b, v0.8b + umull2 v13.8h, v11.16b, v1.16b + umull v1.8h, v11.8b, v1.8b + umull2 v14.8h, v11.16b, v2.16b + umull v2.8h, v11.8b, v2.8b + + umull2 v4.8h, v3.16b, v8.16b + umull v8.8h, v3.8b, v8.8b + umull2 v5.8h, v3.16b, v9.16b + umull v9.8h, v3.8b, v9.8b + umull2 v6.8h, v3.16b, v10.16b + umull v10.8h, v3.8b, v10.8b + + uqadd v12.8h, v12.8h, v4.8h + uqadd v0.8h, v0.8h, v8.8h + uqadd v13.8h, v13.8h, v5.8h + uqadd v1.8h, v1.8h, v9.8h + uqadd v14.8h, v14.8h, v6.8h + uqadd v2.8h, v2.8h, v10.8h + + urshr v8.8h, v0.8h, #8 + urshr v4.8h, v12.8h, #8 + urshr v9.8h, v1.8h, #8 + urshr v5.8h, v13.8h, #8 + urshr v10.8h, v2.8h, #8 + urshr v6.8h, v14.8h, #8 + + uqadd v0.8h, v0.8h, v8.8h + uqadd v12.8h, v12.8h, v4.8h + uqadd v1.8h, v1.8h, v9.8h + uqadd v13.8h, v13.8h, v5.8h + uqadd v2.8h, v2.8h, v10.8h + uqadd v14.8h, v14.8h, v6.8h + + uqrshrn v0.8b, v0.8h, #8 + uqrshrn2 v0.16b, v12.8h, #8 + uqrshrn v1.8b, v1.8h, #8 + uqrshrn2 v1.16b, v13.8h, #8 + uqrshrn v2.8b, v2.8h, #8 + uqrshrn2 v2.16b, v14.8h, #8 + + mov v3.16b, v11.16b +.endm + +#define params_MULTIPLY zipped=0 +.macro blend_kernel_MULTIPLY + umull2 v12.8h, v0.16b, v8.16b + umull v0.8h, v0.8b, v8.8b + umull2 v13.8h, v1.16b, v9.16b + umull v1.8h, v1.8b, v9.8b + umull2 v14.8h, v2.16b, v10.16b + umull v2.8h, v2.8b, v10.8b + umull2 v15.8h, v3.16b, v11.16b + umull v3.8h, v3.8b, v11.8b + + rshrn v4.8b, v0.8h, #8 + rshrn2 v4.16b, v12.8h, #8 + rshrn v5.8b, v1.8h, #8 + rshrn2 v5.16b, v13.8h, #8 + rshrn v6.8b, v2.8h, #8 + rshrn2 v6.16b, v14.8h, #8 + rshrn v7.8b, v3.8h, #8 + rshrn2 v7.16b, v15.8h, #8 + + uaddw v0.8h, v0.8h, v4.8b + uaddw2 v12.8h, v12.8h, v4.16b + uaddw v1.8h, v1.8h, v5.8b + uaddw2 v13.8h, v13.8h, v5.16b + uaddw v2.8h, v2.8h, v6.8b + uaddw2 v14.8h, v14.8h, v6.16b + uaddw v3.8h, v3.8h, v7.8b + uaddw2 v15.8h, v15.8h, v7.16b + + rshrn v0.8b, v0.8h, #8 
+ rshrn2 v0.16b, v12.8h, #8 + rshrn v1.8b, v1.8h, #8 + rshrn2 v1.16b, v13.8h, #8 + rshrn v2.8b, v2.8h, #8 + rshrn2 v2.16b, v14.8h, #8 + rshrn v3.8b, v3.8h, #8 + rshrn2 v3.16b, v15.8h, #8 +.endm + +#define params_ADD zipped=0 +.macro blend_kernel_ADD + uqadd v0.16b, v0.16b, v8.16b + uqadd v1.16b, v1.16b, v9.16b + uqadd v2.16b, v2.16b, v10.16b + uqadd v3.16b, v3.16b, v11.16b +.endm + +#define params_SUBTRACT zipped=0 +.macro blend_kernel_SUBTRACT + uqsub v0.16b, v0.16b, v8.16b + uqsub v1.16b, v1.16b, v9.16b + uqsub v2.16b, v2.16b, v10.16b + uqsub v3.16b, v3.16b, v11.16b +.endm + +#define params_DIFFERENCE zipped=0 +.macro blend_kernel_DIFFERENCE + uabd v0.16b, v0.16b, v8.16b + uabd v1.16b, v1.16b, v9.16b + uabd v2.16b, v2.16b, v10.16b + uabd v3.16b, v3.16b, v11.16b +.endm + +#define params_XOR zipped=0 +.macro blend_kernel_XOR + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b +.endm + + +/* Define the wrapper code which will load and store the data, iterate the + * correct number of times, and safely handle the remainder at the end of the + * loop. Various sections of assembly code are dropped or substituted for + * simpler operations if they're not needed. 
+ */ +.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1 +.if \nowrap + \kernel +.else + sub x3, sp, #32 + sub sp, sp, #64 + st1 {v8.1d - v11.1d}, [sp] + st1 {v12.1d - v15.1d}, [x3] + subs x2, x2, #64 + b 2f +.align 4 +1: + .if \lddst + .if \zipped + ld4 {v0.16b - v3.16b}, [x0] + .else + ld1 {v0.16b - v3.16b}, [x0] + .endif + .endif + .if \ldsrc + .if \zipped + ld4 {v8.16b - v11.16b}, [x1], #64 + .else + ld1 {v8.16b - v11.16b}, [x1], #64 + .endif + .endif + .if \pld +#if 0 /* TODO: test this on real hardware */ + .if \lddst ; prfm PLDL1STRM, [x0, #192] ; .endif + .if \ldsrc ; prfm PLDL1STRM, [x1, #192] ; .endif +#endif + .endif + + \kernel + + subs x2, x2, #64 + .if \zipped + st4 {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64 + .else + st1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64 + .endif + +2: bge 1b + adds x2, x2, #64 + beq 2f + + /* To handle the tail portion of the data (something less than 64 + * bytes) load small power-of-two chunks into working registers. It + * doesn't matter where they end up in the register; the same process + * will store them back out using the same positions and the operations + * don't require data to interact with its neighbours. 
+ */ + movi v0.16b, #0 + movi v1.16b, #0 + movi v2.16b, #0 + movi v3.16b, #0 + + movi v8.16b, #0 + movi v9.16b, #0 + movi v10.16b, #0 + movi v11.16b, #0 + + tbz x2, #5, 1f + .if \lddst ; ld1 {v2.16b,v3.16b}, [x0], #32 ; .endif + .if \ldsrc ; ld1 {v10.16b,v11.16b}, [x1], #32 ; .endif +1: tbz x2, #4, 1f + .if \lddst ; ld1 {v1.16b}, [x0], #16 ; .endif + .if \ldsrc ; ld1 {v9.16b}, [x1], #16 ; .endif +1: tbz x2, #3, 1f + .if \lddst ; ld1 {v0.d}[1], [x0], #8 ; .endif + .if \ldsrc ; ld1 {v8.d}[1], [x1], #8 ; .endif +1: tbz x2, #2, 1f + .if \lddst ; ld1 {v0.s}[1], [x0], #4 ; .endif + .if \ldsrc ; ld1 {v8.s}[1], [x1], #4 ; .endif +1: tbz x2, #1, 1f + .if \lddst ; ld1 {v0.h}[1], [x0], #2 ; .endif + .if \ldsrc ; ld1 {v8.h}[1], [x1], #2 ; .endif +1: tbz x2, #0, 1f + .if \lddst ; ld1 {v0.b}[1], [x0], #1 ; .endif + .if \ldsrc ; ld1 {v8.b}[1], [x1], #1 ; .endif +1: + .if \lddst ; sub x0, x0, x2 ; .endif + +.if \zipped + /* One small impediment in the process above is that some of the load + * operations can't perform byte-wise structure deinterleaving at the + * same time as loading only part of a register. So the data is loaded + * linearly and unpacked manually at this point. 
+ */ + uzp1 v4.16b, v0.16b, v1.16b + uzp2 v5.16b, v0.16b, v1.16b + uzp1 v6.16b, v2.16b, v3.16b + uzp2 v7.16b, v2.16b, v3.16b + uzp1 v0.16b, v4.16b, v6.16b + uzp2 v2.16b, v4.16b, v6.16b + uzp1 v1.16b, v5.16b, v7.16b + uzp2 v3.16b, v5.16b, v7.16b + + uzp1 v4.16b, v8.16b, v9.16b + uzp2 v5.16b, v8.16b, v9.16b + uzp1 v6.16b, v10.16b, v11.16b + uzp2 v7.16b, v10.16b, v11.16b + uzp1 v8.16b, v4.16b, v6.16b + uzp2 v10.16b, v4.16b, v6.16b + uzp1 v9.16b, v5.16b, v7.16b + uzp2 v11.16b, v5.16b, v7.16b + + \kernel + + zip1 v4.16b, v0.16b, v2.16b + zip2 v6.16b, v0.16b, v2.16b + zip1 v5.16b, v1.16b, v3.16b + zip2 v7.16b, v1.16b, v3.16b + zip1 v0.16b, v4.16b, v5.16b + zip2 v1.16b, v4.16b, v5.16b + zip1 v2.16b, v6.16b, v7.16b + zip2 v3.16b, v6.16b, v7.16b + .else + \kernel + .endif + + tbz x2, #5, 1f + st1 {v2.16b,v3.16b}, [x0], #32 +1: tbz x2, #4, 1f + st1 {v1.16b}, [x0], #16 +1: tbz x2, #3, 1f + st1 {v0.d}[1], [x0], #8 +1: tbz x2, #2, 1f + st1 {v0.s}[1], [x0], #4 +1: tbz x2, #1, 1f + st1 {v0.h}[1], [x0], #2 +1: tbz x2, #0, 2f + st1 {v0.b}[1], [x0], #1 +2: ld1 {v8.1d - v11.1d}, [sp], #32 + ld1 {v12.1d - v15.1d}, [sp], #32 +.endif + mov x0, #0 + ret +.endm + + +/* produce list of blend_line_XX() functions; each function uses the wrap_line + * macro, passing it the name of the operation macro it wants along with + * optional parameters to remove unnecessary operations. 
+ */ +#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ; + BLEND_LIST(BLEND_X) +#undef BLEND_X + +/* tablesize ends up as the last slot number in BLEND_LIST plus one. */ +#define BLEND_X(d, n) .set tablesize, d+1 ; + BLEND_LIST(BLEND_X) +#undef BLEND_X + +/* int rsdIntrinsicBlend_K( + * uchar4 *out, // x0 + * uchar4 const *in, // x1 + * int slot, // w2 + * uint32_t xstart, // w3 + * uint32_t xend); // w4 + * + * Looks slot up in blendtable and branches to the matching blend_line_XX with + * out and in advanced by xstart * 4 bytes and x2 holding (xend - xstart) * 4. + * Returns -1 when slot is outside the table or its entry is zero. + */ +ENTRY(rsdIntrinsicBlend_K) + adrp x5, blendtable + add x5, x5, :lo12:blendtable /* x5 = address of blendtable */ + cmp w2, tablesize + bhs 1f /* unsigned compare: rejects slot >= tablesize and negative slots */ + ldrsh x6, [x5, w2, uxtw #1] /* x6 = signed 16-bit table entry for this slot */ + add x0, x0, w3, uxtw #2 /* out += xstart * 4 bytes */ + add x1, x1, w3, uxtw #2 /* in += xstart * 4 bytes */ + sub w2, w4, w3 /* w2 = xend - xstart */ + ubfiz x2, x2, #2, #32 /* x2 = (xend - xstart) * 4, zero-extended. TODO: fix */ + cbz x6, 1f /* zero entry: no routine for this slot */ + adr x5, 2f + add x6, x5, x6 /* table entries are offsets from label 2 below */ +2: br x6 +1: mov x0, #-1 + ret + +END(rsdIntrinsicBlend_K) + +/* One 16-bit entry per slot: 0 for slots missing from BLEND_LIST, otherwise the + * offset of blend_line_XX from label 2 in rsdIntrinsicBlend_K. + */ +.rodata +.set off,0 +blendtable: +#define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ; + BLEND_LIST(BLEND_X) +#undef BLEND_X diff --git a/toolkit/Blend_neon.S b/toolkit/Blend_neon.S new file mode 100644 index 00000000..a1fa1b50 --- /dev/null +++ b/toolkit/Blend_neon.S @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2013-2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart +#define END(f) .fnend; .size f, .-f; + +/* Slot numbers for the blend routines. They must match the slot value the C++ + * caller passes to rsdIntrinsicBlend_K (the blending mode converted to int), + * and therefore the numbering used by Blend_advsimd.S. + */ +#define BLEND_LIST(X) \ + X(0, CLEAR) \ + X(1, SRC) \ + X(2, DST) \ + X(3, SRC_OVER) \ + X(4, DST_OVER) \ + X(5, SRC_IN) \ + X(6, DST_IN) \ + X(7, SRC_OUT) \ + X(8, DST_OUT) \ + X(9, SRC_ATOP) \ + X(10, DST_ATOP) \ + X(11, XOR) \ + X(12, MULTIPLY) \ + X(13, ADD) \ + X(14, SUBTRACT) + +/* DIFFERENCE has no slot in Blend_advsimd.S and no case in the C++ fallback, + * so it is left out here as well. It could be enabled as: + * + * X(15, DIFFERENCE) \ + */ + +.eabi_attribute 25,1 @Tag_ABI_align8_preserved +.arm + +/* For every blend operation supported, define a macro with just the arithmetic + * component. The rest can be handled later on. + * + * At entry q0-q3 contain the RGBA data from the destination buffer, and q8-q11 + * contain the data from the source buffer. Both have already been split out + * into one colour component per register (if necessary). q3 and q11 contain + * the alpha components. + * + * At the same time as defining the assembly macro, define a corresponding + * preprocessor macro indicating any other requirements. + * zipped=0 -- The macro does not require the RGBA components to be + * separated. + * lddst=0 -- The macro does not require data from the destination buffer. + * ldsrc=0 -- The macro does not require data from the source buffer. + * nowrap=1 -- The macro requires no wrapper at all, and should simply be + * inserted without any surrounding load/store or loop code.
+ */ + +#define params_CLEAR zipped=0, lddst=0, ldsrc=0 +.macro blend_kernel_CLEAR + vmov.i8 q0, #0 + vmov.i8 q1, #0 + vmov.i8 q2, #0 + vmov.i8 q3, #0 +.endm + +#define params_SRC zipped=0, lddst=0 +.macro blend_kernel_SRC + vmov q0, q8 + vmov q1, q9 + vmov q2, q10 + vmov q3, q11 +.endm + +#define params_DST nowrap=1 +.macro blend_kernel_DST + /* nop */ +.endm + +#define params_SRC_OVER zipped=1 +.macro blend_kernel_SRC_OVER + vmvn q7, q11 + + vmull.u8 q12, d15, d1 + vmull.u8 q0, d14, d0 + vmull.u8 q13, d15, d3 + vmull.u8 q1, d14, d2 + vmull.u8 q14, d15, d5 + vmull.u8 q2, d14, d4 + vmull.u8 q15, d15, d7 + vmull.u8 q3, d14, d6 + + vrshrn.u16 d8, q0, #8 + vrshrn.u16 d9, q12, #8 + vrshrn.u16 d10, q1, #8 + vrshrn.u16 d11, q13, #8 + vrshrn.u16 d12, q2, #8 + vrshrn.u16 d13, q14, #8 + vrshrn.u16 d14, q3, #8 + vrshrn.u16 d15, q15, #8 + + vaddw.u8 q0, d8 + vaddw.u8 q12, d9 + vaddw.u8 q1, d10 + vaddw.u8 q13, d11 + vaddw.u8 q2, d12 + vaddw.u8 q14, d13 + vaddw.u8 q3, d14 + vaddw.u8 q15, d15 + + vrshrn.u16 d0, q0, #8 + vrshrn.u16 d1, q12, #8 + vrshrn.u16 d2, q1, #8 + vrshrn.u16 d3, q13, #8 + vrshrn.u16 d4, q2, #8 + vrshrn.u16 d5, q14, #8 + vrshrn.u16 d6, q3, #8 + vrshrn.u16 d7, q15, #8 + + vqadd.u8 q0, q8 + vqadd.u8 q1, q9 + vqadd.u8 q2, q10 + vqadd.u8 q3, q11 +.endm + +/* DST_OVER: each source component is multiplied by the inverted destination + * alpha (q7 = ~q3), scaled back to 8 bits as (x + (x >> 8)) >> 8 with rounding + * shifts, then added to the destination with saturation. The 16-bit products + * are held in q8-q11 (low halves) and q12-q15 (high halves); q0-q3 must stay + * untouched until the final vqadd. + */ +#define params_DST_OVER zipped=1 +.macro blend_kernel_DST_OVER + vmvn q7, q3 + + vmull.u8 q12, d15, d17 + vmull.u8 q8, d14, d16 + vmull.u8 q13, d15, d19 + vmull.u8 q9, d14, d18 + vmull.u8 q14, d15, d21 + vmull.u8 q10, d14, d20 + vmull.u8 q15, d15, d23 + vmull.u8 q11, d14, d22 + + /* The correction term comes from the products (q8-q11 / q12-q15), not from the raw destination bytes in q0-q3. */ + vrshrn.u16 d8, q8, #8 + vrshrn.u16 d9, q12, #8 + vrshrn.u16 d10, q9, #8 + vrshrn.u16 d11, q13, #8 + vrshrn.u16 d12, q10, #8 + vrshrn.u16 d13, q14, #8 + vrshrn.u16 d14, q11, #8 + vrshrn.u16 d15, q15, #8 + + vaddw.u8 q8, d8 + vaddw.u8 q12, d9 + vaddw.u8 q9, d10 + vaddw.u8 q13, d11 + vaddw.u8 q10, d12 + vaddw.u8 q14, d13 + vaddw.u8 q11, d14 + vaddw.u8 q15, d15 + + vrshrn.u16 d16, q8, #8 + vrshrn.u16 d17, q12, #8 + vrshrn.u16 d18, q9, #8 +
vrshrn.u16 d19, q13, #8 + vrshrn.u16 d20, q10, #8 + vrshrn.u16 d21, q14, #8 + vrshrn.u16 d22, q11, #8 + vrshrn.u16 d23, q15, #8 + + vqadd.u8 q0, q8 + vqadd.u8 q1, q9 + vqadd.u8 q2, q10 + vqadd.u8 q3, q11 +.endm + +#define params_SRC_IN zipped=1 +.macro blend_kernel_SRC_IN + vmull.u8 q12, d7, d17 + vmull.u8 q0, d6, d16 + vmull.u8 q13, d7, d19 + vmull.u8 q1, d6, d18 + vmull.u8 q14, d7, d21 + vmull.u8 q2, d6, d20 + vmull.u8 q15, d7, d23 + vmull.u8 q3, d6, d22 + + vrshrn.u16 d8, q0, #8 + vrshrn.u16 d9, q12, #8 + vrshrn.u16 d10, q1, #8 + vrshrn.u16 d11, q13, #8 + vrshrn.u16 d12, q2, #8 + vrshrn.u16 d13, q14, #8 + vrshrn.u16 d14, q3, #8 + vrshrn.u16 d15, q15, #8 + + vaddw.u8 q0, d8 + vaddw.u8 q12, d9 + vaddw.u8 q1, d10 + vaddw.u8 q13, d11 + vaddw.u8 q2, d12 + vaddw.u8 q14, d13 + vaddw.u8 q3, d14 + vaddw.u8 q15, d15 + + vrshrn.u16 d0, q0, #8 + vrshrn.u16 d1, q12, #8 + vrshrn.u16 d2, q1, #8 + vrshrn.u16 d3, q13, #8 + vrshrn.u16 d4, q2, #8 + vrshrn.u16 d5, q14, #8 + vrshrn.u16 d6, q3, #8 + vrshrn.u16 d7, q15, #8 +.endm + +#define params_DST_IN zipped=1 +.macro blend_kernel_DST_IN + vmull.u8 q12, d1, d23 + vmull.u8 q0, d0, d22 + vmull.u8 q13, d3, d23 + vmull.u8 q1, d2, d22 + vmull.u8 q14, d5, d23 + vmull.u8 q2, d4, d22 + vmull.u8 q15, d7, d23 + vmull.u8 q3, d6, d22 + + vrshrn.u16 d8, q0, #8 + vrshrn.u16 d9, q12, #8 + vrshrn.u16 d10, q1, #8 + vrshrn.u16 d11, q13, #8 + vrshrn.u16 d12, q2, #8 + vrshrn.u16 d13, q14, #8 + vrshrn.u16 d14, q3, #8 + vrshrn.u16 d15, q15, #8 + + vaddw.u8 q0, d8 + vaddw.u8 q12, d9 + vaddw.u8 q1, d10 + vaddw.u8 q13, d11 + vaddw.u8 q2, d12 + vaddw.u8 q14, d13 + vaddw.u8 q3, d14 + vaddw.u8 q15, d15 + + vrshrn.u16 d0, q0, #8 + vrshrn.u16 d1, q12, #8 + vrshrn.u16 d2, q1, #8 + vrshrn.u16 d3, q13, #8 + vrshrn.u16 d4, q2, #8 + vrshrn.u16 d5, q14, #8 + vrshrn.u16 d6, q3, #8 + vrshrn.u16 d7, q15, #8 +.endm + +#define params_SRC_OUT zipped=1 +.macro blend_kernel_SRC_OUT + vmvn q3, q3 + blend_kernel_SRC_IN +.endm + + +#define params_DST_OUT zipped=1 +.macro 
blend_kernel_DST_OUT + vmvn q11, q11 + blend_kernel_DST_IN +.endm + +#define params_SRC_ATOP zipped=1 +.macro blend_kernel_SRC_ATOP + vmvn q11, q11 + + vmull.u8 q12, d23, d1 + vmull.u8 q0, d22, d0 + vmull.u8 q13, d23, d3 + vmull.u8 q1, d22, d2 + vmull.u8 q14, d23, d5 + vmull.u8 q2, d22, d4 + + vmull.u8 q4, d7, d17 + vmull.u8 q8, d6, d16 + vmull.u8 q5, d7, d19 + vmull.u8 q9, d6, d18 + vmull.u8 q6, d7, d21 + vmull.u8 q10, d6, d20 + + vqadd.u16 q12, q4 + vqadd.u16 q0, q8 + vqadd.u16 q13, q5 + vqadd.u16 q1, q9 + vqadd.u16 q14, q6 + vqadd.u16 q2, q10 + + vrshr.u16 q8, q0, #8 + vrshr.u16 q4, q12, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q5, q13, #8 + vrshr.u16 q10, q2, #8 + vrshr.u16 q6, q14, #8 + + vqadd.u16 q0, q8 + vqadd.u16 q12, q4 + vqadd.u16 q1, q9 + vqadd.u16 q13, q5 + vqadd.u16 q2, q10 + vqadd.u16 q14, q6 + + vqrshrn.u16 d0, q0, #8 + vqrshrn.u16 d1, q12, #8 + vqrshrn.u16 d2, q1, #8 + vqrshrn.u16 d3, q13, #8 + vqrshrn.u16 d4, q2, #8 + vqrshrn.u16 d5, q14, #8 +.endm + +#define params_DST_ATOP zipped=1 +.macro blend_kernel_DST_ATOP + vmvn q3, q3 + + vmull.u8 q12, d23, d1 + vmull.u8 q0, d22, d0 + vmull.u8 q13, d23, d3 + vmull.u8 q1, d22, d2 + vmull.u8 q14, d23, d5 + vmull.u8 q2, d22, d4 + + vmull.u8 q4, d7, d17 + vmull.u8 q8, d6, d16 + vmull.u8 q5, d7, d19 + vmull.u8 q9, d6, d18 + vmull.u8 q6, d7, d21 + vmull.u8 q10, d6, d20 + + vqadd.u16 q12, q4 + vqadd.u16 q0, q8 + vqadd.u16 q13, q5 + vqadd.u16 q1, q9 + vqadd.u16 q14, q6 + vqadd.u16 q2, q10 + + vrshr.u16 q8, q0, #8 + vrshr.u16 q4, q12, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q5, q13, #8 + vrshr.u16 q10, q2, #8 + vrshr.u16 q6, q14, #8 + + vqadd.u16 q0, q8 + vqadd.u16 q12, q4 + vqadd.u16 q1, q9 + vqadd.u16 q13, q5 + vqadd.u16 q2, q10 + vqadd.u16 q14, q6 + + vqrshrn.u16 d0, q0, #8 + vqrshrn.u16 d1, q12, #8 + vqrshrn.u16 d2, q1, #8 + vqrshrn.u16 d3, q13, #8 + vqrshrn.u16 d4, q2, #8 + vqrshrn.u16 d5, q14, #8 + + vmov q3, q11 +.endm + +#define params_MULTIPLY zipped=0 +.macro blend_kernel_MULTIPLY + vmull.u8 q12, d1, d17 + 
vmull.u8 q0, d0, d16 + vmull.u8 q13, d3, d19 + vmull.u8 q1, d2, d18 + vmull.u8 q14, d5, d21 + vmull.u8 q2, d4, d20 + vmull.u8 q15, d7, d23 + vmull.u8 q3, d6, d22 + + vrshrn.u16 d8, q0, #8 + vrshrn.u16 d9, q12, #8 + vrshrn.u16 d10, q1, #8 + vrshrn.u16 d11, q13, #8 + vrshrn.u16 d12, q2, #8 + vrshrn.u16 d13, q14, #8 + vrshrn.u16 d14, q3, #8 + vrshrn.u16 d15, q15, #8 + + vaddw.u8 q0, d8 + vaddw.u8 q12, d9 + vaddw.u8 q1, d10 + vaddw.u8 q13, d11 + vaddw.u8 q2, d12 + vaddw.u8 q14, d13 + vaddw.u8 q3, d14 + vaddw.u8 q15, d15 + + vrshrn.u16 d0, q0, #8 + vrshrn.u16 d1, q12, #8 + vrshrn.u16 d2, q1, #8 + vrshrn.u16 d3, q13, #8 + vrshrn.u16 d4, q2, #8 + vrshrn.u16 d5, q14, #8 + vrshrn.u16 d6, q3, #8 + vrshrn.u16 d7, q15, #8 +.endm + +#define params_ADD zipped=0 +.macro blend_kernel_ADD + vqadd.u8 q0, q0, q8 + vqadd.u8 q1, q1, q9 + vqadd.u8 q2, q2, q10 + vqadd.u8 q3, q3, q11 +.endm + +#define params_SUBTRACT zipped=0 +.macro blend_kernel_SUBTRACT + vqsub.u8 q0, q0, q8 + vqsub.u8 q1, q1, q9 + vqsub.u8 q2, q2, q10 + vqsub.u8 q3, q3, q11 +.endm + +#define params_DIFFERENCE zipped=0 +.macro blend_kernel_DIFFERENCE + vabd.u8 q0, q0, q8 + vabd.u8 q1, q1, q9 + vabd.u8 q2, q2, q10 + vabd.u8 q3, q3, q11 +.endm + +#define params_XOR zipped=0 +.macro blend_kernel_XOR + veor q0, q0, q8 + veor q1, q1, q9 + veor q2, q2, q10 + veor q3, q3, q11 +.endm + + +/* Define the wrapper code which will load and store the data, iterate the + * correct number of times, and safely handle the remainder at the end of the + * loop. Various sections of assembly code are dropped or substituted for + * simpler operations if they're not needed. + */ +.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1 +.if \nowrap + \kernel +.else + vpush {d8-d15} + subs r2, #64 + b 2f + .align 4 +1: + .if \lddst + .if \zipped + vld4.8 {d0,d2,d4,d6}, [r0]! + vld4.8 {d1,d3,d5,d7}, [r0]! + .else + vld1.8 {d0-d3}, [r0]! + vld1.8 {d4-d7}, [r0]! 
+ .endif + sub r0, #64 + .endif + .if \ldsrc + .if \zipped + vld4.8 {d16,d18,d20,d22}, [r1]! + vld4.8 {d17,d19,d21,d23}, [r1]! + .else + vld1.8 {d16-d19}, [r1]! + vld1.8 {d20-d23}, [r1]! + .endif + .endif + .if \pld + .if \lddst ; pld [r0, #192] ; .endif + .if \ldsrc ; pld [r1, #192] ; .endif + .endif + + \kernel + + subs r2, #64 + .if \zipped + vst4.8 {d0,d2,d4,d6}, [r0]! + vst4.8 {d1,d3,d5,d7}, [r0]! + .else + vst1.8 {d0-d3}, [r0]! + vst1.8 {d4-d7}, [r0]! + .endif + +2: bge 1b + adds r2, #64 + beq 2f + + /* To handle the tail portion of the data (something less than 64 + * bytes) load small power-of-two chunks into working registers. It + * doesn't matter where they end up in the register; the same process + * will store them back out using the same positions and the operations + * don't require data to interact with its neighbours. + */ + vmov.i8 q0, #0 + vmov.i8 q1, #0 + vmov.i8 q2, #0 + vmov.i8 q3, #0 + + vmov.i8 q8, #0 + vmov.i8 q9, #0 + vmov.i8 q10, #0 + vmov.i8 q11, #0 + + tst r2, #32 + beq 1f + .if \lddst ; vld1.64 {d4-d7}, [r0]! ; .endif + .if \ldsrc ; vld1.64 {d20-d23}, [r1]! ; .endif +1: tst r2, #16 + beq 1f + .if \lddst ; vld1.64 {d2-d3}, [r0]! ; .endif + .if \ldsrc ; vld1.64 {d18-d19}, [r1]! ; .endif +1: tst r2, #8 + beq 1f + .if \lddst ; vld1.64 {d1}, [r0]! ; .endif + .if \ldsrc ; vld1.64 {d17}, [r1]! ; .endif +1: tst r2, #4 + beq 1f + .if \lddst ; vld1.32 {d0[1]}, [r0]! ; .endif + .if \ldsrc ; vld1.32 {d16[1]}, [r1]! ; .endif +1: tst r2, #2 + beq 1f + .if \lddst ; vld1.16 {d0[1]}, [r0]! ; .endif + .if \ldsrc ; vld1.16 {d16[1]}, [r1]! ; .endif +1: tst r2, #1 + beq 1f + .if \lddst ; vld1.8 {d0[1]}, [r0]! ; .endif + .if \ldsrc ; vld1.8 {d16[1]}, [r1]! ; .endif +1: + .if \lddst ; sub r0, r2 ; .endif + + .if \zipped + /* One small impediment in the process above is that some of the load + * operations can't perform byte-wise structure deinterleaving at the + * same time as loading only part of a register. 
So the data is loaded + * linearly and unpacked manually at this point. + */ + vuzp.8 q0, q1 + vuzp.8 q2, q3 + vuzp.8 q0, q2 + vuzp.8 q1, q3 + + vuzp.8 q8, q9 + vuzp.8 q10, q11 + vuzp.8 q8, q10 + vuzp.8 q9, q11 + + \kernel + + vzip.8 q0, q2 + vzip.8 q1, q3 + vzip.8 q0, q1 + vzip.8 q2, q3 + .else + \kernel + .endif + + tst r2, #32 + beq 1f + vst1.64 {d4-d7}, [r0]! +1: tst r2, #16 + beq 1f + vst1.64 {d2-d3}, [r0]! +1: tst r2, #8 + beq 1f + vst1.64 {d1}, [r0]! +1: tst r2, #4 + beq 1f + vst1.32 {d0[1]}, [r0]! +1: tst r2, #2 + beq 1f + vst1.16 {d0[1]}, [r0]! +1: tst r2, #1 + beq 2f + vst1.8 {d0[1]}, [r0]! +2: vpop {d8-d15} +.endif + mov r0, #0 + bx lr +.endm + + +/* produce list of blend_line_XX() functions; each function uses the wrap_line + * macro, passing it the name of the operation macro it wants along with + * optional parameters to remove unnecessary operations. + */ +#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ; + BLEND_LIST(BLEND_X) +#undef BLEND_X + + +/* int rsdIntrinsicBlend_K( + * uchar4 *out, // r0 + * uchar4 const *in, // r1 + * int slot, // r2 + * size_t xstart, // r3 + * size_t xend); // [sp] + */ +ENTRY(rsdIntrinsicBlend_K) + adr ip, blend_functions + cmp r2, #(blend_functions_end - blend_functions) >> 2 + ldrlo ip, [ip, r2, LSL #2] + movhs ip, #0 + ldr r2, [sp] + add r0, r3, LSL #2 + add r1, r3, LSL #2 + sub r2, r3 + mov r2, r2, LSL #2 + cmp ip, #0 + addne ip, ip, pc + bxne ip +1: mov r0, #-1 + bx lr + +blend_functions: +.set off,0 +#define BLEND_X(d, n) .rept d-off ; .word 0 ; .endr ; .word blend_line_##n-1b ; .set off, d+1 ; + BLEND_LIST(BLEND_X) +#undef BLEND_X +blend_functions_end: + +END(rsdIntrinsicBlend_K) diff --git a/toolkit/Blur.cpp b/toolkit/Blur.cpp new file mode 100644 index 00000000..a95ff435 --- /dev/null +++ b/toolkit/Blur.cpp @@ -0,0 +1,545 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <math.h> + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +namespace android { +namespace renderscript { + +#define LOG_TAG "renderscript.toolkit.Blur" + +/** + * Blurs an image or a section of an image. + * + * Our algorithm does two passes: a vertical blur followed by an horizontal blur. + */ +class BlurTask : public Task { + // The image we're blurring. + const uchar* mIn; + // Where we store the blurred image. + uchar* outArray; + // The size of the kernel radius is limited to 25 in ScriptIntrinsicBlur.java. + // So, the max kernel size is 51 (= 2 * 25 + 1). + // Considering SSSE3 case, which requires the size is multiple of 4, + // at least 52 words are necessary. Values outside of the kernel should be 0. + float mFp[104]; + uint16_t mIp[104]; + + // Working area to store the result of the vertical blur, to be used by the horizontal pass. + // There's one area per thread. Since the needed working area may be too large to put on the + // stack, we are allocating it from the heap. To avoid paying the allocation cost for each + // tile, we cache the scratch area here. + std::vector<void*> mScratch; // Pointers to the scratch areas, one per thread. + std::vector<size_t> mScratchSize; // The size in bytes of the scratch areas, one per thread. + + // The radius of the blur, in floating point and integer format. 
+ float mRadius; + int mIradius; + + void kernelU4(void* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY, + uint32_t threadIndex); + void kernelU1(void* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); + void ComputeGaussianWeights(); + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. + virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + BlurTask(const uint8_t* in, uint8_t* out, size_t sizeX, size_t sizeY, size_t vectorSize, + uint32_t threadCount, float radius, const Restriction* restriction) + : Task{sizeX, sizeY, vectorSize, false, restriction}, + mIn{in}, + outArray{out}, + mScratch{threadCount}, + mScratchSize{threadCount}, + mRadius{std::min(25.0f, radius)} { + ComputeGaussianWeights(); + } + + ~BlurTask() { + for (size_t i = 0; i < mScratch.size(); i++) { + if (mScratch[i]) { + free(mScratch[i]); + } + } + } +}; + +void BlurTask::ComputeGaussianWeights() { + memset(mFp, 0, sizeof(mFp)); + memset(mIp, 0, sizeof(mIp)); + + // Compute gaussian weights for the blur + // e is the euler's number + float e = 2.718281828459045f; + float pi = 3.1415926535897932f; + // g(x) = (1 / (sqrt(2 * pi) * sigma)) * e ^ (-x^2 / (2 * sigma^2)) + // x is of the form [-radius .. 0 .. radius] + // and sigma varies with the radius. + // Based on some experimental radius values and sigmas, + // we approximately fit sigma = f(radius) as + // sigma = radius * 0.4 + 0.6 + // The larger the radius gets, the more our gaussian blur + // will resemble a box blur since with large sigma + // the gaussian curve begins to lose its shape + float sigma = 0.4f * mRadius + 0.6f; + + // Now compute the coefficients. 
We will store some redundant values to save + // some math during the blur calculations precompute some values + float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); + float coeff2 = - 1.0f / (2.0f * sigma * sigma); + + float normalizeFactor = 0.0f; + float floatR = 0.0f; + int r; + mIradius = (float)ceil(mRadius) + 0.5f; + for (r = -mIradius; r <= mIradius; r ++) { + floatR = (float)r; + mFp[r + mIradius] = coeff1 * powf(e, floatR * floatR * coeff2); + normalizeFactor += mFp[r + mIradius]; + } + + // Now we need to normalize the weights because all our coefficients need to add up to one + normalizeFactor = 1.0f / normalizeFactor; + for (r = -mIradius; r <= mIradius; r ++) { + mFp[r + mIradius] *= normalizeFactor; + mIp[r + mIradius] = (uint16_t)(mFp[r + mIradius] * 65536.0f + 0.5f); + } +} + +/** + * Vertical blur of a uchar4 line. + * + * @param sizeY Number of cells of the input array in the vertical direction. + * @param out Where to place the computed value. + * @param x Coordinate of the point we're blurring. + * @param y Coordinate of the point we're blurring. + * @param ptrIn Start of the input array. + * @param iStride The size in byte of a row of the input array. + * @param gPtr The gaussian coefficients. + * @param iradius The radius of the blur. + */ +static void OneVU4(uint32_t sizeY, float4* out, int32_t x, int32_t y, const uchar* ptrIn, + int iStride, const float* gPtr, int iradius) { + const uchar *pi = ptrIn + x*4; + + float4 blurredPixel = 0; + for (int r = -iradius; r <= iradius; r ++) { + int validY = std::max((y + r), 0); + validY = std::min(validY, (int)(sizeY - 1)); + const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; + float4 pf = convert<float4>(pvy[0]); + blurredPixel += pf * gPtr[0]; + gPtr++; + } + + out[0] = blurredPixel; +} + +/** + * Vertical blur of a uchar1 line. + * + * @param sizeY Number of cells of the input array in the vertical direction. + * @param out Where to place the computed value. 
+ * @param x Coordinate of the point we're blurring. + * @param y Coordinate of the point we're blurring. + * @param ptrIn Start of the input array. + * @param iStride The size in byte of a row of the input array. + * @param gPtr The gaussian coefficients. + * @param iradius The radius of the blur. + */ +static void OneVU1(uint32_t sizeY, float *out, int32_t x, int32_t y, + const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { + + const uchar *pi = ptrIn + x; + + float blurredPixel = 0; + for (int r = -iradius; r <= iradius; r ++) { + int validY = std::max((y + r), 0); + validY = std::min(validY, (int)(sizeY - 1)); + float pf = (float)pi[validY * iStride]; + blurredPixel += pf * gPtr[0]; + gPtr++; + } + + out[0] = blurredPixel; +} + + +extern "C" void rsdIntrinsicBlurU1_K(uchar *out, uchar const *in, size_t w, size_t h, + size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); +extern "C" void rsdIntrinsicBlurU4_K(uchar4 *out, uchar4 const *in, size_t w, size_t h, + size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); + +#if defined(ARCH_X86_HAVE_SSSE3) +extern void rsdIntrinsicBlurVFU4_K(void *dst, const void *pin, int stride, const void *gptr, + int rct, int x1, int ct); +extern void rsdIntrinsicBlurHFU4_K(void *dst, const void *pin, const void *gptr, int rct, int x1, + int ct); +extern void rsdIntrinsicBlurHFU1_K(void *dst, const void *pin, const void *gptr, int rct, int x1, + int ct); +#endif + +/** + * Vertical blur of a line of RGBA, knowing that there's enough rows above and below us to avoid + * dealing with boundary conditions. + * + * @param out Where to store the results. This is the input to the horizontal blur. + * @param ptrIn The input data for this line. + * @param iStride The width of the input. + * @param gPtr The gaussian coefficients. + * @param ct The diameter of the blur. + * @param len How many cells to blur. + * @param usesSimd Whether this processor supports SIMD. 
+ */ +static void OneVFU4(float4 *out, const uchar *ptrIn, int iStride, const float* gPtr, int ct, + int x2, bool usesSimd) { + int x1 = 0; +#if defined(ARCH_X86_HAVE_SSSE3) + if (usesSimd) { + int t = (x2 - x1); + t &= ~1; + if (t) { + rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); + } + x1 += t; + out += t; + ptrIn += t << 2; + } +#else + (void) usesSimd; // Avoid unused parameter warning. +#endif + while(x2 > x1) { + const uchar *pi = ptrIn; + float4 blurredPixel = 0; + const float* gp = gPtr; + + for (int r = 0; r < ct; r++) { + float4 pf = convert<float4>(((const uchar4 *)pi)[0]); + blurredPixel += pf * gp[0]; + pi += iStride; + gp++; + } + out->xyzw = blurredPixel; + x1++; + out++; + ptrIn+=4; + } +} + +/** + * Vertical blur of a line of U_8, knowing that there's enough rows above and below us to avoid + * dealing with boundary conditions. + * + * @param out Where to store the results. This is the input to the horizontal blur. + * @param ptrIn The input data for this line. + * @param iStride The width of the input. + * @param gPtr The gaussian coefficients. + * @param ct The diameter of the blur. + * @param len How many cells to blur. + * @param usesSimd Whether this processor supports SIMD. + */ +static void OneVFU1(float* out, const uchar* ptrIn, int iStride, const float* gPtr, int ct, int len, + bool usesSimd) { + int x1 = 0; + + while((len > x1) && (((uintptr_t)ptrIn) & 0x3)) { + const uchar *pi = ptrIn; + float blurredPixel = 0; + const float* gp = gPtr; + + for (int r = 0; r < ct; r++) { + float pf = (float)pi[0]; + blurredPixel += pf * gp[0]; + pi += iStride; + gp++; + } + out[0] = blurredPixel; + x1++; + out++; + ptrIn++; + len--; + } +#if defined(ARCH_X86_HAVE_SSSE3) + if (usesSimd && (len > x1)) { + int t = (len - x1) >> 2; + t &= ~1; + if (t) { + rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t ); + len -= t << 2; + ptrIn += t << 2; + out += t << 2; + } + } +#else + (void) usesSimd; // Avoid unused parameter warning. 
+#endif + while(len > 0) { + const uchar *pi = ptrIn; + float blurredPixel = 0; + const float* gp = gPtr; + + for (int r = 0; r < ct; r++) { + float pf = (float)pi[0]; + blurredPixel += pf * gp[0]; + pi += iStride; + gp++; + } + out[0] = blurredPixel; + len--; + out++; + ptrIn++; + } +} + +/** + * Horizontal blur of a uchar4 line. + * + * @param sizeX Number of cells of the input array in the horizontal direction. + * @param out Where to place the computed value. + * @param x Coordinate of the point we're blurring. + * @param ptrIn The start of the input row from which we're indexing x. + * @param gPtr The gaussian coefficients. + * @param iradius The radius of the blur. + */ +static void OneHU4(uint32_t sizeX, uchar4* out, int32_t x, const float4* ptrIn, const float* gPtr, + int iradius) { + float4 blurredPixel = 0; + for (int r = -iradius; r <= iradius; r ++) { + int validX = std::max((x + r), 0); + validX = std::min(validX, (int)(sizeX - 1)); + float4 pf = ptrIn[validX]; + blurredPixel += pf * gPtr[0]; + gPtr++; + } + + out->xyzw = convert<uchar4>(blurredPixel); +} + +/** + * Horizontal blur of a uchar line. + * + * @param sizeX Number of cells of the input array in the horizontal direction. + * @param out Where to place the computed value. + * @param x Coordinate of the point we're blurring. + * @param ptrIn The start of the input row from which we're indexing x. + * @param gPtr The gaussian coefficients. + * @param iradius The radius of the blur. + */ +static void OneHU1(uint32_t sizeX, uchar* out, int32_t x, const float* ptrIn, const float* gPtr, + int iradius) { + float blurredPixel = 0; + for (int r = -iradius; r <= iradius; r ++) { + int validX = std::max((x + r), 0); + validX = std::min(validX, (int)(sizeX - 1)); + float pf = ptrIn[validX]; + blurredPixel += pf * gPtr[0]; + gPtr++; + } + + out[0] = (uchar)blurredPixel; +} + +/** + * Full blur of a line of RGBA data. 
+ * + * @param outPtr Where to store the results + * @param xstart The index of the section we're starting to blur. + * @param xend The end index of the section. + * @param currentY The index of the line we're blurring. + * @param usesSimd Whether this processor supports SIMD. + */ +void BlurTask::kernelU4(void *outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY, + uint32_t threadIndex) { + float4 stackbuf[2048]; + float4 *buf = &stackbuf[0]; + const uint32_t stride = mSizeX * mVectorSize; + + uchar4 *out = (uchar4 *)outPtr; + uint32_t x1 = xstart; + uint32_t x2 = xend; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && mSizeX >= 4) { + rsdIntrinsicBlurU4_K(out, (uchar4 const *)(mIn + stride * currentY), + mSizeX, mSizeY, + stride, x1, currentY, x2 - x1, mIradius, mIp + mIradius); + return; + } +#endif + + if (mSizeX > 2048) { + if ((mSizeX > mScratchSize[threadIndex]) || !mScratch[threadIndex]) { + // Pad the side of the allocation by one unit to allow alignment later + mScratch[threadIndex] = realloc(mScratch[threadIndex], (mSizeX + 1) * 16); + mScratchSize[threadIndex] = mSizeX; + } + // realloc only aligns to 8 bytes so we manually align to 16. 
+ buf = (float4 *) ((((intptr_t)mScratch[threadIndex]) + 15) & ~0xf); + } + float4 *fout = (float4 *)buf; + int y = currentY; + if ((y > mIradius) && (y < ((int)mSizeY - mIradius))) { + const uchar *pi = mIn + (y - mIradius) * stride; + OneVFU4(fout, pi, stride, mFp, mIradius * 2 + 1, mSizeX, mUsesSimd); + } else { + x1 = 0; + while(mSizeX > x1) { + OneVU4(mSizeY, fout, x1, y, mIn, stride, mFp, mIradius); + fout++; + x1++; + } + } + + x1 = xstart; + while ((x1 < (uint32_t)mIradius) && (x1 < x2)) { + OneHU4(mSizeX, out, x1, buf, mFp, mIradius); + out++; + x1++; + } +#if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + mIradius) < x2) { + rsdIntrinsicBlurHFU4_K(out, buf - mIradius, mFp, + mIradius * 2 + 1, x1, x2 - mIradius); + out += (x2 - mIradius) - x1; + x1 = x2 - mIradius; + } + } +#endif + while(x2 > x1) { + OneHU4(mSizeX, out, x1, buf, mFp, mIradius); + out++; + x1++; + } +} + +/** + * Full blur of a line of U_8 data. + * + * @param outPtr Where to store the results + * @param xstart The index of the section we're starting to blur. + * @param xend The end index of the section. + * @param currentY The index of the line we're blurring. + */ +void BlurTask::kernelU1(void *outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + float buf[4 * 2048]; + const uint32_t stride = mSizeX * mVectorSize; + + uchar *out = (uchar *)outPtr; + uint32_t x1 = xstart; + uint32_t x2 = xend; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && mSizeX >= 16) { + // The specialisation for r<=8 has an awkward prefill case, which is + // fiddly to resolve, where starting close to the right edge can cause + // a read beyond the end of input. So avoid that case here. 
+ if (mIradius > 8 || (mSizeX - std::max(0, (int32_t)x1 - 8)) >= 16) { + rsdIntrinsicBlurU1_K(out, mIn + stride * currentY, mSizeX, mSizeY, + stride, x1, currentY, x2 - x1, mIradius, mIp + mIradius); + return; + } + } +#endif + + float *fout = (float *)buf; + int y = currentY; + if ((y > mIradius) && (y < ((int)mSizeY - mIradius -1))) { + const uchar *pi = mIn + (y - mIradius) * stride; + OneVFU1(fout, pi, stride, mFp, mIradius * 2 + 1, mSizeX, mUsesSimd); + } else { + x1 = 0; + while(mSizeX > x1) { + OneVU1(mSizeY, fout, x1, y, mIn, stride, mFp, mIradius); + fout++; + x1++; + } + } + + x1 = xstart; + while ((x1 < x2) && + ((x1 < (uint32_t)mIradius) || (((uintptr_t)out) & 0x3))) { + OneHU1(mSizeX, out, x1, buf, mFp, mIradius); + out++; + x1++; + } +#if defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + if ((x1 + mIradius) < x2) { + uint32_t len = x2 - (x1 + mIradius); + len &= ~3; + + // rsdIntrinsicBlurHFU1_K() processes each four float values in |buf| at once, so it + // nees to ensure four more values can be accessed in order to avoid accessing + // uninitialized buffer. 
+ if (len > 4) { + len -= 4; + rsdIntrinsicBlurHFU1_K(out, ((float *)buf) - mIradius, mFp, + mIradius * 2 + 1, x1, x1 + len); + out += len; + x1 += len; + } + } + } +#endif + while(x2 > x1) { + OneHU1(mSizeX, out, x1, buf, mFp, mIradius); + out++; + x1++; + } +} + +void BlurTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + void* outPtr = outArray + (mSizeX * y + startX) * mVectorSize; + if (mVectorSize == 4) { + kernelU4(outPtr, startX, endX, y, threadIndex); + } else { + kernelU1(outPtr, startX, endX, y); + } + } +} + +void RenderScriptToolkit::blur(const uint8_t* in, uint8_t* out, size_t sizeX, size_t sizeY, + size_t vectorSize, int radius, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (radius <= 0 || radius > 25) { + ALOGE("The radius should be between 1 and 25. %d provided.", radius); + } + if (vectorSize != 1 && vectorSize != 4) { + ALOGE("The vectorSize should be 1 or 4. %zu provided.", vectorSize); + } +#endif + + BlurTask task(in, out, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), radius, + restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Blur_advsimd.S b/toolkit/Blur_advsimd.S new file mode 100644 index 00000000..6d3cb8d7 --- /dev/null +++ b/toolkit/Blur_advsimd.S @@ -0,0 +1,1868 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: +#define PRIVATE(f) .text; .align 4; .type f,#function; f: +#define END(f) .size f, .-f; + +//#define ARCH_ARM64_USE_BLUR_PRELOAD + +/* Number of fractional bits to preserve in intermediate results. The + * intermediate storage is 16-bit, and we started with 8 bit data (the integer + * part), so this should be between 0 and 8. + */ +.set FRACTION_BITS, 7 +.set MAX_R, 25 + + +/* A quick way of making a line of code conditional on some other condition. + * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with + * `ifcc`: + */ +.macro ifcc zzz:vararg +.if cc + \zzz +.endif +.endm + +/* It's not always clear that prefetching is beneficial and this needs further + * testing on different cores, so it's made switchable here. + */ +#if defined(ARCH_ARM64_USE_BLUR_PRELOAD) +#define VERTPLD(...) prfm PLDL1KEEP, [__VA_ARGS__] +#else +#define VERTPLD(...) nop +#endif + +/* Fetch 16 columns of bytes (regardless of image format), convolve these + * vertically, and leave them in the register file. If working near the top or + * bottom of an image then clamp the addressing while loading the data in. + * + * The convolution is fully unrolled for windows up to max_r, with the + * outermost edges calculated first. This way it's possible to branch directly + * into the relevant part of the code for an arbitrary convolution radius. Two + * variants of the loop are produced; one eliminates the clamping code for a + * slight speed advantage. 
+ * + * Where the macro is called with reg=x, the specified register is taken to + * contain a pre-calculated pointer into one of the two loops. + * + * Input: + * x1 -- src + * x2 -- pitch + * x5 -- r + * x6 -- rup (r, unless clipped to top of source image) + * x7 -- rdn (r, unless clipped to bottom of source image) + * x12 -- switch index + * v0-v3 -- coefficient table + * x13 = -pitch + * x15 = top-row in + * x19 = bottom-row in + * Output: + * x1 += 16 + * v10,v11 -- 16 convolved columns + * Modifies: + * x10 = upper row pointer + * x11 = lower row pointer + * v12-v15 = temporary sums + */ +.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=x12 /*{{{*/ + .ifc \reg,x12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif + + ld1 {v15.16b}, [x1], #16 + mov x10, x15 + + uxtl v14.8h, v15.8b + VERTPLD(x1, #16) + uxtl2 v15.8h, v15.16b + .if \max_r < 16 // approximate + ifcc adr \reg, 1f + .else + ifcc adrp \reg, 1f + ifcc add \reg, \reg, #:lo12:1f + .endif + + umull v12.4s, v14.4h, v0.h[0] + ifcc sub \reg, \reg, x5, LSL #6 + umull2 v13.4s, v14.8h, v0.h[0] + mov x11, x19 + umull v14.4s, v15.4h, v0.h[0] + ifcc add \reg, \reg, x5, LSL #3 + umull2 v15.4s, v15.8h, v0.h[0] + br \reg + + /* This version of the vertical fetch loop body is used away from the edges + * of the source image. The pointers start at the top and bottom source rows + * and work their way towards the centre on each iteration. This way the + * number of taps used can be controlled by jumping directly into the middle + * of the loop and running to completion. + * If the loop body changes size then the code which calculates the address of + * the initial iteration must be updated to accordingly. 
+ */ + .macro vertfetch_noclamp i, dreg + .if 0 < \i && \i <= \max_r + ld1 {v10.16b}, [x10], x2 + ld1 {v11.16b}, [x11], x13 + uaddl v16.8h, v10.8b, v11.8b + uaddl2 v11.8h, v10.16b, v11.16b + umlal v12.4s, v16.4h, \dreg + umlal2 v13.4s, v16.8h, \dreg + VERTPLD(x10, #32) + umlal v14.4s, v11.4h, \dreg + VERTPLD(x11, #32) + umlal2 v15.4s, v11.8h, \dreg + .endif + .endm + + /* This version of the vertical fetch loop body is used near the edges of the + * source image, where one or both of the accesses may start with a clamped + * value, and the row addresses only begin to change after some number of + * iterations before the end. + * If the loop body changes size then the code which calculates the address of + * the initial iteration must be updated to accordingly. + */ + .macro vertfetch_clamped i, dreg + .if 0 < \i && \i <= \max_r + ld1 {v10.16b}, [x10], x2 + cmp x6, #\i + ld1 {v11.16b}, [x11], x13 + csel x10, x15, x10, lo + uaddl v16.8h, v10.8b, v11.8b + cmp x7, #\i + uaddl2 v11.8h, v10.16b, v11.16b + csel x11, x19, x11, lo + umlal v12.4s, v16.4h, \dreg + umlal2 v13.4s, v16.8h, \dreg + VERTPLD(x10, #32) + umlal v14.4s, v11.4h, \dreg + VERTPLD(x11, #32) + umlal2 v15.4s, v11.8h, \dreg + .endif + .endm + + /* Entry into this unrolled loop is computed as a negative index from + * \labelc at the end of the block. 
+ */ + .align 4 + vertfetch_clamped 27, v3.h[3] + vertfetch_clamped 26, v3.h[2] + vertfetch_clamped 25, v3.h[1] + vertfetch_clamped 24, v3.h[0] + vertfetch_clamped 23, v2.h[7] + vertfetch_clamped 22, v2.h[6] + vertfetch_clamped 21, v2.h[5] + vertfetch_clamped 20, v2.h[4] + vertfetch_clamped 19, v2.h[3] + vertfetch_clamped 18, v2.h[2] + vertfetch_clamped 17, v2.h[1] + vertfetch_clamped 16, v2.h[0] + vertfetch_clamped 15, v1.h[7] + vertfetch_clamped 14, v1.h[6] + vertfetch_clamped 13, v1.h[5] + vertfetch_clamped 12, v1.h[4] + vertfetch_clamped 11, v1.h[3] + vertfetch_clamped 10, v1.h[2] + vertfetch_clamped 9, v1.h[1] + vertfetch_clamped 8, v1.h[0] + vertfetch_clamped 7, v0.h[7] + vertfetch_clamped 6, v0.h[6] + vertfetch_clamped 5, v0.h[5] + vertfetch_clamped 4, v0.h[4] + vertfetch_clamped 3, v0.h[3] + vertfetch_clamped 2, v0.h[2] + vertfetch_clamped 1, v0.h[1] + vertfetch_clamped 0, v0.h[0] + 1: + \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */ + + /* Entry into this unrolled loop is computed as a negative index from + * \labelnc at the end of the block. 
+ */ + .align 4 + vertfetch_noclamp 27, v3.h[3] + vertfetch_noclamp 26, v3.h[2] + vertfetch_noclamp 25, v3.h[1] + vertfetch_noclamp 24, v3.h[0] + vertfetch_noclamp 23, v2.h[7] + vertfetch_noclamp 22, v2.h[6] + vertfetch_noclamp 21, v2.h[5] + vertfetch_noclamp 20, v2.h[4] + vertfetch_noclamp 19, v2.h[3] + vertfetch_noclamp 18, v2.h[2] + vertfetch_noclamp 17, v2.h[1] + vertfetch_noclamp 16, v2.h[0] + vertfetch_noclamp 15, v1.h[7] + vertfetch_noclamp 14, v1.h[6] + vertfetch_noclamp 13, v1.h[5] + vertfetch_noclamp 12, v1.h[4] + vertfetch_noclamp 11, v1.h[3] + vertfetch_noclamp 10, v1.h[2] + vertfetch_noclamp 9, v1.h[1] + vertfetch_noclamp 8, v1.h[0] + vertfetch_noclamp 7, v0.h[7] + vertfetch_noclamp 6, v0.h[6] + vertfetch_noclamp 5, v0.h[5] + vertfetch_noclamp 4, v0.h[4] + vertfetch_noclamp 3, v0.h[3] + vertfetch_noclamp 2, v0.h[2] + vertfetch_noclamp 1, v0.h[1] + vertfetch_noclamp 0, v0.h[0] + \labelnc : + + .purgem vertfetch_clamped + .purgem vertfetch_noclamp + + 2: uqrshrn v10.4h, v12.4s, #16 - FRACTION_BITS + add x15, x15, #16 + uqrshrn2 v10.8h, v13.4s, #16 - FRACTION_BITS + add x19, x19, #16 + uqrshrn v11.4h, v14.4s, #16 - FRACTION_BITS + uqrshrn2 v11.8h, v15.4s, #16 - FRACTION_BITS +.endm /*}}}*/ + +/* Some portion of the convolution window (as much as will fit, and all of it + * for the uchar1 cases) is kept in the register file to avoid unnecessary + * memory accesses. This forces the horizontal loops to be unrolled because + * there's no indexed addressing into the register file. + * + * As in the fetch macro, the operations are ordered from outside to inside, so + * that jumping into the middle of the block bypasses the unwanted window taps. + * + * There are several variants of the macro because of the fixed offets of the + * taps -- the wider the maximum radius the further the centre tap is from the + * most recently fetched data. 
This means that pre-filling the window requires + * more data that won't be used and it means that rotating the window involves + * more mov operations. + * + * When the buffer gets too big the buffer at [x9] is used. + * + * Input: + * v16-v31,v4-v11 -- convoltion window + * x9 -- pointer to additional convolution window data + * Output: + * x9 -- updated buffer pointer (if used) + * d31 -- result to be stored + * Modifies: + * x12 -- temp buffer pointer + * v12-v13 -- temporaries for load and vext operations. + * v14-v15 -- intermediate sums + */ +#define TUNED_LIST1 8, 16 +.macro hconv1_8/*{{{*/ + +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .align 4 +.text + umull v14.4s, v9.4h, v0.h[0] + umull2 v15.4s, v9.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 108: umlal v14.4s, v8.4h, v1.h[0] + umlal2 v15.4s, v8.8h, v1.h[0] + umlal v14.4s, v10.4h, v1.h[0] + umlal2 v15.4s, v10.8h, v1.h[0] + 107: ext v12.16b, v8.16b, v9.16b, #1*2 + ext v13.16b, v9.16b, v10.16b, #7*2 + umlal v14.4s, v12.4h, v0.h[7] + umlal2 v15.4s, v12.8h, v0.h[7] + umlal v14.4s, v13.4h, v0.h[7] + umlal2 v15.4s, v13.8h, v0.h[7] + 106: ext v12.16b, v8.16b, v9.16b, #2*2 + ext v13.16b, v9.16b, v10.16b, #6*2 + umlal v14.4s, v12.4h, v0.h[6] + umlal2 v15.4s, v12.8h, v0.h[6] + umlal v14.4s, v13.4h, v0.h[6] + umlal2 v15.4s, v13.8h, v0.h[6] + 105: ext v12.16b, v8.16b, v9.16b, #3*2 + ext v13.16b, v9.16b, v10.16b, #5*2 + umlal v14.4s, v12.4h, v0.h[5] + umlal2 v15.4s, v12.8h, v0.h[5] + umlal v14.4s, v13.4h, v0.h[5] + umlal2 v15.4s, v13.8h, v0.h[5] + 104: //ext v12.16b, v8.16b, v9.16b, #4*2 + //ext v13.16b, v9.16b, v10.16b, #4*2 + umlal2 v14.4s, v8.8h, v0.h[4] + umlal v15.4s, v9.4h, v0.h[4] + umlal2 v14.4s, v9.8h, v0.h[4] + umlal v15.4s, v10.4h, v0.h[4] + 103: ext v12.16b, v8.16b, v9.16b, 
#5*2 + ext v13.16b, v9.16b, v10.16b, #3*2 + umlal v14.4s, v12.4h, v0.h[3] + umlal2 v15.4s, v12.8h, v0.h[3] + umlal v14.4s, v13.4h, v0.h[3] + umlal2 v15.4s, v13.8h, v0.h[3] + 102: ext v12.16b, v8.16b, v9.16b, #6*2 + ext v13.16b, v9.16b, v10.16b, #2*2 + umlal v14.4s, v12.4h, v0.h[2] + umlal2 v15.4s, v12.8h, v0.h[2] + umlal v14.4s, v13.4h, v0.h[2] + umlal2 v15.4s, v13.8h, v0.h[2] + 101: ext v12.16b, v8.16b, v9.16b, #7*2 + ext v13.16b, v9.16b, v10.16b, #1*2 + umlal v14.4s, v12.4h, v0.h[1] + umlal2 v15.4s, v12.8h, v0.h[1] + umlal v14.4s, v13.4h, v0.h[1] + umlal2 v15.4s, v13.8h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +.macro hconv1_16/*{{{*/ +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .hword 109f-100f + .hword 110f-100f + .hword 111f-100f + .hword 112f-100f + .hword 113f-100f + .hword 114f-100f + .hword 115f-100f + .hword 116f-100f + .align 4 + +.text + umull v14.4s, v8.4h, v0.h[0] + umull2 v15.4s, v8.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 116: //ext v12.16b, v6.16b, v7.16b, #0*2 + //ext v13.16b, v10.16b, v11.16b, #0*2 + umlal v14.4s, v6.4h, v2.h[0] + umlal2 v15.4s, v6.8h, v2.h[0] + umlal v14.4s, v10.4h, v2.h[0] + umlal2 v15.4s, v10.8h, v2.h[0] + 115: ext v12.16b, v6.16b, v7.16b, #1*2 + ext v13.16b, v9.16b, v10.16b, #7*2 + umlal v14.4s, v12.4h, v1.h[7] + umlal2 v15.4s, v12.8h, v1.h[7] + umlal v14.4s, v13.4h, v1.h[7] + umlal2 v15.4s, v13.8h, v1.h[7] + 114: ext v12.16b, v6.16b, v7.16b, #2*2 + ext v13.16b, v9.16b, v10.16b, #6*2 + umlal v14.4s, v12.4h, v1.h[6] + umlal2 v15.4s, v12.8h, v1.h[6] + umlal v14.4s, v13.4h, v1.h[6] + umlal2 v15.4s, v13.8h, v1.h[6] + 113: ext v12.16b, v6.16b, 
v7.16b, #3*2 + ext v13.16b, v9.16b, v10.16b, #5*2 + umlal v14.4s, v12.4h, v1.h[5] + umlal2 v15.4s, v12.8h, v1.h[5] + umlal v14.4s, v13.4h, v1.h[5] + umlal2 v15.4s, v13.8h, v1.h[5] + 112: //ext v12.16b, v6.16b, v7.16b, #4*2 + //ext v13.16b, v9.16b, v10.16b, #4*2 + umlal2 v14.4s, v6.8h, v1.h[4] + umlal v15.4s, v7.4h, v1.h[4] + umlal2 v14.4s, v9.8h, v1.h[4] + umlal v15.4s, v10.4h, v1.h[4] + 111: ext v12.16b, v6.16b, v7.16b, #5*2 + ext v13.16b, v9.16b, v10.16b, #3*2 + umlal v14.4s, v12.4h, v1.h[3] + umlal2 v15.4s, v12.8h, v1.h[3] + umlal v14.4s, v13.4h, v1.h[3] + umlal2 v15.4s, v13.8h, v1.h[3] + 110: ext v12.16b, v6.16b, v7.16b, #6*2 + ext v13.16b, v9.16b, v10.16b, #2*2 + umlal v14.4s, v12.4h, v1.h[2] + umlal2 v15.4s, v12.8h, v1.h[2] + umlal v14.4s, v13.4h, v1.h[2] + umlal2 v15.4s, v13.8h, v1.h[2] + 109: ext v12.16b, v6.16b, v7.16b, #7*2 + ext v13.16b, v9.16b, v10.16b, #1*2 + umlal v14.4s, v12.4h, v1.h[1] + umlal2 v15.4s, v12.8h, v1.h[1] + umlal v14.4s, v13.4h, v1.h[1] + umlal2 v15.4s, v13.8h, v1.h[1] + 108: //ext v12.16b, v7.16b, v8.16b, #0*2 + //ext v13.16b, v9.16b, v10.16b, #0*2 + umlal v14.4s, v7.4h, v1.h[0] + umlal2 v15.4s, v7.8h, v1.h[0] + umlal v14.4s, v9.4h, v1.h[0] + umlal2 v15.4s, v9.8h, v1.h[0] + 107: ext v12.16b, v7.16b, v8.16b, #1*2 + ext v13.16b, v8.16b, v9.16b, #7*2 + umlal v14.4s, v12.4h, v0.h[7] + umlal2 v15.4s, v12.8h, v0.h[7] + umlal v14.4s, v13.4h, v0.h[7] + umlal2 v15.4s, v13.8h, v0.h[7] + 106: ext v12.16b, v7.16b, v8.16b, #2*2 + ext v13.16b, v8.16b, v9.16b, #6*2 + umlal v14.4s, v12.4h, v0.h[6] + umlal2 v15.4s, v12.8h, v0.h[6] + umlal v14.4s, v13.4h, v0.h[6] + umlal2 v15.4s, v13.8h, v0.h[6] + 105: ext v12.16b, v7.16b, v8.16b, #3*2 + ext v13.16b, v8.16b, v9.16b, #5*2 + umlal v14.4s, v12.4h, v0.h[5] + umlal2 v15.4s, v12.8h, v0.h[5] + umlal v14.4s, v13.4h, v0.h[5] + umlal2 v15.4s, v13.8h, v0.h[5] + 104: //ext v12.16b, v7.16b, v8.16b, #4*2 + //ext v13.16b, v8.16b, v9.16b, #4*2 + umlal2 v14.4s, v7.8h, v0.h[4] + umlal v15.4s, v8.4h, v0.h[4] + umlal2 
v14.4s, v8.8h, v0.h[4] + umlal v15.4s, v9.4h, v0.h[4] + 103: ext v12.16b, v7.16b, v8.16b, #5*2 + ext v13.16b, v8.16b, v9.16b, #3*2 + umlal v14.4s, v12.4h, v0.h[3] + umlal2 v15.4s, v12.8h, v0.h[3] + umlal v14.4s, v13.4h, v0.h[3] + umlal2 v15.4s, v13.8h, v0.h[3] + 102: ext v12.16b, v7.16b, v8.16b, #6*2 + ext v13.16b, v8.16b, v9.16b, #2*2 + umlal v14.4s, v12.4h, v0.h[2] + umlal2 v15.4s, v12.8h, v0.h[2] + umlal v14.4s, v13.4h, v0.h[2] + umlal2 v15.4s, v13.8h, v0.h[2] + 101: ext v12.16b, v7.16b, v8.16b, #7*2 + ext v13.16b, v8.16b, v9.16b, #1*2 + umlal v14.4s, v12.4h, v0.h[1] + umlal2 v15.4s, v12.8h, v0.h[1] + umlal v14.4s, v13.4h, v0.h[1] + umlal2 v15.4s, v13.8h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +.macro hconv1_25/*{{{*/ +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .hword 109f-100f + .hword 110f-100f + .hword 111f-100f + .hword 112f-100f + .hword 113f-100f + .hword 114f-100f + .hword 115f-100f + .hword 116f-100f + .hword 117f-100f + .hword 118f-100f + .hword 119f-100f + .hword 120f-100f + .hword 121f-100f + .hword 122f-100f + .hword 123f-100f + .hword 124f-100f + .hword 125f-100f + .align 4 +.text + ext v12.16b, v6.16b, v7.16b, #7*2 + umull v14.4s, v12.4h, v0.h[0] + umull2 v15.4s, v12.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 125: ext v12.16b, v31.16b, v4.16b, #6*2 + ext v13.16b, v10.16b, v11.16b, #0*2 + umlal v14.4s, v12.4h, v3.h[1] + umlal2 v15.4s, v12.8h, v3.h[1] + umlal v14.4s, v13.4h, v3.h[1] + umlal2 v15.4s, v13.8h, v3.h[1] + 124: ext v12.16b, v31.16b, v4.16b, #7*2 + ext v13.16b, v9.16b, v10.16b, #7*2 + umlal v14.4s, v12.4h, v3.h[0] + 
umlal2 v15.4s, v12.8h, v3.h[0] + umlal v14.4s, v13.4h, v3.h[0] + umlal2 v15.4s, v13.8h, v3.h[0] + 123: ext v12.16b, v4.16b, v5.16b, #0*2 + ext v13.16b, v9.16b, v10.16b, #6*2 + umlal v14.4s, v12.4h, v2.h[7] + umlal2 v15.4s, v12.8h, v2.h[7] + umlal v14.4s, v13.4h, v2.h[7] + umlal2 v15.4s, v13.8h, v2.h[7] + 122: ext v12.16b, v4.16b, v5.16b, #1*2 + ext v13.16b, v9.16b, v10.16b, #5*2 + umlal v14.4s, v12.4h, v2.h[6] + umlal2 v15.4s, v12.8h, v2.h[6] + umlal v14.4s, v13.4h, v2.h[6] + umlal2 v15.4s, v13.8h, v2.h[6] + 121: ext v12.16b, v4.16b, v5.16b, #2*2 + ext v13.16b, v9.16b, v10.16b, #4*2 + umlal v14.4s, v12.4h, v2.h[5] + umlal2 v15.4s, v12.8h, v2.h[5] + umlal v14.4s, v13.4h, v2.h[5] + umlal2 v15.4s, v13.8h, v2.h[5] + 120: ext v12.16b, v4.16b, v5.16b, #3*2 + ext v13.16b, v9.16b, v10.16b, #3*2 + umlal v14.4s, v12.4h, v2.h[4] + umlal2 v15.4s, v12.8h, v2.h[4] + umlal v14.4s, v13.4h, v2.h[4] + umlal2 v15.4s, v13.8h, v2.h[4] + 119: ext v12.16b, v4.16b, v5.16b, #4*2 + ext v13.16b, v9.16b, v10.16b, #2*2 + umlal v14.4s, v12.4h, v2.h[3] + umlal2 v15.4s, v12.8h, v2.h[3] + umlal v14.4s, v13.4h, v2.h[3] + umlal2 v15.4s, v13.8h, v2.h[3] + 118: ext v12.16b, v4.16b, v5.16b, #5*2 + ext v13.16b, v9.16b, v10.16b, #1*2 + umlal v14.4s, v12.4h, v2.h[2] + umlal2 v15.4s, v12.8h, v2.h[2] + umlal v14.4s, v13.4h, v2.h[2] + umlal2 v15.4s, v13.8h, v2.h[2] + 117: ext v12.16b, v4.16b, v5.16b, #6*2 + ext v13.16b, v9.16b, v10.16b, #0*2 + umlal v14.4s, v12.4h, v2.h[1] + umlal2 v15.4s, v12.8h, v2.h[1] + umlal v14.4s, v13.4h, v2.h[1] + umlal2 v15.4s, v13.8h, v2.h[1] + 116: ext v12.16b, v4.16b, v5.16b, #7*2 + ext v13.16b, v8.16b, v9.16b, #7*2 + umlal v14.4s, v12.4h, v2.h[0] + umlal2 v15.4s, v12.8h, v2.h[0] + umlal v14.4s, v13.4h, v2.h[0] + umlal2 v15.4s, v13.8h, v2.h[0] + 115: ext v12.16b, v5.16b, v6.16b, #0*2 + ext v13.16b, v8.16b, v9.16b, #6*2 + umlal v14.4s, v12.4h, v1.h[7] + umlal2 v15.4s, v12.8h, v1.h[7] + umlal v14.4s, v13.4h, v1.h[7] + umlal2 v15.4s, v13.8h, v1.h[7] + 114: ext v12.16b, v5.16b, 
v6.16b, #1*2 + ext v13.16b, v8.16b, v9.16b, #5*2 + umlal v14.4s, v12.4h, v1.h[6] + umlal2 v15.4s, v12.8h, v1.h[6] + umlal v14.4s, v13.4h, v1.h[6] + umlal2 v15.4s, v13.8h, v1.h[6] + 113: ext v12.16b, v5.16b, v6.16b, #2*2 + ext v13.16b, v8.16b, v9.16b, #4*2 + umlal v14.4s, v12.4h, v1.h[5] + umlal2 v15.4s, v12.8h, v1.h[5] + umlal v14.4s, v13.4h, v1.h[5] + umlal2 v15.4s, v13.8h, v1.h[5] + 112: ext v12.16b, v5.16b, v6.16b, #3*2 + ext v13.16b, v8.16b, v9.16b, #3*2 + umlal v14.4s, v12.4h, v1.h[4] + umlal2 v15.4s, v12.8h, v1.h[4] + umlal v14.4s, v13.4h, v1.h[4] + umlal2 v15.4s, v13.8h, v1.h[4] + 111: ext v12.16b, v5.16b, v6.16b, #4*2 + ext v13.16b, v8.16b, v9.16b, #2*2 + umlal v14.4s, v12.4h, v1.h[3] + umlal2 v15.4s, v12.8h, v1.h[3] + umlal v14.4s, v13.4h, v1.h[3] + umlal2 v15.4s, v13.8h, v1.h[3] + 110: ext v12.16b, v5.16b, v6.16b, #5*2 + ext v13.16b, v8.16b, v9.16b, #1*2 + umlal v14.4s, v12.4h, v1.h[2] + umlal2 v15.4s, v12.8h, v1.h[2] + umlal v14.4s, v13.4h, v1.h[2] + umlal2 v15.4s, v13.8h, v1.h[2] + 109: ext v12.16b, v5.16b, v6.16b, #6*2 + ext v13.16b, v8.16b, v9.16b, #0*2 + umlal v14.4s, v12.4h, v1.h[1] + umlal2 v15.4s, v12.8h, v1.h[1] + umlal v14.4s, v13.4h, v1.h[1] + umlal2 v15.4s, v13.8h, v1.h[1] + 108: ext v12.16b, v5.16b, v6.16b, #7*2 + ext v13.16b, v7.16b, v8.16b, #7*2 + umlal v14.4s, v12.4h, v1.h[0] + umlal2 v15.4s, v12.8h, v1.h[0] + umlal v14.4s, v13.4h, v1.h[0] + umlal2 v15.4s, v13.8h, v1.h[0] + 107: ext v12.16b, v6.16b, v7.16b, #0*2 + ext v13.16b, v7.16b, v8.16b, #6*2 + umlal v14.4s, v12.4h, v0.h[7] + umlal2 v15.4s, v12.8h, v0.h[7] + umlal v14.4s, v13.4h, v0.h[7] + umlal2 v15.4s, v13.8h, v0.h[7] + 106: ext v12.16b, v6.16b, v7.16b, #1*2 + ext v13.16b, v7.16b, v8.16b, #5*2 + umlal v14.4s, v12.4h, v0.h[6] + umlal2 v15.4s, v12.8h, v0.h[6] + umlal v14.4s, v13.4h, v0.h[6] + umlal2 v15.4s, v13.8h, v0.h[6] + 105: ext v12.16b, v6.16b, v7.16b, #2*2 + ext v13.16b, v7.16b, v8.16b, #4*2 + umlal v14.4s, v12.4h, v0.h[5] + umlal2 v15.4s, v12.8h, v0.h[5] + umlal v14.4s, 
v13.4h, v0.h[5] + umlal2 v15.4s, v13.8h, v0.h[5] + 104: ext v12.16b, v6.16b, v7.16b, #3*2 + ext v13.16b, v7.16b, v8.16b, #3*2 + umlal v14.4s, v12.4h, v0.h[4] + umlal2 v15.4s, v12.8h, v0.h[4] + umlal v14.4s, v13.4h, v0.h[4] + umlal2 v15.4s, v13.8h, v0.h[4] + 103: ext v12.16b, v6.16b, v7.16b, #4*2 + ext v13.16b, v7.16b, v8.16b, #2*2 + umlal v14.4s, v12.4h, v0.h[3] + umlal2 v15.4s, v12.8h, v0.h[3] + umlal v14.4s, v13.4h, v0.h[3] + umlal2 v15.4s, v13.8h, v0.h[3] + 102: ext v12.16b, v6.16b, v7.16b, #5*2 + ext v13.16b, v7.16b, v8.16b, #1*2 + umlal v14.4s, v12.4h, v0.h[2] + umlal2 v15.4s, v12.8h, v0.h[2] + umlal v14.4s, v13.4h, v0.h[2] + umlal2 v15.4s, v13.8h, v0.h[2] + 101: ext v12.16b, v6.16b, v7.16b, #6*2 + ext v13.16b, v7.16b, v8.16b, #0*2 + umlal v14.4s, v12.4h, v0.h[1] + umlal2 v15.4s, v12.8h, v0.h[1] + umlal v14.4s, v13.4h, v0.h[1] + umlal2 v15.4s, v13.8h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v31.16b, v4.16b + mov v4.16b, v5.16b + mov v5.16b, v6.16b + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +#define TUNED_LIST4 6, 12, 20 +.macro hconv4_6/*{{{*/ +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .align 4 +.text + umull v14.4s, v7.4h, v0.h[0] + umull2 v15.4s, v7.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 106: umlal v14.4s, v4.4h, v0.h[6] + umlal2 v15.4s, v4.8h, v0.h[6] + umlal v14.4s, v10.4h, v0.h[6] + umlal2 v15.4s, v10.8h, v0.h[6] + 105: umlal2 v14.4s, v4.8h, v0.h[5] + umlal v15.4s, v5.4h, v0.h[5] + umlal2 v14.4s, v9.8h, v0.h[5] + umlal v15.4s, v10.4h, v0.h[5] + 104: umlal v14.4s, v5.4h, v0.h[4] + umlal2 v15.4s, v5.8h, v0.h[4] + umlal v14.4s, v9.4h, v0.h[4] + umlal2 v15.4s, v9.8h, v0.h[4] + 103: umlal2 v14.4s, v5.8h, 
v0.h[3] + umlal v15.4s, v6.4h, v0.h[3] + umlal2 v14.4s, v8.8h, v0.h[3] + umlal v15.4s, v9.4h, v0.h[3] + 102: umlal v14.4s, v6.4h, v0.h[2] + umlal2 v15.4s, v6.8h, v0.h[2] + umlal v14.4s, v8.4h, v0.h[2] + umlal2 v15.4s, v8.8h, v0.h[2] + 101: umlal2 v14.4s, v6.8h, v0.h[1] + umlal v15.4s, v7.4h, v0.h[1] + umlal2 v14.4s, v7.8h, v0.h[1] + umlal v15.4s, v8.4h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v4.16b, v5.16b + mov v5.16b, v6.16b + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +.macro hconv4_12/*{{{*/ +.rodata + 200: .hword -4 //Might need to remove these... + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .hword 109f-100f + .hword 110f-100f + .hword 111f-100f + .hword 112f-100f + .align 4 +.text + umull v14.4s, v4.4h, v0.h[0] + umull2 v15.4s, v4.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 112: umlal v14.4s, v26.4h, v1.h[4] + umlal2 v15.4s, v26.8h, v1.h[4] + umlal v14.4s, v10.4h, v1.h[4] + umlal2 v15.4s, v10.8h, v1.h[4] + 111: umlal2 v14.4s, v26.8h, v1.h[3] + umlal v15.4s, v27.4h, v1.h[3] + umlal2 v14.4s, v9.8h, v1.h[3] + umlal v15.4s, v10.4h, v1.h[3] + 110: umlal v14.4s, v27.4h, v1.h[2] + umlal2 v15.4s, v27.8h, v1.h[2] + umlal v14.4s, v9.4h, v1.h[2] + umlal2 v15.4s, v9.8h, v1.h[2] + 109: umlal2 v14.4s, v27.8h, v1.h[1] + umlal v15.4s, v28.4h, v1.h[1] + umlal2 v14.4s, v8.8h, v1.h[1] + umlal v15.4s, v9.4h, v1.h[1] + 108: umlal v14.4s, v28.4h, v1.h[0] + umlal2 v15.4s, v28.8h, v1.h[0] + umlal v14.4s, v8.4h, v1.h[0] + umlal2 v15.4s, v8.8h, v1.h[0] + 107: umlal2 v14.4s, v28.8h, v0.h[7] + umlal v15.4s, v29.4h, v0.h[7] + umlal2 v14.4s, v7.8h, v0.h[7] + umlal v15.4s, v8.4h, v0.h[7] + 106: umlal v14.4s, v29.4h, v0.h[6] + 
umlal2 v15.4s, v29.8h, v0.h[6] + umlal v14.4s, v7.4h, v0.h[6] + umlal2 v15.4s, v7.8h, v0.h[6] + 105: umlal2 v14.4s, v29.8h, v0.h[5] + umlal v15.4s, v30.4h, v0.h[5] + umlal2 v14.4s, v6.8h, v0.h[5] + umlal v15.4s, v7.4h, v0.h[5] + 104: umlal v14.4s, v30.4h, v0.h[4] + umlal2 v15.4s, v30.8h, v0.h[4] + umlal v14.4s, v6.4h, v0.h[4] + umlal2 v15.4s, v6.8h, v0.h[4] + 103: umlal2 v14.4s, v30.8h, v0.h[3] + umlal v15.4s, v31.4h, v0.h[3] + umlal2 v14.4s, v5.8h, v0.h[3] + umlal v15.4s, v6.4h, v0.h[3] + 102: umlal v14.4s, v31.4h, v0.h[2] + umlal2 v15.4s, v31.8h, v0.h[2] + umlal v14.4s, v5.4h, v0.h[2] + umlal2 v15.4s, v5.8h, v0.h[2] + 101: umlal2 v14.4s, v31.8h, v0.h[1] + umlal v15.4s, v4.4h, v0.h[1] + umlal2 v14.4s, v4.8h, v0.h[1] + umlal v15.4s, v5.4h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v26.16b, v27.16b + mov v27.16b, v28.16b + mov v28.16b, v29.16b + mov v29.16b, v30.16b + mov v30.16b, v31.16b + mov v31.16b, v4.16b + mov v4.16b, v5.16b + mov v5.16b, v6.16b + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +.macro hconv4_20/*{{{*/ +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .hword 109f-100f + .hword 110f-100f + .hword 111f-100f + .hword 112f-100f + .hword 113f-100f + .hword 114f-100f + .hword 115f-100f + .hword 116f-100f + .hword 117f-100f + .hword 118f-100f + .hword 119f-100f + .hword 120f-100f + .align 4 +.text + umull v14.4s, v28.4h, v0.h[0] + umull2 v15.4s, v28.8h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 120: umlal v14.4s, v18.4h, v2.h[4] + umlal2 v15.4s, v18.8h, v2.h[4] + umlal v14.4s, v10.4h, v2.h[4] + umlal2 v15.4s, v10.8h, v2.h[4] + 119: umlal2 v14.4s, v18.8h, v2.h[3] + umlal v15.4s, 
v19.4h, v2.h[3] + umlal2 v14.4s, v9.8h, v2.h[3] + umlal v15.4s, v10.4h, v2.h[3] + 118: umlal v14.4s, v19.4h, v2.h[2] + umlal2 v15.4s, v19.8h, v2.h[2] + umlal v14.4s, v9.4h, v2.h[2] + umlal2 v15.4s, v9.8h, v2.h[2] + 117: umlal2 v14.4s, v19.8h, v2.h[1] + umlal v15.4s, v20.4h, v2.h[1] + umlal2 v14.4s, v8.8h, v2.h[1] + umlal v15.4s, v9.4h, v2.h[1] + 116: umlal v14.4s, v20.4h, v2.h[0] + umlal2 v15.4s, v20.8h, v2.h[0] + umlal v14.4s, v8.4h, v2.h[0] + umlal2 v15.4s, v8.8h, v2.h[0] + 115: umlal2 v14.4s, v20.8h, v1.h[7] + umlal v15.4s, v21.4h, v1.h[7] + umlal2 v14.4s, v7.8h, v1.h[7] + umlal v15.4s, v8.4h, v1.h[7] + 114: umlal v14.4s, v21.4h, v1.h[6] + umlal2 v15.4s, v21.8h, v1.h[6] + umlal v14.4s, v7.4h, v1.h[6] + umlal2 v15.4s, v7.8h, v1.h[6] + 113: umlal2 v14.4s, v21.8h, v1.h[5] + umlal v15.4s, v22.4h, v1.h[5] + umlal2 v14.4s, v6.8h, v1.h[5] + umlal v15.4s, v7.4h, v1.h[5] + 112: umlal v14.4s, v22.4h, v1.h[4] + umlal2 v15.4s, v22.8h, v1.h[4] + umlal v14.4s, v6.4h, v1.h[4] + umlal2 v15.4s, v6.8h, v1.h[4] + 111: umlal2 v14.4s, v22.8h, v1.h[3] + umlal v15.4s, v23.4h, v1.h[3] + umlal2 v14.4s, v5.8h, v1.h[3] + umlal v15.4s, v6.4h, v1.h[3] + 110: umlal v14.4s, v23.4h, v1.h[2] + umlal2 v15.4s, v23.8h, v1.h[2] + umlal v14.4s, v5.4h, v1.h[2] + umlal2 v15.4s, v5.8h, v1.h[2] + 109: umlal2 v14.4s, v23.8h, v1.h[1] + umlal v15.4s, v24.4h, v1.h[1] + umlal2 v14.4s, v4.8h, v1.h[1] + umlal v15.4s, v5.4h, v1.h[1] + 108: umlal v14.4s, v24.4h, v1.h[0] + umlal2 v15.4s, v24.8h, v1.h[0] + umlal v14.4s, v4.4h, v1.h[0] + umlal2 v15.4s, v4.8h, v1.h[0] + 107: umlal2 v14.4s, v24.8h, v0.h[7] + umlal v15.4s, v25.4h, v0.h[7] + umlal2 v14.4s, v31.8h, v0.h[7] + umlal v15.4s, v4.4h, v0.h[7] + 106: umlal v14.4s, v25.4h, v0.h[6] + umlal2 v15.4s, v25.8h, v0.h[6] + umlal v14.4s, v31.4h, v0.h[6] + umlal2 v15.4s, v31.8h, v0.h[6] + 105: umlal2 v14.4s, v25.8h, v0.h[5] + umlal v15.4s, v26.4h, v0.h[5] + umlal2 v14.4s, v30.8h, v0.h[5] + umlal v15.4s, v31.4h, v0.h[5] + 104: umlal v14.4s, v26.4h, v0.h[4] + umlal2 
v15.4s, v26.8h, v0.h[4] + umlal v14.4s, v30.4h, v0.h[4] + umlal2 v15.4s, v30.8h, v0.h[4] + 103: umlal2 v14.4s, v26.8h, v0.h[3] + umlal v15.4s, v27.4h, v0.h[3] + umlal2 v14.4s, v29.8h, v0.h[3] + umlal v15.4s, v30.4h, v0.h[3] + 102: umlal v14.4s, v27.4h, v0.h[2] + umlal2 v15.4s, v27.8h, v0.h[2] + umlal v14.4s, v29.4h, v0.h[2] + umlal2 v15.4s, v29.8h, v0.h[2] + 101: umlal2 v14.4s, v27.8h, v0.h[1] + umlal v15.4s, v28.4h, v0.h[1] + umlal2 v14.4s, v28.8h, v0.h[1] + umlal v15.4s, v29.4h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + mov v18.16b, v19.16b + mov v19.16b, v20.16b + mov v20.16b, v21.16b + mov v21.16b, v22.16b + mov v22.16b, v23.16b + mov v23.16b, v24.16b + mov v24.16b, v25.16b + mov v25.16b, v26.16b + mov v26.16b, v27.16b + mov v27.16b, v28.16b + mov v28.16b, v29.16b + mov v29.16b, v30.16b + mov v30.16b, v31.16b + mov v31.16b, v4.16b + mov v4.16b, v5.16b + mov v5.16b, v6.16b + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +.macro hconv4_25/*{{{*/ +.rodata + 200: .hword -4 + .hword 101f-100f + .hword 102f-100f + .hword 103f-100f + .hword 104f-100f + .hword 105f-100f + .hword 106f-100f + .hword 107f-100f + .hword 108f-100f + .hword 109f-100f + .hword 110f-100f + .hword 111f-100f + .hword 112f-100f + .hword 113f-100f + .hword 114f-100f + .hword 115f-100f + .hword 116f-100f + .hword 117f-100f + .hword 118f-100f + .hword 119f-100f + .hword 120f-100f + .hword 121f-100f + .hword 122f-100f + .hword 123f-100f + .hword 124f-100f + .hword 125f-100f + .align 4 +.text + umull2 v14.4s, v25.8h, v0.h[0] + umull v15.4s, v26.4h, v0.h[0] + + adrp x16, 200b + add x16, x16, :lo12:200b + ldrsh x12, [x16, x5, LSL #1] + adr x16, 100f + add x12, x12, x16 + 100: br x12 + 125: ld1 {v12.8h}, [x9] + umlal v14.4s, v12.4h, v3.h[1] + umlal2 v15.4s, v12.8h, v3.h[1] + umlal v14.4s, v10.4h, v3.h[1] + umlal2 v15.4s, v10.8h, v3.h[1] + 124: add x12, x9, 
#0x08 + bic x12, x12, #0x40 + ld1 {v12.4h}, [x12], #8 + bic x12, x12, #0x40 + ld1 {v13.4h}, [x12] + umlal v14.4s, v12.4h, v3.h[0] + umlal v15.4s, v13.4h, v3.h[0] + umlal2 v14.4s, v9.8h, v3.h[0] + umlal v15.4s, v10.4h, v3.h[0] + 123: add x12, x9, #0x10 + bic x12, x12, #0x40 + ld1 {v12.8h}, [x12] + umlal v14.4s, v12.4h, v2.h[7] + umlal2 v15.4s, v12.8h, v2.h[7] + umlal v14.4s, v9.4h, v2.h[7] + umlal2 v15.4s, v9.8h, v2.h[7] + 122: add x12, x9, #0x18 + bic x12, x12, #0x40 + ld1 {v12.4h}, [x12], #8 + bic x12, x12, #0x40 + ld1 {v13.4h}, [x12] + umlal v14.4s, v12.4h, v2.h[6] + umlal v15.4s, v13.4h, v2.h[6] + umlal2 v14.4s, v8.8h, v2.h[6] + umlal v15.4s, v9.4h, v2.h[6] + 121: add x12, x9, #0x20 + bic x12, x12, #0x40 + ld1 {v12.8h}, [x12] + umlal v14.4s, v12.4h, v2.h[5] + umlal2 v15.4s, v12.8h, v2.h[5] + umlal v14.4s, v8.4h, v2.h[5] + umlal2 v15.4s, v8.8h, v2.h[5] + 120: add x12, x9, #0x28 + bic x12, x12, #0x40 + ld1 {v12.4h}, [x12], #8 + bic x12, x12, #0x40 + ld1 {v13.4h}, [x12] + umlal v14.4s, v12.4h, v2.h[4] + umlal v15.4s, v13.4h, v2.h[4] + umlal2 v14.4s, v7.8h, v2.h[4] + umlal v15.4s, v8.4h, v2.h[4] + 119: add x12, x9, #0x30 + bic x12, x12, #0x40 + ld1 {v12.8h}, [x12] + umlal v14.4s, v12.4h, v2.h[3] + umlal2 v15.4s, v12.8h, v2.h[3] + umlal v14.4s, v7.4h, v2.h[3] + umlal2 v15.4s, v7.8h, v2.h[3] + 118: add x12, x9, #0x38 + bic x12, x12, #0x40 + ld1 {v12.4h}, [x12] + umlal v14.4s, v12.4h, v2.h[2] + umlal v15.4s, v17.4h, v2.h[2] + umlal2 v14.4s, v6.8h, v2.h[2] + umlal v15.4s, v7.4h, v2.h[2] + 117: umlal v14.4s, v17.4h, v2.h[1] + umlal2 v15.4s, v17.8h, v2.h[1] + umlal v14.4s, v6.4h, v2.h[1] + umlal2 v15.4s, v6.8h, v2.h[1] + 116: umlal2 v14.4s, v17.8h, v2.h[0] + umlal v15.4s, v18.4h, v2.h[0] + umlal2 v14.4s, v5.8h, v2.h[0] + umlal v15.4s, v6.4h, v2.h[0] + 115: umlal v14.4s, v18.4h, v1.h[7] + umlal2 v15.4s, v18.8h, v1.h[7] + umlal v14.4s, v5.4h, v1.h[7] + umlal2 v15.4s, v5.8h, v1.h[7] + 114: umlal2 v14.4s, v18.8h, v1.h[6] + umlal v15.4s, v19.4h, v1.h[6] + umlal2 v14.4s, v4.8h, 
v1.h[6] + umlal v15.4s, v5.4h, v1.h[6] + 113: umlal v14.4s, v19.4h, v1.h[5] + umlal2 v15.4s, v19.8h, v1.h[5] + umlal v14.4s, v4.4h, v1.h[5] + umlal2 v15.4s, v4.8h, v1.h[5] + 112: umlal2 v14.4s, v19.8h, v1.h[4] + umlal v15.4s, v20.4h, v1.h[4] + umlal2 v14.4s, v31.8h, v1.h[4] + umlal v15.4s, v4.4h, v1.h[4] + 111: umlal v14.4s, v20.4h, v1.h[3] + umlal2 v15.4s, v20.8h, v1.h[3] + umlal v14.4s, v31.4h, v1.h[3] + umlal2 v15.4s, v31.8h, v1.h[3] + 110: umlal2 v14.4s, v20.8h, v1.h[2] + umlal v15.4s, v21.4h, v1.h[2] + umlal2 v14.4s, v30.8h, v1.h[2] + umlal v15.4s, v31.4h, v1.h[2] + 109: umlal v14.4s, v21.4h, v1.h[1] + umlal2 v15.4s, v21.8h, v1.h[1] + umlal v14.4s, v30.4h, v1.h[1] + umlal2 v15.4s, v30.8h, v1.h[1] + 108: umlal2 v14.4s, v21.8h, v1.h[0] + umlal v15.4s, v22.4h, v1.h[0] + umlal2 v14.4s, v29.8h, v1.h[0] + umlal v15.4s, v30.4h, v1.h[0] + 107: umlal v14.4s, v22.4h, v0.h[7] + umlal2 v15.4s, v22.8h, v0.h[7] + umlal v14.4s, v29.4h, v0.h[7] + umlal2 v15.4s, v29.8h, v0.h[7] + 106: umlal2 v14.4s, v22.8h, v0.h[6] + umlal v15.4s, v23.4h, v0.h[6] + umlal2 v14.4s, v28.8h, v0.h[6] + umlal v15.4s, v29.4h, v0.h[6] + 105: umlal v14.4s, v23.4h, v0.h[5] + umlal2 v15.4s, v23.8h, v0.h[5] + umlal v14.4s, v28.4h, v0.h[5] + umlal2 v15.4s, v28.8h, v0.h[5] + 104: umlal2 v14.4s, v23.8h, v0.h[4] + umlal v15.4s, v24.4h, v0.h[4] + umlal2 v14.4s, v27.8h, v0.h[4] + umlal v15.4s, v28.4h, v0.h[4] + 103: umlal v14.4s, v24.4h, v0.h[3] + umlal2 v15.4s, v24.8h, v0.h[3] + umlal v14.4s, v27.4h, v0.h[3] + umlal2 v15.4s, v27.8h, v0.h[3] + 102: umlal2 v14.4s, v24.8h, v0.h[2] + umlal v15.4s, v25.4h, v0.h[2] + umlal2 v14.4s, v26.8h, v0.h[2] + umlal v15.4s, v27.4h, v0.h[2] + 101: umlal v14.4s, v25.4h, v0.h[1] + umlal2 v15.4s, v25.8h, v0.h[1] + umlal v14.4s, v26.4h, v0.h[1] + umlal2 v15.4s, v26.8h, v0.h[1] + + uqrshrn v14.4h, v14.4s, #16 + uqrshrn2 v14.8h, v15.4s, #16 + uqrshrn v15.8b, v14.8h, #FRACTION_BITS + + st1 {v17.16b}, [x9], #16 + bic x9, x9, #0x40 + mov v17.16b, v18.16b + mov v18.16b, v19.16b + mov 
v19.16b, v20.16b + mov v20.16b, v21.16b + mov v21.16b, v22.16b + mov v22.16b, v23.16b + mov v23.16b, v24.16b + mov v24.16b, v25.16b + mov v25.16b, v26.16b + mov v26.16b, v27.16b + mov v27.16b, v28.16b + mov v28.16b, v29.16b + mov v29.16b, v30.16b + mov v30.16b, v31.16b + mov v31.16b, v4.16b + mov v4.16b, v5.16b + mov v5.16b, v6.16b + mov v6.16b, v7.16b + mov v7.16b, v8.16b + mov v8.16b, v9.16b + mov v9.16b, v10.16b + mov v10.16b, v11.16b +.endm/*}}}*/ + +/* Dedicated function wrapper for the fetch macro, for the cases where + * performance isn't that important, to keep code size down. + */ +PRIVATE(fetch_generic_asm) + stp x10, x11, [sp, #-16]! + fetch + ldp x10, x11, [sp], #16 + ret +END(fetch_generic_asm) + + +/* Fetch the next (16 - (x10 & 15)) columns of data, avoiding reading memory + * beyond that limit, and filling the rest of the vector with the last legal + * pixel. + * Result is in v10 and v11. v8 and v9 are filled with the first legal pixel. + * Note: This function can read beyond the right edge of input if the image is + * narrower than 16 bytes. + */ +PRIVATE(fetch_clampleft1) + stp x29, x30, [sp, #-16]! + bl fetch_generic_asm + dup v8.8h, v10.h[0] + dup v9.8h, v10.h[0] + ands x12, x10, #15 + beq 1f + sub x1, x1, x12 + sub x15, x15, x12 + sub x19, x19, x12 + sub x10, x10, x12 + sub x12, sp, x12, LSL #1 + sub sp, sp, #64 + sub x12, x12, #32 + st1 {v8.8h, v9.8h, v10.8h,v11.8h}, [sp] + ld1 {v10.8h,v11.8h}, [x12] + add sp, sp, #64 +1: ldp x29, x30, [sp], #16 + ret +END(fetch_clampleft1) + +PRIVATE(fetch_clampleft4) + stp x29, x30, [sp, #-16]! 
+ bl fetch_generic_asm + dup v8.2d, v10.d[0] + dup v9.2d, v10.d[0] + ands x12, x10, #15 + beq 1f + sub x1, x1, x12 + sub x15, x15, x12 + sub x19, x19, x12 + sub x10, x10, x12 + sub x12, sp, x12, LSL #1 + sub sp, sp, #64 + sub x12, x12, #32 + st1 {v8.8h, v9.8h, v10.8h,v11.8h}, [sp] + ld1 {v10.8h,v11.8h}, [x12] + add sp, sp, #64 +1: ldp x29, x30, [sp], #16 + ret +END(fetch_clampleft4) + +/* Fetch only the next (x11 & 15) (where 0 means 16) columns of data, avoiding + * reading memory beyond that limit, and filling the rest of the vector with + * the last legal pixel. + * Result is in v10 and v11. v12 and v13 are filled with the last legal pixel. + * Note: This function can read beyond the left edge of input if the image is + * narrower than 16 bytes. + */ +PRIVATE(fetch_clampright1) + stp x29, x30, [sp, #-16]! + sub x12, xzr, x11 + ands x12, x12, #15 + beq 1f + sub x1, x1, x12 + sub x15, x15, x12 + sub x19, x19, x12 + bl fetch_generic_asm + dup v12.8h, v11.h[7] + dup v13.8h, v11.h[7] + sub x12, xzr, x11 + and x12, x12, #15 + sub sp, sp, #64 + add x12, sp, x12, LSL #1 + st1 {v10.8h,v11.8h,v12.8h,v13.8h}, [sp] + ld1 {v10.8h,v11.8h}, [x12] + add sp, sp, #64 + ldp x29, x30, [sp], #16 + ret +1: bl fetch_generic_asm + dup v12.8h, v11.h[7] + dup v13.8h, v11.h[7] + ldp x29, x30, [sp], #16 + ret +END(fetch_clampright1) + +PRIVATE(fetch_clampright4) + stp x29, x30, [sp, #-16]! 
+ sub x12, xzr, x11 + ands x12, x12, #15 + beq 1f + sub x1, x1, x12 + sub x15, x15, x12 + sub x19, x19, x12 + bl fetch_generic_asm + dup v12.2d, v11.d[1] + dup v13.2d, v11.d[1] + sub x12, xzr, x11 + and x12, x12, #15 + sub sp, sp, #64 + add x12, sp, x12, LSL #1 + st1 {v10.8h,v11.8h,v12.8h,v13.8h}, [sp] + ld1 {v10.8h,v11.8h}, [x12] + add sp, sp, #64 + ldp x29, x30, [sp], #16 + ret +1: bl fetch_generic_asm + dup v12.2d, v11.d[1] + dup v13.2d, v11.d[1] + ldp x29, x30, [sp], #16 + ret +END(fetch_clampright4) + +/* Given values in v10 and v11, and an index in x11, sweep the (x11 & 15)th + * value across to fill the rest of the register pair. Used for filling the + * right hand edge of the window when reading too close to the right hand edge + * of the image. + * Also returns a dup-ed copy of the last element in v12 and v13 for the + * tail-fill case (this happens incidentally in common path, but must be done + * deliberately in the fast-out path). + */ +PRIVATE(prefill_sweepright1) + ands x12, x11, #15 + beq 1f + sub x12, x12, #1 + sub sp, sp, #64 + st1 {v10.8h,v11.8h}, [sp] + add x12, sp, x12, LSL #1 + ld1r {v12.8h}, [x12] + ld1r {v13.8h}, [x12] + st1 {v12.8h,v13.8h}, [x12] + ld1 {v10.8h,v11.8h}, [sp] + add sp, sp, #64 + ret +1: dup v12.8h, v11.h[7] + dup v13.8h, v11.h[7] + ret +END(prefill_sweepright1) + +/* Four-channel variant of prefill_sweepright1: the last legal pixel occupies + * four 16-bit elements, so it starts at element (x11 & 15) - 4 of the + * v10/v11 pair and is replicated as a 64-bit unit. + * Both v12 and v13 are loaded before the store so that the tail of the saved + * v10/v11 chunk is overwritten with that pixel rather than with whatever v13 + * held on entry, and so that v13 carries valid right padding on return, as + * it does in the fast-out path. + */ +PRIVATE(prefill_sweepright4) + ands x12, x11, #15 + beq 1f + sub x12, x12, #4 + sub sp, sp, #64 + st1 {v10.8h,v11.8h}, [sp] + add x12, sp, x12, LSL #1 + ld1r {v12.2d}, [x12] + ld1r {v13.2d}, [x12] + st1 {v12.8h,v13.8h}, [x12] + ld1 {v10.8h,v11.8h}, [sp] + add sp, sp, #64 + ret +1: dup v12.2d, v11.d[1] + dup v13.2d, v11.d[1] + ret +END(prefill_sweepright4) + +/* The main loop keeps a sliding window of data that has already been convolved + * in the vertical axis for the current line. This usually stays in the + * register file, but spills to memory for large windows.
The first thing that + * needs to be done at start-up is to fill this window with image data, taking + * into account the padding needed if the left or right edges of the image fall + * within this window. + */ + +/* Because the window is in the register file writes to it cannot be indexed + * by another register. Consequently the fill loops are unrolled to address + * the registers directly. This macro distinguishes between writes to the + * register file and writes to the spill buffer (indicated by a destination + * register named xx). + */ +.macro prefill_out ra, rb, sra, srb + .ifc \ra,xx + .ifc \rb,xx + st1 {\sra,\srb}, [x9], #32 + .else + bic x9, x9, #0x40 + st1 {\sra}, [x9], #16 + mov \rb, \srb + .endif + .else + .ifnc \ra,\sra + mov \ra, \sra + .endif + .ifnc \rb,\srb + mov \rb, \srb + .endif + .endif +.endm + +/* This macro provides the list of registers representing the window, and the + * cases where the register file is too small and a spill buffer is used + * instead. + * Since several specialisations of each function are generated, this also + * culls superfluous iterations, and sets the variable `i` for subsequent + * macros indicating the current index into the window. 
+ */ +.macro prefill_list, macro, nextmacro, max_r, step, label + .macro ifneeded macro, nextmacro, line, nextline, ra, rb, step, label + .if windowsize >= (\line * 16) + .set i, windowsize - (\line * 16) +\label\macro\line: + prefill_\macro \label\nextmacro\line, \label\nextmacro\nextline, \ra, \rb, \step + .endif + .endm + ifneeded \macro \nextmacro, 13, 12, xx, xx, \step, \label + ifneeded \macro \nextmacro, 12, 11, xx, xx, \step, \label + ifneeded \macro \nextmacro, 11, 10, xx, v17.16b, \step, \label + ifneeded \macro \nextmacro, 10, 9, v18.16b, v19.16b, \step, \label + ifneeded \macro \nextmacro, 9, 8, v20.16b, v21.16b, \step, \label + ifneeded \macro \nextmacro, 8, 7, v22.16b, v23.16b, \step, \label + ifneeded \macro \nextmacro, 7, 6, v24.16b, v25.16b, \step, \label + ifneeded \macro \nextmacro, 6, 5, v26.16b, v27.16b, \step, \label + ifneeded \macro \nextmacro, 5, 4, v28.16b, v29.16b, \step, \label + ifneeded \macro \nextmacro, 4, 3, v30.16b, v31.16b, \step, \label + ifneeded \macro \nextmacro, 3, 2, v4.16b, v5.16b, \step, \label + ifneeded \macro \nextmacro, 2, 1, v6.16b, v7.16b, \step, \label + ifneeded \macro \nextmacro, 1, 0, v8.16b, v9.16b, \step, \label +\label\macro\()0: + b \label\()_end + .purgem ifneeded +.endm + +/* These macros represent the possible stages of filling the window. + * Each macro is unrolled enough times that it can fill the entire window + * itself, but normally it will have to hand control to subsequent macros + * part-way through and this is done using labels named \next and \after, where + * \next is the next macro starting at the same window position and \after is + * the next macro starting after the current window position. + */ + +/* leftfill: v8 and v9 contain the left padding value. While the window + * extends outside of the image on the left-hand side, and at least 16 more + * padding values are needed in the window, store v8 and v9 into the window. + * Otherwise skip forward to storing image data. 
+ */ +.macro prefill_leftfill, next, after, ra, rb, step + cmp x10, #i+16 + blo \next + prefill_out \ra, \rb, v8.16b, v9.16b +.endm + +/* leftedge: The very first non-fill or partial-fill chunk from the image is + * already loaded (as it was used to calculate the left padding value), so + * store it here, and then drop into the regular load/store cycle in the next + * macro. + */ +.macro prefill_leftedge, next, after, ra, rb, step +1: prefill_out \ra, \rb, v10.16b, v11.16b + b \after +.endm + +/* dofetch: Copy chunks of the image into the window without any complications + * from edge conditions. + */ +.macro prefill_dofetch, next, after, ra, rb, step + cmp x11, #i+16 + bls \next + bl fetch_generic_asm + prefill_out \ra, \rb, v10.16b, v11.16b +.endm + +/* rightedge: The last fetch (currently in v10 and v11) may have gone beyond + * the right-hand edge of the image. In that case sweep the last valid pixel + * across the rest of the chunk, and in either case prepare padding data in v12 + * and v13 for the next macro. This is done in fetch_clampright. + * This only happens once before going on to the next macro. + * Sometimes leftedge also covers the rightedge case, in which case this has + * to be skipped altogether. + */ +.macro prefill_rightedge, next, after, ra, rb, step + cmp x11, #i + bls \next + bl fetch_clampright\step + prefill_out \ra, \rb, v10.16b, v11.16b + b \after +.endm + +/* rightfill: The rest of the window is simply filled with right padding from + * v12 and v13. + */ +.macro prefill_rightfill, next, after, ra, rb, step + prefill_out \ra, \rb, v12.16b, v13.16b +.endm + +/* Here all of the macros above are unrolled and laid out in the proper order. 
+ */ +.macro prefill_body, max_r, step, label + prefill_list leftfill, leftedge, \max_r, \step, \label + prefill_list leftedge, dofetch, \max_r, \step, \label + prefill_list dofetch, rightedge, \max_r, \step, \label + prefill_list rightedge, rightfill, \max_r, \step, \label + /* rightfill never uses its next or after targets, so the labels that + * `oops` would name are neither defined nor referenced. + */ + prefill_list rightfill, oops, \max_r, \step, \label +\label\()_end: +.endm + + +/* Fill the convolution window with context data. The aim here is to load + * exactly 2*r columns, and in the main loop to read as many columns as will be + * written. This is complicated by the window being divided into chunks at + * register boundaries, and the need to handle cases when the input starts very + * close to the left or right (or both) edges of the image and the need to fill + * the spaces that leaves with left and right edge padding values. + * + * Input: + * x1 -- src + * x2 -- pitch + * x3 -- count + * x4 -- available image data right of src pointer + * x5 -- r + * x6 -- rup + * x7 -- rdn + * x8 -- available image data left of src pointer + * x9 -- buffer (if needed) + * x13 = -pitch + * x15 = top-row in + * x19 = bottom-row in + * Output: + * x4 -= min(inlen, count + windowsize - centertap) + * x1 += min(inlen, count + windowsize - centertap) + * x15 += min(inlen, count + windowsize - centertap) + * x19 += min(inlen, count + windowsize - centertap) + * Modifies: + * x10 -- fill start index in the window + * x11 -- fill stop index in the window + * x12 -- scratch + */ +.macro prefill step=1, max_r=25, label=xx + /* windowsize is 2 * max_r * step bytes rounded up to a multiple of 16. + * centertap is the byte offset in that window which lies max_r * step + * bytes before its end. + */ +.set windowsize, (((\max_r + \max_r) * \step + 15) & ~15) +.set centertap, (windowsize - \max_r * \step) + /* x10 = max(centertap - x8, 0) */ + mov x10, #centertap + subs x10, x10, x8 + csel x10, xzr, x10, lo + + /* x11 = min(centertap + x4, windowsize) */ + subs x11, x4, #windowsize - centertap + csel x11, xzr, x11, hs + add x11, x11, #windowsize + + /* x10 indicates where in the window legal image data begins. + * x11 indicates where in the window legal image data ends.
+ * When starting near the centre of a large image these would be + * zero and windowsize respectively, but when starting near the + * edges this can change. + * When starting on the leftmost pixel, x10 will be centertap. + * When starting on the rightmost pixel, x11 will be centertap+1. + */ + + /* x4 indicates how much data there is between the current pointers + * and the right edge of the image. The pointers currently point + * to the data needed at centertap. The subsequent code will + * consume (windowsize - x10) data, but only the data from + * centertap to windowsize comes out of x4's budget. + */ +1: subs x4, x4, #windowsize - centertap + csel x4, xzr, x4, lo + + /* And the pointers need to rewind to the start of the window. + */ + sub x1, x1, #centertap + sub x15, x15, #centertap + sub x19, x19, #centertap + + /* Unless x8 indicated that there wasn't that much data available. + */ + add x1, x1, x10 + add x15, x15, x10 + add x19, x19, x10 + + /* Get the first chunk, and add padding to align it to the window + * if necessary. + */ + bl fetch_clampleft\step + + /* Sometimes the start and the end of the window are in the same + * chunk. In that case both ends need filler at the outset. + * x10 and x11-1 lie in the same 16-byte chunk exactly when their + * exclusive-or is below 16. + */ + sub x12, x11, #1 + eor x12, x10, x12 + cmp x12, #16 + bhs 1f + bl prefill_sweepright\step + + /* Iterate through all the points in the window and fill them in + * with padding or image data as needed. + */ +1: prefill_body \max_r, \step, \label +.endm + +/* The main body of the convolve functions. Having already pre-filled the + * convolution window with 2*r input values, the logic settles into a regular + * pattern of reading and writing at a 1:1 rate until either input or output + * expires. The input leads the output by r values, so when processing all the + * way to the right-hand edge, or within r pixels of that edge, the input will + * run out first.
In the case of very narrow images, or sub-windows starting + * near the right edge, the input may already have run out while the + * convolution window was being filled and this loop will start with a + * zero-length input. + * + * Once the input runs out, the rest of the output must be processed by padding + * the remainder of the window with pad value from the last valid pixel from + * the source. + * + * Input: + * x0 = dst + * x1 = src + * x2 = pitch + * x3 = count + * x4 = inlen + * x5 = r + * x6 = rup + * x7 = rdn + * x9 = buffer + * x13 = -pitch + * x15 = top-row in + * x19 = bottom-row in + * Modifies + * x8 = fetch code pointer + */ +.macro conv_body core, step=1, max_r=25, labelc="", labelnc="" + + /* If x4 >= x3 then there's no need for clipping. The main loop + * needs to exit when either x3 or x4 runs out, so clamp x4 to be + * no greater than x3 and use x4 for the loop. + * However, if x4 comes out of the loop with less than 16 bytes + * left, a partial read would be necessary to avoid reading beyond + * the end of the image. To avoid this, clamp x4 to the next + * multiple of 16, which is still sufficient to force it out of the + * loop but doesn't imply a rewind. + */ + add x12, x3, #15 + bic x12, x12, #15 + cmp x4, x12 + csel x4, x12, x4, hi + + /* First calculate the entry-point into the internal fetch logic. + * This is done so the same function can service several kernel + * sizes. + * The non-clamping entry is r * 40 bytes (r << 5 plus r << 3) before + * \labelnc. The cmp/ccmp pair leaves "equal" only when r == rup and + * r == rdn: ccmp compares r with rdn if the first comparison was + * equal and otherwise sets the flags to 0, which reads as not equal. + */ + adrp x8, \labelnc + add x8, x8, #:lo12:\labelnc + sub x8, x8, x5, LSL #5 + sub x8, x8, x5, LSL #3 + cmp x5, x6 + ccmp x5, x7, #0, eq + beq 5f + + /* if (r != rup || r != rdn) then the address-clamping table should + * be used rather than the short-cut version. + * That entry is r * 56 bytes (r << 6 minus r << 3) before \labelc. + */ + adrp x8, \labelc + add x8, x8, #:lo12:\labelc + sub x8, x8, x5, LSL #6 + add x8, x8, x5, LSL #3 + b 5f + + /* Main loop: ... */ + .align 4 +3: /* first perform a vertical convolution from memory to get the next + * 16 taps of the horizontal window into the register file...
+ */ + fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=x8 + + /* ...then perform a horizontal convolution on that window to + * produce eight output bytes, and slide the window along. + * This has to be done twice to match the 16-way vertical pass. + * It would be preferable to have twice the work done in \core, but + * that would demand yet another variant on those macros and would + * perturb the register allocation severely. + */ + \core + st1 {v15.8b}, [x0], #8 + \core + st1 {v15.8b}, [x0], #8 + + sub x3, x3, #16 +5: subs x4, x4, #16 + bhi 3b + /* Here there's 16 or fewer bytes available before the edge of the + * source image. x4 holds that count minus 16 (because it was + * decremented before the first iteration ran). The last read may + * not be a whole chunk, and beyond that a fill value must be used. + * + * Of course, none of that matters if there's no more output to + * produce... + */ + cbz x3, 5f + + /* Output remains to be produced. Restore the true remaining input + * count in x4. If it is zero there is nothing left to read, so + * broadcast the top lane of v9 (one 16-bit value when step is 1, + * one 64-bit group otherwise) into v10 and v11 to serve as padding. + */ + adds x4, x4, #16 + bne 1f + .if \step==1 + dup v10.8h, v9.h[7] + dup v11.8h, v9.h[7] + .else + dup v10.2d, v9.d[1] + dup v11.2d, v9.d[1] + .endif + b 3f + + /* To avoid reading past end of input, rewind pointers by (16-x4) + * to ensure that they're exactly 16 bytes from the edge. + */ +1: mov x11, x4 + bl fetch_clampright\step + /* Now to put this padding to use, perform any remaining + * iterations. This is done at half the rate of the main loop, + * because there's no longer pressure from a 16-lane window filler. + */ +3: \core + .if \step==1 + dup v11.8h, v11.h[7] + .else + dup v11.2d, v11.d[1] + .endif + subs x3, x3, #8 + blo 4f + st1 {v15.8b}, [x0], #8 + bne 3b + b 5f + + /* If the final iteration contained 0 < l < 8 values, then perform + * a piecewise store of the final vector. + * x3 is now l - 8, so its low three bits are the same as those of l.
+ */ +4: tbz x3, #2, 1f + st1 {v15.s}[0], [x0], #4 + ext v15.8b, v15.8b, v15.8b, #4 +1: tbz x3, #1, 1f + st1 {v15.h}[0], [x0], #2 + ext v15.8b, v15.8b, v15.8b, #2 +1: tbz x3, #0, 5f + st1 {v15.b}[0], [x0], #1 + ext v15.8b, v15.8b, v15.8b, #1 +5: mov x0, #0 +.endm + + +/* Emit convolve1_<r> (step 1) for every radius in TUNED_LIST1 and for 25. + * x29/x30 are saved because prefill and conv_body make calls with bl. + */ +.irp r, TUNED_LIST1, 25 +PRIVATE(convolve1_\r) + stp x29,x30, [sp, #-16]! + + prefill step=1, max_r=\r, label=.Lcnv1_\r + + conv_body core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r + + ldp x29,x30, [sp], #16 + ret +END(convolve1_\r) +.endr + +/* Emit convolve4_<r> (step 4) for every radius in TUNED_LIST4 and for 25. + */ +.irp r, TUNED_LIST4, 25 +PRIVATE(convolve4_\r) + sub x9, sp, #0x40 + stp x29,x30, [sp, #-(16 + 0x40 + 0x80)]! + bic x9, x9, #0x7f + + /* x9 now points to a 0x40 byte buffer on the stack whose address + * has the low 7 bits clear. This allows easy address calculation + * in the wrap-around cases. + * The 0x80 bytes of slack in the frame absorb the rounding, so the + * buffer always lies above the saved x29/x30 pair. + */ + + prefill step=4, max_r=\r, label=.Lcnv4_\r + + conv_body core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r + + ldp x29,x30, [sp], #(16 + 0x40 + 0x80) + ret +END(convolve4_\r) +.endr + +/* void rsdIntrinsicBlurU1_K( + * void *out, // x0 + * void *in, // x1 + * size_t w, // x2 + * size_t h, // x3 + * size_t p, // x4 + * size_t x, // x5 + * size_t y, // x6 + * size_t count, // x7 + * size_t r, // [sp] + * uint16_t *tab); // [sp,#8] + */ +ENTRY(rsdIntrinsicBlurU1_K) + stp x19,x30, [sp, #-16]!
+ sub x8, sp, #32 + sub sp, sp, #64 + st1 {v8.1d - v11.1d}, [sp] + st1 {v12.1d - v15.1d}, [x8] + /* The low 64 bits of v8-v15 are now saved in the 64 bytes below the + * x19/x30 pair; x8 addressed the upper 32 bytes of that area. + */ + mov x8, x5 // x + ldr w5, [sp,#80] // r (first stack argument, 16 + 64 bytes above sp) + sub x9, x2, x8 // w - x + sub x10, x3, x6 // h - y + mov x2, x4 // pitch + mov x3, x7 // count + sub x7, x10, #1 // h - y - 1 + mov x4, x9 // inlen = (w - x) + + ldr x12, [sp, #88] // tab + + add x1, x1, x8 // src += x + + cmp x6, x5 + csel x6, x5, x6, hs // rup = min(r, y) + cmp x7, x5 + csel x7, x5, x7, hs // rdn = min(r, h - y - 1) + + sub x13, xzr, x2 // -pitch + msub x15, x2, x6, x1 // top row = src - pitch * rup + madd x19, x2, x7, x1 // bottom row = src + pitch * rdn + + ld1 {v0.8h,v1.8h}, [x12], #32 // 32 halfwords from tab into v0-v3 + ld1 {v2.8h,v3.8h}, [x12], #32 + + /* Dispatch without a call: x30 is pointed at the epilogue (1: below) and + * the first variant whose tuned radius is not less than r is branched to, + * falling back to the 25 version. The variant returns straight to 1:. + */ + adr x30, 1f + .irp r, TUNED_LIST1 + cmp x5, #\r + bls convolve1_\r + .endr + b convolve1_25 + +1: ld1 {v8.1d - v11.1d}, [sp], #32 + ld1 {v12.1d - v15.1d}, [sp], #32 + ldp x19,x30, [sp], #16 + ret +END(rsdIntrinsicBlurU1_K) + +/* void rsdIntrinsicBlurU4_K( + * void *out, // x0 + * void *in, // x1 + * size_t w, // x2 + * size_t h, // x3 + * size_t p, // x4 + * size_t x, // x5 + * size_t y, // x6 + * size_t count, // x7 + * size_t r, // [sp] + * uint16_t *tab); // [sp,#8] + */ +ENTRY(rsdIntrinsicBlurU4_K) + stp x19,x30, [sp, #-16]!
+ sub x8, sp, #32 + sub sp, sp, #64 + st1 {v8.1d - v11.1d}, [sp] + st1 {v12.1d - v15.1d}, [x8] + /* The low 64 bits of v8-v15 are now saved in the 64 bytes below the + * x19/x30 pair; x8 addressed the upper 32 bytes of that area. + * Below, x, w and count are multiplied by 4 so that they are byte counts. + */ + lsl x8, x5, #2 // x * 4 (byte offset) + lsl x2, x2, #2 // w * 4 + ldr w5, [sp,#80] // r (first stack argument, 16 + 64 bytes above sp) + sub x9, x2, x8 // (w - x) * 4 + sub x10, x3, x6 // h - y + mov x2, x4 // pitch + lsl x3, x7, #2 // count * 4 + sub x7, x10, #1 // h - y - 1 + mov x4, x9 // inlen = (w - x) * 4 + + ldr x12, [sp, #88] // tab + + add x1, x1, x8 // in += x * 4 + + cmp x6, x5 + csel x6, x5, x6, hs // rup = min(r, y) + cmp x7, x5 + csel x7, x5, x7, hs // rdn = min(r, h - y - 1) + + + sub x13, xzr, x2 // -pitch + msub x15, x2, x6, x1 // top row = in - pitch * rup + madd x19, x2, x7, x1 // bottom row = in + pitch * rdn + + ld1 {v0.8h,v1.8h}, [x12], #32 // 32 halfwords from tab into v0-v3 + ld1 {v2.8h,v3.8h}, [x12], #32 + + /* Dispatch without a call: x30 is pointed at the epilogue (1: below) and + * the first variant whose tuned radius is not less than r is branched to, + * falling back to the 25 version. The variant returns straight to 1:. + */ + adr x30, 1f + .irp r, TUNED_LIST4 + cmp x5, #\r + bls convolve4_\r + .endr + b convolve4_25 + +1: ld1 {v8.1d - v11.1d}, [sp], #32 + ld1 {v12.1d - v15.1d}, [sp], #32 + ldp x19,x30, [sp], #16 + ret +END(rsdIntrinsicBlurU4_K) diff --git a/toolkit/Blur_neon.S b/toolkit/Blur_neon.S new file mode 100644 index 00000000..241af5fe --- /dev/null +++ b/toolkit/Blur_neon.S @@ -0,0 +1,1824 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart +#define PRIVATE(f) .text; .align 4; .type f,#function; f: .fnstart +#define END(f) .fnend; .size f, .-f; + +#define ARCH_ARM_USE_BLUR_PRELOAD + +.eabi_attribute 25,1 @Tag_ABI_align8_preserved +.arm + +/* Number of fractional bits to preserve in intermediate results.
The + * intermediate storage is 16-bit, and we started with 8 bit data (the integer + * part), so this should be between 0 and 8. + */ +.set FRACTION_BITS, 7 + +/* Default max_r of the fetch macro. + */ +.set MAX_R, 25 + + +/* A quick way of making a line of code conditional on some other condition. + * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with + * `ifcc`: + */ +.macro ifcc zzz:vararg +.if cc + \zzz +.endif +.endm + +/* It's not always clear that prefetching is beneficial and this needs further + * testing on different cores, so it's made switchable here. + * The disabled form expands to a nop rather than to nothing, presumably so + * that the unrolled loop bodies keep the same size either way. + */ +#if defined(ARCH_ARM_USE_BLUR_PRELOAD) +#define VERTPLD(...) pld [__VA_ARGS__] +#else +#define VERTPLD(...) nop +#endif + +/* Fetch 16 columns of bytes (regardless of image format), convolve these + * vertically, and leave them in the register file. If working near the top or + * bottom of an image then clamp the addressing while loading the data in. + * + * The convolution is fully unrolled for windows up to max_r, with the + * outermost edges calculated first. This way it's possible to branch directly + * into the relevant part of the code for an arbitrary convolution radius. Two + * variants of the loop are produced; one eliminates the clamping code for a + * slight speed advantage. + * + * Where the macro is called with reg=x, the specified register is taken to + * contain a pre-calculated pointer into one of the two loops.
+ * + * Input: + * r1 -- src + * r2 -- pitch + * r5 -- r + * r6 -- rup (r, unless clipped to top of source image) + * r7 -- rdn (r, unless clipped to bottom of source image) + * r12 -- switch index + * q0-q3 -- coefficient table + * Output: + * r1 += 16 + * q10,q11 -- 16 convolved columns + * Modifies: + * r10 = upper row pointer + * r11 = lower row pointer + * q12-q15 = temporary sums + */ +.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/ + .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif + /* cc is 1 only when reg is left as r12: the ifcc lines below then work + * out the entry address themselves, as the later 1: label (the end of the + * clamped loop) minus r5 << 6, i.e. r5 iterations of 64 bytes. For any + * other reg those lines assemble to nothing and the caller has to have + * loaded the entry address already. + */ + + vld1.8 {d30,d31}, [r1] + mls r10, r2, r6, r1 + + vmovl.u8 q14, d30 + VERTPLD(r1, #32) + vmovl.u8 q15, d31 + .if \max_r < 16 // approximate + ifcc adr \reg, 1f + .else + ifcc ldr \reg, 2f +1: ifcc add \reg, \reg, pc + .endif + + vmull.u16 q12, d28, d0[0] + ifcc sub \reg, r5, LSL #6 + vmull.u16 q13, d29, d0[0] + mla r11, r2, r7, r1 + vmull.u16 q14, d30, d0[0] + add r1, r1, #16 + vmull.u16 q15, d31, d0[0] + bx \reg + + ifcc .align 2 + 2: ifcc .word 1f-1b-8 + + /* This version of the vertical fetch loop body is used away from the edges + * of the source image. The pointers start at the top and bottom source rows + * and work their way towards the centre on each iteration. This way the + * number of taps used can be controlled by jumping directly into the middle + * of the loop and running to completion. + * If the loop body changes size then the code which calculates the address of + * the initial iteration must be updated accordingly. + * As written each iteration is 12 instructions. The vswp pairs each half + * of the upper row with the same half of the lower row so that vaddl can + * sum the two rows, which share one coefficient.
+ */ + .macro vertfetch_noclamp i, dreg + .if 0 < \i && \i <= \max_r + vld1.8 {d20,d21}, [r10], r2 + vld1.8 {d22,d23}, [r11] + sub r11, r11, r2 + vswp d21, d22 + VERTPLD(r10, #32) + vaddl.u8 q10, d20, d21 + vaddl.u8 q11, d22, d23 + vmlal.u16 q12, d20, \dreg + VERTPLD(r11, #32) + vmlal.u16 q13, d21, \dreg + vmlal.u16 q14, d22, \dreg + vmlal.u16 q15, d23, \dreg + .endif + .endm + + /* This version of the vertical fetch loop body is used near the edges of the + * source image, where one or both of the accesses may start with a clamped + * value, and the row addresses only begin to change after some number of + * iterations before the end. + * If the loop body changes size then the code which calculates the address of + * the initial iteration must be updated accordingly. + * As written each iteration is 16 instructions (64 bytes in ARM state), + * matching the r5 << 6 stride used at the top of fetch; the nop presumably + * exists to make up that count. + */ + .macro vertfetch_clamped i, dreg + .if 0 < \i && \i <= \max_r + vld1.8 {d20,d21}, [r10] + vld1.8 {d22,d23}, [r11] + cmp r6, #\i + vswp d21, d22 + VERTPLD(r10, #32) + vaddl.u8 q10, d20, d21 + addhs r10, r10, r2 + vaddl.u8 q11, d22, d23 + cmp r7, #\i + vmlal.u16 q12, d20, \dreg + VERTPLD(r11, #32) + vmlal.u16 q13, d21, \dreg + subhs r11, r11, r2 + vmlal.u16 q14, d22, \dreg + nop + vmlal.u16 q15, d23, \dreg + .endif + .endm + + /* Entry into this unrolled loop is computed as a negative index from + * \labelc at the end of the block. + * Rows outside 1..max_r assemble to nothing because of the range check + * in the macro.
+ */ + .align 4 + vertfetch_clamped 27, d6[3] + vertfetch_clamped 26, d6[2] + vertfetch_clamped 25, d6[1] + vertfetch_clamped 24, d6[0] + vertfetch_clamped 23, d5[3] + vertfetch_clamped 22, d5[2] + vertfetch_clamped 21, d5[1] + vertfetch_clamped 20, d5[0] + vertfetch_clamped 19, d4[3] + vertfetch_clamped 18, d4[2] + vertfetch_clamped 17, d4[1] + vertfetch_clamped 16, d4[0] + vertfetch_clamped 15, d3[3] + vertfetch_clamped 14, d3[2] + vertfetch_clamped 13, d3[1] + vertfetch_clamped 12, d3[0] + vertfetch_clamped 11, d2[3] + vertfetch_clamped 10, d2[2] + vertfetch_clamped 9, d2[1] + vertfetch_clamped 8, d2[0] + vertfetch_clamped 7, d1[3] + vertfetch_clamped 6, d1[2] + vertfetch_clamped 5, d1[1] + vertfetch_clamped 4, d1[0] + vertfetch_clamped 3, d0[3] + vertfetch_clamped 2, d0[2] + vertfetch_clamped 1, d0[1] + vertfetch_clamped 0, d0[0] + 1: + \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */ + + /* Entry into this unrolled loop is computed as a negative index from + * \labelnc at the end of the block.
+ */ + .align 4 + vertfetch_noclamp 27, d6[3] + vertfetch_noclamp 26, d6[2] + vertfetch_noclamp 25, d6[1] + vertfetch_noclamp 24, d6[0] + vertfetch_noclamp 23, d5[3] + vertfetch_noclamp 22, d5[2] + vertfetch_noclamp 21, d5[1] + vertfetch_noclamp 20, d5[0] + vertfetch_noclamp 19, d4[3] + vertfetch_noclamp 18, d4[2] + vertfetch_noclamp 17, d4[1] + vertfetch_noclamp 16, d4[0] + vertfetch_noclamp 15, d3[3] + vertfetch_noclamp 14, d3[2] + vertfetch_noclamp 13, d3[1] + vertfetch_noclamp 12, d3[0] + vertfetch_noclamp 11, d2[3] + vertfetch_noclamp 10, d2[2] + vertfetch_noclamp 9, d2[1] + vertfetch_noclamp 8, d2[0] + vertfetch_noclamp 7, d1[3] + vertfetch_noclamp 6, d1[2] + vertfetch_noclamp 5, d1[1] + vertfetch_noclamp 4, d1[0] + vertfetch_noclamp 3, d0[3] + vertfetch_noclamp 2, d0[2] + vertfetch_noclamp 1, d0[1] + vertfetch_noclamp 0, d0[0] + \labelnc : + + .purgem vertfetch_clamped + .purgem vertfetch_noclamp + + /* Narrow the four 32-bit sums into q10,q11 with rounding and saturation, + * dropping the low 16 - FRACTION_BITS bits. + */ + 2: vqrshrn.u32 d20, q12, #16 - FRACTION_BITS + vqrshrn.u32 d21, q13, #16 - FRACTION_BITS + vqrshrn.u32 d22, q14, #16 - FRACTION_BITS + vqrshrn.u32 d23, q15, #16 - FRACTION_BITS +.endm /*}}}*/ + +/* Some portion of the convolution window (as much as will fit, and all of it + * for the uchar1 cases) is kept in the register file to avoid unnecessary + * memory accesses. This forces the horizontal loops to be unrolled because + * there's no indexed addressing into the register file. + * + * As in the fetch macro, the operations are ordered from outside to inside, so + * that jumping into the middle of the block bypasses the unwanted window taps. + * + * There are several variants of the macro because of the fixed offsets of the + * taps -- the wider the maximum radius the further the centre tap is from the + * most recently fetched data. This means that pre-filling the window requires + * more data that won't be used and it means that rotating the window involves + * more mov operations. + * + * When the window gets too big for the register file the buffer at [r9] is used.
+ * + * Input: + * q4-q11 -- convolution window + * r9 -- pointer to additional convolution window data + * Output: + * r9 -- updated buffer pointer (if used) + * d31 -- result to be stored + * Modifies: + * r12 -- temp buffer pointer + * q12-q13 -- temporaries for load and vext operations. + * q14-q15 -- intermediate sums + */ +#define TUNED_LIST1 8, 16 +.macro hconv1_8/*{{{*/ + vmull.u16 q14, d18, d0[0] + vmull.u16 q15, d19, d0[0] + + /* Computed branch on r5 (the radius, which must be at least 1). In ARM + * state pc reads as the current instruction plus 8, so the ldr picks + * entry r5 - 1 of the offset table at 100: and the add, for which pc reads + * as 100b, lands on the label for that radius. The bkpt fills the word + * that r5 == 0 would index and is never reached. + */ + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word 101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + .word 107f-100b + .word 108f-100b + 108: vmlal.u16 q14, d16, d2[0] + vmlal.u16 q15, d17, d2[0] + vmlal.u16 q14, d20, d2[0] + vmlal.u16 q15, d21, d2[0] + 107: vext.u16 q12, q8, q9, #1 + vext.u16 q13, q9, q10, #7 + vmlal.u16 q14, d24, d1[3] + vmlal.u16 q15, d25, d1[3] + vmlal.u16 q14, d26, d1[3] + vmlal.u16 q15, d27, d1[3] + 106: vext.u16 q12, q8, q9, #2 + vext.u16 q13, q9, q10, #6 + vmlal.u16 q14, d24, d1[2] + vmlal.u16 q15, d25, d1[2] + vmlal.u16 q14, d26, d1[2] + vmlal.u16 q15, d27, d1[2] + 105: vext.u16 q12, q8, q9, #3 + vext.u16 q13, q9, q10, #5 + vmlal.u16 q14, d24, d1[1] + vmlal.u16 q15, d25, d1[1] + vmlal.u16 q14, d26, d1[1] + vmlal.u16 q15, d27, d1[1] + 104: //vext.u16 q12, q8, q9, #4 + //vext.u16 q13, q9, q10, #4 + vmlal.u16 q14, d17, d1[0] + vmlal.u16 q15, d18, d1[0] + vmlal.u16 q14, d19, d1[0] + vmlal.u16 q15, d20, d1[0] + 103: vext.u16 q12, q8, q9, #5 + vext.u16 q13, q9, q10, #3 + vmlal.u16 q14, d24, d0[3] + vmlal.u16 q15, d25, d0[3] + vmlal.u16 q14, d26, d0[3] + vmlal.u16 q15, d27, d0[3] + 102: vext.u16 q12, q8, q9, #6 + vext.u16 q13, q9, q10, #2 + vmlal.u16 q14, d24, d0[2] + vmlal.u16 q15, d25, d0[2] + vmlal.u16 q14, d26, d0[2] + vmlal.u16 q15, d27, d0[2] + 101: vext.u16 q12, q8, q9, #7 + vext.u16 q13, q9, q10, #1 + vmlal.u16 q14, d24, d0[1] + vmlal.u16 q15, d25, d0[1] + vmlal.u16 q14, d26, d0[1] + vmlal.u16 q15, d27, d0[1] + + vqrshrn.u32 d28, q14, #16 +
vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + /* d31 now holds the eight output bytes. Slide the window along by one + * register (q8 <- q9 <- q10 <- q11). + */ + vmov q8, q9 + vmov q9, q10 + vmov q10, q11 +.endm/*}}}*/ + +.macro hconv1_16/*{{{*/ + vmull.u16 q14, d16, d0[0] + vmull.u16 q15, d17, d0[0] + + /* Computed branch on r5 (the radius, which must be at least 1). In ARM + * state pc reads as the current instruction plus 8, so the ldr picks + * entry r5 - 1 of the offset table at 100: and the add, for which pc reads + * as 100b, lands on the label for that radius. The bkpt fills the word + * that r5 == 0 would index and is never reached. + */ + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word 101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + .word 107f-100b + .word 108f-100b + .word 109f-100b + .word 110f-100b + .word 111f-100b + .word 112f-100b + .word 113f-100b + .word 114f-100b + .word 115f-100b + .word 116f-100b + 116: //vext.u16 q12, q6, q7, #0 + //vext.u16 q13, q10, q11, #0 + vmlal.u16 q14, d12, d4[0] + vmlal.u16 q15, d13, d4[0] + vmlal.u16 q14, d20, d4[0] + vmlal.u16 q15, d21, d4[0] + 115: vext.u16 q12, q6, q7, #1 + vext.u16 q13, q9, q10, #7 + vmlal.u16 q14, d24, d3[3] + vmlal.u16 q15, d25, d3[3] + vmlal.u16 q14, d26, d3[3] + vmlal.u16 q15, d27, d3[3] + 114: vext.u16 q12, q6, q7, #2 + vext.u16 q13, q9, q10, #6 + vmlal.u16 q14, d24, d3[2] + vmlal.u16 q15, d25, d3[2] + vmlal.u16 q14, d26, d3[2] + vmlal.u16 q15, d27, d3[2] + 113: vext.u16 q12, q6, q7, #3 + vext.u16 q13, q9, q10, #5 + vmlal.u16 q14, d24, d3[1] + vmlal.u16 q15, d25, d3[1] + vmlal.u16 q14, d26, d3[1] + vmlal.u16 q15, d27, d3[1] + 112: //vext.u16 q12, q6, q7, #4 + //vext.u16 q13, q9, q10, #4 + vmlal.u16 q14, d13, d3[0] + vmlal.u16 q15, d14, d3[0] + vmlal.u16 q14, d19, d3[0] + vmlal.u16 q15, d20, d3[0] + 111: vext.u16 q12, q6, q7, #5 + vext.u16 q13, q9, q10, #3 + vmlal.u16 q14, d24, d2[3] + vmlal.u16 q15, d25, d2[3] + vmlal.u16 q14, d26, d2[3] + vmlal.u16 q15, d27, d2[3] + 110: vext.u16 q12, q6, q7, #6 + vext.u16 q13, q9, q10, #2 + vmlal.u16 q14, d24, d2[2] + vmlal.u16 q15, d25, d2[2] + vmlal.u16 q14, d26, d2[2] + vmlal.u16 q15, d27, d2[2] + 109: vext.u16 q12, q6, q7, #7 + vext.u16 q13, q9, q10, #1 + vmlal.u16 q14, d24, d2[1] + vmlal.u16 q15, d25, d2[1] + vmlal.u16 q14, d26, d2[1] + vmlal.u16 q15, d27, d2[1] + 108: //vext.u16 q12, q7, q8, #0 + //vext.u16 q13, q9, 
q10, #0 + vmlal.u16 q14, d14, d2[0] + vmlal.u16 q15, d15, d2[0] + vmlal.u16 q14, d18, d2[0] + vmlal.u16 q15, d19, d2[0] + 107: vext.u16 q12, q7, q8, #1 + vext.u16 q13, q8, q9, #7 + vmlal.u16 q14, d24, d1[3] + vmlal.u16 q15, d25, d1[3] + vmlal.u16 q14, d26, d1[3] + vmlal.u16 q15, d27, d1[3] + 106: vext.u16 q12, q7, q8, #2 + vext.u16 q13, q8, q9, #6 + vmlal.u16 q14, d24, d1[2] + vmlal.u16 q15, d25, d1[2] + vmlal.u16 q14, d26, d1[2] + vmlal.u16 q15, d27, d1[2] + 105: vext.u16 q12, q7, q8, #3 + vext.u16 q13, q8, q9, #5 + vmlal.u16 q14, d24, d1[1] + vmlal.u16 q15, d25, d1[1] + vmlal.u16 q14, d26, d1[1] + vmlal.u16 q15, d27, d1[1] + 104: //vext.u16 q12, q7, q8, #4 + //vext.u16 q13, q8, q9, #4 + vmlal.u16 q14, d15, d1[0] + vmlal.u16 q15, d16, d1[0] + vmlal.u16 q14, d17, d1[0] + vmlal.u16 q15, d18, d1[0] + 103: vext.u16 q12, q7, q8, #5 + vext.u16 q13, q8, q9, #3 + vmlal.u16 q14, d24, d0[3] + vmlal.u16 q15, d25, d0[3] + vmlal.u16 q14, d26, d0[3] + vmlal.u16 q15, d27, d0[3] + 102: vext.u16 q12, q7, q8, #6 + vext.u16 q13, q8, q9, #2 + vmlal.u16 q14, d24, d0[2] + vmlal.u16 q15, d25, d0[2] + vmlal.u16 q14, d26, d0[2] + vmlal.u16 q15, d27, d0[2] + 101: vext.u16 q12, q7, q8, #7 + vext.u16 q13, q8, q9, #1 + vmlal.u16 q14, d24, d0[1] + vmlal.u16 q15, d25, d0[1] + vmlal.u16 q14, d26, d0[1] + vmlal.u16 q15, d27, d0[1] + + vqrshrn.u32 d28, q14, #16 + vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + /* d31 now holds the eight output bytes. Slide the window along by one + * register (q6 <- q7 <- ... <- q11). + */ + vmov q6, q7 + vmov q7, q8 + vmov q8, q9 + vmov q9, q10 + vmov q10, q11 +.endm/*}}}*/ + +.macro hconv1_25/*{{{*/ + vext.u16 q12, q6, q7, #7 + vmull.u16 q14, d24, d0[0] + vmull.u16 q15, d25, d0[0] + + /* Computed branch on r5 (the radius, which must be at least 1). In ARM + * state pc reads as the current instruction plus 8, so the ldr picks + * entry r5 - 1 of the offset table at 100: and the add, for which pc reads + * as 100b, lands on the label for that radius. The bkpt fills the word + * that r5 == 0 would index and is never reached. + */ + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word 101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + .word 107f-100b + .word 108f-100b + .word 109f-100b + .word 110f-100b + .word 111f-100b + .word 112f-100b + .word 113f-100b + .word 114f-100b + .word 115f-100b + .word 116f-100b + .word 117f-100b + .word
118f-100b + .word 119f-100b + .word 120f-100b + .word 121f-100b + .word 122f-100b + .word 123f-100b + .word 124f-100b + .word 125f-100b + 125: vext.u16 q12, q3, q4, #6 + vext.u16 q13, q10, q11, #0 + vmlal.u16 q14, d24, d6[1] + vmlal.u16 q15, d25, d6[1] + vmlal.u16 q14, d26, d6[1] + vmlal.u16 q15, d27, d6[1] + 124: vext.u16 q12, q3, q4, #7 + vext.u16 q13, q9, q10, #7 + vmlal.u16 q14, d24, d6[0] + vmlal.u16 q15, d25, d6[0] + vmlal.u16 q14, d26, d6[0] + vmlal.u16 q15, d27, d6[0] + 123: vext.u16 q12, q4, q5, #0 + vext.u16 q13, q9, q10, #6 + vmlal.u16 q14, d24, d5[3] + vmlal.u16 q15, d25, d5[3] + vmlal.u16 q14, d26, d5[3] + vmlal.u16 q15, d27, d5[3] + 122: vext.u16 q12, q4, q5, #1 + vext.u16 q13, q9, q10, #5 + vmlal.u16 q14, d24, d5[2] + vmlal.u16 q15, d25, d5[2] + vmlal.u16 q14, d26, d5[2] + vmlal.u16 q15, d27, d5[2] + 121: vext.u16 q12, q4, q5, #2 + vext.u16 q13, q9, q10, #4 + vmlal.u16 q14, d24, d5[1] + vmlal.u16 q15, d25, d5[1] + vmlal.u16 q14, d26, d5[1] + vmlal.u16 q15, d27, d5[1] + 120: vext.u16 q12, q4, q5, #3 + vext.u16 q13, q9, q10, #3 + vmlal.u16 q14, d24, d5[0] + vmlal.u16 q15, d25, d5[0] + vmlal.u16 q14, d26, d5[0] + vmlal.u16 q15, d27, d5[0] + 119: vext.u16 q12, q4, q5, #4 + vext.u16 q13, q9, q10, #2 + vmlal.u16 q14, d24, d4[3] + vmlal.u16 q15, d25, d4[3] + vmlal.u16 q14, d26, d4[3] + vmlal.u16 q15, d27, d4[3] + 118: vext.u16 q12, q4, q5, #5 + vext.u16 q13, q9, q10, #1 + vmlal.u16 q14, d24, d4[2] + vmlal.u16 q15, d25, d4[2] + vmlal.u16 q14, d26, d4[2] + vmlal.u16 q15, d27, d4[2] + 117: vext.u16 q12, q4, q5, #6 + vext.u16 q13, q9, q10, #0 + vmlal.u16 q14, d24, d4[1] + vmlal.u16 q15, d25, d4[1] + vmlal.u16 q14, d26, d4[1] + vmlal.u16 q15, d27, d4[1] + 116: vext.u16 q12, q4, q5, #7 + vext.u16 q13, q8, q9, #7 + vmlal.u16 q14, d24, d4[0] + vmlal.u16 q15, d25, d4[0] + vmlal.u16 q14, d26, d4[0] + vmlal.u16 q15, d27, d4[0] + 115: vext.u16 q12, q5, q6, #0 + vext.u16 q13, q8, q9, #6 + vmlal.u16 q14, d24, d3[3] + vmlal.u16 q15, d25, d3[3] + vmlal.u16 q14, d26, d3[3] 
+ vmlal.u16 q15, d27, d3[3] + 114: vext.u16 q12, q5, q6, #1 + vext.u16 q13, q8, q9, #5 + vmlal.u16 q14, d24, d3[2] + vmlal.u16 q15, d25, d3[2] + vmlal.u16 q14, d26, d3[2] + vmlal.u16 q15, d27, d3[2] + 113: vext.u16 q12, q5, q6, #2 + vext.u16 q13, q8, q9, #4 + vmlal.u16 q14, d24, d3[1] + vmlal.u16 q15, d25, d3[1] + vmlal.u16 q14, d26, d3[1] + vmlal.u16 q15, d27, d3[1] + 112: vext.u16 q12, q5, q6, #3 + vext.u16 q13, q8, q9, #3 + vmlal.u16 q14, d24, d3[0] + vmlal.u16 q15, d25, d3[0] + vmlal.u16 q14, d26, d3[0] + vmlal.u16 q15, d27, d3[0] + 111: vext.u16 q12, q5, q6, #4 + vext.u16 q13, q8, q9, #2 + vmlal.u16 q14, d24, d2[3] + vmlal.u16 q15, d25, d2[3] + vmlal.u16 q14, d26, d2[3] + vmlal.u16 q15, d27, d2[3] + 110: vext.u16 q12, q5, q6, #5 + vext.u16 q13, q8, q9, #1 + vmlal.u16 q14, d24, d2[2] + vmlal.u16 q15, d25, d2[2] + vmlal.u16 q14, d26, d2[2] + vmlal.u16 q15, d27, d2[2] + 109: vext.u16 q12, q5, q6, #6 + vext.u16 q13, q8, q9, #0 + vmlal.u16 q14, d24, d2[1] + vmlal.u16 q15, d25, d2[1] + vmlal.u16 q14, d26, d2[1] + vmlal.u16 q15, d27, d2[1] + 108: vext.u16 q12, q5, q6, #7 + vext.u16 q13, q7, q8, #7 + vmlal.u16 q14, d24, d2[0] + vmlal.u16 q15, d25, d2[0] + vmlal.u16 q14, d26, d2[0] + vmlal.u16 q15, d27, d2[0] + 107: vext.u16 q12, q6, q7, #0 + vext.u16 q13, q7, q8, #6 + vmlal.u16 q14, d24, d1[3] + vmlal.u16 q15, d25, d1[3] + vmlal.u16 q14, d26, d1[3] + vmlal.u16 q15, d27, d1[3] + 106: vext.u16 q12, q6, q7, #1 + vext.u16 q13, q7, q8, #5 + vmlal.u16 q14, d24, d1[2] + vmlal.u16 q15, d25, d1[2] + vmlal.u16 q14, d26, d1[2] + vmlal.u16 q15, d27, d1[2] + 105: vext.u16 q12, q6, q7, #2 + vext.u16 q13, q7, q8, #4 + vmlal.u16 q14, d24, d1[1] + vmlal.u16 q15, d25, d1[1] + vmlal.u16 q14, d26, d1[1] + vmlal.u16 q15, d27, d1[1] + 104: vext.u16 q12, q6, q7, #3 + vext.u16 q13, q7, q8, #3 + vmlal.u16 q14, d24, d1[0] + vmlal.u16 q15, d25, d1[0] + vmlal.u16 q14, d26, d1[0] + vmlal.u16 q15, d27, d1[0] + 103: vext.u16 q12, q6, q7, #4 + vext.u16 q13, q7, q8, #2 + vmlal.u16 q14, d24, d0[3] + 
vmlal.u16 q15, d25, d0[3] + vmlal.u16 q14, d26, d0[3] + vmlal.u16 q15, d27, d0[3] + 102: vext.u16 q12, q6, q7, #5 + vext.u16 q13, q7, q8, #1 + vmlal.u16 q14, d24, d0[2] + vmlal.u16 q15, d25, d0[2] + vmlal.u16 q14, d26, d0[2] + vmlal.u16 q15, d27, d0[2] + 101: vext.u16 q12, q6, q7, #6 + vext.u16 q13, q7, q8, #0 + vmlal.u16 q14, d24, d0[1] + vmlal.u16 q15, d25, d0[1] + vmlal.u16 q14, d26, d0[1] + vmlal.u16 q15, d27, d0[1] + + vqrshrn.u32 d28, q14, #16 + vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + /* Slide the window along by one register. d7 is not referenced as a + * coefficient anywhere in this macro; it receives the upper half of the + * outgoing q4 so that taps 124 and 125 can still reach that data through + * q3. + */ + vmov d7, d9 + vmov q4, q5 + vmov q5, q6 + vmov q6, q7 + vmov q7, q8 + vmov q8, q9 + vmov q9, q10 + vmov q10, q11 +.endm/*}}}*/ + +#define TUNED_LIST4 6, 12 +.macro hconv4_6/*{{{*/ + vmull.u16 q14, d14, d0[0] + vmull.u16 q15, d15, d0[0] + + /* Computed branch on r5 (the radius, which must be at least 1). In ARM + * state pc reads as the current instruction plus 8, so the ldr picks + * entry r5 - 1 of the offset table at 100: and the add, for which pc reads + * as 100b, lands on the label for that radius. The bkpt fills the word + * that r5 == 0 would index and is never reached. + */ + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word 101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + 106: vmlal.u16 q14, d8, d1[2] + vmlal.u16 q15, d9, d1[2] + vmlal.u16 q14, d20, d1[2] + vmlal.u16 q15, d21, d1[2] + 105: vmlal.u16 q14, d9, d1[1] + vmlal.u16 q15, d10, d1[1] + vmlal.u16 q14, d19, d1[1] + vmlal.u16 q15, d20, d1[1] + 104: vmlal.u16 q14, d10, d1[0] + vmlal.u16 q15, d11, d1[0] + vmlal.u16 q14, d18, d1[0] + vmlal.u16 q15, d19, d1[0] + 103: vmlal.u16 q14, d11, d0[3] + vmlal.u16 q15, d12, d0[3] + vmlal.u16 q14, d17, d0[3] + vmlal.u16 q15, d18, d0[3] + 102: vmlal.u16 q14, d12, d0[2] + vmlal.u16 q15, d13, d0[2] + vmlal.u16 q14, d16, d0[2] + vmlal.u16 q15, d17, d0[2] + 101: vmlal.u16 q14, d13, d0[1] + vmlal.u16 q15, d14, d0[1] + vmlal.u16 q14, d15, d0[1] + vmlal.u16 q15, d16, d0[1] + + vqrshrn.u32 d28, q14, #16 + vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + vmov q4, q5 + vmov q5, q6 + vmov q6, q7 + vmov q7, q8 + vmov q8, q9 + vmov q9, q10 + vmov q10, q11 +.endm/*}}}*/ + +.macro hconv4_12/*{{{*/ + vmull.u16 q14, d8, d0[0] + vmull.u16 q15, d9, d0[0] + + /* Computed branch on r5 (the radius, which must be at least 1). In ARM + * state pc reads as the current instruction plus 8, so the ldr picks + * entry r5 - 1 of the offset table at 100: and the add, for which pc reads + * as 100b, lands on the label for that radius. The bkpt fills the word + * that r5 == 0 would index and is never reached. + */ + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word
101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + .word 107f-100b + .word 108f-100b + .word 109f-100b + .word 110f-100b + .word 111f-100b + .word 112f-100b + /* The left-hand taps are read from the buffer at r9 rather than from + * registers. Each address is r9 plus a fixed offset with bit 9 then + * cleared, which wraps the pointer; this presumably relies on the buffer + * being 0x200 bytes long with bit 9 of its base address clear (to be + * confirmed against the caller). Where the 16 bytes wanted are only + * 8-byte aligned they are read as two halves with a second wrap between + * them. + */ + 112: add r12, r9, #0x1a0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d3[0] + vmlal.u16 q15, d25, d3[0] + vmlal.u16 q14, d20, d3[0] + vmlal.u16 q15, d21, d3[0] + 111: add r12, r9, #0x1a8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmlal.u16 q14, d24, d2[3] + vmlal.u16 q15, d25, d2[3] + vmlal.u16 q14, d19, d2[3] + vmlal.u16 q15, d20, d2[3] + 110: add r12, r9, #0x1b0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d2[2] + vmlal.u16 q15, d25, d2[2] + vmlal.u16 q14, d18, d2[2] + vmlal.u16 q15, d19, d2[2] + 109: add r12, r9, #0x1b8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmlal.u16 q14, d24, d2[1] + vmlal.u16 q15, d25, d2[1] + vmlal.u16 q14, d17, d2[1] + vmlal.u16 q15, d18, d2[1] + 108: add r12, r9, #0x1c0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d2[0] + vmlal.u16 q15, d25, d2[0] + vmlal.u16 q14, d16, d2[0] + vmlal.u16 q15, d17, d2[0] + 107: add r12, r9, #0x1c8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmlal.u16 q14, d24, d1[3] + vmlal.u16 q15, d25, d1[3] + vmlal.u16 q14, d15, d1[3] + vmlal.u16 q15, d16, d1[3] + 106: add r12, r9, #0x1d0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d1[2] + vmlal.u16 q15, d25, d1[2] + vmlal.u16 q14, d14, d1[2] + vmlal.u16 q15, d15, d1[2] + 105: add r12, r9, #0x1d8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]!
+ bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmlal.u16 q14, d24, d1[1] + vmlal.u16 q15, d25, d1[1] + vmlal.u16 q14, d13, d1[1] + vmlal.u16 q15, d14, d1[1] + 104: add r12, r9, #0x1e0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d1[0] + vmlal.u16 q15, d25, d1[0] + vmlal.u16 q14, d12, d1[0] + vmlal.u16 q15, d13, d1[0] + 103: add r12, r9, #0x1e8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmlal.u16 q14, d24, d0[3] + vmlal.u16 q15, d25, d0[3] + vmlal.u16 q14, d11, d0[3] + vmlal.u16 q15, d12, d0[3] + 102: add r12, r9, #0x1f0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d0[2] + vmlal.u16 q15, d25, d0[2] + vmlal.u16 q14, d10, d0[2] + vmlal.u16 q15, d11, d0[2] + 101: add r12, r9, #0x1f8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64] + vmlal.u16 q14, d24, d0[1] + vmlal.u16 q15, d8, d0[1] + vmlal.u16 q14, d9, d0[1] + vmlal.u16 q15, d10, d0[1] + + vqrshrn.u32 d28, q14, #16 + vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + /* Retire q4 into the buffer at r9 and advance r9 with the same bit 9 + * wrap, then slide the register window along by one register. + */ + vst1.u8 {q4}, [r9:128]! + bic r9, r9, #0x200 + vmov q4, q5 + vmov q5, q6 + vmov q6, q7 + vmov q7, q8 + vmov q8, q9 + vmov q9, q10 + vmov q10, q11 +.endm/*}}}*/ + +.macro hconv4_25/*{{{*/ + add r12, r9, #0x198 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]!
+ bic r12, r12, #0x200 + vld1.u16 {d25}, [r12:64] + vmull.u16 q14, d24, d0[0] + vmull.u16 q15, d25, d0[0] + + ldr r12, [pc, r5, LSL #2] + add pc, pc, r12 + bkpt + 100: .word 101f-100b + .word 102f-100b + .word 103f-100b + .word 104f-100b + .word 105f-100b + .word 106f-100b + .word 107f-100b + .word 108f-100b + .word 109f-100b + .word 110f-100b + .word 111f-100b + .word 112f-100b + .word 113f-100b + .word 114f-100b + .word 115f-100b + .word 116f-100b + .word 117f-100b + .word 118f-100b + .word 119f-100b + .word 120f-100b + .word 121f-100b + .word 122f-100b + .word 123f-100b + .word 124f-100b + .word 125f-100b + 125: add r12, r9, #0x0d0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d6[1] + vmlal.u16 q15, d25, d6[1] + vmlal.u16 q14, d20, d6[1] + vmlal.u16 q15, d21, d6[1] + 124: add r12, r9, #0x0d8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d6[0] + vmlal.u16 q15, d25, d6[0] + vmlal.u16 q14, d19, d6[0] + vmlal.u16 q15, d20, d6[0] + 123: add r12, r9, #0x0e0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d5[3] + vmlal.u16 q15, d25, d5[3] + vmlal.u16 q14, d18, d5[3] + vmlal.u16 q15, d19, d5[3] + 122: add r12, r9, #0x0e8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d5[2] + vmlal.u16 q15, d25, d5[2] + vmlal.u16 q14, d17, d5[2] + vmlal.u16 q15, d18, d5[2] + 121: add r12, r9, #0x0f0 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d5[1] + vmlal.u16 q15, d25, d5[1] + vmlal.u16 q14, d16, d5[1] + vmlal.u16 q15, d17, d5[1] + 120: add r12, r9, #0x0f8 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! 
+ bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d5[0] + vmlal.u16 q15, d25, d5[0] + vmlal.u16 q14, d15, d5[0] + vmlal.u16 q15, d16, d5[0] + 119: add r12, r9, #0x100 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d4[3] + vmlal.u16 q15, d25, d4[3] + vmlal.u16 q14, d14, d4[3] + vmlal.u16 q15, d15, d4[3] + 118: add r12, r9, #0x108 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d4[2] + vmlal.u16 q15, d25, d4[2] + vmlal.u16 q14, d13, d4[2] + vmlal.u16 q15, d14, d4[2] + 117: add r12, r9, #0x110 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d4[1] + vmlal.u16 q15, d25, d4[1] + vmlal.u16 q14, d12, d4[1] + vmlal.u16 q15, d13, d4[1] + 116: add r12, r9, #0x118 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d4[0] + vmlal.u16 q15, d25, d4[0] + vmlal.u16 q14, d11, d4[0] + vmlal.u16 q15, d12, d4[0] + 115: add r12, r9, #0x120 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d3[3] + vmlal.u16 q15, d25, d3[3] + vmlal.u16 q14, d10, d3[3] + vmlal.u16 q15, d11, d3[3] + 114: add r12, r9, #0x128 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + vmlal.u16 q14, d24, d3[2] + vmlal.u16 q15, d25, d3[2] + vmlal.u16 q14, d9, d3[2] + vmlal.u16 q15, d10, d3[2] + 113: add r12, r9, #0x130 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + vmlal.u16 q14, d24, d3[1] + vmlal.u16 q15, d25, d3[1] + vmlal.u16 q14, d8, d3[1] + vmlal.u16 q15, d9, d3[1] + 112: add r12, r9, #0x138 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1f8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64] + vmlal.u16 q14, d24, d3[0] + vmlal.u16 q15, d25, d3[0] + vmlal.u16 q14, d26, d3[0] @ Could be d7, without the load, right? 
+ vmlal.u16 q15, d8, d3[0] + 111: add r12, r9, #0x140 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + add r12, r9, #0x1f0 + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d2[3] + vmlal.u16 q15, d25, d2[3] + vmlal.u16 q14, d26, d2[3] + vmlal.u16 q15, d27, d2[3] + 110: add r12, r9, #0x148 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1e8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d27}, [r12:64] + vmlal.u16 q14, d24, d2[2] + vmlal.u16 q15, d25, d2[2] + vmlal.u16 q14, d26, d2[2] + vmlal.u16 q15, d27, d2[2] + 109: add r12, r9, #0x150 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + add r12, r9, #0x1e0 + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d2[1] + vmlal.u16 q15, d25, d2[1] + vmlal.u16 q14, d26, d2[1] + vmlal.u16 q15, d27, d2[1] + 108: add r12, r9, #0x158 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1d8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d27}, [r12:64] + vmlal.u16 q14, d24, d2[0] + vmlal.u16 q15, d25, d2[0] + vmlal.u16 q14, d26, d2[0] + vmlal.u16 q15, d27, d2[0] + 107: add r12, r9, #0x160 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + add r12, r9, #0x1d0 + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d1[3] + vmlal.u16 q15, d25, d1[3] + vmlal.u16 q14, d26, d1[3] + vmlal.u16 q15, d27, d1[3] + 106: add r12, r9, #0x168 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1c8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64]! 
+ bic r12, r12, #0x200 + vld1.u16 {d27}, [r12:64] + vmlal.u16 q14, d24, d1[2] + vmlal.u16 q15, d25, d1[2] + vmlal.u16 q14, d26, d1[2] + vmlal.u16 q15, d27, d1[2] + 105: add r12, r9, #0x170 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + add r12, r9, #0x1c0 + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d1[1] + vmlal.u16 q15, d25, d1[1] + vmlal.u16 q14, d26, d1[1] + vmlal.u16 q15, d27, d1[1] + 104: add r12, r9, #0x178 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1b8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d27}, [r12:64] + vmlal.u16 q14, d24, d1[0] + vmlal.u16 q15, d25, d1[0] + vmlal.u16 q14, d26, d1[0] + vmlal.u16 q15, d27, d1[0] + 103: add r12, r9, #0x180 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128] + add r12, r9, #0x1b0 + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d0[3] + vmlal.u16 q15, d25, d0[3] + vmlal.u16 q14, d26, d0[3] + vmlal.u16 q15, d27, d0[3] + 102: add r12, r9, #0x188 + bic r12, r12, #0x200 + vld1.u16 {d24}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d25}, [r12] + add r12, r9, #0x1a8 + bic r12, r12, #0x200 + vld1.u16 {d26}, [r12:64]! + bic r12, r12, #0x200 + vld1.u16 {d27}, [r12:64] + vmlal.u16 q14, d24, d0[2] + vmlal.u16 q15, d25, d0[2] + vmlal.u16 q14, d26, d0[2] + vmlal.u16 q15, d27, d0[2] + 101: add r12, r9, #0x190 + bic r12, r12, #0x200 + vld1.u16 {d24,d25}, [r12:128]! + bic r12, r12, #0x200 + vld1.u16 {d26,d27}, [r12:128] + vmlal.u16 q14, d24, d0[1] + vmlal.u16 q15, d25, d0[1] + vmlal.u16 q14, d26, d0[1] + vmlal.u16 q15, d27, d0[1] + + vqrshrn.u32 d28, q14, #16 + vqrshrn.u32 d29, q15, #16 + vqrshrn.u16 d31, q14, #FRACTION_BITS + + vst1.u8 {q4}, [r9:128]! 
+        bic         r9, r9, #0x200           /* clear bit 9 so the spill-buffer pointer wraps */
+        vmov        q4, q5                   /* slide the register window down by one q register */
+        vmov        q5, q6
+        vmov        q6, q7
+        vmov        q7, q8
+        vmov        q8, q9
+        vmov        q9, q10
+        vmov        q10, q11
+.endm/*}}}*/
+
+/* Dedicated function wrapper for the fetch macro, for the cases where
+ * performance isn't that important, to keep code size down.
+ * r10 and r11 are saved and restored around the macro expansion.
+ */
+PRIVATE(fetch_generic_asm)
+        push        {r10,r11}
+        fetch
+        pop         {r10,r11}
+        bx          lr
+END(fetch_generic_asm)
+
+
+/* Fetch the next (16 - (r10 & 15)) columns of data, avoiding reading memory
+ * beyond that limit, and filling the rest of the vector with the first legal
+ * pixel.
+ * Result is in q10 and q11. q8 and q9 are filled with the first legal pixel.
+ * r1 and r10 are both reduced by (r10 & 15) on the way out.
+ * Note: This function can read beyond the right edge of input if the image is
+ * narrower than 16 bytes.
+ */
+PRIVATE(fetch_clampleft1)
+        push        {r12,lr}
+        bl          fetch_generic_asm
+        vdup.u16    q8, d20[0]               /* q8,q9 = first fetched element in every lane */
+        vdup.u16    q9, d20[0]
+        ands        r12, r10, #15            /* r12 = r10 & 15 */
+        beq         1f                       /* zero: return the fetched chunk unchanged */
+        sub         r1, r1, r12
+        sub         r10, r10, r12
+        sub         sp, sp, #32
+        vst1.u16    {q10,q11}, [sp]          /* data in the upper 32 bytes ... */
+        sub         r12, sp, r12, LSL #1     /* r12 halfwords below the start of the data */
+        sub         sp, sp, #32
+        vst1.u16    {q8,q9}, [sp]            /* ... padding in the 32 bytes directly below */
+        vld1.u16    {q10,q11}, [r12]         /* reload shifted: padding, then data */
+        add         sp, sp, #64
+1:      pop         {r12,pc}
+END(fetch_clampleft1)
+
+PRIVATE(fetch_clampleft4)                    /* as fetch_clampleft1, but the padding unit is the 4-halfword group in d20 */
+        push        {r12,lr}
+        bl          fetch_generic_asm
+        vmov.u16    d16, d20                 /* q8,q9 = four copies of d20 */
+        vmov.u16    d17, d20
+        vmov.u16    d18, d20
+        vmov.u16    d19, d20
+        ands        r12, r10, #15            /* r12 = r10 & 15 */
+        beq         1f                       /* zero: return the fetched chunk unchanged */
+        sub         r1, r1, r12
+        sub         r10, r10, r12
+        sub         sp, sp, #32
+        vst1.u16    {q10-q11}, [sp]          /* data in the upper 32 bytes ... */
+        sub         r12, sp, r12, LSL #1     /* r12 halfwords below the start of the data */
+        sub         sp, sp, #32
+        vst1.u16    {q8,q9}, [sp]            /* ... padding in the 32 bytes directly below */
+        vld1.u16    {q10,q11}, [r12]         /* reload shifted: padding, then data */
+        add         sp, sp, #64
+1:      pop         {r12,pc}
+END(fetch_clampleft4)
+
+/* Fetch only the next (r11 & 15) (where 0 means 16) columns of data, avoiding
+ * reading memory beyond that limit, and filling the rest of the vector with
+ * the last legal pixel.
+ * Result is in q10 and q11. q12 and q13 are filled with the last legal pixel.
+ * Note: This function can read beyond the left edge of input if the image is
+ * narrower than 16 bytes.
+ */
+PRIVATE(fetch_clampright1)
+        push        {r12, lr}
+        rsb         r12, r11, #0             /* r12 = -r11 */
+        ands        r12, r12, #15            /* r12 = (-r11) & 15 */
+        beq         1f                       /* zero: a plain fetch is enough */
+        sub         r1, r1, r12              /* step the source back by r12 before fetching */
+        bl          fetch_generic_asm
+        vdup.u16    q12, d23[3]              /* q12,q13 = last fetched element in every lane */
+        vdup.u16    q13, d23[3]
+        rsb         r12, r11, #0             /* recompute r12 after the call */
+        and         r12, r12, #15
+        sub         sp, sp, #32
+        vst1.u16    {q12,q13}, [sp]          /* padding in the upper 32 bytes ... */
+        sub         sp, sp, #32
+        add         r12, sp, r12, LSL #1     /* r12 halfwords into the data */
+        vst1.u16    {q10,q11}, [sp]          /* ... data in the lower 32 bytes */
+        vld1.u16    {q10,q11}, [r12]         /* reload shifted: data tail, then padding */
+        add         sp, sp, #64
+        pop         {r12,pc}
+1:      bl          fetch_generic_asm        /* whole-chunk case: only the padding copy is prepared */
+        vdup.u16    q12, d23[3]
+        vdup.u16    q13, d23[3]
+        pop         {r12,pc}
+END(fetch_clampright1)
+
+PRIVATE(fetch_clampright4)                   /* as fetch_clampright1, but the padding unit is the 4-halfword group in d23 */
+        push        {r12, lr}
+        rsb         r12, r11, #0             /* r12 = -r11 */
+        ands        r12, r12, #15            /* r12 = (-r11) & 15 */
+        beq         1f                       /* zero: a plain fetch is enough */
+        sub         r1, r1, r12              /* step the source back by r12 before fetching */
+        bl          fetch_generic_asm
+        vmov.u16    d24, d23                 /* q12,q13 = four copies of d23 */
+        vmov.u16    d25, d23
+        vmov.u16    d26, d23
+        vmov.u16    d27, d23
+        rsb         r12, r11, #0             /* recompute r12 after the call */
+        and         r12, r12, #15
+        sub         sp, sp, #32
+        vst1.u16    {q12-q13}, [sp]          /* padding in the upper 32 bytes ... */
+        sub         sp, sp, #32
+        add         r12, sp, r12, LSL #1     /* r12 halfwords into the data */
+        vst1.u16    {q10,q11}, [sp]          /* ... data in the lower 32 bytes */
+        vld1.u16    {q10,q11}, [r12]         /* reload shifted: data tail, then padding */
+        add         sp, sp, #64
+        pop         {r12,pc}
+1:      bl          fetch_generic_asm        /* whole-chunk case: only the padding copy is prepared */
+        vmov.u16    d24, d23
+        vmov.u16    d25, d23
+        vmov.u16    d26, d23
+        vmov.u16    d27, d23
+        pop         {r12,pc}
+END(fetch_clampright4)
+
+/* Given values in q10 and q11, and an index in r11, sweep the (r11 & 15)th
+ * value across to fill the rest of the register pair. Used for filling the
+ * right hand edge of the window when reading too close to the right hand edge
+ * of the image.
+ * Also returns a dup-ed copy of the last element in q12 for the tail-fill
+ * case (this happens incidentally in common path, but must be done
+ * deliberately in the fast-out path).
+ */
+PRIVATE(prefill_sweepright1)
+        ands        r12, r11, #15
+        beq         1f                       /* r11 & 15 == 0: only the q12/q13 copy is needed */
+        sub         r12, r12, #1             /* index of the element to replicate */
+        sub         sp, sp, #64
+        vst1.u16    {q10,q11}, [sp]          /* spill the chunk */
+        add         r12, sp, r12, LSL #1     /* address of that element */
+        vld1.u16    {d24[],d25[]}, [r12]     /* broadcast it to every lane of q12 ... */
+        vld1.u16    {d26[],d27[]}, [r12]     /* ... and of q13 */
+        vst1.u16    {q12,q13}, [r12]         /* overwrite from that element onward */
+        vld1.u16    {q10,q11}, [sp]          /* reload the swept chunk */
+        add         sp, sp, #64
+        bx          lr
+1:      vdup.u16    q12, d23[3]
+        vdup.u16    q13, d23[3]
+        bx          lr
+END(prefill_sweepright1)
+
+PRIVATE(prefill_sweepright4)                 /* as prefill_sweepright1, replicating a 4-halfword group */
+        ands        r12, r11, #15
+        beq         1f                       /* r11 & 15 == 0: only the q12/q13 copy is needed */
+        sub         r12, r12, #4             /* index of the first halfword of the group to replicate */
+        sub         sp, sp, #64
+        vst1.u16    {q10,q11}, [sp]          /* spill the chunk */
+        add         r12, sp, r12, LSL #1     /* address of that group */
+        vld1.u64    {d24}, [r12]             /* the same 8 bytes into d24..d27 */
+        vld1.u64    {d25}, [r12]
+        vld1.u64    {d26}, [r12]
+        vld1.u64    {d27}, [r12]
+        vst1.u16    {q12,q13}, [r12]         /* overwrite from that group onward */
+        vld1.u16    {q10,q11}, [sp]          /* reload the swept chunk */
+        add         sp, sp, #64
+        bx          lr
+1:      vmov.u16    d24, d23
+        vmov.u16    d25, d23
+        vmov.u16    d26, d23
+        vmov.u16    d27, d23
+        bx          lr
+END(prefill_sweepright4)
+
+/* The main loop keeps a sliding window of data that has already been convolved
+ * in the vertical axis for the current line. This usually stays in the
+ * register file, but spills to memory for large windows. The first thing that
+ * needs to be done at start-up is to fill this window with image data, taking
+ * into account the padding needed if the left or right edges of the image fall
+ * within this window.
+ */
+
+/* Because the window is in the register file writes to it cannot be indexed
+ * by another register. Consequently the fill loops are unrolled to address
+ * the registers directly. This macro distinguishes between writes to the
+ * register file and writes to the spill buffer (indicated by a destination
+ * register named xx).
+ *   ra, rb   -- destination registers, or xx
+ *   sra, srb -- source q registers
+ *   srb_hi   -- d register moved to \rb when only \ra is xx (the callers
+ *               below always pass the upper half of srb)
+ */
+.macro prefill_out ra, rb, sra, srb, srb_hi
+  .ifc \ra,xx
+    .ifc \rb,xx
+        vst1.u16    {\sra,\srb}, [r9:128]!   /* both xx: store the pair to the buffer at r9 */
+    .else
+        /* this case is used only for the last tap of uchar1 r=25 */
+        /* discard \sra */
+        vmov.u16    \rb, \srb_hi
+    .endif
+  .else
+    .ifnc \ra,\sra                           /* no move is emitted when source and destination match */
+        vmov.u16    \ra, \sra
+    .endif
+    .ifnc \rb,\srb
+        vmov.u16    \rb, \srb
+    .endif
+  .endif
+.endm
+
+/* This macro provides the list of registers representing the window, and the
+ * cases where the register file is too small and a spill buffer is used
+ * instead.
+ * Since several specialisations of each function are generated, this also
+ * culls superfluous iterations, and sets the variable `i` for subsequent
+ * macros indicating the current index into the window.
+ */
+.macro prefill_list, macro, nextmacro, max_r, step, label
+    .macro ifneeded macro, nextmacro, line, nextline, ra, rb, step, label
+      .if windowsize >= (\line * 16)         /* emit this slot only if the window reaches it */
+        .set i, windowsize - (\line * 16)
+\label\macro\line:
+        prefill_\macro \label\nextmacro\line, \label\nextmacro\nextline, \ra, \rb, \step
+      .endif
+    .endm
+  .if \step > 1
+    ifneeded \macro \nextmacro, 13, 12, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 12, 11, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 11, 10, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 10, 9, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 9, 8, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 8, 7, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 7, 6, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 6, 5, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 5, 4, xx, xx, \step, \label
+    ifneeded \macro \nextmacro, 4, 3, xx, xx, \step, \label
+  .else
+    /* q3 normally contains the coefficient table, but it's not fully
+     * used. In the uchar1, r=25 case the other half of q3 is used for
+     * the last two window taps to avoid falling out to memory.
+     */
+    ifneeded \macro \nextmacro, 4, 3, xx, d7, \step, \label
+  .endif
+    ifneeded \macro \nextmacro, 3, 2, q4, q5, \step, \label
+    ifneeded \macro \nextmacro, 2, 1, q6, q7, \step, \label
+    ifneeded \macro \nextmacro, 1, 0, q8, q9, \step, \label
+
+\label\macro\()0:
+        b           \label\()_end
+    .purgem ifneeded
+.endm
+
+/* These macros represent the possible stages of filling the window.
+ * Each macro is unrolled enough times that it can fill the entire window
+ * itself, but normally it will have to hand control to subsequent macros
+ * part-way through and this is done using labels named \next and \after, where
+ * \next is the next macro starting at the same window position and \after is
+ * the next macro starting after the current window position.
+ */
+
+/* leftfill: q8 and q9 contain the left padding value. While the window
+ * extends outside of the image on the left-hand side, and at least 16 more
+ * padding values are needed in the window, store q8 and q9 into the window.
+ * Otherwise skip forward to storing image data.
+ */
+.macro prefill_leftfill, next, after, ra, rb, step
+        cmp         r10, #i+16
+        blo         \next                    /* image data begins before the end of this slot */
+        prefill_out \ra, \rb, q8, q9, d19
+.endm
+
+/* leftedge: The very first non-fill or partial-fill chunk from the image is
+ * already loaded (as it was used to calculate the left padding value), so
+ * store it here, and then drop into the regular load/store cycle in the next
+ * macro.
+ */
+.macro prefill_leftedge, next, after, ra, rb, step
+1:      prefill_out \ra, \rb, q10, q11, d23
+        b           \after
+.endm
+
+/* dofetch: Copy chunks of the image into the window without any complications
+ * from edge conditions.
+ */
+.macro prefill_dofetch, next, after, ra, rb, step
+        cmp         r11, #i+16
+        bls         \next                    /* image data ends at or before the end of this slot */
+        bl          fetch_generic_asm
+        prefill_out \ra, \rb, q10, q11, d23
+.endm
+
+/* rightedge: The last fetch (currently in q10 and q11) may have gone beyond
+ * the right-hand edge of the image. In that case sweep the last valid pixel
+ * across the rest of the chunk, and in either case prepare padding data in q12
+ * and q13 for the next macro. This is done in fetch_clampright.
+ * This only happens once before going on to the next macro.
+ * Sometimes leftedge also covers the rightedge case, in which case this has
+ * to be skipped altogether.
+ */
+.macro prefill_rightedge, next, after, ra, rb, step
+        cmp         r11, #i
+        bls         \next                    /* image data ends at or before the start of this slot */
+        bl          fetch_clampright\step
+        prefill_out \ra, \rb, q10, q11, d23
+        b           \after
+.endm
+
+/* rightfill: The rest of the window is simply filled with right padding from
+ * q12 and q13.
+ */
+.macro prefill_rightfill, next, after, ra, rb, step
+        prefill_out \ra, \rb, q12, q13, d25
+.endm
+
+/* Here all of the macros above are unrolled and laid out in the proper order.
+ */
+.macro prefill_body, max_r, step, label
+        prefill_list leftfill,  leftedge,   \max_r, \step, \label
+        prefill_list leftedge,  dofetch,    \max_r, \step, \label
+        prefill_list dofetch,   rightedge,  \max_r, \step, \label
+        prefill_list rightedge, rightfill,  \max_r, \step, \label
+        prefill_list rightfill, oops,       \max_r, \step, \label    /* rightfill uses neither \next nor \after, so no "oops" label is referenced */
+\label\()_end:
+.endm
+
+/* Fill the convolution window with context data. The aim here is to load
+ * exactly 2*r columns, and in the main loop to read as many columns as will be
+ * written. This is complicated by the window being divided into chunks at
+ * register boundaries, and the need to handle cases when the input starts very
+ * close to the left or right (or both) edges of the image and the need to fill
+ * the spaces that leaves with left and right edge padding values.
+ *
+ * Input:
+ * r1 -- src
+ * r2 -- pitch
+ * r3 -- count
+ * r4 -- available image data right of src pointer
+ * r5 -- r
+ * r6 -- rup
+ * r7 -- rdn
+ * r8 -- available image data left of src pointer
+ * r9 -- buffer (if needed)
+ * Output:
+ * r4 -= min(inlen, count + windowsize - centertap)
+ * r1 += min(inlen, count + windowsize - centertap)
+ * Modifies:
+ * r10 -- fill start index in the window
+ * r11 -- fill stop index in the window
+ * r12 -- scratch
+ */
+.macro prefill step=1, max_r=25, label=xx
+.set windowsize, (((\max_r + \max_r) * \step + 15) & ~15)    /* 2 * max_r * step, rounded up to a multiple of 16 */
+.set centertap, (windowsize - \max_r * \step)
+        mov         r10, #centertap
+        subs        r10, r10, r8
+        movlo       r10, #0                  /* r10 = max(centertap - r8, 0) */
+
+        subs        r11, r4, #windowsize - centertap
+        movhs       r11, #0
+        add         r11, r11, #windowsize    /* r11 = min(r4 + centertap, windowsize) */
+
+        /* r10 indicates where in the window legal image data begins.
+         * r11 indicates where in the window legal image data ends.
+         * When starting near the centre of a large image these would be
+         * zero and windowsize respectively, but when starting near the
+         * edges this can change.
+         * When starting on the leftmost pixel, r10 will be centertap.
+         * When starting on the rightmost pixel, r11 will be centertap+1.
+         */
+
+        /* r4 indicates how much data there is between the current pointers
+         * and the right edge of the image. The pointers currently point
+         * to the data needed at centertap. The subsequent code will
+         * consume (windowsize - r10) data, but only the data from
+         * centertap to windowsize comes out of r4's budget.
+         */
+1:      subs        r4, r4, #windowsize - centertap
+        movlo       r4, #0                   /* r4 = max(r4 - (windowsize - centertap), 0) */
+
+        /* And the pointers need to rewind to the start of the window.
+         */
+        sub         r1, r1, #centertap
+
+        /* Unless r8 indicated that there wasn't that much data available.
+         */
+        add         r1, r1, r10
+
+
+        /* Get the first chunk, and add padding to align it to the window
+         * if necessary.
+         */
+        bl          fetch_clampleft\step
+
+        /* Sometimes the start and the end of the window are in the same
+         * chunk. In that case both ends need filler at the outset.
+         */
+        sub         r12, r11, #1
+        eor         r12, r10, r12
+        cmp         r12, #16                 /* below 16: r10 and r11-1 differ only in their low four bits */
+        bllo        prefill_sweepright\step
+
+        /* Iterate through all the points in the window and fill them in
+         * with padding or image data as needed.
+         */
+        prefill_body \max_r, \step, \label
+.endm
+
+/* The main body of the convolve functions. Having already pre-filled the
+ * convolution window with 2*r input values, the logic settles into a regular
+ * pattern of reading and writing at a 1:1 rate until either input or output
+ * expires. The input leads the output by r values, so when processing all the
+ * way to the right-hand edge, or within r pixels of that edge, the input will
+ * run out first. In the case of very narrow images, or sub-windows starting
+ * near the right edge, the input may already have run out while the
+ * convolution window was being filled and this loop will start with a
+ * zero-length input.
+ *
+ * Once the input runs out, the rest of the output must be processed by padding
+ * the remainder of the window with pad value from the last valid pixel from
+ * the source.
+ *
+ * Input:
+ * r0 = dst
+ * r1 = src
+ * r2 = pitch
+ * r3 = count
+ * r4 = inlen
+ * r5 = r
+ * r6 = rup
+ * r7 = rdn
+ * r9 = buffer
+ * Modifies
+ * r8 = fetch code pointer
+ */
+.macro conv_body core, step=1, max_r=25, labelc="", labelnc=""
+
+        /* If r4 >= r3 then there's no need for clipping. The main loop
+         * needs to exit when either r3 or r4 runs out, so clamp r4 to be
+         * no greater than r3 and use r4 for the loop.
+         * However, if r4 comes out of the loop with less than 16 bytes
+         * left, a partial read would be necessary to avoid reading beyond
+         * the end of the image. To avoid this, clamp r4 to the next
+         * multiple of 16, which is still sufficient to force it out of the
+         * loop but doesn't imply a rewind.
+         */
+        add         r12, r3, #15
+        bic         r12, r12, #15            /* r12 = r3 rounded up to a multiple of 16 */
+        cmp         r4, r12
+        movhi       r4, r12                  /* r4 = min(r4, r12) */
+
+        /* First calculate the entry-point into the internal fetch logic.
+         * This is done so the same function can service several kernel
+         * sizes.
+         */
+        ldr         r8, 3f
+1:      add         r8, r8, pc               /* pc reads as 1b+8 here, so r8 = \labelnc */
+        sub         r8, r5, LSL #5
+        sub         r8, r5, LSL #4           /* r8 -= r * 48 in total */
+        cmp         r5, r6
+        cmpeq       r5, r7
+        beq         5f                       /* r == rup and r == rdn: keep this entry point */
+
+        /* if (r != rup || r != rdn) then the address-clamping table should
+         * be used rather than the short-cut version.
+         */
+        ldr         r8, 3f+4
+2:      add         r8, r8, pc               /* pc reads as 2b+8 here, so r8 = \labelc */
+        sub         r8, r5, LSL #6           /* r8 -= r * 64 */
+        b           5f
+        .align 3
+3:      .word       \labelnc-1b-8
+        .word       \labelc-2b-8
+
+        /* Main loop: ... */
+        .align 4
+3:      /* first perform a vertical convolution from memory to get the next
+         * 16 taps of the horizontal window into the register file...
+         */
+        fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8
+
+        /* ...then perform a horizontal convolution on that window to
+         * produce eight output bytes, and slide the window along.
+         * This has to be done twice to match the 16-way vertical pass.
+         * It would be preferable to have twice the work done in \core, but
+         * that would demand yet another variant on those macros and would
+         * perturb the register allocation severely.
+         */
+        \core
+        vst1.u8     {d31}, [r0]!
+        \core
+        vst1.u8     {d31}, [r0]!
+
+        sub         r3, r3, #16
+5:      subs        r4, r4, #16
+        bhi         3b                       /* loop while r4 was above 16 before the subtract */
+        /* Here there's 16 or fewer bytes available before the edge of the
+         * source image. r4 holds that count minus 16 (because it was
+         * decremented before the first iteration ran). The last read may
+         * not be a whole chunk, and beyond that a fill value must be used.
+         *
+         * Of course, none of that matters if there's no more output to
+         * produce...
+         */
+        cmp         r3, #0
+        beq         5f
+
+        /* Oh well. */
+        adds        r4, r4, #16              /* undo the last subtract */
+        bne         1f                       /* non-zero: some input is still left to fetch */
+  .if \step==1
+        vdup.u16    q10, d19[3]              /* no input left: fill q10,q11 from the top lane of q9 */
+        vdup.u16    q11, d19[3]
+  .else
+        vmov.u64    d20, d19                 /* no input left: fill q10,q11 with copies of d19 */
+        vmov.u64    d21, d19
+        vmov.u64    d22, d19
+        vmov.u64    d23, d19
+  .endif
+        b           3f
+
+        /* To avoid reading past end of input, rewind pointers by (16-r4)
+         * to ensure that they're exactly 16 bytes from the edge.
+         */
+1:      mov         r11, r4                  /* fetch_clampright takes its column count in r11 */
+        bl          fetch_clampright\step
+        /* Now to put this padding to use, perform any remaining
+         * iterations. This is done at half the rate of the main loop,
+         * because there's no longer pressure from a 16-lane window filler.
+         */
+3:      \core
+  .if \step==1
+        vdup.u16    q11, d23[3]              /* refill q11 from its own top lane */
+  .else
+        vmov.u64    d22, d23
+  .endif
+        subs        r3, r3, #8
+        blo         4f                       /* fewer than 8 outputs were still wanted */
+        vst1.u8     {d31}, [r0]!
+        bne         3b                       /* flags are still those of the subs above */
+        b           5f
+
+        /* If the final iteration contained 0 < l < 8 values, then perform
+         * a piecewise store of the final vector.
+         */
+4:      tst         r3, #4                   /* r3 is now (l - 8); its low three bits still equal l */
+        beq         1f
+        vst1.u32    {d31[0]}, [r0]!
+        vext.u8     d31, d31, d31, #4
+1:      tst         r3, #2
+        beq         1f
+        vst1.u16    {d31[0]}, [r0]!
+        vext.u8     d31, d31, d31, #2
+1:      tst         r3, #1
+        beq         5f
+        vst1.u8     {d31[0]}, [r0]!
+        vext.u8     d31, d31, d31, #1
+5:      mov         r0, #0
+.endm
+
+.irp r, TUNED_LIST1, 25
+PRIVATE(convolve1_\r)                        /* one routine per listed radius bound, step=1 */
+        push        {r12,lr}
+
+        prefill     step=1, max_r=\r, label=.Lcnv1_\r
+
+        conv_body   core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r
+
+        pop         {r12,pc}
+END(convolve1_\r)
+.endr
+
+.irp r, TUNED_LIST4, 25
+PRIVATE(convolve4_\r)                        /* one routine per listed radius bound, step=4 */
+        push        {r12,lr}
+        sub         r9, sp, #0x200
+        sub         sp, sp, #0x200 + 0x400
+        bic         r9, r9, #0x3fc
+
+        /* r9 now points to a 0x200 byte buffer on the stack whose address
+         * has the low 10 bits clear. This allows easy address calculation
+         * in the wrap-around cases.
+         */
+
+        prefill     step=4, max_r=\r, label=.Lcnv4_\r
+
+        conv_body   core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r
+
+        add         sp, sp, #0x200 + 0x400   // release the buffer area reserved on entry
+        pop         {r12,pc}
+END(convolve4_\r)
+.endr
+
+/* void rsdIntrinsicBlurU1_K(
+ * void *out, // r0
+ * void *in, // r1
+ * size_t w, // r2
+ * size_t h, // r3
+ * size_t p, // [sp]
+ * size_t x, // [sp,#4]
+ * size_t y, // [sp,#8]
+ * size_t count, // [sp,#12]
+ * size_t r, // [sp,#16]
+ * uint16_t *tab); // [sp,#20]
+ *
+ * The prologue pushes ten core registers (40 bytes) and d8-d15 (64 bytes),
+ * so inside the function each stack argument is found 104 bytes higher than
+ * the offset listed above.
+ */
+ENTRY(rsdIntrinsicBlurU1_K)
+        push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
+        vpush       {d8-d15}
+        ldr         r6, [sp,#112]        // y
+        ldr         r8, [sp,#108]        // x
+        ldr         r5, [sp,#120]        // r
+        sub         r4, r2, r8           // inlen = w - x
+        sub         r7, r3, r6           // h - y
+        ldr         r2, [sp,#104]        // pitch
+        ldr         r3, [sp,#116]        // count
+        sub         r7, r7, #1           // h - y - 1
+
+        ldr         r12, [sp,#124]       // tab
+
+        add         r1, r1, r8           // src += x
+
+        cmp         r6, r5
+        movhi       r6, r5               // rup = min(r, y)
+        cmp         r7, r5
+        movhi       r7, r5               // rdn = min(r, h - y - 1)
+
+        vld1.u16    {d0,d1,d2,d3}, [r12]!    // first 16 table entries
+        vld1.u16    {d4,d5,d6}, [r12]!       // next 12 table entries
+
+        adr         lr, 1f               // the convolve routine returns to 1: below
+  .irp r, TUNED_LIST1
+        cmp         r5, #\r
+        bls         convolve1_\r         // first listed bound that is >= r
+  .endr
+        b           convolve1_25
+
+1:      vpop        {d8-d15}
+        pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
+END(rsdIntrinsicBlurU1_K)
+
+/* void rsdIntrinsicBlurU4_K(
+ * void *out, // r0
+ * void *in, // r1
+ * size_t w, // r2
+ * size_t h, // r3
+ * size_t p, // [sp]
+ * size_t x, // [sp,#4]
+ * size_t y, // [sp,#8]
+ * size_t count, // [sp,#12]
+ * size_t r, // [sp,#16]
+ * uint16_t *tab); // [sp,#20]
+ *
+ * Same register and stack layout as rsdIntrinsicBlurU1_K; x, w - x and count
+ * are multiplied by 4 before use.
+ */
+ENTRY(rsdIntrinsicBlurU4_K)
+        push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
+        vpush       {d8-d15}
+        ldr         r6, [sp,#112]        // y
+        ldr         r8, [sp,#108]        // x
+        ldr         r5, [sp,#120]        // r
+        lsl         r8, r8, #2           // x * 4
+        rsb         r4, r8, r2, LSL #2   // inlen = (w - x) * 4
+        sub         r7, r3, r6           // h - y
+        ldr         r2, [sp,#104]        // pitch
+        ldr         r3, [sp,#116]        // count
+        sub         r7, r7, #1           // h - y - 1
+        lsl         r3, r3, #2           // count * 4
+
+        ldr         r12, [sp,#124]       // tab
+
+        add         r1, r1, r8           // in += x * 4
+
+        cmp         r6, r5
+        movhi       r6, r5               // rup = min(r, y)
+        cmp         r7, r5
+        movhi       r7, r5               // rdn = min(r, h - y - 1)
+
+        vld1.u16    {d0,d1,d2,d3}, [r12]!    // first 16 table entries
+        vld1.u16    {d4,d5,d6}, [r12]!       // next 12 table entries
+
+        adr         lr, 1f               // the convolve routine returns to 1: below
+  .irp r, TUNED_LIST4
+        cmp         r5, #\r
+        bls         convolve4_\r         // first listed bound that is >= r
+  .endr
+        b           convolve4_25
+
+1:      vpop        {d8-d15}
+        pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
+END(rsdIntrinsicBlurU4_K)
diff --git a/toolkit/ColorMatrix.cpp b/toolkit/ColorMatrix.cpp
new file mode 100644
index 00000000..dd426cff
--- /dev/null
+++ b/toolkit/ColorMatrix.cpp
@@ -0,0 +1,1066 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RenderScriptToolkit.h"
+#include "TaskProcessor.h"
+#include "Utils.h"
+#include <assert.h>
+#include <cstdint>
+#include <sys/mman.h>  // munmap(); NOTE(review): memcpy/fabs are used below without <cstring>/<cmath> here - presumably reached through the project headers, confirm
+
+namespace android {
+namespace renderscript {
+
+#define LOG_TAG "renderscript.toolkit.ColorMatrix"
+
+/* uint kernel
+ * Q0 D0: Load slot for R
+ * D1: Load slot for G
+ * Q1 D2: Load slot for B
+ * D3: Load slot for A
+ * Q2 D4: Matrix
+ * D5: =
+ * Q3 D6: =
+ * D7: =
+ * Q4 D8: Add R
+ * D9:
+ * Q5 D10: Add G
+ * D11:
+ * Q6 D12: Add B
+ * D13:
+ * Q7 D14: Add A
+ * D15:
+ * Q8 D16: I32: R Sum
+ * D17:
+ * Q9 D18: I32: G Sum
+ * D19:
+ * Q10 D20: I32: B Sum
+ * D21:
+ * Q11 D22: I32: A Sum
+ * D23:
+ * Q12 D24: U16: expanded R
+ * D25:
+ * Q13 D26: U16: expanded G
+ * D27:
+ * Q14 D28: U16: expanded B
+ * D29:
+ * Q15 D30: U16: expanded A
+ * D31:
+ *
+ */
+
+/* float kernel
+ * Q0 D0: Load slot for R
+ * D1: =
+ * Q1 D2: Load slot for G
+ * D3: =
+ * Q2 D4: Load slot for B
+ * D5: =
+ * Q3 D6: Load slot for A
+ * D7: =
+ * Q4 D8: Matrix
+ * D9: =
+ * Q5 D10: =
+ * D11: =
+ * Q6 D12: =
+ * D13: =
+ * Q7 D14: =
+ * D15: =
+ * Q8 D16: Add R
+ * D17: =
+ * Q9 D18: Add G
+ * D19: =
+ * Q10 D20: Add B
+ * D21: =
+ * Q11 D22: Add A
+ * D23: =
+ * Q12 D24: Sum R
+ * D25: =
+ * Q13 D26: Sum G
+ * D27: =
+ * Q14 D28: Sum B
+ * D29: =
+ * Q15 D30: Sum A
+ * D31: =
+ *
+ */
+
+typedef union {  // one 64-bit key per color-matrix configuration: accessed whole through `key`, field by field through `u`
+    uint64_t key;
+    struct {
+        uint32_t inVecSize          :2;  // [0 - 1]
+        uint32_t outVecSize         :2;  // [2 - 3]
+        uint32_t inType             :4;  // [4 - 7]
+        uint32_t outType            :4;  // [8 - 11]
+        uint32_t dot                :1;  // [12]
+        uint32_t _unused1           :1;  // [13]
+        uint32_t copyAlpha          :1;  // [14]
+        uint32_t _unused2           :1;  // [15]
+        uint32_t coeffMask          :16; // [16-31]
+        uint32_t addMask            :4;  // [32-35]
+    } u;
+} Key_t;
+
+/* The two data types and their value, as specified in the RenderScript documentation.
+ * Only RS_TYPE_UNSIGNED_8 is currently supported.
+ *
+ * TODO: The actual values of these constants are likely not important. We may be
+ * able to simplify the key related code.
+ */
+const int RS_TYPE_UNSIGNED_8 = 8;
+const int RS_TYPE_FLOAT_32 = 2;
+
+//Re-enable when intrinsic is fixed
+#if defined(ARCH_ARM64_USE_INTRINSICS)
+typedef struct {  // table of code pointers filled in by the rsdIntrinsicColorMatrixSetup_*_K functions declared below
+    void (*column[4])(void);
+    void (*store)(void);
+    void (*load)(void);
+    void (*store_end)(void);
+    void (*load_end)(void);
+} FunctionTab_t;
+
+extern "C" void rsdIntrinsicColorMatrix_int_K(
+             void *out, void const *in, size_t count,
+             FunctionTab_t const *fns,
+             int16_t const *mult, int32_t const *add);
+
+extern "C" void rsdIntrinsicColorMatrix_float_K(
+             void *out, void const *in, size_t count,
+             FunctionTab_t const *fns,
+             float const *mult, float const *add);
+
+/* The setup functions fill in function tables to be used by above functions;
+ * this code also eliminates jump-to-another-jump cases by short-circuiting
+ * empty functions. While it's not performance critical, it works out easier
+ * to write the set-up code in assembly than to try to expose the same symbols
+ * and write the code in C.
+ */
+extern "C" void rsdIntrinsicColorMatrixSetup_int_K(
+             FunctionTab_t *fns,
+             uint32_t mask, int dt, int st);
+
+extern "C" void rsdIntrinsicColorMatrixSetup_float_K(
+             FunctionTab_t *fns,
+             uint32_t mask, int dt, int st);
+#endif
+
+class ColorMatrixTask : public Task {  // Task holding a 16-float matrix and a 4-float add vector copied from the caller
+    const void* mIn;
+    void* mOut;
+    size_t mInputVectorSize;
+    uint32_t mOutstep;  // paddedSize(outputVectorSize)
+    uint32_t mInstep;   // paddedSize(inputVectorSize)
+
+    float mFp[16];  // copy of the constructor's `matrix` argument
+    float mFpa[4];  // copy of the constructor's `addVector` argument
+
+    // The following four fields are read as constants
+    // by the SIMD assembly code.
+    int16_t mIp[16];
+    int mIpa[4];
+    float mTmpFp[16];
+    float mTmpFpa[4];
+#if defined(ARCH_ARM64_USE_INTRINSICS)
+    FunctionTab_t mFnTab;
+#endif
+
+    void kernel(uchar* out, uchar* in, uint32_t xstart, uint32_t xend);
+    void updateCoeffCache(float fpMul, float addMul);
+
+    Key_t mLastKey;
+    unsigned char* mBuf;  // released with munmap() in the destructor; NOTE(review): presumably mapped by build() - confirm
+    size_t mBufSize;      // length passed to munmap()
+
+    bool build(Key_t key);
+    void (*mOptKernel)(void* dst, const void* src, const int16_t* coef, uint32_t count);
+
+#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+    Key_t computeKey(size_t inVectorSize, int inType, size_t outVectorSize, int outType);
+    void preLaunch(size_t inVectorSize, int inType, size_t outVectorSize, int outType);
+#else
+    Key_t computeKey(size_t inVectorSize, size_t outVectorSize);
+    void preLaunch(size_t inVectorSize, size_t outVectorSize);
+#endif  // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+
+    // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
+    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
+                             size_t endY) override;
+
+   public:
+    ColorMatrixTask(const void* in, void* out, size_t inputVectorSize, size_t outputVectorSize,
+                    size_t sizeX, size_t sizeY, const float* matrix, const float* addVector,
+                    const Restriction* restriction)
+        : Task{sizeX, sizeY, outputVectorSize, true, restriction},
+          mIn{in},
+          mOut{out},
+          mInputVectorSize{inputVectorSize} {
+        mLastKey.key = 0;
+        mBuf = nullptr;
+        mBufSize = 0;
+        mOptKernel = nullptr;
+
+        mOutstep = paddedSize(outputVectorSize);
+        mInstep = paddedSize(inputVectorSize);
+
+        memcpy(mFp, matrix, sizeof(mFp));        // reads 16 floats from `matrix`
+        memcpy(mFpa, addVector, sizeof(mFpa));   // reads 4 floats from `addVector`
+#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+        // For float support, we'll have to pass the type in the constructor too.
+        preLaunch(inputVectorSize, RS_TYPE_UNSIGNED_8, outputVectorSize, RS_TYPE_UNSIGNED_8);
+#else
+        preLaunch(inputVectorSize, outputVectorSize);
+#endif  // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+    }
+    ~ColorMatrixTask() {  // NOTE(review): mBuf is unmapped here but copy operations are not deleted in this class - confirm Task is non-copyable
+        if (mBuf) munmap(mBuf, mBufSize);
+        mBuf = nullptr;
+        mOptKernel = nullptr;
+    }
+};
+
+#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+Key_t ColorMatrixTask::computeKey(size_t inVectorSize, int inType, size_t outVectorSize,
+                                  int outType) {
+    Key_t key;
+    key.key = 0;
+
+    // Compute a unique code key for this operation
+
+    // Add to the key the input and output types
+    bool hasFloat = false;
+    if (inType == RS_TYPE_FLOAT_32) {
+        hasFloat = true;
+        key.u.inType = RS_TYPE_FLOAT_32;
+    }
+    if (outType == RS_TYPE_FLOAT_32) {
+        hasFloat = true;
+        key.u.outType = RS_TYPE_FLOAT_32;
+    }
+
+    // Mask in the bits indicating which coefficients in the
+    // color matrix are needed.
+    if (hasFloat) {
+        for (uint32_t i=0; i < 16; i++) {
+            if (fabs(mFp[i]) != 0.f) {
+                key.u.coeffMask |= 1 << i;
+            }
+        }
+        if (fabs(mFpa[0]) != 0.f) key.u.addMask |= 0x1;
+        if (fabs(mFpa[1]) != 0.f) key.u.addMask |= 0x2;
+        if (fabs(mFpa[2]) != 0.f) key.u.addMask |= 0x4;
+        if (fabs(mFpa[3]) != 0.f) key.u.addMask |= 0x8;
+
+    } else {
+#else
+Key_t ColorMatrixTask::computeKey(size_t inVectorSize, size_t outVectorSize) {
+    Key_t key;
+    key.key = 0;
+
+    // Compute a unique code key for this operation
+    {
+#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
+
+        for (uint32_t i=0; i < 16; i++) {
+            if (mIp[i] != 0) {
+                key.u.coeffMask |= 1 << i;
+            }
+        }
+        if (mIpa[0] != 0) key.u.addMask |= 0x1;
+        if (mIpa[1] != 0) key.u.addMask |= 0x2;
+        if (mIpa[2] != 0) key.u.addMask |= 0x4;
+        if (mIpa[3] != 0) key.u.addMask |= 0x8;
+    }
+
+    // Look for a dot product where the r,g,b columns are the same
+    if ((mIp[0] == mIp[1]) && (mIp[0] == mIp[2]) &&
+        (mIp[4] == mIp[5]) && (mIp[4] == mIp[6]) &&
+        (mIp[8] == mIp[9]) && (mIp[8] == mIp[10]) &&
+        (mIp[12] == mIp[13]) && (mIp[12] == mIp[14])) {
+
+        if
(!key.u.addMask) key.u.dot = 1; + } + + // Is alpha a simple copy + if (!(key.u.coeffMask & 0x0888) && (mIp[15] == 256) && !(key.u.addMask & 0x8)) { + key.u.copyAlpha = !(key.u.inType || key.u.outType); + } + + //ALOGE("build key %08x, %08x", (int32_t)(key.key >> 32), (int32_t)key.key); + + switch (inVectorSize) { + case 4: + key.u.inVecSize = 3; + break; + case 3: + key.u.inVecSize = 2; + key.u.coeffMask &= ~0xF000; + break; + case 2: + key.u.inVecSize = 1; + key.u.coeffMask &= ~0xFF00; + break; + default: + key.u.coeffMask &= ~0xFFF0; + break; + } + + switch (outVectorSize) { + case 4: + key.u.outVecSize = 3; + break; + case 3: + key.u.outVecSize = 2; + key.u.coeffMask &= ~0x8888; + key.u.addMask &= 7; + break; + case 2: + key.u.outVecSize = 1; + key.u.coeffMask &= ~0xCCCC; + key.u.addMask &= 3; + break; + default: + key.u.coeffMask &= ~0xEEEE; + key.u.addMask &= 1; + break; + } + + if (key.u.inType && !key.u.outType) { + key.u.addMask |= 1; + if (key.u.outVecSize > 0) key.u.addMask |= 2; + if (key.u.outVecSize > 1) key.u.addMask |= 4; + if (key.u.outVecSize > 2) key.u.addMask |= 8; + } + + //ALOGE("build key %08x, %08x", (int32_t)(key.key >> 32), (int32_t)key.key); + return key; +} + +#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS) + +#define DEF_SYM(x) \ + extern "C" uint32_t _N_ColorMatrix_##x; \ + extern "C" uint32_t _N_ColorMatrix_##x##_end; \ + extern "C" uint32_t _N_ColorMatrix_##x##_len; + +DEF_SYM(prefix_i) +DEF_SYM(prefix_f) +DEF_SYM(postfix1) +DEF_SYM(postfix2) + +DEF_SYM(load_u8_4) +DEF_SYM(load_u8_3) +DEF_SYM(load_u8_2) +DEF_SYM(load_u8_1) +DEF_SYM(load_u8f_4) +DEF_SYM(load_u8f_3) +DEF_SYM(load_u8f_2) +DEF_SYM(load_u8f_1) +DEF_SYM(load_f32_4) +DEF_SYM(load_f32_3) +DEF_SYM(load_f32_2) +DEF_SYM(load_f32_1) + +DEF_SYM(store_u8_4) +DEF_SYM(store_u8_2) +DEF_SYM(store_u8_1) +DEF_SYM(store_f32_4) +DEF_SYM(store_f32_3) +DEF_SYM(store_f32_2) +DEF_SYM(store_f32_1) +DEF_SYM(store_f32u_4) +DEF_SYM(store_f32u_2) +DEF_SYM(store_f32u_1) 
+ +DEF_SYM(unpack_u8_4) +DEF_SYM(unpack_u8_3) +DEF_SYM(unpack_u8_2) +DEF_SYM(unpack_u8_1) +DEF_SYM(pack_u8_4) +DEF_SYM(pack_u8_3) +DEF_SYM(pack_u8_2) +DEF_SYM(pack_u8_1) +DEF_SYM(dot) +DEF_SYM(add_0_u8) +DEF_SYM(add_1_u8) +DEF_SYM(add_2_u8) +DEF_SYM(add_3_u8) + +#define ADD_CHUNK(x) \ + memcpy(buf, &_N_ColorMatrix_##x, _N_ColorMatrix_##x##_len); \ + buf += _N_ColorMatrix_##x##_len + + +static uint8_t * addBranch(uint8_t *buf, const uint8_t *target, uint32_t condition) { + size_t off = (target - buf - 8) >> 2; + assert(((off & 0xff000000) == 0) || + ((off & 0xff000000) == 0xff000000)); + + uint32_t op = (condition << 28); + op |= 0xa << 24; // branch + op |= 0xffffff & off; + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint32_t encodeSIMDRegs(uint32_t vd, uint32_t vn, uint32_t vm) { + assert(vd < 32); + assert(vm < 32); + assert(vn < 32); + + uint32_t op = ((vd & 0xf) << 12) | (((vd & 0x10) >> 4) << 22); + op |= (vm & 0xf) | (((vm & 0x10) >> 4) << 5); + op |= ((vn & 0xf) << 16) | (((vn & 0x10) >> 4) << 7); + return op; +} + +static uint8_t * addVMLAL_S16(uint8_t *buf, uint32_t dest_q, uint32_t src_d1, uint32_t src_d2, + uint32_t src_d2_s) { + //vmlal.s16 Q#1, D#1, D#2[#] + uint32_t op = 0xf2900240 | encodeSIMDRegs(dest_q << 1, src_d1, src_d2 | (src_d2_s << 3)); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVMULL_S16(uint8_t *buf, uint32_t dest_q, uint32_t src_d1, uint32_t src_d2, + uint32_t src_d2_s) { + //vmull.s16 Q#1, D#1, D#2[#] + uint32_t op = 0xf2900A40 | encodeSIMDRegs(dest_q << 1, src_d1, src_d2 | (src_d2_s << 3)); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVQADD_S32(uint8_t *buf, uint32_t dest_q, uint32_t src_q1, uint32_t src_q2) { + //vqadd.s32 Q#1, Q#1, Q#2 + uint32_t op = 0xf2200050 | encodeSIMDRegs(dest_q << 1, src_q1 << 1, src_q2 << 1); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVMLAL_F32(uint8_t *buf, uint32_t dest_q, uint32_t src_d1, uint32_t src_d2, 
+ uint32_t src_d2_s) { + //vmlal.f32 Q#1, D#1, D#2[#] + uint32_t op = 0xf3a00140 | encodeSIMDRegs(dest_q << 1, src_d1, src_d2 | (src_d2_s << 4)); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVMULL_F32(uint8_t *buf, uint32_t dest_q, uint32_t src_d1, uint32_t src_d2, + uint32_t src_d2_s) { + //vmull.f32 Q#1, D#1, D#2[#] + uint32_t op = 0xf3a00940 | encodeSIMDRegs(dest_q << 1, src_d1, src_d2 | (src_d2_s << 4)); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVORR_32(uint8_t *buf, uint32_t dest_q, uint32_t src_q1, uint32_t src_q2) { + //vadd.f32 Q#1, D#1, D#2 + uint32_t op = 0xf2200150 | encodeSIMDRegs(dest_q << 1, src_q1 << 1, src_q2 << 1); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVMOV_32(uint8_t *buf, uint32_t dest_q, uint32_t imm) { + //vmov.32 Q#1, #imm + assert(imm == 0); + (void) imm; // Avoid unused parameter warnings for non-debug builds + uint32_t op = 0xf2800050 | encodeSIMDRegs(dest_q << 1, 0, 0); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} + +static uint8_t * addVADD_F32(uint8_t *buf, uint32_t dest_q, uint32_t src_q1, uint32_t src_q2) { + //vadd.f32 Q#1, D#1, D#2 + uint32_t op = 0xf2000d40 | encodeSIMDRegs(dest_q << 1, src_q1 << 1, src_q2 << 1); + ((uint32_t *)buf)[0] = op; + return buf + 4; +} +#endif + +#if defined(ARCH_X86_HAVE_SSSE3) +extern void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, + const int16_t *coef, uint32_t count); +extern void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, + const int16_t *coef, uint32_t count); +extern void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, + const int16_t *coef, uint32_t count); + +using android::renderscript::Key_t; + +void * selectKernel(Key_t key) +{ + void * kernel = nullptr; + + // inType, outType float if nonzero + if (!(key.u.inType || key.u.outType)) { + if (key.u.dot) + kernel = (void *)rsdIntrinsicColorMatrixDot_K; + else if (key.u.copyAlpha) + kernel = (void 
*)rsdIntrinsicColorMatrix3x3_K; + else + kernel = (void *)rsdIntrinsicColorMatrix4x4_K; + } + + return kernel; +} +#endif + +bool ColorMatrixTask::build(Key_t key) { +#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS) + mBufSize = 4096; + //StopWatch build_time("rs cm: build time"); + mBuf = (uint8_t *)mmap(0, mBufSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (mBuf == MAP_FAILED) { + mBuf = NULL; + return false; + } + + uint8_t *buf = mBuf; + uint8_t *buf2 = nullptr; + + int ops[5][4]; // 0=unused, 1 = set, 2 = accumulate, 3 = final + int opInit[4] = {0, 0, 0, 0}; + + memset(ops, 0, sizeof(ops)); + for (int i=0; i < 4; i++) { + if (key.u.coeffMask & (1 << (i*4))) { + ops[i][0] = 0x2 | opInit[0]; + opInit[0] = 1; + } + if (!key.u.dot) { + if (key.u.coeffMask & (1 << (1 + i*4))) { + ops[i][1] = 0x2 | opInit[1]; + opInit[1] = 1; + } + if (key.u.coeffMask & (1 << (2 + i*4))) { + ops[i][2] = 0x2 | opInit[2]; + opInit[2] = 1; + } + } + if (!key.u.copyAlpha) { + if (key.u.coeffMask & (1 << (3 + i*4))) { + ops[i][3] = 0x2 | opInit[3]; + opInit[3] = 1; + } + } + } + + if (key.u.inType || key.u.outType) { + key.u.copyAlpha = 0; + ADD_CHUNK(prefix_f); + buf2 = buf; + + // Load the incoming r,g,b,a as needed + if (key.u.inType) { + switch(key.u.inVecSize) { + case 3: + ADD_CHUNK(load_f32_4); + break; + case 2: + ADD_CHUNK(load_f32_3); + break; + case 1: + ADD_CHUNK(load_f32_2); + break; + case 0: + ADD_CHUNK(load_f32_1); + break; + } + } else { + switch(key.u.inVecSize) { + case 3: + ADD_CHUNK(load_u8f_4); + break; + case 2: + ADD_CHUNK(load_u8f_3); + break; + case 1: + ADD_CHUNK(load_u8f_2); + break; + case 0: + ADD_CHUNK(load_u8f_1); + break; + } + } + + for (int i=0; i < 4; i++) { + for (int j=0; j < 4; j++) { + switch(ops[i][j]) { + case 0: + break; + case 2: + buf = addVMULL_F32(buf, 12+j, i*2, 8+i*2 + (j >> 1), j & 1); + break; + case 3: + buf = addVMLAL_F32(buf, 12+j, i*2, 8+i*2 + (j >> 1), j & 1); + break; + } + } + } + 
for (int j=0; j < 4; j++) { + if (opInit[j]) { + if (key.u.addMask & (1 << j)) { + buf = addVADD_F32(buf, j, 12+j, 8+j); + } else { + buf = addVORR_32(buf, j, 12+j, 12+j); + } + } else { + if (key.u.addMask & (1 << j)) { + buf = addVORR_32(buf, j, 8+j, 8+j); + } else { + buf = addVMOV_32(buf, j, 0); + } + } + } + + if (key.u.outType) { + switch(key.u.outVecSize) { + case 3: + ADD_CHUNK(store_f32_4); + break; + case 2: + ADD_CHUNK(store_f32_3); + break; + case 1: + ADD_CHUNK(store_f32_2); + break; + case 0: + ADD_CHUNK(store_f32_1); + break; + } + } else { + switch(key.u.outVecSize) { + case 3: + case 2: + ADD_CHUNK(store_f32u_4); + break; + case 1: + ADD_CHUNK(store_f32u_2); + break; + case 0: + ADD_CHUNK(store_f32u_1); + break; + } + } + + + } else { + // Add the function prefix + // Store the address for the loop return + ADD_CHUNK(prefix_i); + buf2 = buf; + + // Load the incoming r,g,b,a as needed + switch(key.u.inVecSize) { + case 3: + ADD_CHUNK(load_u8_4); + if (key.u.copyAlpha) { + ADD_CHUNK(unpack_u8_3); + } else { + ADD_CHUNK(unpack_u8_4); + } + break; + case 2: + ADD_CHUNK(load_u8_3); + ADD_CHUNK(unpack_u8_3); + break; + case 1: + ADD_CHUNK(load_u8_2); + ADD_CHUNK(unpack_u8_2); + break; + case 0: + ADD_CHUNK(load_u8_1); + ADD_CHUNK(unpack_u8_1); + break; + } + + // Add multiply and accumulate + // use MULL to init the output register, + // use MLAL from there + for (int i=0; i < 4; i++) { + for (int j=0; j < 4; j++) { + switch(ops[i][j]) { + case 0: + break; + case 2: + buf = addVMULL_S16(buf, 8+j, 24+i*2, 4+i, j); + break; + case 3: + buf = addVMLAL_S16(buf, 8+j, 24+i*2, 4+i, j); + break; + } + } + } + for (int j=0; j < 4; j++) { + if (opInit[j]) { + if (key.u.addMask & (1 << j)) { + buf = addVQADD_S32(buf, 8+j, 8+j, 4+j); + } + } else { + if (key.u.addMask & (1 << j)) { + buf = addVORR_32(buf, 8+j, 4+j, 4+j); + } + } + } + + // If we have a dot product, perform the special pack. 
+ if (key.u.dot) { + ADD_CHUNK(pack_u8_1); + ADD_CHUNK(dot); + } else { + switch(key.u.outVecSize) { + case 3: + if (key.u.copyAlpha) { + ADD_CHUNK(pack_u8_3); + } else { + ADD_CHUNK(pack_u8_4); + } + break; + case 2: + ADD_CHUNK(pack_u8_3); + break; + case 1: + ADD_CHUNK(pack_u8_2); + break; + case 0: + ADD_CHUNK(pack_u8_1); + break; + } + } + + // Write out result + switch(key.u.outVecSize) { + case 3: + case 2: + ADD_CHUNK(store_u8_4); + break; + case 1: + ADD_CHUNK(store_u8_2); + break; + case 0: + ADD_CHUNK(store_u8_1); + break; + } + } + + if (key.u.inType != key.u.outType) { + key.u.copyAlpha = 0; + key.u.dot = 0; + } + + // Loop, branch, and cleanup + ADD_CHUNK(postfix1); + buf = addBranch(buf, buf2, 0x01); + ADD_CHUNK(postfix2); + + int ret = mprotect(mBuf, mBufSize, PROT_READ | PROT_EXEC); + if (ret == -1) { + ALOGE("mprotect error %i", ret); + return false; + } + + __builtin___clear_cache((char *) mBuf, (char*) mBuf + mBufSize); + return true; +#else + (void) key; // Avoid unused parameter warning. 
+ return false; +#endif +} + +void ColorMatrixTask::updateCoeffCache(float fpMul, float addMul) { + for(int ct=0; ct < 16; ct++) { + mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f); + mTmpFp[ct] = mFp[ct] * fpMul; + //ALOGE("mat %i %f %f", ct, mFp[ct], tmpFp[ct]); + } + + float add = 0.f; + if (fpMul > 254.f) add = 0.5f; + for(int ct=0; ct < 4; ct++) { + mTmpFpa[ct] = mFpa[ct] * addMul + add; + //ALOGE("mFpa %i %f %f", ct, mFpa[ct], tmpFpa[ct * 4 + 0]); + } + + for(int ct=0; ct < 4; ct++) { + mIpa[ct] = (int)(mFpa[ct] * 65536.f + 0.5f); + } +} + + + +static void One(void *out, + const void *py, const float* coeff, const float *add, + uint32_t vsin, uint32_t vsout, bool fin, bool fout) { + + float4 f = 0.f; + if (fin) { + switch(vsin) { + case 3: + f = ((const float4 *)py)[0]; + break; + case 2: + f = ((const float4 *)py)[0]; + f.w = 0.f; + break; + case 1: + f.xy = ((const float2 *)py)[0]; + break; + case 0: + f.x = ((const float *)py)[0]; + break; + } + } else { + switch(vsin) { + case 3: + f = convert<float4>(((const uchar4 *)py)[0]); + break; + case 2: + f = convert<float4>(((const uchar4 *)py)[0]); + f.w = 0.f; + break; + case 1: + f.xy = convert<float2>(((const uchar2 *)py)[0]); + break; + case 0: + f.x = (float)(((const uchar *)py)[0]); + break; + } + } + //ALOGE("f1 %f %f %f %f", f.x, f.y, f.z, f.w); + + float4 sum; + sum.x = f.x * coeff[0] + + f.y * coeff[4] + + f.z * coeff[8] + + f.w * coeff[12]; + sum.y = f.x * coeff[1] + + f.y * coeff[5] + + f.z * coeff[9] + + f.w * coeff[13]; + sum.z = f.x * coeff[2] + + f.y * coeff[6] + + f.z * coeff[10] + + f.w * coeff[14]; + sum.w = f.x * coeff[3] + + f.y * coeff[7] + + f.z * coeff[11] + + f.w * coeff[15]; + //ALOGE("f2 %f %f %f %f", sum.x, sum.y, sum.z, sum.w); + + sum.x += add[0]; + sum.y += add[1]; + sum.z += add[2]; + sum.w += add[3]; + + + //ALOGE("fout %i vs %i, sum %f %f %f %f", fout, vsout, sum.x, sum.y, sum.z, sum.w); + if (fout) { + switch(vsout) { + case 3: + case 2: + ((float4 *)out)[0] = sum; + break; + case 
1: + ((float2 *)out)[0] = sum.xy; + break; + case 0: + ((float *)out)[0] = sum.x; + break; + } + } else { + sum.x = sum.x < 0 ? 0 : (sum.x > 255.5 ? 255.5 : sum.x); + sum.y = sum.y < 0 ? 0 : (sum.y > 255.5 ? 255.5 : sum.y); + sum.z = sum.z < 0 ? 0 : (sum.z > 255.5 ? 255.5 : sum.z); + sum.w = sum.w < 0 ? 0 : (sum.w > 255.5 ? 255.5 : sum.w); + + switch(vsout) { + case 3: + case 2: + ((uchar4 *)out)[0] = convert<uchar4>(sum); + break; + case 1: + ((uchar2 *)out)[0] = convert<uchar2>(sum.xy); + break; + case 0: + ((uchar *)out)[0] = sum.x; + break; + } + } + //ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], + // ((float *)out)[3]); +} + +void ColorMatrixTask::kernel(uchar *out, uchar *in, uint32_t xstart, uint32_t xend) { + uint32_t x1 = xstart; + uint32_t x2 = xend; + + uint32_t vsin = mLastKey.u.inVecSize; + uint32_t vsout = mLastKey.u.outVecSize; + bool floatIn = !!mLastKey.u.inType; + bool floatOut = !!mLastKey.u.outType; + + //if (!info->current.y) ALOGE("steps %i %i %i %i", instep, outstep, vsin, vsout); + + if(x2 > x1) { + int32_t len = x2 - x1; + if (mUsesSimd) { + if((mOptKernel != nullptr) && (len >= 4)) { + // The optimized kernel processes 4 pixels at once + // and requires a minimum of 1 chunk of 4 + mOptKernel(out, in, mIp, len >> 2); + // Update the len and pointers so the generic code can + // finish any leftover pixels + len &= ~3; + x1 += len; + out += mOutstep * len; + in += mInstep * len; + } +#if defined(ARCH_ARM64_USE_INTRINSICS) + else { + if (mLastKey.u.inType == RS_TYPE_FLOAT_32 || + mLastKey.u.outType == RS_TYPE_FLOAT_32) { + // Currently this generates off by one errors. 
+ // rsdIntrinsicColorMatrix_float_K(out, in, len, &mFnTab, tmpFp, tmpFpa); + // x1 += len; + // out += outstep * len; + // in += instep * len; + } else { + rsdIntrinsicColorMatrix_int_K(out, in, len, &mFnTab, mIp, mIpa); + x1 += len; + out += mOutstep * len; + in += mInstep * len; + } + } +#endif + } + + while(x1 != x2) { + One(out, in, mTmpFp, mTmpFpa, vsin, vsout, floatIn, floatOut); + out += mOutstep; + in += mInstep; + x1++; + } + } +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +void ColorMatrixTask::preLaunch(size_t inVectorSize, int inType, size_t outVectorSize, + int outType) { + if (inType == outType) { + if (outType == RS_TYPE_UNSIGNED_8) { + updateCoeffCache(1.f, 255.f); + } else { + updateCoeffCache(1.f, 1.f); + } + } else { + if (outType == RS_TYPE_UNSIGNED_8) { + updateCoeffCache(255.f, 255.f); + } else { + updateCoeffCache(1.f / 255.f, 1.f); + } + } + + Key_t key = computeKey(inVectorSize, inType, outVectorSize, outType); +#else +void ColorMatrixTask::preLaunch(size_t inVectorSize, size_t outVectorSize) { + updateCoeffCache(1.f, 255.f); + + Key_t key = computeKey(inVectorSize, outVectorSize); +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +#if defined(ARCH_X86_HAVE_SSSE3) + if ((mOptKernel == nullptr) || (mLastKey.key != key.key)) { + // FIXME: Disable mOptKernel to pass RS color matrix CTS cases + // mOptKernel = + // (void (*)(void *, const void *, const int16_t *, uint32_t)) selectKernel(key); + mLastKey = key; + } + +#else //if !defined(ARCH_X86_HAVE_SSSE3) + if ((mOptKernel == nullptr) || (mLastKey.key != key.key)) { + if (mBuf) munmap(mBuf, mBufSize); + mBuf = nullptr; + mOptKernel = nullptr; + if (build(key)) { + mOptKernel = (void (*)(void *, const void *, const int16_t *, uint32_t)) mBuf; + } +#if defined(ARCH_ARM64_USE_INTRINSICS) + else { + int dt = key.u.outVecSize + (key.u.outType == RS_TYPE_FLOAT_32 ? 4 : 0); + int st = key.u.inVecSize + (key.u.inType == RS_TYPE_FLOAT_32 ? 
4 : 0); + uint32_t mm = 0; + int i; + for (i = 0; i < 4; i++) + { + uint32_t m = (key.u.coeffMask >> i) & 0x1111; + m = ((m * 0x249) >> 9) & 15; + m |= ((key.u.addMask >> i) & 1) << 4; + mm |= m << (i * 5); + } + + if (key.u.inType == RS_TYPE_FLOAT_32 || key.u.outType == RS_TYPE_FLOAT_32) { + rsdIntrinsicColorMatrixSetup_float_K(&mFnTab, mm, dt, st); + } else { + rsdIntrinsicColorMatrixSetup_int_K(&mFnTab, mm, dt, st); + } + } +#endif + mLastKey = key; + } +#endif //if !defined(ARCH_X86_HAVE_SSSE3) +} + +void ColorMatrixTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + size_t offset = mSizeX * y + startX; + uchar* in = ((uchar*)mIn) + offset * paddedSize(mInputVectorSize); + uchar* out = ((uchar*)mOut) + offset * paddedSize(mVectorSize); + kernel(out, in, startX, endX); + } +} + +static const float fourZeroes[]{0.0f, 0.0f, 0.0f, 0.0f}; + +void RenderScriptToolkit::colorMatrix(const void* in, void* out, size_t inputVectorSize, + size_t outputVectorSize, size_t sizeX, size_t sizeY, + const float* matrix, const float* addVector, + const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (inputVectorSize < 1 || inputVectorSize > 4) { + ALOGE("The inputVectorSize should be between 1 and 4. %zu provided.", inputVectorSize); + return; + } + if (outputVectorSize < 1 || outputVectorSize > 4) { + ALOGE("The outputVectorSize should be between 1 and 4. 
%zu provided.", outputVectorSize); + return; + } +#endif + + if (addVector == nullptr) { + addVector = fourZeroes; + } + ColorMatrixTask task(in, out, inputVectorSize, outputVectorSize, sizeX, sizeY, matrix, + addVector, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/ColorMatrix_advsimd.S b/toolkit/ColorMatrix_advsimd.S new file mode 100644 index 00000000..55b00298 --- /dev/null +++ b/toolkit/ColorMatrix_advsimd.S @@ -0,0 +1,1277 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: +#define END(f) .size f, .-f; + + +.macro vmxx_f32 i, mask, opd, opa, opb + .if (\i) & \mask + .if (\i) & (\mask - 1) + fmla \opd, \opa, \opb + .else + fmul \opd, \opa, \opb + .endif + .endif +.endm + +.macro vadd_f32 i, mask, opd, opa, opb, stupidsyntax1, stupidsyntax2 + .if (\i) & \mask + .if (\i) & (\mask - 1) + fadd \opd, \opa, \opb + .else + mov \stupidsyntax1, \stupidsyntax2 + .endif + .endif +.endm + +.macro vmxx_s16 i, mask, opd, opa, opb + .if (\i) & \mask + .if (\i) & (\mask - 1 + 16) + smlal \opd, \opa, \opb + .else + smull \opd, \opa, \opb + .endif + .endif +.endm + +.macro vmxx2_s16 i, mask, opd, opa, opb + .if (\i) & \mask + .if (\i) & (\mask - 1 + 16) + smlal2 \opd, \opa, \opb + .else + smull2 \opd, \opa, \opb + .endif + .endif +.endm + +/* x0 = dst + * x1 = src + * x2 = count + * x3 = params + * x4 = column0_fn + * x5 = column1_fn + * x6 = column2_fn + * x7 = column3_fn + * x8 = store_fn + * x9 = load_fn + */ +.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +.align 6 +colormatrix_int_col0_\i: + .if \i & 16 + dup v6.4s, v4.s[0] + dup v7.4s, v4.s[0] + .endif + vmxx_s16 \i, 1, v6.4s, v12.4h, v0.h[0] + vmxx_s16 \i, 2, v6.4s, v13.4h, v0.h[4] + vmxx_s16 \i, 4, v6.4s, v14.4h, v1.h[0] + vmxx_s16 \i, 8, v6.4s, v15.4h, v1.h[4] + vmxx2_s16 \i, 1, v7.4s, v12.8h, v0.h[0] + vmxx2_s16 \i, 2, v7.4s, v13.8h, v0.h[4] + vmxx2_s16 \i, 4, v7.4s, v14.8h, v1.h[0] + vmxx2_s16 \i, 8, v7.4s, v15.8h, v1.h[4] + sqshrun v8.4h, v6.4s, #8 + sqshrun2 v8.8h, v7.4s, #8 + br x5 + +colormatrix_int_col0_n\i: + .if (\i^31) & 16 + dup v6.4s, v4.s[0] + dup v7.4s, v4.s[0] + .endif + vmxx_s16 \i^31, 1, v6.4s, v12.4h, v0.h[0] + vmxx_s16 \i^31, 2, v6.4s, v13.4h, v0.h[4] + vmxx_s16 \i^31, 4, v6.4s, v14.4h, v1.h[0] + vmxx_s16 \i^31, 8, v6.4s, v15.4h, v1.h[4] + vmxx2_s16 \i^31, 1, v7.4s, v12.8h, v0.h[0] + vmxx2_s16 \i^31, 2, v7.4s, v13.8h, v0.h[4] + vmxx2_s16 \i^31, 4, v7.4s, v14.8h, v1.h[0] + vmxx2_s16 
\i^31, 8, v7.4s, v15.8h, v1.h[4] + sqshrun v8.4h, v6.4s, #8 + sqshrun2 v8.8h, v7.4s, #8 + br x5 + +.align 6 +colormatrix_int_col1_\i: + .if \i & 16 + dup v6.4s, v4.s[1] + dup v7.4s, v4.s[1] + .endif + vmxx_s16 \i, 1, v6.4s, v12.4h, v0.h[1] + vmxx_s16 \i, 2, v6.4s, v13.4h, v0.h[5] + vmxx_s16 \i, 4, v6.4s, v14.4h, v1.h[1] + vmxx_s16 \i, 8, v6.4s, v15.4h, v1.h[5] + vmxx2_s16 \i, 1, v7.4s, v12.8h, v0.h[1] + vmxx2_s16 \i, 2, v7.4s, v13.8h, v0.h[5] + vmxx2_s16 \i, 4, v7.4s, v14.8h, v1.h[1] + vmxx2_s16 \i, 8, v7.4s, v15.8h, v1.h[5] + sqshrun v9.4h, v6.4s, #8 + sqshrun2 v9.8h, v7.4s, #8 + br x6 + +colormatrix_int_col1_n\i: + .if (\i^31) & 16 + dup v6.4s, v4.s[1] + dup v7.4s, v4.s[1] + .endif + vmxx_s16 \i^31, 1, v6.4s, v12.4h, v0.h[1] + vmxx_s16 \i^31, 2, v6.4s, v13.4h, v0.h[5] + vmxx_s16 \i^31, 4, v6.4s, v14.4h, v1.h[1] + vmxx_s16 \i^31, 8, v6.4s, v15.4h, v1.h[5] + vmxx2_s16 \i^31, 1, v7.4s, v12.8h, v0.h[1] + vmxx2_s16 \i^31, 2, v7.4s, v13.8h, v0.h[5] + vmxx2_s16 \i^31, 4, v7.4s, v14.8h, v1.h[1] + vmxx2_s16 \i^31, 8, v7.4s, v15.8h, v1.h[5] + sqshrun v9.4h, v6.4s, #8 + sqshrun2 v9.8h, v7.4s, #8 + br x6 + +.align 6 +colormatrix_int_col2_\i: + .if \i & 16 + dup v6.4s, v4.s[2] + dup v7.4s, v4.s[2] + .endif + vmxx_s16 \i, 1, v6.4s, v12.4h, v0.h[2] + vmxx_s16 \i, 2, v6.4s, v13.4h, v0.h[6] + vmxx_s16 \i, 4, v6.4s, v14.4h, v1.h[2] + vmxx_s16 \i, 8, v6.4s, v15.4h, v1.h[6] + vmxx2_s16 \i, 1, v7.4s, v12.8h, v0.h[2] + vmxx2_s16 \i, 2, v7.4s, v13.8h, v0.h[6] + vmxx2_s16 \i, 4, v7.4s, v14.8h, v1.h[2] + vmxx2_s16 \i, 8, v7.4s, v15.8h, v1.h[6] + sqshrun v10.4h, v6.4s, #8 + sqshrun2 v10.8h, v7.4s, #8 + br x7 + +colormatrix_int_col2_n\i: + .if (\i^31) & 16 + dup v6.4s, v4.s[2] + dup v7.4s, v4.s[2] + .endif + vmxx_s16 \i^31, 1, v6.4s, v12.4h, v0.h[2] + vmxx_s16 \i^31, 2, v6.4s, v13.4h, v0.h[6] + vmxx_s16 \i^31, 4, v6.4s, v14.4h, v1.h[2] + vmxx_s16 \i^31, 8, v6.4s, v15.4h, v1.h[6] + vmxx2_s16 \i^31, 1, v7.4s, v12.8h, v0.h[2] + vmxx2_s16 \i^31, 2, v7.4s, v13.8h, v0.h[6] + vmxx2_s16 \i^31, 4, 
v7.4s, v14.8h, v1.h[2] + vmxx2_s16 \i^31, 8, v7.4s, v15.8h, v1.h[6] + sqshrun v10.4h, v6.4s, #8 + sqshrun2 v10.8h, v7.4s, #8 + br x7 + +.align 6 +colormatrix_int_col3_\i: + .if \i & 16 + dup v6.4s, v4.s[3] + dup v7.4s, v4.s[3] + .endif + vmxx_s16 \i, 1, v6.4s, v12.4h, v0.h[3] + vmxx_s16 \i, 2, v6.4s, v13.4h, v0.h[7] + vmxx_s16 \i, 4, v6.4s, v14.4h, v1.h[3] + vmxx_s16 \i, 8, v6.4s, v15.4h, v1.h[7] + vmxx2_s16 \i, 1, v7.4s, v12.8h, v0.h[3] + vmxx2_s16 \i, 2, v7.4s, v13.8h, v0.h[7] + vmxx2_s16 \i, 4, v7.4s, v14.8h, v1.h[3] + vmxx2_s16 \i, 8, v7.4s, v15.8h, v1.h[7] + sqshrun v11.4h, v6.4s, #8 + sqshrun2 v11.8h, v7.4s, #8 + br x8 + +colormatrix_int_col3_n\i: + .if (\i^31) & 16 + dup v6.4s, v4.s[3] + dup v7.4s, v4.s[3] + .endif + vmxx_s16 \i^31, 1, v6.4s, v12.4h, v0.h[3] + vmxx_s16 \i^31, 2, v6.4s, v13.4h, v0.h[7] + vmxx_s16 \i^31, 4, v6.4s, v14.4h, v1.h[3] + vmxx_s16 \i^31, 8, v6.4s, v15.4h, v1.h[7] + vmxx2_s16 \i^31, 1, v7.4s, v12.8h, v0.h[3] + vmxx2_s16 \i^31, 2, v7.4s, v13.8h, v0.h[7] + vmxx2_s16 \i^31, 4, v7.4s, v14.8h, v1.h[3] + vmxx2_s16 \i^31, 8, v7.4s, v15.8h, v1.h[7] + sqshrun v11.4h, v6.4s, #8 + sqshrun2 v11.8h, v7.4s, #8 + br x8 + +.align 5 +colormatrix_float_col0_\i: + vmxx_f32 \i, 1, v8.4s, v12.4s, v0.s[0] + vmxx_f32 \i, 2, v8.4s, v13.4s, v1.s[0] + vmxx_f32 \i, 4, v8.4s, v14.4s, v2.s[0] + vmxx_f32 \i, 8, v8.4s, v15.4s, v3.s[0] + vadd_f32 \i, 16, v8.4s, v8.4s, v4.4s, v8.16b, v4.16b + vmxx_f32 \i, 1, v16.4s, v20.4s, v0.s[0] + vmxx_f32 \i, 2, v16.4s, v21.4s, v1.s[0] + vmxx_f32 \i, 4, v16.4s, v22.4s, v2.s[0] + vmxx_f32 \i, 8, v16.4s, v23.4s, v3.s[0] + vadd_f32 \i, 16, v16.4s, v16.4s, v4.4s, v16.16b, v4.16b + br x5 + +.align 4 +colormatrix_float_col0_n\i: + vmxx_f32 \i^31, 1, v8.4s, v12.4s, v0.s[0] + vmxx_f32 \i^31, 2, v8.4s, v13.4s, v1.s[0] + vmxx_f32 \i^31, 4, v8.4s, v14.4s, v2.s[0] + vmxx_f32 \i^31, 8, v8.4s, v15.4s, v3.s[0] + vadd_f32 \i^31, 16, v8.4s, v8.4s, v4.4s, v8.16b, v4.16b + vmxx_f32 \i^31, 1, v16.4s, v20.4s, v0.s[0] + vmxx_f32 \i^31, 2, v16.4s, 
v21.4s, v1.s[0] + vmxx_f32 \i^31, 4, v16.4s, v22.4s, v2.s[0] + vmxx_f32 \i^31, 8, v16.4s, v23.4s, v3.s[0] + vadd_f32 \i^31, 16, v16.4s, v16.4s, v4.4s, v16.16b, v4.16b + br x5 + +.align 5 +colormatrix_float_col1_\i: + vmxx_f32 \i, 1, v9.4s, v12.4s, v0.s[1] + vmxx_f32 \i, 2, v9.4s, v13.4s, v1.s[1] + vmxx_f32 \i, 4, v9.4s, v14.4s, v2.s[1] + vmxx_f32 \i, 8, v9.4s, v15.4s, v3.s[1] + vadd_f32 \i, 16, v9.4s, v9.4s, v5.4s, v9.16b, v5.16b + vmxx_f32 \i, 1, v17.4s, v20.4s, v0.s[1] + vmxx_f32 \i, 2, v17.4s, v21.4s, v1.s[1] + vmxx_f32 \i, 4, v17.4s, v22.4s, v2.s[1] + vmxx_f32 \i, 8, v17.4s, v23.4s, v3.s[1] + vadd_f32 \i, 16, v17.4s, v17.4s, v5.4s, v17.16b, v5.16b + br x6 + +.align 4 +colormatrix_float_col1_n\i: + vmxx_f32 \i^31, 1, v9.4s, v12.4s, v0.s[1] + vmxx_f32 \i^31, 2, v9.4s, v13.4s, v1.s[1] + vmxx_f32 \i^31, 4, v9.4s, v14.4s, v2.s[1] + vmxx_f32 \i^31, 8, v9.4s, v15.4s, v3.s[1] + vadd_f32 \i^31, 16, v9.4s, v9.4s, v5.4s, v9.16b, v5.16b + vmxx_f32 \i^31, 1, v17.4s, v20.4s, v0.s[1] + vmxx_f32 \i^31, 2, v17.4s, v21.4s, v1.s[1] + vmxx_f32 \i^31, 4, v17.4s, v22.4s, v2.s[1] + vmxx_f32 \i^31, 8, v17.4s, v23.4s, v3.s[1] + vadd_f32 \i^31, 16, v17.4s, v17.4s, v5.4s, v17.16b, v5.16b + br x6 + +.align 5 +colormatrix_float_col2_\i: + vmxx_f32 \i, 1, v10.4s, v12.4s, v0.s[2] + vmxx_f32 \i, 2, v10.4s, v13.4s, v1.s[2] + vmxx_f32 \i, 4, v10.4s, v14.4s, v2.s[2] + vmxx_f32 \i, 8, v10.4s, v15.4s, v3.s[2] + vadd_f32 \i, 16, v10.4s, v10.4s, v6.4s, v10.16b, v6.16b + vmxx_f32 \i, 1, v18.4s, v20.4s, v0.s[2] + vmxx_f32 \i, 2, v18.4s, v21.4s, v1.s[2] + vmxx_f32 \i, 4, v18.4s, v22.4s, v2.s[2] + vmxx_f32 \i, 8, v18.4s, v23.4s, v3.s[2] + vadd_f32 \i, 16, v18.4s, v18.4s, v6.4s, v18.16b, v6.16b + br x7 + +.align 4 +colormatrix_float_col2_n\i: + vmxx_f32 \i^31, 1, v10.4s, v12.4s, v0.s[2] + vmxx_f32 \i^31, 2, v10.4s, v13.4s, v1.s[2] + vmxx_f32 \i^31, 4, v10.4s, v14.4s, v2.s[2] + vmxx_f32 \i^31, 8, v10.4s, v15.4s, v3.s[2] + vadd_f32 \i^31, 16, v10.4s, v10.4s, v6.4s, v10.16b, v6.16b + vmxx_f32 \i^31, 1, 
v18.4s, v20.4s, v0.s[2] + vmxx_f32 \i^31, 2, v18.4s, v21.4s, v1.s[2] + vmxx_f32 \i^31, 4, v18.4s, v22.4s, v2.s[2] + vmxx_f32 \i^31, 8, v18.4s, v23.4s, v3.s[2] + vadd_f32 \i^31, 16, v18.4s, v18.4s, v6.4s, v18.16b, v6.16b + br x7 + +.align 5 +colormatrix_float_col3_\i: + vmxx_f32 \i, 1, v11.4s, v12.4s, v0.s[3] + vmxx_f32 \i, 2, v11.4s, v13.4s, v1.s[3] + vmxx_f32 \i, 4, v11.4s, v14.4s, v2.s[3] + vmxx_f32 \i, 8, v11.4s, v15.4s, v3.s[3] + vadd_f32 \i, 16, v11.4s, v11.4s, v7.4s, v11.16b, v7.16b + vmxx_f32 \i, 1, v19.4s, v20.4s, v0.s[3] + vmxx_f32 \i, 2, v19.4s, v21.4s, v1.s[3] + vmxx_f32 \i, 4, v19.4s, v22.4s, v2.s[3] + vmxx_f32 \i, 8, v19.4s, v23.4s, v3.s[3] + vadd_f32 \i, 16, v19.4s, v19.4s, v7.4s, v19.16b, v7.16b + br x8 + +.align 4 +colormatrix_float_col3_n\i: + vmxx_f32 \i^31, 1, v11.4s, v12.4s, v0.s[3] + vmxx_f32 \i^31, 2, v11.4s, v13.4s, v1.s[3] + vmxx_f32 \i^31, 4, v11.4s, v14.4s, v2.s[3] + vmxx_f32 \i^31, 8, v11.4s, v15.4s, v3.s[3] + vadd_f32 \i^31, 16, v11.4s, v11.4s, v7.4s, v11.16b, v7.16b + vmxx_f32 \i^31, 1, v19.4s, v20.4s, v0.s[3] + vmxx_f32 \i^31, 2, v19.4s, v21.4s, v1.s[3] + vmxx_f32 \i^31, 4, v19.4s, v22.4s, v2.s[3] + vmxx_f32 \i^31, 8, v19.4s, v23.4s, v3.s[3] + vadd_f32 \i^31, 16, v19.4s, v19.4s, v7.4s, v19.16b, v7.16b + br x8 + +.endr + +.align 6 +colormatrix_float_ldu4: + ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [x1], #32 + uxtl v20.8h, v20.8b + uxtl v21.8h, v21.8b + uxtl v22.8h, v22.8b + uxtl v23.8h, v23.8b + uxtl v12.4s, v20.4h + uxtl v13.4s, v21.4h + uxtl v14.4s, v22.4h + uxtl v15.4s, v23.4h + uxtl2 v20.4s, v20.8h + uxtl2 v21.4s, v21.8h + uxtl2 v22.4s, v22.8h + uxtl2 v23.4s, v23.8h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v14.4s, v14.4s + ucvtf v15.4s, v15.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + ucvtf v22.4s, v22.4s + ucvtf v23.4s, v23.4s + br x4 + +.align 5 +colormatrix_int_ldu4: + ld4 {v12.8b,v13.8b,v14.8b,v15.8b}, [x1], #32 + uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + uxtl v14.8h, v14.8b + uxtl v15.8h, v15.8b + br x4 + +.align 
6 +colormatrix_float_ldu3: + ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [x1], #32 + uxtl v20.8h, v20.8b + uxtl v21.8h, v21.8b + uxtl v22.8h, v22.8b + uxtl v12.4s, v20.4h + uxtl v13.4s, v21.4h + uxtl v14.4s, v22.4h + uxtl2 v20.4s, v20.8h + uxtl2 v21.4s, v21.8h + uxtl2 v22.4s, v22.8h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v14.4s, v14.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + ucvtf v22.4s, v22.4s + br x4 + +colormatrix_int_ldu3: + ld4 {v12.8b,v13.8b,v14.8b,v15.8b}, [x1], #32 + uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + uxtl v14.8h, v14.8b + br x4 + +.align 5 +colormatrix_float_ldu1: + ld1 {v20.8b}, [x1], #8 + uxtl v20.8h, v20.8b + uxtl v12.4s, v20.4h + uxtl2 v20.4s, v20.8h + ucvtf v12.4s, v12.4s + ucvtf v20.4s, v20.4s + br x4 + +.align 6 +colormatrix_float_ldu2: + ld2 {v20.8b,v21.8b}, [x1], #16 + uxtl v20.8h, v20.8b + uxtl v21.8h, v21.8b + uxtl v12.4s, v20.4h + uxtl v13.4s, v21.4h + uxtl2 v20.4s, v20.8h + uxtl2 v21.4s, v21.8h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + br x4 + +.align 4 +colormatrix_int_ldu2: + ld2 {v12.8b,v13.8b}, [x1], #16 + uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + br x4 + +.align 6 +colormatrix_float_stu4: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v25.4s, v9.4s, #1 + fcvtzs v26.4s, v10.4s, #1 + fcvtzs v27.4s, v11.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + fcvtzs v29.4s, v17.4s, #1 + fcvtzs v30.4s, v18.4s, #1 + fcvtzs v31.4s, v19.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun v25.4h, v25.4s, #1 + sqrshrun v26.4h, v26.4s, #1 + sqrshrun v27.4h, v27.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + sqrshrun2 v25.8h, v29.4s, #1 + sqrshrun2 v26.8h, v30.4s, #1 + sqrshrun2 v27.8h, v31.4s, #1 + uqxtn v24.8b, v24.8h + uqxtn v25.8b, v25.8h + uqxtn v26.8b, v26.8h + uqxtn v27.8b, v27.8h + subs x2, x2, #8 + st4 {v24.8b,v25.8b,v26.8b,v27.8b}, [x0], #32 + blo colormatrix_float_end + br x9 + +.align 5 +colormatrix_int_stu4: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + uqxtn v14.8b, v10.8h + uqxtn v15.8b, v11.8h + 
subs x2, x2, #8 + st4 {v12.8b,v13.8b,v14.8b,v15.8b}, [x0], #32 + blo colormatrix_int_end + br x9 + +.align 6 +colormatrix_float_stu3: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v25.4s, v9.4s, #1 + fcvtzs v26.4s, v10.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + fcvtzs v29.4s, v17.4s, #1 + fcvtzs v30.4s, v18.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun v25.4h, v25.4s, #1 + sqrshrun v26.4h, v26.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + sqrshrun2 v25.8h, v29.4s, #1 + sqrshrun2 v26.8h, v30.4s, #1 + uqxtn v24.8b, v24.8h + uqxtn v25.8b, v25.8h + uqxtn v26.8b, v26.8h + movi v27.8b, #0 + subs x2, x2, #8 + st4 {v24.8b,v25.8b,v26.8b,v27.8b}, [x0], #32 + blo colormatrix_float_end + br x9 + +.align 4 +colormatrix_int_ldu1: + ld1 {v12.8b}, [x1], #8 + uxtl v12.8h, v12.8b + br x4 + +.align 5 +colormatrix_int_stu3: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + uqxtn v14.8b, v10.8h + movi v15.8b, #0 + subs x2, x2, #8 + st4 {v12.8b,v13.8b,v14.8b,v15.8b}, [x0], #32 + blo colormatrix_int_end + br x9 + +.align 6 +colormatrix_float_stu2: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v25.4s, v9.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + fcvtzs v29.4s, v17.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun v25.4h, v25.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + sqrshrun2 v25.8h, v29.4s, #1 + uqxtn v24.8b, v24.8h + uqxtn v25.8b, v25.8h + subs x2, x2, #8 + st2 {v24.8b,v25.8b}, [x0], #16 + blo colormatrix_float_end + br x9 + +.align 5 +colormatrix_int_stu2: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + subs x2, x2, #8 + st2 {v12.8b,v13.8b}, [x0], #16 + blo colormatrix_int_end + br x9 + +.align 5 +colormatrix_int_stu1: + uqxtn v12.8b, v8.8h + subs x2, x2, #8 + st1 {v12.8b}, [x0], #8 + blo colormatrix_int_end + br x9 + +colormatrix_float_ldf3: + ld4 {v12.4s,v13.4s,v14.4s,v15.4s}, [x1], #64 + ld4 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64 + br x4 + +.align 6 +colormatrix_float_stu1: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + uqxtn v24.8b, v24.8h + subs x2, x2, 
#8 + st1 {v24.8b}, [x0], #8 + blo colormatrix_float_end + br x9 + +colormatrix_float_stf3: + movi v11.16b, #0 + st4 {v8.4s,v9.4s,v10.4s,v11.4s}, [x0], #64 + movi v19.16b, #0 + subs x2, x2, #8 + st4 {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64 + blo colormatrix_float_end + br x9 + +.align 5 +colormatrix_float_stf4: + st4 {v8.4s,v9.4s,v10.4s,v11.4s}, [x0], #64 + subs x2, x2, #8 + st4 {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64 + blo colormatrix_float_end + br x9 + +colormatrix_float_ldf4: + ld4 {v12.4s,v13.4s,v14.4s,v15.4s}, [x1], #64 + ld4 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64 + br x4 + +.align 5 +colormatrix_float_stf2: + st2 {v8.4s, v9.4s}, [x0], #32 + subs x2, x2, #8 + st2 {v16.4s, v17.4s}, [x0], #32 + blo colormatrix_float_end + br x9 + +colormatrix_float_ldf2: + ld2 {v12.4s,v13.4s}, [x1], #32 + ld2 {v20.4s,v21.4s}, [x1], #32 + br x4 + +.align 5 +colormatrix_float_stf1: + st1 {v8.4s}, [x0], #16 + subs x2, x2, #8 + st1 {v16.4s}, [x0], #16 + blo colormatrix_float_end + br x9 + +colormatrix_float_ldf1: + ld1 {v12.4s}, [x1], #16 + ld1 {v20.4s}, [x1], #16 + br x4 + +colormatrix_int_stu1_end: + uqxtn v12.8b, v8.8h + tbz x2, #2, 1f + st1 {v12.s}[1], [x0], #4 +1: tbz x2, #1, 1f + st1 {v12.h}[1], [x0], #2 +1: tbz x2, #0, 1f + st1 {v12.b}[1], [x0], #1 +1: b colormatrix_int_realend + +colormatrix_int_stu2_end: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + zip1 v12.16b, v12.16b, v13.16b + tbz x2, #2, 1f + st1 {v12.d}[1], [x0], #8 +1: tbz x2, #1, 1f + st1 {v12.s}[1], [x0], #4 +1: tbz x2, #0, 1f + st1 {v12.h}[1], [x0], #2 +1: b colormatrix_int_realend + +colormatrix_int_stu3_end: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + uqxtn v14.8b, v10.8h + movi v15.8b, #0 + tbz x2, #2, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4 +1: tbz x2, #1, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4 +1: tbz x2, #0, 
1f + st4 {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4 +1: b colormatrix_int_realend + +colormatrix_int_stu4_end: + uqxtn v12.8b, v8.8h + uqxtn v13.8b, v9.8h + uqxtn v14.8b, v10.8h + uqxtn v15.8b, v11.8h + tbz x2, #2, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4 +1: tbz x2, #1, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4 +1: tbz x2, #0, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4 +1: b colormatrix_int_realend + + +colormatrix_int_ldu1_end: + tbz x2, #2, 1f + ld1 {v15.s}[3], [x1], #4 +1: tbz x2, #1, 1f + ld1 {v15.h}[5], [x1], #2 +1: tbz x2, #0, 1f + ld1 {v15.b}[9], [x1], #1 +1: uxtl2 v12.8h, v15.16b + br x4 + +colormatrix_int_ldu2_end: + tbz x2, #2, 1f + ld1 {v15.d}[1], [x1], #8 +1: tbz x2, #1, 1f + ld1 {v15.s}[1], [x1], #4 +1: tbz x2, #0, 1f + ld1 {v15.h}[1], [x1], #2 +1: uzp1 v14.16b, v15.16b, v15.16b + uzp2 v15.16b, v15.16b, v15.16b + uxtl v12.8h, v14.8b + uxtl v13.8h, v15.8b + br x4 + +colormatrix_int_ldu3_end: + tbz x2, #2, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[4], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[5], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[6], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[7], [x1], #4 +1: tbz x2, #1, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[2], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[3], [x1], #4 +1: tbz x2, #0, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[1], [x1], #4 +1: uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + uxtl v14.8h, v14.8b + br x4 + +colormatrix_int_ldu4_end: + tbz x2, #2, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[4], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[5], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[6], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[7], [x1], #4 +1: tbz x2, #1, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[2], [x1], #4 + ld4 {v12.b,v13.b,v14.b,v15.b}[3], [x1], #4 +1: tbz x2, #0, 1f + ld4 {v12.b,v13.b,v14.b,v15.b}[1], [x1], #4 +1: uxtl v12.8h, v12.8b + uxtl v13.8h, 
v13.8b + uxtl v14.8h, v14.8b + uxtl v15.8h, v15.8b + br x4 + +colormatrix_float_stu1_end: + fcvtzs v12.4s, v8.4s, #1 + fcvtzs v13.4s, v16.4s, #1 + sqrshrun v12.4h, v12.4s, #1 + sqrshrun2 v12.8h, v13.4s, #1 + uqxtn v12.8b, v12.8h + tbz x2, #2, 1f + st1 {v12.s}[1], [x0], #4 +1: tbz x2, #1, 1f + st1 {v12.h}[1], [x0], #2 +1: tbz x2, #0, 1f + st1 {v12.b}[1], [x0], #1 +1: b colormatrix_float_realend + +colormatrix_float_stu2_end: + fcvtzs v12.4s, v8.4s, #1 + fcvtzs v13.4s, v9.4s, #1 + fcvtzs v14.4s, v16.4s, #1 + fcvtzs v15.4s, v17.4s, #1 + sqrshrun v12.4h, v12.4s, #1 + sqrshrun v13.4h, v13.4s, #1 + sqrshrun v14.4h, v14.4s, #1 + sqrshrun v15.4h, v15.4s, #1 + zip1 v12.8h, v12.8h, v13.8h + zip1 v13.8h, v14.8h, v15.8h + uqxtn v12.8b, v12.8h + uqxtn2 v12.16b, v13.8h + tbz x2, #2, 1f + st1 {v12.d}[1], [x0], #8 +1: tbz x2, #1, 1f + st1 {v12.s}[1], [x0], #4 +1: tbz x2, #0, 1f + st1 {v12.h}[1], [x0], #2 +1: b colormatrix_float_realend + +colormatrix_float_stu3_end: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v25.4s, v9.4s, #1 + fcvtzs v26.4s, v10.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + fcvtzs v29.4s, v17.4s, #1 + fcvtzs v30.4s, v18.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun v25.4h, v25.4s, #1 + sqrshrun v26.4h, v26.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + sqrshrun2 v25.8h, v29.4s, #1 + sqrshrun2 v26.8h, v30.4s, #1 + uqxtn v12.8b, v24.8h + uqxtn v13.8b, v25.8h + uqxtn v14.8b, v26.8h + movi v15.8b, #0 + tbz x2, #2, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4 +1: tbz x2, #1, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4 +1: tbz x2, #0, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4 +1: b colormatrix_float_realend + +colormatrix_float_stu4_end: + fcvtzs v24.4s, v8.4s, #1 + fcvtzs v25.4s, v9.4s, #1 + fcvtzs v26.4s, v10.4s, #1 + fcvtzs v27.4s, v11.4s, #1 + fcvtzs v28.4s, v16.4s, #1 + fcvtzs v29.4s, v17.4s, #1 
+ fcvtzs v30.4s, v18.4s, #1 + fcvtzs v31.4s, v19.4s, #1 + sqrshrun v24.4h, v24.4s, #1 + sqrshrun v25.4h, v25.4s, #1 + sqrshrun v26.4h, v26.4s, #1 + sqrshrun v27.4h, v27.4s, #1 + sqrshrun2 v24.8h, v28.4s, #1 + sqrshrun2 v25.8h, v29.4s, #1 + sqrshrun2 v26.8h, v30.4s, #1 + sqrshrun2 v27.8h, v31.4s, #1 + uqxtn v12.8b, v24.8h + uqxtn v13.8b, v25.8h + uqxtn v14.8b, v26.8h + uqxtn v15.8b, v27.8h + tbz x2, #2, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4 +1: tbz x2, #1, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4 + st4 {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4 +1: tbz x2, #0, 1f + st4 {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4 +1: b colormatrix_float_realend + +colormatrix_float_stf1_end: + tbz x2, #2, 1f + st1 {v16.4s}, [x0], #16 +1: tbz x2, #1, 1f + st1 {v8.d}[1], [x0], #8 +1: tbz x2, #0, 1f + st1 {v8.s}[1], [x0], #4 +1: b colormatrix_float_realend + +colormatrix_float_stf2_end: + tbz x2, #2, 1f + st2 {v16.4s, v17.4s}, [x0], #32 +1: tbz x2, #1, 1f + st2 {v8.s,v9.s}[2], [x0], #8 + st2 {v8.s,v9.s}[3], [x0], #8 +1: tbz x2, #0, 1f + st2 {v8.s,v9.s}[1], [x0], #8 +1: b colormatrix_float_realend + +colormatrix_float_stf3_end: + movi v11.16b, #0 + movi v19.16b, #0 +colormatrix_float_stf4_end: + tbz x2, #2, 1f + st4 {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64 +1: tbz x2, #1, 1f + st4 {v8.s,v9.s,v10.s,v11.s}[2], [x0], #16 + st4 {v8.s,v9.s,v10.s,v11.s}[3], [x0], #16 +1: tbz x2, #0, 1f + st4 {v8.s,v9.s,v10.s,v11.s}[1], [x0], #16 +1: b colormatrix_float_realend + +colormatrix_float_ldu1_end: + tbz x2, #2, 1f + ld1 {v15.s}[1], [x1], #4 +1: tbz x2, #1, 1f + ld1 {v15.h}[1], [x1], #2 +1: tbz x2, #0, 1f + ld1 {v15.b}[1], [x1], #1 +1: uxtl v15.8h, v15.8b + uxtl v12.4s, v15.4h + uxtl2 v20.4s, v15.8h + ucvtf v12.4s, v12.4s + ucvtf v20.4s, v20.4s + br x4 + +colormatrix_float_ldu2_end: + tbz x2, #2, 1f + ld1 {v15.d}[1], [x1], #8 +1: tbz x2, #1, 1f + ld1 
{v15.s}[1], [x1], #4 +1: tbz x2, #0, 1f + ld1 {v15.h}[1], [x1], #2 +1: uxtl v14.8h, v15.8b + uxtl2 v15.8h, v15.16b + uzp1 v12.8h, v14.8h, v14.8h + uzp2 v13.8h, v14.8h, v14.8h + uzp1 v20.8h, v15.8h, v15.8h + uzp2 v21.8h, v15.8h, v15.8h + uxtl v12.4s, v12.4h + uxtl v13.4s, v13.4h + uxtl v20.4s, v20.4h + uxtl v21.4s, v21.4h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + br x4 + +colormatrix_float_ldu3_end: + tbz x2, #2, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[4], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[5], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[6], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[7], [x1], #4 +1: tbz x2, #1, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[2], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[3], [x1], #4 +1: tbz x2, #0, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[1], [x1], #4 +1: uxtl v20.8h, v20.8b + uxtl v21.8h, v21.8b + uxtl v22.8h, v22.8b + uxtl v12.4s, v20.4h + uxtl v13.4s, v21.4h + uxtl v14.4s, v22.4h + uxtl2 v20.4s, v20.8h + uxtl2 v21.4s, v21.8h + uxtl2 v22.4s, v22.8h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v14.4s, v14.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + ucvtf v22.4s, v22.4s + br x4 + +colormatrix_float_ldu4_end: + tbz x2, #2, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[4], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[5], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[6], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[7], [x1], #4 +1: tbz x2, #1, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[2], [x1], #4 + ld4 {v20.b,v21.b,v22.b,v23.b}[3], [x1], #4 +1: tbz x2, #0, 1f + ld4 {v20.b,v21.b,v22.b,v23.b}[1], [x1], #4 +1: uxtl v20.8h, v20.8b + uxtl v21.8h, v21.8b + uxtl v22.8h, v22.8b + uxtl v23.8h, v23.8b + uxtl v12.4s, v20.4h + uxtl v13.4s, v21.4h + uxtl v14.4s, v22.4h + uxtl v15.4s, v23.4h + uxtl2 v20.4s, v20.8h + uxtl2 v21.4s, v21.8h + uxtl2 v22.4s, v22.8h + uxtl2 v23.4s, v23.8h + ucvtf v12.4s, v12.4s + ucvtf v13.4s, v13.4s + ucvtf v14.4s, v14.4s + ucvtf v15.4s, v15.4s + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + ucvtf v22.4s, 
v22.4s + ucvtf v23.4s, v23.4s + br x4 + +colormatrix_float_ldf1_end: + tbz x2, #2, 1f + ld1 {v20.4s}, [x1], #16 +1: tbz x2, #1, 1f + ld1 {v12.d}[1], [x1], #8 +1: tbz x2, #0, 1f + ld1 {v12.s}[1], [x1], #4 +1: br x4 + +colormatrix_float_ldf2_end: + tbz x2, #2, 1f + ld2 {v20.4s,v21.4s}, [x1], #32 +1: tbz x2, #1, 1f + ld2 {v12.s,v13.s}[2], [x1], #8 + ld2 {v12.s,v13.s}[3], [x1], #8 +1: tbz x2, #0, 1f + ld2 {v12.s,v13.s}[1], [x1], #8 +1: br x4 + +colormatrix_float_ldf3_end: +colormatrix_float_ldf4_end: + tbz x2, #2, 1f + ld4 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64 +1: tbz x2, #1, 1f + ld4 {v12.s,v13.s,v14.s,v15.s}[2], [x1], #16 + ld4 {v12.s,v13.s,v14.s,v15.s}[3], [x1], #16 +1: tbz x2, #0, 1f + ld4 {v12.s,v13.s,v14.s,v15.s}[1], [x1], #16 +1: br x4 + +/* void rsdIntrinsicColorMatrix_int_K( + * void *out, // x0 + * void const *in, // x1 + * size_t count, // x2 + * fntab_t const *fns, // x3 + * int16_t const *mult, // x4 + * int32_t const *add); // x5 + */ +ENTRY(rsdIntrinsicColorMatrix_int_K) + sub x7, sp, #32 + sub sp, sp, #64 + st1 {v8.1d-v11.1d}, [sp] + st1 {v12.1d-v15.1d}, [x7] + + ld1 {v0.8h,v1.8h}, [x4], #32 + ld1 {v4.4s}, [x5], #16 + + ldp x4,x5, [x3],#16 + ldp x6,x7, [x3],#16 + ldp x8,x9, [x3],#16 + + dup v12.4s, v4.s[0] + dup v13.4s, v4.s[1] + dup v14.4s, v4.s[2] + dup v15.4s, v4.s[3] + sqshrun v8.4h, v12.4s, #8 + sqshrun2 v8.8h, v12.4s, #8 + sqshrun v9.4h, v13.4s, #8 + sqshrun2 v9.8h, v13.4s, #8 + sqshrun v10.4h, v14.4s, #8 + sqshrun2 v10.8h, v14.4s, #8 + sqshrun v11.4h, v15.4s, #8 + sqshrun2 v11.8h, v15.4s, #8 + + subs x2, x2, #8 + blo colormatrix_int_end + br x9 + +colormatrix_int_end: + adds x2, x2, #8 + bls colormatrix_int_realend + mov x16, x8 + ldp x8, x9, [x3], #16 + cmp x4, x16 + csel x4, x8, x4, eq + cmp x5, x16 + csel x5, x8, x5, eq + cmp x6, x16 + csel x6, x8, x6, eq + cmp x7, x16 + csel x7, x8, x7, eq + br x9 + +colormatrix_int_realend: + ld1 {v8.1d-v11.1d}, [sp], #32 + ld1 {v12.1d-v15.1d}, [sp], #32 + ret +END(rsdIntrinsicColorMatrix_int_K) + +/* 
void rsdIntrinsicColorMatrixSetup_int_K( + * fntab_t const *fns, // x0 + * uint32_t mask, // x1 + * int dt, // x2 + * int st); // x3 + */ +ENTRY(rsdIntrinsicColorMatrixSetup_int_K) + adrp x7, 2f + add x7, x7, :lo12:2f + add x4, x7, x2, LSL #3 + ldrsw x2, [x4], #4 + ldrsw x4, [x4] + add x2, x2, x7 + add x4, x4, x7 + adrp x7, 3f + add x7, x7, :lo12:3f + add x5, x7, x3, LSL #3 + ldrsw x3, [x5], #4 + ldrsw x5, [x5] + add x3, x3, x7 + add x5, x5, x7 + stp x2, x3, [x0, #32] + stp x4, x5, [x0, #48] + +/* For each column function, if the matrix is all zeroes then write NULL, + * otherwise look up the appropriate function and store that. */ + + mov x3, #4 + adrp x7, 4f + add x7, x7, :lo12:4f +1: ands x2, x1, #15 + beq 9f + and x2, x1, #31 + lsl x2, x2, #4 + ldrsw x2, [x7, x2] + add x2, x2, x7 +9: str x2, [x0], #8 + lsr x1, x1, #5 + add x7, x7, #4 + subs x3, x3, #1 + bne 1b + +/* For every NULL entry, copy the non-NULL entry that follows it, or the store + * function. */ + + ldr x2, [x0] + mov x3, #4 +1: ldr x1, [x0, #-8]! 
+ cmp x1, #0 + csel x2, x1, x2, ne + str x2, [x0] + subs x3, x3, #1 + bne 1b + ret + +END(rsdIntrinsicColorMatrixSetup_int_K) +.rodata + .align 4 +2: .word colormatrix_int_stu1-2b + .word colormatrix_int_stu1_end-2b + .word colormatrix_int_stu2-2b + .word colormatrix_int_stu2_end-2b + .word colormatrix_int_stu3-2b + .word colormatrix_int_stu3_end-2b + .word colormatrix_int_stu4-2b + .word colormatrix_int_stu4_end-2b +3: .word colormatrix_int_ldu1-3b + .word colormatrix_int_ldu1_end-3b + .word colormatrix_int_ldu2-3b + .word colormatrix_int_ldu2_end-3b + .word colormatrix_int_ldu3-3b + .word colormatrix_int_ldu3_end-3b + .word colormatrix_int_ldu4-3b + .word colormatrix_int_ldu4_end-3b +4: +.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .word colormatrix_int_col0_\i-4b + .word colormatrix_int_col1_\i-4b-4 + .word colormatrix_int_col2_\i-4b-8 + .word colormatrix_int_col3_\i-4b-12 +.endr +.irp i, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + .word colormatrix_int_col0_n\i-4b + .word colormatrix_int_col1_n\i-4b-4 + .word colormatrix_int_col2_n\i-4b-8 + .word colormatrix_int_col3_n\i-4b-12 +.endr + + +/* void rsdIntrinsicColorMatrix_float_K( + * void *out, // x0 + * void const *in, // x1 + * size_t count, // x2 + * fntab_t const *fns, // x3 + * float const *mult, // x4 + * float const *add); // x5 + */ +ENTRY(rsdIntrinsicColorMatrix_float_K) + sub x7, sp, #32 + sub sp, sp, #64 + st1 {v8.1d-v11.1d}, [sp] + st1 {v12.1d-v15.1d}, [x7] + + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x4], #64 + ld1r {v4.4s}, [x5], #4 + ld1r {v5.4s}, [x5], #4 + ld1r {v6.4s}, [x5], #4 + ld1r {v7.4s}, [x5], #4 + + ldp x4,x5, [x3], #16 + ldp x6,x7, [x3], #16 + ldp x8,x9, [x3], #16 + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + mov v10.16b, v6.16b + mov v11.16b, v7.16b + + mov v16.16b, v4.16b + mov v17.16b, v5.16b + mov v18.16b, v6.16b + mov v19.16b, v7.16b + + subs x2, x2, #8 + blo colormatrix_float_end + br x9 + +colormatrix_float_end: + adds x2, x2, #8 + bls colormatrix_int_realend 
+ mov x16, x8 + ldp x8,x9, [x3], #16 + cmp x4, x16 + csel x4, x8, x4, eq + cmp x5, x16 + csel x5, x8, x5, eq + cmp x6, x16 + csel x6, x8, x6, eq + cmp x7, x16 + csel x7, x8, x7, eq + br x9 + +colormatrix_float_realend: + ld1 {v8.1d-v11.1d}, [sp], #32 + ld1 {v12.1d-v15.1d}, [sp], #32 + ret +END(rsdIntrinsicColorMatrix_float_K) + +/* void rsdIntrinsicColorMatrixSetup_float_K( + * fntab_t const *fns, // x0 + * uint32_t mask, // x1 + * int dt, // x2 + * int st); // x3 + */ +ENTRY(rsdIntrinsicColorMatrixSetup_float_K) + adrp x7, 2f + add x7, x7, :lo12:2f + add x4, x7, x2, LSL #3 + ldrsw x2, [x4], #4 + ldrsw x4, [x4] + add x2, x2, x7 + add x4, x4, x7 + adrp x7, 3f + add x7, x7, :lo12:3f + add x5, x7, x3, LSL #3 + ldrsw x3, [x5], #4 + ldrsw x5, [x5] + add x3, x3, x7 + add x5, x5, x7 + stp x2, x3, [x0, #32] + stp x4, x5, [x0, #48] + +/* For each column function, if the matrix is all zeroes then write NULL, + * otherwise look up the appropriate function and store that. */ + + mov x3, #4 + adrp x7, 4f + add x7, x7, :lo12:4f +1: ands x2, x1, #15 + beq 9f + and x2, x1, #31 + lsl x2, x2, #4 + ldrsw x2, [x7, x2] + add x2, x2, x7 +9: str x2, [x0], #8 + lsr x1, x1, #5 + add x7, x7, #4 + subs x3, x3, #1 + bne 1b + +/* For every NULL entry, copy the non-NULL entry that follows it, or the store + * function. */ + + ldr x2, [x0] + mov x3, #4 +1: ldr x1, [x0, #-8]! 
+ cmp x1, #0 + csel x2, x1, x2, ne + str x2, [x0] + subs x3, x3, #1 + bne 1b + ret + +END(rsdIntrinsicColorMatrixSetup_float_K) +.rodata + .align 4 +2: .word colormatrix_float_stu1-2b + .word colormatrix_float_stu1_end-2b + .word colormatrix_float_stu2-2b + .word colormatrix_float_stu2_end-2b + .word colormatrix_float_stu3-2b + .word colormatrix_float_stu3_end-2b + .word colormatrix_float_stu4-2b + .word colormatrix_float_stu4_end-2b + .word colormatrix_float_stf1-2b + .word colormatrix_float_stf1_end-2b + .word colormatrix_float_stf2-2b + .word colormatrix_float_stf2_end-2b + .word colormatrix_float_stf3-2b + .word colormatrix_float_stf3_end-2b + .word colormatrix_float_stf4-2b + .word colormatrix_float_stf4_end-2b +3: .word colormatrix_float_ldu1-3b + .word colormatrix_float_ldu1_end-3b + .word colormatrix_float_ldu2-3b + .word colormatrix_float_ldu2_end-3b + .word colormatrix_float_ldu3-3b + .word colormatrix_float_ldu3_end-3b + .word colormatrix_float_ldu4-3b + .word colormatrix_float_ldu4_end-3b + .word colormatrix_float_ldf1-3b + .word colormatrix_float_ldf1_end-3b + .word colormatrix_float_ldf2-3b + .word colormatrix_float_ldf2_end-3b + .word colormatrix_float_ldf3-3b + .word colormatrix_float_ldf3_end-3b + .word colormatrix_float_ldf4-3b + .word colormatrix_float_ldf4_end-3b +4: +.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .word colormatrix_float_col0_\i-4b + .word colormatrix_float_col1_\i-4b-4 + .word colormatrix_float_col2_\i-4b-8 + .word colormatrix_float_col3_\i-4b-12 +.endr +.irp i, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + .word colormatrix_float_col0_n\i-4b + .word colormatrix_float_col1_n\i-4b-4 + .word colormatrix_float_col2_n\i-4b-8 + .word colormatrix_float_col3_n\i-4b-12 +.endr diff --git a/toolkit/ColorMatrix_neon.S b/toolkit/ColorMatrix_neon.S new file mode 100644 index 00000000..ecb8c134 --- /dev/null +++ b/toolkit/ColorMatrix_neon.S @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2013 The Android Open Source Project 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define SNIP_START(x) \ + .globl x; x: + +#define SNIP_END(x) \ + .globl x##_end; x##_end: \ + .globl x##_len; x##_len: \ + .word x##_end-x + +SNIP_START(_N_ColorMatrix_prefix_i) + stmfd sp!, {r4, lr} + vpush {q4-q7} + vld1.16 {q2}, [r2]! + vld1.16 {q3}, [r2]! + vld1.32 {d8[],d9[]}, [r2]! + vld1.32 {d10[],d11[]}, [r2]! + vld1.32 {d12[],d13[]}, [r2]! + vld1.32 {d14[],d15[]}, [r2]! + veor q0, q0 + veor q1, q1 + veor q9, q9 + veor q10, q10 + veor q11, q11 +SNIP_END(_N_ColorMatrix_prefix_i) + +SNIP_START(_N_ColorMatrix_prefix_f) + stmfd sp!, {r4, lr} + vpush {q4-q7} + add r2, #48 + vld1.32 {q4}, [r2]! + vld1.32 {q5}, [r2]! + vld1.32 {q6}, [r2]! + vld1.32 {q7}, [r2]! + vld1.32 {d16[],d17[]}, [r2]! + vld1.32 {d18[],d19[]}, [r2]! + vld1.32 {d20[],d21[]}, [r2]! + vld1.32 {d22[],d23[]}, [r2]! + veor q1, q1 + veor q2, q2 + veor q3, q3 +SNIP_END(_N_ColorMatrix_prefix_f) + +SNIP_START(_N_ColorMatrix_postfix1) + subs r3, r3, #1 + #bne 1b +SNIP_END(_N_ColorMatrix_postfix1) + +SNIP_START(_N_ColorMatrix_postfix2) + + #mov r0, #0 + #ldr r0, [r0] + + #vqadd.s32 q0,q0,q0 + #vadd.f32 q0,q0,q0 + #vmul.f32 q0,q0,d0[0] + #vmla.f32 q0,q0,d0[0] + #vmov q0, q0 + + + vpop {q4-q7} + ldmfd sp!, {r4, lr} + bx lr +SNIP_END(_N_ColorMatrix_postfix2) + +SNIP_START(_N_ColorMatrix_load_u8_4) + vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! + vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! + vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 
+ vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! +SNIP_END(_N_ColorMatrix_load_u8_4) + +SNIP_START(_N_ColorMatrix_load_u8_3) + vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! + vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! + vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! + vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! + veor d3, d3 +SNIP_END(_N_ColorMatrix_load_u8_3) + +SNIP_START(_N_ColorMatrix_load_u8_2) + vld2.8 {d0[0],d1[0]}, [r1]! + vld2.8 {d0[1],d1[1]}, [r1]! + vld2.8 {d0[2],d1[2]}, [r1]! + vld2.8 {d0[3],d1[3]}, [r1]! + veor d2, d2 + veor d3, d3 +SNIP_END(_N_ColorMatrix_load_u8_2) + +SNIP_START(_N_ColorMatrix_load_u8_1) + vld1.32 {d0[0]}, [r1]! + veor d1, d1 + veor d2, d2 + veor d3, d3 +SNIP_END(_N_ColorMatrix_load_u8_1) + +SNIP_START(_N_ColorMatrix_load_u8f_4) + vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! + vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! + vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! + vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! + vmovl.u8 q3, d3 + vmovl.u8 q2, d2 + vmovl.u8 q1, d1 + vmovl.u8 q0, d0 + vmovl.u16 q3, d6 + vmovl.u16 q2, d4 + vmovl.u16 q1, d2 + vmovl.u16 q0, d0 + vcvt.f32.s32 q3, q3 + vcvt.f32.s32 q2, q2 + vcvt.f32.s32 q1, q1 + vcvt.f32.s32 q0, q0 +SNIP_END(_N_ColorMatrix_load_u8f_4) + +SNIP_START(_N_ColorMatrix_load_u8f_3) + vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! + vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! + vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! + vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! + vmovl.u8 q2, d2 + vmovl.u8 q1, d1 + vmovl.u8 q0, d0 + vmovl.u16 q2, d4 + vmovl.u16 q1, d2 + vmovl.u16 q0, d0 + vcvt.f32.s32 q2, q2 + vcvt.f32.s32 q1, q1 + vcvt.f32.s32 q0, q0 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_u8f_3) + +SNIP_START(_N_ColorMatrix_load_u8f_2) + vld2.8 {d0[0],d1[0]}, [r1]! + vld2.8 {d0[1],d1[1]}, [r1]! + vld2.8 {d0[2],d1[2]}, [r1]! + vld2.8 {d0[3],d1[3]}, [r1]! 
+ vmovl.u8 q1, d1 + vmovl.u8 q0, d0 + vmovl.u16 q1, d2 + vmovl.u16 q0, d0 + vcvt.f32.s32 q1, q1 + vcvt.f32.s32 q0, q0 + veor q2, q2 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_u8f_2) + +SNIP_START(_N_ColorMatrix_load_u8f_1) + vld1.32 {d0[0]}, [r1]! + vmovl.u8 q0, d0 + vmovl.u16 q0, d0 + vcvt.f32.s32 q0, q0 + veor q1, q1 + veor q2, q2 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_u8f_1) + +SNIP_START(_N_ColorMatrix_load_f32_4) + vld4.32 {d0[0],d2[0],d4[0],d6[0]}, [r1]! + vld4.32 {d0[1],d2[1],d4[1],d6[1]}, [r1]! + vld4.32 {d1[0],d3[0],d5[0],d7[0]}, [r1]! + vld4.32 {d1[1],d3[1],d5[1],d7[1]}, [r1]! +SNIP_END(_N_ColorMatrix_load_f32_4) + +SNIP_START(_N_ColorMatrix_load_f32_3) + vld3.32 {d0[0],d2[0],d4[0]}, [r1]! + add r1, r1, #4 + vld3.32 {d0[1],d2[1],d4[1]}, [r1]! + add r1, r1, #4 + vld3.32 {d1[0],d3[0],d5[0]}, [r1]! + add r1, r1, #4 + vld3.32 {d1[1],d3[1],d5[1]}, [r1]! + add r1, r1, #4 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_f32_3) + +SNIP_START(_N_ColorMatrix_load_f32_2) + vld2.32 {d0[0],d2[0]}, [r1]! + vld2.32 {d0[1],d2[1]}, [r1]! + vld2.32 {d1[0],d3[0]}, [r1]! + vld2.32 {d1[1],d3[1]}, [r1]! + veor q2, q2 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_f32_2) + +SNIP_START(_N_ColorMatrix_load_f32_1) + vld1.32 {q0}, [r1]! + veor q1, q1 + veor q2, q2 + veor q3, q3 +SNIP_END(_N_ColorMatrix_load_f32_1) + + +SNIP_START(_N_ColorMatrix_store_u8_4) +#mov r0, #0 + vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! + vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! + vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! + vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! +SNIP_END(_N_ColorMatrix_store_u8_4) + +SNIP_START(_N_ColorMatrix_store_u8_2) + vst2.8 {d0[0],d1[0]}, [r0]! + vst2.8 {d0[1],d1[1]}, [r0]! + vst2.8 {d0[2],d1[2]}, [r0]! + vst2.8 {d0[3],d1[3]}, [r0]! +SNIP_END(_N_ColorMatrix_store_u8_2) + +SNIP_START(_N_ColorMatrix_store_u8_1) + vst1.32 {d0[0]}, [r0]! 
+SNIP_END(_N_ColorMatrix_store_u8_1) + + +SNIP_START(_N_ColorMatrix_store_f32u_4) + vcvt.s32.f32 q0, q0 + vcvt.s32.f32 q1, q1 + vcvt.s32.f32 q2, q2 + vcvt.s32.f32 q3, q3 + vqmovn.s32 d0, q0 + vqmovn.s32 d2, q1 + vqmovn.s32 d4, q2 + vqmovn.s32 d6, q3 + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q2 + vqmovun.s16 d3, q3 + vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! + vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! + vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! + vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! + + #mov r0, #0 + #ldr r0, [r0] + +SNIP_END(_N_ColorMatrix_store_f32u_4) + +SNIP_START(_N_ColorMatrix_store_f32u_2) + vcvt.s32.f32 q0, q0 + vcvt.s32.f32 q1, q1 + vqmovn.s32 d0, q0 + vqmovn.s32 d2, q1 + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vst2.8 {d0[0],d1[0]}, [r0]! + vst2.8 {d0[1],d1[1]}, [r0]! + vst2.8 {d0[2],d1[2]}, [r0]! + vst2.8 {d0[3],d1[3]}, [r0]! +SNIP_END(_N_ColorMatrix_store_f32u_2) + +SNIP_START(_N_ColorMatrix_store_f32u_1) + vcvt.s32.f32 q0, q0 + vqmovn.s32 d0, q0 + vqmovun.s16 d0, q0 + vst1.32 {d0[0]}, [r0]! +SNIP_END(_N_ColorMatrix_store_f32u_1) + +SNIP_START(_N_ColorMatrix_store_f32_4) + vst4.32 {d0[0],d2[0],d4[0],d6[0]}, [r0]! + vst4.32 {d0[1],d2[1],d4[1],d6[1]}, [r0]! + vst4.32 {d1[0],d3[0],d5[0],d7[0]}, [r0]! + vst4.32 {d1[1],d3[1],d5[1],d7[1]}, [r0]! +SNIP_END(_N_ColorMatrix_store_f32_4) + +SNIP_START(_N_ColorMatrix_store_f32_3) + vst4.32 {d0[0],d2[0],d4[0],d6[0]}, [r0]! + vst4.32 {d0[1],d2[1],d4[1],d6[1]}, [r0]! + vst4.32 {d1[0],d3[0],d5[0],d7[0]}, [r0]! + vst4.32 {d1[1],d3[1],d5[1],d7[1]}, [r0]! +SNIP_END(_N_ColorMatrix_store_f32_3) + +SNIP_START(_N_ColorMatrix_store_f32_2) + vst2.32 {d0[0],d2[0]}, [r0]! + vst2.32 {d0[1],d2[1]}, [r0]! + vst2.32 {d1[0],d3[0]}, [r0]! + vst2.32 {d1[1],d3[1]}, [r0]! +SNIP_END(_N_ColorMatrix_store_f32_2) + +SNIP_START(_N_ColorMatrix_store_f32_1) + vst1.32 {q0}, [r0]! 
+SNIP_END(_N_ColorMatrix_store_f32_1) + + +SNIP_START(_N_ColorMatrix_unpack_u8_4) + vmovl.u8 q12, d0 /* R */ + vmovl.u8 q13, d1 /* G */ + vmovl.u8 q14, d2 /* B */ + vmovl.u8 q15, d3 /* A */ +SNIP_END(_N_ColorMatrix_unpack_u8_4) + +SNIP_START(_N_ColorMatrix_unpack_u8_3) + vmovl.u8 q12, d0 /* R */ + vmovl.u8 q13, d1 /* G */ + vmovl.u8 q14, d2 /* B */ + veor q15, q15 +SNIP_END(_N_ColorMatrix_unpack_u8_3) + +SNIP_START(_N_ColorMatrix_unpack_u8_2) + vmovl.u8 q12, d0 /* R */ + vmovl.u8 q13, d1 /* G */ + veor q14, q14 + veor q15, q15 +SNIP_END(_N_ColorMatrix_unpack_u8_2) + +SNIP_START(_N_ColorMatrix_unpack_u8_1) + vmovl.u8 q12, d0 /* R */ + veor q13, q13 + veor q14, q14 + veor q15, q15 +SNIP_END(_N_ColorMatrix_unpack_u8_1) + +SNIP_START(_N_ColorMatrix_pack_u8_4) + vqrshrn.s32 d24, q8, #8 + vqrshrn.s32 d26, q9, #8 + vqrshrn.s32 d28, q10, #8 + vqrshrn.s32 d30, q11, #8 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 +SNIP_END(_N_ColorMatrix_pack_u8_4) + +SNIP_START(_N_ColorMatrix_pack_u8_3) + vqrshrn.s32 d24, q8, #8 + vqrshrn.s32 d26, q9, #8 + vqrshrn.s32 d28, q10, #8 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vqmovun.s16 d2, q14 +SNIP_END(_N_ColorMatrix_pack_u8_3) + +SNIP_START(_N_ColorMatrix_pack_u8_2) + vqrshrn.s32 d24, q8, #8 + vqrshrn.s32 d26, q9, #8 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 +SNIP_END(_N_ColorMatrix_pack_u8_2) + +SNIP_START(_N_ColorMatrix_pack_u8_1) + vqrshrn.s32 d24, q8, #8 + vqmovun.s16 d0, q12 +SNIP_END(_N_ColorMatrix_pack_u8_1) + +SNIP_START(_N_ColorMatrix_dot) + vmov.u8 d1, d0 + vmov.u8 d2, d0 +SNIP_END(_N_ColorMatrix_dot) + diff --git a/toolkit/Convolve3x3.cpp b/toolkit/Convolve3x3.cpp new file mode 100644 index 00000000..51339a2b --- /dev/null +++ b/toolkit/Convolve3x3.cpp @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +#define LOG_TAG "renderscript.toolkit.Convolve3x3" + +namespace android { +namespace renderscript { + +extern "C" void rsdIntrinsicConvolve3x3_K(void* dst, const void* y0, const void* y1, const void* y2, + const int16_t* coef, uint32_t count); + +class Convolve3x3Task : public Task { + const void* mIn; + void* mOut; + // Even though we have exactly 9 coefficients, store them in an array of size 16 so that + // the SIMD instructions can load them in chunks multiple of 8. + float mFp[16]; + int16_t mIp[16]; + + void kernelU4(uchar* out, uint32_t xstart, uint32_t xend, const uchar* py0, const uchar* py1, + const uchar* py2); + void convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY, + size_t startX, size_t startY, size_t endX, size_t endY); + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + Convolve3x3Task(const void* in, void* out, size_t vectorSize, size_t sizeX, size_t sizeY, + const float* coefficients, const Restriction* restriction) + : Task{sizeX, sizeY, vectorSize, false, restriction}, mIn{in}, mOut{out} { + for (int ct = 0; ct < 9; ct++) { + mFp[ct] = coefficients[ct]; + if (mFp[ct] >= 0) { + mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f); + } else { + mIp[ct] = (int16_t)(mFp[ct] * 256.f - 0.5f); + } + } + } +}; + +/** + * Computes one convolution and stores the result in the output. This is used for uchar, uchar2, + * uchar3, and uchar4 vectors. + * + * @tparam InputOutputType Type of the input and output arrays. A vector type, e.g. uchar4. + * @tparam ComputationType Type we use for the intermediate computations. + * @param x The index in the row of the value we'll convolve. + * @param out The location in the output array where we store the value. + * @param py0 The start of the top row. + * @param py1 The start of the middle row. + * @param py2 The start of the bottom row. + * @param coeff Pointer to the float coefficients, in row major format. + * @param sizeX The number of cells of one row. 
+ */ +template <typename InputOutputType, typename ComputationType> +static void convolveOneU(uint32_t x, InputOutputType* out, const InputOutputType* py0, + const InputOutputType* py1, const InputOutputType* py2, const float* coeff, + int32_t sizeX) { + uint32_t x1 = std::max((int32_t)x - 1, 0); + uint32_t x2 = std::min((int32_t)x + 1, sizeX - 1); + + ComputationType px = convert<ComputationType>(py0[x1]) * coeff[0] + + convert<ComputationType>(py0[x]) * coeff[1] + + convert<ComputationType>(py0[x2]) * coeff[2] + + convert<ComputationType>(py1[x1]) * coeff[3] + + convert<ComputationType>(py1[x]) * coeff[4] + + convert<ComputationType>(py1[x2]) * coeff[5] + + convert<ComputationType>(py2[x1]) * coeff[6] + + convert<ComputationType>(py2[x]) * coeff[7] + + convert<ComputationType>(py2[x2]) * coeff[8]; + + px = clamp(px + 0.5f, 0.f, 255.f); + *out = convert<InputOutputType>(px); +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +/** + * Computes one convolution and stores the result in the output. This is used for float, float2, + * float3, and float4 vectors. + * + * @tparam InputOutputType Type of the input and output arrays. A vector type, e.g. float4. + * @param x The index in the row of the value we'll convolve. + * @param out The location in the output array where we store the value. + * @param py0 The start of the top row. + * @param py1 The start of the middle row. + * @param py2 The start of the bottom row. + * @param coeff Pointer to the float coefficients, in row major format. + * @param sizeX The number of cells of one row. 
+ */ +template <typename InputOutputType> +static void ConvolveOneF(uint32_t x, InputOutputType* out, const InputOutputType* py0, + const InputOutputType* py1, const InputOutputType* py2, const float* coeff, + int32_t sizeX) { + uint32_t x1 = std::max((int32_t)x - 1, 0); + uint32_t x2 = std::min((int32_t)x + 1, sizeX - 1); + *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) + + (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) + + (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]); +} +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +/** + * This function convolves one line. + * + * @param pout Where to place the next output. + * @param xstart Index in the X direction of where to start. + * @param xend End index + * @param ppy0 Points to the start of the previous line. + * @param ppy1 Points to the start of the current line. + * @param ppy2 Points to the start of the next line. + */ +void Convolve3x3Task::kernelU4(uchar* pout, uint32_t xstart, uint32_t xend, const uchar* ppy0, + const uchar* ppy1, const uchar* ppy2) { + uchar4* out = (uchar4*)pout; + const uchar4* py0 = (const uchar4*)ppy0; + const uchar4* py1 = (const uchar4*)ppy1; + const uchar4* py2 = (const uchar4*)ppy2; + + uint32_t x1 = xstart; + uint32_t x2 = xend; + if (x1 == 0) { + convolveOneU<uchar4, float4>(0, out, py0, py1, py2, mFp, mSizeX); + x1++; + out++; + } + + if (x2 > x1) { +#if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3) + if (mUsesSimd) { + int32_t len = (x2 - x1 - 1) >> 1; + if (len > 0) { + rsdIntrinsicConvolve3x3_K(out, &py0[x1 - 1], &py1[x1 - 1], &py2[x1 - 1], mIp, len); + x1 += len << 1; + out += len << 1; + } + } +#endif + + while (x1 != x2) { + convolveOneU<uchar4, float4>(x1, out, py0, py1, py2, mFp, mSizeX); + out++; + x1++; + } + } +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +template <typename T> +void RsdCpuScriptIntrinsicConvolve3x3_kernelF(void* in, T* out, uint32_t xstart, uint32_t 
xend, + uint32_t currentY, size_t sizeX, size_t sizeY, + size_t vectorSize, float* fp) { + const uchar* pin = (const uchar*)in; + const size_t stride = sizeX * vectorSize * 4; // float takes 4 bytes + + uint32_t y1 = std::min((int32_t)currentY + 1, (int32_t)(sizeY - 1)); + uint32_t y2 = std::max((int32_t)currentY - 1, 0); + const T* py0 = (const T*)(pin + stride * y2); + const T* py1 = (const T*)(pin + stride * currentY); + const T* py2 = (const T*)(pin + stride * y1); + + for (uint32_t x = xstart; x < xend; x++, out++) { + ConvolveOneF<T>(x, out, py0, py1, py2, fp, sizeX); + } +} +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +template <typename InputOutputType, typename ComputationType> +static void convolveU(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY, + size_t startX, size_t startY, size_t endX, size_t endY, float* fp) { + const size_t stride = vectorSize * sizeX; + for (size_t y = startY; y < endY; y++) { + uint32_t y1 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1)); + uint32_t y2 = std::max((int32_t)y - 1, 0); + + size_t offset = (y * sizeX + startX) * vectorSize; + InputOutputType* px = (InputOutputType*)(pout + offset); + InputOutputType* py0 = (InputOutputType*)(pin + stride * y2); + InputOutputType* py1 = (InputOutputType*)(pin + stride * y); + InputOutputType* py2 = (InputOutputType*)(pin + stride * y1); + for (uint32_t x = startX; x < endX; x++, px++) { + convolveOneU<InputOutputType, ComputationType>(x, px, py0, py1, py2, fp, sizeX); + } + } +} + +void Convolve3x3Task::convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, + size_t sizeY, size_t startX, size_t startY, size_t endX, + size_t endY) { + const size_t stride = paddedSize(vectorSize) * sizeX; + for (size_t y = startY; y < endY; y++) { + uint32_t y1 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1)); + uint32_t y2 = std::max((int32_t)y - 1, 0); + + size_t offset = (y * sizeX + startX) * paddedSize(vectorSize); + uchar* px = pout + 
offset; + const uchar* py0 = pin + stride * y2; + const uchar* py1 = pin + stride * y; + const uchar* py2 = pin + stride * y1; + kernelU4(px, startX, endX, py0, py1, py2); + } +} + +void Convolve3x3Task::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + // ALOGI("Thread %d start tile from (%zd, %zd) to (%zd, %zd)", threadIndex, startX, startY, + // endX, endY); + switch (mVectorSize) { + case 1: + convolveU<uchar, float>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, + startX, startY, endX, endY, mFp); + break; + case 2: + convolveU<uchar2, float2>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, + startX, startY, endX, endY, mFp); + break; + case 3: + case 4: + convolveU4((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, startX, startY, + endX, endY); + break; + } +} + +void RenderScriptToolkit::convolve3x3(const void* in, void* out, size_t vectorSize, size_t sizeX, + size_t sizeY, const float* coefficients, + const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (vectorSize < 1 || vectorSize > 4) { + ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); + return; + } +#endif + + Convolve3x3Task task(in, out, vectorSize, sizeX, sizeY, coefficients, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Convolve5x5.cpp b/toolkit/Convolve5x5.cpp new file mode 100644 index 00000000..1f3f75ca --- /dev/null +++ b/toolkit/Convolve5x5.cpp @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +namespace android { +namespace renderscript { + +#define LOG_TAG "renderscript.toolkit.Convolve5x5" + +extern "C" void rsdIntrinsicConvolve5x5_K(void* dst, const void* y0, const void* y1, const void* y2, + const void* y3, const void* y4, const int16_t* coef, + uint32_t count); + +class Convolve5x5Task : public Task { + const void* mIn; + void* mOut; + // Even though we have exactly 25 coefficients, store them in an array of size 28 so that + // the SIMD instructions can load them in three chunks of 8 and 1 of chunk of 4. + float mFp[28]; + int16_t mIp[28]; + + void kernelU4(uchar* out, uint32_t xstart, uint32_t xend, const uchar* py0, const uchar* py1, + const uchar* py2, const uchar* py3, const uchar* py4); + void convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY, + size_t startX, size_t startY, size_t endX, size_t endY); + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + Convolve5x5Task(const void* in, void* out, size_t vectorSize, size_t sizeX, size_t sizeY, + const float* coefficients, const Restriction* restriction) + : Task{sizeX, sizeY, vectorSize, false, restriction}, mIn{in}, mOut{out} { + for (int ct = 0; ct < 25; ct++) { + mFp[ct] = coefficients[ct]; + if (mFp[ct] >= 0) { + mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f); + } else { + mIp[ct] = (int16_t)(mFp[ct] * 256.f - 0.5f); + } + } + } +}; + +template <typename InputOutputType, typename ComputationType> +static void ConvolveOneU(uint32_t x, InputOutputType* out, const InputOutputType* py0, + const InputOutputType* py1, const InputOutputType* py2, + const InputOutputType* py3, const InputOutputType* py4, const float* coeff, + int32_t width) { + uint32_t x0 = std::max((int32_t)x - 2, 0); + uint32_t x1 = std::max((int32_t)x - 1, 0); + uint32_t x2 = x; + uint32_t x3 = std::min((int32_t)x + 1, width - 1); + uint32_t x4 = std::min((int32_t)x + 2, width - 1); + + ComputationType px = convert<ComputationType>(py0[x0]) * coeff[0] + + convert<ComputationType>(py0[x1]) * coeff[1] + + convert<ComputationType>(py0[x2]) * coeff[2] + + convert<ComputationType>(py0[x3]) * coeff[3] + + convert<ComputationType>(py0[x4]) * coeff[4] + + + convert<ComputationType>(py1[x0]) * coeff[5] + + convert<ComputationType>(py1[x1]) * coeff[6] + + convert<ComputationType>(py1[x2]) * coeff[7] + + convert<ComputationType>(py1[x3]) * coeff[8] + + convert<ComputationType>(py1[x4]) * coeff[9] + + + convert<ComputationType>(py2[x0]) * coeff[10] + + convert<ComputationType>(py2[x1]) * coeff[11] + + convert<ComputationType>(py2[x2]) * coeff[12] + + convert<ComputationType>(py2[x3]) * coeff[13] + + convert<ComputationType>(py2[x4]) * coeff[14] + + + convert<ComputationType>(py3[x0]) * coeff[15] + + convert<ComputationType>(py3[x1]) * coeff[16] + + convert<ComputationType>(py3[x2]) * 
coeff[17] + + convert<ComputationType>(py3[x3]) * coeff[18] + + convert<ComputationType>(py3[x4]) * coeff[19] + + + convert<ComputationType>(py4[x0]) * coeff[20] + + convert<ComputationType>(py4[x1]) * coeff[21] + + convert<ComputationType>(py4[x2]) * coeff[22] + + convert<ComputationType>(py4[x3]) * coeff[23] + + convert<ComputationType>(py4[x4]) * coeff[24]; + px = clamp(px + 0.5f, 0.f, 255.f); + *out = convert<InputOutputType>(px); +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +template <typename InputOutputType> +static void ConvolveOneF(uint32_t x, InputOutputType* out, const InputOutputType* py0, + const InputOutputType* py1, const InputOutputType* py2, + const InputOutputType* py3, const InputOutputType* py4, const float* coeff, + int32_t width) { + uint32_t x0 = std::max((int32_t)x - 2, 0); + uint32_t x1 = std::max((int32_t)x - 1, 0); + uint32_t x2 = x; + uint32_t x3 = std::min((int32_t)x + 1, width - 1); + uint32_t x4 = std::min((int32_t)x + 2, width - 1); + + InputOutputType px = py0[x0] * coeff[0] + py0[x1] * coeff[1] + py0[x2] * coeff[2] + + py0[x3] * coeff[3] + py0[x4] * coeff[4] + + + py1[x0] * coeff[5] + py1[x1] * coeff[6] + py1[x2] * coeff[7] + + py1[x3] * coeff[8] + py1[x4] * coeff[9] + + + py2[x0] * coeff[10] + py2[x1] * coeff[11] + py2[x2] * coeff[12] + + py2[x3] * coeff[13] + py2[x4] * coeff[14] + + + py3[x0] * coeff[15] + py3[x1] * coeff[16] + py3[x2] * coeff[17] + + py3[x3] * coeff[18] + py3[x4] * coeff[19] + + + py4[x0] * coeff[20] + py4[x1] * coeff[21] + py4[x2] * coeff[22] + + py4[x3] * coeff[23] + py4[x4] * coeff[24]; + *out = px; +} +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +/** + * This function convolves one line. + * + * @param pout Where to place the next output. + * @param xstart Index in the X direction of where to start. + * @param xend End index + * @param ppy0 Points to the start of the line two above. + * @param ppy1 Points to the start of the line one above. 
+ * @param ppy2 Points to the start of the current line. + * @param ppy3 Points to the start of the line one below. + * @param ppy4 Points to the start of the line two below. + */ +void Convolve5x5Task::kernelU4(uchar* pout, uint32_t x1, uint32_t x2, const uchar* ppy0, + const uchar* ppy1, const uchar* ppy2, const uchar* ppy3, + const uchar* ppy4) { + uchar4* out = (uchar4*)pout; + const uchar4* py0 = (const uchar4*)ppy0; + const uchar4* py1 = (const uchar4*)ppy1; + const uchar4* py2 = (const uchar4*)ppy2; + const uchar4* py3 = (const uchar4*)ppy3; + const uchar4* py4 = (const uchar4*)ppy4; + + while ((x1 < x2) && (x1 < 2)) { + ConvolveOneU<uchar4, float4>(x1, out, py0, py1, py2, py3, py4, mFp, mSizeX); + out++; + x1++; + } +#if defined(ARCH_X86_HAVE_SSSE3) + // for x86 SIMD, require minimum of 7 elements (4 for SIMD, + // 3 for end boundary where x may hit the end boundary) + if (mUsesSimd && ((x1 + 6) < x2)) { + // subtract 3 for end boundary + uint32_t len = (x2 - x1 - 3) >> 2; + rsdIntrinsicConvolve5x5_K(out, py0 + x1 - 2, py1 + x1 - 2, py2 + x1 - 2, py3 + x1 - 2, + py4 + x1 - 2, mIp, len); + out += len << 2; + x1 += len << 2; + } +#endif + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && ((x1 + 3) < x2)) { + uint32_t len = (x2 - x1 - 3) >> 1; + rsdIntrinsicConvolve5x5_K(out, py0 + x1 - 2, py1 + x1 - 2, py2 + x1 - 2, py3 + x1 - 2, + py4 + x1 - 2, mIp, len); + out += len << 1; + x1 += len << 1; + } +#endif + + while (x1 < x2) { + ConvolveOneU<uchar4, float4>(x1, out, py0, py1, py2, py3, py4, mFp, mSizeX); + out++; + x1++; + } +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +// This will need more cleanup before it can be used. 
+void Convolve5x5Task::kernelF4(const ConvolveInfo* info, float4* out, + uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar* pin = (const uchar*)info->in; + const size_t stride = info->stride; + + uint32_t y0 = std::max((int32_t)currentY - 2, 0); + uint32_t y1 = std::max((int32_t)currentY - 1, 0); + uint32_t y2 = currentY; + uint32_t y3 = std::min((int32_t)currentY + 1, sizeY); + uint32_t y4 = std::min((int32_t)currentY + 2, sizeY); + + const float4* py0 = (const float4*)(pin + stride * y0); + const float4* py1 = (const float4*)(pin + stride * y1); + const float4* py2 = (const float4*)(pin + stride * y2); + const float4* py3 = (const float4*)(pin + stride * y3); + const float4* py4 = (const float4*)(pin + stride * y4); + + for (uint32_t x = xstart; x < xend; x++, out++) { + ConvolveOneF<float4>(x, out, py0, py1, py2, py3, py4, mFp, sizeX); + } +} + +void RsdCpuScriptIntrinsicConvolve5x5_kernelF2(const ConvolveInfo* info, float2* out, + uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar* pin = (const uchar*)info->in; + const size_t stride = info->stride; + + uint32_t y0 = std::max((int32_t)currentY - 2, 0); + uint32_t y1 = std::max((int32_t)currentY - 1, 0); + uint32_t y2 = currentY; + uint32_t y3 = std::min((int32_t)currentY + 1, sizeY); + uint32_t y4 = std::min((int32_t)currentY + 2, sizeY); + + const float2* py0 = (const float2*)(pin + stride * y0); + const float2* py1 = (const float2*)(pin + stride * y1); + const float2* py2 = (const float2*)(pin + stride * y2); + const float2* py3 = (const float2*)(pin + stride * y3); + const float2* py4 = (const float2*)(pin + stride * y4); + + for (uint32_t x = xstart; x < xend; x++, out++) { + ConvolveOneF<float2>(x, out, py0, py1, py2, py3, py4, mFp, sizeX); + } +} + +void RsdCpuScriptIntrinsicConvolve5x5_kernelF1(const ConvolveInfo* info, float* out, + uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar* pin = (const uchar*)info->in; + const size_t stride = info->stride; + + 
uint32_t y0 = std::max((int32_t)currentY - 2, 0); + uint32_t y1 = std::max((int32_t)currentY - 1, 0); + uint32_t y2 = currentY; + uint32_t y3 = std::min((int32_t)currentY + 1, sizeY); + uint32_t y4 = std::min((int32_t)currentY + 2, sizeY); + + const float* py0 = (const float*)(pin + stride * y0); + const float* py1 = (const float*)(pin + stride * y1); + const float* py2 = (const float*)(pin + stride * y2); + const float* py3 = (const float*)(pin + stride * y3); + const float* py4 = (const float*)(pin + stride * y4); + + for (uint32_t x = xstart; x < xend; x++, out++) { + ConvolveOneF<float>(x, out, py0, py1, py2, py3, py4, mFp, sizeX); + } +} +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +template <typename InputOutputType, typename ComputationType> +static void convolveU(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY, + size_t startX, size_t startY, size_t endX, size_t endY, float* mFp) { + const size_t stride = vectorSize * sizeX; + for (size_t y = startY; y < endY; y++) { + uint32_t y0 = std::max((int32_t)y - 2, 0); + uint32_t y1 = std::max((int32_t)y - 1, 0); + uint32_t y2 = y; + uint32_t y3 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1)); + uint32_t y4 = std::min((int32_t)y + 2, (int32_t)(sizeY - 1)); + + size_t offset = (y * sizeX + startX) * vectorSize; + InputOutputType* px = (InputOutputType*)(pout + offset); + InputOutputType* py0 = (InputOutputType*)(pin + stride * y0); + InputOutputType* py1 = (InputOutputType*)(pin + stride * y1); + InputOutputType* py2 = (InputOutputType*)(pin + stride * y2); + InputOutputType* py3 = (InputOutputType*)(pin + stride * y3); + InputOutputType* py4 = (InputOutputType*)(pin + stride * y4); + for (uint32_t x = startX; x < endX; x++, px++) { + ConvolveOneU<InputOutputType, ComputationType>(x, px, py0, py1, py2, py3, py4, mFp, + sizeX); + } + } +} + +void Convolve5x5Task::convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, + size_t sizeY, size_t startX, size_t 
startY, size_t endX, + size_t endY) { + const size_t stride = paddedSize(vectorSize) * sizeX; + for (size_t y = startY; y < endY; y++) { + uint32_t y0 = std::max((int32_t)y - 2, 0); + uint32_t y1 = std::max((int32_t)y - 1, 0); + uint32_t y2 = y; + uint32_t y3 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1)); + uint32_t y4 = std::min((int32_t)y + 2, (int32_t)(sizeY - 1)); + + size_t offset = (y * sizeX + startX) * paddedSize(vectorSize); + uchar* px = pout + offset; + const uchar* py0 = pin + stride * y0; + const uchar* py1 = pin + stride * y1; + const uchar* py2 = pin + stride * y2; + const uchar* py3 = pin + stride * y3; + const uchar* py4 = pin + stride * y4; + kernelU4(px, startX, endX, py0, py1, py2, py3, py4); + } +} + +void Convolve5x5Task::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + // ALOGI("Thread %d start tile from (%zd, %zd) to (%zd, %zd)", threadIndex, startX, startY, + // endX, endY); + switch (mVectorSize) { + case 1: + convolveU<uchar, float>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, + startX, startY, endX, endY, mFp); + break; + case 2: + convolveU<uchar2, float2>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, + startX, startY, endX, endY, mFp); + break; + case 3: + case 4: + convolveU4((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, startX, startY, + endX, endY); + break; + } +} + +void RenderScriptToolkit::convolve5x5(const void* in, void* out, size_t vectorSize, size_t sizeX, + size_t sizeY, const float* coefficients, + const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (vectorSize < 1 || vectorSize > 4) { + ALOGE("The vectorSize should be between 1 and 4. 
%zu provided.", vectorSize); + return; + } +#endif + + Convolve5x5Task task(in, out, vectorSize, sizeX, sizeY, coefficients, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Convolve_advsimd.S b/toolkit/Convolve_advsimd.S new file mode 100644 index 00000000..0daa0c5a --- /dev/null +++ b/toolkit/Convolve_advsimd.S @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2012,2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + x0 = dst + x1 = y0 base pointer + x2 = y1 base pointer + x3 = y2 base pointer + x4 = coeffs + x5 = length / 2 +*/ + +#define ENTRY(f) .text; .align 2; .globl f; .type f,#function; f: +#define END(f) .size f, .-f; + +ENTRY(rsdIntrinsicConvolve3x3_K) + sub x6, sp, #64 + sub sp, sp, #64 + st1 {v8.1d-v11.1d}, [x6], #32 + st1 {v12.1d-v15.1d}, [x6] + + /* Load the coefficients in the v0, v1 registers */ + ld1 {v0.8h, v1.8h}, [x4] + + /* Load the frequently used immediate in a register */ + mov x4, #8 + +1: + /* Load and post-increase the address by x4=#8 */ + ld1 {v13.16b}, [x1], x4 + ld1 {v14.16b}, [x2], x4 + ld1 {v15.16b}, [x3], x4 + + /* Signal memory for data that will be used in the loop after the next */ +// prfm PLDL1KEEP,[x1, x4] // TODO: test this +// prfm PLDL1KEEP,[x2, x4] // TODO: test this +// prfm PLDL1KEEP,[x3, x4] // TODO: test this + + uxtl v2.8h, v13.8b + uxtl2 v3.8h, v13.16b + uxtl v4.8h, v14.8b + uxtl2 v5.8h, v14.16b + uxtl v6.8h, v15.8b + uxtl2 v7.8h, v15.16b + +/* + The two pixel source array is + v2, v2hi, v3lo, v3hi + v4, v4hi, v5lo, v5hi + v6, v6hi, v7lo, v7hi +*/ + + smull v8.4s, v2.4h, v0.h[0] + smull2 v9.4s, v2.8h, v0.h[0] + smlal2 v8.4s, v2.8h, v0.h[1] + smlal v9.4s, v3.4h, v0.h[1] + smlal v8.4s, v3.4h, v0.h[2] + smlal2 v9.4s, v3.8h, v0.h[2] + smlal v8.4s, v4.4h, v0.h[3] + smlal2 v9.4s, v4.8h, v0.h[3] + smlal2 v8.4s, v4.8h, v0.h[4] + smlal v9.4s, v5.4h, v0.h[4] + smlal v8.4s, v5.4h, v0.h[5] + smlal2 v9.4s, v5.8h, v0.h[5] + smlal v8.4s, v6.4h, v0.h[6] + smlal2 v9.4s, v6.8h, v0.h[6] + smlal2 v8.4s, v6.8h, v0.h[7] + smlal v9.4s, v7.4h, v0.h[7] + smlal v8.4s, v7.4h, v1.h[0] + smlal2 v9.4s, v7.8h, v1.h[0] + + shrn v8.4h, v8.4s, #8 + shrn2 v8.8h, v9.4s, #8 + + sqxtun v8.8b, v8.8h + st1 {v8.8b}, [x0], #8 + + /* Are we done yet? */ + subs x5, x5, #1 + bne 1b + + /* We're done, bye! 
*/ + ld1 {v8.1d-v11.1d}, [sp], #32 + ld1 {v12.1d-v15.1d}, [sp], #32 + ret +END(rsdIntrinsicConvolve3x3_K) + + +/* Convolve 5x5 */ + +/* + x0 = dst + x1 = y0 base pointer + x2 = y1 base pointer + x3 = y2 base pointer + x4 = y3 base pointer + x5 = y4 base pointer + x6 = coeffs + x7 = length +*/ +ENTRY(rsdIntrinsicConvolve5x5_K) + sub x8, sp, #64 + sub sp, sp, #64 + st1 {v8.1d-v11.1d}, [x8], #32 + st1 {v12.1d-v15.1d}, [x8] + + /* Create the coefficients vector */ + ld1 {v0.8h-v2.8h}, [x6], #48 + ld1 {v3.4h}, [x6], #8 + + movi v15.4s, #0x7f + + /* Load the frequently used immediate in a register */ + mov x6, #8 + +1: + /* Load the y base pointers in Qregs and post-increase the address by x6=#8 */ + ld1 {v9.8b-v11.8b}, [x1], x6 // y0 ( y - 2 ) + ld1 {v12.8b-v14.8b}, [x2], x6 // y0 ( y - 1 ) + + /* Signal memory for data that will be used in the loop after the next */ +// prfm PLDL1KEEP,[x1, x6] // TODO: test this +// prfm PLDL1KEEP,[x2, x6] // TODO: test this + + /* Promoting the 8bit channels to 16bit */ + uxtl v9.8h, v9.8b + uxtl v10.8h, v10.8b + uxtl v11.8h, v11.8b + uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + uxtl v14.8h, v14.8b + +/* + v9, v9hi, v10lo, v10hi, v11lo, v11hi, + v12, v12hi +*/ + smull v4.4s, v9.4h, v0.h[0] + smull2 v5.4s, v9.8h, v0.h[0] + smlal2 v4.4s, v9.8h, v0.h[1] + smlal v5.4s, v10.4h, v0.h[1] + smlal v4.4s, v10.4h, v0.h[2] + smlal2 v5.4s, v10.8h, v0.h[2] + smlal2 v4.4s, v10.8h, v0.h[3] + smlal v5.4s, v11.4h, v0.h[3] + smlal v4.4s, v11.4h, v0.h[4] + smlal2 v5.4s, v11.8h, v0.h[4] + + smlal v4.4s, v12.4h, v0.h[5] + smlal2 v5.4s, v12.8h, v0.h[5] + smlal2 v4.4s, v12.8h, v0.h[6] + smlal v5.4s, v13.4h, v0.h[6] + smlal v4.4s, v13.4h, v0.h[7] + smlal2 v5.4s, v13.8h, v0.h[7] + smlal2 v4.4s, v13.8h, v1.h[0] + smlal v5.4s, v14.4h, v1.h[0] + smlal v4.4s, v14.4h, v1.h[1] + smlal2 v5.4s, v14.8h, v1.h[1] + + /* Next 2 rows */ + /* Load the y base pointers in Qregs and post-increase the address by x6=#8 */ + ld1 {v9.8b-v11.8b}, [x3], x6 // y0 ( y ) + ld1 
{v12.8b-v14.8b}, [x4], x6 // y0 ( y + 1 ) + + /* Signal memory for data that will be used in the loop after the next */ +// prfm PLDL1KEEP,[x3, x6] // TODO: test this +// prfm PLDL1KEEP,[x4, x6] // TODO: test this + + /* Promoting the 8bit channels to 16bit */ + uxtl v9.8h, v9.8b + uxtl v10.8h, v10.8b + uxtl v11.8h, v11.8b + uxtl v12.8h, v12.8b + uxtl v13.8h, v13.8b + uxtl v14.8h, v14.8b + +/* + v9, v9hi, v10lo, v10hi, v11lo, v11hi, + v12, v12hi +*/ + smlal v4.4s, v9.4h, v1.h[2] + smlal2 v5.4s, v9.8h, v1.h[2] + smlal2 v4.4s, v9.8h, v1.h[3] + smlal v5.4s, v10.4h, v1.h[3] + smlal v4.4s, v10.4h, v1.h[4] + smlal2 v5.4s, v10.8h, v1.h[4] + smlal2 v4.4s, v10.8h, v1.h[5] + smlal v5.4s, v11.4h, v1.h[5] + smlal v4.4s, v11.4h, v1.h[6] + smlal2 v5.4s, v11.8h, v1.h[6] + + smlal v4.4s, v12.4h, v1.h[7] + smlal2 v5.4s, v12.8h, v1.h[7] + smlal2 v4.4s, v12.8h, v2.h[0] + smlal v5.4s, v13.4h, v2.h[0] + smlal v4.4s, v13.4h, v2.h[1] + smlal2 v5.4s, v13.8h, v2.h[1] + smlal2 v4.4s, v13.8h, v2.h[2] + smlal v5.4s, v14.4h, v2.h[2] + smlal v4.4s, v14.4h, v2.h[3] + smlal2 v5.4s, v14.8h, v2.h[3] + + /* Last row */ + /* Load the y base pointers in Qregs and post-increase the address by x6=#8 */ + ld1 {v9.8b- v11.8b}, [x5], x6 // y0 ( y + 2 ) + + /* Signal memory for data that will be used in the loop after the next */ +// prfm PLDL1KEEP,[x5, x6] // TODO: test this + + /* Promoting the 8bit channels to 16bit */ + uxtl v9.8h, v9.8b + uxtl v10.8h, v10.8b + uxtl v11.8h, v11.8b + +/* + v9, v9hi, v10lo, v10hi, v11lo, v11hi, + v12, v12hi +*/ + + smlal v4.4s, v9.4h, v2.h[4] + smlal2 v5.4s, v9.8h, v2.h[4] + smlal2 v4.4s, v9.8h, v2.h[5] + smlal v5.4s, v10.4h, v2.h[5] + smlal v4.4s, v10.4h, v2.h[6] + smlal2 v5.4s, v10.8h, v2.h[6] + smlal2 v4.4s, v10.8h, v2.h[7] + smlal v5.4s, v11.4h, v2.h[7] + smlal v4.4s, v11.4h, v3.h[0] + smlal2 v5.4s, v11.8h, v3.h[0] + + add v4.4s, v4.4s, v15.4s + add v5.4s, v5.4s, v15.4s + +/* Narrow it to a d-reg 32 -> 16 bit */ + rshrn v4.4h, v4.4s, #8 + rshrn2 v4.8h, v5.4s, #8 + + 
+/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */ + sqxtun v4.8b, v4.8h + + st1 {v4.8b}, [x0], #8 // return the output and increase the address of x0 + + /* Are we done? */ + subs x7, x7, #1 + bne 1b + + /* Yup, bye */ + ld1 {v8.1d-v11.1d}, [sp], #32 + ld1 {v12.1d-v15.1d}, [sp], #32 + ret + +END(rsdIntrinsicConvolve5x5_K) diff --git a/toolkit/Convolve_neon.S b/toolkit/Convolve_neon.S new file mode 100644 index 00000000..ee10884b --- /dev/null +++ b/toolkit/Convolve_neon.S @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + r0 = dst + r1 = y0 base pointer + r2 = y1 base pointer + r3 = y2 base pointer + sp = coeffs + sp = length / 2 +*/ + +#define ENTRY(f) .text; .align 0; .globl f; .type f,#function; f: .fnstart +#define END(f) .fnend; .size f, .-f; + +ENTRY(rsdIntrinsicConvolve3x3_K) + push {r4-r8, r10, r11, lr} + vpush {q4-q7} + + /* Get the coeffs pointer from the stack and load the + coefficients in the q0, q1 NEON registers */ + ldr r4, [sp, #32+64] + vld1.16 {q0, q1}, [r4] + + /* Get count from the stack */ + ldr r4, [sp, #36+64] + + /* Load the frequently used immediate in a register */ + mov r5, #8 + +1: + /* Load and post-increase the address by r5=#8 */ + vld1.8 {q13}, [r1], r5 + vld1.8 {q14}, [r2], r5 + vld1.8 {q15}, [r3], r5 + + /* Signal memory for data that will be used in the loop after the next */ + pld [r1, r5] + pld [r2, r5] + pld [r3, r5] + + vmovl.u8 q2, d26 + vmovl.u8 q3, d27 + vmovl.u8 q4, d28 + vmovl.u8 q5, d29 + vmovl.u8 q6, d30 + vmovl.u8 q7, d31 + +/* + The two pixel source array is + d4, d5, d6, d7 + d8, d9, d10, d11 + d12, d13, d14, d15 +*/ + + vmull.s16 q8, d4, d0[0] + vmlal.s16 q8, d5, d0[1] + vmlal.s16 q8, d6, d0[2] + vmlal.s16 q8, d8, d0[3] + vmlal.s16 q8, d9, d1[0] + vmlal.s16 q8, d10, d1[1] + vmlal.s16 q8, d12, d1[2] + vmlal.s16 q8, d13, d1[3] + vmlal.s16 q8, d14, d2[0] + + vmull.s16 q9, d5, d0[0] + vmlal.s16 q9, d6, d0[1] + vmlal.s16 q9, d7, d0[2] + vmlal.s16 q9, d9, d0[3] + vmlal.s16 q9, d10, d1[0] + vmlal.s16 q9, d11, d1[1] + vmlal.s16 q9, d13, d1[2] + vmlal.s16 q9, d14, d1[3] + vmlal.s16 q9, d15, d2[0] + + vshrn.i32 d16, q8, #8 + vshrn.i32 d17, q9, #8 + + vqmovun.s16 d16, q8 + vst1.8 d16, [r0]! + + /* Are we done yet? */ + subs r4, r4, #1 + bne 1b + + /* We're done, bye! 
*/ + vpop {q4-q7} + pop {r4-r8, r10, r11, lr} + bx lr +END(rsdIntrinsicConvolve3x3_K) + + +/* Convolve 5x5 */ + +/* + r0 = dst + r1 = y0 base pointer + r2 = y1 base pointer + r3 = y2 base pointer + r4 = y3 base pointer + r5 = y4 base pointer + r6 = coeffs + r7 = length +*/ +ENTRY(rsdIntrinsicConvolve5x5_K) + push {r4-r7, lr} + vpush {q4-q7} + + /* load y3 in r4 */ + ldr r4, [sp, #20 + 64] + + /* load y4 in r5 */ + ldr r5, [sp, #24 + 64] + + /* Load the coefficients pointer */ + ldr r6, [sp, #28 + 64] + + /* Create the coefficients vector */ + vld1.16 {d0, d1, d2, d3}, [r6]! + vld1.16 {d4, d5, d6}, [r6] + + vmov.u32 q15, #0x7f + + /* load the count */ + ldr r6, [sp, #32 + 64] + + /* Load the frequently used immediate in a register */ + mov r7, #8 + +1: + /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ + vld1.8 {d24, d25, d26}, [r1], r7 @ y0 ( y - 2 ) + vld1.8 {d27, d28, d29}, [r2], r7 @ y0 ( y - 1 ) + + /* Signal memory for data that will be used in the loop after the next */ + pld [r1, r7] + pld [r2, r7] + + /* Promoting the 8bit channels to 16bit */ + vmovl.u8 q9, d24 + vmovl.u8 q10, d25 + vmovl.u8 q11, d26 + vmovl.u8 q12, d27 + vmovl.u8 q13, d28 + vmovl.u8 q14, d29 + +/* + d18, d19, d20, d21, d22, d23, + d24, d25 +*/ + vmull.s16 q4, d18, d0[0] + vmlal.s16 q4, d19, d0[1] + vmlal.s16 q4, d20, d0[2] + vmlal.s16 q4, d21, d0[3] + vmlal.s16 q4, d22, d1[0] + + vmlal.s16 q4, d24, d1[1] + vmlal.s16 q4, d25, d1[2] + vmlal.s16 q4, d26, d1[3] + vmlal.s16 q4, d27, d2[0] + vmlal.s16 q4, d28, d2[1] + + vmull.s16 q5, d19, d0[0] + vmlal.s16 q5, d20, d0[1] + vmlal.s16 q5, d21, d0[2] + vmlal.s16 q5, d22, d0[3] + vmlal.s16 q5, d23, d1[0] + + vmlal.s16 q5, d25, d1[1] + vmlal.s16 q5, d26, d1[2] + vmlal.s16 q5, d27, d1[3] + vmlal.s16 q5, d28, d2[0] + vmlal.s16 q5, d29, d2[1] + + + /* Next 2 rows */ + /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ + vld1.8 {d24, d25, d26}, [r3], r7 @ y0 ( y ) + vld1.8 {d27, d28, d29}, [r4], 
r7 @ y0 ( y + 1 ) + + /* Signal memory for data that will be used in the loop after the next */ + pld [r3, r7] + pld [r4, r7] + + /* Promoting the 8bit channels to 16bit */ + vmovl.u8 q9, d24 + vmovl.u8 q10, d25 + vmovl.u8 q11, d26 + vmovl.u8 q12, d27 + vmovl.u8 q13, d28 + vmovl.u8 q14, d29 + +/* + d18, d19, d20, d21, d22, d23, + d24, d25 +*/ + vmlal.s16 q4, d18, d2[2] + vmlal.s16 q4, d19, d2[3] + vmlal.s16 q4, d20, d3[0] + vmlal.s16 q4, d21, d3[1] + vmlal.s16 q4, d22, d3[2] + + vmlal.s16 q4, d24, d3[3] + vmlal.s16 q4, d25, d4[0] + vmlal.s16 q4, d26, d4[1] + vmlal.s16 q4, d27, d4[2] + vmlal.s16 q4, d28, d4[3] + + vmlal.s16 q5, d19, d2[2] + vmlal.s16 q5, d20, d2[3] + vmlal.s16 q5, d21, d3[0] + vmlal.s16 q5, d22, d3[1] + vmlal.s16 q5, d23, d3[2] + + vmlal.s16 q5, d25, d3[3] + vmlal.s16 q5, d26, d4[0] + vmlal.s16 q5, d27, d4[1] + vmlal.s16 q5, d28, d4[2] + vmlal.s16 q5, d29, d4[3] + + /* Last row */ + /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ + vld1.8 {d24, d25, d26}, [r5], r7 @ y0 ( y + 2 ) + + /* Signal memory for data that will be used in the loop after the next */ + pld [r5, r7] + + /* Promoting the 8bit channels to 16bit */ + vmovl.u8 q9, d24 + vmovl.u8 q10, d25 + vmovl.u8 q11, d26 + +/* + d18, d19, d20, d21, d22, d23, + d24, d25 +*/ + + vmlal.s16 q4, d18, d5[0] + vmlal.s16 q4, d19, d5[1] + vmlal.s16 q4, d20, d5[2] + vmlal.s16 q4, d21, d5[3] + vmlal.s16 q4, d22, d6[0] + + vmlal.s16 q5, d19, d5[0] + vmlal.s16 q5, d20, d5[1] + vmlal.s16 q5, d21, d5[2] + vmlal.s16 q5, d22, d5[3] + vmlal.s16 q5, d23, d6[0] + + + + vadd.i32 q4, q4, q15 + vadd.i32 q5, q5, q15 + +/* Narrow it to a d-reg 32 -> 16 bit */ + vrshrn.i32 d8, q4, #8 + vrshrn.i32 d9, q5, #8 + + +/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */ + vqmovun.s16 d8, q4 + + vst1.8 d8, [r0]! @ return the output and increase the address of r0 + + /* Are we done? 
*/ + subs r6, r6, #1 + bne 1b + + /* Yup, bye */ + vpop {q4-q7} + pop {r4-r7, lr} + bx lr + +END(rsdIntrinsicConvolve5x5_K) diff --git a/toolkit/Histogram.cpp b/toolkit/Histogram.cpp new file mode 100644 index 00000000..86b4bed8 --- /dev/null +++ b/toolkit/Histogram.cpp @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <array> +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +#define LOG_TAG "renderscript.toolkit.Histogram" + +namespace android { +namespace renderscript { + +class HistogramTask : public Task { + const uchar* mIn; + std::vector<int> mSums; + uint32_t mThreadCount; + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + + public: + HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize, + uint32_t threadCount, const Restriction* restriction); + void collateSums(int* out); +}; + +class HistogramDotTask : public Task { + const uchar* mIn; + float mDot[4]; + int mDotI[4]; + std::vector<int> mSums; + uint32_t mThreadCount; + + void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend); + + public: + HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize, + uint32_t threadCount, const float* coefficients, + const Restriction* restriction); + void collateSums(int* out); + + virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; +}; + +HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize, + uint32_t threadCount, const Restriction* restriction) + : Task{sizeX, sizeY, vectorSize, true, restriction}, + mIn{in}, + mSums(256 * paddedSize(vectorSize) * threadCount) { + mThreadCount = threadCount; +} + +void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) { + typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t); + + KernelFunction kernel; + switch (mVectorSize) { + case 4: + kernel = 
&HistogramTask::kernelP1U4; + break; + case 3: + kernel = &HistogramTask::kernelP1U3; + break; + case 2: + kernel = &HistogramTask::kernelP1U2; + break; + case 1: + kernel = &HistogramTask::kernelP1U1; + break; + default: + ALOGE("Bad vector size %zd", mVectorSize); + return; + } + + int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex]; + + for (size_t y = startY; y < endY; y++) { + const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize); + std::invoke(kernel, this, inPtr, sums, startX, endX); + } +} + +void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + sums[(in[0] << 2)]++; + sums[(in[1] << 2) + 1]++; + sums[(in[2] << 2) + 2]++; + sums[(in[3] << 2) + 3]++; + in += 4; + } +} + +void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + sums[(in[0] << 2)]++; + sums[(in[1] << 2) + 1]++; + sums[(in[2] << 2) + 2]++; + in += 4; + } +} + +void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + sums[(in[0] << 1)]++; + sums[(in[1] << 1) + 1]++; + in += 2; + } +} + +void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + sums[in[0]]++; + in++; + } +} + +void HistogramTask::collateSums(int* out) { + for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) { + out[ct] = mSums[ct]; + for (uint32_t t = 1; t < mThreadCount; t++) { + out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)]; + } + } +} + +HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize, + uint32_t threadCount, const float* coefficients, + const Restriction* restriction) + : Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) { + mThreadCount = threadCount; + 
+ if (coefficients == nullptr) { + mDot[0] = 0.299f; + mDot[1] = 0.587f; + mDot[2] = 0.114f; + mDot[3] = 0; + } else { + memcpy(mDot, coefficients, 16); + } + mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); + mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); + mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); + mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); +} + +void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) { + typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t); + + KernelFunction kernel; + switch (mVectorSize) { + case 4: + kernel = &HistogramDotTask::kernelP1L4; + break; + case 3: + kernel = &HistogramDotTask::kernelP1L3; + break; + case 2: + kernel = &HistogramDotTask::kernelP1L2; + break; + case 1: + kernel = &HistogramDotTask::kernelP1L1; + break; + default: + ALOGI("Bad vector size %zd", mVectorSize); + return; + } + + int* sums = &mSums[256 * threadIndex]; + + for (size_t y = startY; y < endY; y++) { + const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize); + std::invoke(kernel, this, inPtr, sums, startX, endX); + } +} + +void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]); + sums[(t + 0x7f) >> 8]++; + in += 4; + } +} + +void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]); + sums[(t + 0x7f) >> 8]++; + in += 4; + } +} + +void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]); + sums[(t + 0x7f) >> 8]++; + in += 2; + } +} + +void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, 
uint32_t xstart, uint32_t xend) { + for (uint32_t x = xstart; x < xend; x++) { + int t = (mDotI[0] * in[0]); + sums[(t + 0x7f) >> 8]++; + in++; + } +} + +void HistogramDotTask::collateSums(int* out) { + for (uint32_t ct = 0; ct < 256; ct++) { + out[ct] = mSums[ct]; + for (uint32_t t = 1; t < mThreadCount; t++) { + out[ct] += mSums[ct + (256 * t)]; + } + } +} + +//////////////////////////////////////////////////////////////////////////// + +void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY, + size_t vectorSize, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (vectorSize < 1 || vectorSize > 4) { + ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); + return; + } +#endif + + HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction); + processor->doTask(&task); + task.collateSums(out); +} + +void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY, + size_t vectorSize, const float* coefficients, + const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } + if (vectorSize < 1 || vectorSize > 4) { + ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); + return; + } + if (coefficients != nullptr) { + float sum = 0.0f; + for (size_t i = 0; i < vectorSize; i++) { + if (coefficients[i] < 0.0f) { + ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.", + i, coefficients[i]); + return; + } + sum += coefficients[i]; + } + if (sum > 1.0f) { + ALOGE("histogramDot coefficients should add to 1 or less. 
Their sum is %f.", sum); + return; + } + } +#endif + + HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), + coefficients, restriction); + processor->doTask(&task); + task.collateSums(out); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/JniEntryPoints.cpp b/toolkit/JniEntryPoints.cpp new file mode 100644 index 00000000..3bf5911a --- /dev/null +++ b/toolkit/JniEntryPoints.cpp @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <android/bitmap.h> +#include <assert.h> +#include <jni.h> + +#include "RenderScriptToolkit.h" +#include "Utils.h" + +#define LOG_TAG "renderscript.toolkit.JniEntryPoints" + +using namespace android::renderscript; + +/** + * I compared using env->GetPrimitiveArrayCritical vs. env->GetByteArrayElements to get access + * to the underlying data. On Pixel 4, it's actually faster to not use critical. The code is left + * here if you want to experiment. Note that USE_CRITICAL could block the garbage collector. 
+ */ +// #define USE_CRITICAL + +class ByteArrayGuard { + private: + JNIEnv* env; + jbyteArray array; + jbyte* data; + + public: + ByteArrayGuard(JNIEnv* env, jbyteArray array) : env{env}, array{array} { +#ifdef USE_CRITICAL + data = reinterpret_cast<jbyte*>(env->GetPrimitiveArrayCritical(array, nullptr)); +#else + data = env->GetByteArrayElements(array, nullptr); +#endif + } + ~ByteArrayGuard() { +#ifdef USE_CRITICAL + env->ReleasePrimitiveArrayCritical(array, data, 0); +#else + env->ReleaseByteArrayElements(array, data, 0); +#endif + } + uint8_t* get() { return reinterpret_cast<uint8_t*>(data); } +}; + +class IntArrayGuard { + private: + JNIEnv* env; + jintArray array; + jint* data; + + public: + IntArrayGuard(JNIEnv* env, jintArray array) : env{env}, array{array} { +#ifdef USE_CRITICAL + data = reinterpret_cast<jint*>(env->GetPrimitiveArrayCritical(array, nullptr)); +#else + data = env->GetIntArrayElements(array, nullptr); +#endif + } + ~IntArrayGuard() { +#ifdef USE_CRITICAL + env->ReleasePrimitiveArrayCritical(array, data, 0); +#else + env->ReleaseIntArrayElements(array, data, 0); +#endif + } + int* get() { return reinterpret_cast<int*>(data); } +}; + +class FloatArrayGuard { + private: + JNIEnv* env; + jfloatArray array; + jfloat* data; + + public: + FloatArrayGuard(JNIEnv* env, jfloatArray array) : env{env}, array{array} { +#ifdef USE_CRITICAL + data = reinterpret_cast<jfloat*>(env->GetPrimitiveArrayCritical(array, nullptr)); +#else + data = env->GetFloatArrayElements(array, nullptr); +#endif + } + ~FloatArrayGuard() { +#ifdef USE_CRITICAL + env->ReleasePrimitiveArrayCritical(array, data, 0); +#else + env->ReleaseFloatArrayElements(array, data, 0); +#endif + } + float* get() { return reinterpret_cast<float*>(data); } +}; + +class BitmapGuard { + private: + JNIEnv* env; + jobject bitmap; + AndroidBitmapInfo info; + int bytesPerPixel; + void* bytes; + bool valid; + + public: + BitmapGuard(JNIEnv* env, jobject jBitmap) : env{env}, bitmap{jBitmap}, 
bytes{nullptr} { + valid = false; + if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS) { + ALOGE("AndroidBitmap_getInfo failed"); + return; + } + if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 && + info.format != ANDROID_BITMAP_FORMAT_A_8) { + ALOGE("AndroidBitmap in the wrong format"); + return; + } + bytesPerPixel = info.stride / info.width; + if (bytesPerPixel != 1 && bytesPerPixel != 4) { + ALOGE("Expected a vector size of 1 or 4. Got %d. Extra padding per line not currently " + "supported", + bytesPerPixel); + return; + } + if (AndroidBitmap_lockPixels(env, bitmap, &bytes) != ANDROID_BITMAP_RESULT_SUCCESS) { + ALOGE("AndroidBitmap_lockPixels failed"); + return; + } + valid = true; + } + ~BitmapGuard() { + if (valid) { + AndroidBitmap_unlockPixels(env, bitmap); + } + } + uint8_t* get() const { + assert(valid); + return reinterpret_cast<uint8_t*>(bytes); + } + int width() const { return info.width; } + int height() const { return info.height; } + int vectorSize() const { return bytesPerPixel; } +}; + +/** + * Copies the content of Kotlin Range2d object into the equivalent C++ struct. + */ +class RestrictionParameter { + private: + bool isNull; + Restriction restriction; + + public: + RestrictionParameter(JNIEnv* env, jobject jRestriction) : isNull{jRestriction == nullptr} { + if (isNull) { + return; + } + /* TODO Measure how long FindClass and related functions take. Consider passing the + * four values instead. This would also require setting the default when Range2D is null. + */ + jclass restrictionClass = env->FindClass("android/renderscript/toolkit/Range2d"); + if (restrictionClass == nullptr) { + ALOGE("RenderScriptToolit. Internal error. 
Could not find the Kotlin Range2d class."); + isNull = true; + return; + } + jfieldID startXId = env->GetFieldID(restrictionClass, "startX", "I"); + jfieldID startYId = env->GetFieldID(restrictionClass, "startY", "I"); + jfieldID endXId = env->GetFieldID(restrictionClass, "endX", "I"); + jfieldID endYId = env->GetFieldID(restrictionClass, "endY", "I"); + restriction.startX = env->GetIntField(jRestriction, startXId); + restriction.startY = env->GetIntField(jRestriction, startYId); + restriction.endX = env->GetIntField(jRestriction, endXId); + restriction.endY = env->GetIntField(jRestriction, endYId); + } + Restriction* get() { return isNull ? nullptr : &restriction; } +}; + +extern "C" JNIEXPORT jlong JNICALL +Java_android_renderscript_toolkit_Toolkit_createNative(JNIEnv* /*env*/, jobject /*thiz*/) { + return reinterpret_cast<jlong>(new RenderScriptToolkit()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_destroyNative( + JNIEnv* /*env*/, jobject /*thiz*/, jlong native_handle) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + delete toolkit; +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlend( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jint jmode, jbyteArray source_array, + jbyteArray dest_array, jint size_x, jint size_y, jobject restriction) { + auto toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + auto mode = static_cast<RenderScriptToolkit::BlendingMode>(jmode); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard source{env, source_array}; + ByteArrayGuard dest{env, dest_array}; + + toolkit->blend(mode, source.get(), dest.get(), size_x, size_y, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlendBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jint jmode, jobject source_bitmap, + jobject dest_bitmap, jobject restriction) { + auto 
toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + auto mode = static_cast<RenderScriptToolkit::BlendingMode>(jmode); + RestrictionParameter restrict {env, restriction}; + BitmapGuard source{env, source_bitmap}; + BitmapGuard dest{env, dest_bitmap}; + + toolkit->blend(mode, source.get(), dest.get(), source.width(), source.height(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlur( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint vectorSize, + jint size_x, jint size_y, jint radius, jbyteArray output_array, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + + toolkit->blur(input.get(), output.get(), size_x, size_y, vectorSize, radius, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlurBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jobject output_bitmap, jint radius, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + + toolkit->blur(input.get(), output.get(), input.width(), input.height(), input.vectorSize(), + radius, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeColorMatrix( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jint input_vector_size, jint size_x, jint size_y, jbyteArray output_array, + jint output_vector_size, jfloatArray jmatrix, jfloatArray add_vector, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + 
RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + FloatArrayGuard matrix{env, jmatrix}; + FloatArrayGuard add{env, add_vector}; + + toolkit->colorMatrix(input.get(), output.get(), input_vector_size, output_vector_size, size_x, + size_y, matrix.get(), add.get(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeColorMatrixBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jobject output_bitmap, jfloatArray jmatrix, jfloatArray add_vector, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + FloatArrayGuard matrix{env, jmatrix}; + FloatArrayGuard add{env, add_vector}; + + toolkit->colorMatrix(input.get(), output.get(), input.vectorSize(), output.vectorSize(), + input.width(), input.height(), matrix.get(), add.get(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeConvolve( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint vectorSize, + jint size_x, jint size_y, jbyteArray output_array, jfloatArray coefficients, + jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + FloatArrayGuard coeffs{env, coefficients}; + + switch (env->GetArrayLength(coefficients)) { + case 9: + toolkit->convolve3x3(input.get(), output.get(), vectorSize, size_x, size_y, + coeffs.get(), restrict.get()); + break; + case 25: + toolkit->convolve5x5(input.get(), output.get(), vectorSize, size_x, size_y, + coeffs.get(), restrict.get()); + break; + } +} + 
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeConvolveBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jobject output_bitmap, jfloatArray coefficients, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + FloatArrayGuard coeffs{env, coefficients}; + + switch (env->GetArrayLength(coefficients)) { + case 9: + toolkit->convolve3x3(input.get(), output.get(), input.vectorSize(), input.width(), + input.height(), coeffs.get(), restrict.get()); + break; + case 25: + toolkit->convolve5x5(input.get(), output.get(), input.vectorSize(), input.width(), + input.height(), coeffs.get(), restrict.get()); + break; + } +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogram( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jint vector_size, jint size_x, jint size_y, jintArray output_array, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + IntArrayGuard output{env, output_array}; + + toolkit->histogram(input.get(), output.get(), size_x, size_y, vector_size, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jintArray output_array, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + IntArrayGuard output{env, output_array}; + + toolkit->histogram(input.get(), output.get(), input.width(), 
input.height(), input.vectorSize(), + restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramDot( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jint vector_size, jint size_x, jint size_y, jintArray output_array, + jfloatArray coefficients, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + IntArrayGuard output{env, output_array}; + FloatArrayGuard coeffs{env, coefficients}; + + toolkit->histogramDot(input.get(), output.get(), size_x, size_y, vector_size, coeffs.get(), + restrict.get()); +} + +extern "C" JNIEXPORT +void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramDotBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jintArray output_array, jfloatArray coefficients, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + IntArrayGuard output{env, output_array}; + FloatArrayGuard coeffs{env, coefficients}; + + toolkit->histogramDot(input.get(), output.get(), input.width(), input.height(), + input.vectorSize(), coeffs.get(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jbyteArray output_array, jint size_x, jint size_y, jbyteArray red_table, + jbyteArray green_table, jbyteArray blue_table, jbyteArray alpha_table, + jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + ByteArrayGuard 
red{env, red_table}; + ByteArrayGuard green{env, green_table}; + ByteArrayGuard blue{env, blue_table}; + ByteArrayGuard alpha{env, alpha_table}; + + toolkit->lut(input.get(), output.get(), size_x, size_y, red.get(), green.get(), blue.get(), + alpha.get(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLutBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jobject output_bitmap, jbyteArray red_table, jbyteArray green_table, jbyteArray blue_table, + jbyteArray alpha_table, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + ByteArrayGuard red{env, red_table}; + ByteArrayGuard green{env, green_table}; + ByteArrayGuard blue{env, blue_table}; + ByteArrayGuard alpha{env, alpha_table}; + + toolkit->lut(input.get(), output.get(), input.width(), input.height(), red.get(), green.get(), + blue.get(), alpha.get(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut3d( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jbyteArray output_array, jint size_x, jint size_y, jbyteArray cube_values, jint cubeSizeX, + jint cubeSizeY, jint cubeSizeZ, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + ByteArrayGuard cube{env, cube_values}; + + toolkit->lut3d(input.get(), output.get(), size_x, size_y, cube.get(), cubeSizeX, cubeSizeY, + cubeSizeZ, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut3dBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject 
input_bitmap, + jobject output_bitmap, jbyteArray cube_values, jint cubeSizeX, jint cubeSizeY, + jint cubeSizeZ, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + ByteArrayGuard cube{env, cube_values}; + + toolkit->lut3d(input.get(), output.get(), input.width(), input.height(), cube.get(), cubeSizeX, + cubeSizeY, cubeSizeZ, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeResize( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, + jint vector_size, jint input_size_x, jint input_size_y, jbyteArray output_array, + jint output_size_x, jint output_size_y, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + + toolkit->resize(input.get(), output.get(), input_size_x, input_size_y, vector_size, + output_size_x, output_size_y, restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeResizeBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap, + jobject output_bitmap, jobject restriction) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + RestrictionParameter restrict {env, restriction}; + BitmapGuard input{env, input_bitmap}; + BitmapGuard output{env, output_bitmap}; + + toolkit->resize(input.get(), output.get(), input.width(), input.height(), input.vectorSize(), + output.width(), output.height(), restrict.get()); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeYuvToRgb( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, 
+ jbyteArray output_array, jint size_x, jint size_y, jint format) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + ByteArrayGuard input{env, input_array}; + ByteArrayGuard output{env, output_array}; + + toolkit->yuvToRgb(input.get(), output.get(), size_x, size_y, + static_cast<RenderScriptToolkit::YuvFormat>(format)); +} + +extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeYuvToRgbBitmap( + JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint size_x, + jint size_y, jobject output_bitmap, jint format) { + RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle); + BitmapGuard output{env, output_bitmap}; + ByteArrayGuard input{env, input_array}; + + toolkit->yuvToRgb(input.get(), output.get(), size_x, size_y, + static_cast<RenderScriptToolkit::YuvFormat>(format)); +} diff --git a/toolkit/Lut.cpp b/toolkit/Lut.cpp new file mode 100644 index 00000000..4ac5cdc1 --- /dev/null +++ b/toolkit/Lut.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +#define LOG_TAG "renderscript.toolkit.Lut" + +namespace android { +namespace renderscript { + +class LutTask : public Task { + const uchar4* mIn; + uchar4* mOut; + const uchar* mRedTable; + const uchar* mGreenTable; + const uchar* mBlueTable; + const uchar* mAlphaTable; + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. + virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + LutTask(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY, const uint8_t* red, + const uint8_t* green, const uint8_t* blue, const uint8_t* alpha, + const Restriction* restriction) + : Task{sizeX, sizeY, 4, true, restriction}, + mIn{reinterpret_cast<const uchar4*>(input)}, + mOut{reinterpret_cast<uchar4*>(output)}, + mRedTable{red}, + mGreenTable{green}, + mBlueTable{blue}, + mAlphaTable{alpha} {} +}; + +void LutTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + size_t offset = mSizeX * y + startX; + const uchar4* in = mIn + offset; + uchar4* out = mOut + offset; + for (size_t x = startX; x < endX; x++) { + auto v = *in; + *out = uchar4{mRedTable[v.x], mGreenTable[v.y], mBlueTable[v.z], mAlphaTable[v.w]}; + in++; + out++; + } + } +} + +void RenderScriptToolkit::lut(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY, + const uint8_t* red, const uint8_t* green, const uint8_t* blue, + const uint8_t* alpha, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } +#endif + + LutTask task(input, output, sizeX, sizeY, red, green, blue, alpha, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git 
a/toolkit/Lut3d.cpp b/toolkit/Lut3d.cpp new file mode 100644 index 00000000..f8a7d618 --- /dev/null +++ b/toolkit/Lut3d.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +namespace android { +namespace renderscript { + +#define LOG_TAG "renderscript.toolkit.Lut3d" + +/** + * Converts a RGBA buffer using a 3D cube. + */ +class Lut3dTask : public Task { + // The input array we're transforming. + const uchar4* mIn; + // Where we'll store the transformed result. + uchar4* mOut; + // The size of each of the three cube dimensions. We don't make use of the last value. + int4 mCubeDimension; + // The translation cube, in row major format. + const uchar* mCubeTable; + + /** + * Converts a subset of a line of the 2D buffer. + * + * @param in The start of the data to transform. + * @param out Where to store the result. + * @param length The number of 4-byte vectors to transform. + */ + void kernel(const uchar4* in, uchar4* out, uint32_t length); + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + Lut3dTask(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY, + const uint8_t* cube, int cubeSizeX, int cubeSizeY, int cubeSizeZ, + const Restriction* restriction) + : Task{sizeX, sizeY, 4, true, restriction}, + mIn{reinterpret_cast<const uchar4*>(input)}, + mOut{reinterpret_cast<uchar4*>(output)}, + mCubeDimension{cubeSizeX, cubeSizeY, cubeSizeZ, 0}, + mCubeTable{cube} {} +}; + +extern "C" void rsdIntrinsic3DLUT_K(void* dst, void const* in, size_t count, void const* lut, + int32_t pitchy, int32_t pitchz, int dimx, int dimy, int dimz); + +void Lut3dTask::kernel(const uchar4* in, uchar4* out, uint32_t length) { + uint32_t x1 = 0; + uint32_t x2 = length; + + const uchar* bp = mCubeTable; + + int4 dims = mCubeDimension - 1; + + const float4 m = (float4)(1.f / 255.f) * convert<float4>(dims); + const int4 coordMul = convert<int4>(m * (float4)0x8000); + const size_t stride_y = mCubeDimension.x * 4; + const size_t stride_z = stride_y * mCubeDimension.y; + + // ALOGE("strides %zu %zu", stride_y, stride_z); + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd) { + int32_t len = x2 - x1; + if (len > 0) { + rsdIntrinsic3DLUT_K(out, in, len, bp, stride_y, stride_z, dims.x, dims.y, dims.z); + x1 += len; + out += len; + in += len; + } + } +#endif + + while (x1 < x2) { + int4 baseCoord = convert<int4>(*in) * coordMul; + int4 coord1 = baseCoord >> (int4)15; + // int4 coord2 = min(coord1 + 1, gDims - 1); + + int4 weight2 = baseCoord & 0x7fff; + int4 weight1 = (int4)0x8000 - weight2; + + // ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); + const uchar* bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z); + const uchar4* pt_00 = (const uchar4*)&bp2[0]; + const uchar4* pt_10 = (const uchar4*)&bp2[stride_y]; + const uchar4* pt_01 = (const uchar4*)&bp2[stride_z]; + const uchar4* pt_11 = (const 
uchar4*)&bp2[stride_y + stride_z]; + + uint4 v000 = convert<uint4>(pt_00[0]); + uint4 v100 = convert<uint4>(pt_00[1]); + uint4 v010 = convert<uint4>(pt_10[0]); + uint4 v110 = convert<uint4>(pt_10[1]); + uint4 v001 = convert<uint4>(pt_01[0]); + uint4 v101 = convert<uint4>(pt_01[1]); + uint4 v011 = convert<uint4>(pt_11[0]); + uint4 v111 = convert<uint4>(pt_11[1]); + + uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7; + uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7; + uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7; + uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7; + + uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15; + uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15; + + uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15; + uint4 v2 = (v + 0x7f) >> (int4)8; + + uchar4 ret = convert<uchar4>(v2); + ret.w = in->w; + +#if 0 + if (!x1) { + ALOGE("in %08x %08x %08x %08x", in->r, in->g, in->b, in->a); + ALOGE("baseCoord %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, + baseCoord.w); + ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); + ALOGE("weight1 %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w); + ALOGE("weight2 %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w); + + ALOGE("v000 %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w); + ALOGE("v100 %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w); + ALOGE("yz00 %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w); + ALOGE("z0 %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w); + + ALOGE("v %08x %08x %08x %08x", v.x, v.y, v.z, v.w); + ALOGE("v2 %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w); + } +#endif + *out = ret; + + in++; + out++; + x1++; + } +} + +void Lut3dTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + size_t offset = mSizeX * y + 
startX; + kernel(mIn + offset, mOut + offset, endX - startX); + } +} + +void RenderScriptToolkit::lut3d(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY, + const uint8_t* cube, size_t cubeSizeX, size_t cubeSizeY, + size_t cubeSizeZ, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) { + return; + } +#endif + + Lut3dTask task(input, output, sizeX, sizeY, cube, cubeSizeX, cubeSizeY, cubeSizeZ, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Lut3d_advsimd.S b/toolkit/Lut3d_advsimd.S new file mode 100644 index 00000000..edcb0381 --- /dev/null +++ b/toolkit/Lut3d_advsimd.S @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: +#define END(f) .size f, .-f; + + +.macro lanepair dst, src0, src1, xr0, xr1, yr0, yr1, zr0, zr1 + + smov x6, \src0 + smov x7, \src1 + + add x6, x6, x3 + add x7, x7, x3 + + ld1 {v16.2s}, [x6], x4 + ld1 {v17.2s}, [x7], x4 + + ld1 {v18.2s}, [x6], x5 + ld1 {v19.2s}, [x7], x5 + + dup v8.8b, \yr0 + dup v9.8b, \yr1 + /* Y interpolate, front, lanes 0 and 1 -> v12 and v13 */ + zip1 v12.16b, v5.16b, v16.16b + zip1 v13.16b, v5.16b, v17.16b + umlsl v12.8h, v16.8b, v8.8b + umlsl v13.8h, v17.8b, v9.8b + umlal v12.8h, v18.8b, v8.8b + umlal v13.8h, v19.8b, v9.8b + + ld1 {v18.2s}, [x6] + ld1 {v19.2s}, [x7] + + sub x6, x6, x4 + sub x7, x7, x4 + + ld1 {v16.2s}, [x6] + ld1 {v17.2s}, [x7] + + /* Y interpolate, rear, lanes 0 and 1 -> v14 and v15 */ + zip1 v14.16b, v5.16b, v16.16b + zip1 v15.16b, v5.16b, v17.16b + umlsl v14.8h, v16.8b, v8.8b + umlsl v15.8h, v17.8b, v9.8b + umlal v14.8h, v18.8b, v8.8b + umlal v15.8h, v19.8b, v9.8b + + /* Z interpolate, lane 0 v12/v14 -> v10 */ + ushll v8.4s, v12.4h, #8 + ushll2 v9.4s, v12.8h, #8 + umlsl v8.4s, v12.4h, \zr0 + umlsl2 v9.4s, v12.8h, \zr0 + umlal v8.4s, v14.4h, \zr0 + umlal2 v9.4s, v14.8h, \zr0 + rshrn v10.4h, v8.4s, #8 + rshrn2 v10.8h, v9.4s, #8 + + /* Z interpolate, lane 1 v13/v15 -> v11 */ + ushll v8.4s, v13.4h, #8 + ushll2 v9.4s, v13.8h, #8 + umlsl v8.4s, v13.4h, \zr1 + umlsl2 v9.4s, v13.8h, \zr1 + umlal v8.4s, v15.4h, \zr1 + umlal2 v9.4s, v15.8h, \zr1 + rshrn v11.4h, v8.4s, #8 + rshrn2 v11.8h, v9.4s, #8 + + /* X interpolate, lanes 0 and 1 v10,v11 -> v14 */ + ushll v8.4s, v10.4h, #8 + ushll v9.4s, v11.4h, #8 + umlsl v8.4s, v10.4h, \xr0 + umlsl v9.4s, v11.4h, \xr1 + umlal2 v8.4s, v10.8h, \xr0 + umlal2 v9.4s, v11.8h, \xr1 + shrn v14.4h, v8.4s, #8 + shrn2 v14.8h, v9.4s, #8 + + /* pack lanes 0-1 -> v6 */ +.ifc \dst, v20.16b + uqrshrn2 \dst, v14.8h, #8 +.else ; .ifc \dst, v21.16b + uqrshrn2 \dst, v14.8h, #8 +.else + uqrshrn \dst, v14.8h, #8 +.endif ; .endif +.endm + +/* 
void rsdIntrinsic3DLUT_K( + * void *dst, // x0 + * void const *in, // x1 + * size_t count, // x2 + * void const *lut, // x3 + * int32_t pitchy, // w4 + * int32_t pitchz, // w5 + * int dimx, // w6 + * int dimy, // w7 + * int dimz); // [sp] + */ +ENTRY(rsdIntrinsic3DLUT_K) + ldr w8, [sp] + stp d8, d9, [sp, #-64]! + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + movi v4.8b, #1 + ins v4.h[0], w6 + ins v4.h[1], w7 + ins v4.h[2], w8 + ins v4.s[2], w4 + ins v4.s[3], w5 + movi v5.16b, #0 + + subs x2, x2, #8 + bge 2f + cmn x2, #8 // same as cmp x2, #-8 + ble 9f + b 4f + + .align 6 +1: st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32 +/* x0 = dst + * x1 = src + * x2 = count + * x3 = lut + * x4 = pitchy + * x5 = pitchz + * x6 = offset0 + * x7 = offset1 + */ +2: ld4 {v0.8b-v3.8b}, [x1], #32 +/* v0,v1,v2,v3 source data + * v4 dimensions and pitches + */ +3: uxtl v0.8h, v0.8b + uxtl v1.8h, v1.8b + uxtl v2.8h, v2.8b + mul v0.8h, v0.8h, v4.h[0] + mul v1.8h, v1.8h, v4.h[1] + mul v2.8h, v2.8h, v4.h[2] + +/* ursra below would be more accurate, but this can result in a dim.0 case + * where we try to read from the limit of the array and the limit +1 to + * interpolate, even though the fractional component is zero. Strictly this is + * correct, except for the illegal access problem. 
+ */ + usra v0.8h, v0.8h, #8 + usra v1.8h, v1.8h, #8 + usra v2.8h, v2.8h, #8 + + ushr v12.8h, v0.8h, #8 + ushr v13.8h, v1.8h, #8 + ushr v14.8h, v2.8h, #8 + bic v0.8h, #0xff, LSL #8 + xtn v1.8b, v1.8h + bic v2.8h, #0xff, LSL #8 + +/* v0.8h,v1.8b,v2.8h fractional offset + * v12.8h,v13.8h,v14.8h integer offset + */ + + ushll v6.4s, v12.4h, #2 + ushll2 v7.4s, v12.8h, #2 + uxtl v8.4s, v13.4h + uxtl2 v9.4s, v13.8h + uxtl v10.4s, v14.4h + uxtl2 v11.4s, v14.8h + mla v6.4s, v8.4s, v4.s[2] + mla v7.4s, v9.4s, v4.s[2] + mla v6.4s, v10.4s, v4.s[3] + mla v7.4s, v11.4s, v4.s[3] + +/* v6,v7 list of table offsets */ + + /* lanes 0 and 1 */ + lanepair dst=v20.8b, src0=v6.s[0], src1=v6.s[1], xr0=v0.h[0], xr1=v0.h[1], yr0=v1.b[0], yr1=v1.b[1], zr0=v2.h[0], zr1=v2.h[1] + + /* lanes 2 and 3 */ + lanepair dst=v20.16b, src0=v6.s[2], src1=v6.s[3], xr0=v0.h[2], xr1=v0.h[3], yr0=v1.b[2], yr1=v1.b[3], zr0=v2.h[2], zr1=v2.h[3] + + /* lanes 4 and 5 */ + lanepair dst=v21.8b, src0=v7.s[0], src1=v7.s[1], xr0=v0.h[4], xr1=v0.h[5], yr0=v1.b[4], yr1=v1.b[5], zr0=v2.h[4], zr1=v2.h[5] + + /* lanes 6 and 7 */ + lanepair dst=v21.16b, src0=v7.s[2], src1=v7.s[3], xr0=v0.h[6], xr1=v0.h[7], yr0=v1.b[6], yr1=v1.b[7], zr0=v2.h[6], zr1=v2.h[7] + + uzp1 v6.16b, v20.16b, v21.16b + uzp2 v7.16b, v20.16b, v21.16b + uzp1 v20.16b, v6.16b, v7.16b + uzp2 v22.16b, v6.16b, v7.16b + mov v21.d[0], v20.d[1] + + subs x2, x2, #8 + mov v23.8b, v3.8b + + bge 1b + + cmn x2, #8 // same as cmp x2, #-8 + blt 1f + + st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32 + beq 9f + + /* fill the vector with a safe value */ +4: ld4r {v0.8b-v3.8b}, [x1] + tbz x2, #2, 2f + ld4 {v0.b-v3.b}[0], [x1], #4 + ld4 {v0.b-v3.b}[1], [x1], #4 + ld4 {v0.b-v3.b}[2], [x1], #4 + ld4 {v0.b-v3.b}[3], [x1], #4 +2: tbz x2, #1, 2f + ld4 {v0.b-v3.b}[4], [x1], #4 + ld4 {v0.b-v3.b}[5], [x1], #4 +2: tbz x2, #0, 2f + ld4 {v0.b-v3.b}[6], [x1], #4 +2: b 3b + +1: tst x2, #4 + beq 2f + st4 {v20.b-v23.b}[0], [x0], #4 + st4 {v20.b-v23.b}[1], [x0], #4 + st4 {v20.b-v23.b}[2], 
[x0], #4 + st4 {v20.b-v23.b}[3], [x0], #4 +2: tst x2, #2 + beq 2f + st4 {v20.b-v23.b}[4], [x0], #4 + st4 {v20.b-v23.b}[5], [x0], #4 +2: tst x2, #1 + beq 9f + st4 {v20.b-v23.b}[6], [x0], #4 + +9: ldp d14, d15, [sp, #48] + ldp d12, d13, [sp, #32] + ldp d10, d11, [sp, #16] + ldp d8, d9, [sp], #64 + ret +END(rsdIntrinsic3DLUT_K) diff --git a/toolkit/Lut3d_neon.S b/toolkit/Lut3d_neon.S new file mode 100644 index 00000000..9590f9c8 --- /dev/null +++ b/toolkit/Lut3d_neon.S @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart +#define END(f) .fnend; .size f, .-f; + +.eabi_attribute 25,1 @Tag_ABI_align8_preserved +.arm + +.macro lanepair dst, src, xr0, xr1, yr0, yr1, zr0, zr1 + + vmov r6, r7, \src + + add r6, r6, r3 + add r7, r7, r3 + + vld1.u8 d16, [r6], r4 + vld1.u8 d17, [r7], r4 + + vld1.u8 d18, [r6], r5 + vld1.u8 d19, [r7], r5 + + vdup.u8 d6, \yr0 + vdup.u8 d7, \yr1 + /* Y interpolate, front, lanes 0 and 1 -> q12 and q13 */ + vshll.u8 q12, d16, #8 + vshll.u8 q13, d17, #8 + vmlsl.u8 q12, d16, d6 + vmlsl.u8 q13, d17, d7 + vmlal.u8 q12, d18, d6 + vmlal.u8 q13, d19, d7 + + vld1.u8 d18, [r6] + vld1.u8 d19, [r7] + + sub r6, r6, r4 + sub r7, r7, r4 + + vld1.u8 d16, [r6] + vld1.u8 d17, [r7] + + /* Y interpolate, rear, lanes 0 and 1 -> q14 and q15 */ + vshll.u8 q14, d16, #8 + vshll.u8 q15, d17, #8 + vmlsl.u8 q14, d16, d6 + vmlsl.u8 q15, d17, d7 + vmlal.u8 q14, d18, d6 + vmlal.u8 q15, d19, d7 + + /* Z interpolate, lane 0 q12/q14 -> q10 */ + vshll.u16 q8, d24, #8 + vshll.u16 q9, d25, #8 + vmlsl.u16 q8, d24, \zr0 + vmlsl.u16 q9, d25, \zr0 + vmlal.u16 q8, d28, \zr0 + vmlal.u16 q9, d29, \zr0 + vrshrn.u32 d20, q8, #8 + vrshrn.u32 d21, q9, #8 + + /* Z interpolate, lane 1 q13/q15 -> q11 */ + vshll.u16 q8, d26, #8 + vshll.u16 q9, d27, #8 + vmlsl.u16 q8, d26, \zr1 + vmlsl.u16 q9, d27, \zr1 + vmlal.u16 q8, d30, \zr1 + vmlal.u16 q9, d31, \zr1 + vrshrn.u32 d22, q8, #8 + vrshrn.u32 d23, q9, #8 + + /* X interpolate, lanes 0 and 1 q10,q11 -> q14 */ + vshll.u16 q8, d20, #8 + vshll.u16 q9, d22, #8 + vmlsl.u16 q8, d20, \xr0 + vmlsl.u16 q9, d22, \xr1 + vmlal.u16 q8, d21, \xr0 + vmlal.u16 q9, d23, \xr1 + vshrn.u32 d28, q8, #8 + vshrn.u32 d29, q9, #8 + + /* pack lanes 0-1 -> d12 */ + vqrshrn.u16 \dst, q14, #8 +.endm + +/* void rsdIntrinsic3DLUT_K( + * void *dst, // r0 + * void const *in, // r1 + * size_t count, // r2 + * void const *lut, // r3 + * int32_t pitchy, // [sp] + * int32_t pitchz, // [sp+#4] + * int dimx, // [sp+#8] + * 
int dimy, // [sp+#12] + * int dimz); // [sp+#16] + */ +ENTRY(rsdIntrinsic3DLUT_K) + push {r4,r5,r6,r7} + ldr r4, [sp, #16] + ldr r5, [sp, #20] + ldr r6, [sp, #24] + ldr r7, [sp, #28] + ldr r12, [sp, #32] + vpush {d8-d15} + + vmov.u8 d8, #1 + vmov.u16 d8[0], r6 + vmov.u16 d8[1], r7 + vmov.u16 d8[2], r12 + vmov d9, r4, r5 + + subs r2, #8 + bge 2f + cmp r2, #-8 + ble 9f + b 4f + + .align 6 +1: vst4.u8 {d12,d13,d14,d15}, [r0]! +/* r0 = dst + * r1 = src + * r2 = count + * r3 = lut + * r4 = pitchy + * r5 = pitchz + * r6 = offset0 + * r7 = offset1 + */ +2: vld4.u8 {d0,d2,d4,d6}, [r1]! +3: vmov d10, d6 +/* q0,q1,q2,q5 source data + * q4 dimensions and pitches + * q3, scratch register for scalar access + */ + vmov q3, q4 + vmovl.u8 q0, d0 + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vmul.u16 q0, q0, d6[0] + vmul.u16 q1, q1, d6[1] + vmul.u16 q2, q2, d6[2] + +/* vrsra.u16 below would be more accurate, but this can result in a dim.0 case + * where we try to read from the limit of the array and the limit +1 to + * interpolate, even though the fractional component is zero. Strictly this is + * correct, except for the illegal access problem. 
+ */ + vsra.u16 q0, q0, #8 + vsra.u16 q1, q1, #8 + vsra.u16 q2, q2, #8 + + vshr.u16 q12, q0, #8 + vshr.u16 q13, q1, #8 + vshr.u16 q14, q2, #8 + + vbic.u16 q0, #0xff00 + vmovn.u16 d2, q1 + vbic.u16 q2, #0xff00 + +/* q0,d2,q2 fractional offset + * q12,q13,q14 integer offset + */ + + vshll.u16 q6, d24, #2 + vshll.u16 q7, d25, #2 + vmovl.u16 q8, d26 + vmovl.u16 q9, d27 + vmovl.u16 q10, d28 + vmovl.u16 q11, d29 + vmla.s32 q6, q8, d9[0] + vmla.s32 q7, q9, d9[0] + vmla.s32 q6, q10, d9[1] + vmla.s32 q7, q11, d9[1] + +/* q6,q7 list of table offsets */ + + /* lanes 0 and 1 */ + lanepair dst=d12, src=d12, xr0=d0[0], xr1=d0[1], yr0=d2[0], yr1=d2[1], zr0=d4[0], zr1=d4[1] + + /* lanes 2 and 3 */ + lanepair dst=d13, src=d13, xr0=d0[2], xr1=d0[3], yr0=d2[2], yr1=d2[3], zr0=d4[2], zr1=d4[3] + + /* lanes 4 and 5 */ + lanepair dst=d14, src=d14, xr0=d1[0], xr1=d1[1], yr0=d2[4], yr1=d2[5], zr0=d5[0], zr1=d5[1] + + /* lanes 6 and 7 */ + lanepair dst=d15, src=d15, xr0=d1[2], xr1=d1[3], yr0=d2[6], yr1=d2[7], zr0=d5[2], zr1=d5[3] + + vuzp.u8 d12, d13 + vuzp.u8 d14, d15 + vuzp.u8 d12, d14 + vuzp.u8 d13, d15 + + subs r2, r2, #8 + vmov.u8 d15, d10 + + bge 1b + + cmp r2, #-8 + blt 1f + + vst4.u8 {d12,d13,d14,d15}, [r0]! + + beq 9f + + /* fill the vector with a safe value */ +4: vld1.u32 {d0[]}, [r1] + vmov d2, d0 + vmov d4, d0 + vmov d6, d0 + tst r2, #4 + beq 2f + vld1.u32 {d0}, [r1]! + vld1.u32 {d2}, [r1]! +2: tst r2, #2 + beq 2f + vld1.u32 {d4}, [r1]! +2: tst r2, #1 + beq 2f + vld1.u32 {d6[0]}, [r1]! +2: vuzp.8 d0, d2 + vuzp.8 d4, d6 + vuzp.8 d0, d4 + vuzp.8 d2, d6 + b 3b + +1: vzip.8 d12, d14 + vzip.8 d13, d15 + vzip.8 d12, d13 + vzip.8 d14, d15 + tst r2, #4 + beq 2f + vst1.u32 {d12,d13}, [r0]! +2: tst r2, #2 + beq 2f + vst1.u32 {d14}, [r0]! +2: tst r2, #1 + beq 9f + vst1.u32 {d15[0]}, [r0]! 
+ +9: mov r0, #0 + vpop {d8-d15} + pop {r4,r5,r6,r7} + bx lr +END(rsdIntrinsic3DLUT_K) diff --git a/toolkit/README.txt b/toolkit/README.txt new file mode 100644 index 00000000..4e08dc52 --- /dev/null +++ b/toolkit/README.txt @@ -0,0 +1,9 @@ +This directory will contain the standalone library meant to replace the RenderScript Intrinsics. + +The work in this directory is not complete. + +To make the review process manageable, a series of smaller CLs will be reviewed and submitted. + +While it is initially built with Soong, the end goal is to move this to github once the work +has been completed. This is a staging area for the reviews. + diff --git a/toolkit/RenderScriptToolkit.cpp b/toolkit/RenderScriptToolkit.cpp new file mode 100644 index 00000000..f1103173 --- /dev/null +++ b/toolkit/RenderScriptToolkit.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RenderScriptToolkit.h" + +#include "TaskProcessor.h" + +#define LOG_TAG "renderscript.toolkit.RenderScriptToolkit" + +namespace android { +namespace renderscript { + +// You will find the implementation of the various transformations in the correspondingly +// named source file. E.g. RenderScriptToolkit::blur() is found in Blur.cpp. 
+ +RenderScriptToolkit::RenderScriptToolkit(int numberOfThreads) + : processor{new TaskProcessor(numberOfThreads)} {} + +RenderScriptToolkit::~RenderScriptToolkit() { + // By defining the destructor here, we don't need to include TaskProcessor.h + // in RenderScriptToolkit.h. +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/RenderScriptToolkit.h b/toolkit/RenderScriptToolkit.h new file mode 100644 index 00000000..fb33195d --- /dev/null +++ b/toolkit/RenderScriptToolkit.h @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H +#define ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H + +#include <cstdint> +#include <memory> + +namespace android { +namespace renderscript { + +class TaskProcessor; + +/** + * Define a range of data to process. + * + * This class is used to restrict a Toolkit operation to a rectangular subset of the input + * tensor. + * + * @property startX The index of the first value to be included on the X axis. + * @property endX The index after the last value to be included on the X axis. + * @property startY The index of the first value to be included on the Y axis. + * @property endY The index after the last value to be included on the Y axis. 
+ */ +struct Restriction { + size_t startX; + size_t endX; + size_t startY; + size_t endY; +}; + +/** + * A collection of high-performance graphic utility functions like blur and blend. + * + * This toolkit provides ten image manipulation functions: blend, blur, color matrix, convolve, + * histogram, histogramDot, lut, lut3d, resize, and YUV to RGB. These functions execute + * multithreaded on the CPU. + * + * These functions work over raw byte arrays. You'll need to specify the width and height of + * the data to be processed, as well as the number of bytes per pixel. For most use cases, + * this will be 4. + * + * You should instantiate the Toolkit once and reuse it throughout your application. + * On instantiation, the Toolkit creates a thread pool that's used for processing all the functions. + * You can limit the number of pool threads used by the Toolkit via the constructor. The pool + * threads are destroyed once the Toolkit is destroyed, after any pending work is done. + * + * This library is thread safe. You can call methods from different pool threads. The functions will + * execute sequentially. + * + * A Java/Kotlin Toolkit is available. It calls this library through JNI. + * + * This toolkit can be used as a replacement for most RenderScript Intrinsic functions. Compared + * to RenderScript, it's simpler to use and more than twice as fast on the CPU. However, RenderScript + * Intrinsics allow more flexibility for the type of allocation supported. In particular, this + * toolkit does not support allocations of floats. + */ +class RenderScriptToolkit { + /** Each Toolkit method call is converted to a Task. The processor owns the thread pool. It + * tiles the tasks and schedules them over the pool threads. + */ + std::unique_ptr<TaskProcessor> processor; + + public: + /** + * Creates the pool threads that are used for processing the method calls. + */ + RenderScriptToolkit(int numberOfThreads = 0); + /** + * Destroys the thread pool. 
This stops any in-progress work; the Toolkit methods called from + * other pool threads will return without having completed the work. Because of the undefined + * state of the output buffers, an application should avoid destroying the Toolkit if other pool + * threads are executing Toolkit methods. + */ + ~RenderScriptToolkit(); + + /** + * Determines how a source buffer is blended into a destination buffer. + * + * See {@link RenderScriptToolkit::blend}. + * + * blend only works on 4 byte RGBA data. In the descriptions below, ".a" represents + * the alpha channel. + */ + enum class BlendingMode { + /** + * dest = 0 + * + * The destination is cleared, i.e. each pixel is set to (0, 0, 0, 0) + */ + CLEAR = 0, + /** + * dest = src + * + * Sets each pixel of the destination to the corresponding one in the source. + */ + SRC = 1, + /** + * dest = dest + * + * Leaves the destination untouched. This is a no-op. + */ + DST = 2, + /** + * dest = src + dest * (1.0 - src.a) + */ + SRC_OVER = 3, + /** + * dest = dest + src * (1.0 - dest.a) + */ + DST_OVER = 4, + /** + * dest = src * dest.a + */ + SRC_IN = 5, + /** + * dest = dest * src.a + */ + DST_IN = 6, + /** + * dest = src * (1.0 - dest.a) + */ + SRC_OUT = 7, + /** + * dest = dest * (1.0 - src.a) + */ + DST_OUT = 8, + /** + * dest.rgb = src.rgb * dest.a + (1.0 - src.a) * dest.rgb, dest.a = dest.a + */ + SRC_ATOP = 9, + /** + * dest.rgb = dest.rgb * src.a + (1.0 - dest.a) * src.rgb, dest.a = src.a + */ + DST_ATOP = 10, + /** + * dest = {src.r ^ dest.r, src.g ^ dest.g, src.b ^ dest.b, src.a ^ dest.a} + * + * Note: this is NOT the Porter/Duff XOR mode; this is a bitwise xor. + */ + XOR = 11, + /** + * dest = src * dest + */ + MULTIPLY = 12, + /** + * dest = min(src + dest, 1.0) + */ + ADD = 13, + /** + * dest = max(dest - src, 0.0) + */ + SUBTRACT = 14 + }; + + /** + * Blend a source buffer with the destination buffer. + * + * Blends a source buffer and a destination buffer, placing the result in the destination + * buffer. 
The blending is done pairwise between two corresponding RGBA values found in + * each buffer. The mode parameter specifies one of fifteen blending operations. + * See {@link BlendingMode}. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The source and destination buffers must have the same dimensions. Both buffers should be + * large enough for sizeX * sizeY * 4 bytes. The buffers have a row-major layout. + * + * @param mode The specific blending operation to do. + * @param source The RGBA input buffer. + * @param dst The destination buffer. Used for input and output. + * @param sizeX The width of both buffers, as a number of RGBA values. + * @param sizeY The height of both buffers, as a number of RGBA values. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + */ + void blend(BlendingMode mode, const uint8_t* _Nonnull source, uint8_t* _Nonnull dst, + size_t sizeX, size_t sizeY, const Restriction* _Nullable restriction = nullptr); + + /** + * Blur an image. + * + * Performs a Gaussian blur of the input image and stores the result in the out buffer. + * + * The radius determines which pixels are used to compute each blurred pixel. This Toolkit + * accepts values between 1 and 25. Larger values create a more blurred effect but also + * take longer to compute. When the radius extends past the edge, the edge pixel will + * be used as replacement for the pixel that's out of bounds. + * + * Each input pixel can either be represented by four bytes (RGBA format) or one byte + * for the less common blurring of an alpha-channel-only image. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. 
+ * + * The input and output buffers must have the same dimensions. Both buffers should be + * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout. + * + * @param in The buffer of the image to be blurred. + * @param out The buffer that receives the blurred image. + * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells. + * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells. + * @param vectorSize Either 1 or 4, the number of bytes in each cell, i.e. A vs. RGBA. + * @param radius The radius of the pixels used to blur. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + */ + void blur(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY, + size_t vectorSize, int radius, const Restriction* _Nullable restriction = nullptr); + + /** + * Identity matrix that can be passed to the {@link RenderScriptToolkit::colorMatrix} method. + * + * Using this matrix will result in no change to the pixel through multiplication although + * the pixel value can still be modified by the add vector, or transformed to a different + * format. + */ + static constexpr float kIdentityMatrix[] = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + /** + * Matrix to turn color pixels to a grey scale. + * + * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert an + * image from color to greyscale. + */ + static constexpr float kGreyScaleColorMatrix[] = { + 0.299f, 0.299f, 0.299f, 0.0f, + 0.587f, 0.587f, 0.587f, 0.0f, + 0.114f, 0.114f, 0.114f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + /** + * Matrix to convert RGB to YUV. + * + * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the + * first three bytes of each pixel from RGB to YUV. This leaves the last byte (the alpha + * channel) untouched. + * + * This is a simplistic conversion. 
Most YUV buffers have a more complicated format, not supported + * by this method. + */ + static constexpr float kRgbToYuvMatrix[] = { + 0.299f, -0.14713f, 0.615f, 0.0f, + 0.587f, -0.28886f, -0.51499f, 0.0f, + 0.114f, 0.436f, -0.10001f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + /** + * Matrix to convert YUV to RGB. + * + * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the + * first three bytes of each pixel from YUV to RGB. This leaves the last byte (the alpha + * channel) untouched. + * + * This is a simplistic conversion. Most YUV buffers have a more complicated format, not supported + * by this method. Use {@link RenderScriptToolkit::yuvToRgb} to convert these buffers. + */ + static constexpr float kYuvToRgbMatrix[] = { + 1.0f, 1.0f, 1.0f, 0.0f, + 0.0f, -0.39465f, 2.03211f, 0.0f, + 1.13983f, -0.5806f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + /** + * Transform an image using a color matrix. + * + * Converts a 2D array of vectors of unsigned bytes, multiplying each vector by a 4x4 matrix + * and adding an optional vector. + * + * Each input vector is composed of 1-4 unsigned bytes. If fewer than 4 bytes, it's extended to + * 4, padding with zeroes. The unsigned bytes are converted from 0-255 to 0.0-1.0 floats + * before the multiplication is done. + * + * The resulting value is normalized from 0.0-1.0 to a 0-255 value and stored in the output. + * If the output vector size is less than four, the unused channels are discarded. + * + * If addVector is null, a vector of zeroes is added, i.e. a noop. + * + * Check kIdentityMatrix, kGreyScaleColorMatrix, kRgbToYuvMatrix, and kYuvToRgbMatrix for sample + * matrices. The YUV conversion may not work for all color spaces. + * + * @param in The buffer of the image to be converted. + * @param out The buffer that receives the converted image. + * @param inputVectorSize The number of bytes in each input cell, a value from 1 to 4. 
+ * @param outputVectorSize The number of bytes in each output cell, a value from 1 to 4. + * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells. + * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells. + * @param matrix The 4x4 matrix to multiply, in row major format. + * @param addVector A vector of four floats that's added to the result of the multiplication. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + */ + void colorMatrix(const void* _Nonnull in, void* _Nonnull out, size_t inputVectorSize, + size_t outputVectorSize, size_t sizeX, size_t sizeY, + const float* _Nonnull matrix, const float* _Nullable addVector = nullptr, + const Restriction* _Nullable restriction = nullptr); + + /** + * Convolve an image. + * + * Applies a 3x3 or 5x5 convolution to the input array using the provided coefficients. + * + * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed. + * The coefficients should be provided in row-major format. + * + * When the square extends past the edge, the edge values will be used as replacement for the + * values that are out of bounds. + * + * Each input cell can be represented by one to four bytes. Each byte is multiplied + * and accumulated independently of the other bytes of the cell. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The input and output buffers must have the same dimensions. Both buffers should be + * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout. + * + * @param in The buffer of the image to be convolved. + * @param out The buffer that receives the convolved image. + * @param vectorSize The number of bytes in each cell, a value from 1 to 4. 
+ * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells. + * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells. + * @param coefficients 9 or 25 multipliers. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + */ + void convolve3x3(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX, + size_t sizeY, const float* _Nonnull coefficients, + const Restriction* _Nullable restriction = nullptr); + + void convolve5x5(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX, + size_t sizeY, const float* _Nonnull coefficients, + const Restriction* _Nullable restriction = nullptr); + + /** + * Compute the histogram of an image. + * + * Tallies how many times each of the 256 possible values of a byte is found in the input. + * + * An input cell can be represented by one to four bytes. The tally is done independently + * for each of the bytes of the cell. Correspondingly, the out array will have + * 256 * vectorSize entries. The counts for value 0 are consecutive, followed by those for + * value 1, etc. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of the input buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The source buffer should be large enough for sizeX * sizeY * vectorSize bytes. It has + * a row-major layout. The out buffer should be large enough for 256 * vectorSize ints. + * + * @param in The buffer of the image to be analyzed. + * @param out The resulting vector of counts. + * @param sizeX The width of the input buffer, as a number of 1 to 4 byte cells. + * @param sizeY The height of the input buffer, as a number of 1 to 4 byte cells. + * @param vectorSize The number of bytes in each cell, a value from 1 to 4. + * @param restriction When not null, restricts the operation to a 2D range of pixels. 
+ */ + void histogram(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY, + size_t vectorSize, const Restriction* _Nullable restriction = nullptr); + + /** + * Compute the histogram of the dot product of an image. + * + * This method supports cells of 1 to 4 bytes in length. For each cell of the array, + * the dot product of its bytes with the provided coefficients is computed. The resulting + * floating point value is converted to an unsigned byte and tallied in the histogram. + * + * If coefficients is null, the coefficients used for RGBA luminosity calculation will be used, + * i.e. the values [0.299f, 0.587f, 0.114f, 0.f]. + * + * Each coefficient must be >= 0 and their sum must be 1.0 or less. There must be the same + * number of coefficients as vectorSize. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of the input buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The source buffer should be large enough for sizeX * sizeY * vectorSize bytes. It has + * a row-major layout. The out array should be large enough for 256 ints. + * + * @param in The buffer of the image to be analyzed. + * @param out The resulting vector of counts. + * @param sizeX The width of the input buffer, as a number of 1 to 4 byte cells. + * @param sizeY The height of the input buffer, as a number of 1 to 4 byte cells. + * @param vectorSize The number of bytes in each cell, a value from 1 to 4. + * @param coefficients The values used for the dot product. Can be nullptr. + * @param restriction When not null, restricts the operation to a 2D range of pixels. 
+ */ + void histogramDot(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY, + size_t vectorSize, const float* _Nullable coefficients, + const Restriction* _Nullable restriction = nullptr); + + /** + * Transform an image using a lookup table. + * + * Transforms an image by using a per-channel lookup table. Each channel of the input has an + * independent lookup table. The tables are 256 entries in size and can cover the full value + * range of a byte. + * + * The input array should be in RGBA format, where four consecutive bytes form a cell. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The input and output buffers must have the same dimensions. Both buffers should be + * large enough for sizeX * sizeY * 4 bytes. The buffers have a row-major layout. + * + * @param in The buffer of the image to be transformed. + * @param out The buffer that receives the transformed image. + * @param sizeX The width of both buffers, as a number of 4 byte cells. + * @param sizeY The height of both buffers, as a number of 4 byte cells. + * @param red An array of 256 values that's used to convert the R channel. + * @param green An array of 256 values that's used to convert the G channel. + * @param blue An array of 256 values that's used to convert the B channel. + * @param alpha An array of 256 values that's used to convert the A channel. + * @param restriction When not null, restricts the operation to a 2D range of pixels. 
+ */ + void lut(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY, + const uint8_t* _Nonnull red, const uint8_t* _Nonnull green, + const uint8_t* _Nonnull blue, const uint8_t* _Nonnull alpha, + const Restriction* _Nullable restriction = nullptr); + + /** + * Transform an image using a 3D lookup table. + * + * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G, + * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest + * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry + * is stored in the output. + * + * The input array should be in RGBA format, where four consecutive bytes form a cell. + * The fourth byte of each input cell is ignored. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. + * + * The input and output buffers must have the same dimensions. Both buffers should be + * large enough for sizeX * sizeY * 4 bytes. The buffers have a row-major layout. + * + * @param in The buffer of the image to be transformed. + * @param out The buffer that receives the transformed image. + * @param sizeX The width of both buffers, as a number of 4 byte cells. + * @param sizeY The height of both buffers, as a number of 4 byte cells. + * @param cube The translation cube, in row-major format. + * @param cubeSizeX The number of RGBA entries in the cube in the X direction. + * @param cubeSizeY The number of RGBA entries in the cube in the Y direction. + * @param cubeSizeZ The number of RGBA entries in the cube in the Z direction. + * @param restriction When not null, restricts the operation to a 2D range of pixels. 
+ */ + void lut3d(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY, + const uint8_t* _Nonnull cube, size_t cubeSizeX, size_t cubeSizeY, size_t cubeSizeZ, + const Restriction* _Nullable restriction = nullptr); + + /** + * Resize an image. + * + * Resizes an image using bicubic interpolation. + * + * This method supports cells of 1 to 4 bytes in length. Each byte of the cell is + * interpolated independently from the others. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of the output buffer. The corresponding scaled range of the input will be used. If provided, + * the range must be wholly contained within the dimensions described by outputSizeX and + * outputSizeY. + * + * The input and output buffers have a row-major layout. Each buffer should be large enough + * for its own width * height * vectorSize bytes (inputSizeX and inputSizeY for the input, + * outputSizeX and outputSizeY for the output). + * + * @param in The buffer of the image to be resized. + * @param out The buffer that receives the resized image. + * @param inputSizeX The width of the input buffer, as a number of 1-4 byte cells. + * @param inputSizeY The height of the input buffer, as a number of 1-4 byte cells. + * @param vectorSize The number of bytes in each cell of both buffers. A value from 1 to 4. + * @param outputSizeX The width of the output buffer, as a number of 1-4 byte cells. + * @param outputSizeY The height of the output buffer, as a number of 1-4 byte cells. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + */ + void resize(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t inputSizeX, + size_t inputSizeY, size_t vectorSize, size_t outputSizeX, size_t outputSizeY, + const Restriction* _Nullable restriction = nullptr); + + /** + * The YUV formats supported by yuvToRgb. + */ + enum class YuvFormat { + NV21 = 0x11, + YV12 = 0x32315659, + }; + + /** + * Convert an image from YUV to RGB. + * + * Converts an Android YUV buffer to RGB. 
The input allocation should be + * supplied in a supported YUV format as a YUV cell Allocation. + * The output is RGBA; the alpha channel will be set to 255. + * + * Note that for YV12 and a sizeX that's not a multiple of 32, the + * RenderScript Intrinsic may not have converted the image correctly. + * This Toolkit method should. + * + * @param in The buffer of the image to be converted. + * @param out The buffer that receives the converted image. + * @param sizeX The width in pixels of the image. Must be even. + * @param sizeY The height in pixels of the image. + * @param format Either YV12 or NV21. + */ + void yuvToRgb(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY, + YuvFormat format); +}; + +} // namespace renderscript +} // namespace android + +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H diff --git a/toolkit/Resize.cpp b/toolkit/Resize.cpp new file mode 100644 index 00000000..624ae8eb --- /dev/null +++ b/toolkit/Resize.cpp @@ -0,0 +1,769 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <math.h> + +#include <cstdint> + +#include "RenderScriptToolkit.h" +#include "TaskProcessor.h" +#include "Utils.h" + +#if defined(ARCH_X86_HAVE_AVX2) +#include <stdint.h> +#include <x86intrin.h> +#include <xmmintrin.h> +#endif + +#define LOG_TAG "renderscript.toolkit.Resize" + +namespace android { +namespace renderscript { + +class ResizeTask : public Task { + const uchar* mIn; + uchar* mOut; + float mScaleX; + float mScaleY; + size_t mInputSizeX; + size_t mInputSizeY; + + void kernelU1(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); + void kernelU2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); + void kernelU4(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + void kernelF1(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); + void kernelF2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); + void kernelF4(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY); +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + + // Process a 2D tile of the overall work. threadIndex identifies which thread does the work. 
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + ResizeTask(const uchar* input, uchar* output, size_t inputSizeX, size_t inputSizeY, + size_t vectorSize, size_t outputSizeX, size_t outputSizeY, + const Restriction* restriction) + : Task{outputSizeX, outputSizeY, vectorSize, false, restriction}, + mIn{input}, + mOut{output}, + mInputSizeX{inputSizeX}, + mInputSizeY{inputSizeY} { + mScaleX = static_cast<float>(inputSizeX) / outputSizeX; + mScaleY = static_cast<float>(inputSizeY) / outputSizeY; + } +}; + +void ResizeTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + typedef void (ResizeTask::*KernelFunction)(uchar*, uint32_t, uint32_t, uint32_t); + + KernelFunction kernel; + switch (mVectorSize) { + case 4: + kernel = &ResizeTask::kernelU4; + break; + case 3: + kernel = &ResizeTask::kernelU4; + break; + case 2: + kernel = &ResizeTask::kernelU2; + break; + case 1: + kernel = &ResizeTask::kernelU1; + break; + default: + ALOGE("Bad vector size %zd", mVectorSize); + } + + for (size_t y = startY; y < endY; y++) { + size_t offset = (mSizeX * y + startX) * paddedSize(mVectorSize); + uchar* out = mOut + offset; + std::invoke(kernel, this, out, startX, endX, y); + } +} + +static float4 cubicInterpolate(float4 p0, float4 p1, float4 p2, float4 p3, float x) { + return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + + x * (3.f * (p1 - p2) + p3 - p0))); +} + +static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { + return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + + x * (3.f * (p1 - p2) + p3 - p0))); +} + + +#if defined(ARCH_X86_HAVE_AVX2) +static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { + return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + + _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(4.f), _mm_set1_ps(p2),_mm_set1_ps(p3))) + + x * 
(_mm_cvtss_f32(_mm_fmadd_ss (_mm_set1_ps(3.f),_mm_set1_ps(p1 - p2), + _mm_set1_ps(p3 - p0)))))); + +} +#else +static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { + //ALOGI("CP, %f, %f, %f, %f, %f", p0, p1, p2, p3, x); + return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + + x * (3.f * (p1 - p2) + p3 - p0))); +} +#endif + +static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float4 p0 = cubicInterpolate(convert<float4>(yp0[xs0]), + convert<float4>(yp0[xs1]), + convert<float4>(yp0[xs2]), + convert<float4>(yp0[xs3]), xf); + + float4 p1 = cubicInterpolate(convert<float4>(yp1[xs0]), + convert<float4>(yp1[xs1]), + convert<float4>(yp1[xs2]), + convert<float4>(yp1[xs3]), xf); + + float4 p2 = cubicInterpolate(convert<float4>(yp2[xs0]), + convert<float4>(yp2[xs1]), + convert<float4>(yp2[xs2]), + convert<float4>(yp2[xs3]), xf); + + float4 p3 = cubicInterpolate(convert<float4>(yp3[xs0]), + convert<float4>(yp3[xs1]), + convert<float4>(yp3[xs2]), + convert<float4>(yp3[xs3]), xf); + + float4 p = cubicInterpolate(p0, p1, p2, p3, yf); + p = clamp(p + 0.5f, 0.f, 255.f); + return convert<uchar4>(p); +} + +static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float2 p0 = cubicInterpolate(convert<float2>(yp0[xs0]), + convert<float2>(yp0[xs1]), + convert<float2>(yp0[xs2]), + convert<float2>(yp0[xs3]), 
xf); + + float2 p1 = cubicInterpolate(convert<float2>(yp1[xs0]), + convert<float2>(yp1[xs1]), + convert<float2>(yp1[xs2]), + convert<float2>(yp1[xs3]), xf); + + float2 p2 = cubicInterpolate(convert<float2>(yp2[xs0]), + convert<float2>(yp2[xs1]), + convert<float2>(yp2[xs2]), + convert<float2>(yp2[xs3]), xf); + + float2 p3 = cubicInterpolate(convert<float2>(yp3[xs0]), + convert<float2>(yp3[xs1]), + convert<float2>(yp3[xs2]), + convert<float2>(yp3[xs3]), xf); + + float2 p = cubicInterpolate(p0, p1, p2, p3, yf); + p = clamp(p + 0.5f, 0.f, 255.f); + return convert<uchar2>(p); +} + +static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], + (float)yp0[xs2], (float)yp0[xs3], xf); + float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], + (float)yp1[xs2], (float)yp1[xs3], xf); + float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], + (float)yp2[xs2], (float)yp2[xs3], xf); + float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], + (float)yp3[xs2], (float)yp3[xs3], xf); + + float p = cubicInterpolate(p0, p1, p2, p3, yf); + p = clamp(p + 0.5f, 0.f, 255.f); + //ALOGI("CUC,%f,%u", p, (uchar)p); + return (uchar)p; +} + +extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc); + +extern "C" void rsdIntrinsicResizeB4_K( + uchar4 *dst, + size_t count, + uint32_t xf, + uint32_t xinc, + uchar4 const *srcn, + uchar4 const *src0, + uchar4 const *src1, + uchar4 const *src2, + size_t xclip, + size_t avail, + uint64_t osc_ctl, + int32_t const *yr); + +extern "C" void rsdIntrinsicResizeB2_K( + uchar2 *dst, + size_t count, + uint32_t xf, + uint32_t xinc, + uchar2 const *srcn, + uchar2 const 
*src0, + uchar2 const *src1, + uchar2 const *src2, + size_t xclip, + size_t avail, + uint64_t osc_ctl, + int32_t const *yr); + +extern "C" void rsdIntrinsicResizeB1_K( + uchar *dst, + size_t count, + uint32_t xf, + uint32_t xinc, + uchar const *srcn, + uchar const *src0, + uchar const *src1, + uchar const *src2, + size_t xclip, + size_t avail, + uint64_t osc_ctl, + int32_t const *yr); + +#if defined(ARCH_ARM_USE_INTRINSICS) +static void mkYCoeff(int32_t *yr, float yf) { + int32_t yf1 = rint(yf * 0x10000); + int32_t yf2 = rint(yf * yf * 0x10000); + int32_t yf3 = rint(yf * yf * yf * 0x10000); + + yr[0] = -(2 * yf2 - yf3 - yf1) >> 1; + yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1; + yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1; + yr[3] = -(yf3 - yf2) >> 1; +} +#endif + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float4 p0 = cubicInterpolate(yp0[xs0], yp0[xs1], + yp0[xs2], yp0[xs3], xf); + float4 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], + yp1[xs2], yp1[xs3], xf); + float4 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], + yp2[xs2], yp2[xs3], xf); + float4 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], + yp3[xs2], yp3[xs3], xf); + + float4 p = cubicInterpolate(p0, p1, p2, p3, yf); + return p; +} + +static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float2 p0 = cubicInterpolate(yp0[xs0], 
yp0[xs1], + yp0[xs2], yp0[xs3], xf); + float2 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], + yp1[xs2], yp1[xs3], xf); + float2 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], + yp2[xs2], yp2[xs3], xf); + float2 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], + yp3[xs2], yp3[xs3], xf); + + float2 p = cubicInterpolate(p0, p1, p2, p3, yf); + return p; +} + +static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3, + float xf, float yf, int width) { + int startx = (int) floor(xf - 1); + xf = xf - floor(xf); + int maxx = width - 1; + int xs0 = std::max(0, startx + 0); + int xs1 = std::max(0, startx + 1); + int xs2 = std::min(maxx, startx + 2); + int xs3 = std::min(maxx, startx + 3); + + float p0 = cubicInterpolate(yp0[xs0], yp0[xs1], + yp0[xs2], yp0[xs3], xf); + float p1 = cubicInterpolate(yp1[xs0], yp1[xs1], + yp1[xs2], yp1[xs3], xf); + float p2 = cubicInterpolate(yp2[xs0], yp2[xs1], + yp2[xs2], yp2[xs3], xf); + float p3 = cubicInterpolate(yp3[xs0], yp3[xs1], + yp3[xs2], yp3[xs3], xf); + + float p = cubicInterpolate(p0, p1, p2, p3, yf); + return p; +} +#endif + +// Writes the bicubic-resized uchar4 cells [xstart, xend) of output row currentY to outPtr. +void ResizeTask::kernelU4(uchar *outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar *pin = mIn; + const int srcHeight = mInputSizeY; + const int srcWidth = mInputSizeX; + const size_t stride = mInputSizeX * paddedSize(mVectorSize); + + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), + _mm_set1_ps(mScaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * mScaleY - 0.5f; +#endif + + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::max(0, starty + 1); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); + const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); + const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); + 
const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); + + uchar4 *out = ((uchar4 *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && x2 > x1 && mScaleX < 4.0f) { + float xf = (x1 + 0.5f) * mScaleX - 0.5f; + long xf16 = rint(xf * 0x10000); + uint32_t xinc16 = rint(mScaleX * 0x10000); + + int xoff = (xf16 >> 16) - 1; + int xclip = std::max(0, xoff) - xoff; + int len = x2 - x1; + + int32_t yr[4]; + uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); + mkYCoeff(yr, yf); + + xoff += xclip; + + rsdIntrinsicResizeB4_K( + out, len, + xf16 & 0xffff, xinc16, + yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, + xclip, srcWidth - xoff + xclip, + osc_ctl, yr); + out += len; + x1 += len; + } +#endif + + while(x1 < x2) { +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(mScaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * mScaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +// Writes the bicubic-resized uchar2 cells [xstart, xend) of output row currentY to outPtr. +void ResizeTask::kernelU2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar *pin = mIn; + const int srcHeight = mInputSizeY; + const int srcWidth = mInputSizeX; + const size_t stride = mInputSizeX * mVectorSize; + + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32( + _mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), _mm_set1_ps(mScaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * mScaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::max(0, starty + 1); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); + const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); + const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); + const uchar2 
*)(pin + stride * ys3); + + uchar2 *out = ((uchar2 *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && x2 > x1 && mScaleX < 4.0f) { + float xf = (x1 + 0.5f) * mScaleX - 0.5f; + long xf16 = rint(xf * 0x10000); + uint32_t xinc16 = rint(mScaleX * 0x10000); + + int xoff = (xf16 >> 16) - 1; + int xclip = std::max(0, xoff) - xoff; + int len = x2 - x1; + + int32_t yr[4]; + uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); + mkYCoeff(yr, yf); + + xoff += xclip; + + rsdIntrinsicResizeB2_K( + out, len, + xf16 & 0xffff, xinc16, + yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, + xclip, srcWidth - xoff + xclip, + osc_ctl, yr); + out += len; + x1 += len; + } +#endif + + while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(mScaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * mScaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +// Writes the bicubic-resized single-byte cells [xstart, xend) of output row currentY to outPtr. +void ResizeTask::kernelU1(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + //ALOGI("TK kernelU1 xstart %u, xend %u, outstep %u", xstart, xend); + const uchar *pin = mIn; + const int srcHeight = mInputSizeY; + const int srcWidth = mInputSizeX; + const size_t stride = mInputSizeX * mVectorSize; + + // ALOGI("Toolkit ResizeU1 (%ux%u) by (%f,%f), xstart:%u to %u, stride %zu, out %p", srcWidth, + // srcHeight, scaleX, scaleY, xstart, xend, stride, outPtr); + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32( + _mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), _mm_set1_ps(mScaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * mScaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::min(maxy, std::max(0, starty + 1)); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + 
const uchar *yp0 = pin + stride * ys0; + const uchar *yp1 = pin + stride * ys1; + const uchar *yp2 = pin + stride * ys2; + const uchar *yp3 = pin + stride * ys3; + + uchar *out = ((uchar *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + +#if defined(ARCH_ARM_USE_INTRINSICS) + if (mUsesSimd && x2 > x1 && mScaleX < 4.0f) { + float xf = (x1 + 0.5f) * mScaleX - 0.5f; + long xf16 = rint(xf * 0x10000); + uint32_t xinc16 = rint(mScaleX * 0x10000); + + int xoff = (xf16 >> 16) - 1; + int xclip = std::max(0, xoff) - xoff; + int len = x2 - x1; + + int32_t yr[4]; + uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); + mkYCoeff(yr, yf); + + // ALOGI("ys0 %d, ys1 %d, ys2 %d, ys3 %d, x1 %u, x2 %u, xf %f, xf16 %ld, xinc16 %u, xoff %d, + // xclip %d, len %d, osc_ctl %lu)", + // ys0, ys1, ys2, ys3, x1, x2, xf, xf16, xinc16, xoff, xclip, len, (unsigned long) + // osc_ctl); + // ALOGI("TK scaleX %f, xf %f, xf16 %ld, xinc16 %d, xoff %d, xclip %d, len %d", scaleX, xf, + // xf16, xinc16, xoff, xclip, len); ALOGI("TK xf16 & 0xffff %ld, ys0 %u, ys1 %u, ys2 %u, ys3 + // %u, srcWidth - xoff + xclip %d", xf16 & 0xffff, ys0, ys1, ys2, ys3, srcWidth - xoff); + + xoff += xclip; + + rsdIntrinsicResizeB1_K( + out, len, + xf16 & 0xffff, xinc16, + yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, + xclip, srcWidth - xoff + xclip, + osc_ctl, yr); + out += len; + x1 += len; + } +#endif + + while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(mScaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * mScaleX - 0.5f; +#endif + + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT +void ResizeTask::kernelF4(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar *pin = mIn; + const int srcHeight = inputSizeY; + const int srcWidth = inputSizeX; + const size_t stride = sizeX * vectorSize; + +#if 
defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32( + _mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), _mm_set1_ps(scaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * scaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::max(0, starty + 1); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + const float4 *yp0 = (const float4 *)(pin + stride * ys0); + const float4 *yp1 = (const float4 *)(pin + stride * ys1); + const float4 *yp2 = (const float4 *)(pin + stride * ys2); + const float4 *yp3 = (const float4 *)(pin + stride * ys3); + + float4 *out = ((float4 *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + + while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(scaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * scaleX - 0.5f; +#endif + + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +void ResizeTask::kernelF2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar *pin = mIn; + const int srcHeight = inputSizeY; + const int srcWidth = inputSizeX; + const size_t stride = sizeX * vectorSize; + + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), + _mm_set1_ps(scaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * scaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::max(0, starty + 1); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + const float2 *yp0 = (const float2 *)(pin + stride * ys0); + const float2 *yp1 = (const float2 *)(pin + stride * ys1); + const float2 *yp2 = (const float2 *)(pin + stride * ys2); + const float2 *yp3 = (const float2 
*)(pin + stride * ys3); + + float2 *out = ((float2 *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + + while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(scaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * scaleX - 0.5f; +#endif + + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +void ResizeTask::kernelF1(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) { + const uchar *pin = mIn; + const int srcHeight = inputSizeY; + const int srcWidth = inputSizeX; + const size_t stride = sizeX * vectorSize; + + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), + _mm_set1_ps(scaleY), _mm_set1_ps(0.5f))); +#else + float yf = (currentY + 0.5f) * scaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); + yf = yf - floor(yf); + int maxy = srcHeight - 1; + int ys0 = std::max(0, starty + 0); + int ys1 = std::max(0, starty + 1); + int ys2 = std::min(maxy, starty + 2); + int ys3 = std::min(maxy, starty + 3); + + const float *yp0 = (const float *)(pin + stride * ys0); + const float *yp1 = (const float *)(pin + stride * ys1); + const float *yp2 = (const float *)(pin + stride * ys2); + const float *yp3 = (const float *)(pin + stride * ys3); + + float *out = ((float *)outPtr); + uint32_t x1 = xstart; + uint32_t x2 = xend; + + while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(scaleX) , + _mm_set1_ps(0.5f))); +#else + float xf = (x1 + 0.5f) * scaleX - 0.5f; +#endif + + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); + out++; + x1++; + } +} + +void ResizeTask::preLaunch(uint32_t slot, const RsScriptCall *sc) +{ + + //check the data type to determine F or U. 
+ if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) { + switch(mAlloc->getType()->getElement()->getVectorSize()) { + case 1: + mRootPtr = &kernelU1; + break; + case 2: + mRootPtr = &kernelU2; + break; + case 3: + case 4: + mRootPtr = &kernelU4; + break; + } + } else { + switch(mAlloc->getType()->getElement()->getVectorSize()) { + case 1: + mRootPtr = &kernelF1; + break; + case 2: + mRootPtr = &kernelF2; + break; + case 3: + case 4: + mRootPtr = &kernelF4; + break; + } + } +} +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +void RenderScriptToolkit::resize(const uint8_t* input, uint8_t* output, size_t inputSizeX, + size_t inputSizeY, size_t vectorSize, size_t outputSizeX, + size_t outputSizeY, const Restriction* restriction) { +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + if (!validRestriction(LOG_TAG, outputSizeX, outputSizeY, restriction)) { + return; + } + if (vectorSize < 1 || vectorSize > 4) { + ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize); + return; + } +#endif + + ResizeTask task((const uchar*)input, (uchar*)output, inputSizeX, inputSizeY, vectorSize, + outputSizeX, outputSizeY, restriction); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Resize_advsimd.S b/toolkit/Resize_advsimd.S new file mode 100644 index 00000000..59e735c2 --- /dev/null +++ b/toolkit/Resize_advsimd.S @@ -0,0 +1,754 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: +#define END(f) .size f, .-f; + +/* Fixed-point precision after vertical pass -- 16 bit data minus 1 sign and 1 + * integer (bicubic has a little overshoot). It would also be possible to add + * a temporary DC bias to eliminate the sign bit for more precision, but that's + * extra arithmetic. + */ +.set VERTBITS, 14 + +/* The size of the scratch buffer in which we store our vertically convolved + * intermediates. + */ +.set CHUNKSHIFT, 7 /* 5 tests better for uchar4, but 7 is necessary for ridiculous (10:1) scale factors */ +.set CHUNKSIZE, (1 << CHUNKSHIFT) + +/* The number of components processed in a single iteration of the innermost + * loop. + */ +.set VECSHIFT, 3 +.set VECSIZE, (1<<VECSHIFT) + +/* Read four different lines (except at edges where addresses may be clamped, + * which is why we don't simply take base and stride registers), and multiply + * and accumulate them by the coefficients in v3[0..3], leaving the results in + * v12. This gives eight 16-bit results representing a horizontal line of 2-8 + * input pixels (depending on number of components per pixel) to be fed into + * the horizontal scaling pass. + * + * Input coefficients are 16-bit unsigned fixed-point (although [0] and [3] are + * known to represent negative values and UMLSL is used to implement this). + * Output is VERTBITS signed fixed-point, which must leave room for a little + * bit of overshoot beyond [0,1.0). 
+ */ +.macro vert8, dstlo=v12.4h, dsthi=v12.8h + ld1 {v8.8b}, [x4], #8 + ld1 {v9.8b}, [x5], #8 + ld1 {v10.8b}, [x6], #8 + ld1 {v11.8b}, [x7], #8 + uxtl v8.8h, v8.8b + uxtl v9.8h, v9.8b + uxtl v10.8h, v10.8b + uxtl v11.8h, v11.8b + umull v12.4s, v9.4h, v3.h[1] + umull2 v13.4s, v9.8h, v3.h[1] + umlsl v12.4s, v8.4h, v3.h[0] + umlsl2 v13.4s, v8.8h, v3.h[0] + umlal v12.4s, v10.4h, v3.h[2] + umlal2 v13.4s, v10.8h, v3.h[2] + umlsl v12.4s, v11.4h, v3.h[3] + umlsl2 v13.4s, v11.8h, v3.h[3] + + /* Shift by 8 (bits per pixel), plus 16 (the fixed-point multiplies), + * minus VERTBITS (the number of fraction bits we want to keep from + * here on). + */ + sqshrn \dstlo, v12.4s, #8 + (16 - VERTBITS) + sqshrn2 \dsthi, v13.4s, #8 + (16 - VERTBITS) +.endm + +/* As above, but only four 16-bit results into v12hi. + */ +.macro vert4, dst=v12.8h + ld1 {v8.s}[0], [x4], #4 + ld1 {v9.s}[0], [x5], #4 + ld1 {v10.s}[0], [x6], #4 + ld1 {v11.s}[0], [x7], #4 + uxtl v8.8h, v8.8b + uxtl v9.8h, v9.8b + uxtl v10.8h, v10.8b + uxtl v11.8h, v11.8b + umull v12.4s, v9.4h, v3.h[1] + umlsl v12.4s, v8.4h, v3.h[0] + umlal v12.4s, v10.4h, v3.h[2] + umlsl v12.4s, v11.4h, v3.h[3] +.ifc \dst,v12.8h + sqshrn2 \dst, v12.4s, #8 + (16 - VERTBITS) +.else + sqshrn \dst, v12.4s, #8 + (16 - VERTBITS) +.endif +.endm + + +/* During horizontal resize having CHUNKSIZE input available means being able + * to produce a varying amount of output, depending on the phase of the data. + * This function calculates the minimum number of VECSIZE chunks extracted from + * a CHUNKSIZE window (x1), and the threshold value for when the count will be + * one higher than that (x0). + * These work out, conveniently, to be the quotient and remainder from: + * (CHUNKSIZE + xinc * VECSIZE - 1) / (xinc * VECSIZE) + * + * The two values are packed together in a uint64_t for convenience; and + * they are, in fact, used this way as an arithmetic short-cut later on. 
+ */ +/* uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc) */ +ENTRY(rsdIntrinsicResize_oscctl_K) + lsl x2, x0, #VECSHIFT /* x2 = xinc * VECSIZE (the divisor) */ + mov x0, #(CHUNKSIZE << 16) - 1 + add x0, x0, x2 + udiv x1, x0, x2 /* x1 = quotient */ + msub x0, x1, x2, x0 /* x0 = remainder */ + add x0, x0, x1, LSL #32 /* pack: quotient in the high 32 bits, remainder in the low 32 bits */ + ret +END(rsdIntrinsicResize_oscctl_K) + +/* Iterate to generate the uchar1, uchar2, and uchar4 versions of the code. + * For the most part the vertical pass (the outer loop) is the same for all + * versions. Exceptions are handled in-line with conditional assembly. + */ +.irp comp, 1, 2, 4 +.if \comp == 1 +.set COMPONENT_SHIFT, 0 +.elseif \comp == 2 +.set COMPONENT_SHIFT, 1 +.elseif \comp == 4 +.set COMPONENT_SHIFT, 2 +.else +.error "Unknown component count" +.endif +.set COMPONENT_COUNT, (1 << COMPONENT_SHIFT) +.set LOOP_OUTPUT_SIZE, (VECSIZE * COMPONENT_COUNT) + +.set BUFFER_SIZE, (CHUNKSIZE * 2 + 4) * COMPONENT_COUNT * 2 + +/* void rsdIntrinsicResizeB\comp\()_K( + * uint8_t * restrict dst, // x0 + * size_t count, // x1 + * uint32_t xf, // x2 + * uint32_t xinc, // x3 + * uint8_t const * restrict srcn, // x4 + * uint8_t const * restrict src0, // x5 + * uint8_t const * restrict src1, // x6 + * uint8_t const * restrict src2, // x7 + * size_t xclip, // [sp,#0] -> [sp,#80] -> x13 + * size_t avail, // [sp,#8] -> [sp,#88] -> x11 + * uint64_t osc_ctl, // [sp,#16] -> [sp,#96] -> x10 + * int32_t const *yr); // [sp,#24] -> [sp,#104] -> v4 (copied to v3 for scalar access) + */ +ENTRY(rsdIntrinsicResizeB\comp\()_K) + sub x8, sp, #48 + sub sp, sp, #80 + st1 {v8.1d - v11.1d}, [sp] + st1 {v12.1d - v15.1d}, [x8] + str x19, [x8, #32] + + /* align the working buffer on the stack to make it easy to use bit + * twiddling for address calculations. 
+ */ + sub x12, sp, #BUFFER_SIZE + bic x12, x12, #(1 << (CHUNKSHIFT + 1 + COMPONENT_SHIFT + 1)) - 1 + + ldr x8, [sp,#104] // yr + adrp x9, intrinsic_resize_consts + add x9, x9, :lo12:intrinsic_resize_consts + ld1 {v4.4s}, [x8] + ld1 {v5.8h}, [x9] + sqxtun v4.4h, v4.4s // yr + dup v6.8h, w2 + dup v7.8h, w3 + mla v6.8h, v5.8h, v7.8h // vxf + shl v7.8h, v7.8h, #VECSHIFT // vxinc + + /* Compute starting condition for oscillator used to compute ahead + * of time how many iterations are possible before needing to + * refill the working buffer. This is based on the fixed-point + * index of the last element in the vector of pixels processed in + * each iteration, counting up until it would overflow. + */ + sub x8, x2, x3 + lsl x9, x3, #VECSHIFT + add x8, x8, x9 + + ldr x10, [sp,#96] // osc_ctl + ldp x13,x11, [sp,#80] // xclip, avail + + mov x19, sp + mov sp, x12 + + /* x4-x7 contain pointers to the four lines of input to be + * convolved. These pointers have been clamped vertically and + * horizontally (which is why it's not a simple row/stride pair), + * and the xclip argument (now in x13) indicates how many pixels + * from true the x position of the pointer is. This value should + * be 0, 1, or 2 only. + * + * Start by placing four pixels worth of input at the far end of + * the buffer. As many as two of these may be clipped, so four + * pixels are fetched, and then the first pixel is duplicated and + * the data shifted according to xclip. The source pointers are + * then also adjusted according to xclip so that subsequent fetches + * match. 
+ */ + mov v3.8b, v4.8b /* make y coeffs available for vert4 and vert8 macros */ + sub x14, x12, x13, LSL #(COMPONENT_SHIFT + 1) + add x15, x12, #(2 * CHUNKSIZE - 4) * COMPONENT_COUNT * 2 + add x14, x14, #4 * COMPONENT_COUNT * 2 +.if \comp == 1 + vert4 v12.4h + dup v11.4h, v12.h[0] + st1 {v11.4h,v12.4h}, [x12] + ld1 {v12.4h}, [x14] + st1 {v12.4h}, [x15] +.elseif \comp == 2 + vert8 + dup v11.4s, v12.s[0] + st1 {v11.8h,v12.8h}, [x12] + ld1 {v12.8h}, [x14] + st1 {v12.8h}, [x15] +.elseif \comp == 4 + vert8 v14.4h, v14.8h + vert8 v15.4h, v15.8h + dup v12.2d, v14.d[0] + dup v13.2d, v14.d[0] + st1 {v12.8h,v13.8h}, [x12], #32 + st1 {v14.8h,v15.8h}, [x12] + sub x12, x12, #32 + ld1 {v11.8h,v12.8h}, [x14] + st1 {v11.8h,v12.8h}, [x15] +.endif + /* Count off four pixels into the working buffer. + */ + sub x11, x11, #4 + /* Incoming pointers were to the first _legal_ pixel. Four pixels + * were read unconditionally, but some may have been discarded by + * xclip, so we rewind the pointers to compensate. + */ + sub x4, x4, x13, LSL #(COMPONENT_SHIFT) + sub x5, x5, x13, LSL #(COMPONENT_SHIFT) + sub x6, x6, x13, LSL #(COMPONENT_SHIFT) + sub x7, x7, x13, LSL #(COMPONENT_SHIFT) + + /* First tap starts where we just pre-filled, at the end of the + * buffer. + */ + add x2, x2, #(CHUNKSIZE * 2 - 4) << 16 + + /* Use overflowing arithmetic to implement wraparound array + * indexing. + */ + lsl x2, x2, #(47 - CHUNKSHIFT) + lsl x3, x3, #(47 - CHUNKSHIFT) + + + /* Start of outermost loop. + * Fetch CHUNKSIZE pixels into scratch buffer, then calculate the + * number of iterations of the inner loop that can be performed and + * get into that. + * + * The fill is complicated by the possibility of running out of + * input before the scratch buffer is filled. If this isn't a risk + * then it's handled by the simple loop at 2:, otherwise the + * horrible loop at 3:. 
+ */ +1: mov v3.8b, v4.8b /* put y scaling coefficients somewhere handy */ + subs x11, x11, #CHUNKSIZE + bge 2f /* if at least CHUNKSIZE are available... */ + add x11, x11, #CHUNKSIZE /* if they're not... */ + b 4f + /* basic fill loop, processing 8 bytes at a time until there are + * fewer than eight bytes available. + */ +3: vert8 + sub x11, x11, #8 / COMPONENT_COUNT + st1 {v12.8h}, [x12], #16 +4: cmp x11, #8 / COMPONENT_COUNT - 1 + bgt 3b +.if \comp == 4 + blt 3f + /* The last pixel (four bytes) if necessary */ + vert4 +.else + cmp x11, #1 + blt 3f + /* The last pixels if necessary */ + sub x4, x4, #8 + sub x5, x5, #8 + sub x6, x6, #8 + sub x7, x7, #8 + add x4, x4, x11, LSL #(COMPONENT_SHIFT) + add x5, x5, x11, LSL #(COMPONENT_SHIFT) + add x6, x6, x11, LSL #(COMPONENT_SHIFT) + add x7, x7, x11, LSL #(COMPONENT_SHIFT) + vert8 + sub x11, sp, x11, LSL #(COMPONENT_SHIFT + 1) + sub sp, sp, #32 + sub x11, x11, #16 +.if \comp == 1 + dup v13.8h, v12.h[7] +.elseif \comp == 2 + dup v13.4s, v12.s[3] +.endif + st1 {v12.8h,v13.8h}, [sp] + ld1 {v12.8h}, [x11] + add sp, sp, #32 + b 4f +.endif + /* Keep filling until we get to the end of this chunk of the buffer */ +3: +.if \comp == 1 + dup v12.8h, v12.h[7] +.elseif \comp == 2 + dup v12.4s, v12.s[3] +.elseif \comp == 4 + dup v12.2d, v12.d[1] +.endif +4: st1 {v12.8h}, [x12], #16 + tst x12, #(CHUNKSIZE - 1) * COMPONENT_COUNT * 2 + bne 3b + b 4f + +.align 4 +2: /* Quickly pull a chunk of data into the working buffer. + */ + vert8 + st1 {v12.8h}, [x12], #16 + vert8 + st1 {v12.8h}, [x12], #16 + tst x12, #(CHUNKSIZE - 1) * COMPONENT_COUNT * 2 + bne 2b + cmp x11, #0 + bne 3f +4: /* if we end with 0 pixels left we'll have nothing handy to spread + * across to the right, so we rewind a bit. 
+ */ + mov x11, #1 + sub x4, x4, #COMPONENT_COUNT + sub x5, x5, #COMPONENT_COUNT + sub x6, x6, #COMPONENT_COUNT + sub x7, x7, #COMPONENT_COUNT +3: /* copy four taps (width of cubic window) to far end for overflow + * address handling + */ + sub x13, x12, #CHUNKSIZE * COMPONENT_COUNT * 2 + eor x12, x13, #CHUNKSIZE * COMPONENT_COUNT * 2 +.if \comp == 1 + ld1 {v14.4h}, [x13] +.elseif \comp == 2 + ld1 {v14.8h}, [x13] +.elseif \comp == 4 + ld1 {v14.8h,v15.8h}, [x13] +.endif + add x13, x12, #CHUNKSIZE * COMPONENT_COUNT * 2 +.if \comp == 1 + st1 {v14.4h}, [x13] +.elseif \comp == 2 + st1 {v14.8h}, [x13] +.elseif \comp == 4 + st1 {v14.8h,v15.8h}, [x13] +.endif + /* The high 32-bits of x10 contains the maximum possible iteration + * count, but if x8 is greater than the low 32-bits of x10 then + * this indicates that the count must be reduced by one for this + * iteration to avoid reading past the end of the available data. + */ + sub x13, x10, x8 + lsr x13, x13, #32 + + madd x8, x13, x9, x8 + sub x8, x8, #(CHUNKSIZE << 16) + + /* prefer to count pixels, rather than vectors, to clarify the tail + * store case on exit. + */ + lsl x13, x13, #VECSHIFT + cmp x13, x1 + csel x13, x1, x13, gt + + sub x1, x1, x13 + + lsl x13, x13, #COMPONENT_SHIFT + + mov w14, #0x8000 + movi v30.8h, #3 + dup v31.8h, w14 + + cmp x13, #0 + bgt 3f + cmp x1, #0 + bgt 1b /* an extreme case where we shouldn't use code in this structure */ + b 9f + + .align 4 +2: /* Inner loop continues here, but starts at 3:, see end of loop + * below for explanation. */ +.if LOOP_OUTPUT_SIZE == 4 + st1 {v8.s}[0], [x0], #4 +.elseif LOOP_OUTPUT_SIZE == 8 + st1 {v8.8b}, [x0], #8 +.elseif LOOP_OUTPUT_SIZE == 16 + st1 {v8.16b}, [x0], #16 +.elseif LOOP_OUTPUT_SIZE == 32 + st1 {v8.16b,v9.16b}, [x0], #32 +.endif + /* Inner loop: here the four x coefficients for each tap are + * calculated in vector code, and the addresses are calculated in + * scalar code, and these calculations are interleaved. 
+ */ +3: ushr v8.8h, v6.8h, #1 // sxf + lsr x14, x2, #(63 - CHUNKSHIFT) + sqrdmulh v9.8h, v8.8h, v8.8h // sxf**2 + add x2, x2, x3 + sqrdmulh v10.8h, v9.8h, v8.8h // sxf**3 + lsr x15, x2, #(63 - CHUNKSHIFT) + sshll v11.4s, v9.4h, #2 + sshll2 v12.4s, v9.8h, #2 + add x2, x2, x3 + smlsl v11.4s, v10.4h, v30.4h + smlsl2 v12.4s, v10.8h, v30.8h + lsr x16, x2, #(63 - CHUNKSHIFT) + + shadd v0.8h, v10.8h, v8.8h + add x2, x2, x3 + sub v0.8h, v9.8h, v0.8h + lsr x17, x2, #(63 - CHUNKSHIFT) + + saddw v1.4s, v11.4s, v9.4h + saddw2 v13.4s, v12.4s, v9.8h + add x2, x2, x3 + shrn v1.4h, v1.4s, #1 + shrn2 v1.8h, v13.4s, #1 + add x14, sp, x14, LSL #(COMPONENT_SHIFT + 1) + sub v1.8h, v1.8h, v31.8h + add x15, sp, x15, LSL #(COMPONENT_SHIFT + 1) + + saddw v2.4s, v11.4s, v8.4h + saddw2 v13.4s, v12.4s, v8.8h + add x16, sp, x16, LSL #(COMPONENT_SHIFT + 1) + shrn v2.4h, v2.4s, #1 + shrn2 v2.8h, v13.4s, #1 + add x17, sp, x17, LSL #(COMPONENT_SHIFT + 1) + neg v2.8h, v2.8h + + shsub v3.8h, v10.8h, v9.8h + + /* increment the x fractional parts (oveflow is ignored, as the + * scalar arithmetic shadows this addition with full precision). + */ + add v6.8h, v6.8h, v7.8h + + /* At this point we have four pointers in x8-x11, pointing to the + * four taps in the scratch buffer that must be convolved together + * to produce an output pixel (one output pixel per pointer). + * These pointers usually overlap, but their spacing is irregular + * so resolving the redundancy through L1 is a pragmatic solution. + * + * The scratch buffer is made of signed 16-bit data, holding over + * some extra precision, and overshoot, from the vertical pass. + * + * We also have the 16-bit unsigned fixed-point weights for each + * of the four taps in v0 - v3. That's eight pixels worth of + * coefficients when we have only four pointers, so calculations + * for four more pixels are interleaved with the fetch and permute + * code for each variant in the following code. 
+ * + * The data arrangement is less than ideal for any pixel format, + * but permuting loads help to mitigate most of the problems. + * + * Note also that the two outside taps of a bicubic are negative, + * but these coefficients are unsigned. The sign is hard-coded by + * use of multiply-and-subtract operations. + */ +.if \comp == 1 + /* The uchar 1 case. + * Issue one lanewise ld4.h to load four consecutive pixels from + * one pointer (one pixel) into four different registers; then load + * four consecutive s16 values from the next pointer (pixel) into + * the next lane of those four registers, etc., so that we finish + * with v12 - v15 representing the four taps, and each lane + * representing a separate pixel. + * + * The first ld4 uses a splat to avoid any false dependency on + * the previous state of the register. + */ + ld4r {v12.8h,v13.8h,v14.8h,v15.8h}, [x14] + lsr x14, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.h,v13.h,v14.h,v15.h}[1], [x15] + add x14, sp, x14, LSL #(COMPONENT_SHIFT + 1) + lsr x15, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.h,v13.h,v14.h,v15.h}[2], [x16] + add x15, sp, x15, LSL #(COMPONENT_SHIFT + 1) + lsr x16, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.h,v13.h,v14.h,v15.h}[3], [x17] + add x16, sp, x16, LSL #(COMPONENT_SHIFT + 1) + lsr x17, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.h,v13.h,v14.h,v15.h}[4], [x14] + add x17, sp, x17, LSL #(COMPONENT_SHIFT + 1) + ld4 {v12.h,v13.h,v14.h,v15.h}[5], [x15] + ld4 {v12.h,v13.h,v14.h,v15.h}[6], [x16] + ld4 {v12.h,v13.h,v14.h,v15.h}[7], [x17] + + smull v8.4s, v12.4h, v0.4h + smull2 v9.4s, v12.8h, v0.8h + smlsl v8.4s, v13.4h, v1.4h + smlsl2 v9.4s, v13.8h, v1.8h + smlsl v8.4s, v14.4h, v2.4h + smlsl2 v9.4s, v14.8h, v2.8h + smlal v8.4s, v15.4h, v3.4h + smlal2 v9.4s, v15.8h, v3.8h + + subs x13, x13, #LOOP_OUTPUT_SIZE + + sqrshrn v8.4h, v8.4s, #15 + sqrshrn2 v8.8h, v9.4s, #15 + + sqrshrun v8.8b, v8.8h, #VERTBITS - 8 +.elseif \comp == 2 + /* The uchar2 case: + * This time 
load pairs of values into adjacent lanes in v12 - v15 + * by aliasing them as u32 data; leaving room for only four pixels, + * so the process has to be done twice. This also means that the + * coefficient registers fail to align with the coefficient data + * (eight separate pixels), so that has to be doubled-up to match. + */ + ld4r {v12.4s,v13.4s,v14.4s,v15.4s}, [x14] + lsr x14, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.s,v13.s,v14.s,v15.s}[1], [x15] + add x14, sp, x14, LSL #(COMPONENT_SHIFT + 1) + lsr x15, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.s,v13.s,v14.s,v15.s}[2], [x16] + add x15, sp, x15, LSL #(COMPONENT_SHIFT + 1) + lsr x16, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld4 {v12.s,v13.s,v14.s,v15.s}[3], [x17] + add x16, sp, x16, LSL #(COMPONENT_SHIFT + 1) + lsr x17, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + + /* double-up coefficients to align with component pairs */ + zip1 v16.8h, v0.8h, v0.8h + add x17, sp, x17, LSL #(COMPONENT_SHIFT + 1) + zip1 v17.8h, v1.8h, v1.8h + zip1 v18.8h, v2.8h, v2.8h + zip1 v19.8h, v3.8h, v3.8h + + smull v8.4s, v12.4h, v16.4h + smull2 v9.4s, v12.8h, v16.8h + smlsl v8.4s, v13.4h, v17.4h + smlsl2 v9.4s, v13.8h, v17.8h + smlsl v8.4s, v14.4h, v18.4h + smlsl2 v9.4s, v14.8h, v18.8h + smlal v8.4s, v15.4h, v19.4h + smlal2 v9.4s, v15.8h, v19.8h + + sqrshrn v8.4h, v8.4s, #15 + sqrshrn2 v8.8h, v9.4s, #15 + + ld4r {v12.4s,v13.4s,v14.4s,v15.4s}, [x14] + ld4 {v12.s,v13.s,v14.s,v15.s}[1], [x15] + ld4 {v12.s,v13.s,v14.s,v15.s}[2], [x16] + ld4 {v12.s,v13.s,v14.s,v15.s}[3], [x17] + + /* double-up coefficients to align with component pairs */ + zip2 v16.8h, v0.8h, v0.8h + zip2 v17.8h, v1.8h, v1.8h + zip2 v18.8h, v2.8h, v2.8h + zip2 v19.8h, v3.8h, v3.8h + + smull v10.4s, v12.4h, v16.4h + smull2 v11.4s, v12.8h, v16.8h + smlsl v10.4s, v13.4h, v17.4h + smlsl2 v11.4s, v13.8h, v17.8h + smlsl v10.4s, v14.4h, v18.4h + smlsl2 v11.4s, v14.8h, v18.8h + smlal v10.4s, v15.4h, v19.4h + smlal2 v11.4s, v15.8h, v19.8h + + subs x13, x13, 
#LOOP_OUTPUT_SIZE + + sqrshrn v9.4h, v10.4s, #15 + sqrshrn2 v9.8h, v11.4s, #15 + + sqrshrun v8.8b, v8.8h, #VERTBITS - 8 + sqrshrun2 v8.16b, v9.8h, #VERTBITS - 8 +.elseif \comp == 4 + /* The uchar4 case. + * This case is comparatively painless because four s16s are the + * smallest addressable unit for a vmul-by-scalar. Rather than + * permute the data, simply arrange the multiplies to suit the way + * the data comes in. That's a lot of data, though, so things + * progress in pairs of pixels at a time. + */ + ld1 {v12.8h,v13.8h}, [x14] + lsr x14, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld1 {v14.8h,v15.8h}, [x15] + add x14, sp, x14, LSL #(COMPONENT_SHIFT + 1) + lsr x15, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + + smull v8.4s, v12.4h, v0.h[0] + smull v9.4s, v14.4h, v0.h[1] + smlsl2 v8.4s, v12.8h, v1.h[0] + smlsl2 v9.4s, v14.8h, v1.h[1] + smlsl v8.4s, v13.4h, v2.h[0] + smlsl v9.4s, v15.4h, v2.h[1] + smlal2 v8.4s, v13.8h, v3.h[0] + smlal2 v9.4s, v15.8h, v3.h[1] + + /* And two more... */ + ld1 {v12.8h,v13.8h}, [x16] + add x15, sp, x15, LSL #(COMPONENT_SHIFT + 1) + lsr x16, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + ld1 {v14.8h,v15.8h}, [x17] + add x16, sp, x16, LSL #(COMPONENT_SHIFT + 1) + lsr x17, x2, #(63 - CHUNKSHIFT) + add x2, x2, x3 + + sqrshrn v8.4h, v8.4s, #15 + add x17, sp, x17, LSL #(COMPONENT_SHIFT + 1) + sqrshrn2 v8.8h, v9.4s, #15 + + smull v10.4s, v12.4h, v0.h[2] + smull v11.4s, v14.4h, v0.h[3] + smlsl2 v10.4s, v12.8h, v1.h[2] + smlsl2 v11.4s, v14.8h, v1.h[3] + smlsl v10.4s, v13.4h, v2.h[2] + smlsl v11.4s, v15.4h, v2.h[3] + smlal2 v10.4s, v13.8h, v3.h[2] + smlal2 v11.4s, v15.8h, v3.h[3] + + sqrshrn v9.4h, v10.4s, #15 + sqrshrn2 v9.8h, v11.4s, #15 + + sqrshrun v8.8b, v8.8h, #VERTBITS - 8 + sqrshrun2 v8.16b, v9.8h, #VERTBITS - 8 + + /* And two more... 
*/ + ld1 {v12.8h,v13.8h}, [x14] + ld1 {v14.8h,v15.8h}, [x15] + + smull v10.4s, v12.4h, v0.h[4] + smull v11.4s, v14.4h, v0.h[5] + smlsl2 v10.4s, v12.8h, v1.h[4] + smlsl2 v11.4s, v14.8h, v1.h[5] + smlsl v10.4s, v13.4h, v2.h[4] + smlsl v11.4s, v15.4h, v2.h[5] + smlal2 v10.4s, v13.8h, v3.h[4] + smlal2 v11.4s, v15.8h, v3.h[5] + + /* And two more... */ + ld1 {v12.8h,v13.8h}, [x16] + ld1 {v14.8h,v15.8h}, [x17] + + subs x13, x13, #LOOP_OUTPUT_SIZE + + sqrshrn v9.4h, v10.4s, #15 + sqrshrn2 v9.8h, v11.4s, #15 + + smull v10.4s, v12.4h, v0.h[6] + smull v11.4s, v14.4h, v0.h[7] + smlsl2 v10.4s, v12.8h, v1.h[6] + smlsl2 v11.4s, v14.8h, v1.h[7] + smlsl v10.4s, v13.4h, v2.h[6] + smlsl v11.4s, v15.4h, v2.h[7] + smlal2 v10.4s, v13.8h, v3.h[6] + smlal2 v11.4s, v15.8h, v3.h[7] + + sqrshrn v10.4h, v10.4s, #15 + sqrshrn2 v10.8h, v11.4s, #15 + + sqrshrun v9.8b, v9.8h, #VERTBITS - 8 + sqrshrun2 v9.16b, v10.8h, #VERTBITS - 8 +.endif + bgt 2b /* continue inner loop */ + /* The inner loop has already been limited to ensure that none of + * the earlier iterations could overfill the output, so the store + * appears within the loop but after the conditional branch (at the + * top). At the end, provided it won't overfill, perform the final + * store here. If it would, then break out to the tricky tail case + * instead. + */ + blt 1f + /* Store the amount of data appropriate to the configuration of the + * instance being assembled. + */ +.if LOOP_OUTPUT_SIZE == 4 + st1 {v8.s}[0], [x0], #4 +.elseif LOOP_OUTPUT_SIZE == 8 + st1 {v8.8b}, [x0], #8 +.elseif LOOP_OUTPUT_SIZE == 16 + st1 {v8.16b}, [x0], #16 +.elseif LOOP_OUTPUT_SIZE == 32 + st1 {v8.16b,v9.16b}, [x0], #32 +.endif + b 1b /* resume outer loop */ + /* Partial tail store case: + * Different versions of the code need different subsets of the + * following partial stores. Here the number of components and the + * size of the chunk of data produced by each inner loop iteration + * is tested to figure out whether or not each phrase is relevant. 
+ */ +.if 16 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 16 +1: tst x13, #16 + beq 1f + st1 {v8.16b}, [x0], #16 + mov v8.16b, v9.16b +.endif +.if 8 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 8 +1: tst x13, #8 + beq 1f + st1 {v8.8b}, [x0], #8 + ext v8.16b, v8.16b, v8.16b, #8 +.endif +.if 4 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 4 +1: tst x13, #4 + beq 1f + st1 {v8.s}[0], [x0], #4 + ext v8.8b, v8.8b, v8.8b, #4 +.endif +.if 2 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 2 +1: tst x13, #2 + beq 1f + st1 {v8.h}[0], [x0], #2 + ext v8.8b, v8.8b, v8.8b, #2 +.endif +.if 1 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 1 +1: tst x13, #1 + beq 1f + st1 {v8.b}[0], [x0], #1 +.endif +1: +9: mov sp, x19 + ld1 {v8.1d - v11.1d}, [sp], #32 + ld1 {v12.1d - v15.1d}, [sp], #32 + ldr x19, [sp], #16 + ret +END(rsdIntrinsicResizeB\comp\()_K) +.endr + +.rodata +intrinsic_resize_consts: .hword 0, 1, 2, 3, 4, 5, 6, 7 diff --git a/toolkit/Resize_neon.S b/toolkit/Resize_neon.S new file mode 100644 index 00000000..eb7f6941 --- /dev/null +++ b/toolkit/Resize_neon.S @@ -0,0 +1,799 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart +#define END(f) .fnend; .size f, .-f; + +.eabi_attribute 25,1 @Tag_ABI_align8_preserved +.arm + +/* Fixed-point precision after vertical pass -- 16 bit data minus 1 sign and 1 + * integer (bicubic has a little overshoot). 
It would also be possible to add + * a temporary DC bias to eliminate the sign bit for more precision, but that's + * extra arithmetic. + */ +.set VERTBITS, 14 + +/* The size of the scratch buffer in which we store our vertically convolved + * intermediates. + */ +.set CHUNKSHIFT, 7 +.set CHUNKSIZE, (1 << CHUNKSHIFT) + +/* The number of components processed in a single iteration of the innermost + * loop. + */ +.set VECSHIFT, 3 +.set VECSIZE, (1<<VECSHIFT) + +/* Read four different lines (except at edges where addresses may be clamped, + * which is why we don't simply take base and stride registers), and multiply + * and accumulate them by the coefficients in d6[0..3], leaving the results in + * q12. This gives eight 16-bit results representing a horizontal line of 2-8 + * input pixels (depending on number of components per pixel) to be fed into + * the horizontal scaling pass. + * + * Input coefficients are 16-bit unsigned fixed-point (although [0] and [3] are + * known to represent negative values and VMLS is used to implement this). + * Output is VERTBITS signed fixed-point, which must leave room for a little + * bit of overshoot beyond [0,1.0). + */ +.macro vert8, dstlo=d24, dsthi=d25 + vld1.u8 d16, [r4]! + vld1.u8 d18, [r5]! + vld1.u8 d20, [r6]! + vld1.u8 d22, [r7]! + vmovl.u8 q8, d16 + vmovl.u8 q9, d18 + vmovl.u8 q10, d20 + vmovl.u8 q11, d22 + vmull.u16 q12, d18, d6[1] + vmull.u16 q13, d19, d6[1] + vmlsl.u16 q12, d16, d6[0] + vmlsl.u16 q13, d17, d6[0] + vmlal.u16 q12, d20, d6[2] + vmlal.u16 q13, d21, d6[2] + vmlsl.u16 q12, d22, d6[3] + vmlsl.u16 q13, d23, d6[3] + + /* Shift by 8 (bits per pixel), plus 16 (the fixed-point multiplies), + * minus VERTBITS (the number of fraction bits we want to keep from + * here on). + */ + vqshrn.s32 \dstlo, q12, #8 + 16 - VERTBITS + vqshrn.s32 \dsthi, q13, #8 + 16 - VERTBITS +.endm + +/* As above, but only four 16-bit results into d25. + */ +.macro vert4 + vld1.u32 d16[0], [r4]! + vld1.u32 d18[0], [r5]! 
+ vld1.u32 d20[0], [r6]! + vld1.u32 d22[0], [r7]! + vmovl.u8 q8, d16 + vmovl.u8 q9, d18 + vmovl.u8 q10, d20 + vmovl.u8 q11, d22 + vmull.u16 q12, d18, d6[1] + vmlsl.u16 q12, d16, d6[0] + vmlal.u16 q12, d20, d6[2] + vmlsl.u16 q12, d22, d6[3] + vqshrn.s32 d25, q12, #8 + 16 - VERTBITS +.endm + + +/* During horizontal resize having CHUNKSIZE input available means being able + * to produce a varying amount of output, depending on the phase of the data. + * This function calculates the minimum number of VECSIZE chunks extracted from + * a CHUNKSIZE window (r1), and the threshold value for when the count will be + * one higher than that (r0). + * These work out, conveniently, to be the quotient and remainder from: + * ((CHUNKSIZE << 16) + xinc * VECSIZE - 1) / (xinc * VECSIZE) + * (CHUNKSIZE is scaled by 1 << 16 in the code below, which suggests that xinc + * carries 16 fractional bits -- confirm against the caller.) + * + * The two values can be packed together in a uint64_t for convenience; and + * they are, in fact, used this way as an arithmetic short-cut later on. + */ + +/* uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc); */ +ENTRY(rsdIntrinsicResize_oscctl_K) + lsl r2, r0, #VECSHIFT + movw r0, #:lower16:(CHUNKSIZE << 16) - 1 + movt r0, #:upper16:(CHUNKSIZE << 16) - 1 + add r0, r0, r2 +#if defined(ARCH_ARM_USE_UDIV) + udiv r1, r0, r2 + mls r0, r1, r2, r0 +#else + /* No hardware divide: shift-and-subtract division that leaves the + * quotient in r1 and the remainder in r0. + */ + clz r3, r2 + clz r1, r0 + subs r3, r3, r1 + movlt r3, #0 + mov r1, #1 + lsl r2, r2, r3 + lsl r3, r1, r3 + mov r1, #0 +1: cmp r2, r0 + addls r1, r3 + subls r0, r2 + lsrs r3, r3, #1 + lsr r2, r2, #1 + bne 1b +#endif + bx lr +END(rsdIntrinsicResize_oscctl_K) + +/* Iterate to generate the uchar1, uchar2, and uchar4 versions of the code. + * For the most part the vertical pass (the outer loop) is the same for all + * versions. Exceptions are handled in-line with conditional assembly.
+ */ +.irp comp, 1, 2, 4 +.if \comp == 1 +.set COMPONENT_SHIFT, 0 +.elseif \comp == 2 +.set COMPONENT_SHIFT, 1 +.elseif \comp == 4 +.set COMPONENT_SHIFT, 2 +.else +.error "Unknown component count" +.endif +.set COMPONENT_COUNT, (1 << COMPONENT_SHIFT) +.set LOOP_OUTPUT_SIZE, (VECSIZE * COMPONENT_COUNT) + +.set BUFFER_SIZE, (CHUNKSIZE * 2 + 4) * COMPONENT_COUNT * 2 +.set OSC_STORE, (BUFFER_SIZE + 0) +.set OSCSTEP_STORE, (BUFFER_SIZE + 4) +.set OSCCTL_STORE, (BUFFER_SIZE + 8) +.set AVAIL_STORE, (BUFFER_SIZE + 16) +.set SP_STORE, (BUFFER_SIZE + 24) /* should be +20, but rounded up to make a legal constant somewhere */ + +/* void rsdIntrinsicResizeB\comp\()_K( + * uint8_t * restrict dst, // r0 + * size_t count, // r1 + * uint32_t xf, // r2 + * uint32_t xinc, // r3 + * uint8_t const * restrict srcn, // [sp] -> [sp,#104] -> r4 + * uint8_t const * restrict src0, // [sp,#4] -> [sp,#108] -> r5 + * uint8_t const * restrict src1, // [sp,#8] -> [sp,#112] -> r6 + * uint8_t const * restrict src2, // [sp,#12] -> [sp,#116] -> r7 + * size_t xclip, // [sp,#16] -> [sp,#120] + * size_t avail, // [sp,#20] -> [sp,#124] -> lr + * uint64_t osc_ctl, // [sp,#24] -> [sp,#128] + * int32_t const *yr); // [sp,#32] -> [sp,#136] -> d8 (copied to d6 for scalar access) + */ +ENTRY(rsdIntrinsicResizeB\comp\()_K) + push {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + vpush {d8-d15} + + /* align the working buffer on the stack to make it easy to use bit + * twiddling for address calculations and bounds tests. 
+ */ + sub r12, sp, #BUFFER_SIZE + 32 + mov lr, sp + bfc r12, #0, #CHUNKSHIFT + 1 + COMPONENT_SHIFT + 1 + mov sp, r12 + str lr, [sp,#SP_STORE] + + ldr r8, [lr,#136] // yr + adr r9, 8f + vld1.s32 {q4}, [r8] + vld1.s16 {q5}, [r9] + vqmovun.s32 d8, q4 // yr + vdup.s16 q6, r2 + vdup.s16 q7, r3 + vmla.s16 q6, q5, q7 // vxf + vshl.s16 q7, q7, #VECSHIFT // vxinc + + ldrd r4,r5, [lr,#104] // srcn, src0 + ldrd r6,r7, [lr,#112] // src1, src2 + + /* Compute starting condition for oscillator used to compute ahead + * of time how many iterations are possible before needing to + * refill the working buffer. This is based on the fixed-point + * index of the last element in the vector of pixels processed in + * each iteration, counting up until it would overflow. + */ + sub r8, r2, r3 + mov r9, r3, LSL #VECSHIFT + add r8, r8, r9 + + ldrd r10,r11, [lr,#128] // osc_ctl + + str r8, [sp,#OSC_STORE] + str r9, [sp,#OSCSTEP_STORE] + str r10, [sp,#OSCCTL_STORE] + str r11, [sp,#OSCCTL_STORE+4] + ldrd r10,r11, [lr,#120] // xclip,avail + + + /* r4-r7 contain pointers to the four lines of input to be + * convolved. These pointers have been clamped vertically and + * horizontally (which is why it's not a simple row/stride pair), + * and the xclip argument (now in r10) indicates how many pixels + * from true the x position of the pointer is. This value should + * be 0, 1, or 2 only. + * + * Start by placing four pixels worth of input at the far end of + * the buffer. As many as two of these may be clipped, so four + * pixels are fetched, and then the first pixel is duplicated and + * the data shifted according to xclip. The source pointers are + * then also adjusted according to xclip so that subsequent fetches + * match. 
+ */ + vmov d6, d8 /* make y coeffs available for vert4 and vert8 macros */ + + sub r8, r12, r10, LSL #COMPONENT_SHIFT + 1 + add r9, r12, #(2 * CHUNKSIZE - 4) * COMPONENT_COUNT * 2 + add r8, r8, #4 * COMPONENT_COUNT * 2 +.if \comp == 1 + vert4 + vdup.s16 d24, d25[0] + vst1.s16 {q12}, [r12] + vld1.s16 {d24}, [r8] + vst1.s16 {d24}, [r9] +.elseif \comp == 2 + vert8 + vdup.u32 q11, d24[0] + vst1.s16 {q11,q12}, [r12] + vld1.s16 {q12}, [r8] + vst1.s16 {q12}, [r9] +.elseif \comp == 4 + vert8 d28, d29 + vert8 d30, d31 + vmov.u64 d24, d28 + vmov.u64 d25, d28 + vmov.u64 d26, d28 + vmov.u64 d27, d28 + vst1.s16 {q12,q13}, [r12]! + vst1.s16 {q14,q15}, [r12] + sub r12, r12, #32 + vld1.s16 {q11,q12}, [r8] + vst1.s16 {q11,q12}, [r9] +.endif + /* Count off four pixels into the working buffer, and move count to + * its new home. + */ + sub lr, r11, #4 + /* Incoming pointers were to the first _legal_ pixel. Four pixels + * were read unconditionally, but some may have been discarded by + * xclip, so we rewind the pointers to compensate. + */ + sub r4, r4, r10, LSL #COMPONENT_SHIFT + sub r5, r5, r10, LSL #COMPONENT_SHIFT + sub r6, r6, r10, LSL #COMPONENT_SHIFT + sub r7, r7, r10, LSL #COMPONENT_SHIFT + + /* First tap starts where we just pre-filled, at the end of the + * buffer. + */ + add r2, r2, #(CHUNKSIZE * 2 - 4) << 16 + + /* Use overflowing arithmetic to implement wraparound array + * indexing. + */ + mov r2, r2, LSL #(15 - CHUNKSHIFT) + mov r3, r3, LSL #(15 - CHUNKSHIFT) + + str lr, [sp,#AVAIL_STORE] + + /* Start of outermost loop. + * Fetch CHUNKSIZE pixels into scratch buffer, then calculate the + * number of iterations of the inner loop that can be performed and + * get into that. + * + * The fill is complicated by the possibility of running out of + * input before the scratch buffer is filled. If this isn't a risk + * then it's handled by the simple loop at 2:, otherwise the + * horrible loop at 3:. 
+ */ +1: ldr lr, [sp,#AVAIL_STORE] /* get number of pixels available */ + vmov d6, d8 /* put y scaling coefficients somewhere handy */ + subs lr, #CHUNKSIZE + bge 2f /* if at least CHUNKSIZE are available... */ + add lr, #CHUNKSIZE /* if they're not... */ + b 4f + /* ..just sneaking a literal in here after this unconditional branch.. */ +8: .hword 0, 1, 2, 3, 4, 5, 6, 7 + /* basic fill loop, processing 8 bytes at a time until there are + * fewer than eight bytes available. + */ +3: vert8 + sub lr, lr, #8 / COMPONENT_COUNT + vst1.s16 {q12}, [r12]! +4: cmp lr, #8 / COMPONENT_COUNT - 1 + bgt 3b +.if \comp == 4 + blt 3f + /* The last pixel (four bytes) if necessary */ + vert4 +.else + cmp lr, #1 + blt 3f + /* The last pixels if necessary */ + sub r4, r4, #8 + sub r5, r5, #8 + sub r6, r6, #8 + sub r7, r7, #8 + add r4, r4, lr, LSL #COMPONENT_SHIFT + add r5, r5, lr, LSL #COMPONENT_SHIFT + add r6, r6, lr, LSL #COMPONENT_SHIFT + add r7, r7, lr, LSL #COMPONENT_SHIFT + vert8 + sub lr, sp, lr, LSL #COMPONENT_SHIFT + 1 + sub sp, sp, #32 + sub lr, lr, #16 +.if \comp == 1 + vdup.s16 q13, d25[3] +.elseif \comp == 2 + vdup.u32 q13, d25[1] +.endif + vst1.s16 {q12,q13}, [sp] + vld1.s16 {q12}, [lr] + add sp, sp, #32 + b 4f +.endif + /* Keep filling until we get to the end of this chunk of the buffer */ +3: +.if \comp == 1 + vdup.s16 q12, d25[3] +.elseif \comp == 2 + vdup.u32 q12, d25[1] +.elseif \comp == 4 + vmov.u64 d24, d25 +.endif +4: vst1.s16 {q12}, [r12]! + tst r12, #(CHUNKSIZE - 1) * COMPONENT_COUNT * 2 + bne 3b + b 4f + +.align 4 +2: /* Quickly pull a chunk of data into the working buffer. + */ + vert8 + vst1.s16 {q12}, [r12]! + vert8 + vst1.s16 {q12}, [r12]! + tst r12, #(CHUNKSIZE - 1) * COMPONENT_COUNT * 2 + bne 2b + cmp lr, #0 + bne 3f +4: /* if we end with 0 pixels left we'll have nothing handy to spread + * across to the right, so we rewind a bit. 
+ */ + mov lr, #1 + sub r4, r4, #COMPONENT_COUNT + sub r5, r5, #COMPONENT_COUNT + sub r6, r6, #COMPONENT_COUNT + sub r7, r7, #COMPONENT_COUNT +3: str lr, [sp,#AVAIL_STORE] /* done with available pixel count */ + add lr, sp, #OSC_STORE + ldrd r8,r9, [lr,#0] /* need osc, osc_step soon */ + ldrd r10,r11, [lr,#OSCCTL_STORE-OSC_STORE] /* need osc_ctl too */ + + /* copy four taps (width of cubic window) to far end for overflow + * address handling + */ + sub lr, r12, #CHUNKSIZE * COMPONENT_COUNT * 2 + eor r12, lr, #CHUNKSIZE * COMPONENT_COUNT * 2 +.if \comp == 1 + vld1.s16 {d28}, [lr] +.elseif \comp == 2 + vld1.s16 {q14}, [lr] +.elseif \comp == 4 + vld1.s16 {q14,q15}, [lr] +.endif + add lr, r12, #CHUNKSIZE * COMPONENT_COUNT * 2 +.if \comp == 1 + vst1.s16 {d28}, [lr] +.elseif \comp == 2 + vst1.s16 {q14}, [lr] +.elseif \comp == 4 + vst1.s16 {q14,q15}, [lr] +.endif + /* r11 contains the maximum possible iteration count, but if r8 is + * greater than r10 then this indicates that the count must be + * reduced by one for this iteration to avoid reading past the end + * of the available data. + */ + cmp r10, r8 + sbc lr, r11, #0 + + mla r8, lr, r9, r8 + sub r8, r8, #(CHUNKSIZE << 16) + + str r8, [sp,#OSC_STORE] /* done with osc */ + + /* prefer to count pixels, rather than vectors, to clarify the tail + * store case on exit. + */ + mov lr, lr, LSL #VECSHIFT + cmp lr, r1 + movgt lr, r1 + + sub r1, r1, lr + + mov lr, lr, LSL #COMPONENT_SHIFT + + vmov.i16 d10, #3 + vmov.i16 d11, #0x8000 + + cmp lr, #0 + bgt 3f + cmp r1, #0 + bgt 1b /* an extreme case where we shouldn't use code in this structure */ + b 9f + + .align 4 +2: /* Inner loop continues here, but starts at 3:, see end of loop + * below for explanation. */ +.if LOOP_OUTPUT_SIZE == 4 + vst1.u32 {d16[0]}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 8 + vst1.u8 {d16}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 16 + vst1.u8 {q8}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 32 + vst1.u8 {q8,q9}, [r0]! 
+.endif + /* Inner loop: here the four x coefficients for each tap are + * calculated in vector code, and the addresses are calculated in + * scalar code, and these calculations are interleaved. + */ +3: vshr.u16 q8, q6, #1 + mov r8, r2, LSR #(31 - CHUNKSHIFT) + vqrdmulh.s16 q9, q8, q8 + add r2, r2, r3 + vqrdmulh.s16 q10, q9, q8 + mov r9, r2, LSR #(31 - CHUNKSHIFT) + vshll.s16 q11, d18, #2 + vshll.s16 q12, d19, #2 + add r2, r2, r3 + vmlsl.s16 q11, d20, d10 + vmlsl.s16 q12, d21, d10 + mov r10, r2, LSR #(31 - CHUNKSHIFT) + + vhadd.s16 q0, q10, q8 + add r2, r2, r3 + vsub.s16 q0, q9, q0 + mov r11, r2, LSR #(31 - CHUNKSHIFT) + + vaddw.s16 q1, q11, d18 + vaddw.s16 q13, q12, d19 + add r2, r2, r3 + vshrn.s32 d2, q1, #1 + vshrn.s32 d3, q13, #1 + add r8, sp, r8, LSL #(COMPONENT_SHIFT + 1) + vsub.s16 d2, d2, d11 + vsub.s16 d3, d3, d11 // TODO: find a wider d11 and use q-reg operation + add r9, sp, r9, LSL #(COMPONENT_SHIFT + 1) + + vaddw.s16 q2, q11, d16 + vaddw.s16 q13, q12, d17 + add r10, sp, r10, LSL #(COMPONENT_SHIFT + 1) + vshrn.s32 d4, q2, #1 + vshrn.s32 d5, q13, #1 + add r11, sp, r11, LSL #(COMPONENT_SHIFT + 1) + vneg.s16 q2, q2 + + vhsub.s16 q3, q10, q9 + + /* increment the x fractional parts (overflow is ignored, as the + * scalar arithmetic shadows this addition with full precision). + */ + vadd.s16 q6, q6, q7 + + /* At this point we have four pointers in r8-r11, pointing to the + * four taps in the scratch buffer that must be convolved together + * to produce an output pixel (one output pixel per pointer). + * These pointers usually overlap, but their spacing is irregular + * so resolving the redundancy through L1 is a pragmatic solution. + * + * The scratch buffer is made of signed 16-bit data, holding over + * some extra precision, and overshoot, from the vertical pass. + * + * We also have the 16-bit unsigned fixed-point weights for each + * of the four taps in q0 - q3.
That's eight pixels worth of + * coefficients when we have only four pointers, so calculations + * for four more pixels are interleaved with the fetch and permute + * code for each variant in the following code. + * + * The data arrangement is less than ideal for any pixel format, + * but permuting loads help to mitigate most of the problems. + * + * Note also that the two outside taps of a bicubic are negative, + * but these coefficients are unsigned. The sign is hard-coded by + * use of multiply-and-subtract operations. + */ +.if \comp == 1 + /* The uchar 1 case. + * Issue one lanewise vld4.s16 to load four consecutive pixels from + * one pointer (one pixel) into four different registers; then load + * four consecutive s16 values from the next pointer (pixel) into + * the next lane of those four registers, etc., so that we finish + * with q12 - q15 representing the four taps, and each lane + * representing a separate pixel. + * + * The first vld4 uses a splat to avoid any false dependency on + * the previous state of the register. 
+ */ + vld4.s16 {d24[],d26[],d28[],d30[]}, [r8] + mov r8, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.s16 {d24[1],d26[1],d28[1],d30[1]}, [r9] + add r8, sp, r8, LSL #(COMPONENT_SHIFT + 1) + mov r9, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.s16 {d24[2],d26[2],d28[2],d30[2]}, [r10] + add r9, sp, r9, LSL #(COMPONENT_SHIFT + 1) + mov r10, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.s16 {d24[3],d26[3],d28[3],d30[3]}, [r11] + add r10, sp, r10, LSL #(COMPONENT_SHIFT + 1) + mov r11, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.s16 {d25[],d27[],d29[],d31[]}, [r8] + add r11, sp, r11, LSL #(COMPONENT_SHIFT + 1) + vld4.s16 {d25[1],d27[1],d29[1],d31[1]}, [r9] + vld4.s16 {d25[2],d27[2],d29[2],d31[2]}, [r10] + vld4.s16 {d25[3],d27[3],d29[3],d31[3]}, [r11] + + vmull.s16 q8, d24, d0 + vmull.s16 q9, d25, d1 + vmlsl.s16 q8, d26, d2 + vmlsl.s16 q9, d27, d3 + vmlsl.s16 q8, d28, d4 + vmlsl.s16 q9, d29, d5 + vmlal.s16 q8, d30, d6 + vmlal.s16 q9, d31, d7 + + subs lr, lr, #LOOP_OUTPUT_SIZE + + vqrshrn.s32 d16, q8, #15 + vqrshrn.s32 d17, q9, #15 + + vqrshrun.s16 d16, q8, #VERTBITS - 8 +.elseif \comp == 2 + /* The uchar2 case: + * This time load pairs of values into adjacent lanes in q12 - q15 + * by aliasing them as u32 data; leaving room for only four pixels, + * so the process has to be done twice. This also means that the + * coefficient registers fail to align with the coefficient data + * (eight separate pixels), so that has to be doubled-up to match. 
+ */ + vld4.u32 {d24[],d26[],d28[],d30[]}, [r8] + mov r8, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.u32 {d24[1],d26[1],d28[1],d30[1]}, [r9] + add r8, sp, r8, LSL #(COMPONENT_SHIFT + 1) + mov r9, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.u32 {d25[],d27[],d29[],d31[]}, [r10] + add r9, sp, r9, LSL #(COMPONENT_SHIFT + 1) + mov r10, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld4.u32 {d25[1],d27[1],d29[1],d31[1]}, [r11] + add r10, sp, r10, LSL #(COMPONENT_SHIFT + 1) + mov r11, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + + /* double-up coefficients to align with component pairs */ + vmov d20, d0 + add r11, sp, r11, LSL #(COMPONENT_SHIFT + 1) + vmov d21, d2 + vmov d22, d4 + vmov d23, d6 + vzip.s16 d0, d20 + vzip.s16 d2, d21 + vzip.s16 d4, d22 + vzip.s16 d6, d23 + + vmull.s16 q8, d24, d0 + vmull.s16 q9, d25, d20 + vmlsl.s16 q8, d26, d2 + vmlsl.s16 q9, d27, d21 + vmlsl.s16 q8, d28, d4 + vmlsl.s16 q9, d29, d22 + vmlal.s16 q8, d30, d6 + vmlal.s16 q9, d31, d23 + + vqrshrn.s32 d16, q8, #15 + vqrshrn.s32 d17, q9, #15 + + vld4.u32 {d24[],d26[],d28[],d30[]}, [r8] + vld4.u32 {d24[1],d26[1],d28[1],d30[1]}, [r9] + vld4.u32 {d25[],d27[],d29[],d31[]}, [r10] + vld4.u32 {d25[1],d27[1],d29[1],d31[1]}, [r11] + + /* double-up coefficients to align with component pairs */ + vmov d0, d1 + vmov d2, d3 + vmov d4, d5 + vmov d6, d7 + vzip.s16 d0, d1 + vzip.s16 d2, d3 + vzip.s16 d4, d5 + vzip.s16 d6, d7 + + vmull.s16 q10, d24, d0 + vmull.s16 q11, d25, d1 + vmlsl.s16 q10, d26, d2 + vmlsl.s16 q11, d27, d3 + vmlsl.s16 q10, d28, d4 + vmlsl.s16 q11, d29, d5 + vmlal.s16 q10, d30, d6 + vmlal.s16 q11, d31, d7 + + subs lr, lr, #LOOP_OUTPUT_SIZE + + vqrshrn.s32 d18, q10, #15 + vqrshrn.s32 d19, q11, #15 + + vqrshrun.s16 d16, q8, #VERTBITS - 8 + vqrshrun.s16 d17, q9, #VERTBITS - 8 +.elseif \comp == 4 + /* The uchar4 case. + * This case is comparatively painless because four s16s are the + * smallest addressable unit for a vmul-by-scalar. 
Rather than + * permute the data, simply arrange the multiplies to suit the way + * the data comes in. That's a lot of data, though, so things + * progress in pairs of pixels at a time. + */ + vld1.s16 {q12,q13}, [r8] + mov r8, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld1.s16 {q14,q15}, [r9] + add r8, sp, r8, LSL #(COMPONENT_SHIFT + 1) + mov r9, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + + vmull.s16 q8, d24, d0[0] + vmull.s16 q9, d28, d0[1] + vmlsl.s16 q8, d25, d2[0] + vmlsl.s16 q9, d29, d2[1] + vmlsl.s16 q8, d26, d4[0] + vmlsl.s16 q9, d30, d4[1] + vmlal.s16 q8, d27, d6[0] + vmlal.s16 q9, d31, d6[1] + + /* And two more... */ + vld1.s16 {q12,q13}, [r10] + add r9, sp, r9, LSL #(COMPONENT_SHIFT + 1) + mov r10, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + vld1.s16 {q14,q15}, [r11] + add r10, sp, r10, LSL #(COMPONENT_SHIFT + 1) + mov r11, r2, LSR #(31 - CHUNKSHIFT) + add r2, r2, r3 + + vqrshrn.s32 d16, q8, #15 + add r11, sp, r11, LSL #(COMPONENT_SHIFT + 1) + vqrshrn.s32 d17, q9, #15 + + vmull.s16 q10, d24, d0[2] + vmull.s16 q11, d28, d0[3] + vmlsl.s16 q10, d25, d2[2] + vmlsl.s16 q11, d29, d2[3] + vmlsl.s16 q10, d26, d4[2] + vmlsl.s16 q11, d30, d4[3] + vmlal.s16 q10, d27, d6[2] + vmlal.s16 q11, d31, d6[3] + + vqrshrn.s32 d18, q10, #15 + vqrshrn.s32 d19, q11, #15 + + vqrshrun.s16 d16, q8, #VERTBITS - 8 + vqrshrun.s16 d17, q9, #VERTBITS - 8 + + /* And two more... */ + vld1.s16 {q12,q13}, [r8] + vld1.s16 {q14,q15}, [r9] + + vmull.s16 q10, d24, d1[0] + vmull.s16 q11, d28, d1[1] + vmlsl.s16 q10, d25, d3[0] + vmlsl.s16 q11, d29, d3[1] + vmlsl.s16 q10, d26, d5[0] + vmlsl.s16 q11, d30, d5[1] + vmlal.s16 q10, d27, d7[0] + vmlal.s16 q11, d31, d7[1] + + /* And two more... 
*/ + vld1.s16 {q12,q13}, [r10] + vld1.s16 {q14,q15}, [r11] + + subs lr, lr, #LOOP_OUTPUT_SIZE + + vqrshrn.s32 d18, q10, #15 + vqrshrn.s32 d19, q11, #15 + + vmull.s16 q10, d24, d1[2] + vmull.s16 q11, d28, d1[3] + vmlsl.s16 q10, d25, d3[2] + vmlsl.s16 q11, d29, d3[3] + vmlsl.s16 q10, d26, d5[2] + vmlsl.s16 q11, d30, d5[3] + vmlal.s16 q10, d27, d7[2] + vmlal.s16 q11, d31, d7[3] + + vqrshrn.s32 d20, q10, #15 + vqrshrn.s32 d21, q11, #15 + + vqrshrun.s16 d18, q9, #VERTBITS - 8 + vqrshrun.s16 d19, q10, #VERTBITS - 8 +.endif + bgt 2b /* continue inner loop */ + /* The inner loop has already been limited to ensure that none of + * the earlier iterations could overfill the output, so the store + * appears within the loop but after the conditional branch (at the + * top). At the end, provided it won't overfill, perform the final + * store here. If it would, then break out to the tricky tail case + * instead. + */ + blt 1f + /* Store the amount of data appropriate to the configuration of the + * instance being assembled. + */ +.if LOOP_OUTPUT_SIZE == 4 + vst1.u32 {d16[0]}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 8 + vst1.u8 {d16}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 16 + vst1.u8 {q8}, [r0]! +.elseif LOOP_OUTPUT_SIZE == 32 + vst1.u8 {q8,q9}, [r0]! +.endif + b 1b /* resume outer loop */ + /* Partial tail store case: + * Different versions of the code need different subsets of the + * following partial stores. Here the number of components and the + * size of the chunk of data produced by each inner loop iteration + * is tested to figure out whether or not each phrase is relevant. + */ +.if 16 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 16 +1: tst lr, #16 + beq 1f + vst1.u8 {q8}, [r0]! + vmov q8, q9 +.endif +.if 8 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 8 +1: tst lr, #8 + beq 1f + vst1.u8 {d16}, [r0]! + vmov.u8 d16, d17 +.endif +.if 4 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 4 +1: tst lr, #4 + beq 1f + vst1.u32 {d16[0]}, [r0]! 
+ vext.u32 d16, d16, d16, #1 +.endif +.if 2 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 2 +1: tst lr, #2 + beq 1f + vst1.u16 {d16[0]}, [r0]! + vext.u16 d16, d16, d16, #1 +.endif +.if 1 < LOOP_OUTPUT_SIZE && COMPONENT_COUNT <= 1 +1: tst lr, #1 + beq 1f + vst1.u8 {d16[0]}, [r0]! +.endif +1: +9: ldr sp, [sp,#SP_STORE] + vpop {d8-d15} + pop {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +END(rsdIntrinsicResizeB\comp\()_K) +.endr diff --git a/toolkit/TaskProcessor.cpp b/toolkit/TaskProcessor.cpp new file mode 100644 index 00000000..d9ae83ca --- /dev/null +++ b/toolkit/TaskProcessor.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TaskProcessor.h" + +#include <assert.h> +#include <sys/prctl.h> + +#include "RenderScriptToolkit.h" +#include "Utils.h" + +#define LOG_TAG "renderscript.toolkit.TaskProcessor" + +namespace android { +namespace renderscript { + +int Task::setTiling(unsigned int targetTileSizeInBytes) { + // Empirically, values smaller than 1000 are unlikely to give good performance. + targetTileSizeInBytes = std::max(1000u, targetTileSizeInBytes); + const size_t cellSizeInBytes = + mVectorSize; // If we add float support, vectorSize * 4 for that. 
+ const size_t targetCellsPerTile = targetTileSizeInBytes / cellSizeInBytes; + assert(targetCellsPerTile > 0); + + size_t cellsToProcessY; + size_t cellsToProcessX; + if (mRestriction == nullptr) { + cellsToProcessX = mSizeX; + cellsToProcessY = mSizeY; + } else { + assert(mRestriction->endX > mRestriction->startX); + assert(mRestriction->endY > mRestriction->startY); + cellsToProcessX = mRestriction->endX - mRestriction->startX; + cellsToProcessY = mRestriction->endY - mRestriction->startY; + } + + // We want rows as large as possible, as the SIMD code we have is more efficient with + // large rows. + mTilesPerRow = divideRoundingUp(cellsToProcessX, targetCellsPerTile); + // Once we know the number of tiles per row, we divide that row evenly. We round up to make + // sure all cells are included in the last tile of the row. + mCellsPerTileX = divideRoundingUp(cellsToProcessX, mTilesPerRow); + + // We do the same thing for the Y direction. + size_t targetRowsPerTile = divideRoundingUp(targetCellsPerTile, mCellsPerTileX); + mTilesPerColumn = divideRoundingUp(cellsToProcessY, targetRowsPerTile); + mCellsPerTileY = divideRoundingUp(cellsToProcessY, mTilesPerColumn); + + return mTilesPerRow * mTilesPerColumn; +} + +void Task::processTile(unsigned int threadIndex, size_t tileIndex) { + // Figure out the overall boundaries. + size_t startWorkX; + size_t startWorkY; + size_t endWorkX; + size_t endWorkY; + if (mRestriction == nullptr) { + startWorkX = 0; + startWorkY = 0; + endWorkX = mSizeX; + endWorkY = mSizeY; + } else { + startWorkX = mRestriction->startX; + startWorkY = mRestriction->startY; + endWorkX = mRestriction->endX; + endWorkY = mRestriction->endY; + } + // Figure out the rectangle for this tileIndex. All our tiles form a 2D grid. Identify + // first the X, Y coordinate of our tile in that grid. + size_t tileIndexY = tileIndex / mTilesPerRow; + size_t tileIndexX = tileIndex % mTilesPerRow; + // Calculate the starting and ending point of that tile. 
+ size_t startCellX = startWorkX + tileIndexX * mCellsPerTileX; + size_t startCellY = startWorkY + tileIndexY * mCellsPerTileY; + size_t endCellX = std::min(startCellX + mCellsPerTileX, endWorkX); + size_t endCellY = std::min(startCellY + mCellsPerTileY, endWorkY); + + // Call the derived class to do the specific work. + if (mPrefersDataAsOneRow && startCellX == 0 && endCellX == mSizeX) { + // When the tile covers entire rows, we can take advantage that some ops are not 2D. + processData(threadIndex, 0, startCellY, mSizeX * (endCellY - startCellY), startCellY + 1); + } else { + processData(threadIndex, startCellX, startCellY, endCellX, endCellY); + } +} + +TaskProcessor::TaskProcessor(unsigned int numThreads) + : mUsesSimd{cpuSupportsSimd()}, + /* If the requested number of threads is 0, we'll decide based on the number of cores. + * Through empirical testing, we've found that using more than 6 threads does not help. + * There may be more optimal choices to make depending on the SoC but we'll stick to + * this simple heuristic for now. + * + * We'll re-use the thread that calls the processor doTask method, so we'll spawn one less + * worker pool thread than the total number of threads. + * + * std::thread::hardware_concurrency() may return 0 when the core count cannot be + * determined. It is clamped to 1 so that the unsigned subtraction below cannot wrap + * around; in that case no pool threads are spawned and all the work runs on the + * calling thread. + */ + mNumberOfPoolThreads{numThreads ? numThreads - 1 + : std::min(6u, std::max(1u, std::thread::hardware_concurrency()) - 1)} { + mPoolThreads.reserve(mNumberOfPoolThreads); + for (unsigned int i = 0; i < mNumberOfPoolThreads; i++) { + // Pool threads are numbered from 1; index 0 is the thread that calls doTask(). + mPoolThreads.emplace_back(&TaskProcessor::processTilesOfWork, this, + static_cast<int>(i + 1), false); + } +} + +TaskProcessor::~TaskProcessor() { + { + std::lock_guard<std::mutex> lock(mQueueMutex); + mStopThreads = true; + mWorkAvailableOrStop.notify_all(); + } + + for (auto& thread : mPoolThreads) { + thread.join(); + } +} + +void TaskProcessor::processTilesOfWork(int threadIndex, bool returnWhenNoWork) { + if (threadIndex != 0) { + // Set the name of the thread, except for thread 0, which is not part of the pool. + // PR_SET_NAME takes a maximum of 16 characters, including the terminating null.
+ char name[16]{"RenderScToolkit"}; + prctl(PR_SET_NAME, name, 0, 0, 0); + // ALOGI("Starting thread%d", threadIndex); + } + + std::unique_lock<std::mutex> lock(mQueueMutex); + while (true) { + mWorkAvailableOrStop.wait(lock, [this, returnWhenNoWork]() REQUIRES(mQueueMutex) { + return mStopThreads || (mTilesNotYetStarted > 0) || + (returnWhenNoWork && (mTilesNotYetStarted == 0)); + }); + // ALOGI("Woke thread%d", threadIndex); + + // This ScopedLockAssertion is to help the compiler when it checks thread annotations + // to realize that we have the lock. It's however not completely true; we don't + // hold the lock while processing the tile. + // TODO Figure out how to fix that. + android::base::ScopedLockAssertion lockAssert(mQueueMutex); + if (mStopThreads || (returnWhenNoWork && mTilesNotYetStarted == 0)) { + break; + } + + while (mTilesNotYetStarted > 0 && !mStopThreads) { + // This picks the tiles in decreasing order but that does not matter. + int myTile = --mTilesNotYetStarted; + mTilesInProcess++; + lock.unlock(); + { + // We won't be executing this code unless the main thread is + // holding the mTaskMutex lock, which guards mCurrentTask. + // The compiler can't figure this out. + android::base::ScopedLockAssertion lockAssert(mTaskMutex); + mCurrentTask->processTile(threadIndex, myTile); + } + lock.lock(); + mTilesInProcess--; + if (mTilesInProcess == 0 && mTilesNotYetStarted == 0) { + mWorkIsFinished.notify_one(); + } + } + } + // if (threadIndex != 0) { + // ALOGI("Ending thread%d", threadIndex); + // } +} + +void TaskProcessor::doTask(Task* task) { + std::lock_guard<std::mutex> lockGuard(mTaskMutex); + task->setUsesSimd(mUsesSimd); + mCurrentTask = task; + // Notify the thread pool of available work. + startWork(task); + // Start processing some of the tiles on the calling thread. + processTilesOfWork(0, true); + // Wait for all the pool workers to complete. 
+ waitForPoolWorkersToComplete(); + mCurrentTask = nullptr; +} + +void TaskProcessor::startWork(Task* task) { + /** + * The size in bytes that we're hoping each tile will be. If this value is too small, + * we'll spend too much time in synchronization. If it's too large, some cores may be + * idle while others still have a lot of work to do. Ideally, it would depend on the + * device we're running. 16k is the same value used by RenderScript and seems reasonable + * from ad-hoc tests. + */ + const size_t targetTileSize = 16 * 1024; + + std::lock_guard<std::mutex> lock(mQueueMutex); + assert(mTilesInProcess == 0); + mTilesNotYetStarted = task->setTiling(targetTileSize); + mWorkAvailableOrStop.notify_all(); +} + +void TaskProcessor::waitForPoolWorkersToComplete() { + std::unique_lock<std::mutex> lock(mQueueMutex); + // The predicate, i.e. the lambda, will make sure that + // we terminate even if the main thread calls this after + // mWorkIsFinished is signaled. + mWorkIsFinished.wait(lock, [this]() REQUIRES(mQueueMutex) { + return mTilesNotYetStarted == 0 && mTilesInProcess == 0; + }); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/TaskProcessor.h b/toolkit/TaskProcessor.h new file mode 100644 index 00000000..4d274fa6 --- /dev/null +++ b/toolkit/TaskProcessor.h @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Describes the data to process for a single Toolkit call, such as one blur or one blend.
 *
 * The data is a 2D array of cells, each cell being a vector of 1 to 4 unsigned bytes. The most
 * common layout is a 2D array of uchar4 holding an RGBA image.
 *
 * This is a base class; every Toolkit operation provides its own subclass. Typical use of a
 * derived class:
 *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
 *    processor->doTask(&task);
 *
 * The TaskProcessor should call setTiling() and setUsesSimd() once, before it calls
 * processTile(). No other class should call setTiling(), setUsesSimd() or processTile().
 */
class Task {
  protected:
    /** Width of the data, as a number of cells. */
    const size_t mSizeX;
    /** Height of the data, as a number of cells. */
    const size_t mSizeY;
    /** How many elements make up one cell (vector). Between 1 and 4. */
    const size_t mVectorSize;
    /**
     * True when the task would rather receive its work through processData as a single line
     * than as a rectangle. That suits operations that never look at vertical neighbors, such
     * as blend or histogram, because one call (and thus fewer SIMD calls) can cover all rows.
     *
     * Only honored when a tile spans the full width of the data being processed.
     */
    const bool mPrefersDataAsOneRow;
    /** Whether the processor we are running on supports SIMD operations. */
    bool mUsesSimd = false;

  private:
    /** When not null, restricts processing to a subset of the 2D array. */
    const struct Restriction* mRestriction;

    // The work is split into rectangular tiles; setTiling() is declared below.

    /** Width of one tile, as a number of cells. */
    size_t mCellsPerTileX = 0;
    /** Height of one tile, as a number of cells. */
    size_t mCellsPerTileY = 0;
    /** How many tiles fit in one row of the restricted area being processed. */
    size_t mTilesPerRow = 0;
    /** How many tiles fit in one column of the restricted area being processed. */
    size_t mTilesPerColumn = 0;

  public:
    /**
     * Builds a task.
     *
     * sizeX and sizeY should be greater than 0 and vectorSize between 1 and 4. The restriction,
     * when provided, must outlive this instance. Arguments are not validated here because the
     * Toolkit has already done so.
     */
    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
         const Restriction* restriction)
        : mSizeX(sizeX),
          mSizeY(sizeY),
          mVectorSize(vectorSize),
          mPrefersDataAsOneRow(prefersDataAsOneRow),
          mRestriction(restriction) {}
    virtual ~Task() = default;

    /** Records whether SIMD code paths may be used while processing this task. */
    void setUsesSimd(bool uses) { mUsesSimd = uses; }

    /**
     * Splits the work into rectangular tiles that can be handed out to the threads. Regular
     * shapes such as 400x300 must work, and so must unusual ones like 1x120000, 120000x1
     * or 1x1.
     *
     * The target size is roughly how much data a thread processes before it looks for more
     * work. A target that is too low means more time spent synchronizing; one that is too high
     * can leave some cores underused.
     *
     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
     * @return The number of tiles.
     */
    int setTiling(unsigned int targetTileSizeInBytes);

    /**
     * Invoked by the TaskProcessor to have this task process one tile.
     *
     * @param threadIndex The index of the thread that's processing the tile.
     * @param tileIndex The index of the tile to process.
     */
    void processTile(unsigned int threadIndex, size_t tileIndex);

  private:
    /**
     * Implemented by the derived class to process the rectangle delimited by (startX, startY)
     * and (endX, endY). The end values are EXCLUDED. When a restriction was provided, the
     * rectangle lies within it.
     */
    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
                             size_t endY) = 0;
};
+ */ + std::condition_variable mWorkAvailableOrStop; + /** + * Signaled when the work for the task is finished. + */ + std::condition_variable mWorkIsFinished; + /** + * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts + * working on a new tile, it uses this count to identify which tile to work on. The tile + * number is sufficient to determine the boundaries of the data to process. + * + * The number of tiles left to process. + */ + int mTilesNotYetStarted GUARDED_BY(mQueueMutex) = 0; + /** + * The number of tiles currently being processed. Must not be greater than + * mNumberOfPoolThreads + 1. + */ + int mTilesInProcess GUARDED_BY(mQueueMutex) = 0; + + /** + * Determines how we'll tile the work and signals the thread pool of available work. + * + * @param task The task to be performed. + */ + void startWork(Task* task) REQUIRES(mTaskMutex); + + /** + * Tells the thread to start processing work off the queue. + * + * The flag is used for prevent the main thread from blocking forever if the work is + * so trivial that the worker threads complete the work before the main thread calls this + * method. + * + * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by. + * @param returnWhenNoWork If there's no work, return immediately. + */ + void processTilesOfWork(int threadIndex, bool returnWhenNoWork); + + /** + * Wait for the pool workers to complete the work on the current task. + */ + void waitForPoolWorkersToComplete(); + + public: + /** + * Create the processor. + * + * @param numThreads The total number of threads to use. If 0, we'll decided based on system + * properties. + */ + explicit TaskProcessor(unsigned int numThreads = 0); + + ~TaskProcessor(); + + /** + * Do the specified task. Returns only after the task has been completed. + */ + void doTask(Task* task); + + /** + * Some Tasks need to allocate temporary storage for each worker thread. 
+ * This provides the number of threads. + */ + unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; } +}; + +} // namespace renderscript +} // namespace android + +#endif // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H diff --git a/toolkit/TestTaskProcessor.cpp b/toolkit/TestTaskProcessor.cpp new file mode 100644 index 00000000..36a94f4a --- /dev/null +++ b/toolkit/TestTaskProcessor.cpp @@ -0,0 +1,105 @@ +#include <array> + +#include "TaskProcessor.h" + +/** + * Sets all entries of the buffer to a value that depends on its coordinate and a delta. + */ +class SimpleTask : public android::renderscript::Task { + uint8_t* mBuffer; + uint8_t mDelta; + virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY); + + public: + SimpleTask(uint8_t* buffer, size_t vectorSize, size_t sizeX, size_t sizeY, uint8_t delta) + : Task{sizeX, sizeY, vectorSize, false, nullptr}, mBuffer{buffer}, mDelta{delta} {} +}; + +/** + * Create a new value that's a function of the x, y coordinates and a delta. + */ +static uint8_t newValue(size_t x, size_t y, uint8_t delta) { + return (((x & 0xff) << 4) | (y & 0xff)) + delta; +} + +void SimpleTask::processData(int /*threadIndex*/, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + for (size_t x = startX; x < endX; x++) { + size_t index = (y * mSizeX + x) * mVectorSize; + for (size_t i = 0; i < mVectorSize; i++) { + // Use add to make sure the opertion is only done once. This assumes + // the buffer starts set at 0. + mBuffer[index + i] += newValue(x, y, mDelta + i); + } + } + } +} + +/** + * Returns true if all the entries of the vector are the expected value. + * Prints an error if not. 
+ */ +bool verifyAllTheSame(const std::vector<uint8_t>& buffer, size_t vectorSize, size_t sizeX, + size_t sizeY, uint8_t delta) { + for (size_t y = 0; y < sizeY; y++) { + for (size_t x = 0; x < sizeX; x++) { + size_t index = (y * sizeX + x) * vectorSize; + for (size_t i = 0; i < vectorSize; i++) { + uint8_t expectedValue = newValue(x, y, delta + i); + if (buffer[index + i] != expectedValue) { + printf("Test Error at %zu, %zu. Expected %u found %u instead\n", x, y, + expectedValue, buffer[index + i]); + return false; + } + } + } + } + return true; +} + +/** + * Create a buffer of the specified size, set each entry of that buffer + * to the specified value using TaskProcessor, and verify the results. + */ +void testOne(android::renderscript::TaskProcessor* processor, uint8_t delta, size_t vectorSize, + size_t sizeX, size_t sizeY) { + std::vector<uint8_t> buffer(sizeX * sizeY * vectorSize); + + SimpleTask task{buffer.data(), vectorSize, sizeX, sizeY, delta}; + processor->doTask(&task); + + if (verifyAllTheSame(buffer, vectorSize, sizeX, sizeY, delta)) { + printf("Test %u: All good!\n", delta); + } +} + +int main() { + std::vector<std::thread> testThreads; + + // Test with multiple threads, to help find synchronization errors. 
+ android::renderscript::TaskProcessor processorA(1); + android::renderscript::TaskProcessor processorB(4); + testThreads.emplace_back(testOne, &processorA, 1, 4, 30, 40); + testThreads.emplace_back(testOne, &processorB, 1, 4, 30, 40); + testThreads.emplace_back(testOne, &processorA, 2, 4, 800, 600); + testThreads.emplace_back(testOne, &processorB, 2, 4, 800, 600); + testThreads.emplace_back(testOne, &processorA, 3, 1, 123, 47); + testThreads.emplace_back(testOne, &processorB, 3, 1, 123, 47); + testThreads.emplace_back(testOne, &processorA, 5, 2, 5000, 8000); + testThreads.emplace_back(testOne, &processorB, 5, 2, 5000, 8000); + testThreads.emplace_back(testOne, &processorA, 6, 3, 26000, 1); + testThreads.emplace_back(testOne, &processorB, 6, 3, 26000, 1); + testThreads.emplace_back(testOne, &processorA, 7, 4, 1, 26000); + testThreads.emplace_back(testOne, &processorB, 7, 4, 1, 26000); + testThreads.emplace_back(testOne, &processorA, 8, 4, 1000, 1000); + testThreads.emplace_back(testOne, &processorB, 8, 4, 1000, 1000); + testThreads.emplace_back(testOne, &processorA, 9, 1, 1, 1); + testThreads.emplace_back(testOne, &processorB, 9, 1, 1, 1); + + for (auto& thread : testThreads) { + thread.join(); + } + return 0; +} diff --git a/toolkit/Utils.cpp b/toolkit/Utils.cpp new file mode 100644 index 00000000..8ec9fbec --- /dev/null +++ b/toolkit/Utils.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Utils.h" + +#include <cpu-features.h> + +#include "RenderScriptToolkit.h" + +namespace android { +namespace renderscript { + +#define LOG_TAG "renderscript.toolkit.Utils" + +bool cpuSupportsSimd() { + AndroidCpuFamily family = android_getCpuFamily(); + uint64_t features = android_getCpuFeatures(); + + if (family == ANDROID_CPU_FAMILY_ARM && (features & ANDROID_CPU_ARM_FEATURE_NEON)) { + // ALOGI("Arm with Neon"); + return true; + } else if (family == ANDROID_CPU_FAMILY_ARM64 && (features & ANDROID_CPU_ARM64_FEATURE_ASIMD)) { + // ALOGI("Arm64 with ASIMD"); + return true; + } else if ((family == ANDROID_CPU_FAMILY_X86 || family == ANDROID_CPU_FAMILY_X86_64) && + (features & ANDROID_CPU_X86_FEATURE_SSSE3)) { + // ALOGI("x86* with SSE3"); + return true; + } + // ALOGI("Not simd"); + return false; +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE +bool validRestriction(const char* tag, size_t sizeX, size_t sizeY, const Restriction* restriction) { + if (restriction == nullptr) { + return true; + } + if (restriction->startX >= sizeX || restriction->endX > sizeX) { + ALOGE("%s. sizeX should be greater than restriction->startX and greater or equal to " + "restriction->endX. %zu, %zu, and %zu were provided respectively.", + tag, sizeX, restriction->startX, restriction->endY); + return false; + } + if (restriction->startY >= sizeY && restriction->endY > sizeY) { + ALOGE("%s. sizeY should be greater than restriction->startY and greater or equal to " + "restriction->endY. %zu, %zu, and %zu were provided respectively.", + tag, sizeY, restriction->startY, restriction->endY); + return false; + } + if (restriction->startX >= restriction->endX) { + ALOGE("%s. Restriction startX should be less than endX. 
" + "%zu and %zu were provided respectively.", + tag, restriction->startX, restriction->endX); + return false; + } + if (restriction->startY >= restriction->endY) { + ALOGE("%s. Restriction startY should be less than endY. " + "%zu and %zu were provided respectively.", + tag, restriction->startY, restriction->endY); + return false; + } + return true; +} +#endif + +} // namespace renderscript +} // namespace android diff --git a/toolkit/Utils.h b/toolkit/Utils.h new file mode 100644 index 00000000..ff9eb430 --- /dev/null +++ b/toolkit/Utils.h @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_RENDERSCRIPT_TOOLKIT_UTILS_H +#define ANDROID_RENDERSCRIPT_TOOLKIT_UTILS_H + +#include <android/log.h> + +namespace android { +namespace renderscript { + +/* The Toolkit does not support floating point buffers but the original RenderScript Intrinsics + * did for some operations. That code was preserved and protected by + * ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT. + */ +// TODO: On final packaging, decide whether this should be define in the build file, and for which +// config. #define ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT + +/* If we release the Toolkit as a C++ API, we'll want to enable validation at the C++ level + * by uncommenting this define. + * + * If we only have a Java/Kotlin API, the Kotlin layer does validation. 
We don't need to duplicate + * this effort. + */ +#define ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE + +#define ALOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__) +#define ALOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__) +#define ALOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) + +using uchar = unsigned char; +using uint = unsigned int; +using ushort = unsigned short; + +using uint8_t = uchar; +using uint16_t = ushort; +using uint32_t = uint; + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef uchar uchar2 __attribute__((ext_vector_type(2))); +typedef uchar uchar3 __attribute__((ext_vector_type(3))); +typedef uchar uchar4 __attribute__((ext_vector_type(4))); +typedef ushort ushort2 __attribute__((ext_vector_type(2))); +typedef ushort ushort3 __attribute__((ext_vector_type(3))); +typedef ushort ushort4 __attribute__((ext_vector_type(4))); +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint3 __attribute__((ext_vector_type(3))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef short short2 __attribute__((ext_vector_type(2))); +typedef short short3 __attribute__((ext_vector_type(3))); +typedef short short4 __attribute__((ext_vector_type(4))); +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef int int4 __attribute__((ext_vector_type(4))); + +template <typename TO, typename TI> +inline TO convert(TI i) { + // assert(i.x >= 0 && i.y >= 0 && i.z >= 0 && i.w >= 0); + // assert(i.x <= 255 && i.y <= 255 && i.z <= 255 && i.w <= 255); + return __builtin_convertvector(i, TO); +} + +template <> +inline uchar convert(float i) { + // assert(i.x >= 0 && i.y >= 0 && i.z >= 0 && i.w >= 0); + // assert(i.x <= 255 && i.y <= 255 && i.z <= 255 && i.w <= 255); + return (uchar)i; +} + 
+template <> +inline float convert(uchar i) { + // assert(i.x >= 0 && i.y >= 0 && i.z >= 0 && i.w >= 0); + // assert(i.x <= 255 && i.y <= 255 && i.z <= 255 && i.w <= 255); + return (float)i; +} + +inline int4 clamp(int4 amount, int low, int high) { + int4 r; + r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); + r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); + r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); + r.w = amount.w < low ? low : (amount.w > high ? high : amount.w); + return r; +} + +inline float4 clamp(float4 amount, float low, float high) { + float4 r; + r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); + r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); + r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); + r.w = amount.w < low ? low : (amount.w > high ? high : amount.w); + return r; +} + +inline int2 clamp(int2 amount, int low, int high) { + int2 r; + r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); + r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); + return r; +} + +inline float2 clamp(float2 amount, float low, float high) { + float2 r; + r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); + r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); + return r; +} + +inline int clamp(int amount, int low, int high) { + return amount < low ? low : (amount > high ? high : amount); +} + +inline float clamp(float amount, float low, float high) { + return amount < low ? low : (amount > high ? high : amount); +} + +#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE +struct Restriction; + +bool validRestriction(const char* tag, size_t sizeX, size_t sizeY, const Restriction* restriction); +#endif + +/** + * Returns true if the processor we're running on supports the SIMD instructions that are + * used in our assembly code. 
/**
 * Integer division of a by b, rounded up to the next whole number when there is a remainder.
 * b must not be 0.
 */
inline size_t divideRoundingUp(size_t a, size_t b) {
    const size_t quotient = a / b;
    const bool hasRemainder = (a % b) != 0;
    return hasRemainder ? quotient + 1 : quotient;
}

/**
 * Maps a size of 3 to 4 and returns every other size unchanged.
 */
inline size_t paddedSize(size_t size) {
    if (size == 3) {
        return 4;
    }
    return size;
}
+ virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, + size_t endY) override; + + public: + YuvToRgbTask(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY, + RenderScriptToolkit::YuvFormat format) + : Task{sizeX, sizeY, 4, false, nullptr}, mOut{reinterpret_cast<uchar4*>(output)} { + switch (format) { + case RenderScriptToolkit::YuvFormat::NV21: + mCstep = 2; + mStrideY = sizeX; + mStrideU = mStrideY; + mStrideV = mStrideY; + mInY = reinterpret_cast<const uchar*>(input); + mInV = reinterpret_cast<const uchar*>(input + mStrideY * sizeY); + mInU = mInV + 1; + break; + case RenderScriptToolkit::YuvFormat::YV12: + mCstep = 1; + mStrideY = roundUpTo16(sizeX); + mStrideU = roundUpTo16(mStrideY >> 1); + mStrideV = mStrideU; + mInY = reinterpret_cast<const uchar*>(input); + mInU = reinterpret_cast<const uchar*>(input + mStrideY * sizeY); + mInV = mInU + mStrideV * sizeY / 2; + break; + } + } +}; + +void YuvToRgbTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX, + size_t endY) { + for (size_t y = startY; y < endY; y++) { + size_t offset = mSizeX * y + startX; + uchar4* out = mOut + offset; + kernel(out, startX, endX, y); + } +} + +static uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) { + int16_t Y = ((int16_t)y) - 16; + int16_t U = ((int16_t)u) - 128; + int16_t V = ((int16_t)v) - 128; + + short4 p; + p.x = (Y * 298 + V * 409 + 128) >> 8; + p.y = (Y * 298 - U * 100 - V * 208 + 128) >> 8; + p.z = (Y * 298 + U * 516 + 128) >> 8; + p.w = 255; + if(p.x < 0) { + p.x = 0; + } + if(p.x > 255) { + p.x = 255; + } + if(p.y < 0) { + p.y = 0; + } + if(p.y > 255) { + p.y = 255; + } + if(p.z < 0) { + p.z = 0; + } + if(p.z > 255) { + p.z = 255; + } + + return (uchar4){static_cast<uchar>(p.x), static_cast<uchar>(p.y), + static_cast<uchar>(p.z), static_cast<uchar>(p.w)}; +} + +extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, + size_t xend); +extern "C" void 
rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, + size_t xend); +extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, + size_t xstart, size_t xend); + +void YuvToRgbTask::kernel(uchar4 *out, uint32_t xstart, uint32_t xend, uint32_t currentY) { + //ALOGI("kernel out %p, xstart=%u, xend=%u, currentY=%u", out, xstart, xend, currentY); + + const uchar *y = mInY + (currentY * mStrideY); + const uchar *v = mInV + ((currentY >> 1) * mStrideV); + const uchar *u = mInU + ((currentY >> 1) * mStrideU); + + //ALOGI("pinY %p, pinV %p, pinU %p", pinY, pinV, pinU); + + uint32_t x1 = xstart; + uint32_t x2 = xend; + + /* + ALOGE("pinY, %p, Y, %p, currentY, %d, strideY, %zu", pinY, y, currentY, mStrideY); + ALOGE("pinU, %p, U, %p, currentY, %d, strideU, %zu", pinU, u, currentY, mStrideU); + ALOGE("pinV, %p, V, %p, currentY, %d, strideV, %zu", pinV, v, currentY, mStrideV); + ALOGE("dimX, %d, dimY, %d", cp->alloc->mHal.drvState.lod[0].dimX, + cp->alloc->mHal.drvState.lod[0].dimY); + ALOGE("info->dim.x, %d, info->dim.y, %d", info->dim.x, info->dim.y); + uchar* pinY = (uchar*)mInY; + uchar* pinU = (uchar*)mInU; + uchar* pinV = (uchar*)mInV; + ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinY, pinY[0], pinY[1], pinY[2], pinY[3], pinY[4], pinY[5], pinY[6], pinY[7], pinY[8], + pinY[9], pinY[10], pinY[11], pinY[12], pinY[13], pinY[14], pinY[15]); + ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinY, pinY[16], pinY[17], pinY[18], pinY[19], pinY[20], pinY[21], pinY[22], pinY[23], + pinY[24], pinY[25], pinY[26], pinY[27], pinY[28], pinY[29], pinY[30], pinY[31]); + ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinY, pinY[32], pinY[33], pinY[34], pinY[35], pinY[36], 
pinY[37], pinY[38], pinY[39], + pinY[40], pinY[41], pinY[42], pinY[43], pinY[44], pinY[45], pinY[46], pinY[47]); + + ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinU, pinU[0], pinU[1], pinU[2], pinU[3], pinU[4], pinU[5], pinU[6], pinU[7], pinU[8], + pinU[9], pinU[10], pinU[11], pinU[12], pinU[13], pinU[14], pinU[15]); + ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinU, pinU[16], pinU[17], pinU[18], pinU[19], pinU[20], pinU[21], pinU[22], pinU[23], + pinU[24], pinU[25], pinU[26], pinU[27], pinU[28], pinU[29], pinU[30], pinU[31]); + ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinU, pinU[32], pinU[33], pinU[34], pinU[35], pinU[36], pinU[37], pinU[38], pinU[39], + pinU[40], pinU[41], pinU[42], pinU[43], pinU[44], pinU[45], pinU[46], pinU[47]); + + ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinV, pinV[0], pinV[1], pinV[2], pinV[3], pinV[4], pinV[5], pinV[6], pinV[7], pinV[8], + pinV[9], pinV[10], pinV[11], pinV[12], pinV[13], pinV[14], pinV[15]); + ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinV, pinV[16], pinV[17], pinV[18], pinV[19], pinV[20], pinV[21], pinV[22], pinV[23], + pinV[24], pinV[25], pinV[26], pinV[27], pinV[28], pinV[29], pinV[30], pinV[31]); + ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx " + "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx", + pinV, pinV[32], pinV[33], pinV[34], pinV[35], pinV[36], pinV[37], pinV[38], pinV[39], + pinV[40], pinV[41], pinV[42], pinV[43], pinV[44], pinV[45], pinV[46], pinV[47]); + */ + + /* If we start on an odd pixel then deal with it here and bump things 
along + * so that subsequent code can carry on with even-odd pairing assumptions. + */ + if((x1 & 1) && (x2 > x1)) { + int cx = (x1 >> 1) * mCstep; + *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]); + out++; + x1++; + } + +#if defined(ARCH_ARM_USE_INTRINSICS) + if((x2 > x1) && mUsesSimd) { + int32_t len = x2 - x1; + if (mCstep == 1) { + rsdIntrinsicYuv2_K(out, y, u, v, x1, x2); + x1 += len; + out += len; + } else if (mCstep == 2) { + // Check for proper interleave + intptr_t ipu = (intptr_t)u; + intptr_t ipv = (intptr_t)v; + + if (ipu == (ipv + 1)) { + rsdIntrinsicYuv_K(out, y, v, x1, x2); + x1 += len; + out += len; + } else if (ipu == (ipv - 1)) { + rsdIntrinsicYuvR_K(out, y, u, x1, x2); + x1 += len; + out += len; + } + } + } +#endif + + if(x2 > x1) { + // ALOGE("y %i %i %i", currentY, x1, x2); + while(x1 < x2) { + int cx = (x1 >> 1) * mCstep; + *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]); + out++; + x1++; + *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]); + out++; + x1++; + } + } +} + +void RenderScriptToolkit::yuvToRgb(const uint8_t* input, uint8_t* output, size_t sizeX, + size_t sizeY, YuvFormat format) { + YuvToRgbTask task(input, output, sizeX, sizeY, format); + processor->doTask(&task); +} + +} // namespace renderscript +} // namespace android diff --git a/toolkit/YuvToRgb_advsimd.S b/toolkit/YuvToRgb_advsimd.S new file mode 100644 index 00000000..bb4b7ae3 --- /dev/null +++ b/toolkit/YuvToRgb_advsimd.S @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/* Perform the actual YuvToRGB conversion in a macro, from register to
 * register.  This macro will be called from within several different wrapper
 * variants for different data layouts.  Y data starts with the even and odd
 * bytes split into v8 and v9 respectively (both the low and the high halves
 * of those registers are consumed below).  U and V default to v10 and v11 and
 * can be swapped through the regu/regv macro arguments.  Working constants are
 * pre-loaded into v24-v31, and v3 and v7 are pre-loaded with a constant 0xff
 * alpha channel.
 *
 * The complicated arithmetic is the result of refactoring the original
 * equations to avoid 16-bit overflow without losing any precision.
 */
.macro yuvkern, regu=v10, regv=v11
        /* v0   out R_lo / even R_lo accumulator
         * v1   out G_lo / even G_lo accumulator
         * v2   out B_lo / even B_lo accumulator
         * v3   out A_lo / const 0xff*ff
         * v4   out R_hi / even R_hi accumulator
         * v5   out G_hi / even G_hi accumulator
         * v6   out B_hi / even B_hi accumulator
         * v7   out A_hi / const 0xff*ff
         * v8   even Y   / G_lo chroma term (g2 = u * 50 + v * 104)
         * v9   odd Y    / G_hi chroma term (g2_hi)
         * \regu in U
         * \regv in V
         * v12  R_lo chroma term (r2 = v * 204)
         * v13  B_lo chroma term (b2 = u * 254)
         * v14  R_hi chroma term (r2_hi)
         * v15  B_hi chroma term (b2_hi)
         * v16  odd R_lo accumulator
         * v17  odd G_lo accumulator
         * v18  odd B_lo accumulator
         * v19  multiplier extra bits low (first v >> 1, then u << 2)
         * v20  odd R_hi accumulator
         * v21  odd G_hi accumulator
         * v22  odd B_hi accumulator
         * v23  multiplier extra bits high (u_hi << 2)
         * v24  constant 149
         * v25  constant 50
         * v26  constant 104
         * v27  constant 204
         * v28  constant 254
         * v29  constant ((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
         * v30  constant ((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
         * v31  constant ((16 * 149 + (128 << 2) + 128 * 254) >> 1)
         */

        /* Luma term, shared by all three channels; computed first into the G accumulators. */
        umull       v1.8h,  v8.8b,  v24.8b      // g0 = y0 * 149
        umull       v17.8h, v9.8b,  v24.8b      // g1 = y1 * 149
        umull2      v5.8h,  v8.16b, v24.16b     // g0_hi = y0_hi * 149
        umull2      v21.8h, v9.16b, v24.16b     // g1_hi = y1_hi * 149

        /* From here on v8/v9 no longer hold Y; they are reused for the G chroma term. */
        umull       v8.8h, \regu\().8b, v25.8b     // g2 = u * 50 + v * 104
        umlal       v8.8h, \regv\().8b, v26.8b
        umull2      v9.8h, \regu\().16b, v25.16b   // g2_hi = u_hi * 50 + v_hi * 104
        umlal2      v9.8h, \regv\().16b, v26.16b

        ushr        v19.16b, \regv\().16b, #1
        uaddw       v0.8h,  v1.8h,  v19.8b      // r0 = g0 + (v >> 1)
        uaddw       v16.8h, v17.8h, v19.8b      // r1 = g1 + (v >> 1)

        uaddw2      v4.8h,  v5.8h,  v19.16b     // r0_hi = g0_hi + (v_hi >> 1)
        uaddw2      v20.8h, v21.8h, v19.16b     // r1_hi = g1_hi + (v_hi >> 1)

        /* v19 is free again at this point and is reused for u << 2. */
        ushll       v19.8h, \regu\().8b,  #2
        ushll2      v23.8h, \regu\().16b, #2
        add         v2.8h,  v1.8h,  v19.8h      // b0 = g0 + (u << 2)
        add         v18.8h, v17.8h, v19.8h      // b1 = g1 + (u << 2)

        add         v6.8h,  v5.8h,  v23.8h      // b0_hi = g0_hi + (u_hi << 2)
        add         v22.8h, v21.8h, v23.8h      // b1_hi = g1_hi + (u_hi << 2)

        umull       v12.8h, \regv\().8b, v27.8b    // r2 = v * 204
        umull       v13.8h, \regu\().8b, v28.8b    // b2 = u * 254

        umull2      v14.8h, \regv\().16b, v27.16b  // r2_hi = v_hi * 204
        umull2      v15.8h, \regu\().16b, v28.16b  // b2_hi = u_hi * 254

        /* R and B are combined with halving adds; G uses a saturating add of its bias. */
        uhadd       v0.8h,  v0.8h,  v12.8h      // r0 = (r0 + r2) >> 1
        uhadd       v16.8h, v16.8h, v12.8h      // r1 = (r1 + r2) >> 1
        uqadd       v1.8h,  v1.8h,  v30.8h      // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uqadd       v17.8h, v17.8h, v30.8h      // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uhadd       v2.8h,  v2.8h,  v13.8h      // b0 = (b0 + b2) >> 1
        uhadd       v18.8h, v18.8h, v13.8h      // b1 = (b1 + b2) >> 1

        uhadd       v4.8h,  v4.8h,  v14.8h      // r0_hi = (r0_hi + r2_hi) >> 1
        uhadd       v20.8h, v20.8h, v14.8h      // r1_hi = (r1_hi + r2_hi) >> 1
        uqadd       v5.8h,  v5.8h,  v30.8h      // g0_hi = satu16(g0_hi + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uqadd       v21.8h, v21.8h, v30.8h      // g1_hi = satu16(g1_hi + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uhadd       v6.8h,  v6.8h,  v15.8h      // b0_hi = (b0_hi + b2_hi) >> 1
        uhadd       v22.8h, v22.8h, v15.8h      // b1_hi = (b1_hi + b2_hi) >> 1

        /* Saturating subtracts remove the bias (R, B) or the chroma term (G), flooring at 0. */
        uqsub       v0.8h,  v0.8h,  v29.8h      // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v16.8h, v16.8h, v29.8h      // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v1.8h,  v1.8h,  v8.8h       // g0 = satu16(g0 - g2)
        uqsub       v17.8h, v17.8h, v8.8h       // g1 = satu16(g1 - g2)
        uqsub       v2.8h,  v2.8h,  v31.8h      // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        uqsub       v18.8h, v18.8h, v31.8h      // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        uqsub       v4.8h,  v4.8h,  v29.8h      // r0_hi = satu16(r0_hi - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v20.8h, v20.8h, v29.8h      // r1_hi = satu16(r1_hi - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v5.8h,  v5.8h,  v9.8h       // g0_hi = satu16(g0_hi - g2_hi)
        uqsub       v21.8h, v21.8h, v9.8h       // g1_hi = satu16(g1_hi - g2_hi)
        uqsub       v6.8h,  v6.8h,  v31.8h      // b0_hi = satu16(b0_hi - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        uqsub       v22.8h, v22.8h, v31.8h      // b1_hi = satu16(b1_hi - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        /* Narrow to 8 bits with rounding and unsigned saturation.  R and B shift by
         * one bit less than G because they were already halved above.
         */
        uqrshrn     v0.8b,  v0.8h,  #6
        uqrshrn     v16.8b, v16.8h, #6
        uqrshrn     v1.8b,  v1.8h,  #7
        uqrshrn     v17.8b, v17.8h, #7
        uqrshrn     v2.8b,  v2.8h,  #6
        uqrshrn     v18.8b, v18.8h, #6

        uqrshrn     v4.8b,  v4.8h,  #6
        uqrshrn     v20.8b, v20.8h, #6
        uqrshrn     v5.8b,  v5.8h,  #7
        uqrshrn     v21.8b, v21.8h, #7
        uqrshrn     v6.8b,  v6.8h,  #6
        uqrshrn     v22.8b, v22.8h, #6

        /* Interleave the even-pixel and odd-pixel results back into pixel order. */
        zip1        v0.16b, v0.16b, v16.16b
        zip1        v1.16b, v1.16b, v17.16b
        zip1        v2.16b, v2.16b, v18.16b

        zip1        v4.16b, v4.16b, v20.16b
        zip1        v5.16b, v5.16b, v21.16b
        zip1        v6.16b, v6.16b, v22.16b
.endm
+ */
+.macro wrap_line kernel, interleaved=0, swapuv=0
+        movi        v24.16b, #149               // v24-v28: byte multipliers used by the kernel
+        movi        v25.16b, #50
+        movi        v26.16b, #104
+        movi        v27.16b, #204
+        movi        v28.16b, #254
+        mov         w5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
+        dup         v29.8h, w5                  // bias the kernel subtracts from red
+        mov         w5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
+        dup         v30.8h, w5                  // bias the kernel adds to green
+        mov         w5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
+        dup         v31.8h, w5                  // bias the kernel subtracts from blue
+
+        movi        v3.16b, #0xff               // constant alpha, first 16 pixels
+        movi        v7.16b, #0xff               // constant alpha, second 16 pixels
+
+        subs        x2, x2, #32                 // x2 = pixel count; the main loop handles 32 per pass
+        bhs         1f
+        b           2f                          // fewer than 32 pixels: go straight to the tail
+
+        .align 4
+1:      ld2         {v8.16b,v9.16b}, [x1], #32  // 32 Y bytes, de-interleaved into even (v8) and odd (v9)
+  .if \interleaved
+        ld2         {v10.16b,v11.16b}, [x3], #32    // interleaved chroma from one pointer
+  .else
+        ld1         {v10.16b}, [x3], #16        // planar chroma: one plane per pointer
+        ld1         {v11.16b}, [x4], #16
+  .endif
+
+  .if \swapuv
+        \kernel regu=v11, regv=v10
+  .else
+        \kernel
+  .endif
+
+        subs        x2, x2, #32
+
+        st4         {v0.16b - v3.16b}, [x0], #64    // first 16 pixels as interleaved R,G,B,A
+        st4         {v4.16b - v7.16b}, [x0], #64    // second 16 pixels
+
+        bhs         1b
+
+2:      adds        x2, x2, #32                 // undo the last subtraction: x2 = pixels left over
+        beq         2f                          // nothing left
+
+        /* To handle the tail portion of the data (something less than 32
+         * bytes) load small power-of-two chunks into working registers. It
+         * doesn't matter where they end up in the register; the same process
+         * will store them back out using the same positions and the
+         * interaction between neighbouring pixels is constrained to odd
+         * boundaries where the load operations don't interfere.
+         */
+        movi        v8.8b, #0
+        movi        v9.8b, #0
+        movi        v10.8b, #0
+        movi        v11.8b, #0
+
+        tbz         x2, #4, 1f                  // bit 4 of the count: a 16-pixel chunk
+        ld1         {v9.16b}, [x1], #16
+  .if \interleaved
+        ld1         {v11.16b}, [x3], #16
+  .else
+        ld1         {v10.d}[1], [x3], #8
+        ld1         {v11.d}[1], [x4], #8
+  .endif
+1:      tbz         x2, #3, 1f                  // bit 3: an 8-pixel chunk
+        ld1         {v8.d}[1], [x1], #8
+  .if \interleaved
+        ld1         {v10.d}[1], [x3], #8
+  .else
+        ld1         {v10.s}[1], [x3], #4
+        ld1         {v11.s}[1], [x4], #4
+  .endif
+1:      tbz         x2, #2, 1f                  // bit 2: a 4-pixel chunk
+        ld1         {v8.s}[1], [x1], #4
+  .if \interleaved
+        ld1         {v10.s}[1], [x3], #4
+  .else
+        ld1         {v10.h}[1], [x3], #2
+        ld1         {v11.h}[1], [x4], #2
+  .endif
+1:      tbz         x2, #1, 1f                  // bit 1: a 2-pixel chunk
+        ld1         {v8.h}[1], [x1], #2
+  .if \interleaved
+        ld1         {v10.h}[1], [x3], #2
+  .else
+        ld1         {v10.b}[1], [x3], #1
+        ld1         {v11.b}[1], [x4], #1
+  .endif
+1:      tbz         x2, #0, 1f                  // bit 0: a single pixel
+        ld1         {v8.b}[1], [x1], #1
+  .if \interleaved
+        ld1         {v10.h}[0], [x3], #2
+  .else
+        ld1         {v10.b}[0], [x3], #1
+        ld1         {v11.b}[0], [x4], #1
+  .endif
+
+        /* One small impediment in the process above is that some of the load
+         * operations can't perform byte-wise structure deinterleaving at the
+         * same time as loading only part of a register. So the data is loaded
+         * linearly and unpacked manually at this point if necessary.
+         */
+1:      mov         v12.16b, v8.16b
+        uzp1        v8.16b, v12.16b, v9.16b     // v8 = even Y bytes
+        uzp2        v9.16b, v12.16b, v9.16b     // v9 = odd Y bytes
+  .if \interleaved
+        mov         v12.16b, v10.16b
+        uzp1        v10.16b, v12.16b, v11.16b
+        uzp2        v11.16b, v12.16b, v11.16b
+  .endif
+
+  .if \swapuv
+        \kernel regu=v11, regv=v10
+  .else
+        \kernel
+  .endif
+
+        /* As above but with the output; structured stores for partial vectors
+         * aren't available, so the data is re-packed first and stored linearly.
+         */
+        zip1        v16.16b, v0.16b, v2.16b
+        zip2        v18.16b, v0.16b, v2.16b
+        zip1        v17.16b, v1.16b, v3.16b
+        zip2        v19.16b, v1.16b, v3.16b
+        zip1        v0.16b, v16.16b, v17.16b
+        zip2        v1.16b, v16.16b, v17.16b
+        zip1        v2.16b, v18.16b, v19.16b
+        zip2        v3.16b, v18.16b, v19.16b
+
+        /* Luckily v4-v7 don't need to be unzipped because they form a complete
+         * set of four and can be stored using st4. */
+
+        tbz         x2, #4, 1f
+        st4         {v4.16b - v7.16b}, [x0], #64    // the 16-pixel chunk
+1:      tbz         x2, #3, 1f
+        st1         {v2.16b,v3.16b}, [x0], #32      // the 8-pixel chunk
+1:      tbz         x2, #2, 1f
+        st1         {v1.16b}, [x0], #16             // the 4-pixel chunk
+1:      tbz         x2, #1, 1f
+        st1         {v0.d}[1], [x0], #8             // the 2-pixel chunk
+1:      tbz         x2, #0, 2f
+        st1         {v0.s}[1], [x0], #4             // the single pixel
+2:
+.endm
+
+
+/* void rsdIntrinsicYuv2_K(
+ *          void *out,          // x0
+ *          void const *yin,    // x1
+ *          void const *uin,    // x2
+ *          void const *vin,    // x3
+ *          size_t xstart,      // x4
+ *          size_t xend);       // x5
+ *
+ * Planar variant: U and V come from separate pointers, each indexed by
+ * xstart / 2.
+ *
+ * NOTE(review): out and yin are advanced by xstart exactly as passed, but the
+ * pixel count is xend minus xstart rounded down to even (x6 << 1), whereas
+ * rsdIntrinsicYuv_K and rsdIntrinsicYuvR_K round xstart down with bic before
+ * using it anywhere. Presumably callers always pass an even xstart; confirm.
+ */
+ENTRY(rsdIntrinsicYuv2_K)
+        lsr         x6, x4, #1                  // x6 = xstart / 2
+        add         x0, x0, x4, LSL #2          // out += xstart * 4
+        add         x1, x1, x4                  // yin += xstart
+        add         x4, x3, x6                  // x4 = vin + xstart / 2
+        add         x3, x2, x6                  // x3 = uin + xstart / 2
+        sub         x2, x5, x6, LSL #1          // x2 = xend - 2 * (xstart / 2)
+
+        sub         x6, sp, #32
+        sub         sp, sp, #64
+        st1         {v8.1d - v11.1d}, [sp]      // save the low 64 bits of v8-v15, which the macros overwrite
+        st1         {v12.1d - v15.1d}, [x6]
+
+        wrap_line yuvkern, 0
+
+        ld1         {v8.1d - v11.1d}, [sp], #32
+        ld1         {v12.1d - v15.1d}, [sp], #32
+        ret
+END(rsdIntrinsicYuv2_K)
+
+/* void rsdIntrinsicYuv_K(
+ *          void *out,          // x0
+ *          void const *yin,    // x1
+ *          void const *uvin,   // x2
+ *          size_t xstart,      // x3
+ *          size_t xend);       // x4
+ *
+ * Interleaved-chroma variant; the kernel is invoked with U and V swapped.
+ */
+ENTRY(rsdIntrinsicYuv_K)
+        bic         x5, x3, #1                  // x5 = xstart rounded down to even
+        add         x0, x0, x5, LSL #2          // out += x5 * 4
+        add         x1, x1, x5                  // yin += x5
+        add         x3, x2, x5                  // x3 = uvin + x5
+        sub         x2, x4, x5                  // x2 = xend - x5
+
+        sub         x5, sp, #32
+        sub         sp, sp, #64
+        st1         {v8.1d - v11.1d}, [sp]      // save the low 64 bits of v8-v15, which the macros overwrite
+        st1         {v12.1d - v15.1d}, [x5]
+
+        wrap_line yuvkern, 1, 1
+
+        ld1         {v8.1d - v11.1d}, [sp], #32
+        ld1         {v12.1d - v15.1d}, [sp], #32
+        ret
+END(rsdIntrinsicYuv_K)
+
+/* void rsdIntrinsicYuvR_K(
+ *          void *out,          // x0
+ *          void const *yin,    // x1
+ *          void const *uvin,   // x2
+ *          size_t xstart,      // x3
+ *          size_t xend);       // x4
+ *
+ * Interleaved-chroma variant without the U/V swap.
+ */
+ENTRY(rsdIntrinsicYuvR_K)
+        bic         x5, x3, #1                  // x5 = xstart rounded down to even
+        add         x0, x0, x5, LSL #2          // out += x5 * 4
+        add         x1, x1, x5                  // yin += x5
+        add         x3, x2, x5                  // x3 = uvin + x5
+        sub         x2, x4, x5                  // x2 = xend - x5
+
+        sub         x5, sp, #32
+        sub         sp, sp, #64
+        st1         {v8.1d - v11.1d}, [sp]      // save the low 64 bits of v8-v15, which the macros overwrite
+        st1         {v12.1d - v15.1d}, [x5]
+
+        wrap_line yuvkern, 1
+
+        ld1         {v8.1d - v11.1d}, [sp], #32
+        ld1         {v12.1d - v15.1d}, [sp], #32
+        ret
+END(rsdIntrinsicYuvR_K)
diff --git a/toolkit/YuvToRgb_neon.S b/toolkit/YuvToRgb_neon.S
new file mode 100644
index 00000000..5c3bce41
--- /dev/null
+++ b/toolkit/YuvToRgb_neon.S
@@ -0,0 +1,298 @@
+/*
+
* Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
+#define END(f) .fnend; .size f, .-f;
+
+.eabi_attribute 25,1 @Tag_ABI_align8_preserved
+.arm
+
+/* Perform the actual YuvToRGB conversion in a macro, from register to
+ * register. This macro will be called from within several different wrapper
+ * variants for different data layouts. Y data starts in q8, but with the even
+ * and odd bytes split into d16 and d17 respectively. U and V are in d20
+ * and d21. Working constants are pre-loaded into q13-q15, and q3 is
+ * pre-loaded with a constant 0xff alpha channel.
+ *
+ * The complicated arithmetic is the result of refactoring the original
+ * equations to avoid 16-bit overflow without losing any precision.
+ *
+ * d14 and d15 (q7) are scratch: they are reloaded with each byte multiplier
+ * just before it is used. On exit q0, q1 and q2 hold R, G and B with the
+ * pixels back in order.
+ */
+.macro yuvkern
+        vmov.i8     d15, #149
+
+        vmull.u8    q1, d16, d15        // g0 = y0 * 149
+        vmull.u8    q5, d17, d15        // g1 = y1 * 149
+
+        vmov.i8     d14, #50
+        vmov.i8     d15, #104
+        vmull.u8    q8, d20, d14        // g2 = u * 50 + v * 104
+        vmlal.u8    q8, d21, d15
+
+        vshr.u8     d14, d21, #1        // d14 = v >> 1
+        vaddw.u8    q0, q1, d14         // r0 = y0 * 149 + (v >> 1)
+        vaddw.u8    q4, q5, d14         // r1 = y1 * 149 + (v >> 1)
+
+        vshll.u8    q7, d20, #2         // q7 = u << 2, widened to 16 bits
+        vadd.u16    q2, q1, q7          // b0 = y0 * 149 + (u << 2)
+        vadd.u16    q6, q5, q7          // b1 = y1 * 149 + (u << 2)
+
+        vmov.i8     d14, #204
+        vmov.i8     d15, #254
+        vmull.u8    q11, d21, d14       // r2 = v * 204
+        vmull.u8    q12, d20, d15       // b2 = u * 254
+
+        vhadd.u16   q0, q11             // r0 = (r0 + r2) >> 1
+        vhadd.u16   q4, q11             // r1 = (r1 + r2) >> 1
+        vqadd.u16   q1, q14             // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
+        vqadd.u16   q5, q14             // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
+        vhadd.u16   q2, q12             // b0 = (b0 + b2) >> 1
+        vhadd.u16   q6, q12             // b1 = (b1 + b2) >> 1
+
+        vqsub.u16   q0, q13             // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
+        vqsub.u16   q4, q13             // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
+        vqsub.u16   q1, q8              // g0 = satu16(g0 - g2)
+        vqsub.u16   q5, q8              // g1 = satu16(g1 - g2)
+        vqsub.u16   q2, q15             // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
+        vqsub.u16   q6, q15             // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
+        // Narrow each 16-bit result to a byte with rounding and unsigned saturation: R and B shift by 6, G by 7.
+        vqrshrn.u16 d0, q0, #6          // d0 = even-pixel R
+        vqrshrn.u16 d1, q1, #7          // d1 = even-pixel G
+        vqrshrn.u16 d2, q4, #6          // d2 = odd-pixel R
+        vqrshrn.u16 d3, q5, #7          // d3 = odd-pixel G
+        vqrshrn.u16 d4, q2, #6          // d4 = even-pixel B
+        vqrshrn.u16 d5, q6, #6          // d5 = odd-pixel B
+        // Interleave even and odd pixels again.
+        vzip.u8     q0, q1              // q0 = R (d0,d1), q1 = G (d2,d3)
+        vzip.u8     d4, d5              // q2 = B (d4,d5)
+.endm
+
+/* Define the wrapper code which will load and store the data, iterate the
+ * correct number of times, and safely handle the remainder at the end of the
+ * loop. Some sections of code are switched out depending on the data packing
+ * being handled.
+ */
+.macro wrap_line kernel, interleaved=0, swapuv=0
+
+        movw        r5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
+        vdup.i16    q13, r5             // bias the kernel subtracts from red
+        movw        r5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
+        vdup.i16    q14, r5             // bias the kernel adds to green
+        movw        r5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
+        vdup.i16    q15, r5             // bias the kernel subtracts from blue
+
+        vmov.i8     q3, #0xff           // constant alpha
+
+        subs        r2, #16             // r2 = pixel count; the main loop handles 16 per pass
+        bhs         1f
+        b           2f                  // fewer than 16 pixels: go straight to the tail
+
+        .align 4
+1:      vld2.u8     {d16,d17}, [r1]!    // 16 Y bytes, de-interleaved into even (d16) and odd (d17)
+        pld         [r1, #256]
+  .if \interleaved
+        vld2.u8     {d20,d21}, [r3]!    // interleaved chroma from one pointer
+    .if \swapuv
+        vswp        d20, d21            // put the chroma bytes in the order the kernel expects
+    .endif
+        pld         [r3, #256]
+  .else
+        vld1.u8     d20, [r3]!          // planar chroma: one plane per pointer
+        vld1.u8     d21, [r4]!
+        pld         [r3, #128]
+        pld         [r4, #128]
+  .endif
+
+        \kernel
+
+        subs        r2, #16
+
+        vst4.u8     {d0,d2,d4,d6}, [r0]!    // first 8 pixels as interleaved R,G,B,A
+        vst4.u8     {d1,d3,d5,d7}, [r0]!    // second 8 pixels
+
+        bhs         1b
+
+2:      adds        r2, #16             // undo the last subtraction: r2 = pixels left over
+        beq         2f                  // nothing left
+
+        /* To handle the tail portion of the data (something less than 16
+         * bytes) load small power-of-two chunks into working registers. It
+         * doesn't matter where they end up in the register; the same process
+         * will store them back out using the same positions and the
+         * interaction between neighbouring pixels is constrained to odd
+         * boundaries where the load operations don't interfere.
+         */
+        vmov.i8     q8, #0
+        vmov.i8     q10, #0
+
+        tst         r2, #8              // an 8-pixel chunk
+        beq         1f
+        vld1.u8     d17, [r1]!
+  .if \interleaved
+        vld1.u8     d21, [r3]!
+  .else
+        vld1.u32    d20[1], [r3]!
+        vld1.u32    d21[1], [r4]!
+  .endif
+
+1:      tst         r2, #4              // a 4-pixel chunk
+        beq         1f
+        vld1.u32    d16[1], [r1]!
+  .if \interleaved
+        vld1.u32    d20[1], [r3]!
+  .else
+        vld1.u16    d20[1], [r3]!
+        vld1.u16    d21[1], [r4]!
+  .endif
+1:      tst         r2, #2              // a 2-pixel chunk
+        beq         1f
+        vld1.u16    d16[1], [r1]!
+  .if \interleaved
+        vld1.u16    d20[1], [r3]!
+  .else
+        vld1.u8     d20[1], [r3]!
+        vld1.u8     d21[1], [r4]!
+  .endif
+1:      tst         r2, #1              // a single pixel
+        beq         1f
+        vld1.u8     d16[1], [r1]!
+  .if \interleaved
+        vld1.u16    d20[0], [r3]!
+  .else
+        vld1.u8     d20[0], [r3]!
+        vld1.u8     d21[0], [r4]!
+  .endif
+
+        /* One small impediment in the process above is that some of the load
+         * operations can't perform byte-wise structure deinterleaving at the
+         * same time as loading only part of a register. So the data is loaded
+         * linearly and unpacked manually at this point if necessary.
+         */
+1:      vuzp.8      d16, d17            // d16 = even Y bytes, d17 = odd Y bytes
+  .if \interleaved
+        vuzp.8      d20, d21
+    .if \swapuv
+        vswp        d20, d21
+    .endif
+  .endif
+
+        \kernel
+
+        /* As above but with the output; structured stores for partial vectors
+         * aren't available, so the data is re-packed first and stored linearly.
+         */
+        vzip.8      q0, q2
+        vzip.8      q1, q3
+        vzip.8      q0, q1
+        vzip.8      q2, q3
+
+1:      tst         r2, #8
+        beq         1f
+        vst1.u8     {d4,d5,d6,d7}, [r0]!    // the 8-pixel chunk
+
+1:      tst         r2, #4
+        beq         1f
+        vst1.u8     {d2,d3}, [r0]!          // the 4-pixel chunk
+1:      tst         r2, #2
+        beq         1f
+        vst1.u8     d1, [r0]!               // the 2-pixel chunk
+1:      tst         r2, #1
+        beq         2f
+        vst1.u32    d0[1], [r0]!            // the single pixel
+2:
+.endm
+
+
+/* void rsdIntrinsicYuv2_K(
+ *          void *out,          // r0
+ *          void const *yin,    // r1
+ *          void const *uin,    // r2
+ *          void const *vin,    // r3
+ *          size_t xstart,      // [sp]
+ *          size_t xend);       // [sp+#4]
+ *
+ * Planar variant: U and V come from separate pointers, each indexed by
+ * xstart / 2.
+ */
+ENTRY(rsdIntrinsicYuv2_K)
+        push        {r4,r5}
+        ldr         r5, [sp, #8]        // r5 = xstart (stack arguments sit 8 bytes higher after the push)
+        mov         r4, r3              // r4 = vin
+        mov         r3, r2              // r3 = uin
+        ldr         r2, [sp, #12]       // r2 = xend
+
+        add         r0, r5, LSL #2      // out += xstart * 4
+        add         r1, r5              // yin += xstart
+        add         r3, r5, LSR #1      // uin += xstart / 2
+        add         r4, r5, LSR #1      // vin += xstart / 2
+        sub         r2, r5              // r2 = xend - xstart
+
+        vpush       {d8-d15}            // the macros overwrite q4-q7
+
+        wrap_line yuvkern, 0
+
+        vpop        {d8-d15}
+        pop         {r4,r5}
+        bx          lr
+END(rsdIntrinsicYuv2_K)
+
+/* void rsdIntrinsicYuv_K(
+ *          void *out,          // r0
+ *          void const *yin,    // r1
+ *          void const *uvin,   // r2
+ *          size_t xstart,      // r3
+ *          size_t xend);       // [sp]
+ *
+ * Interleaved-chroma variant with the U/V swap enabled.
+ */
+ENTRY(rsdIntrinsicYuv_K)
+        push        {r4,r5}
+        bic         r4, r3, #1          // r4 = xstart rounded down to even
+        add         r3, r2, r4          // r3 = uvin + r4
+        ldr         r2, [sp, #8]        // r2 = xend (stack arguments sit 8 bytes higher after the push)
+
+        add         r0, r4, LSL #2      // out += r4 * 4
+        add         r1, r4              // yin += r4
+        sub         r2, r4              // r2 = xend - r4
+
+        vpush       {d8-d15}            // the macros overwrite q4-q7
+
+        wrap_line yuvkern, 1, 1
+
+        vpop        {d8-d15}
+        pop         {r4,r5}
+        bx          lr
+END(rsdIntrinsicYuv_K)
+
+/* void rsdIntrinsicYuvR_K(
+ *          void *out,          // r0
+ *          void const *yin,    // r1
+ *          void const *uvin,   // r2
+ *          size_t xstart,      // r3
+ *          size_t xend);       // [sp]
+ *
+ * Interleaved-chroma variant without the U/V swap.
+ */
+ENTRY(rsdIntrinsicYuvR_K)
+        push        {r4,r5}
+        bic         r4, r3, #1          // r4 = xstart rounded down to even
+        add         r3, r2, r4          // r3 = uvin + r4
+        ldr         r2, [sp, #8]        // r2 = xend (stack arguments sit 8 bytes higher after the push)
+
+        add         r0, r4, LSL #2      // out += r4 * 4
+        add         r1, r4              // yin += r4
+        sub         r2, r4              // r2 = xend - r4
+
+        vpush       {d8-d15}            // the macros overwrite q4-q7
+
+        wrap_line yuvkern, 1
+
+        vpop        {d8-d15}
+        pop         {r4,r5}
+        bx          lr
+END(rsdIntrinsicYuvR_K)
diff --git
a/toolkit/java/Toolkit.kt b/toolkit/java/Toolkit.kt new file mode 100644 index 00000000..438f2414 --- /dev/null +++ b/toolkit/java/Toolkit.kt @@ -0,0 +1,1566 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.android.renderscript + +import android.graphics.Bitmap +import java.lang.IllegalArgumentException + +// This string is used for error messages. +private const val externalName = "RenderScript Toolkit" + +/** + * A collection of high-performance graphic utility functions like blur and blend. + * + * This toolkit provides ten image manipulation functions: blend, blur, color matrix, convolve, + * histogram, histogramDot, lut, lut3d, resize, and YUV to RGB. These functions execute + * multithreaded on the CPU. + * + * Most of the functions have two variants: one that manipulates Bitmaps, the other ByteArrays. + * For ByteArrays, you need to specify the width and height of the data to be processed, as + * well as the number of bytes per pixel. For most use cases, this will be 4. + * + * The Toolkit creates a thread pool that's used for processing the functions. The threads live + * for the duration of the application. They can be destroyed by calling the method shutdown(). + * + * This library is thread safe. You can call methods from different threads. The functions will + * execute sequentially. + * + * A native C++ version of this Toolkit is available.
Check the RenderScriptToolkit.h file in the
+ * cpp directory.
+ *
+ * This toolkit can be used as a replacement for most RenderScript Intrinsic functions. Compared
+ * to RenderScript, it's simpler to use and more than twice as fast on the CPU. However RenderScript
+ * Intrinsics allow more flexibility for the type of allocation supported. In particular, this
+ * toolkit does not support allocations of floats.
+ */
+object Toolkit {
+    /**
+     * Blends a source buffer with the destination buffer.
+     *
+     * Blends a source buffer and a destination buffer, placing the result in the destination
+     * buffer. The blending is done pairwise between two corresponding RGBA values found in
+     * each buffer. The mode parameter specifies one of fifteen supported blending operations.
+     * See [BlendingMode].
+     *
+     * A variant of this method is also available to blend Bitmaps.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY.
+     *
+     * The source and destination buffer must have the same dimensions. Both arrays should have
+     * a size greater or equal to sizeX * sizeY * 4. The buffers have a row-major layout.
+     *
+     * @param mode The specific blending operation to do.
+     * @param sourceArray The RGBA input buffer.
+     * @param destArray The destination buffer. Used for input and output.
+     * @param sizeX The width of both buffers, as a number of RGBA values.
+     * @param sizeY The height of both buffers, as a number of RGBA values.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @throws IllegalArgumentException if either array is smaller than sizeX * sizeY * 4 bytes.
+     */
+    @JvmOverloads
+    fun blend(
+        mode: BlendingMode,
+        sourceArray: ByteArray,
+        destArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d? = null
+    ) {
+        // The messages print "required > actual", which is the relation that holds on failure.
+        require(sourceArray.size >= sizeX * sizeY * 4) {
+            "$externalName blend. sourceArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${sourceArray.size}."
+        }
+        require(destArray.size >= sizeX * sizeY * 4) {
+            "$externalName blend. destArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${destArray.size}."
+        }
+        validateRestriction("blend", sizeX, sizeY, restriction)
+
+        nativeBlend(nativeHandle, mode.value, sourceArray, destArray, sizeX, sizeY, restriction)
+    }
+
+    /**
+     * Blends a source bitmap with the destination bitmap.
+     *
+     * Blends a source bitmap and a destination bitmap, placing the result in the destination
+     * bitmap. The blending is done pairwise between two corresponding RGBA values found in
+     * each bitmap. The mode parameter specifies one of fifteen supported blending operations.
+     * See [BlendingMode].
+     *
+     * A variant of this method is available to blend ByteArrays.
+     *
+     * The bitmaps should have identical width and height, and have a config of ARGB_8888.
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each bitmap. If provided, the range must be wholly contained within the dimensions
+     * of the bitmap.
+     *
+     * @param mode The specific blending operation to do.
+     * @param sourceBitmap The RGBA input buffer.
+     * @param destBitmap The destination buffer. Used for input and output.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @throws IllegalArgumentException if the bitmaps differ in size or config.
+     */
+    @JvmOverloads
+    fun blend(
+        mode: BlendingMode,
+        sourceBitmap: Bitmap,
+        destBitmap: Bitmap,
+        restriction: Range2d? = null
+    ) {
+        validateBitmap("blend", sourceBitmap)
+        validateBitmap("blend", destBitmap)
+        require(
+            sourceBitmap.width == destBitmap.width &&
+                    sourceBitmap.height == destBitmap.height
+        ) {
+            "$externalName blend. Source and destination bitmaps should be the same size. " +
+                    "${sourceBitmap.width}x${sourceBitmap.height} and " +
+                    "${destBitmap.width}x${destBitmap.height} provided."
+        }
+        require(sourceBitmap.config == destBitmap.config) {
+            "RenderScript Toolkit blend. Source and destination bitmaps should have the same " +
+                    "config. ${sourceBitmap.config} and ${destBitmap.config} provided."
+        }
+        validateRestriction("blend", sourceBitmap.width, sourceBitmap.height, restriction)
+
+        nativeBlendBitmap(nativeHandle, mode.value, sourceBitmap, destBitmap, restriction)
+    }
+
+    /**
+     * Blurs an image.
+     *
+     * Performs a Gaussian blur of an image and returns result in a ByteArray buffer. A variant of
+     * this method is available to blur Bitmaps.
+     *
+     * The radius determines which pixels are used to compute each blurred pixel. This Toolkit
+     * accepts values between 1 and 25. Larger values create a more blurred effect but also
+     * take longer to compute. When the radius extends past the edge, the edge pixel will
+     * be used as replacement for the pixel that's out of bounds.
+     *
+     * Each input pixel can either be represented by four bytes (RGBA format) or one byte
+     * for the less common blurring of alpha channel only image.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output buffer will still be full size, with the
+     * section that's not blurred all set to 0. This is to stay compatible with RenderScript.
+     *
+     * The source buffer should be large enough for sizeX * sizeY * vectorSize bytes. It has a
+     * row-major layout.
+     *
+     * @param inputArray The buffer of the image to be blurred.
+     * @param vectorSize Either 1 or 4, the number of bytes in each cell, i.e. A vs. RGBA.
+     * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
+     * @param radius The radius of the pixels used to blur, a value from 1 to 25.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The blurred pixels, a new ByteArray of the same size as inputArray.
+     * @throws IllegalArgumentException if vectorSize is not 1 or 4, if inputArray is smaller than
+     * sizeX * sizeY * vectorSize bytes, or if radius is outside 1..25.
+     */
+    @JvmOverloads
+    fun blur(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        radius: Int = 5,
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(vectorSize == 1 || vectorSize == 4) {
+            "$externalName blur. The vectorSize should be 1 or 4. $vectorSize provided."
+        }
+        // The message prints "required > actual", which is the relation that holds on failure.
+        require(inputArray.size >= sizeX * sizeY * vectorSize) {
+            "$externalName blur. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*$vectorSize > ${inputArray.size}."
+        }
+        require(radius in 1..25) {
+            "$externalName blur. The radius should be between 1 and 25. $radius provided."
+        }
+        validateRestriction("blur", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(inputArray.size)
+        nativeBlur(
+            nativeHandle, inputArray, vectorSize, sizeX, sizeY, radius, outputArray, restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Blurs an image.
+     *
+     * Performs a Gaussian blur of a Bitmap and returns result as a Bitmap. A variant of
+     * this method is available to blur ByteArrays.
+     *
+     * The radius determines which pixels are used to compute each blurred pixel. This Toolkit
+     * accepts values between 1 and 25. Larger values create a more blurred effect but also
+     * take longer to compute. When the radius extends past the edge, the edge pixel will
+     * be used as replacement for the pixel that's out of bounds.
+     *
+     * This method supports input Bitmap of config ARGB_8888 and ALPHA_8. Bitmaps with a stride
+     * different than width * vectorSize are not currently supported. The returned Bitmap has the
+     * same config.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of the bitmap. If provided, the range must be wholly contained within the dimensions
+     * of the bitmap.
NOTE: The output Bitmap will still be full size, with the
+     * section that's not blurred all set to 0. This is to stay compatible with RenderScript.
+     *
+     * @param inputBitmap The image to be blurred.
+     * @param radius The radius of the pixels used to blur, a value from 1 to 25. Default is 5.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return A newly created Bitmap holding the blurred image.
+     */
+    @JvmOverloads
+    fun blur(inputBitmap: Bitmap, radius: Int = 5, restriction: Range2d? = null): Bitmap {
+        validateBitmap("blur", inputBitmap)
+        require(radius >= 1 && radius <= 25) {
+            "$externalName blur. The radius should be between 1 and 25. $radius provided."
+        }
+        validateRestriction("blur", inputBitmap.width, inputBitmap.height, restriction)
+
+        // Create the destination first, then let the native code fill it in.
+        return createCompatibleBitmap(inputBitmap).also { blurred ->
+            nativeBlurBitmap(nativeHandle, inputBitmap, blurred, radius, restriction)
+        }
+    }
+
+    /**
+     * Identity matrix for use with the colorMatrix methods.
+     *
+     * Multiplying by this matrix leaves each pixel unchanged, although the pixel value can
+     * still be modified by the add vector, or transformed to a different format.
+     * Every read returns a new 16-element array.
+     */
+    val identityMatrix: FloatArray
+        get() {
+            return floatArrayOf(
+                1f, 0f, 0f, 0f,
+                0f, 1f, 0f, 0f,
+                0f, 0f, 1f, 0f,
+                0f, 0f, 0f, 1f
+            )
+        }
+
+    /**
+     * Matrix that turns color pixels into grey scale.
+     *
+     * Pass it to one of the colorMatrix methods to convert an image from color to greyscale.
+     * Every read returns a new 16-element array.
+     */
+    val greyScaleColorMatrix: FloatArray
+        get() {
+            return floatArrayOf(
+                0.299f, 0.299f, 0.299f, 0f,
+                0.587f, 0.587f, 0.587f, 0f,
+                0.114f, 0.114f, 0.114f, 0f,
+                0f, 0f, 0f, 1f
+            )
+        }
+
+    /**
+     * Matrix to convert RGB to YUV.
+     *
+     * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the
+     * first three bytes of each pixel from RGB to YUV. This leaves the last byte (the alpha
+     * channel) untouched.
+     *
+     * This is a simplistic conversion. Most YUV buffers have more complicated format, not supported
+     * by this method.
+     */
+    val rgbToYuvMatrix: FloatArray
+        get() {
+            // Every read builds a new 16-element array.
+            return floatArrayOf(
+                0.299f, -0.14713f, 0.615f, 0f,
+                0.587f, -0.28886f, -0.51499f, 0f,
+                0.114f, 0.436f, -0.10001f, 0f,
+                0f, 0f, 0f, 1f
+            )
+        }
+
+    /**
+     * Matrix to convert YUV to RGB.
+     *
+     * Pass it to one of the colorMatrix methods to convert the first three bytes of each pixel
+     * from YUV to RGB. The last byte (the alpha channel) is left untouched.
+     *
+     * This is a simplistic conversion. Most YUV buffers have a more complicated format that this
+     * matrix does not handle; use the toolkit's YUV to RGB function for those buffers.
+     */
+    val yuvToRgbMatrix: FloatArray
+        get() {
+            // Every read builds a new 16-element array.
+            return floatArrayOf(
+                1f, 1f, 1f, 0f,
+                0f, -0.39465f, 2.03211f, 0f,
+                1.13983f, -0.5806f, 0f, 0f,
+                0f, 0f, 0f, 1f
+            )
+        }
+
+    /**
+     * Transform an image using a color matrix.
+     *
+     * Converts a 2D array of vectors of unsigned bytes, multiplying each vectors by a 4x4 matrix
+     * and adding an optional vector.
+     *
+     * Each input vector is composed of 1-4 unsigned bytes. If less than 4 bytes, it's extended to
+     * 4, padding with zeroes. The unsigned bytes are converted from 0-255 to 0.0-1.0 floats
+     * before the multiplication is done.
+     *
+     * The resulting value is normalized from 0.0-1.0 to a 0-255 value and stored in the output.
+     * If the output vector size is less than four, the unused channels are discarded.
+     *
+     * If addVector is not specified, a vector of zeroes is added, i.e. a noop.
+     *
+     * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes.
+     *
+     * Check identityMatrix, greyScaleColorMatrix, rgbToYuvMatrix, and yuvToRgbMatrix for sample
+     * matrices. The YUV conversion may not work for all color spaces.
+     *
+     * @param inputArray The buffer of the image to be converted.
+     * @param inputVectorSize The number of bytes in each input cell, a value from 1 to 4.
+     * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
+     * @param outputVectorSize The number of bytes in each output cell, a value from 1 to 4.
+     * @param matrix The 4x4 matrix to multiply, in row major format.
+     * @param addVector A vector of four floats that's added to the result of the multiplication.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The converted buffer.
+     * @throws IllegalArgumentException if a vector size is outside 1..4, if inputArray is too
+     * small, if matrix does not have 16 entries, or if addVector does not have 4 entries.
+     */
+    @JvmOverloads
+    fun colorMatrix(
+        inputArray: ByteArray,
+        inputVectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputVectorSize: Int,
+        matrix: FloatArray,
+        addVector: FloatArray = floatArrayOf(0f, 0f, 0f, 0f),
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(inputVectorSize in 1..4) {
+            "$externalName colorMatrix. The inputVectorSize should be between 1 and 4. " +
+                    "$inputVectorSize provided."
+        }
+        require(outputVectorSize in 1..4) {
+            "$externalName colorMatrix. The outputVectorSize should be between 1 and 4. " +
+                    "$outputVectorSize provided."
+        }
+        // NOTE(review): the KDoc says 3-byte vectors are padded to 4 bytes and the output below is
+        // sized with paddedSize(), but this check uses the unpadded inputVectorSize. Confirm
+        // whether paddedSize(inputVectorSize) is what the native code actually requires.
+        // The message prints "required > actual", which is the relation that holds on failure.
+        require(inputArray.size >= sizeX * sizeY * inputVectorSize) {
+            "$externalName colorMatrix. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*$inputVectorSize > ${inputArray.size}."
+        }
+        require(matrix.size == 16) {
+            "$externalName colorMatrix. matrix should have 16 entries. ${matrix.size} provided."
+        }
+        require(addVector.size == 4) {
+            "$externalName colorMatrix. addVector should have 4 entries. " +
+                    "${addVector.size} provided."
+        }
+        validateRestriction("colorMatrix", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))
+        nativeColorMatrix(
+            nativeHandle, inputArray, inputVectorSize, sizeX, sizeY, outputArray, outputVectorSize,
+            matrix, addVector, restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Transform an image using a color matrix.
+     *
+     * Converts a bitmap, multiplying each RGBA value by a 4x4 matrix and adding an optional vector.
+     * Each byte of the RGBA is converted from 0-255 to 0.0-1.0 floats before the multiplication
+     * is done.
+     *
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * The resulting value is normalized from 0.0-1.0 to a 0-255 value and stored in the output.
+     *
+     * If addVector is not specified, a vector of zeroes is added, i.e. a noop.
+     *
+     * Check identityMatrix, greyScaleColorMatrix, rgbToYuvMatrix, and yuvToRgbMatrix for sample
+     * matrices. The YUV conversion may not work for all color spaces.
+     *
+     * @param inputBitmap The image to be converted.
+     * @param matrix The 4x4 matrix to multiply, in row major format.
+     * @param addVector A vector of four floats that's added to the result of the multiplication.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The converted Bitmap.
+     * @throws IllegalArgumentException if matrix does not have 16 entries or addVector does not
+     * have 4 entries.
+     */
+    @JvmOverloads
+    fun colorMatrix(
+        inputBitmap: Bitmap,
+        matrix: FloatArray,
+        addVector: FloatArray = floatArrayOf(0f, 0f, 0f, 0f),
+        restriction: Range2d? = null
+    ): Bitmap {
+        validateBitmap("colorMatrix", inputBitmap)
+        require(matrix.size == 16) {
+            "$externalName colorMatrix. matrix should have 16 entries. ${matrix.size} provided."
+        }
+        // Report the offending size, as the ByteArray variant does.
+        require(addVector.size == 4) {
+            "$externalName colorMatrix. addVector should have 4 entries. " +
+                    "${addVector.size} provided."
+        }
+        validateRestriction("colorMatrix", inputBitmap.width, inputBitmap.height, restriction)
+
+        val outputBitmap = createCompatibleBitmap(inputBitmap)
+        nativeColorMatrixBitmap(
+            nativeHandle,
+            inputBitmap,
+            outputBitmap,
+            matrix,
+            addVector,
+            restriction
+        )
+        return outputBitmap
+    }
+
+    /**
+     * Convolve a ByteArray.
+     *
+     * Applies a 3x3 or 5x5 convolution to the input array using the provided coefficients.
+     * A variant of this method is available to convolve Bitmaps.
+     *
+     * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed.
+     * The coefficients should be provided in row-major format.
+     *
+     * When the square extends past the edge, the edge values will be used as replacement for the
+     * values that are out of bounds.
+     *
+     * Each input cell can either be represented by one to four bytes. Each byte is multiplied
+     * and accumulated independently of the other bytes of the cell.
+     *
+     * An optional range parameter can be set to restrict the convolve operation to a rectangular
+     * subset of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output buffer will still be full size, with the
+     * section that's not convolved all set to 0. This is to stay compatible with RenderScript.
+     *
+     * The source array should be large enough for sizeX * sizeY * vectorSize bytes. It has a
+     * row-major layout. The output array will have the same dimensions.
+     *
+     * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes.
+     *
+     * @param inputArray The buffer of the image to be convolved.
+     * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
+     * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
+     * @param coefficients A FloatArray of size 9 or 25, containing the multipliers.
+ * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The convolved array, the same size as inputArray. + */ + @JvmOverloads + fun convolve( + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + coefficients: FloatArray, + restriction: Range2d? = null + ): ByteArray { + require(vectorSize in 1..4) { + "$externalName convolve. The vectorSize should be between 1 and 4. " + + "$vectorSize provided." + } + require(inputArray.size >= sizeX * sizeY * paddedSize(vectorSize)) { + "$externalName convolve. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $sizeX*$sizeY*${paddedSize(vectorSize)}." + } + require(coefficients.size == 9 || coefficients.size == 25) { + "$externalName convolve. Only 3x3 or 5x5 convolutions are supported. " + + "${coefficients.size} coefficients provided." + } + validateRestriction("convolve", sizeX, sizeY, restriction) + + val outputArray = ByteArray(inputArray.size) + nativeConvolve( + nativeHandle, + inputArray, + vectorSize, + sizeX, + sizeY, + outputArray, + coefficients, + restriction + ) + return outputArray + } + + /** + * Convolve a Bitmap. + * + * Applies a 3x3 or 5x5 convolution to the input Bitmap using the provided coefficients. + * A variant of this method is available to convolve ByteArrays. Bitmaps with a stride different + * than width * vectorSize are not currently supported. + * + * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed. + * The coefficients should be provided in row-major format. + * + * Each input cell can be represented by one to four bytes. Each byte is multiplied + * and accumulated independently of the other bytes of the cell. + * + * An optional range parameter can be set to restrict the convolve operation to a rectangular + * subset of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. 
NOTE: The output Bitmap will still be full size, with the + * section that's not convolved all set to 0. This is to stay compatible with RenderScript. + * + * @param inputBitmap The image to be blurred. + * @param coefficients A FloatArray of size 9 or 25, containing the multipliers. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The convolved Bitmap. + */ + @JvmOverloads + fun convolve( + inputBitmap: Bitmap, + coefficients: FloatArray, + restriction: Range2d? = null + ): Bitmap { + validateBitmap("convolve", inputBitmap) + require(coefficients.size == 9 || coefficients.size == 25) { + "$externalName convolve. Only 3x3 or 5x5 convolutions are supported. " + + "${coefficients.size} coefficients provided." + } + validateRestriction("convolve", inputBitmap, restriction) + + val outputBitmap = createCompatibleBitmap(inputBitmap) + nativeConvolveBitmap(nativeHandle, inputBitmap, outputBitmap, coefficients, restriction) + return outputBitmap + } + + /** + * Compute the histogram of an image. + * + * Tallies how many times each of the 256 possible values of a byte is found in the input. + * A variant of this method is available to do the histogram of a Bitmap. + * + * An input cell can be represented by one to four bytes. The tally is done independently + * for each of the bytes of the cell. Correspondingly, the returned IntArray will have + * 256 * vectorSize entries. The counts for value 0 are consecutive, followed by those for + * value 1, etc. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained with the dimensions + * described by sizeX and sizeY. + * + * The source buffer should be large enough for sizeX * sizeY * vectorSize bytes. It has a + * row-major layout. + * + * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes. 
+ * + * @param inputArray The buffer of the image to be analyzed. + * @param vectorSize The number of bytes in each cell, a value from 1 to 4. + * @param sizeX The width of the input buffers, as a number of 1 to 4 byte cells. + * @param sizeY The height of the input buffers, as a number of 1 to 4 byte cells. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The resulting array of counts, 256 entries per byte of a (padded) cell. + */ + @JvmOverloads + fun histogram( + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + restriction: Range2d? = null + ): IntArray { + require(vectorSize in 1..4) { + "$externalName histogram. The vectorSize should be between 1 and 4. " + + "$vectorSize provided." + } + require(inputArray.size >= sizeX * sizeY * paddedSize(vectorSize)) { + "$externalName histogram. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $sizeX*$sizeY*${paddedSize(vectorSize)}." + } + validateRestriction("histogram", sizeX, sizeY, restriction) + + val outputArray = IntArray(256 * paddedSize(vectorSize)) + nativeHistogram( + nativeHandle, + inputArray, + vectorSize, + sizeX, + sizeY, + outputArray, + restriction + ) + return outputArray + } + + /** + * Compute the histogram of an image. + * + * Tallies how many times each of the 256 possible values of a byte is found in the bitmap. + * This method supports Bitmaps of config ARGB_8888 and ALPHA_8. + * + * For ARGB_8888, the tally is done independently for each of the four bytes. Correspondingly, the + * returned IntArray will have 4 * 256 entries. The counts for value 0 are consecutive, + * followed by those for value 1, etc. + * + * For ALPHA_8, an IntArray of size 256 is returned. + * + * Bitmaps with a stride different than width * vectorSize are not currently supported. + * + * A variant of this method is available to do the histogram of a ByteArray. 
+ * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. 
If provided, the range must be wholly contained with the dimensions + * described by sizeX and sizeY. + * + * @param inputBitmap The bitmap to be analyzed. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The resulting array of counts. + */ + @JvmOverloads + fun histogram( + inputBitmap: Bitmap, + restriction: Range2d? = null + ): IntArray { + validateBitmap("histogram", inputBitmap) + validateRestriction("histogram", inputBitmap, restriction) + + val outputArray = IntArray(256 * vectorSize(inputBitmap)) + nativeHistogramBitmap(nativeHandle, inputBitmap, outputArray, restriction) + return outputArray + } + + /** + * Compute the histogram of the dot product of an image. + * + * This method supports cells of 1 to 4 bytes in length. For each cell of the array, + * the dot product of its bytes with the provided coefficients is computed. The resulting + * floating point value is converted to an unsigned byte and tallied in the histogram. + * + * If coefficients is null, the coefficients used for RGBA luminosity calculation will be used, + * i.e. the values [0.299f, 0.587f, 0.114f, 0.f]. + * + * Each coefficients must be >= 0 and their sum must be 1.0 or less. There must be the same + * number of coefficients as vectorSize. + * + * A variant of this method is available to do the histogram of a Bitmap. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained with the dimensions + * described by sizeX and sizeY. + * + * The source buffer should be large enough for sizeX * sizeY * vectorSize bytes. The returned + * array will have 256 ints. + * + * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes. + * + * @param inputArray The buffer of the image to be analyzed. + * @param vectorSize The number of bytes in each cell, a value from 1 to 4. 
+ * @param sizeX The width of the input buffers, as a number of 1 to 4 byte cells. + * @param sizeY The height of the input buffers, as a number of 1 to 4 byte cells. + * @param coefficients The dot product multipliers. Size should equal vectorSize. Can be null. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The resulting vector of 256 counts. + */ + @JvmOverloads + fun histogramDot( + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + coefficients: FloatArray? = null, + restriction: Range2d? = null + ): IntArray { + require(vectorSize in 1..4) { + "$externalName histogramDot. The vectorSize should be between 1 and 4. " + + "$vectorSize provided." + } + require(inputArray.size >= sizeX * sizeY * paddedSize(vectorSize)) { + "$externalName histogramDot. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $sizeX*$sizeY*${paddedSize(vectorSize)}." + } + validateHistogramDotCoefficients(coefficients, vectorSize) + validateRestriction("histogramDot", sizeX, sizeY, restriction) + + val outputArray = IntArray(256) + val actualCoefficients = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f) + nativeHistogramDot( + nativeHandle, + inputArray, + vectorSize, + sizeX, + sizeY, + outputArray, + actualCoefficients, + restriction + ) + return outputArray + } + + /** + * Compute the histogram of the dot product of an image. + * + * This method supports Bitmaps of config ARGB_8888 and ALPHA_8. For each pixel of the bitmap, + * the dot product of its bytes with the provided coefficients is computed. The resulting + * floating point value is converted to an unsigned byte and tallied in the histogram. + * + * If coefficients is null, the coefficients used for RGBA luminosity calculation will be used, + * i.e. the values [0.299f, 0.587f, 0.114f, 0.f]. + * + * Each coefficient must be >= 0 and their sum must be 1.0 or less. 
For ARGB_8888, four values + * must be provided; for ALPHA_8, one. 
+ * + * Bitmaps with a stride different than width * vectorSize are not currently supported. + * + * A variant of this method is available to do the histogram of a ByteArray. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained with the dimensions + * described by sizeX and sizeY. + * + * The returned array will have 256 ints. + * + * @param inputBitmap The bitmap to be analyzed. + * @param coefficients The one or four values used for the dot product. Can be null. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The resulting vector of counts. + */ + @JvmOverloads + fun histogramDot( + inputBitmap: Bitmap, + coefficients: FloatArray? = null, + restriction: Range2d? = null + ): IntArray { + validateBitmap("histogramDot", inputBitmap) + validateHistogramDotCoefficients(coefficients, vectorSize(inputBitmap)) + validateRestriction("histogramDot", inputBitmap, restriction) + + val outputArray = IntArray(256) + val actualCoefficients = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f) + nativeHistogramDotBitmap( + nativeHandle, inputBitmap, outputArray, actualCoefficients, restriction + ) + return outputArray + } + + /** + * Transform an image using a look up table + * + * Transforms an image by using a per-channel lookup table. Each channel of the input has an + * independent lookup table. The tables are 256 entries in size and can cover the full value + * range of a byte. + * + * The input array should be in RGBA format, where four consecutive bytes form an cell. + * A variant of this method is available to transform a Bitmap. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained with the dimensions + * described by sizeX and sizeY. 
NOTE: The output array will still be full size, with the + * section that's not transformed all set to 0. This is to stay compatible with RenderScript. + * + * The source array should be large enough for sizeX * sizeY * 4 bytes. The returned + * array has the same dimensions as the input. The arrays have a row-major layout. + * + * @param inputArray The buffer of the image to be transformed. + * @param sizeX The width of both buffers, as a number of 4 byte cells. + * @param sizeY The height of both buffers, as a number of 4 byte cells. + * @param table The four arrays of 256 values that are used to convert each channel. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The transformed image, the same size as inputArray. + */ + @JvmOverloads + fun lut( + inputArray: ByteArray, + sizeX: Int, + sizeY: Int, + table: LookupTable, + restriction: Range2d? = null + ): ByteArray { + require(inputArray.size >= sizeX * sizeY * 4) { + "$externalName lut. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $sizeX*$sizeY*4." + } + validateRestriction("lut", sizeX, sizeY, restriction) + + val outputArray = ByteArray(inputArray.size) + nativeLut( + nativeHandle, + inputArray, + outputArray, + sizeX, + sizeY, + table.red, + table.green, + table.blue, + table.alpha, + restriction + ) + return outputArray + } + + /** + * Transform an image using a look up table + * + * Transforms an image by using a per-channel lookup table. Each channel of the input has an + * independent lookup table. The tables are 256 entries in size and can cover the full value + * range of a byte. + * + * The input Bitmap should be in config ARGB_8888. A variant of this method is available to + * transform a ByteArray. Bitmaps with a stride different than width * vectorSize are not + * currently supported. 
+ * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. 
If provided, the range must be wholly contained within the dimensions + * of the Bitmap. NOTE: The output Bitmap will still be full size, with the + * section that's not transformed all set to 0. This is to stay compatible with RenderScript. + * + * @param inputBitmap The image to be transformed. Must be of config ARGB_8888. + * @param table The four arrays of 256 values that are used to convert each channel. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The transformed image. + */ + @JvmOverloads + fun lut( + inputBitmap: Bitmap, + table: LookupTable, + restriction: Range2d? = null + ): Bitmap { + validateBitmap("lut", inputBitmap, alphaAllowed = false) + validateRestriction("lut", inputBitmap, restriction) + + val outputBitmap = createCompatibleBitmap(inputBitmap) + nativeLutBitmap( + nativeHandle, + inputBitmap, + outputBitmap, + table.red, + table.green, + table.blue, + table.alpha, + restriction + ) + return outputBitmap + } + + /** + * Transform an image using a 3D look up table + * + * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G, + * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest + * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry + * is returned in the output array. + * + * The input array should be in RGBA format, where four consecutive bytes form a cell. + * The fourth byte of each input cell is ignored. A variant of this method is also available + * to transform Bitmaps. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * described by sizeX and sizeY. NOTE: The output array will still be full size, with the + * section that's not transformed all set to 0. This is to stay compatible with RenderScript. 
+ * + * The source array should be large enough for sizeX * sizeY * 4 bytes. The returned + * array will have the same dimensions. The arrays have a row-major layout. + * + * @param inputArray The buffer of the image to be transformed. + * @param sizeX The width of both buffers, as a number of 4 byte cells. + * @param sizeY The height of both buffers, as a number of 4 byte cells. + * @param cube The translation cube. Each of its dimensions must be between 2 and 256. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The transformed image, the same size as inputArray. + */ + @JvmOverloads + fun lut3d( + inputArray: ByteArray, + sizeX: Int, + sizeY: Int, + cube: Rgba3dArray, + restriction: Range2d? = null + ): ByteArray { + require(inputArray.size >= sizeX * sizeY * 4) { + "$externalName lut3d. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $sizeX*$sizeY*4." + } + require( + cube.sizeX >= 2 && cube.sizeY >= 2 && cube.sizeZ >= 2 && + cube.sizeX <= 256 && cube.sizeY <= 256 && cube.sizeZ <= 256 + ) { + "$externalName lut3d. The dimensions of the cube should be between 2 and 256. " + + "(${cube.sizeX}, ${cube.sizeY}, ${cube.sizeZ}) provided." + } + validateRestriction("lut3d", sizeX, sizeY, restriction) + + val outputArray = ByteArray(inputArray.size) + nativeLut3d( + nativeHandle, inputArray, outputArray, sizeX, sizeY, cube.values, cube.sizeX, + cube.sizeY, cube.sizeZ, restriction + ) + return outputArray + } + + /** + * Transform an image using a 3D look up table + * + * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G, + * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest + * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry + * is returned in the output array. + * + * The input bitmap should be in ARGB_8888 format. 
The A channel is preserved. A variant of this + * method is also available to transform a ByteArray. 
Bitmaps with a stride different than + * width * vectorSize are not currently supported. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of each buffer. If provided, the range must be wholly contained within the dimensions + * of the Bitmap. NOTE: The output Bitmap will still be full size, with the + * section that's not transformed all set to 0. This is to stay compatible with RenderScript. + * + * The returned Bitmap has the same dimensions and config as the input, since it is + * created from inputBitmap's width, height and config. + * + * @param inputBitmap The image to be transformed. Must be of config ARGB_8888. + * @param cube The translation cube. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return The transformed image. + */ + @JvmOverloads + fun lut3d( + inputBitmap: Bitmap, + cube: Rgba3dArray, + restriction: Range2d? = null + ): Bitmap { + validateBitmap("lut3d", inputBitmap, alphaAllowed = false) + validateRestriction("lut3d", inputBitmap, restriction) + + val outputBitmap = createCompatibleBitmap(inputBitmap) + nativeLut3dBitmap( + nativeHandle, inputBitmap, outputBitmap, cube.values, cube.sizeX, + cube.sizeY, cube.sizeZ, restriction + ) + return outputBitmap + } + + /** + * Resize an image. + * + * Resizes an image using bicubic interpolation. + * + * This method supports elements of 1 to 4 bytes in length. Each byte of the element is + * interpolated independently from the others. + * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of the output buffer. The corresponding scaled range of the input will be used. If provided, + * the range must be wholly contained within the dimensions described by outputSizeX and + * outputSizeY. + * + * The input and output arrays have a row-major layout. 
The input array should be + * large enough for inputSizeX * inputSizeY * vectorSize bytes. 
+ * + * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes. + * + * @param inputArray The buffer of the image to be resized. + * @param vectorSize The number of bytes in each element of both buffers. A value from 1 to 4. + * @param inputSizeX The width of the input buffer, as a number of 1-4 byte elements. + * @param inputSizeY The height of the input buffer, as a number of 1-4 byte elements. + * @param outputSizeX The width of the output buffer, as a number of 1-4 byte elements. + * @param outputSizeY The height of the output buffer, as a number of 1-4 byte elements. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return An array of outputSizeX * outputSizeY padded elements that contains the rescaled image. + */ + @JvmOverloads + fun resize( + inputArray: ByteArray, + vectorSize: Int, + inputSizeX: Int, + inputSizeY: Int, + outputSizeX: Int, + outputSizeY: Int, + restriction: Range2d? = null + ): ByteArray { + require(vectorSize in 1..4) { + "$externalName resize. The vectorSize should be between 1 and 4. $vectorSize provided." + } + require(inputArray.size >= inputSizeX * inputSizeY * paddedSize(vectorSize)) { + "$externalName resize. inputArray is too small for the given dimensions. " + + "${inputArray.size} < $inputSizeX*$inputSizeY*${paddedSize(vectorSize)}." + } + validateRestriction("resize", outputSizeX, outputSizeY, restriction) + + val outputArray = ByteArray(outputSizeX * outputSizeY * paddedSize(vectorSize)) + nativeResize( + nativeHandle, + inputArray, + vectorSize, + inputSizeX, + inputSizeY, + outputArray, + outputSizeX, + outputSizeY, + restriction + ) + return outputArray + } + + /** + * Resize an image. + * + * Resizes an image using bicubic interpolation. + * + * This method supports input Bitmap of config ARGB_8888 and ALPHA_8. The returned Bitmap + * has the same config. 
Bitmaps with a stride different than width * vectorSize are not + * currently supported. 
+ * + * An optional range parameter can be set to restrict the operation to a rectangular subset + * of the output buffer. The corresponding scaled range of the input will be used. If provided, + * the range must be wholly contained within the dimensions described by outputSizeX and + * outputSizeY. + * + * @param inputBitmap The Bitmap to be resized. + * @param outputSizeX The width of the output Bitmap, in pixels. + * @param outputSizeY The height of the output Bitmap, in pixels. + * @param restriction When not null, restricts the operation to a 2D range of pixels. + * @return A Bitmap of the same config as inputBitmap that contains the rescaled image. + */ + @JvmOverloads + fun resize( + inputBitmap: Bitmap, + outputSizeX: Int, + outputSizeY: Int, + restriction: Range2d? = null + ): Bitmap { + validateBitmap("resize", inputBitmap) + validateRestriction("resize", outputSizeX, outputSizeY, restriction) + + val outputBitmap = Bitmap.createBitmap(outputSizeX, outputSizeY, inputBitmap.config) + nativeResizeBitmap(nativeHandle, inputBitmap, outputBitmap, restriction) + return outputBitmap + } + + /** + * Convert an image from YUV to RGB. + * + * Converts a YUV buffer to RGB. The input array should be supplied in a supported YUV format. + * The output is RGBA; the alpha channel will be set to 255. + * + * Note that for YV12 and a sizeX that's not a multiple of 32, the RenderScript Intrinsic may + * not have converted the image correctly. This Toolkit method should. + * + * @param inputArray The buffer of the image to be converted. + * @param sizeX The width in pixels of the image. + * @param sizeY The height in pixels of the image. + * @param format Either YV12 or NV21. + * @return The converted image as a byte array. + */ + fun yuvToRgb(inputArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray { + require(sizeX % 2 == 0 && sizeY % 2 == 0) { + "$externalName yuvToRgb. Non-even dimensions are not supported. 
" + + "$sizeX and $sizeY were provided." + } + + val outputArray = ByteArray(sizeX * sizeY * 4) + nativeYuvToRgb(nativeHandle, inputArray, outputArray, sizeX, sizeY, format.value) + return outputArray + } + + /** + * Convert an image from YUV to an RGB Bitmap. + * + * Converts a YUV buffer to an RGB Bitmap. The input array should be supplied in a supported + * YUV format. The output is RGBA; the alpha channel will be set to 255. + * + * Note that for YV12 and a sizeX that's not a multiple of 32, the RenderScript Intrinsic may + * not have converted the image correctly. This Toolkit method should. + * + * @param inputArray The buffer of the image to be converted. + * @param sizeX The width in pixels of the image. + * @param sizeY The height in pixels of the image. + * @param format Either YV12 or NV21. + * @return The converted image. + */ + fun yuvToRgbBitmap(inputArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): Bitmap { + require(sizeX % 2 == 0 && sizeY % 2 == 0) { + "$externalName yuvToRgbBitmap. Non-even dimensions are not supported. " + + "$sizeX and $sizeY were provided." + } + + val outputBitmap = Bitmap.createBitmap(sizeX, sizeY, Bitmap.Config.ARGB_8888) + nativeYuvToRgbBitmap(nativeHandle, inputArray, sizeX, sizeY, outputBitmap, format.value) + return outputBitmap + } + + init { + System.loadLibrary("renderscript-toolkit") + nativeHandle = createNative() + } + + /** + * Shutdown the thread pool. + * + * Waits for the threads to complete their work and destroys them. + * + * An application should call this method only if it is sure that it won't call the + * toolkit again, as it is irreversible. 
+ */ + fun shutdown() { + destroyNative(nativeHandle) + nativeHandle = 0 + } + + private var nativeHandle: Long = 0 + + private external fun createNative(): Long + + private external fun destroyNative(nativeHandle: Long) + + private external fun nativeBlend( + nativeHandle: Long, + mode: Int, + sourceArray: ByteArray, + destArray: ByteArray, + sizeX: Int, + sizeY: Int, + restriction: Range2d? + ) + + private external fun nativeBlendBitmap( + nativeHandle: Long, + mode: Int, + sourceBitmap: Bitmap, + destBitmap: Bitmap, + restriction: Range2d? + ) + + private external fun nativeBlur( + nativeHandle: Long, + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + radius: Int, + outputArray: ByteArray, + restriction: Range2d? + ) + + private external fun nativeBlurBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + radius: Int, + restriction: Range2d? + ) + + private external fun nativeColorMatrix( + nativeHandle: Long, + inputArray: ByteArray, + inputVectorSize: Int, + sizeX: Int, + sizeY: Int, + outputArray: ByteArray, + outputVectorSize: Int, + matrix: FloatArray, + addVector: FloatArray, + restriction: Range2d? + ) + + private external fun nativeColorMatrixBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + matrix: FloatArray, + addVector: FloatArray, + restriction: Range2d? + ) + + private external fun nativeConvolve( + nativeHandle: Long, + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + outputArray: ByteArray, + coefficients: FloatArray, + restriction: Range2d? + ) + + private external fun nativeConvolveBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + coefficients: FloatArray, + restriction: Range2d? + ) + + private external fun nativeHistogram( + nativeHandle: Long, + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + outputArray: IntArray, + restriction: Range2d? 
+ ) + + private external fun nativeHistogramBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputArray: IntArray, + restriction: Range2d? + ) + + private external fun nativeHistogramDot( + nativeHandle: Long, + inputArray: ByteArray, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + outputArray: IntArray, + coefficients: FloatArray, + restriction: Range2d? + ) + + private external fun nativeHistogramDotBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputArray: IntArray, + coefficients: FloatArray, + restriction: Range2d? + ) + + private external fun nativeLut( + nativeHandle: Long, + inputArray: ByteArray, + outputArray: ByteArray, + sizeX: Int, + sizeY: Int, + red: ByteArray, + green: ByteArray, + blue: ByteArray, + alpha: ByteArray, + restriction: Range2d? + ) + + private external fun nativeLutBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + red: ByteArray, + green: ByteArray, + blue: ByteArray, + alpha: ByteArray, + restriction: Range2d? + ) + + private external fun nativeLut3d( + nativeHandle: Long, + inputArray: ByteArray, + outputArray: ByteArray, + sizeX: Int, + sizeY: Int, + cube: ByteArray, + cubeSizeX: Int, + cubeSizeY: Int, + cubeSizeZ: Int, + restriction: Range2d? + ) + + private external fun nativeLut3dBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + cube: ByteArray, + cubeSizeX: Int, + cubeSizeY: Int, + cubeSizeZ: Int, + restriction: Range2d? + ) + + private external fun nativeResize( + nativeHandle: Long, + inputArray: ByteArray, + vectorSize: Int, + inputSizeX: Int, + inputSizeY: Int, + outputArray: ByteArray, + outputSizeX: Int, + outputSizeY: Int, + restriction: Range2d? + ) + + private external fun nativeResizeBitmap( + nativeHandle: Long, + inputBitmap: Bitmap, + outputBitmap: Bitmap, + restriction: Range2d? 
+ ) + + private external fun nativeYuvToRgb( + nativeHandle: Long, + inputArray: ByteArray, + outputArray: ByteArray, + sizeX: Int, + sizeY: Int, + format: Int + ) + + private external fun nativeYuvToRgbBitmap( + nativeHandle: Long, + inputArray: ByteArray, + sizeX: Int, + sizeY: Int, + outputBitmap: Bitmap, + value: Int + ) +} + +/** + * Determines how a source buffer is blended into a destination buffer. + * See {@link RenderScriptToolkit::blend}. + * + * blend only works on 4 byte RGBA data. In the descriptions below, ".a" represents + * the alpha channel. + */ +enum class BlendingMode(val value: Int) { + /** + * dest = 0 + * + * The destination is cleared, i.e. each pixel is set to (0, 0, 0, 0) + */ + CLEAR(0), + + /** + * dest = src + * + * Sets each pixel of the destination to the corresponding one in the source. + */ + SRC(1), + + /** + * dest = dest + * + * Leaves the destination untouched. This is a no-op. + */ + DST(2), + + /** + * dest = src + dest * (1.0 - src.a) + */ + SRC_OVER(3), + + /** + * dest = dest + src * (1.0 - dest.a) + */ + DST_OVER(4), + + /** + * dest = src * dest.a + */ + SRC_IN(5), + + /** + * dest = dest * src.a + */ + DST_IN(6), + + /** + * dest = src * (1.0 - dest.a) + */ + SRC_OUT(7), + + /** + * dest = dest * (1.0 - src.a) + */ + DST_OUT(8), + + /** + * dest.rgb = src.rgb * dest.a + (1.0 - src.a) * dest.rgb, dest.a = dest.a + */ + SRC_ATOP(9), + + /** + * dest.rgb = dest.rgb * src.a + (1.0 - dest.a) * src.rgb, dest.a = src.a + */ + DST_ATOP(10), + + /** + * dest = {src.r ^ dest.r, src.g ^ dest.g, src.b ^ dest.b, src.a ^ dest.a} + * + * Note: this is NOT the Porter/Duff XOR mode; this is a bitwise xor. + */ + XOR(11), + + /** + * dest = src * dest + */ + MULTIPLY(12), + + /** + * dest = min(src + dest, 1.0) + */ + ADD(13), + + /** + * dest = max(dest - src, 0.0) + */ + SUBTRACT(14) +} + +/** + * A translation table used by the lut method. 
For each potential red, green, blue, and alpha + * value, specifies its replacement value. 
+ * + * The fields are initialized to be a no-op operation, i.e. replace 1 by 1, 2 by 2, etc. + * You can modify just the values you're interested in having a translation. + */ +class LookupTable { + var red = ByteArray(256) { it.toByte() } + var green = ByteArray(256) { it.toByte() } + var blue = ByteArray(256) { it.toByte() } + var alpha = ByteArray(256) { it.toByte() } +} + +/** + * The YUV formats supported by yuvToRgb. + */ +enum class YuvFormat(val value: Int) { + NV21(0x11), + YV12(0x32315659), +} + +/** + * Define a range of data to process. + * + * This class is used to restrict a [Toolkit] operation to a rectangular subset of the input + * tensor. + * + * @property startX The index of the first value to be included on the X axis. + * @property endX The index after the last value to be included on the X axis. + * @property startY The index of the first value to be included on the Y axis. + * @property endY The index after the last value to be included on the Y axis. + */ +data class Range2d( + val startX: Int, + val endX: Int, + val startY: Int, + val endY: Int +) { + constructor() : this(0, 0, 0, 0) +} + +class Rgba3dArray(val values: ByteArray, val sizeX: Int, val sizeY: Int, val sizeZ: Int) { + init { + require(values.size >= sizeX * sizeY * sizeZ * 4) + } + + operator fun get(x: Int, y: Int, z: Int): ByteArray { + val index = indexOfVector(x, y, z) + return ByteArray(4) { values[index + it] } + } + + operator fun set(x: Int, y: Int, z: Int, value: ByteArray) { + require(value.size == 4) + val index = indexOfVector(x, y, z) + for (i in 0..3) { + values[index + i] = value[i] + } + } + + private fun indexOfVector(x: Int, y: Int, z: Int): Int { + require(x in 0 until sizeX) + require(y in 0 until sizeY) + require(z in 0 until sizeZ) + return ((z * sizeY + y) * sizeX + x) * 4 + } +} + +internal fun validateBitmap( + function: String, + inputBitmap: Bitmap, + alphaAllowed: Boolean = true +) { + if (alphaAllowed) { + require( + inputBitmap.config == 
Bitmap.Config.ARGB_8888 || + inputBitmap.config == Bitmap.Config.ALPHA_8 + ) { + "$externalName. $function supports only ARGB_8888 and ALPHA_8 bitmaps. " + + "${inputBitmap.config} provided." + } + } else { + require(inputBitmap.config == Bitmap.Config.ARGB_8888) { + "$externalName. $function supports only ARGB_8888. " + + "${inputBitmap.config} provided." + } + } + require(inputBitmap.width * vectorSize(inputBitmap) == inputBitmap.rowBytes) { + "$externalName $function. Only bitmaps with rowSize equal to the width * vectorSize are " + + "currently supported. Provided were rowBytes=${inputBitmap.rowBytes}, " + + "width=${inputBitmap.width}, and vectorSize=${vectorSize(inputBitmap)}." + } +} + +/** + * Creates a new bitmap with the same width, height, and config as [inputBitmap]. + */ +internal fun createCompatibleBitmap(inputBitmap: Bitmap) = + Bitmap.createBitmap(inputBitmap.width, inputBitmap.height, inputBitmap.config) + +/** + * Checks the coefficients given to histogramDot. + * + * [coefficients] may be null. Otherwise it must hold exactly [vectorSize] values, none of them + * negative, whose sum is at most 1. + * + * @throws IllegalArgumentException if one of these conditions is not met. + */ +internal fun validateHistogramDotCoefficients( + coefficients: FloatArray?, + vectorSize: Int +) { + require(coefficients == null || coefficients.size == vectorSize) { + "$externalName histogramDot. The coefficients should be null or have $vectorSize values." + } + if (coefficients !== null) { + var sum = 0f + for (i in 0 until vectorSize) { + require(coefficients[i] >= 0.0f) { + "$externalName histogramDot. Coefficients should not be negative. " + + "Coefficient $i was ${coefficients[i]}." + } + sum += coefficients[i] + } + require(sum <= 1.0f) { + "$externalName histogramDot. Coefficients should add to 1 or less. Their sum is $sum." + } + } +} + +/** + * Validates [restriction] against the width and height of [bitmap]. + * + * @param tag Name of the calling operation, included in the error message. + */ +internal fun validateRestriction(tag: String, bitmap: Bitmap, restriction: Range2d? = null) { + validateRestriction(tag, bitmap.width, bitmap.height, restriction) +} + +/** + * Validates [restriction] against a buffer of [sizeX] by [sizeY] values. A null restriction is + * accepted without further checks. + * + * @param tag Name of the calling operation, included in the error message. + * @throws IllegalArgumentException if the restriction fails a check. + */ +internal fun validateRestriction( + tag: String, + sizeX: Int, + sizeY: Int, + restriction: Range2d? = null +) { + if (restriction == null) return + require(restriction.startX < sizeX && restriction.endX <= sizeX) { + "$externalName $tag. 
sizeX should be greater than restriction.startX and greater " + + "or equal to restriction.endX. $sizeX, ${restriction.startX}, " + + "and ${restriction.endX} were provided respectively." + } + require(restriction.startY < sizeY && restriction.endY <= sizeY) { + "$externalName $tag. sizeY should be greater than restriction.startY and greater " + + "or equal to restriction.endY. $sizeY, ${restriction.startY}, " + + "and ${restriction.endY} were provided respectively." + } + /* A negative start passes the upper-bound checks above, so reject it explicitly. */ + require(restriction.startX >= 0 && restriction.startY >= 0) { + "$externalName $tag. Restriction startX and startY should not be negative. " + + "${restriction.startX} and ${restriction.startY} were provided respectively." + } + require(restriction.startX < restriction.endX) { + "$externalName $tag. Restriction startX should be less than endX. " + + "${restriction.startX} and ${restriction.endX} were provided respectively." + } + require(restriction.startY < restriction.endY) { + "$externalName $tag. Restriction startY should be less than endY. " + + "${restriction.startY} and ${restriction.endY} were provided respectively." + } +} + +/** + * Returns the number of bytes used by each pixel of [bitmap]: 4 for ARGB_8888 and 1 for ALPHA_8. + * + * @throws IllegalArgumentException if the bitmap has any other config. + */ +internal fun vectorSize(bitmap: Bitmap): Int { + return when (bitmap.config) { + Bitmap.Config.ARGB_8888 -> 4 + Bitmap.Config.ALPHA_8 -> 1 + else -> throw IllegalArgumentException( + "$externalName. Only ARGB_8888 and ALPHA_8 Bitmap are supported." + ) + } +} + +/** + * Returns [vectorSize] with 3 rounded up to 4; every other value is returned unchanged. + */ +internal fun paddedSize(vectorSize: Int) = if (vectorSize == 3) 4 else vectorSize diff --git a/toolkit/test/AllTests.kt b/toolkit/test/AllTests.kt new file mode 100644 index 00000000..58337951 --- /dev/null +++ b/toolkit/test/AllTests.kt @@ -0,0 +1,1244 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO Rename to something better +package com.example.testapp + +import android.content.Context +import android.graphics.Bitmap +import android.graphics.BitmapFactory +import android.renderscript.RenderScript +import android.renderscript.toolkit.BlendingMode +import android.renderscript.toolkit.LookupTable +import android.renderscript.toolkit.Range2d +import android.renderscript.toolkit.Rgba3dArray +import android.renderscript.toolkit.Toolkit +import android.renderscript.toolkit.YuvFormat +import kotlin.math.abs +import kotlin.math.min + +data class TestLayout( + val sizeX: Int, + val sizeY: Int, + val restriction: Range2d? +) + +// List of dimensions (sizeX, sizeY) to try when generating random data. +val commonLayoutsToTry = listOf( + // Small layouts to start with + TestLayout(3, 4, null), + TestLayout(3, 4, Range2d(0, 1, 0, 3)), + TestLayout(3, 4, Range2d(2, 3, 1, 4)), + TestLayout(10, 14, null), + TestLayout(10, 14, Range2d(2, 3, 8, 14)), + // The size of most CTS intrinsic tests + TestLayout(160, 100, null), + TestLayout(125, 227, Range2d(50, 125, 100, 227)), + // A larger one + TestLayout(800, 600, null), + // Weirdly shaped ones + TestLayout(1, 1, null), // A single item + // TODO This size makes Intrinsic Blur fail. 
+ TestLayout(16000, 1, null), // One large row + TestLayout(1, 16000, null), // One large column + // A very large test + TestLayout(1024, 2048, null), +) + + +class Tester(context: Context, private val validate: Boolean) { + private val renderscriptContext = RenderScript.create(context) + private val toolkit = Toolkit() + private val testImage1 = BitmapFactory.decodeResource(context.resources, R.drawable.img800x450a) + private val testImage2 = BitmapFactory.decodeResource(context.resources, R.drawable.img800x450b) + + init { + validateTestImage(testImage1) + validateTestImage(testImage2) + } + + /** + * Verify that the test images are in a format that works for our tests: ARGB_8888 with no + * padding at the end of the rows. + */ + private fun validateTestImage(bitmap: Bitmap) { + require(bitmap.config == Bitmap.Config.ARGB_8888) + require(bitmap.rowBytes == bitmap.width * 4) { + "Can't handle bitmaps that have extra padding. " + + "${bitmap.rowBytes} != ${bitmap.width} * 4." } + require(bitmap.byteCount == bitmap.rowBytes * bitmap.height) + } + + fun destroy() { + renderscriptContext.destroy() + } + + @ExperimentalUnsignedTypes + fun testAll(timer: TimingTracker): String { + val tests = listOf( + Pair("blend", ::testBlend), + Pair("blur", ::testBlur), + Pair("colorMatrix", ::testColorMatrix), + Pair("convolve", ::testConvolve), + Pair("histogram", ::testHistogram), + Pair("lut", ::testLut), + Pair("lut3d", ::testLut3d), + Pair("resize", ::testResize), + Pair("yuvToRgb", ::testYuvToRgb), + ) + val results = Array(tests.size) { "" } + for (i in tests.indices) { + val (name, test) = tests[i] + println("Doing $name") + val success = test(timer) + results[i] = "$name " + if (success) "succeeded" else "FAILED! FAILED! FAILED! FAILED!" 
+ println(" ${results[i]}") + } + + return results.joinToString("\n") + } + + @ExperimentalUnsignedTypes + private fun testBlend(timer: TimingTracker): Boolean { + return BlendingMode.values().all { mode -> + testOneBitmapBlend(timer, testImage1, testImage2, mode, null) and + testOneBitmapBlend( + timer, testImage1, testImage2, mode, + Range2d(6, 23, 2, 4) + ) and + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + testOneRandomBlend(timer, sizeX, sizeY, mode, restriction) + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomBlend( + timer: TimingTracker, + sizeX: Int, + sizeY: Int, + mode: BlendingMode, + restriction: Range2d? + ): Boolean { + val sourceArray = randomByteArray(0x50521f0, sizeX, sizeY, 4) + val destArray = randomByteArray(0x2932147, sizeX, sizeY, 4) + // Make clones because these will be modified by the blend. + val intrinsicDestArray = destArray.clone() + val referenceDestArray = destArray.clone() + val toolkitDestArray = destArray.clone() + + timer.measure("IntrinsicBlend") { + intrinsicBlend( + renderscriptContext, mode, sourceArray, intrinsicDestArray, sizeX, sizeY, + restriction + ) + } + timer.measure("ToolkitBlend") { + toolkit.blend(mode, sourceArray, toolkitDestArray, sizeX, sizeY, restriction) + } + if (!validate) return true + + timer.measure("ReferenceBlend") { + referenceBlend(mode, sourceArray, referenceDestArray, sizeX, sizeY, restriction) + } + + return validateSame( + "Blend_$mode", intrinsicDestArray, referenceDestArray, toolkitDestArray + ) { + println("blend $mode ($sizeX, $sizeY) $restriction") + logArray("Blend_$mode src", sourceArray, 48) + logArray("Blend_$mode dst", destArray, 48) + logArray("Blend_$mode reference out", referenceDestArray, 48) + logArray("Blend_$mode intrinsic out", intrinsicDestArray, 48) + logArray("Blend_$mode toolkit out", toolkitDestArray, 48) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapBlend( + timer: TimingTracker, + sourceBitmap: Bitmap, + destBitmap: 
Bitmap, + mode: BlendingMode, + restriction: Range2d? + ): Boolean { + // Make clones because these will be modified by the blend. + val intrinsicDestBitmap = duplicateBitmap(destBitmap) + val toolkitDestBitmap = duplicateBitmap(destBitmap) + val referenceDestBitmap = duplicateBitmap(destBitmap) + + timer.measure("IntrinsicBlend") { + intrinsicBlend( + renderscriptContext, mode, sourceBitmap, intrinsicDestBitmap, restriction + ) + } + timer.measure("ToolkitBlend") { + toolkit.blend(mode, sourceBitmap, toolkitDestBitmap, restriction) + } + if (!validate) return true + + val referenceDestArray = getBitmapBytes(referenceDestBitmap) + timer.measure("ReferenceBlend") { + referenceBlend( + mode, getBitmapBytes(sourceBitmap), referenceDestArray, sourceBitmap.width, + sourceBitmap.height, restriction + ) + } + + val intrinsicDestArray = getBitmapBytes(intrinsicDestBitmap) + val toolkitDestArray = getBitmapBytes(toolkitDestBitmap) + return validateSame( + "BlendBitmap_$mode", intrinsicDestArray, referenceDestArray, toolkitDestArray + ) { + println("BlendBitmap $mode $restriction") + //logArray("BlendBitmap_$mode src", sourceArray, 48) + //logArray("BlendBitmap_$mode dst", destArray, 48) + logArray("BlendBitmap_$mode reference out", referenceDestArray, 48) + logArray("BlendBitmap_$mode intrinsic out", intrinsicDestArray, 48) + logArray("BlendBitmap_$mode toolkit out", toolkitDestArray, 48) + } + } + + @ExperimentalUnsignedTypes + private fun testBlur(timer: TimingTracker): Boolean { + return arrayOf(1, 3, 8, 25).all { radius -> + testOneBitmapBlur(timer, testImage1, radius, null) and + testOneBitmapBlur(timer, testImage1, radius, Range2d(6, 23, 2, 4)) and + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + arrayOf(1, 4).all { vectorSize -> + testOneRandomBlur(timer, vectorSize, sizeX, sizeY, radius, restriction) + } + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomBlur( + timer: TimingTracker, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + 
radius: Int, + restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, vectorSize) + val intrinsicOutArray = timer.measure("IntrinsicBlur") { + intrinsicBlur( + renderscriptContext, inputArray, vectorSize, sizeX, sizeY, radius, restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitBlur") { + toolkit.blur(inputArray, vectorSize, sizeX, sizeY, radius, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceBlur") { + referenceBlur(inputArray, vectorSize, sizeX, sizeY, radius, restriction) + } + return validateSame("blur", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("blur $vectorSize ($sizeX, $sizeY) radius = $radius $restriction") + logArray("blur input ", inputArray) + logArray("blur reference out", referenceOutArray) + logArray("blur intrinsic out", intrinsicOutArray) + logArray("blur toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapBlur( + timer: TimingTracker, + bitmap: Bitmap, + radius: Int, + restriction: Range2d? 
+ ): Boolean { + val intrinsicOutArray = timer.measure("IntrinsicBlur") { + intrinsicBlur(renderscriptContext, bitmap, radius, restriction) + } + + val toolkitOutBitmap = timer.measure("ToolkitBlur") { + toolkit.blur(bitmap, radius, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceBlur") { + referenceBlur( + getBitmapBytes(bitmap), + vectorSizeOfBitmap(bitmap), + bitmap.width, + bitmap.height, + radius, + restriction + ) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame("blur", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("BlurBitmap ${bitmap.config} $radius $restriction") + logArray("blur reference out", referenceOutArray) + logArray("blur intrinsic out", intrinsicOutArray) + logArray("blur toolkit out", toolkitOutArray) + } + } + + enum class ColorMatrixConversionType { + RGB_TO_YUV, + YUV_TO_RGB, + GREYSCALE, + RANDOM + } + + @ExperimentalUnsignedTypes + private fun testColorMatrix(timer: TimingTracker): Boolean { + return ColorMatrixConversionType.values().all { conversion -> + testOneBitmapColorMatrix(timer, testImage1, conversion, null) and + testOneBitmapColorMatrix( + timer, + testImage1, + conversion, + Range2d(6, 23, 2, 4) + ) and + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + (1..4).all { inputVectorSize -> + (1..4).all { outputVectorSize -> + testOneRandomColorMatrix( + timer, + inputVectorSize, + sizeX, + sizeY, + outputVectorSize, + conversion, + restriction + ) + } + } + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomColorMatrix( + timer: TimingTracker, + inputVectorSize: Int, + sizeX: Int, + sizeY: Int, + outputVectorSize: Int, + conversion: ColorMatrixConversionType, + restriction: Range2d? 
+ ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(inputVectorSize)) + val addVector = randomFloatArray(0x243238, 4, 1, 1, 0.3f) + val matrix = when (conversion) { + ColorMatrixConversionType.RGB_TO_YUV -> toolkit.rgbToYuvMatrix + ColorMatrixConversionType.YUV_TO_RGB -> toolkit.yuvToRgbMatrix + ColorMatrixConversionType.GREYSCALE -> toolkit.greyScaleColorMatrix + ColorMatrixConversionType.RANDOM -> randomFloatArray(0x234348, 4, 4, 1) + } + + val intrinsicOutArray = timer.measure("IntrinsicColorMatrix") { + intrinsicColorMatrix( + renderscriptContext, + conversion, + inputArray, + inputVectorSize, + sizeX, + sizeY, + outputVectorSize, + matrix, + addVector, + restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitColorMatrix") { + toolkit.colorMatrix( + inputArray, + inputVectorSize, + sizeX, + sizeY, + outputVectorSize, + matrix, + addVector, + restriction + ) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceColorMatrix") { + referenceColorMatrix( + inputArray, inputVectorSize, sizeX, sizeY, outputVectorSize, matrix, addVector, + restriction + ) + } + + return validateSame("colorMatrix", intrinsicOutArray, referenceOutArray, toolkitOutArray, + outputVectorSize == 3) { + println("colorMatrix ($sizeX, $sizeY) $inputVectorSize->$outputVectorSize $restriction") + logArray("colorMatrix matrix ", matrix, 16) + logArray("colorMatrix addVector", addVector, 4) + logArray("colorMatrix in ", inputArray) + logArray("colorMatrix reference out", referenceOutArray, 300) + logArray("colorMatrix intrinsic out", intrinsicOutArray, 300) + logArray("colorMatrix toolkit out", toolkitOutArray, 300) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapColorMatrix( + timer: TimingTracker, + bitmap: Bitmap, + conversion: ColorMatrixConversionType, + restriction: Range2d? 
+ ): Boolean { + val addVector = randomFloatArray(0x243238, 4, 1, 1, 0.3f) + val matrix = when (conversion) { + ColorMatrixConversionType.RGB_TO_YUV -> toolkit.rgbToYuvMatrix + ColorMatrixConversionType.YUV_TO_RGB -> toolkit.yuvToRgbMatrix + ColorMatrixConversionType.GREYSCALE -> toolkit.greyScaleColorMatrix + ColorMatrixConversionType.RANDOM -> randomFloatArray(0x234348, 4, 4, 1) + } + + val intrinsicOutArray = timer.measure("IntrinsicColorMatrix") { + intrinsicColorMatrix( + renderscriptContext, conversion, bitmap, matrix, addVector, restriction + ) + } + val toolkitOutBitmap = timer.measure("ToolkitColorMatrix") { + toolkit.colorMatrix(bitmap, matrix, addVector, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceColorMatrix") { + referenceColorMatrix( + getBitmapBytes(bitmap), vectorSizeOfBitmap(bitmap), bitmap.width, bitmap.height, + vectorSizeOfBitmap(bitmap), matrix, addVector, restriction + ) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame("ColorMatrix", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("colorMatrixBitmap $restriction") + logArray("colorMatrixBitmap matrix ", matrix, 16) + logArray("colorMatrixBitmap addVector", addVector, 4) + logArray("colorMatrixBitmap reference out", referenceOutArray) + logArray("colorMatrixBitmap intrinsic out", intrinsicOutArray) + logArray("colorMatrixBitmap toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testConvolve(timer: TimingTracker): Boolean { + val coefficientsToTry = listOf( + randomFloatArray(0x2937021, 3, 3, 1, 0.1f), + randomFloatArray(0x2937021, 5, 5, 1, 0.05f) + ) + return coefficientsToTry.all { coefficients -> + testOneBitmapConvolve(timer, testImage1, coefficients, null) and + testOneBitmapConvolve(timer, testImage1, coefficients, Range2d(6, 23, 2, 4)) and + + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + (1..4).all { vectorSize -> + 
testOneRandomConvolve( + timer, + vectorSize, + sizeX, + sizeY, + coefficients, + restriction + ) + } + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomConvolve( + timer: TimingTracker, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + coefficients: FloatArray, + restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize)) + + val intrinsicOutArray = timer.measure("IntrinsicConvolve") { + intrinsicConvolve( + renderscriptContext, inputArray, vectorSize, sizeX, sizeY, coefficients, restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitConvolve") { + toolkit.convolve(inputArray, vectorSize, sizeX, sizeY, coefficients, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceConvolve") { + referenceConvolve(inputArray, vectorSize, sizeX, sizeY, coefficients, restriction) + } + + val task = if (coefficients.size == 9) "convolve3x3 $vectorSize" else "convolve5x5 $vectorSize" + return validateSame(task, intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("Convolve $vectorSize ($sizeX, $sizeY) $restriction") + logArray("Convolve coefficients", coefficients, 25) + logArray("Convolve in ", inputArray) + logArray("Convolve reference out", referenceOutArray) + logArray("Convolve intrinsic out", intrinsicOutArray) + logArray("Convolve toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapConvolve( + timer: TimingTracker, + bitmap: Bitmap, + coefficients: FloatArray, + restriction: Range2d? 
+ ): Boolean { + val intrinsicOutArray = timer.measure("IntrinsicConvolve") { + intrinsicConvolve(renderscriptContext, bitmap, coefficients, restriction) + } + val toolkitOutBitmap = timer.measure("ToolkitConvolve") { + toolkit.convolve(bitmap, coefficients, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceConvolve") { + referenceConvolve( + getBitmapBytes(bitmap), vectorSizeOfBitmap(bitmap), bitmap.width, bitmap.height, + coefficients, restriction + ) + } + + val task = if (coefficients.size == 9) "convolve3x3" else "convolve5x5" + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame(task, intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("ConvolveBitmap $restriction") + logArray("ConvolveBitmap coefficients", coefficients, 25) + //logArray("ConvolveBitmap in ", inputArray) + logArray("ConvolveBitmap reference out", referenceOutArray) + logArray("ConvolveBitmap intrinsic out", intrinsicOutArray) + logArray("ConvolveBitmap toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testHistogram(timer: TimingTracker): Boolean { + val coefficients = floatArrayOf(0.1f, 0.3f, 0.5f, 0.05f) + return testOneBitmapHistogram(timer, testImage1, null) and + testOneBitmapHistogram(timer, testImage1, Range2d(6, 23, 2, 4)) and + testOneBitmapHistogramDot(timer, testImage1, null, null) and + testOneBitmapHistogramDot(timer, testImage1, coefficients, null) and + testOneBitmapHistogramDot(timer, testImage1, coefficients, Range2d(6, 23, 2, 4)) and + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + (1..4).all { vectorSize -> + testOneRandomHistogram(timer, vectorSize, sizeX, sizeY, restriction) && + testOneRandomHistogramDot( + timer, + vectorSize, + sizeX, + sizeY, + null, + restriction + ) && + testOneRandomHistogramDot( + timer, + vectorSize, + sizeX, + sizeY, + coefficients.sliceArray(0 until vectorSize), + restriction + ) + } + } + } + + 
@ExperimentalUnsignedTypes + private fun testOneRandomHistogram( + timer: TimingTracker, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize)) + + val intrinsicOutput = timer.measure("IntrinsicHistogram") { + intrinsicHistogram( + renderscriptContext, inputArray, vectorSize, sizeX, sizeY, restriction + ) + } + val toolkitOutput = timer.measure("ToolkitHistogram") { + toolkit.histogram(inputArray, vectorSize, sizeX, sizeY, restriction) + } + if (!validate) return true + + val referenceOutput = timer.measure("ReferenceHistogram") { + referenceHistogram( + inputArray, vectorSize, sizeX, sizeY, restriction + ) + } + + return validateSame("histogram", intrinsicOutput, referenceOutput, toolkitOutput, 0) { + println("histogram $vectorSize ($sizeX, $sizeY) $restriction") + logArray("histogram in ", inputArray, 200) + logArray("histogram reference out", referenceOutput, 200) + logArray("histogram intrinsic out", intrinsicOutput, 200) + logArray("histogram toolkit out", toolkitOutput, 200) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapHistogram( + timer: TimingTracker, + bitmap: Bitmap, + restriction: Range2d? 
+ ): Boolean { + val intrinsicOutput = timer.measure("IntrinsicHistogram") { + intrinsicHistogram(renderscriptContext, bitmap, restriction) + } + val toolkitOutput = timer.measure("ToolkitHistogram") { + toolkit.histogram(bitmap, restriction) + } + if (!validate) return true + + val referenceOutput = timer.measure("ReferenceHistogram") { + referenceHistogram( + getBitmapBytes(bitmap), vectorSizeOfBitmap(bitmap), bitmap.width, bitmap.height, + restriction + ) + } + + return validateSame("histogram", intrinsicOutput, referenceOutput, toolkitOutput, 0) { + println("HistogramBitmap $restriction") + logArray("HistogramBitmap reference out", referenceOutput) + logArray("HistogramBitmap intrinsic out", intrinsicOutput) + logArray("HistogramBitmap toolkit out", toolkitOutput) + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomHistogramDot( + timer: TimingTracker, + vectorSize: Int, + sizeX: Int, + sizeY: Int, + coefficients: FloatArray?, restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize)) + + val intrinsicOutArray = timer.measure("IntrinsicHistogramDot") { + intrinsicHistogramDot( + renderscriptContext, inputArray, vectorSize, sizeX, sizeY, coefficients, restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitHistogramDot") { + toolkit.histogramDot( + inputArray, vectorSize, sizeX, sizeY, coefficients, restriction + ) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceHistogramDot") { + referenceHistogramDot(inputArray, vectorSize, sizeX, sizeY, coefficients, restriction) + } + + return validateSame("histogramDot", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("histogramDot $vectorSize ($sizeX, $sizeY) $restriction") + logArray("histogramDot coefficients ", coefficients) + logArray("histogramDot in ", inputArray) + logArray("histogramDot reference out", referenceOutArray, 256) + logArray("histogramDot intrinsic out", 
intrinsicOutArray, 256) + logArray("histogramDot toolkit out", toolkitOutArray, 256) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapHistogramDot( + timer: TimingTracker, + bitmap: Bitmap, + coefficients: FloatArray?, + restriction: Range2d? + ): Boolean { + val intrinsicOutArray = timer.measure("IntrinsicHistogramDot") { + intrinsicHistogramDot(renderscriptContext, bitmap, coefficients, restriction) + } + val toolkitOutArray = timer.measure("ToolkitHistogramDot") { + toolkit.histogramDot(bitmap, coefficients, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceHistogramDot") { + referenceHistogramDot( + getBitmapBytes(bitmap), + vectorSizeOfBitmap(bitmap), + bitmap.width, + bitmap.height, + coefficients, + restriction + ) + } + + return validateSame( + "HistogramDotBitmap", + intrinsicOutArray, + referenceOutArray, + toolkitOutArray + ) { + println("HistogramDotBitmap $restriction") + logArray("HistogramDotBitmap coefficients ", coefficients) + //logArray("HistogramDotBitmap in ", inputArray) + logArray("HistogramDotBitmap reference out", referenceOutArray, 256) + logArray("HistogramDotBitmap intrinsic out", intrinsicOutArray, 256) + logArray("HistogramDotBitmap toolkit out", toolkitOutArray, 256) + } + } + + @ExperimentalUnsignedTypes + private fun testLut(timer: TimingTracker): Boolean { + return testOneBitmapLut(timer, testImage1, null) and + testOneBitmapLut(timer, testImage1, Range2d(6, 23, 2, 4)) and + commonLayoutsToTry.all { (sizeX, sizeY, restriction) -> + testOneRandomLut(timer, sizeX, sizeY, restriction) + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomLut( + timer: TimingTracker, + sizeX: Int, + sizeY: Int, + restriction: Range2d? 
+ ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, 4) + val newRed = randomByteArray(0x32425, 256, 1, 1) + val newGreen = randomByteArray(0x1F3225, 256, 1, 1) + val newBlue = randomByteArray(0x32D4F27, 256, 1, 1) + val newAlpha = randomByteArray(0x3A20001, 256, 1, 1) + val table = LookupTable() + table.red = newRed + table.blue = newBlue + table.green = newGreen + table.alpha = newAlpha + + val intrinsicOutArray = timer.measure("IntrinsicLUT") { + intrinsicLut( + renderscriptContext, inputArray, sizeX, sizeY, newRed, newGreen, newBlue, newAlpha, + restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitLUT") { + toolkit.lut(inputArray, sizeX, sizeY, table, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceLUT") { + referenceLut(inputArray, sizeX, sizeY, table, restriction) + } + + return validateSame("LUT", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("lut ($sizeX, $sizeY) $restriction") + logArray("LUT red ", newRed, 256) + logArray("LUT green", newGreen, 256) + logArray("LUT blue ", newBlue, 256) + logArray("LUT alpha", newAlpha, 256) + logArray("LUT in ", inputArray) + logArray("LUT reference out", referenceOutArray) + logArray("LUT intrinsic out", intrinsicOutArray) + logArray("LUT toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapLut( + timer: TimingTracker, + bitmap: Bitmap, + restriction: Range2d? 
+ ): Boolean { + val newRed = randomByteArray(0x32425, 256, 1, 1) + val newGreen = randomByteArray(0x1F3225, 256, 1, 1) + val newBlue = randomByteArray(0x32D4F27, 256, 1, 1) + val newAlpha = randomByteArray(0x3A20001, 256, 1, 1) + val table = LookupTable() + table.red = newRed + table.blue = newBlue + table.green = newGreen + table.alpha = newAlpha + + val intrinsicOutArray = timer.measure("IntrinsicLUT") { + intrinsicLut( + renderscriptContext, bitmap, newRed, newGreen, newBlue, newAlpha, restriction + ) + } + val toolkitOutBitmap = timer.measure("ToolkitLUT") { + toolkit.lut(bitmap, table, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceLUT") { + referenceLut( + getBitmapBytes(bitmap), + bitmap.width, + bitmap.height, + table, + restriction + ) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame("LutBitmap", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("LutBitmap $restriction") + logArray("LutBitmap red ", newRed, 256) + logArray("LutBitmap green", newGreen, 256) + logArray("LutBitmap blue ", newBlue, 256) + logArray("LutBitmap alpha", newAlpha, 256) + //logArray("LutBitmap in ", inputArray, 80) + logArray("LutBitmap reference out", referenceOutArray) + logArray("LutBitmap intrinsic out", intrinsicOutArray) + logArray("LutBitmap toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testLut3d(timer: TimingTracker): Boolean { + val cubeSizesToTry = listOf( + Dimension(2, 2, 2), + Dimension(32, 32, 16), + Dimension(256, 256, 256) + ) + return cubeSizesToTry.all { cubeSize -> + val identityCube = identityCube(cubeSize) + val randomCube = randomCube(0x23424, cubeSize) + testOneBitmapLut3d(timer, testImage1, cubeSize, identityCube, 1, null) and + testOneBitmapLut3d(timer, testImage2, cubeSize, randomCube, 3, null) and + testOneBitmapLut3d(timer, testImage2, cubeSize, randomCube, 3, Range2d(6, 23, 2, 4)) and + commonLayoutsToTry.all { 
(sizeX, sizeY, restriction) -> + testOneRandomLut3d(timer, sizeX, sizeY, cubeSize, identityCube, 1, restriction) && + testOneRandomLut3d( + timer, + sizeX, + sizeY, + cubeSize, + randomCube, + 3, + restriction + ) + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomLut3d( + timer: TimingTracker, + sizeX: Int, + sizeY: Int, + cubeSize: Dimension, + cubeArray: ByteArray, + allowedIntError: Int, restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, 4) + + val intrinsicOutArray = timer.measure("IntrinsicLut3d") { + intrinsicLut3d( + renderscriptContext, inputArray, sizeX, sizeY, cubeArray, cubeSize, restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitLut3d") { + val toolkitCube = Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ) + toolkit.lut3d(inputArray, sizeX, sizeY, toolkitCube, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceLut3d") { + val cube = Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ) + referenceLut3d(inputArray, sizeX, sizeY, cube, restriction) + } + + return validateSame( + "lut3d", + intrinsicOutArray, + referenceOutArray, + toolkitOutArray, + false, + allowedIntError + ) { + println("lut3d ($sizeX, $sizeY) $restriction") + logArray("lut3d cube", cubeArray, 256) + logArray("lut3d in ", inputArray, 64) + logArray("lut3d reference out", referenceOutArray, 64) + logArray("lut3d intrinsic out", intrinsicOutArray, 64) + logArray("lut3d toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapLut3d( + timer: TimingTracker, + bitmap: Bitmap, + cubeSize: Dimension, + cubeArray: ByteArray, + allowedIntError: Int, restriction: Range2d? 
+ ): Boolean { + val intrinsicOutArray = timer.measure("IntrinsicLut3d") { + intrinsicLut3d(renderscriptContext, bitmap, cubeArray, cubeSize, restriction) + } + val toolkitOutBitmap = timer.measure("ToolkitLut3d") { + val toolkitCube = Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ) + toolkit.lut3d(bitmap, toolkitCube, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceLut3d") { + val cube = Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ) + referenceLut3d(getBitmapBytes(bitmap), bitmap.width, bitmap.height, cube, restriction) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame( + "Lut3dBitmap", + intrinsicOutArray, + referenceOutArray, + toolkitOutArray, + false, + allowedIntError + ) { + println("Lut3dBitmap $restriction") + logArray("Lut3dBitmap cube", cubeArray, 256) + //logArray("Lut3dBitmap in ", inputArray, 64) + logArray("Lut3dBitmap reference out", referenceOutArray, 64) + logArray("Lut3dBitmap intrinsic out", intrinsicOutArray, 64) + logArray("Lut3dBitmap toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testResize(timer: TimingTracker): Boolean { + val factorsToTry = listOf( + Pair(1f, 1f), + Pair(0.5f, 1f), + Pair(2f, 2f), + Pair(0.5f, 2f), + Pair(2f, 0.5f), + // The RenderScript Intrinsic tests used the above factors. It's tempting to use + // less regular ones like Pair(6.37f, 0.17f) however this creates small offset + // errors between the result provided by the C++ code and the SIMD code. This is + // due to the SIMD code using a scaled integer to increment going from one pixel to the + // next, while the C++ code uses float operations. 
+ ) + val layoutsToTry = listOf( + TestLayout(37, 47, null), + TestLayout(60, 10, null), + TestLayout(6, 4, Range2d(1, 3, 0, 2)), + TestLayout(10, 14, Range2d(2, 3, 3, 7)), + ) + + // Do one resize that's greater than 4x, as that's used in the code but don't do it + // for everything, as some images will get very large. This check does not use the + // scale factors iterated below, so it is run exactly once instead of once per factor. + val largeScaleResult = testOneRandomResize(timer, 1, 25, 30, 6f, 6f, null) + + // `and` evaluates both of its operands, so the per-factor checks still run when the + // large scale check above has failed. + return largeScaleResult and factorsToTry.all { (scaleX, scaleY) -> + testOneBitmapResize(timer, testImage1, scaleX, scaleY, null) and + testOneBitmapResize(timer, testImage1, scaleX, scaleY, Range2d(6, 23, 2, 4)) and + layoutsToTry.all { (sizeX, sizeY, restriction) -> + (1..4).all { vectorSize -> + testOneRandomResize( + timer, + vectorSize, + sizeX, + sizeY, + scaleX, + scaleY, + restriction + ) + } + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomResize( + timer: TimingTracker, + vectorSize: Int, + inSizeX: Int, + inSizeY: Int, + scaleX: Float, + scaleY: Float, + restriction: Range2d? + ): Boolean { + val inputArray = randomByteArray(0x50521f0, inSizeX, inSizeY, paddedSize(vectorSize)) + val outSizeX = (inSizeX * scaleX).toInt() + val outSizeY = (inSizeY * scaleY).toInt() + + val intrinsicOutArray = timer.measure("IntrinsicResize") { + intrinsicResize( + renderscriptContext, inputArray, vectorSize, inSizeX, inSizeY, outSizeX, outSizeY, + restriction + ) + } + val toolkitOutArray = timer.measure("ToolkitResize") { + toolkit.resize( + inputArray, vectorSize, inSizeX, inSizeY, outSizeX, outSizeY, restriction + ) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceResize") { + referenceResize( + inputArray, vectorSize, inSizeX, inSizeY, outSizeX, outSizeY, restriction + ) + } + + return validateSame("resize", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("resize $vectorSize ($inSizeX, $inSizeY) by ($scaleX, $scaleY) to ($outSizeX, $outSizeY), $restriction") + logArray("resize in ", inputArray) + logArray("resize reference out", 
referenceOutArray) + logArray("resize intrinsic out", intrinsicOutArray) + logArray("resize toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneBitmapResize( + timer: TimingTracker, + bitmap: Bitmap, + scaleX: Float, + scaleY: Float, + restriction: Range2d? + ): Boolean { + // println("Doing resize $inSizeX x $inSizeY x $vectorSize, $scaleX x $scaleY, $restriction") + val outSizeX = (bitmap.width * scaleX).toInt() + val outSizeY = (bitmap.height * scaleY).toInt() + + val intrinsicOutArray = timer.measure("IntrinsicResize") { + intrinsicResize(renderscriptContext, bitmap, outSizeX, outSizeY, restriction) + } + val toolkitOutBitmap = timer.measure("ToolkitResize") { + toolkit.resize(bitmap, outSizeX, outSizeY, restriction) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceResize") { + referenceResize( + getBitmapBytes(bitmap), + vectorSizeOfBitmap(bitmap), + bitmap.width, + bitmap.height, + outSizeX, + outSizeY, + restriction + ) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame("ResizeBitmap", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("ResizeBitmap by ($scaleX, $scaleY) to ($outSizeX, $outSizeY), $restriction") + //logArray("ResizeBitmap in ", inputArray, 100) + logArray("ResizeBitmap reference out", referenceOutArray) + logArray("ResizeBitmap intrinsic out", intrinsicOutArray) + logArray("ResizeBitmap toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testYuvToRgb(timer: TimingTracker): Boolean { + val layoutsToTry = listOf( + // Don't try sizeX with odd values. That's not allowed by definition of some + // of the video formats. 
+ TestLayout(10, 14, null), + TestLayout(64, 40, null), + TestLayout(96, 94, null), + ) + return layoutsToTry.all { (sizeX, sizeY, _) -> + YuvFormat.values().all { format -> + testOneRandomYuvToRgb(timer, sizeX, sizeY, format) and + testOneRandomYuvToRgbBitmap(timer, sizeX, sizeY, format) + } + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomYuvToRgb( + timer: TimingTracker, + sizeX: Int, + sizeY: Int, + format: YuvFormat + ): Boolean { + // The RenderScript Intrinsic does not handle this combination correctly. + if (format == YuvFormat.YV12 && sizeX % 32 != 0) { + return true + } + val inputArray = randomYuvArray(0x50521f0, sizeX, sizeY, format) + + val intrinsicOutArray = timer.measure("IntrinsicYuvToRgb") { + intrinsicYuvToRgb(renderscriptContext, inputArray, sizeX, sizeY, format) + } + val toolkitOutArray = timer.measure("ToolkitYuvToRgb") { + toolkit.yuvToRgb(inputArray, sizeX, sizeY, format) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceYuvToRgb") { + referenceYuvToRgb(inputArray, sizeX, sizeY, format) + } + + return validateSame("yuvToRgb", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("yuvToRgb ($sizeX, $sizeY) $format") + logArray("yuvToRgb in ", inputArray) + logArray("yuvToRgb reference out", referenceOutArray) + logArray("yuvToRgb intrinsic out", intrinsicOutArray) + logArray("yuvToRgb toolkit out", toolkitOutArray) + } + } + + @ExperimentalUnsignedTypes + private fun testOneRandomYuvToRgbBitmap( + timer: TimingTracker, + sizeX: Int, + sizeY: Int, + format: YuvFormat + ): Boolean { + // The RenderScript Intrinsic does not handle this combination correctly. 
+ if (format == YuvFormat.YV12 && sizeX % 32 != 0) { + return true + } + val inputArray = randomYuvArray(0x50521f0, sizeX, sizeY, format) + + val intrinsicOutArray = timer.measure("IntrinsicYuvToRgb") { + intrinsicYuvToRgb(renderscriptContext, inputArray, sizeX, sizeY, format) + } + val toolkitOutBitmap = timer.measure("ToolkitYuvToRgb") { + toolkit.yuvToRgbBitmap(inputArray, sizeX, sizeY, format) + } + if (!validate) return true + + val referenceOutArray = timer.measure("ReferenceYuvToRgb") { + referenceYuvToRgb(inputArray, sizeX, sizeY, format) + } + + val toolkitOutArray = getBitmapBytes(toolkitOutBitmap) + return validateSame("yuvToRgb", intrinsicOutArray, referenceOutArray, toolkitOutArray) { + println("yuvToRgb ($sizeX, $sizeY) $format") + logArray("yuvToRgb in ", inputArray) + logArray("yuvToRgb reference out", referenceOutArray) + logArray("yuvToRgb intrinsic out", intrinsicOutArray) + logArray("yuvToRgb toolkit out", toolkitOutArray) + } + } + + /** + * Verifies that the arrays returned by the Intrinsic, the reference code, and the Toolkit + * are all within a margin of error. + * + * RenderScript Intrinsic test (rc/android/cts/rscpp/RSCppTest.java) used 3 for ints. + * For floats, rc/android/cts/rscpp/verify.rscript uses 0.0001f. 
+ */ + @ExperimentalUnsignedTypes + private fun validateSame( + task: String, + intrinsic: ByteArray, + reference: ByteArray, + toolkit: ByteArray, + skipFourth: Boolean = false, + allowedIntDelta: Int = 3, + errorLogging: () -> Unit + ): Boolean { + val success = validateAgainstReference( + task, reference, "Intrinsic", intrinsic, skipFourth, allowedIntDelta + ) and validateAgainstReference( + task, reference, "Toolkit", toolkit, skipFourth, allowedIntDelta + ) + if (!success) { + println("$task FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!") + errorLogging() + } + return success + } + + private fun validateSame( + task: String, + intrinsic: IntArray, + reference: IntArray, + toolkit: IntArray, + allowedIntDelta: Int = 3, + errorLogging: () -> Unit + ): Boolean { + val success = validateAgainstReference( + task, reference, "Intrinsic", intrinsic, allowedIntDelta + ) and validateAgainstReference( + task, reference, "Toolkit", toolkit, allowedIntDelta + ) + if (!success) { + println("$task FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!") + errorLogging() + } + return success + } + + @ExperimentalUnsignedTypes + private fun validateAgainstReference( + task: String, + in1: ByteArray, + name2: String, + in2: ByteArray, + skipFourth: Boolean, + allowedIntDelta: Int + ): Boolean { + if (in1.size != in2.size) { + println("$task. Sizes don't match: Reference ${in1.size}, $name2 ${in2.size}") + return false + } + var same = true + val maxDetails = 80 + val diffs = CharArray(min(in1.size, maxDetails)) {'.'} + for (i in in1.indices) { + if (skipFourth && i % 4 == 3) { + continue + } + val delta = abs(in1[i].toUByte().toInt() - in2[i].toUByte().toInt()) + if (delta > allowedIntDelta) { + if (same) { + println( + "$task. 
At $i, Reference is ${in1[i].toUByte()}, $name2 is ${in2[i].toUByte()}" + ) + } + if (i < maxDetails) diffs[i] = 'X' + same = false + } + } + if (!same) { + for (i in 0 until (min(in1.size, maxDetails) / 4)) print("%-3d|".format(i)) + println() + println(diffs) + } + return same + } + + private fun validateAgainstReference( + task: String, + in1: IntArray, + name2: String, + in2: IntArray, + allowedIntDelta: Int + ): Boolean { + if (in1.size != in2.size) { + println("$task. Sizes don't match: Reference ${in1.size}, $name2 ${in2.size}") + return false + } + for (i in in1.indices) { + val delta = abs(in1[i] - in2[i]) + if (delta > allowedIntDelta) { + println("$task. At $i, Reference is ${in1[i]}, $name2 is ${in2[i]}") + return false + } + } + return true + } +} diff --git a/toolkit/test/Android.bp b/toolkit/test/Android.bp new file mode 100644 index 00000000..abeace14 --- /dev/null +++ b/toolkit/test/Android.bp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package { + default_applicable_licenses: ["Android-Apache-2.0"], +} + +android_app { + name: "RenderScriptToolkitTest", +// srcs: ["src/**/*.kt"], + sdk_version: "current", + resource_dirs: ["res"], +// jni_libs: [ "librenderscripttoolkit"], +// certificate: "platform", +// //product_specific: true, +// //optimize: { +// // proguard_flags_files: ["proguard.flags"], +// //}, +// shared_libs: ["librenderscripttoolkit", +// +// ] +} diff --git a/tests/lldb/java/Reduction/AndroidManifest.xml b/toolkit/test/AndroidManifest.xml index 61177d9e..f7097907 100644 --- a/tests/lldb/java/Reduction/AndroidManifest.xml +++ b/toolkit/test/AndroidManifest.xml @@ -1,11 +1,15 @@ <?xml version="1.0" encoding="utf-8"?> <manifest xmlns:android="http://schemas.android.com/apk/res/android" - package="com.android.rs.lldbreductiontest"> - <uses-sdk android:minSdkVersion="21" /> - <application android:label="Reduction"> - <activity android:name="MainActivity" android:screenOrientation="portrait"> + package="com.example.testapp"> + + <application + android:allowBackup="true" + android:label="Toolkit Test" + android:supportsRtl="true"> + <activity android:name=".MainActivity"> <intent-filter> <action android:name="android.intent.action.MAIN" /> + <category android:name="android.intent.category.LAUNCHER" /> </intent-filter> </activity> diff --git a/toolkit/test/BufferUtils.kt b/toolkit/test/BufferUtils.kt new file mode 100644 index 00000000..f2197b06 --- /dev/null +++ b/toolkit/test/BufferUtils.kt @@ -0,0 +1,508 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.testapp + +import android.graphics.Bitmap +import android.graphics.Canvas +import android.renderscript.Element +import android.renderscript.RenderScript +import android.renderscript.toolkit.Range2d +import android.renderscript.toolkit.Rgba3dArray +import android.renderscript.toolkit.YuvFormat +import java.nio.ByteBuffer +import java.util.Random +import kotlin.math.floor +import kotlin.math.max +import kotlin.math.min + +/** + * A vector of 4 integers. + */ +class Int4( + var x: Int = 0, + var y: Int = 0, + var z: Int = 0, + var w: Int = 0 +) { + operator fun plus(other: Int4) = Int4(x + other.x, y + other.y, z + other.z, w + other.w) + operator fun plus(n: Int) = Int4(x + n, y + n, z + n, w + n) + + operator fun minus(other: Int4) = Int4(x - other.x, y - other.y, z - other.z, w - other.w) + operator fun minus(n: Int) = Int4(x - n, y - n, z - n, w - n) + + operator fun times(other: Int4) = Int4(x * other.x, y * other.y, z * other.z, w * other.w) + operator fun times(n: Int) = Int4(x * n, y * n, z * n, w * n) + + fun toFloat4() = Float4(x.toFloat(), y.toFloat(), z.toFloat(), w.toFloat()) +} + +fun min(a: Int4, b: Int4) = Int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)) + +/** + * A vector of 4 floats. 
+ */ +data class Float4( + var x: Float = 0f, + var y: Float = 0f, + var z: Float = 0f, + var w: Float = 0f +) { + operator fun plus(other: Float4) = Float4(x + other.x, y + other.y, z + other.z, w + other.w) + operator fun plus(f: Float) = Float4(x + f, y + f, z + f, w + f) + + operator fun minus(other: Float4) = Float4(x - other.x, y - other.y, z - other.z, w - other.w) + operator fun minus(f: Float) = Float4(x - f, y - f, z - f, w - f) + + operator fun times(other: Float4) = Float4(x * other.x, y * other.y, z * other.z, w * other.w) + operator fun times(f: Float) = Float4(x * f, y * f, z * f, w * f) + + operator fun div(other: Float4) = Float4(x / other.x, y / other.y, z / other.z, w / other.w) + operator fun div(f: Float) = Float4(x / f, y / f, z / f, w / f) + + fun intFloor() = Int4(floor(x).toInt(), floor(y).toInt(), floor(z).toInt(), floor(w).toInt()) +} + +/** + * Convert a UByteArray to a Float4 vector + */ +@ExperimentalUnsignedTypes +fun UByteArray.toFloat4(): Float4 { + require(size == 4) + return Float4(this[0].toFloat(), this[1].toFloat(), this[2].toFloat(), this[3].toFloat()) +} + +/** + * Convert a ByteArray to a Float4 vector + */ +@ExperimentalUnsignedTypes +fun ByteArray.toFloat4(): Float4 { + require(size == 4) + return Float4( + this[0].toUByte().toFloat(), + this[1].toUByte().toFloat(), + this[2].toUByte().toFloat(), + this[3].toUByte().toFloat() + ) +} + +data class Dimension(val sizeX: Int, val sizeY: Int, val sizeZ: Int) + +/** + * An RGBA value represented by 4 Int. + * + * Note that the arithmetical operations consider a 0..255 value the equivalent of 0f..1f. + * After adding or subtracting, the value is clamped. After multiplying, the value is rescaled to + * stay in the 0..255 range. This is useful for the Blend operation. 
+ */ +@ExperimentalUnsignedTypes +data class Rgba( + var r: Int = 0, + var g: Int = 0, + var b: Int = 0, + var a: Int = 0 +) { + operator fun plus(other: Rgba) = + Rgba(r + other.r, g + other.g, b + other.b, a + other.a).clampToUByteRange() + + operator fun minus(other: Rgba) = + Rgba(r - other.r, g - other.g, b - other.b, a - other.a).clampToUByteRange() + + operator fun times(other: Rgba) = Rgba(r * other.r, g * other.g, b * other.b, a * other.a) shr 8 + operator fun times(scalar: Int) = Rgba(r * scalar, g * scalar, b * scalar, a * scalar) shr 8 + + infix fun xor(other: Rgba) = Rgba(r xor other.r, g xor other.g, b xor other.b, a xor other.a) + + infix fun shr(other: Int) = Rgba(r shr other, g shr other, b shr other, a shr other) + + private fun clampToUByteRange() = Rgba( + r.clampToUByteRange(), + g.clampToUByteRange(), + b.clampToUByteRange(), + a.clampToUByteRange() + ) +} + +/** + * A 2D array of UByte vectors, stored in row-major format. + * + * Arrays of vectorSize == 3 are padded to 4. + */ +@ExperimentalUnsignedTypes +class Vector2dArray( + val values: UByteArray, + val vectorSize: Int, + val sizeX: Int, + val sizeY: Int +) { + /** + * If true, index access that would try to get a value that's out of bounds will simply + * return the border value instead. E.g. for [3, -3] would return the value for [3, 0], + * assuming that the sizeX > 3. 
+ */ + var clipReadToRange: Boolean = false + + operator fun get(x: Int, y: Int): UByteArray { + var fixedX = x + var fixedY = y + if (clipReadToRange) { + fixedX = min(max(x, 0), sizeX - 1) + fixedY = min(max(y, 0), sizeY - 1) + } else { + require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" } + } + val start = indexOfVector(fixedX, fixedY) + return UByteArray(paddedSize(vectorSize)) { values[start + it] } + } + + operator fun set(x: Int, y: Int, value: UByteArray) { + require(value.size == paddedSize(vectorSize)) { "Not the expected vector size" } + require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" } + val start = indexOfVector(x, y) + for (i in value.indices) { + values[start + i] = value[i] + } + } + + private fun indexOfVector(x: Int, y: Int) = ((y * sizeX) + x) * paddedSize(vectorSize) + + fun createSameSized() = Vector2dArray(UByteArray(values.size), vectorSize, sizeX, sizeY) + + fun forEach(restriction: Range2d?, work: (Int, Int) -> (Unit)) { + forEachCell(sizeX, sizeY, restriction, work) + } +} + +/** + * A 2D array of float vectors, stored in row-major format. + * + * Arrays of vectorSize == 3 are padded to 4. + */ +class FloatVector2dArray( + val values: FloatArray, + val vectorSize: Int, + val sizeX: Int, + val sizeY: Int +) { + /** + * If true, index access that would try to get a value that's out of bounds will simply + * return the border value instead. E.g. for [3, -3] would return the value for [3, 0], + * assuming that the sizeX > 3. 
+ */ + var clipAccessToRange: Boolean = false + + operator fun get(x: Int, y: Int): FloatArray { + var fixedX = x + var fixedY = y + if (clipAccessToRange) { + fixedX = min(max(x, 0), sizeX - 1) + fixedY = min(max(y, 0), sizeY - 1) + } else { + require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" } + } + val start = indexOfVector(fixedX, fixedY) + return FloatArray(vectorSize) { values[start + it] } + } + + operator fun set(x: Int, y: Int, value: FloatArray) { + require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" } + val start = indexOfVector(x, y) + for (i in value.indices) { + values[start + i] = value[i] + } + } + + private fun indexOfVector(x: Int, y: Int) = ((y * sizeX) + x) * paddedSize(vectorSize) + + fun createSameSized() = FloatVector2dArray(FloatArray(values.size), vectorSize, sizeX, sizeY) + + fun forEach(restriction: Range2d?, work: (Int, Int) -> (Unit)) { + forEachCell(sizeX, sizeY, restriction, work) + } +} + +/** + * A 2D array of RGBA data. 
+ */ +@ExperimentalUnsignedTypes +class Rgba2dArray( + private val values: ByteArray, + val sizeX: Int, + val sizeY: Int +) { + /** + * Returns the cell at (x, y), reading each of its four bytes as an unsigned value. + */ + operator fun get(x: Int, y: Int): Rgba { + val i = indexOfVector(x, y) + return Rgba( + values[i].toUByte().toInt(), + values[i + 1].toUByte().toInt(), + values[i + 2].toUByte().toInt(), + values[i + 3].toUByte().toInt() + ) + } + + /** + * Stores the four channels of value in the cell at (x, y). + * + * Throws IllegalArgumentException if any channel is outside of the 0..255 range. + */ + operator fun set(x: Int, y: Int, value: Rgba) { + // Verify that r, g, b, a are in the 0..255 range + require(value.r in 0..255) + require(value.g in 0..255) + require(value.b in 0..255) + require(value.a in 0..255) + val i = indexOfVector(x, y) + values[i] = value.r.toUByte().toByte() + values[i + 1] = value.g.toUByte().toByte() + values[i + 2] = value.b.toUByte().toByte() + values[i + 3] = value.a.toUByte().toByte() + } + + // Index in values of the first byte of the cell at (x, y): row-major, 4 bytes per cell. + private fun indexOfVector(x: Int, y: Int) = ((y * sizeX) + x) * 4 + + /** + * Invokes work through the top-level forEachCell, using this array's sizeX and sizeY. + */ + fun forEachCell(restriction: Range2d?, work: (Int, Int) -> (Unit)) = + forEachCell(sizeX, sizeY, restriction, work) +} + +/** + * Return a value that's between start and end, with the fraction indicating how far along. + */ +fun mix(start: Float, end: Float, fraction: Float) = start + (end - start) * fraction + +/** + * Component-wise version of mix for Float4 vectors. + */ +fun mix(a: Float4, b: Float4, fraction: Float) = Float4( + mix(a.x, b.x, fraction), + mix(a.y, b.y, fraction), + mix(a.z, b.z, fraction), + mix(a.w, b.w, fraction) +) + +/** + * For vectors of size 3, the original RenderScript has them occupy the same space as a size 4. + * While RenderScript had a method to avoid this padding, it did not apply to Intrinsics. + * + * To preserve compatibility, the Toolkit does the same. + */ +fun paddedSize(vectorSize: Int) = if (vectorSize == 3) 4 else vectorSize + +/** + * Create a ByteArray of the specified size filled with random data. 
+ */ +fun randomByteArray(seed: Long, sizeX: Int, sizeY: Int, elementSize: Int): ByteArray { + val r = Random(seed) + // The bound of nextInt is exclusive: 256 is needed to cover all the byte values, + // i.e. -128..127 once shifted. + return ByteArray(sizeX * sizeY * elementSize) { (r.nextInt(256) - 128).toByte() } +} + +/** + * Create a FloatArray of the specified size filled with random data. + * + * By default, the random data is between 0f and 1f. The factor can be used to scale that. + */ +fun randomFloatArray( + seed: Long, + sizeX: Int, + sizeY: Int, + elementSize: Int, + factor: Float = 1f +): FloatArray { + val r = Random(seed) + return FloatArray(sizeX * sizeY * elementSize) { r.nextFloat() * factor } +} + +/** + * Create a cube of the specified size filled with random data. + * + * The returned array holds 4 bytes per cell of the cube. + */ +fun randomCube(seed: Long, cubeSize: Dimension): ByteArray { + val r = Random(seed) + return ByteArray(cubeSize.sizeX * cubeSize.sizeY * cubeSize.sizeZ * 4) { + // The bound of nextInt is exclusive: 256 is needed to cover all the byte values. + (r.nextInt(256) - 128).toByte() + } +} + +/** + * Create the identity cube, i.e. one that if used in Lut3d, the output is the same as the input + */ +@ExperimentalUnsignedTypes +fun identityCube(cubeSize: Dimension): ByteArray { + val data = ByteArray(cubeSize.sizeX * cubeSize.sizeY * cubeSize.sizeZ * 4) + val cube = Rgba3dArray(data, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ) + for (z in 0 until cubeSize.sizeZ) { + for (y in 0 until cubeSize.sizeY) { + for (x in 0 until cubeSize.sizeX) { + cube[x, y, z] = + byteArrayOf( + (x * 255 / (cubeSize.sizeX - 1)).toByte(), + (y * 255 / (cubeSize.sizeY - 1)).toByte(), + (z * 255 / (cubeSize.sizeZ - 1)).toByte(), + (255).toByte() + ) + } + } + } + return data +} + +fun randomYuvArray(seed: Long, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray { + // YUV formats are not well defined for odd dimensions + require(sizeX % 2 == 0 && sizeY % 2 == 0) + val halfSizeX = sizeX / 2 + val halfSizeY = sizeY / 2 + var totalSize = 0 + when (format) { + YuvFormat.YV12 -> { + val strideX = roundUpTo16(sizeX) + totalSize = strideX * sizeY + roundUpTo16(strideX / 2) * halfSizeY * 2 + } + YuvFormat.NV21 
-> totalSize = sizeX * sizeY + halfSizeX * halfSizeY * 2 + else -> require(false) { "Unknown YUV format $format" } + } + + return randomByteArray(seed, totalSize, 1, 1) +} + +/** + * Converts a float to a byte, clamping to make it fit the limited range. + */ +@ExperimentalUnsignedTypes +fun Float.clampToUByte(): UByte = min(255, max(0, (this + 0.5f).toInt())).toUByte() + +/** + * Converts a FloatArray to UByteArray, clamping. + */ +@ExperimentalUnsignedTypes +fun FloatArray.clampToUByte() = UByteArray(size) { this[it].clampToUByte() } + +/** + * Limits an Int to what can fit in a UByte. + */ +fun Int.clampToUByteRange(): Int = min(255, max(0, this)) + +/** + * Converts an Int to a UByte, clamping. + */ +@ExperimentalUnsignedTypes +fun Int.clampToUByte(): UByte = this.clampToUByteRange().toUByte() + +/** + * Converts a float (0f .. 1f) to a byte (0 .. 255) + */ +@ExperimentalUnsignedTypes +fun unitFloatClampedToUByte(num: Float): UByte = (num * 255f).clampToUByte() + +/** + * Convert a byte (0 .. 255) to a float (0f .. 1f) + */ +@ExperimentalUnsignedTypes +fun byteToUnitFloat(num: UByte) = num.toFloat() * 0.003921569f + +@ExperimentalUnsignedTypes +fun UByteArray.toFloatArray() = FloatArray(size) { this[it].toFloat() } + +/** + * For each cell that's in the 2D array defined by sizeX and sizeY, and clipped down by the + * restriction, invoke the work function. 
+ */ +fun forEachCell(sizeX: Int, sizeY: Int, restriction: Range2d?, work: (Int, Int) -> (Unit)) { + val startX = restriction?.startX ?: 0 + val startY = restriction?.startY ?: 0 + val endX = restriction?.endX ?: sizeX + val endY = restriction?.endY ?: sizeY + for (y in startY until endY) { + for (x in startX until endX) { + work(x, y) + } + } +} + +operator fun FloatArray.times(other: FloatArray) = FloatArray(size) { this[it] * other[it] } +operator fun FloatArray.times(other: Float) = FloatArray(size) { this[it] * other } +operator fun FloatArray.plus(other: FloatArray) = FloatArray(size) { this[it] + other[it] } +operator fun FloatArray.minus(other: FloatArray) = FloatArray(size) { this[it] - other[it] } + +fun renderScriptVectorElementForU8(rs: RenderScript?, vectorSize: Int): Element { + when (vectorSize) { + 1 -> return Element.U8(rs) + 2 -> return Element.U8_2(rs) + 3 -> return Element.U8_3(rs) + 4 -> return Element.U8_4(rs) + } + throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. $vectorSize provided.") +} + +fun renderScriptVectorElementForI32(rs: RenderScript?, vectorSize: Int): Element { + when (vectorSize) { + 1 -> return Element.I32(rs) + 2 -> return Element.I32_2(rs) + 3 -> return Element.I32_3(rs) + 4 -> return Element.I32_4(rs) + } + throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. $vectorSize provided.") +} + +/* When we'll handle floats +fun renderScriptVectorElementForF32(rs: RenderScript?, vectorSize: Int): Element { + when (vectorSize) { + 1 -> return Element.F32(rs) + 2 -> return Element.F32_2(rs) + 3 -> return Element.F32_3(rs) + 4 -> return Element.F32_4(rs) + } + throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. 
$vectorSize provided.") +}*/ + +/** + * Returns the RenderScript Element that matches the config of the bitmap. + * + * Only ALPHA_8 and ARGB_8888 are handled; other configs throw IllegalArgumentException. + */ +fun renderScriptElementForBitmap(context: RenderScript, bitmap: Bitmap): Element { + return when (val config = bitmap.config) { + Bitmap.Config.ALPHA_8 -> Element.A_8(context) + Bitmap.Config.ARGB_8888 -> Element.RGBA_8888(context) + else -> throw IllegalArgumentException("RenderScript Toolkit can't support bitmaps with config $config.") + } +} + +/** + * Copies the pixels of the bitmap into a newly allocated array of bitmap.byteCount bytes. + */ +fun getBitmapBytes(bitmap: Bitmap): ByteArray { + val buffer: ByteBuffer = ByteBuffer.allocate(bitmap.byteCount) + bitmap.copyPixelsToBuffer(buffer) + return buffer.array() +} + +/** + * Returns the vector size for the config of the bitmap: 1 for ALPHA_8, 4 for ARGB_8888. + * + * Other configs throw IllegalArgumentException. + */ +fun vectorSizeOfBitmap(bitmap: Bitmap): Int { + return when (val config = bitmap.config) { + Bitmap.Config.ALPHA_8 -> 1 + Bitmap.Config.ARGB_8888 -> 4 + else -> throw IllegalArgumentException("RenderScript Toolkit can't support bitmaps with config $config.") + } +} + +/** + * Returns a new bitmap of the same dimensions and config, with the original drawn onto it. + */ +fun duplicateBitmap(original: Bitmap): Bitmap { + val copy = Bitmap.createBitmap(original.width, original.height, original.config) + val canvas = Canvas(copy) + canvas.drawBitmap(original, 0f, 0f, null) + return copy +} + +/** + * Prints the prefix, the size of the array, and at most number of its values, shown as + * unsigned bytes. The trailing \n leaves an empty line after the output. + */ +@ExperimentalUnsignedTypes +fun logArray(prefix: String, array: ByteArray, number: Int = 20) { + val values = array.joinToString(limit = number) { it.toUByte().toString() } + println("$prefix[${array.size}] $values\n") +} + +/** + * Prints the prefix, the size of the array, and at most number of its values. + */ +fun logArray(prefix: String, array: IntArray, number: Int = 20) { + val values = array.joinToString(limit = number) + println("$prefix[${array.size}] $values\n") +} + +/** + * Prints the prefix, the size of the array, and at most number of its values, formatted + * with two decimals. A null array is shown as (null). + */ +fun logArray(prefix: String, array: FloatArray?, number: Int = 20) { + val values = array?.joinToString(limit = number) { "%.2f".format(it) } ?: "(null)" + println("$prefix[${array?.size}] $values\n") +} + +/** + * Rounds a non-negative value up to the nearest multiple of 16. + */ +fun roundUpTo16(value: Int): Int { + require(value >= 0) + return (value + 15) and 15.inv() +} diff --git a/toolkit/test/IntrinsicBlend.kt b/toolkit/test/IntrinsicBlend.kt new file mode 100644 index 00000000..873cb158 --- /dev/null +++ b/toolkit/test/IntrinsicBlend.kt @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2021 The Android Open Source 
Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.testapp + +import android.graphics.Bitmap +import android.renderscript.Allocation +import android.renderscript.Element +import android.renderscript.RenderScript +import android.renderscript.Script +import android.renderscript.ScriptIntrinsicBlend +import android.renderscript.Type +import android.renderscript.toolkit.BlendingMode +import android.renderscript.toolkit.Range2d + +/** + * Does a Blend operation using the RenderScript Intrinsics. + */ +fun intrinsicBlend( + context: RenderScript, + mode: BlendingMode, + sourceArray: ByteArray, + destArray: ByteArray, + sizeX: Int, + sizeY: Int, + restriction: Range2d? 
/**
 * Does a Blend operation on two bitmaps using the RenderScript Intrinsics.
 *
 * Both bitmaps are wrapped in Allocations, the kernel selected by [mode] is launched, and the
 * destination Allocation is copied back into [destBitmap]. The result is therefore delivered
 * by overwriting [destBitmap]; nothing is returned.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param mode Selects which blend kernel is run.
 * @param sourceBitmap Source operand of the blend.
 * @param destBitmap Destination operand; receives the blended pixels.
 * @param restriction If not null, the launch is limited to this X/Y range.
 */
fun intrinsicBlend(
    context: RenderScript,
    mode: BlendingMode,
    sourceBitmap: Bitmap,
    destBitmap: Bitmap,
    restriction: Range2d?
) {
    val scriptBlend = ScriptIntrinsicBlend.create(context, Element.U8_4(context))
    val sourceAllocation = Allocation.createFromBitmap(context, sourceBitmap)
    val destAllocation = Allocation.createFromBitmap(context, destBitmap)
    sourceAllocation.copyFrom(sourceBitmap)
    destAllocation.copyFrom(destBitmap)

    callBlendForEach(scriptBlend, sourceAllocation, destAllocation, mode, restriction)
    destAllocation.copyTo(destBitmap)

    sourceAllocation.destroy()
    destAllocation.destroy()
    scriptBlend.destroy()
}

/**
 * Launches the blend kernel that corresponds to [mode].
 *
 * @param scriptBlend The Blend intrinsic to invoke.
 * @param sourceAllocation Source operand.
 * @param destAllocation Destination operand; the kernel writes its output here.
 * @param mode Selects the forEachXxx kernel.
 * @param restriction If not null, converted to [Script.LaunchOptions] so that only that
 *   X/Y range is processed; if null the launch is unrestricted.
 */
private fun callBlendForEach(
    scriptBlend: ScriptIntrinsicBlend,
    sourceAllocation: Allocation,
    destAllocation: Allocation,
    mode: BlendingMode,
    restriction: Range2d?
) {
    // The two-argument forEachXxx(ain, aout) overloads of ScriptIntrinsicBlend forward to the
    // three-argument ones with null options, so a single dispatch with a nullable
    // LaunchOptions covers both the restricted and the unrestricted case.
    val options: Script.LaunchOptions? = restriction?.let { range ->
        Script.LaunchOptions().apply {
            setX(range.startX, range.endX)
            setY(range.startY, range.endY)
        }
    }
    when (mode) {
        BlendingMode.CLEAR ->
            scriptBlend.forEachClear(sourceAllocation, destAllocation, options)
        BlendingMode.SRC ->
            scriptBlend.forEachSrc(sourceAllocation, destAllocation, options)
        BlendingMode.DST ->
            scriptBlend.forEachDst(sourceAllocation, destAllocation, options)
        BlendingMode.SRC_OVER ->
            scriptBlend.forEachSrcOver(sourceAllocation, destAllocation, options)
        BlendingMode.DST_OVER ->
            scriptBlend.forEachDstOver(sourceAllocation, destAllocation, options)
        BlendingMode.SRC_IN ->
            scriptBlend.forEachSrcIn(sourceAllocation, destAllocation, options)
        BlendingMode.DST_IN ->
            scriptBlend.forEachDstIn(sourceAllocation, destAllocation, options)
        BlendingMode.SRC_OUT ->
            scriptBlend.forEachSrcOut(sourceAllocation, destAllocation, options)
        BlendingMode.DST_OUT ->
            scriptBlend.forEachDstOut(sourceAllocation, destAllocation, options)
        BlendingMode.SRC_ATOP ->
            scriptBlend.forEachSrcAtop(sourceAllocation, destAllocation, options)
        BlendingMode.DST_ATOP ->
            scriptBlend.forEachDstAtop(sourceAllocation, destAllocation, options)
        BlendingMode.XOR ->
            scriptBlend.forEachXor(sourceAllocation, destAllocation, options)
        BlendingMode.MULTIPLY ->
            scriptBlend.forEachMultiply(sourceAllocation, destAllocation, options)
        BlendingMode.ADD ->
            scriptBlend.forEachAdd(sourceAllocation, destAllocation, options)
        BlendingMode.SUBTRACT ->
            scriptBlend.forEachSubtract(sourceAllocation, destAllocation, options)
    }
}
/**
 * Does a Blur operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data copied into the input Allocation.
 * @param vectorSize Bytes per cell; 4 selects an RGBA_8888 script element, anything else U8.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param radius Blur radius handed to the intrinsic (converted to Float).
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of sizeX * sizeY * vectorSize bytes holding the output Allocation.
 */
fun intrinsicBlur(
    context: RenderScript,
    inputArray: ByteArray,
    vectorSize: Int,
    sizeX: Int,
    sizeY: Int,
    radius: Int,
    restriction: Range2d?
): ByteArray {
    val blurElement = if (vectorSize == 4) Element.RGBA_8888(context) else Element.U8(context)
    val blurScript = ScriptIntrinsicBlur.create(context, blurElement)

    val imageType = Type.Builder(context, renderScriptVectorElementForU8(context, vectorSize))
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    source.copyFrom(inputArray)
    val target = Allocation.createTyped(context, imageType)

    val result = ByteArray(sizeX * sizeY * vectorSize)
    blurScript.setRadius(radius.toFloat())
    blurScript.setInput(source)

    if (restriction == null) {
        blurScript.forEach(target)
    } else {
        // The freshly allocated result array is all zeros; pushing it into the output
        // Allocation clears the cells the restricted launch will not touch.
        target.copyFrom(result)
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        blurScript.forEach(target, window)
    }
    target.copyTo(result)

    source.destroy()
    target.destroy()
    imageType.destroy()
    blurScript.destroy()
    return result
}

/**
 * Does a Blur operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Image to blur; its element type is obtained via renderScriptElementForBitmap.
 * @param radius Blur radius handed to the intrinsic (converted to Float).
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of bitmap.byteCount bytes holding the output Allocation.
 */
fun intrinsicBlur(
    context: RenderScript,
    bitmap: Bitmap,
    radius: Int,
    restriction: Range2d?
): ByteArray {
    val blurScript =
        ScriptIntrinsicBlur.create(context, renderScriptElementForBitmap(context, bitmap))
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val target = Allocation.createTyped(context, source.type)
    val result = ByteArray(bitmap.byteCount)

    blurScript.setRadius(radius.toFloat())
    blurScript.setInput(source)

    if (restriction == null) {
        blurScript.forEach(target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        blurScript.forEach(target, window)
    }
    target.copyTo(result)

    source.destroy()
    target.destroy()
    blurScript.destroy()
    return result
}
/**
 * Does a ColorMatrix operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param conversion Which matrix to install: one of the built-in conversions, or RANDOM to
 *   use [matrix].
 * @param inputArray Pixel data copied into the input Allocation.
 * @param inputVectorSize Vector size of the input cells.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param outputVectorSize Vector size of the output cells.
 * @param matrix 16 coefficients, read only when [conversion] is RANDOM.
 * @param addVector Four values installed as the intrinsic's add vector.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of sizeX * sizeY * paddedSize(outputVectorSize) bytes.
 */
fun intrinsicColorMatrix(
    context: RenderScript,
    conversion: Tester.ColorMatrixConversionType,
    inputArray: ByteArray,
    inputVectorSize: Int,
    sizeX: Int,
    sizeY: Int,
    outputVectorSize: Int,
    matrix: FloatArray,
    addVector: FloatArray,
    restriction: Range2d?
): ByteArray {
    val colorScript = ScriptIntrinsicColorMatrix.create(context)

    val sourceType =
        Type.Builder(context, renderScriptVectorElementForU8(context, inputVectorSize))
            .setX(sizeX)
            .setY(sizeY)
            .create()
    val source = Allocation.createTyped(context, sourceType)

    val targetType =
        Type.Builder(context, renderScriptVectorElementForU8(context, outputVectorSize))
            .setX(sizeX)
            .setY(sizeY)
            .create()
    val target = Allocation.createTyped(context, targetType)

    source.copyFrom(inputArray)
    val result = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))

    when (conversion) {
        Tester.ColorMatrixConversionType.RGB_TO_YUV -> colorScript.setRGBtoYUV()
        Tester.ColorMatrixConversionType.YUV_TO_RGB -> colorScript.setYUVtoRGB()
        Tester.ColorMatrixConversionType.GREYSCALE -> colorScript.setGreyscale()
        Tester.ColorMatrixConversionType.RANDOM -> {
            // RS is column major: consecutive coefficients share the first index of set().
            val custom = Matrix4f()
            for (i in 0 until 16) {
                custom.set(i / 4, i % 4, matrix[i])
            }
            colorScript.setColorMatrix(custom)
        }
    }
    colorScript.setAdd(Float4(addVector[0], addVector[1], addVector[2], addVector[3]))

    if (restriction == null) {
        colorScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        colorScript.forEach(source, target, window)
    }
    target.copyTo(result)

    source.destroy()
    target.destroy()
    sourceType.destroy()
    targetType.destroy()
    colorScript.destroy()
    return result
}

/**
 * Does a ColorMatrix operation on a bitmap using the RenderScript Intrinsics.
 *
 * The output Allocation has the same type as the Allocation created from [bitmap].
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param conversion Which matrix to install: one of the built-in conversions, or RANDOM to
 *   use [matrix].
 * @param bitmap Input image.
 * @param matrix 16 coefficients, read only when [conversion] is RANDOM.
 * @param addVector Four values installed as the intrinsic's add vector.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of bitmap.byteCount bytes holding the output Allocation.
 */
fun intrinsicColorMatrix(
    context: RenderScript,
    conversion: Tester.ColorMatrixConversionType,
    bitmap: Bitmap,
    matrix: FloatArray,
    addVector: FloatArray,
    restriction: Range2d?
): ByteArray {
    val colorScript = ScriptIntrinsicColorMatrix.create(context)
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val target = Allocation.createTyped(context, source.type)
    val result = ByteArray(bitmap.byteCount)

    when (conversion) {
        Tester.ColorMatrixConversionType.RGB_TO_YUV -> colorScript.setRGBtoYUV()
        Tester.ColorMatrixConversionType.YUV_TO_RGB -> colorScript.setYUVtoRGB()
        Tester.ColorMatrixConversionType.GREYSCALE -> colorScript.setGreyscale()
        Tester.ColorMatrixConversionType.RANDOM -> {
            // RS is column major: consecutive coefficients share the first index of set().
            val custom = Matrix4f()
            for (i in 0 until 16) {
                custom.set(i / 4, i % 4, matrix[i])
            }
            colorScript.setColorMatrix(custom)
        }
    }
    colorScript.setAdd(Float4(addVector[0], addVector[1], addVector[2], addVector[3]))

    if (restriction == null) {
        colorScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        colorScript.forEach(source, target, window)
    }
    target.copyTo(result)

    source.destroy()
    target.destroy()
    colorScript.destroy()
    return result
}

/**
 * Does a Convolve operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data copied into the input Allocation.
 * @param vectorSize Vector size of the cells.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param coefficients 9 values for a 3x3 kernel or 25 values for a 5x5 kernel.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of sizeX * sizeY * paddedSize(vectorSize) bytes.
 * @throws IllegalArgumentException if [coefficients] has neither 9 nor 25 entries.
 */
fun intrinsicConvolve(
    context: RenderScript,
    inputArray: ByteArray,
    vectorSize: Int,
    sizeX: Int,
    sizeY: Int,
    coefficients: FloatArray,
    restriction: Range2d?
): ByteArray {
    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
    val imageType = Type.Builder(context, cellElement)
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    val target = Allocation.createTyped(context, imageType)
    source.copyFrom(inputArray)

    val result = ByteArray(sizeX * sizeY * paddedSize(vectorSize))
    if (restriction != null) {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
    }
    invokeConvolveKernel(coefficients, context, cellElement, source, restriction, target)
    target.copyTo(result)

    source.destroy()
    target.destroy()
    imageType.destroy()
    return result
}

/**
 * Does a Convolve operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image.
 * @param coefficients 9 values for a 3x3 kernel or 25 values for a 5x5 kernel.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of bitmap.byteCount bytes holding the output Allocation.
 * @throws IllegalArgumentException if [coefficients] has neither 9 nor 25 entries.
 */
fun intrinsicConvolve(
    context: RenderScript,
    bitmap: Bitmap,
    coefficients: FloatArray,
    restriction: Range2d?
): ByteArray {
    val cellElement = renderScriptElementForBitmap(context, bitmap)

    val source = Allocation.createFromBitmap(context, bitmap)
    val target = Allocation.createTyped(context, source.type)
    val result = ByteArray(bitmap.byteCount)
    source.copyFrom(bitmap)
    if (restriction != null) {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
    }
    invokeConvolveKernel(coefficients, context, cellElement, source, restriction, target)
    target.copyTo(result)

    source.destroy()
    target.destroy()
    return result
}

/**
 * Creates, runs and destroys the 3x3 or 5x5 convolution intrinsic, chosen by the number of
 * [coefficients].
 *
 * @param coefficients 9 values select the 3x3 script, 25 values the 5x5 script.
 * @param context RenderScript context used to create the script.
 * @param baseElement Element type the script is created for.
 * @param inputAllocation Allocation set as the script input.
 * @param restriction If not null, the launch is limited to this X/Y range.
 * @param outAllocation Allocation the kernel writes to.
 * @throws IllegalArgumentException for any other number of coefficients.
 */
private fun invokeConvolveKernel(
    coefficients: FloatArray,
    context: RenderScript,
    baseElement: Element,
    inputAllocation: Allocation?,
    restriction: Range2d?,
    outAllocation: Allocation?
) {
    when (coefficients.size) {
        9 -> {
            val kernel3x3 = ScriptIntrinsicConvolve3x3.create(context, baseElement)
            kernel3x3.setCoefficients(coefficients)
            kernel3x3.setInput(inputAllocation)
            if (restriction == null) {
                kernel3x3.forEach(outAllocation)
            } else {
                val window = Script.LaunchOptions()
                window.setX(restriction.startX, restriction.endX)
                window.setY(restriction.startY, restriction.endY)
                kernel3x3.forEach(outAllocation, window)
            }
            kernel3x3.destroy()
        }
        25 -> {
            val kernel5x5 = ScriptIntrinsicConvolve5x5.create(context, baseElement)
            kernel5x5.setCoefficients(coefficients)
            kernel5x5.setInput(inputAllocation)
            if (restriction == null) {
                kernel5x5.forEach(outAllocation)
            } else {
                val window = Script.LaunchOptions()
                window.setX(restriction.startX, restriction.endX)
                window.setY(restriction.startY, restriction.endY)
                kernel5x5.forEach(outAllocation, window)
            }
            kernel5x5.destroy()
        }
        else -> throw IllegalArgumentException("RenderScriptToolkit tests. Only 3x3 and 5x5 convolutions are supported. ${coefficients.size} coefficients provided.")
    }
}
/**
 * Does a Histogram operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data copied into the input Allocation.
 * @param vectorSize Vector size of the input cells.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param restriction If not null, only this X/Y range of the input is processed.
 * @return A new array of 256 * paddedSize(vectorSize) ints copied from the output Allocation.
 */
fun intrinsicHistogram(
    context: RenderScript,
    inputArray: ByteArray,
    vectorSize: Int,
    sizeX: Int,
    sizeY: Int,
    restriction: Range2d?
): IntArray {
    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
    val histogramScript = ScriptIntrinsicHistogram.create(context, cellElement)
    val imageType = Type.Builder(context, cellElement)
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    // 256 output cells, each an I32 vector of the same vector size as the input.
    val bins = Allocation.createSized(
        context,
        renderScriptVectorElementForI32(context, vectorSize),
        256
    )
    source.copyFrom(inputArray)
    histogramScript.setOutput(bins)

    if (restriction == null) {
        histogramScript.forEach(source)
    } else {
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        histogramScript.forEach(source, window)
    }

    val result = IntArray(256 * paddedSize(vectorSize))
    bins.copyTo(result)
    source.destroy()
    bins.destroy()
    imageType.destroy()
    histogramScript.destroy()
    return result
}

/**
 * Does a Histogram operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image; its vector size comes from vectorSizeOfBitmap.
 * @param restriction If not null, only this X/Y range of the input is processed.
 * @return A new array of 256 * vectorSize ints copied from the output Allocation.
 */
fun intrinsicHistogram(
    context: RenderScript,
    bitmap: Bitmap,
    restriction: Range2d?
): IntArray {
    val histogramScript =
        ScriptIntrinsicHistogram.create(context, renderScriptElementForBitmap(context, bitmap))
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val vectorSize = vectorSizeOfBitmap(bitmap)
    val bins = Allocation.createSized(
        context,
        renderScriptVectorElementForI32(context, vectorSize),
        256
    )
    histogramScript.setOutput(bins)

    if (restriction == null) {
        histogramScript.forEach(source)
    } else {
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        histogramScript.forEach(source, window)
    }

    val result = IntArray(256 * vectorSize)
    bins.copyTo(result)
    source.destroy()
    bins.destroy()
    histogramScript.destroy()
    return result
}

/**
 * Does a HistogramDot operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data copied into the input Allocation.
 * @param vectorSize Vector size of the input cells.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param coefficients If not null, must hold exactly [vectorSize] values; they are installed
 *   as dot coefficients, with 0f for the components beyond [vectorSize]. If null the
 *   script's coefficients are left untouched.
 * @param restriction If not null, only this X/Y range of the input is processed.
 * @return A new array of 256 ints copied from the output Allocation.
 * @throws IllegalArgumentException if [coefficients] has the wrong number of entries.
 */
fun intrinsicHistogramDot(
    context: RenderScript,
    inputArray: ByteArray,
    vectorSize: Int,
    sizeX: Int,
    sizeY: Int,
    coefficients: FloatArray?,
    restriction: Range2d?
): IntArray {
    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
    val histogramScript = ScriptIntrinsicHistogram.create(context, cellElement)
    val imageType = Type.Builder(context, cellElement)
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    val bins = Allocation.createSized(context, Element.I32(context), 256)
    source.copyFrom(inputArray)

    if (coefficients != null) {
        require(coefficients.size == vectorSize) {
            "RenderScriptToolkit tests. $vectorSize coefficients are required for histogram. " +
                "${coefficients.size} provided."
        }
        // After the size check, index k exists exactly when vectorSize > k, so a missing
        // component falls back to 0f.
        histogramScript.setDotCoefficients(
            coefficients[0],
            coefficients.getOrElse(1) { 0f },
            coefficients.getOrElse(2) { 0f },
            coefficients.getOrElse(3) { 0f }
        )
    }
    histogramScript.setOutput(bins)

    if (restriction == null) {
        histogramScript.forEach_Dot(source)
    } else {
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        histogramScript.forEach_Dot(source, window)
    }

    val result = IntArray(256)
    bins.copyTo(result)
    source.destroy()
    bins.destroy()
    imageType.destroy()
    histogramScript.destroy()
    return result
}

/**
 * Does a HistogramDot operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image.
 * @param coefficients If not null, must hold exactly four values, installed as the dot
 *   coefficients. If null the script's coefficients are left untouched.
 * @param restriction If not null, only this X/Y range of the input is processed.
 * @return A new array of 256 ints copied from the output Allocation.
 * @throws IllegalArgumentException if [coefficients] does not have four entries.
 */
fun intrinsicHistogramDot(
    context: RenderScript,
    bitmap: Bitmap,
    coefficients: FloatArray?,
    restriction: Range2d?
): IntArray {
    val histogramScript =
        ScriptIntrinsicHistogram.create(context, renderScriptElementForBitmap(context, bitmap))
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val bins = Allocation.createSized(context, Element.I32(context), 256)

    if (coefficients != null) {
        require(coefficients.size == 4) {
            "RenderScriptToolkit tests. Four coefficients are required for histogram. " +
                "${coefficients.size} provided."
        }
        val (c0, c1, c2, c3) = coefficients
        histogramScript.setDotCoefficients(c0, c1, c2, c3)
    }
    histogramScript.setOutput(bins)

    if (restriction == null) {
        histogramScript.forEach_Dot(source)
    } else {
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        histogramScript.forEach_Dot(source, window)
    }

    val result = IntArray(256)
    bins.copyTo(result)
    source.destroy()
    bins.destroy()
    histogramScript.destroy()
    return result
}
/**
 * Does a LookUpTable operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data (U8_4 cells) copied into the input Allocation.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param newRed Table for the red channel; entries 0..255 are read as unsigned bytes.
 * @param newGreen Table for the green channel; entries 0..255 are read as unsigned bytes.
 * @param newBlue Table for the blue channel; entries 0..255 are read as unsigned bytes.
 * @param newAlpha Table for the alpha channel; entries 0..255 are read as unsigned bytes.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of sizeX * sizeY * 4 bytes holding the output Allocation.
 */
@ExperimentalUnsignedTypes
fun intrinsicLut(
    context: RenderScript,
    inputArray: ByteArray,
    sizeX: Int,
    sizeY: Int,
    newRed: ByteArray,
    newGreen: ByteArray,
    newBlue: ByteArray,
    newAlpha: ByteArray,
    restriction: Range2d?
): ByteArray {
    val lutScript: ScriptIntrinsicLUT = ScriptIntrinsicLUT.create(context, Element.U8_4(context))
    val imageType = Type.Builder(context, Element.U8_4(context))
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    val target = Allocation.createTyped(context, imageType)
    source.copyFrom(inputArray)
    val result = ByteArray(sizeX * sizeY * 4)

    // Install all 256 table entries per channel, widening each byte as an unsigned value.
    repeat(256) { v ->
        lutScript.setRed(v, newRed[v].toUByte().toInt())
        lutScript.setGreen(v, newGreen[v].toUByte().toInt())
        lutScript.setBlue(v, newBlue[v].toUByte().toInt())
        lutScript.setAlpha(v, newAlpha[v].toUByte().toInt())
    }

    if (restriction == null) {
        lutScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        lutScript.forEach(source, target, window)
    }

    target.copyTo(result)
    source.destroy()
    target.destroy()
    imageType.destroy()
    lutScript.destroy()
    return result
}

/**
 * Does a LookUpTable operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image.
 * @param newRed Table for the red channel; entries 0..255 are read as unsigned bytes.
 * @param newGreen Table for the green channel; entries 0..255 are read as unsigned bytes.
 * @param newBlue Table for the blue channel; entries 0..255 are read as unsigned bytes.
 * @param newAlpha Table for the alpha channel; entries 0..255 are read as unsigned bytes.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of bitmap.byteCount bytes holding the output Allocation.
 */
@ExperimentalUnsignedTypes
fun intrinsicLut(
    context: RenderScript,
    bitmap: Bitmap,
    newRed: ByteArray,
    newGreen: ByteArray,
    newBlue: ByteArray,
    newAlpha: ByteArray,
    restriction: Range2d?
): ByteArray {
    val cellElement = renderScriptElementForBitmap(context, bitmap)
    val lutScript: ScriptIntrinsicLUT = ScriptIntrinsicLUT.create(context, cellElement)
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val target = Allocation.createTyped(context, source.type)
    val result = ByteArray(bitmap.byteCount)

    // Install all 256 table entries per channel, widening each byte as an unsigned value.
    repeat(256) { v ->
        lutScript.setRed(v, newRed[v].toUByte().toInt())
        lutScript.setGreen(v, newGreen[v].toUByte().toInt())
        lutScript.setBlue(v, newBlue[v].toUByte().toInt())
        lutScript.setAlpha(v, newAlpha[v].toUByte().toInt())
    }

    if (restriction == null) {
        lutScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        lutScript.forEach(source, target, window)
    }

    target.copyTo(result)
    source.destroy()
    target.destroy()
    lutScript.destroy()
    return result
}
/**
 * Does a 3D LookUpTable operation on a byte array using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data (U8_4 cells) copied into the input Allocation.
 * @param sizeX Width of the image, in cells.
 * @param sizeY Height of the image, in cells.
 * @param cubeArray Contents of the lookup cube, copied into a U8_4 Allocation.
 * @param cubeSize X/Y/Z dimensions of the lookup cube.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of sizeX * sizeY * 4 bytes holding the output Allocation.
 */
fun intrinsicLut3d(
    context: RenderScript,
    inputArray: ByteArray,
    sizeX: Int,
    sizeY: Int,
    cubeArray: ByteArray,
    cubeSize: Dimension,
    restriction: Range2d?
): ByteArray {
    val lutScript: ScriptIntrinsic3DLUT =
        ScriptIntrinsic3DLUT.create(context, Element.U8_4(context))
    val imageType = Type.Builder(context, Element.U8_4(context))
        .setX(sizeX)
        .setY(sizeY)
        .create()
    val source = Allocation.createTyped(context, imageType)
    val target = Allocation.createTyped(context, imageType)
    source.copyFrom(inputArray)
    val result = ByteArray(sizeX * sizeY * 4)

    // Upload the cube into its own three-dimensional Allocation.
    val cubeType: Type = Type.Builder(context, Element.U8_4(context))
        .setX(cubeSize.sizeX)
        .setY(cubeSize.sizeY)
        .setZ(cubeSize.sizeZ)
        .create()
    val cube = Allocation.createTyped(context, cubeType)
    cube.copyFrom(cubeArray)
    lutScript.setLUT(cube)

    if (restriction == null) {
        lutScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        lutScript.forEach(source, target, window)
    }

    target.copyTo(result)
    source.destroy()
    target.destroy()
    cube.destroy()
    imageType.destroy()
    cubeType.destroy()
    lutScript.destroy()
    return result
}

/**
 * Does a 3D LookUpTable operation on a bitmap using the RenderScript Intrinsics.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image.
 * @param cubeArray Contents of the lookup cube, copied into a U8_4 Allocation.
 * @param cubeSize X/Y/Z dimensions of the lookup cube.
 * @param restriction If not null, only this X/Y range is processed and the rest of the
 *   output is left at zero.
 * @return A new array of bitmap.byteCount bytes holding the output Allocation.
 */
fun intrinsicLut3d(
    context: RenderScript,
    bitmap: Bitmap,
    cubeArray: ByteArray,
    cubeSize: Dimension,
    restriction: Range2d?
): ByteArray {
    val cellElement = renderScriptElementForBitmap(context, bitmap)
    val lutScript: ScriptIntrinsic3DLUT = ScriptIntrinsic3DLUT.create(context, cellElement)
    val source = Allocation.createFromBitmap(context, bitmap)
    source.copyFrom(bitmap)
    val target = Allocation.createTyped(context, source.type)
    val result = ByteArray(bitmap.byteCount)

    // Upload the cube into its own three-dimensional Allocation.
    val cubeType: Type = Type.Builder(context, Element.U8_4(context))
        .setX(cubeSize.sizeX)
        .setY(cubeSize.sizeY)
        .setZ(cubeSize.sizeZ)
        .create()
    val cube = Allocation.createTyped(context, cubeType)
    cube.copyFrom(cubeArray)
    lutScript.setLUT(cube)

    if (restriction == null) {
        lutScript.forEach(source, target)
    } else {
        target.copyFrom(result) // Zero-fill the output before the partial launch.
        val window = Script.LaunchOptions()
        window.setX(restriction.startX, restriction.endX)
        window.setY(restriction.startY, restriction.endY)
        lutScript.forEach(source, target, window)
    }

    target.copyTo(result)
    source.destroy()
    target.destroy()
    cube.destroy()
    cubeType.destroy()
    lutScript.destroy()
    return result
}
/**
 * Does a Resize operation on a byte array using the RenderScript Intrinsics.
 *
 * The input is resampled with the intrinsic's bicubic kernel into an output of
 * [outSizeX] x [outSizeY] cells.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param inputArray Pixel data copied into the input Allocation.
 * @param vectorSize Vector size of the cells (same for input and output).
 * @param inSizeX Width of the input image, in cells.
 * @param inSizeY Height of the input image, in cells.
 * @param outSizeX Width of the output image, in cells.
 * @param outSizeY Height of the output image, in cells.
 * @param restriction If not null, only this X/Y range of the output is computed and the
 *   rest is left at zero.
 * @return A new array of outSizeX * outSizeY * paddedSize(vectorSize) bytes.
 */
fun intrinsicResize(
    context: RenderScript,
    inputArray: ByteArray,
    vectorSize: Int,
    inSizeX: Int,
    inSizeY: Int,
    outSizeX: Int,
    outSizeY: Int,
    restriction: Range2d?
): ByteArray {
    val scriptResize = ScriptIntrinsicResize.create(context)
    // One builder is reused for both types: first with the input size, then the output size.
    val builder = Type.Builder(
        context,
        renderScriptVectorElementForU8(context, vectorSize)
    )
    builder.setX(inSizeX)
    builder.setY(inSizeY)
    val inputArrayType = builder.create()
    val inputAllocation = Allocation.createTyped(context, inputArrayType)
    builder.setX(outSizeX)
    builder.setY(outSizeY)
    val outputArrayType = builder.create()
    val outAllocation = Allocation.createTyped(context, outputArrayType)
    val intrinsicOutArray = ByteArray(outSizeX * outSizeY * paddedSize(vectorSize))

    inputAllocation.copyFrom(inputArray)
    scriptResize.setInput(inputAllocation)
    if (restriction != null) {
        outAllocation.copyFrom(intrinsicOutArray) // To initialize to zero
        val options = Script.LaunchOptions()
        options.setX(restriction.startX, restriction.endX)
        options.setY(restriction.startY, restriction.endY)
        scriptResize.forEach_bicubic(outAllocation, options)
    } else {
        scriptResize.forEach_bicubic(outAllocation)
    }
    outAllocation.copyTo(intrinsicOutArray)

    inputAllocation.destroy()
    outAllocation.destroy()
    scriptResize.destroy()
    inputArrayType.destroy()
    outputArrayType.destroy()
    return intrinsicOutArray
}

/**
 * Does a Resize operation on a bitmap using the RenderScript Intrinsics.
 *
 * Only ARGB_8888 (vector size 4) and ALPHA_8 (vector size 1) bitmaps are accepted.
 *
 * @param context RenderScript context used to create the script and the Allocations.
 * @param bitmap Input image.
 * @param outSizeX Width of the output image, in cells.
 * @param outSizeY Height of the output image, in cells.
 * @param restriction If not null, only this X/Y range of the output is computed and the
 *   rest is left at zero.
 * @return A new array of outSizeX * outSizeY * vectorSize bytes.
 * @throws IllegalStateException if the bitmap has any other config.
 */
fun intrinsicResize(
    context: RenderScript,
    bitmap: Bitmap,
    outSizeX: Int,
    outSizeY: Int,
    restriction: Range2d?
): ByteArray {
    val scriptResize = ScriptIntrinsicResize.create(context)
    val inputAllocation = Allocation.createFromBitmap(context, bitmap)
    inputAllocation.copyFrom(bitmap)

    val vectorSize = when (bitmap.config) {
        Bitmap.Config.ARGB_8888 -> 4
        Bitmap.Config.ALPHA_8 -> 1
        // Braces are required: "$bitmap.config" would print the bitmap object followed by
        // the literal text ".config" instead of the offending config value.
        else -> error("Unrecognized bitmap config ${bitmap.config}")
    }
    val builder = Type.Builder(
        context,
        renderScriptVectorElementForU8(context, vectorSize)
    )
    builder.setX(outSizeX)
    builder.setY(outSizeY)
    val outputArrayType = builder.create()
    val outAllocation = Allocation.createTyped(context, outputArrayType)
    val intrinsicOutArray = ByteArray(outSizeX * outSizeY * vectorSize)

    scriptResize.setInput(inputAllocation)
    if (restriction != null) {
        outAllocation.copyFrom(intrinsicOutArray) // To initialize to zero
        val options = Script.LaunchOptions()
        options.setX(restriction.startX, restriction.endX)
        options.setY(restriction.startY, restriction.endY)
        scriptResize.forEach_bicubic(outAllocation, options)
    } else {
        scriptResize.forEach_bicubic(outAllocation)
    }
    outAllocation.copyTo(intrinsicOutArray)

    inputAllocation.destroy()
    outAllocation.destroy()
    outputArrayType.destroy()
    scriptResize.destroy()
    return intrinsicOutArray
}
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.ImageFormat
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.ScriptIntrinsicYuvToRGB
+import android.renderscript.Type
+import android.renderscript.toolkit.YuvFormat
+
+/**
+ * Converts a YUV image to RGBA using the RenderScript YuvToRGB intrinsic.
+ *
+ * @param context RenderScript context used to create the script, the types and the allocations.
+ * @param inputArray Bytes of the YUV input, laid out as described by [format].
+ * @param sizeX Width of the image.
+ * @param sizeY Height of the image.
+ * @param format Layout of the input; NV21 and YV12 are handled, anything else is rejected
+ * with an IllegalArgumentException.
+ * @return A new array of sizeX * sizeY * 4 bytes, read back from a U8_4 allocation.
+ */
+fun intrinsicYuvToRgb(
+    context: RenderScript,
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    format: YuvFormat
+): ByteArray {
+    val script = ScriptIntrinsicYuvToRGB.create(context, Element.YUV(context))
+
+    // Describe the YUV input.
+    val yuvTypeBuilder = Type.Builder(context, Element.YUV(context))
+    yuvTypeBuilder.setX(sizeX)
+    yuvTypeBuilder.setY(sizeY)
+    val imageFormat = when (format) {
+        YuvFormat.NV21 -> ImageFormat.NV21
+        YuvFormat.YV12 -> ImageFormat.YV12
+        else -> throw IllegalArgumentException("Unknown YUV format $format")
+    }
+    yuvTypeBuilder.setYuvFormat(imageFormat)
+    val yuvType = yuvTypeBuilder.create()
+    val yuvAllocation = Allocation.createTyped(context, yuvType)
+
+    // Describe the RGBA output: one U8_4 cell per pixel.
+    val rgbaTypeBuilder = Type.Builder(context, Element.U8_4(context))
+    rgbaTypeBuilder.setX(sizeX)
+    rgbaTypeBuilder.setY(sizeY)
+    val rgbaType = rgbaTypeBuilder.create()
+    val rgbaAllocation = Allocation.createTyped(context, rgbaType)
+    val rgbaBytes = ByteArray(sizeX * sizeY * 4)
+
+    yuvAllocation.copyFrom(inputArray)
+    script.setInput(yuvAllocation)
+    script.forEach(rgbaAllocation)
+    rgbaAllocation.copyTo(rgbaBytes)
+
+    // Release every RenderScript object created above.
+    yuvAllocation.destroy()
+    rgbaAllocation.destroy()
+    yuvType.destroy()
+    rgbaType.destroy()
+    script.destroy()
+    return rgbaBytes
+}
diff --git a/toolkit/test/MainActivity.kt b/toolkit/test/MainActivity.kt
new file mode 100644
index 00000000..4092861d
--- /dev/null
+++ b/toolkit/test/MainActivity.kt
@@ -0,0
+1,55 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.os.Bundle
+import android.widget.TextView
+import androidx.appcompat.app.AppCompatActivity
+
+/**
+ * Single screen of the test app: runs every test of [Tester] when created and shows the
+ * result, followed by the timing report, in the sample text view.
+ */
+@ExperimentalUnsignedTypes
+class MainActivity : AppCompatActivity() {
+
+    override fun onCreate(savedInstanceState: Bundle?) {
+        super.onCreate(savedInstanceState)
+        setContentView(R.layout.activity_main)
+        enableCloseGuard()
+
+        // With validation on, a single pass is run; otherwise the tests are repeated.
+        val validate = true
+        val tester = Tester(this, validate)
+        val iterationCount = if (validate) 1 else 28
+        val timer = TimingTracker(iterationCount, 0)
+        repeat(iterationCount) { index ->
+            val iteration = index + 1
+            println("*** Iteration $iteration of $iterationCount ****")
+            //startMethodTracing("myTracing")
+            //startMethodTracingSampling("myTracing_sample", 8000000, 10)
+            val result = tester.testAll(timer)
+            //stopMethodTracing()
+            findViewById<TextView>(R.id.sample_text).text = "$result\n\n${timer.report()}"
+            timer.nextIteration()
+        }
+        tester.destroy()
+    }
+
+    /**
+     * Turns on dalvik.system.CloseGuard through reflection.
+     *
+     * To debug resources not destroyed
+     * "A resource failed to call destroy."
+     */
+    private fun enableCloseGuard() {
+        try {
+            val closeGuard = Class.forName("dalvik.system.CloseGuard")
+            val setEnabled = closeGuard.getMethod("setEnabled", Boolean::class.javaPrimitiveType)
+            setEnabled.invoke(null, true)
+        } catch (e: ReflectiveOperationException) {
+            throw RuntimeException(e)
+        }
+    }
+}
diff --git a/toolkit/test/ReferenceBlend.kt b/toolkit/test/ReferenceBlend.kt
new file mode 100644
index 00000000..ba60bc82
--- /dev/null
+++ b/toolkit/test/ReferenceBlend.kt
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.BlendingMode
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Blend operation.
+ *
+ * See the class Rgba for details of arithmetic operation using that class.
+ */
+@ExperimentalUnsignedTypes
+fun referenceBlend(
+    mode: BlendingMode,
+    sourceArray: ByteArray,
+    destArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+) {
+    val sourcePixels = Rgba2dArray(sourceArray, sizeX, sizeY)
+    val destPixels = Rgba2dArray(destArray, sizeX, sizeY)
+
+    /**
+     * Replaces every destination cell selected by the restriction with the result of applying
+     * [operation] to the matching source cell and to the current destination cell.
+     */
+    fun combine(operation: (src: Rgba, dst: Rgba) -> Rgba) {
+        destPixels.forEachCell(restriction) { x, y ->
+            val blended = operation(sourcePixels[x, y], destPixels[x, y])
+            destPixels[x, y] = blended
+        }
+    }
+
+    when (mode) {
+        // Modes that do not mix the two inputs.
+        BlendingMode.CLEAR -> combine { _, _ -> Rgba(0, 0, 0, 0) }
+        BlendingMode.SRC -> combine { src, _ -> src }
+        BlendingMode.DST -> { /* This doesn't do anything. */ }
+
+        // Compositing modes. Each DST_* variant is the SRC_* formula with the operands swapped.
+        BlendingMode.SRC_OVER -> combine { src, dst -> blendOver(src, dst) }
+        BlendingMode.DST_OVER -> combine { src, dst -> blendOver(dst, src) }
+        BlendingMode.SRC_IN -> combine { src, dst -> blendIn(src, dst) }
+        BlendingMode.DST_IN -> combine { src, dst -> blendIn(dst, src) }
+        BlendingMode.SRC_OUT -> combine { src, dst -> blendOut(src, dst) }
+        BlendingMode.DST_OUT -> combine { src, dst -> blendOut(dst, src) }
+        BlendingMode.SRC_ATOP -> combine { src, dst -> blendAtop(src, dst) }
+        BlendingMode.DST_ATOP -> combine { src, dst -> blendAtop(dst, src) }
+
+        // Modes that map directly to an Rgba operator.
+        BlendingMode.XOR -> combine { src, dst -> src xor dst }
+        BlendingMode.MULTIPLY -> combine { src, dst -> src * dst }
+        BlendingMode.ADD -> combine { src, dst -> dst + src }
+        BlendingMode.SUBTRACT -> combine { src, dst -> dst - src }
+    }
+}
+
+/** "Over": the first value plus the second value scaled by (255 - alpha of the first). */
+@ExperimentalUnsignedTypes
+private fun blendOver(src: Rgba, dst: Rgba): Rgba {
+    return src + (dst * (255 - src.a))
+}
+
+/** "In": the first value scaled by the alpha of the second. */
+@ExperimentalUnsignedTypes
+private fun blendIn(src: Rgba, dst: Rgba): Rgba {
+    return src * dst.a
+}
+
+/** "Out": the first value scaled by (255 - alpha of the second). */
+@ExperimentalUnsignedTypes
+private fun blendOut(src: Rgba, dst: Rgba): Rgba {
+    return src * (255 - dst.a)
+}
+
+/**
+ * "Atop": the first value scaled by the alpha of the second, plus the second value scaled by
+ * (255 - alpha of the first). The alpha of the result is forced to the alpha of the second.
+ */
+@ExperimentalUnsignedTypes
+private fun blendAtop(src: Rgba, dst: Rgba): Rgba {
+    return (src * dst.a + dst * (255 - src.a)).apply { a = dst.a }
+}
diff --git a/toolkit/test/ReferenceBlur.kt b/toolkit/test/ReferenceBlur.kt
new file mode 100644
index 00000000..66c2a055
--- /dev/null
+++ b/toolkit/test/ReferenceBlur.kt
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import kotlin.math.max
+import kotlin.math.min
+import kotlin.math.pow
+import kotlin.math.sqrt
+
+/**
+ * Reference implementation of a Blur operation.
+ *
+ * The blur is separable: a horizontal pass is followed by a vertical pass, both using the same
+ * normalized gaussian weights.
+ *
+ * @param inputArray Bytes of the image to blur.
+ * @param vectorSize Number of values per cell.
+ * @param sizeX Width of the image.
+ * @param sizeY Height of the image.
+ * @param radius Blur radius; must be between 1 and 25.
+ * @param restriction If not null, only this range of the output is computed.
+ * @return A new byte array holding the blurred values.
+ */
+@ExperimentalUnsignedTypes
+fun referenceBlur(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    radius: Int = 5,
+    restriction: Range2d?
+): ByteArray {
+    val maxRadius = 25
+    require (radius in 1..maxRadius) {
+        "RenderScriptToolkit blur. Radius should be between 1 and $maxRadius. $radius provided."
+    }
+    val weights = buildGaussian(radius)
+
+    // Convert input data to float so that the blurring goes faster.
+    val floatValues = FloatArray(inputArray.size) { index ->
+        byteToUnitFloat(inputArray[index].toUByte())
+    }
+    val floatImage = FloatVector2dArray(floatValues, vectorSize, sizeX, sizeY)
+
+    val horizontallyBlurred = horizontalBlur(floatImage, weights, radius, restriction)
+    val blurred = verticalBlur(horizontallyBlurred, weights, radius, restriction)
+
+    // Convert the results back to bytes.
+    return ByteArray(blurred.values.size) { index ->
+        unitFloatClampedToUByte(blurred.values[index]).toByte()
+    }
+}
+
+/**
+ * Blurs along the horizontal direction using the specified gaussian weights.
+ *
+ * The rows covered are those of the restriction extended by [radius] above and below (clipped
+ * to the image), because the vertical pass that follows reads those extra rows.
+ */
+private fun horizontalBlur(
+    input: FloatVector2dArray,
+    gaussian: FloatArray,
+    radius: Int,
+    restriction: Range2d?
+): FloatVector2dArray {
+    val expandedRestriction: Range2d? = restriction?.let {
+        Range2d(
+            it.startX,
+            it.endX,
+            max(it.startY - radius, 0),
+            min(it.endY + radius, input.sizeY)
+        )
+    }
+
+    input.clipAccessToRange = true
+    val out = input.createSameSized()
+    out.forEach(expandedRestriction) { x, y ->
+        for (delta in -radius..radius) {
+            val weighted = input[x + delta, y] * gaussian[delta + radius]
+            out[x, y] += weighted
+        }
+    }
+    return out
+}
+
+/**
+ * Blurs along the vertical direction using the specified gaussian weights.
+ */
+private fun verticalBlur(
+    input: FloatVector2dArray,
+    gaussian: FloatArray,
+    radius: Int,
+    restriction: Range2d?
+): FloatVector2dArray {
+    input.clipAccessToRange = true
+    val out = input.createSameSized()
+    out.forEach(restriction) { x, y ->
+        for (delta in -radius..radius) {
+            val weighted = input[x, y + delta] * gaussian[delta + radius]
+            out[x, y] += weighted
+        }
+    }
+    return out
+}
+
+/**
+ * Builds the gaussian weights shared by the horizontal and the vertical blur passes.
+ *
+ * The standard deviation is 0.4 * radius + 0.6. The weights are normalized so that they add
+ * up to 1.
+ *
+ * @return An array of (2 * radius + 1) floats.
+ */
+private fun buildGaussian(radius: Int): FloatArray {
+    val e: Float = kotlin.math.E.toFloat()
+    val pi: Float = kotlin.math.PI.toFloat()
+    val sigma: Float = 0.4f * radius.toFloat() + 0.6f
+    val coefficient1: Float = 1.0f / (sqrt(2.0f * pi) * sigma)
+    val coefficient2: Float = -1.0f / (2.0f * sigma * sigma)
+
+    // The entry at index i is the weight for a distance of (i - radius) from the center.
+    val weights = FloatArray(radius * 2 + 1) { index ->
+        val distance: Float = (index - radius).toFloat()
+        coefficient1 * e.pow(distance * distance * coefficient2)
+    }
+
+    var total = 0.0f
+    for (weight in weights) {
+        total += weight
+    }
+
+    // Normalize so that the sum of the weights equal 1f.
+    val normalizeFactor: Float = 1.0f / total
+    for (index in weights.indices) {
+        weights[index] *= normalizeFactor
+    }
+    return weights
+}
diff --git a/toolkit/test/ReferenceColorMatrix.kt b/toolkit/test/ReferenceColorMatrix.kt
new file mode 100644
index 00000000..75f93af4
--- /dev/null
+++ b/toolkit/test/ReferenceColorMatrix.kt
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a ColorMatrix operation.
+ */
+@ExperimentalUnsignedTypes
+fun referenceColorMatrix(
+    inputArray: ByteArray,
+    inputVectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    outputVectorSize: Int,
+    matrix: FloatArray,
+    addVector: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    require (matrix.size == 16) { "RenderScriptToolkit colorMatrix. Matrix should have 16 values. ${matrix.size} provided." }
+
+    val source = Vector2dArray(inputArray.asUByteArray(), inputVectorSize, sizeX, sizeY)
+    val resultBytes = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))
+    // This view writes through to resultBytes, which is what gets returned.
+    val result = Vector2dArray(resultBytes.asUByteArray(), outputVectorSize, sizeX, sizeY)
+
+    result.forEach(restriction) { x, y ->
+        val cell = source[x, y]
+        // Always work on 4 floats; channels the input does not have are taken as 0.
+        val cellAsFloats = FloatArray(4) { channel ->
+            if (channel < inputVectorSize) byteToUnitFloat(cell[channel]) else 0f
+        }
+        val transformed = multiplyAndAdd(matrix, cellAsFloats, addVector)
+        result[x, y] = UByteArray(paddedSize(result.vectorSize)) { channel ->
+            unitFloatClampedToUByte(transformed[channel])
+        }
+    }
+    return resultBytes
+}
+
+/**
+ * Returns matrix * inVector + addVector, for a 4x4 matrix stored in column major order.
+ *
+ * [addVector] is not modified; the result is accumulated in a copy of it.
+ */
+private fun multiplyAndAdd(matrix: FloatArray, inVector: FloatArray, addVector: FloatArray): FloatArray {
+    val result = addVector.clone()
+    // In RenderScript, matrix were set in column major format
+    for (column in 0 until 4) {
+        val factor = inVector[column]
+        for (row in 0 until 4) {
+            result[row] += matrix[column * 4 + row] * factor
+        }
+    }
+    return result
+}
diff --git a/toolkit/test/ReferenceConvolve.kt b/toolkit/test/ReferenceConvolve.kt
new file mode 100644
index 00000000..b9181a96
--- /dev/null
+++ b/toolkit/test/ReferenceConvolve.kt
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Convolve operation.
+ *
+ * @param inputArray Bytes of the image to convolve.
+ * @param vectorSize Number of values per cell.
+ * @param sizeX Width of the image.
+ * @param sizeY Height of the image.
+ * @param coefficients The kernel, row by row: 9 values for a 3x3 convolution or 25 values for
+ * a 5x5 one. Any other size is rejected with an IllegalArgumentException.
+ * @param restriction If not null, only this range of the output is computed.
+ * @return The bytes of an array of the same size as the input.
+ */
+@ExperimentalUnsignedTypes
+fun referenceConvolve(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+    val coefficientCount = coefficients.size
+    val radius = if (coefficientCount == 9) {
+        1
+    } else if (coefficientCount == 25) {
+        2
+    } else {
+        throw IllegalArgumentException("RenderScriptToolkit Convolve. Only 3x3 and 5x5 convolutions are supported. ${coefficients.size} coefficients provided.")
+    }
+
+    // The kernel reads neighbours that fall outside of the image near the borders.
+    input.clipReadToRange = true
+    val output = input.createSameSized()
+    input.forEach(restriction) { x, y ->
+        output[x, y] = convolveOne(input, x, y, coefficients, radius)
+    }
+    return output.values.asByteArray()
+}
+
+/**
+ * Computes the convolved value of the cell at ([x], [y]): the sum of the neighbours within
+ * [radius], each multiplied by its coefficient, clamped to unsigned bytes.
+ */
+@ExperimentalUnsignedTypes
+private fun convolveOne(
+    inputAlloc: Vector2dArray,
+    x: Int,
+    y: Int,
+    coefficients: FloatArray,
+    radius: Int
+): UByteArray {
+    val kernelWidth = 2 * radius + 1
+    var accumulated = FloatArray(paddedSize(inputAlloc.vectorSize))
+    for (deltaY in -radius..radius) {
+        for (deltaX in -radius..radius) {
+            // Coefficients are stored row by row, starting with the top-left neighbour.
+            val weight = coefficients[(deltaY + radius) * kernelWidth + (deltaX + radius)]
+            accumulated += inputAlloc[x + deltaX, y + deltaY].toFloatArray() * weight
+        }
+    }
+    return accumulated.clampToUByte()
+}
diff --git a/toolkit/test/ReferenceHistogram.kt b/toolkit/test/ReferenceHistogram.kt
new file mode 100644
index 00000000..6bd91678
--- /dev/null
+++ b/toolkit/test/ReferenceHistogram.kt
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Histogram operation.
+ *
+ * Return an array of 4 * 256 ints.
+ * Position 0 is the number of R with a value of 0,
+ * Position 1 is the number of G with a value of 0,
+ * Position 2 is the number of B with a value of 0,
+ * Position 3 is the number of A with a value of 0,
+ * Position 4 is the number of R with a value of 1,
+ * etc.
+ *
+ * More precisely, the count of value v for channel c is stored at position
+ * v * paddedSize(vectorSize) + c, and the array has paddedSize(vectorSize) * 256 entries.
+*/
+@ExperimentalUnsignedTypes
+fun referenceHistogram(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+): IntArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+
+    val entriesPerValue = paddedSize(input.vectorSize)
+    val counts = IntArray(entriesPerValue * 256)
+    input.forEach(restriction) { x, y ->
+        val cell = input[x, y]
+        for (channel in 0 until vectorSize) {
+            val position = cell[channel].toInt() * entriesPerValue + channel
+            counts[position]++
+        }
+    }
+    return counts
+}
+
+/**
+ * Reference implementation of a HistogramDot operation.
+ *
+ * Each RGBA input value is dot-multiplied first by the specified coefficients.
+ * The resulting value is converted to an integer and used for the histogram.
+ *
+ * @param coefficients One coefficient per channel. Each must be 0 or more and they must add to
+ * 1.0 or less. If null, (0.299, 0.587, 0.114, 0) is used.
+ * @return An array of 256 counts.
+ */
+@ExperimentalUnsignedTypes
+fun referenceHistogramDot(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray?,
+    restriction: Range2d?
+): IntArray {
+    val floatCoefficients = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f)
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+
+    // Validate each coefficient while adding them up, in order.
+    val coefficientSum = floatCoefficients.fold(0f) { partialSum, coefficient ->
+        require (coefficient >= 0) {
+            "RenderScriptToolkit histogramDot. Coefficients must be positive. $coefficient provided."
+        }
+        partialSum + coefficient
+    }
+    require(coefficientSum <= 1f) { "RenderScriptToolkit histogramDot. Coefficients should " +
+            "add to 1.0 or less. $coefficientSum provided." }
+
+    // Convert the coefficients to integers: scaled by 256 and rounded.
+    val intCoefficients = IntArray(input.vectorSize) { channel ->
+        (floatCoefficients[channel] * 256f + 0.5f).toInt()
+    }
+
+    val counts = IntArray(256)
+    input.forEach(restriction) { x, y ->
+        val cell = input[x, y]
+        // While we could do the computation using floats, we won't get the same results as
+        // the existing intrinsics.
+        var weightedSum = 0
+        // Iterate up to vectorSize rather than over the indices of the cell, so that only
+        // 3 values are accumulated in the case of vectorSize == 3.
+        for (channel in 0 until vectorSize) {
+            weightedSum += intCoefficients[channel] * cell[channel].toInt()
+        }
+        // Round up and normalize
+        val bucket = (weightedSum + 0x7f) shr 8
+        counts[bucket]++
+    }
+    return counts
+}
diff --git a/toolkit/test/ReferenceLut.kt b/toolkit/test/ReferenceLut.kt
new file mode 100644
index 00000000..cd832f03
--- /dev/null
+++ b/toolkit/test/ReferenceLut.kt
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.LookupTable
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a LookUpTable operation.
+ *
+ * Each of the four channels of every cell is replaced by the entry of the matching table
+ * (red, green, blue, alpha) found at the index given by the channel's current value.
+ */
+@ExperimentalUnsignedTypes
+fun referenceLut(
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    table: LookupTable,
+    restriction: Range2d?
+): ByteArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), 4, sizeX, sizeY)
+
+    val output = input.createSameSized()
+    input.forEach(restriction) { x, y ->
+        val cell = input[x, y]
+        val red = table.red[cell[0].toInt()]
+        val green = table.green[cell[1].toInt()]
+        val blue = table.blue[cell[2].toInt()]
+        val alpha = table.alpha[cell[3].toInt()]
+        output[x, y] = byteArrayOf(red, green, blue, alpha).asUByteArray()
+    }
+    return output.values.asByteArray()
+}
+
diff --git a/toolkit/test/ReferenceLut3d.kt b/toolkit/test/ReferenceLut3d.kt
new file mode 100644
index 00000000..afd977b3
--- /dev/null
+++ b/toolkit/test/ReferenceLut3d.kt
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import android.renderscript.toolkit.Rgba3dArray
+
+/**
+ * Reference implementation of a 3D LookUpTable operation.
+ *
+ * The R, G and B values of each cell select a position inside [cube]; the cell is replaced by
+ * the value interpolated at that position. The alpha of the cell is kept.
+ */
+@ExperimentalUnsignedTypes
+fun referenceLut3d(
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    cube: Rgba3dArray,
+    restriction: Range2d?
+): ByteArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), 4, sizeX, sizeY)
+    val output = input.createSameSized()
+    input.forEach(restriction) { x, y ->
+        val cell = input[x, y]
+        output[x, y] = lookup(cell, cube)
+    }
+    return output.values.asByteArray()
+}
+
+/**
+ * Returns the value of [cube] at the position selected by the first three values of [input],
+ * linearly interpolated between the eight surrounding cube entries, with the alpha of [input].
+ */
+@ExperimentalUnsignedTypes
+private fun lookup(input: UByteArray, cube: Rgba3dArray): UByteArray {
+    // Calculate the two points at opposite edges of the size 1
+    // cube that contains our point.
+    val maxIndex = Int4(cube.sizeX - 1, cube.sizeY - 1, cube.sizeZ - 1, 0)
+    val baseCoordinate: Float4 = input.toFloat4() * maxIndex.toFloat4() / 255f
+    val low: Int4 = baseCoordinate.intFloor()
+    val high: Int4 = min(low + 1, maxIndex)
+    val fraction: Float4 = baseCoordinate - low.toFloat4()
+
+    // Get the RGBA values at each of the eight corners of the size 1 cube.
+    val v000 = cube[low.x, low.y, low.z].toFloat4()
+    val v100 = cube[high.x, low.y, low.z].toFloat4()
+    val v010 = cube[low.x, high.y, low.z].toFloat4()
+    val v110 = cube[high.x, high.y, low.z].toFloat4()
+    val v001 = cube[low.x, low.y, high.z].toFloat4()
+    val v101 = cube[high.x, low.y, high.z].toFloat4()
+    val v011 = cube[low.x, high.y, high.z].toFloat4()
+    val v111 = cube[high.x, high.y, high.z].toFloat4()
+
+    // Do the linear mixing of these eight values: along x first, then y, then z.
+    val alongX00 = mix(v000, v100, fraction.x)
+    val alongX10 = mix(v010, v110, fraction.x)
+    val alongX01 = mix(v001, v101, fraction.x)
+    val alongX11 = mix(v011, v111, fraction.x)
+
+    val alongY0 = mix(alongX00, alongX10, fraction.y)
+    val alongY1 = mix(alongX01, alongX11, fraction.y)
+
+    val mixed = mix(alongY0, alongY1, fraction.z)
+
+    // Preserve the alpha of the original value
+    return ubyteArrayOf(
+        mixed.x.clampToUByte(),
+        mixed.y.clampToUByte(),
+        mixed.z.clampToUByte(),
+        input[3]
+    )
+}
diff --git a/toolkit/test/ReferenceResize.kt b/toolkit/test/ReferenceResize.kt
new file mode 100644
index 00000000..023825ee
--- /dev/null
+++ b/toolkit/test/ReferenceResize.kt
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import kotlin.math.floor
+import kotlin.math.max
+
+var trace = false
+
+/**
+ * Reference implementation of a Resize operation.
+ *
+ * Every output cell is computed by bicubic interpolation of the input.
+ *
+ * @param inputArray Bytes of the image to resize.
+ * @param vectorSize Number of values per cell.
+ * @param inSizeX Width of the input.
+ * @param inSizeY Height of the input.
+ * @param outSizeX Width of the output.
+ * @param outSizeY Height of the output.
+ * @param restriction If not null, only this range of the output is computed.
+ * @return The bytes of the outSizeX by outSizeY output.
+ */
+@ExperimentalUnsignedTypes
+fun referenceResize(inputArray: ByteArray,
+                    vectorSize: Int,
+                    inSizeX: Int,
+                    inSizeY: Int,
+                    outSizeX: Int, outSizeY: Int,
+                    restriction: Range2d?): ByteArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, inSizeX, inSizeY)
+    // Number of input cells covered by one output cell, in each direction.
+    val scaleX: Float = input.sizeX.toFloat() / outSizeX.toFloat()
+    val scaleY: Float = input.sizeY.toFloat() / outSizeY.toFloat()
+    val outArray = UByteArray(outSizeX * outSizeY * paddedSize(input.vectorSize))
+    val out = Vector2dArray(outArray, input.vectorSize, outSizeX, outSizeY)
+    out.forEach (restriction) { x, y ->
+        val interpolated = bicubicU4(x, y, input, scaleX, scaleY)
+        out[x, y] = interpolated.clampToUByte()
+    }
+    return out.values.asByteArray()
+}
+
+/**
+ * Cubic interpolation between [p1] and [p2], using [p0] and [p3] as the outer control values.
+ *
+ * @param x Position between p1 and p2; bicubicU4 passes the fractional part of the sampling
+ * coordinate.
+ */
+private fun cubicInterpolateF(p0: FloatArray, p1: FloatArray, p2: FloatArray, p3: FloatArray,
+                              x: Float): FloatArray {
+    // The polynomial is evaluated from the innermost term outwards; the order of the
+    // floating point operations is kept as is so that the results do not change.
+    val cubicTerm = (p1 - p2) * 3f + p3 - p0
+    val quadraticTerm = p0 * 2f - p1 * 5f + p2 * 4f - p3 + cubicTerm * x
+    val linearTerm = p2 - p0 + quadraticTerm * x
+    return p1 + linearTerm * x * 0.5f
+}
+
+/**
+ * Computes the value of the output cell ([x], [y]) by bicubic interpolation of a 4x4
+ * neighbourhood of [gIn]. Coordinates that fall outside of the input are clamped to its edges.
+ */
+@ExperimentalUnsignedTypes
+private fun bicubicU4(x: Int, y: Int, gIn: Vector2dArray, scaleX: Float, scaleY: Float): FloatArray {
+    // Position, in input coordinates, of the center of the output cell.
+    val sampleX: Float = (x + 0.5f) * scaleX - 0.5f
+    val sampleY: Float = (y + 0.5f) * scaleY - 0.5f
+
+    val startX: Int = floor(sampleX - 1).toInt()
+    val startY: Int = floor(sampleY - 1).toInt()
+    val fractionX: Float = sampleX - floor(sampleX)
+    val fractionY: Float = sampleY - floor(sampleY)
+    val maxX: Int = gIn.sizeX - 1
+    val maxY: Int = gIn.sizeY - 1
+
+    // The first two coordinates are clamped at 0, the last two at the maximum.
+    val xs0: Int = max(0, startX + 0)
+    val xs1: Int = max(0, startX + 1)
+    val xs2: Int = kotlin.math.min(maxX, startX + 2)
+    val xs3: Int = kotlin.math.min(maxX, startX + 3)
+
+    val ys0: Int = max(0, startY + 0)
+    val ys1: Int = max(0, startY + 1)
+    val ys2: Int = kotlin.math.min(maxY, startY + 2)
+    val ys3: Int = kotlin.math.min(maxY, startY + 3)
+
+    // Interpolates horizontally along one row of the input.
+    fun interpolateRow(row: Int): FloatArray = cubicInterpolateF(
+        gIn[xs0, row].toFloatArray(),
+        gIn[xs1, row].toFloatArray(),
+        gIn[xs2, row].toFloatArray(),
+        gIn[xs3, row].toFloatArray(),
+        fractionX
+    )
+
+    val p0 = interpolateRow(ys0)
+    val p1 = interpolateRow(ys1)
+    val p2 = interpolateRow(ys2)
+    val p3 = interpolateRow(ys3)
+
+    // Then interpolate vertically between the four row results.
+    return cubicInterpolateF(p0, p1, p2, p3, fractionY)
+}
+
+
+/* To be used if we implement Floats
+private fun bicubic_F4(x: Int, y: Int, gin: ByteArray, sizeX: Int, sizeY: Int, scaleX: Float, scaleY: Float): Float4 {
+    var xf: Float = (x + 0.5f) * scaleX - 0.5f
+    var yf: Float = (y + 0.5f) * scaleY - 0.5f
+
+    val startX: Int = floor(xf - 1).toInt()
+    val startY: Int = floor(yf - 1).toInt()
+    xf = xf - floor(xf)
+    yf = yf - floor(yf)
+    val maxX: Int = sizeX - 1
+    val maxY: Int = sizeY - 1
+
+    val xs0: Int = max(0, startX + 0)
+    val xs1: Int = max(0, startX + 1)
+    val xs2: Int = min(maxX, startX + 2)
+    val xs3: Int = min(maxX, startX + 3)
+
+    val ys0: Int = max(0, startY + 0)
+    val ys1: Int = max(0, startY + 1)
+    val ys2: Int = min(maxY, startY + 2)
+    val ys3: Int = min(maxY, startY + 3)
+
+    val p00: Float4 = rsGetElementAt_Float4(gIn, xs0, ys0)
+    val p01: Float4 = rsGetElementAt_Float4(gIn, xs1, ys0)
+    val p02: Float4 = rsGetElementAt_Float4(gIn, xs2, ys0)
+    val p03: Float4 = rsGetElementAt_Float4(gIn, xs3, ys0)
+    val p0: Float4 = cubicInterpolate_F4(p00, p01, p02, p03, xf)
+
+    val p10: Float4 = rsGetElementAt_Float4(gIn, xs0, ys1)
+    val p11: Float4 = rsGetElementAt_Float4(gIn, xs1, ys1)
+    val p12: Float4 = rsGetElementAt_Float4(gIn, xs2, ys1)
+    val p13: Float4 = rsGetElementAt_Float4(gIn, xs3, ys1)
+    val p1: Float4 = cubicInterpolate_F4(p10, p11, p12, p13, xf)
+
+    val p20: Float4 = rsGetElementAt_Float4(gIn, xs0, ys2)
+    val p21: Float4 = rsGetElementAt_Float4(gIn, xs1, ys2)
+    val p22: Float4 = rsGetElementAt_Float4(gIn, xs2, ys2)
+    val p23: Float4 = rsGetElementAt_Float4(gIn, xs3, ys2)
+    val p2: Float4 = cubicInterpolate_F4(p20, p21, p22, p23, xf)
+
+    val p30: Float4 = rsGetElementAt_Float4(gIn, xs0, ys3)
+    val p31: Float4 = rsGetElementAt_Float4(gIn, xs1, ys3)
+    val p32: Float4 = rsGetElementAt_Float4(gIn, xs2, ys3)
+    val p33: Float4 = rsGetElementAt_Float4(gIn, xs3, ys3)
+    val p3: Float4 = cubicInterpolate_F4(p30, p31, p32, p33, xf)
+
+    val p: Float4 = cubicInterpolate_F4(p0, p1, p2, p3, yf)
+
+    return p
+}
+*/
diff --git a/toolkit/test/ReferenceYuvToRgb.kt b/toolkit/test/ReferenceYuvToRgb.kt
new file mode 100644
index 00000000..4d91cf61
--- /dev/null
+++ b/toolkit/test/ReferenceYuvToRgb.kt
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.YuvFormat
+import java.lang.IllegalArgumentException
+
+/**
+ * Reference implementation of a YUV to RGB operation.
+ */ +@ExperimentalUnsignedTypes +fun referenceYuvToRgb(inputSignedArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray { + require(sizeX % 2 == 0) { "The width of the input should be even."} + val inputArray = inputSignedArray.asUByteArray() + + val outputArray = ByteArray(sizeX * sizeY * 4) + val output = Vector2dArray(outputArray.asUByteArray(), 4, sizeX, sizeY) + + when (format) { + YuvFormat.NV21 -> { + val startY = 0 + val startU = sizeX * sizeY + 1 + val startV = sizeX * sizeY + + for (y in 0 until sizeY) { + for (x in 0 until sizeX) { + val offsetY = y * sizeX + x + val offsetU = ((y shr 1) * sizeX + (x shr 1) * 2) + val offsetV = ((y shr 1) * sizeX + (x shr 1) * 2) + output[x, y] = yuvToRGBA4( + inputArray[startY + offsetY], + inputArray[startU + offsetU], + inputArray[startV + offsetV] + ) + } + } + } + + YuvFormat.YV12 -> { + /* According to https://developer.android.com/reference/kotlin/android/graphics/ImageFormat#yv12, + * strideX and strideUV should be aligned to 16 byte boundaries. If we do this, we + * won't get the same results as RenderScript. + * + * We may want to test & require that sizeX is a multiple of 16/32. 
+ */ + val strideX = roundUpTo16(sizeX) // sizeX // + val strideUV = roundUpTo16(strideX / 2) // strideX / 2 // + val startY = 0 + val startU = strideX * sizeY + val startV = startU + strideUV * sizeY / 2 + + for (y in 0 until sizeY) { + for (x in 0 until sizeX) { + val offsetY = y * sizeX + x + val offsetUV = (y shr 1) * strideUV + (x shr 1) + output[x, y] = yuvToRGBA4( + inputArray[startY + offsetY], + inputArray[startU + offsetUV], + inputArray[startV + offsetUV], + ) + } + } + } + else -> throw IllegalArgumentException("Unknown YUV format $format") + } + + return outputArray +} + +@ExperimentalUnsignedTypes +private fun yuvToRGBA4(y: UByte, u: UByte, v: UByte): UByteArray { + val intY = y.toInt() - 16 + val intU = u.toInt() - 128 + val intV = v.toInt() - 128 + val p = intArrayOf( + intY * 298 + intV * 409 + 128 shr 8, + intY * 298 - intU * 100 - intV * 208 + 128 shr 8, + intY * 298 + intU * 516 + 128 shr 8, + 255 + ) + return UByteArray(4) { p[it].clampToUByte() } +} + +/* To be used if we support Float +private fun yuvToRGBA_f4(y: UByte, u: UByte, v: UByte): UByteArray { + val yuv_U_values = floatArrayOf(0f, -0.392f * 0.003921569f, 2.02f * 0.003921569f, 0f) + val yuv_V_values = floatArrayOf(1.603f * 0.003921569f, -0.815f * 0.003921569f, 0f, 0f) + + var color = FloatArray(4) {y.toFloat() * 0.003921569f} + val fU = FloatArray(4) {u.toFloat() - 128f} + val fV = FloatArray(4) {v.toFloat() - 128f} + + color += fU * yuv_U_values; + color += fV * yuv_V_values; + //color = clamp(color, 0.f, 1.f); + return UByteArray(4) { unitFloatClampedToUByte(color[it]) } +} +*/ diff --git a/toolkit/test/TimingTracker.kt b/toolkit/test/TimingTracker.kt new file mode 100644 index 00000000..81e90f23 --- /dev/null +++ b/toolkit/test/TimingTracker.kt @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.testapp + +class TimingTracker( + private val numberOfIterations: Int = 1, + private var numberOfIterationsToIgnore: Int = 0 +) { + init { + require(numberOfIterations > numberOfIterationsToIgnore) + } + private val timings = mutableMapOf<String, IntArray>() + private var currentIteration: Int = 0 + fun nextIteration() { + currentIteration++ + } + fun <T> measure(name: String, workToTime: () -> T): T { + val start = System.nanoTime() + val t = workToTime() + if (currentIteration >= numberOfIterationsToIgnore) { + val end = System.nanoTime() + val deltaInMicroseconds: Int = ((end - start) / 1000).toInt() + val timing = timings.getOrPut(name) { + IntArray(numberOfIterations - numberOfIterationsToIgnore) + } + timing[currentIteration - numberOfIterationsToIgnore] += deltaInMicroseconds + } + return t + } + fun report(): String { + var minimum: Int = Int.MAX_VALUE + for (timing in timings.values) { + val m = timing.minOrNull() + if (m != null && m < minimum) minimum = m + } + + println(timings.map { (name, timing) -> name + ": " + timing.minOrNull() }.joinToString(separator = "\n")) + + return (timings.map { (name, timing) -> name + ": " + timing.joinToString() }.joinToString() + "\n\n" + + timings.map { (name, timing) -> name + ": " + timing.joinToString { "%.2f".format(it.toFloat() / minimum) } }.joinToString() + "\n\n" + + timings.map { (name, timing) -> name + ": " + timing.minOrNull() }.joinToString()) + } +} + diff --git a/toolkit/test/res/drawable-nodpi/img800x450a.jpg b/toolkit/test/res/drawable-nodpi/img800x450a.jpg 
Binary files differnew file mode 100644 index 00000000..6d5b6236 --- /dev/null +++ b/toolkit/test/res/drawable-nodpi/img800x450a.jpg diff --git a/toolkit/test/res/drawable-nodpi/img800x450b.jpg b/toolkit/test/res/drawable-nodpi/img800x450b.jpg Binary files differnew file mode 100644 index 00000000..2013e07b --- /dev/null +++ b/toolkit/test/res/drawable-nodpi/img800x450b.jpg diff --git a/toolkit/x86.cpp b/toolkit/x86.cpp new file mode 100644 index 00000000..d25c3d7f --- /dev/null +++ b/toolkit/x86.cpp @@ -0,0 +1,1323 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdint.h> +#include <x86intrin.h> + +namespace android { +namespace renderscript { + +/* Unsigned extend packed 8-bit integer (in LBS) into packed 32-bit integer */ +static inline __m128i cvtepu8_epi32(__m128i x) { +#if defined(__SSE4_1__) + return _mm_cvtepu8_epi32(x); +#elif defined(__SSSE3__) + const __m128i M8to32 = _mm_set_epi32(0xffffff03, 0xffffff02, 0xffffff01, 0xffffff00); + x = _mm_shuffle_epi8(x, M8to32); + return x; +#else +# error "Require at least SSSE3" +#endif +} + +static inline __m128i packus_epi32(__m128i lo, __m128i hi) { +#if defined(__SSE4_1__) + return _mm_packus_epi32(lo, hi); +#elif defined(__SSSE3__) + const __m128i C0 = _mm_set_epi32(0x0000, 0x0000, 0x0000, 0x0000); + const __m128i C1 = _mm_set_epi32(0xffff, 0xffff, 0xffff, 0xffff); + const __m128i M32to16L = _mm_set_epi32(0xffffffff, 0xffffffff, 0x0d0c0908, 0x05040100); + const __m128i M32to16H = _mm_set_epi32(0x0d0c0908, 0x05040100, 0xffffffff, 0xffffffff); + lo = _mm_and_si128(lo, _mm_cmpgt_epi32(lo, C0)); + lo = _mm_or_si128(lo, _mm_cmpgt_epi32(lo, C1)); + hi = _mm_and_si128(hi, _mm_cmpgt_epi32(hi, C0)); + hi = _mm_or_si128(hi, _mm_cmpgt_epi32(hi, C1)); + return _mm_or_si128(_mm_shuffle_epi8(lo, M32to16L), + _mm_shuffle_epi8(hi, M32to16H)); +#else +# error "Require at least SSSE3" +#endif +} + +static inline __m128i mullo_epi32(__m128i x, __m128i y) { +#if defined(__SSE4_1__) + return _mm_mullo_epi32(x, y); +#elif defined(__SSSE3__) + const __m128i Meven = _mm_set_epi32(0x00000000, 0xffffffff, 0x00000000, 0xffffffff); + __m128i even = _mm_mul_epu32(x, y); + __m128i odd = _mm_mul_epu32(_mm_srli_si128(x, 4), + _mm_srli_si128(y, 4)); + even = _mm_and_si128(even, Meven); + odd = _mm_and_si128(odd, Meven); + return _mm_or_si128(even, _mm_slli_si128(odd, 4)); +#else +# error "Require at least SSSE3" +#endif +} + +/* 'mask' must packed 8-bit of 0x00 or 0xff */ +static inline __m128i blendv_epi8(__m128i x, __m128i y, __m128i mask) { +#if defined(__SSE4_1__) + return 
_mm_blendv_epi8(x, y, mask); +#elif defined(__SSSE3__) + return _mm_or_si128(_mm_andnot_si128(mask, x), _mm_and_si128(y, mask)); +#else +# error "Require at least SSSE3" +#endif +} + +extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, + const void *y1, const void *y2, + const short *coef, uint32_t count) { + __m128i x; + __m128i c0, c2, c4, c6, c8; + __m128i r0, r1, r2; + __m128i p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11; + __m128i o0, o1; + uint32_t i; + + x = _mm_loadl_epi64((const __m128i *)(coef+0)); + c0 = _mm_shuffle_epi32(x, 0x00); + c2 = _mm_shuffle_epi32(x, 0x55); + x = _mm_loadl_epi64((const __m128i *)(coef+4)); + c4 = _mm_shuffle_epi32(x, 0x00); + c6 = _mm_shuffle_epi32(x, 0x55); + x = _mm_loadl_epi64((const __m128i *)(coef+8)); + c8 = _mm_shuffle_epi32(x, 0x00); + + for (i = 0; i < count; ++i) { + + p0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0)), _mm_setzero_si128()); + p1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+1)), _mm_setzero_si128()); + p2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+2)), _mm_setzero_si128()); + p3 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+3)), _mm_setzero_si128()); + p4 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1)), _mm_setzero_si128()); + p5 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+1)), _mm_setzero_si128()); + p6 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+2)), _mm_setzero_si128()); + p7 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+3)), _mm_setzero_si128()); + p8 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2)), _mm_setzero_si128()); + p9 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+1)), _mm_setzero_si128()); + p10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+2)), _mm_setzero_si128()); + p11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+3)), _mm_setzero_si128()); + + o0 = _mm_madd_epi16(_mm_unpacklo_epi16(p0, p1), c0); + o1 = _mm_madd_epi16(_mm_unpacklo_epi16(p1, p2), c0); + + o0 = 
_mm_add_epi32(o0, _mm_madd_epi16(_mm_unpacklo_epi16(p2, p4), c2)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16(_mm_unpacklo_epi16(p3, p5), c2)); + + o0 = _mm_add_epi32(o0, _mm_madd_epi16(_mm_unpacklo_epi16(p5, p6), c4)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16(_mm_unpacklo_epi16(p6, p7), c4)); + + o0 = _mm_add_epi32(o0, _mm_madd_epi16(_mm_unpacklo_epi16(p8, p9), c6)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16(_mm_unpacklo_epi16(p9, p10), c6)); + + o0 = _mm_add_epi32(o0, _mm_madd_epi16(_mm_unpacklo_epi16(p10, _mm_setzero_si128()), c8)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16(_mm_unpacklo_epi16(p11, _mm_setzero_si128()), c8)); + + o0 = _mm_srai_epi32(o0, 8); + o1 = _mm_srai_epi32(o1, 8); + + o0 = packus_epi32(o0, o1); + o0 = _mm_packus_epi16(o0, o0); + _mm_storel_epi64((__m128i *)dst, o0); + + y0 = (const char *)y0 + 8; + y1 = (const char *)y1 + 8; + y2 = (const char *)y2 + 8; + dst = (char *)dst + 8; + } +} + +void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, + const short *coef, uint32_t count) { + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 13, 9, 5, 1, + 12, 8, 4, 0); + + const __m128i Mxy = _mm_set_epi32(0xff0dff0c, 0xff09ff08, 0xff05ff04, 0xff01ff00); + const __m128i Mzw = _mm_set_epi32(0xff0fff0e, 0xff0bff0a, 0xff07ff06, 0xff03ff02); + __m128i c0, c1, c2, c3; + __m128i i4, o4; + __m128i xy, zw; + __m128i x2, y2, z2, w2; + uint32_t i; + + c0 = _mm_loadl_epi64((const __m128i *)(coef+0)); + c1 = _mm_loadl_epi64((const __m128i *)(coef+4)); + c0 = _mm_unpacklo_epi16(c0, c1); + + c2 = _mm_loadl_epi64((const __m128i *)(coef+8)); + c3 = _mm_loadl_epi64((const __m128i *)(coef+12)); + c2 = _mm_unpacklo_epi16(c2, c3); + + for (i = 0; i < count; ++i) { + i4 = _mm_load_si128((const __m128i *)src); + xy = _mm_shuffle_epi8(i4, Mxy); + zw = _mm_shuffle_epi8(i4, Mzw); + + x2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0x00)); + y2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0x55)); + z2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0xaa)); + w2 = 
_mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0xff)); + + x2 = _mm_add_epi32(x2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0x00))); + y2 = _mm_add_epi32(y2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0x55))); + z2 = _mm_add_epi32(z2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0xaa))); + w2 = _mm_add_epi32(w2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0xff))); + + x2 = _mm_srai_epi32(x2, 8); + y2 = _mm_srai_epi32(y2, 8); + z2 = _mm_srai_epi32(z2, 8); + w2 = _mm_srai_epi32(w2, 8); + + x2 = packus_epi32(x2, y2); + z2 = packus_epi32(z2, w2); + o4 = _mm_packus_epi16(x2, z2); + + o4 = _mm_shuffle_epi8(o4, T4x4); + _mm_storeu_si128((__m128i *)dst, o4); + + src = (const char *)src + 16; + dst = (char *)dst + 16; + } +} + +void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, + const short *coef, uint32_t count) { + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 13, 9, 5, 1, + 12, 8, 4, 0); + + const __m128i Mxy = _mm_set_epi32(0xff0dff0c, 0xff09ff08, 0xff05ff04, 0xff01ff00); + const __m128i Mzw = _mm_set_epi32(0xff0fff0e, 0xff0bff0a, 0xff07ff06, 0xff03ff02); + + __m128i c0, c1, c2, c3; + __m128i i4, o4; + __m128i xy, zw; + __m128i x2, y2, z2, w2; + uint32_t i; + + c0 = _mm_loadl_epi64((const __m128i *)(coef+0)); + c1 = _mm_loadl_epi64((const __m128i *)(coef+4)); + c0 = _mm_unpacklo_epi16(c0, c1); + + c2 = _mm_loadl_epi64((const __m128i *)(coef+8)); + c3 = _mm_loadl_epi64((const __m128i *)(coef+12)); + c2 = _mm_unpacklo_epi16(c2, c3); + + for (i = 0; i < count; ++i) { + i4 = _mm_loadu_si128((const __m128i *)src); + xy = _mm_shuffle_epi8(i4, Mxy); + zw = _mm_shuffle_epi8(i4, Mzw); + + x2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0x00)); + y2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0x55)); + z2 = _mm_madd_epi16(xy, _mm_shuffle_epi32(c0, 0xaa)); + + x2 = _mm_add_epi32(x2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0x00))); + y2 = _mm_add_epi32(y2, _mm_madd_epi16(zw, _mm_shuffle_epi32(c2, 0x55))); + z2 = _mm_add_epi32(z2, _mm_madd_epi16(zw, 
_mm_shuffle_epi32(c2, 0xaa))); + + x2 = _mm_srai_epi32(x2, 8); + y2 = _mm_srai_epi32(y2, 8); + z2 = _mm_srai_epi32(z2, 8); + w2 = _mm_srli_epi32(zw, 16); + + x2 = packus_epi32(x2, y2); + z2 = packus_epi32(z2, w2); + o4 = _mm_packus_epi16(x2, z2); + + o4 = _mm_shuffle_epi8(o4, T4x4); + _mm_storeu_si128((__m128i *)dst, o4); + + src = (const char *)src + 16; + dst = (char *)dst + 16; + } +} + +void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, + const short *coef, uint32_t count) { + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 13, 9, 5, 1, + 12, 8, 4, 0); + const __m128i Mxy = _mm_set_epi32(0xff0dff0c, 0xff09ff08, 0xff05ff04, 0xff01ff00); + const __m128i Mzw = _mm_set_epi32(0xff0fff0e, 0xff0bff0a, 0xff07ff06, 0xff03ff02); + __m128i c0, c1, c2, c3; + __m128i i4, o4; + __m128i xy, zw; + __m128i x2, y2, z2, w2; + uint32_t i; + + c0 = _mm_loadl_epi64((const __m128i *)(coef+0)); + c0 = _mm_shufflelo_epi16(c0, 0); + c1 = _mm_loadl_epi64((const __m128i *)(coef+4)); + c1 = _mm_shufflelo_epi16(c1, 0); + c0 = _mm_unpacklo_epi16(c0, c1); + + c2 = _mm_loadl_epi64((const __m128i *)(coef+8)); + c2 = _mm_shufflelo_epi16(c2, 0); + c3 = _mm_loadl_epi64((const __m128i *)(coef+12)); + c3 = _mm_shufflelo_epi16(c3, 0); + c2 = _mm_unpacklo_epi16(c2, c3); + + for (i = 0; i < count; ++i) { + i4 = _mm_loadu_si128((const __m128i *)src); + + xy = _mm_shuffle_epi8(i4, Mxy); + zw = _mm_shuffle_epi8(i4, Mzw); + + x2 = _mm_madd_epi16(xy, c0); + x2 = _mm_add_epi32(x2, _mm_madd_epi16(zw, c2)); + + x2 = _mm_srai_epi32(x2, 8); + y2 = x2; + z2 = x2; + w2 = _mm_srli_epi32(zw, 16); + + x2 = packus_epi32(x2, y2); + z2 = packus_epi32(z2, w2); + o4 = _mm_packus_epi16(x2, z2); + + o4 = _mm_shuffle_epi8(o4, T4x4); + _mm_storeu_si128((__m128i *)dst, o4); + + src = (const char *)src + 16; + dst = (char *)dst + 16; + } +} + +void rsdIntrinsicBlurVFU4_K(void *dst, + const void *pin, int stride, const void *gptr, + int rct, int x1, int x2) { + const char *pi; + __m128i pi0, pi1; + 
__m128 pf0, pf1; + __m128 bp0, bp1; + __m128 x; + int r; + + for (; x1 < x2; x1 += 2) { + pi = (const char *)pin + (x1 << 2); + bp0 = _mm_setzero_ps(); + bp1 = _mm_setzero_ps(); + + for (r = 0; r < rct; ++r) { + x = _mm_load_ss((const float *)gptr + r); + x = _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 0, 0, 0)); + + pi0 = _mm_cvtsi32_si128(*(const int *)pi); + pi1 = _mm_cvtsi32_si128(*((const int *)pi + 1)); + + pf0 = _mm_cvtepi32_ps(cvtepu8_epi32(pi0)); + pf1 = _mm_cvtepi32_ps(cvtepu8_epi32(pi1)); + + bp0 = _mm_add_ps(bp0, _mm_mul_ps(pf0, x)); + bp1 = _mm_add_ps(bp1, _mm_mul_ps(pf1, x)); + + pi += stride; + } + + _mm_storeu_ps((float *)dst, bp0); + _mm_storeu_ps((float *)dst + 4, bp1); + dst = (char *)dst + 32; + } +} + +void rsdIntrinsicBlurHFU4_K(void *dst, + const void *pin, const void *gptr, + int rct, int x1, int x2) { + const __m128i Mu8 = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0x0c080400); + const float *pi; + __m128 pf, x, y; + __m128i o; + int r; + + for (; x1 < x2; ++x1) { + /* rct is define as 2*r+1 by the caller */ + x = _mm_load_ss((const float *)gptr); + x = _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 0, 0, 0)); + + pi = (const float *)pin + (x1 << 2); + pf = _mm_mul_ps(x, _mm_load_ps(pi)); + + for (r = 1; r < rct; r += 2) { + x = _mm_load_ss((const float *)gptr + r); + y = _mm_load_ss((const float *)gptr + r + 1); + x = _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 0, 0, 0)); + y = _mm_shuffle_ps(y, y, _MM_SHUFFLE(0, 0, 0, 0)); + + pf = _mm_add_ps(pf, _mm_mul_ps(x, _mm_load_ps(pi + (r << 2)))); + pf = _mm_add_ps(pf, _mm_mul_ps(y, _mm_load_ps(pi + (r << 2) + 4))); + } + + o = _mm_cvtps_epi32(pf); + *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); + dst = (char *)dst + 4; + } +} + +void rsdIntrinsicBlurHFU1_K(void *dst, + const void *pin, const void *gptr, + int rct, int x1, int x2) { + const __m128i Mu8 = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0x0c080400); + const float *pi; + __m128 pf, g0, g1, g2, g3, gx, p0, p1; + __m128i o; + int r; + + 
for (; x1 < x2; x1+=4) { + g0 = _mm_load_ss((const float *)gptr); + g0 = _mm_shuffle_ps(g0, g0, _MM_SHUFFLE(0, 0, 0, 0)); + + pi = (const float *)pin + x1; + pf = _mm_mul_ps(g0, _mm_loadu_ps(pi)); + + for (r = 1; r < rct; r += 4) { + gx = _mm_loadu_ps((const float *)gptr + r); + p0 = _mm_loadu_ps(pi + r); + p1 = _mm_loadu_ps(pi + r + 4); + + g0 = _mm_shuffle_ps(gx, gx, _MM_SHUFFLE(0, 0, 0, 0)); + pf = _mm_add_ps(pf, _mm_mul_ps(g0, p0)); + g1 = _mm_shuffle_ps(gx, gx, _MM_SHUFFLE(1, 1, 1, 1)); + pf = _mm_add_ps(pf, _mm_mul_ps(g1, _mm_alignr_epi8(p1, p0, 4))); + g2 = _mm_shuffle_ps(gx, gx, _MM_SHUFFLE(2, 2, 2, 2)); + pf = _mm_add_ps(pf, _mm_mul_ps(g2, _mm_alignr_epi8(p1, p0, 8))); + g3 = _mm_shuffle_ps(gx, gx, _MM_SHUFFLE(3, 3, 3, 3)); + pf = _mm_add_ps(pf, _mm_mul_ps(g3, _mm_alignr_epi8(p1, p0, 12))); + } + + o = _mm_cvtps_epi32(pf); + *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); + dst = (char *)dst + 4; + } +} + +void rsdIntrinsicYuv_K(void *dst, + const unsigned char *pY, const unsigned char *pUV, + uint32_t count, const short *param) { + __m128i biasY, biasUV; + __m128i c0, c1, c2, c3, c4; + + biasY = _mm_set1_epi32(param[8]); /* 16 */ + biasUV = _mm_set1_epi32(param[16]); /* 128 */ + + c0 = _mm_set1_epi32(param[0]); /* 298 */ + c1 = _mm_set1_epi32(param[1]); /* 409 */ + c2 = _mm_set1_epi32(param[2]); /* -100 */ + c3 = _mm_set1_epi32(param[3]); /* 516 */ + c4 = _mm_set1_epi32(param[4]); /* -208 */ + + __m128i Y, UV, U, V, R, G, B, A; + + A = _mm_set1_epi32(255); + uint32_t i; + + for (i = 0; i < (count << 1); ++i) { + Y = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pY)); + UV = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pUV)); + + Y = _mm_sub_epi32(Y, biasY); + UV = _mm_sub_epi32(UV, biasUV); + + U = _mm_shuffle_epi32(UV, 0xf5); + V = _mm_shuffle_epi32(UV, 0xa0); + + Y = mullo_epi32(Y, c0); + + R = _mm_add_epi32(Y, mullo_epi32(V, c1)); + R = _mm_add_epi32(R, biasUV); + R = _mm_srai_epi32(R, 8); + + G = _mm_add_epi32(Y, mullo_epi32(U, c2)); + G = 
_mm_add_epi32(G, mullo_epi32(V, c4)); + G = _mm_add_epi32(G, biasUV); + G = _mm_srai_epi32(G, 8); + + B = _mm_add_epi32(Y, mullo_epi32(U, c3)); + B = _mm_add_epi32(B, biasUV); + B = _mm_srai_epi32(B, 8); + + __m128i y1, y2, y3, y4; + + y1 = packus_epi32(R, G); + y2 = packus_epi32(B, A); + y3 = _mm_packus_epi16(y1, y2); + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 13, 9, 5, 1, + 12, 8, 4, 0); + y4 = _mm_shuffle_epi8(y3, T4x4); + _mm_storeu_si128((__m128i *)dst, y4); + pY += 4; + pUV += 4; + dst = (__m128i *)dst + 1; + } +} + +void rsdIntrinsicYuvR_K(void *dst, + const unsigned char *pY, const unsigned char *pUV, + uint32_t count, const short *param) { + __m128i biasY, biasUV; + __m128i c0, c1, c2, c3, c4; + + biasY = _mm_set1_epi32(param[8]); /* 16 */ + biasUV = _mm_set1_epi32(param[16]); /* 128 */ + + c0 = _mm_set1_epi32(param[0]); /* 298 */ + c1 = _mm_set1_epi32(param[1]); /* 409 */ + c2 = _mm_set1_epi32(param[2]); /* -100 */ + c3 = _mm_set1_epi32(param[3]); /* 516 */ + c4 = _mm_set1_epi32(param[4]); /* -208 */ + + __m128i Y, UV, U, V, R, G, B, A; + + A = _mm_set1_epi32(255); + uint32_t i; + + for (i = 0; i < (count << 1); ++i) { + Y = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pY)); + UV = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pUV)); + + Y = _mm_sub_epi32(Y, biasY); + UV = _mm_sub_epi32(UV, biasUV); + + V = _mm_shuffle_epi32(UV, 0xf5); + U = _mm_shuffle_epi32(UV, 0xa0); + + Y = mullo_epi32(Y, c0); + + R = _mm_add_epi32(Y, mullo_epi32(V, c1)); + R = _mm_add_epi32(R, biasUV); + R = _mm_srai_epi32(R, 8); + + G = _mm_add_epi32(Y, mullo_epi32(U, c2)); + G = _mm_add_epi32(G, mullo_epi32(V, c4)); + G = _mm_add_epi32(G, biasUV); + G = _mm_srai_epi32(G, 8); + + B = _mm_add_epi32(Y, mullo_epi32(U, c3)); + B = _mm_add_epi32(B, biasUV); + B = _mm_srai_epi32(B, 8); + + __m128i y1, y2, y3, y4; + + y1 = packus_epi32(R, G); + y2 = packus_epi32(B, A); + y3 = _mm_packus_epi16(y1, y2); + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 
13, 9, 5, 1, + 12, 8, 4, 0); + y4 = _mm_shuffle_epi8(y3, T4x4); + _mm_storeu_si128((__m128i *)dst, y4); + pY += 4; + pUV += 4; + dst = (__m128i *)dst + 1; + } +} + +void rsdIntrinsicYuv2_K(void *dst, + const unsigned char *pY, const unsigned char *pU, + const unsigned char *pV, uint32_t count, const short *param) { + __m128i biasY, biasUV; + __m128i c0, c1, c2, c3, c4; + + biasY = _mm_set1_epi32(param[8]); /* 16 */ + biasUV = _mm_set1_epi32(param[16]); /* 128 */ + + c0 = _mm_set1_epi32(param[0]); /* 298 */ + c1 = _mm_set1_epi32(param[1]); /* 409 */ + c2 = _mm_set1_epi32(param[2]); /* -100 */ + c3 = _mm_set1_epi32(param[3]); /* 516 */ + c4 = _mm_set1_epi32(param[4]); /* -208 */ + + __m128i Y, U, V, R, G, B, A; + + A = _mm_set1_epi32(255); + uint32_t i; + + for (i = 0; i < (count << 1); ++i) { + Y = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pY)); + U = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pU)); + V = cvtepu8_epi32(_mm_set1_epi32(*(const int *)pV)); + + Y = _mm_sub_epi32(Y, biasY); + U = _mm_sub_epi32(U, biasUV); + V = _mm_sub_epi32(V, biasUV); + + Y = mullo_epi32(Y, c0); + + R = _mm_add_epi32(Y, mullo_epi32(V, c1)); + R = _mm_add_epi32(R, biasUV); + R = _mm_srai_epi32(R, 8); + + G = _mm_add_epi32(Y, mullo_epi32(U, c2)); + G = _mm_add_epi32(G, mullo_epi32(V, c4)); + G = _mm_add_epi32(G, biasUV); + G = _mm_srai_epi32(G, 8); + + B = _mm_add_epi32(Y, mullo_epi32(U, c3)); + B = _mm_add_epi32(B, biasUV); + B = _mm_srai_epi32(B, 8); + + __m128i y1, y2, y3, y4; + + y1 = packus_epi32(R, G); + y2 = packus_epi32(B, A); + y3 = _mm_packus_epi16(y1, y2); + const __m128i T4x4 = _mm_set_epi8(15, 11, 7, 3, + 14, 10, 6, 2, + 13, 9, 5, 1, + 12, 8, 4, 0); + y4 = _mm_shuffle_epi8(y3, T4x4); + _mm_storeu_si128((__m128i *)dst, y4); + pY += 4; + pU += 4; + pV += 4; + dst = (__m128i *)dst + 1; + } +} + +extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, + const void *y1, const void *y2, + const void *y3, const void *y4, + const short *coef, uint32_t count) { + 
__m128i x; + __m128i c0, c2, c4, c6, c8, c10, c12; + __m128i c14, c16, c18, c20, c22, c24; + __m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; + __m128i p0, p1, p2, p3, p4, p5, p6, p7; + __m128i p8, p9, p10, p11, p12, p13, p14, p15; + __m128i p16, p17, p18, p19, p20, p21, p22, p23; + __m128i p24, p25, p26, p27, p28, p29, p30, p31; + __m128i p32, p33, p34, p35, p36, p37, p38, p39; + __m128i o0, o1, o2, o3; + uint32_t i; + + x = _mm_loadl_epi64((const __m128i *)(coef+0)); + c0 = _mm_shuffle_epi32(x, 0x00); + c2 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+4)); + c4 = _mm_shuffle_epi32(x, 0x00); + c6 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+8)); + c8 = _mm_shuffle_epi32(x, 0x00); + c10 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+12)); + c12 = _mm_shuffle_epi32(x, 0x00); + c14 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+16)); + c16 = _mm_shuffle_epi32(x, 0x00); + c18 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+20)); + c20 = _mm_shuffle_epi32(x, 0x00); + c22 = _mm_shuffle_epi32(x, 0x55); + + x = _mm_loadl_epi64((const __m128i *)(coef+24)); + c24 = _mm_shuffle_epi32(x, 0x00); + + for (i = 0; i < count; ++i) { + + p0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int32_t *)y0), _mm_setzero_si128()); + p1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+1)), _mm_setzero_si128()); + p2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+2)), _mm_setzero_si128()); + p3 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+3)), _mm_setzero_si128()); + p4 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+4)), _mm_setzero_si128()); + p5 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+5)), _mm_setzero_si128()); + p6 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+6)), _mm_setzero_si128()); + p7 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y0+7)), _mm_setzero_si128()); + + p8 = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1)), _mm_setzero_si128()); + p9 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+1)), _mm_setzero_si128()); + p10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+2)), _mm_setzero_si128()); + p11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+3)), _mm_setzero_si128()); + p12 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+4)), _mm_setzero_si128()); + p13 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+5)), _mm_setzero_si128()); + p14 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+6)), _mm_setzero_si128()); + p15 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y1+7)), _mm_setzero_si128()); + + p16 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2)), _mm_setzero_si128()); + p17 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+1)), _mm_setzero_si128()); + p18 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+2)), _mm_setzero_si128()); + p19 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+3)), _mm_setzero_si128()); + p20 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+4)), _mm_setzero_si128()); + p21 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+5)), _mm_setzero_si128()); + p22 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+6)), _mm_setzero_si128()); + p23 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y2+7)), _mm_setzero_si128()); + + p24 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3)), _mm_setzero_si128()); + p25 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+1)), _mm_setzero_si128()); + p26 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+2)), _mm_setzero_si128()); + p27 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+3)), _mm_setzero_si128()); + p28 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+4)), _mm_setzero_si128()); + p29 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+5)), _mm_setzero_si128()); + p30 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+6)), _mm_setzero_si128()); + p31 = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y3+7)), _mm_setzero_si128()); + + p32 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4)), _mm_setzero_si128()); + p33 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+1)), _mm_setzero_si128()); + p34 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+2)), _mm_setzero_si128()); + p35 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+3)), _mm_setzero_si128()); + p36 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+4)), _mm_setzero_si128()); + p37 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+5)), _mm_setzero_si128()); + p38 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+6)), _mm_setzero_si128()); + p39 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*((int32_t *)y4+7)), _mm_setzero_si128()); + + o0 = _mm_madd_epi16( _mm_unpacklo_epi16(p0, p1), c0); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p2, p3), c2)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p4, p8), c4)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p9,p10), c6)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p11, p12), c8)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p16, p17), c10)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p18, p19), c12)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p20, p24), c14)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p25,p26), c16)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p27, p28), c18)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p32, p33), c20)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p34, p35), c22)); + o0 = _mm_add_epi32(o0, _mm_madd_epi16( _mm_unpacklo_epi16(p36, _mm_setzero_si128()), c24)); + o0 = _mm_srai_epi32(o0, 8); + + o1 = _mm_madd_epi16( _mm_unpacklo_epi16(p1, p2), c0); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p3,p4), c2)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p5, 
p9), c4)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p10,p11), c6)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p12,p13), c8)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p17,p18), c10)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p19,p20), c12)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p21,p25), c14)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p26, p27), c16)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p28, p29), c18)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p33, p34), c20)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p35, p36), c22)); + o1 = _mm_add_epi32(o1, _mm_madd_epi16( _mm_unpacklo_epi16(p37, _mm_setzero_si128()), c24)); + o1 = _mm_srai_epi32(o1, 8); + + o2 = _mm_madd_epi16( _mm_unpacklo_epi16(p2,p3), c0); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p4, p5), c2)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p6, p10), c4)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p11, p12), c6)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p13, p14), c8)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p18, p19), c10)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p20, p21), c12)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p22, p26), c14)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p27, p28), c16)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p29, p30), c18)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p34, p35), c20)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p36, p37), c22)); + o2 = _mm_add_epi32(o2, _mm_madd_epi16( _mm_unpacklo_epi16(p38, _mm_setzero_si128()), c24)); + o2 = _mm_srai_epi32(o2, 8); + + o3 = _mm_madd_epi16( _mm_unpacklo_epi16(p3,p4), c0); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p5, p6), c2)); + o3 
= _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p7, p11), c4)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p12, p13), c6)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p14, p15), c8)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p19, p20), c10)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p21, p22), c12)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p23, p27), c14)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p28, p29), c16)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p30, p31), c18)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p35, p36), c20)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p37,p38), c22)); + o3 = _mm_add_epi32(o3, _mm_madd_epi16( _mm_unpacklo_epi16(p39, _mm_setzero_si128()), c24)); + o3 = _mm_srai_epi32(o3, 8); + + o0 = packus_epi32(o0, o1); + o2 = packus_epi32(o2, o3); + o0 = _mm_packus_epi16(o0, o2); + _mm_storeu_si128((__m128i *)dst, o0); + + y0 = (const char *)y0 + 16; + y1 = (const char *)y1 + 16; + y2 = (const char *)y2 + 16; + y3 = (const char *)y3 + 16; + y4 = (const char *)y4 + 16; + dst = (char *)dst + 16; + } +} + +void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8) { + __m128i all1s, ina, ins; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + ins = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t0 = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, _mm_sub_epi16(all1s, ina)); + t0 = _mm_srli_epi16(t0, 8); + t0 = _mm_add_epi16(t0, ins); + + ins = _mm_unpackhi_epi8(in0, 
_mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t1 = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, _mm_sub_epi16(all1s, ina)); + t1 = _mm_srli_epi16(t1, 8); + t1 = _mm_add_epi16(t1, ins); + + ins = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t2 = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, _mm_sub_epi16(all1s, ina)); + t2 = _mm_srli_epi16(t2, 8); + t2 = _mm_add_epi16(t2, ins); + + ins = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t3 = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, _mm_sub_epi16(all1s, ina)); + t3 = _mm_srli_epi16(t3, 8); + t3 = _mm_add_epi16(t3, ins); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8) { + __m128i all1s, outa, outs; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + + outs = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t0 = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, _mm_sub_epi16(all1s, outa)); + t0 = _mm_srli_epi16(t0, 8); + t0 = _mm_add_epi16(t0, outs); + + outs = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 
0xFF); + t1 = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, _mm_sub_epi16(all1s, outa)); + t1 = _mm_srli_epi16(t1, 8); + t1 = _mm_add_epi16(t1, outs); + + outs = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t2 = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, _mm_sub_epi16(all1s, outa)); + t2 = _mm_srli_epi16(t2, 8); + t2 = _mm_add_epi16(t2, outs); + + outs = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t3 = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, _mm_sub_epi16(all1s, outa)); + t3 = _mm_srli_epi16(t3, 8); + t3 = _mm_add_epi16(t3, outs); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8) { + __m128i outa; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + outa = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t0 = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, outa); + t0 = _mm_srli_epi16(t0, 8); + + outa = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t1 = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, outa); + t1 = _mm_srli_epi16(t1, 8); + + outa = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + 
outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t2 = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, outa); + t2 = _mm_srli_epi16(t2, 8); + + outa = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t3 = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, outa); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8) { + __m128i ina; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + ina = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t0 = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, ina); + t0 = _mm_srli_epi16(t0, 8); + + ina = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t1 = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, ina); + t1 = _mm_srli_epi16(t1, 8); + + ina = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t2 = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, ina); + t2 = _mm_srli_epi16(t2, 8); + + ina = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t3 = _mm_unpackhi_epi8(out1, 
_mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, ina); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8) { + __m128i all1s, outa; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + outa = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t0 = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, _mm_sub_epi16(all1s, outa)); + t0 = _mm_srli_epi16(t0, 8); + + outa = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t1 = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, _mm_sub_epi16(all1s, outa)); + t1 = _mm_srli_epi16(t1, 8); + + outa = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t2 = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, _mm_sub_epi16(all1s, outa)); + t2 = _mm_srli_epi16(t2, 8); + + outa = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outa, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t3 = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, _mm_sub_epi16(all1s, outa)); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + 
_mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8) { + __m128i all1s, ina; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + ina = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t0 = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, _mm_sub_epi16(all1s, ina)); + t0 = _mm_srli_epi16(t0, 8); + + ina = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t1 = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, _mm_sub_epi16(all1s, ina)); + t1 = _mm_srli_epi16(t1, 8); + + ina = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t2 = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, _mm_sub_epi16(all1s, ina)); + t2 = _mm_srli_epi16(t2, 8); + + ina = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ina, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + t3 = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, _mm_sub_epi16(all1s, ina)); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8) { + const __m128i M0001 = 
_mm_set_epi32(0xff000000, 0xff000000, 0xff000000, 0xff000000); + __m128i all1s, ina, outa, ins, outs; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + ins = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t0 = _mm_sub_epi16(all1s, ina); + t0 = _mm_mullo_epi16(t0, outs); + t0 = _mm_adds_epu16(t0, _mm_mullo_epi16(outa, ins)); + t0 = _mm_srli_epi16(t0, 8); + + ins = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t1 = _mm_sub_epi16(all1s, ina); + t1 = _mm_mullo_epi16(t1, outs); + t1 = _mm_adds_epu16(t1, _mm_mullo_epi16(outa, ins)); + t1 = _mm_srli_epi16(t1, 8); + + ins = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpacklo_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t2 = _mm_sub_epi16(all1s, ina); + t2 = _mm_mullo_epi16(t2, outs); + t2 = _mm_adds_epu16(t2, _mm_mullo_epi16(outa, ins)); + t2 = _mm_srli_epi16(t2, 8); + + ins = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t3 
= _mm_sub_epi16(all1s, ina); + t3 = _mm_mullo_epi16(t3, outs); + t3 = _mm_adds_epu16(t3, _mm_mullo_epi16(outa, ins)); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t0 = blendv_epi8(t0, out0, M0001); + t2 = _mm_packus_epi16(t2, t3); + t2 = blendv_epi8(t2, out1, M0001); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8) { + const __m128i M0001 = _mm_set_epi32(0xff000000, 0xff000000, 0xff000000, 0xff000000); + __m128i all1s, ina, ins, outa, outs; + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + all1s = _mm_set1_epi16(255); + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + ins = _mm_unpacklo_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpacklo_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t0 = _mm_sub_epi16(all1s, outa); + t0 = _mm_mullo_epi16(t0, ins); + t0 = _mm_adds_epu16(t0, _mm_mullo_epi16(ina, outs)); + t0 = _mm_srli_epi16(t0, 8); + + ins = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpackhi_epi8(out0, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t1 = _mm_sub_epi16(all1s, outa); + t1 = _mm_mullo_epi16(t1, ins); + t1 = _mm_adds_epu16(t1, _mm_mullo_epi16(ina, outs)); + t1 = _mm_srli_epi16(t1, 8); + + ins = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = 
_mm_unpacklo_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t2 = _mm_sub_epi16(all1s, outa); + t2 = _mm_mullo_epi16(t2, ins); + t2 = _mm_adds_epu16(t2, _mm_mullo_epi16(ina, outs)); + t2 = _mm_srli_epi16(t2, 8); + + ins = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + ina = _mm_shufflelo_epi16(ins, 0xFF); + ina = _mm_shufflehi_epi16(ina, 0xFF); + outs = _mm_unpackhi_epi8(out1, _mm_setzero_si128()); + outa = _mm_shufflelo_epi16(outs, 0xFF); + outa = _mm_shufflehi_epi16(outa, 0xFF); + t3 = _mm_sub_epi16(all1s, outa); + t3 = _mm_mullo_epi16(t3, ins); + t3 = _mm_adds_epu16(t3, _mm_mullo_epi16(ina, outs)); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t0 = blendv_epi8(t0, in0, M0001); + t2 = _mm_packus_epi16(t2, t3); + t2 = blendv_epi8(t2, in1, M0001); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8) { + __m128i in0, in1, out0, out1; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + out0 = _mm_xor_si128(out0, in0); + out1 = _mm_xor_si128(out1, in1); + + _mm_storeu_si128((__m128i *)dst, out0); + _mm_storeu_si128((__m128i *)dst + 1, out1); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8) { + __m128i in0, in1, out0, out1; + __m128i t0, t1, t2, t3; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + t0 = 
_mm_unpacklo_epi8(in0, _mm_setzero_si128()); + t0 = _mm_mullo_epi16(t0, _mm_unpacklo_epi8(out0, _mm_setzero_si128())); + t0 = _mm_srli_epi16(t0, 8); + + t1 = _mm_unpackhi_epi8(in0, _mm_setzero_si128()); + t1 = _mm_mullo_epi16(t1, _mm_unpackhi_epi8(out0, _mm_setzero_si128())); + t1 = _mm_srli_epi16(t1, 8); + + t2 = _mm_unpacklo_epi8(in1, _mm_setzero_si128()); + t2 = _mm_mullo_epi16(t2, _mm_unpacklo_epi8(out1, _mm_setzero_si128())); + t2 = _mm_srli_epi16(t2, 8); + + t3 = _mm_unpackhi_epi8(in1, _mm_setzero_si128()); + t3 = _mm_mullo_epi16(t3, _mm_unpackhi_epi8(out1, _mm_setzero_si128())); + t3 = _mm_srli_epi16(t3, 8); + + t0 = _mm_packus_epi16(t0, t1); + t2 = _mm_packus_epi16(t2, t3); + _mm_storeu_si128((__m128i *)dst, t0); + _mm_storeu_si128((__m128i *)dst + 1, t2); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8) { + __m128i in0, in1, out0, out1; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + out0 = _mm_adds_epu8(out0, in0); + out1 = _mm_adds_epu8(out1, in1); + + _mm_storeu_si128((__m128i *)dst, out0); + _mm_storeu_si128((__m128i *)dst + 1, out1); + + src = (const __m128i *)src + 2; + dst = (__m128i *)dst + 2; + } +} + +void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8) { + __m128i in0, in1, out0, out1; + uint32_t i; + + for (i = 0; i < count8; ++i) { + in0 = _mm_loadu_si128((const __m128i *)src); + in1 = _mm_loadu_si128((const __m128i *)src + 1); + out0 = _mm_loadu_si128((const __m128i *)dst); + out1 = _mm_loadu_si128((const __m128i *)dst + 1); + + out0 = _mm_subs_epu8(out0, in0); + out1 = _mm_subs_epu8(out1, in1); + + _mm_storeu_si128((__m128i *)dst, out0); + _mm_storeu_si128((__m128i *)dst + 1, out1); + + src = (const __m128i *)src + 2; + dst = 
(__m128i *)dst + 2; + } +} + +} // namespace android +} // namespace renderscript |