author    Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-10 15:43:44 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-10 15:43:44 +0000
commit    f3de6d992cb37d6ff14a1907d544d5e9121ddf0e
tree      6b5bf6fe73d6621b9e2f398896b213e793adf226
parent    06dad597df8cbe3970c5d588545c8102cdc738d8
parent    3fd49b0c0bc8410604521d097e01408b351736e7
Snap for 11819167 from 3fd49b0c0bc8410604521d097e01408b351736e7 to busytown-mac-infra-release
Change-Id: I51bc6619147639898f6888a56430bf82da74ee6d
95 files changed, 7882 insertions, 2686 deletions
diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..9b3aa8b7213 --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: LLVM diff --git a/Android.bp b/Android.bp index 5395d48ccba..88435b917bc 100644 --- a/Android.bp +++ b/Android.bp @@ -48,6 +48,7 @@ license { cc_library_headers { name: "scudo_headers", + ramdisk_available: true, recovery_available: true, vendor_ramdisk_available: true, @@ -57,6 +58,7 @@ cc_library_headers { apex_available: [ "com.android.runtime", + "//apex_available:platform", ], visibility: [ @@ -65,7 +67,26 @@ cc_library_headers { } cc_defaults { + name: "scudo_config_defaults", + cflags: [ + // Use a custom Android configuration. + "-DSCUDO_USE_CUSTOM_CONFIG", + ], + + include_dirs: [ + "external/scudo/config", + ], + + product_variables: { + malloc_low_memory: { + cflags: ["-DSCUDO_LOW_MEMORY"], + }, + }, +} + +cc_defaults { name: "libscudo_defaults", + defaults: ["scudo_config_defaults"], native_coverage: false, ramdisk_available: true, vendor_ramdisk_available: true, @@ -81,12 +102,12 @@ cc_defaults { "-fno-rtti", // This option speeds up alloc/free code paths by about 5% to 7%. "-fno-stack-protector", - "-fno-emulated-tls", "-Wall", "-Wextra", "-Wunused", "-Wno-unused-result", + "-Wconversion", "-Werror=pointer-to-int-cast", "-Werror=int-to-pointer-cast", @@ -115,13 +136,17 @@ cc_defaults { srcs: [ "standalone/checksum.cpp", "standalone/common.cpp", + "standalone/condition_variable_linux.cpp", "standalone/flags.cpp", "standalone/flags_parser.cpp", "standalone/linux.cpp", + "standalone/mem_map.cpp", + "standalone/mem_map_linux.cpp", "standalone/release.cpp", "standalone/report.cpp", - "standalone/rss_limit_checker.cpp", + "standalone/report_linux.cpp", "standalone/string_utils.cpp", + "standalone/timing.cpp", "standalone/wrappers_c_bionic.cpp" ], arch: { @@ -171,9 +196,12 @@ cc_library_static { ], visibility: [ "//bionic:__subpackages__", - "//frameworks/libs/native_bridge_support/libc:__subpackages__", + "//frameworks/libs/native_bridge_support/android_api/libc:__subpackages__", "//system/core/debuggerd:__subpackages__", ], + apex_available: [ + "com.android.runtime", + ], } cc_library_static { @@ -186,15 +214,22 @@ cc_library_static { cc_defaults { name: "scudo_unit_tests_default", + defaults: ["scudo_config_defaults"], + isolated: true, static_libs: ["libscudo_for_testing"], include_dirs: [ "external/scudo/standalone", "external/scudo/standalone/include", ], cflags: [ - "-Wno-unused-parameter", - "-fno-emulated-tls", + "-Wconversion", + // In memtag_test.cpp, some tests are disabled by GTEST_SKIP() so that + // they won't be run. However, for those disabled tests, it may contain + // unreachable code paths which will mislead some compiler checks. Given + // this flag won't be impacted too much, disable it only in the test. + "-Wno-unreachable-code-loop-increment", "-DSCUDO_DEBUG", + "-DSCUDO_NO_TEST_MAIN", ], target: { bionic: { @@ -203,45 +238,54 @@ cc_defaults { }, test_suites: ["general-tests"], bootstrap: true, + srcs: [ + "standalone/tests/scudo_unit_test_main.cpp", + ], } cc_test { name: "scudo_unit_tests", defaults: ["scudo_unit_tests_default"], - // Temporarily disabled on host due to a 15-20s per-test timeout, - // which is currently exceeded by ScudoCombinedTest.BasicCombined. 
- host_supported: false, + host_supported: true, srcs: [ + "standalone/tests/allocator_config_test.cpp", "standalone/tests/atomic_test.cpp", "standalone/tests/bytemap_test.cpp", "standalone/tests/checksum_test.cpp", "standalone/tests/chunk_test.cpp", "standalone/tests/combined_test.cpp", + "standalone/tests/condition_variable_test.cpp", "standalone/tests/flags_test.cpp", "standalone/tests/list_test.cpp", "standalone/tests/map_test.cpp", + "standalone/tests/memtag_test.cpp", "standalone/tests/mutex_test.cpp", "standalone/tests/primary_test.cpp", "standalone/tests/quarantine_test.cpp", "standalone/tests/release_test.cpp", "standalone/tests/report_test.cpp", - "standalone/tests/scudo_unit_test_main.cpp", "standalone/tests/secondary_test.cpp", "standalone/tests/size_class_map_test.cpp", "standalone/tests/stats_test.cpp", "standalone/tests/strings_test.cpp", + "standalone/tests/timing_test.cpp", "standalone/tests/tsd_test.cpp", "standalone/tests/vector_test.cpp", ], } cc_test { - name: "scudo_hooks_unit_tests", + name: "scudo_wrappers_unit_tests", defaults: ["scudo_unit_tests_default"], - host_supported: true, + // These are wrapper tests, disable the host tests since they would run + // against glibc. + host_supported: false, + cflags: [ + "-Wno-mismatched-new-delete", + ], srcs: [ - "standalone/tests/scudo_hooks_test.cpp", - "standalone/tests/scudo_unit_test_main.cpp", + "standalone/tests/wrappers_c_test.cpp", + "standalone/tests/wrappers_cpp_test.cpp", ], } @@ -262,3 +306,73 @@ cc_fuzz { componentid: 87896 }, } + +cc_test { + name: "size_map_verify_unit_tests", + host_supported: true, + static_libs: ["libscudo"], + + include_dirs: [ + "external/scudo/android/tools", + "external/scudo/standalone", + "external/scudo/standalone/include", + "external/scudo/standalone/tools", + ], + srcs: [ + "android/tests/size_map_verify_unit_tests.cpp", + ], + +} + +cc_binary { + name: "size_map_gen", + defaults: ["scudo_config_defaults"], + host_supported: true, + static_libs: ["libscudo"], + include_dirs: [ + "external/scudo/android/tools", + "external/scudo/standalone", + "external/scudo/standalone/include", + ], + srcs: ["android/tools/size_map_gen.cpp"], +} + +// The targets below verify that all configuration is set up properly for +// the library or tests. +cc_defaults { + name: "scudo_verify_defaults", + host_supported: true, + srcs: ["config/config_build_check.cpp"], + + include_dirs: [ + "external/scudo/standalone", + ], + + product_variables: { + malloc_low_memory: { + cflags: ["-DSCUDO_LOW_MEMORY_CHECK"], + }, + }, +} + +cc_library { + name: "libscudo_verify_config", + stl: "libc++", + defaults: [ + "scudo_verify_defaults", + "libscudo_defaults", + ], + target: { + bionic: { + system_shared_libs: ["libc"], + }, + }, +} + +cc_test { + name: "scudo_verify_config", + defaults: [ + "scudo_verify_defaults", + "scudo_unit_tests_default", + ], +} diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg new file mode 100644 index 00000000000..dcf92be1eb8 --- /dev/null +++ b/PREUPLOAD.cfg @@ -0,0 +1,8 @@ +[Builtin Hooks] +clang_format = true + +[Builtin Hooks Options] +clang_format = --commit ${PREUPLOAD_COMMIT} --style file --extensions c,h,cc,cpp + +[Hook Scripts] +aosp_hook = ${REPO_ROOT}/frameworks/base/tools/aosp/aosp_sha.sh ${PREUPLOAD_COMMIT} "." 
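The new `scudo_config_defaults` block above wires the custom configuration into every Scudo target: `-DSCUDO_USE_CUSTOM_CONFIG` makes `allocator_config.h` pull in `config/custom_scudo_config.h`, and the `malloc_low_memory` product variable adds `-DSCUDO_LOW_MEMORY`. A minimal sketch of how those defines select the active config, condensed from the `custom_scudo_config.h` and `allocator_config.h` hunks later in this change (nothing new, just the selection path pulled out for clarity):

    // In standalone/allocator_config.h (changed in this snap):
    #ifdef SCUDO_USE_CUSTOM_CONFIG
    #include "custom_scudo_config.h"
    #endif

    // In config/custom_scudo_config.h (new in this snap):
    namespace scudo {
    #if defined(SCUDO_LOW_MEMORY)
    typedef AndroidLowMemoryConfig Config;  // malloc_low_memory builds
    #else
    typedef AndroidNormalConfig Config;     // everything else
    #endif
    typedef Config DefaultConfig;           // tests exercise the same config
    } // namespace scudo

`config/config_build_check.cpp` (added further down) static_asserts the same mapping, so a mismatched build flag fails at compile time rather than at runtime.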
diff --git a/TEST_MAPPING b/TEST_MAPPING index 32f13f0954f..5de14076e5e 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -4,7 +4,7 @@ "name": "scudo_unit_tests" }, { - "name": "scudo_hooks_unit_tests" + "name": "scudo_wrappers_unit_tests" }, { "name": "memunreachable_unit_test" diff --git a/android/tests/size_map_verify_unit_tests.cpp b/android/tests/size_map_verify_unit_tests.cpp new file mode 100644 index 00000000000..5aac2c7b900 --- /dev/null +++ b/android/tests/size_map_verify_unit_tests.cpp @@ -0,0 +1,289 @@ +//===-- size_map_verify_unit_tests.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "libsize_map_verify.h" +#include "tests/scudo_unit_test.h" + +namespace scudo { + +class SmokeConfigTest { + // This test is the base test config. +public: + static constexpr u32 Classes[] = { + 32, 48, 64, 80, 96, 112, 144, 176, 192, 224, 288, + 352, 448, 592, 800, 1104, 1648, 2096, 2576, 3120, 4112, 4624, + 7120, 8720, 11664, 14224, 16400, 18448, 23056, 29456, 33296, 65552, + }; + static const u32 MinSizeLog = 4; + static const u32 MidSizeLog = 6; + static const u32 MaxSizeLog = 16; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_smoke_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<SmokeConfigTest>(NumBitsMessage)); + EXPECT_EQ("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_smoke_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<SmokeConfigTest>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 80\n", + verifySizeMessage); +} +class SizeIncreaseConfigPass { + // Test shows that when every size and Min/Mid/Max changes that + // NumBits remains the same. + // Demonstrating NumBits changes based on how close the sizes + // are to each other. +public: + static constexpr u32 Classes[] = { + 80, 144, 208, 272, 336, 400, 528, 656, + 720, 848, 1104, 1360, 1744, 2320, 3152, 4368, + 6544, 8336, 10256, 12432, 16400, 18448, 28432, 34832, + 46608, 56848, 65552, 73744, 92176, 117776, 133136, 262160, + }; + static const u32 MinSizeLog = 6; + static const u32 MidSizeLog = 8; + static const u32 MaxSizeLog = 18; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_size_increase_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<SizeIncreaseConfigPass>(NumBitsMessage)); + EXPECT_EQ("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_size_increase_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<SizeIncreaseConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 272\n", + verifySizeMessage); +} +class MaxSizeConfigPass { + // This config uses the largest sizes permitted in size_class_map + // showing that NumBits does not need to increase due to sizes being + // too large and also shows the limit for MaxSizeLog. + // Primary allocator works up to 524304. 
+public: + static constexpr u32 Classes[] = { + 144, 272, 400, 528, 656, 784, 1040, 1296, + 1424, 1680, 2192, 2704, 3472, 4624, 6288, 8720, + 13072, 16656, 20496, 24848, 32784, 36880, 56848, 69648, + 93200, 113680, 131088, 147472, 184336, 235536, 266256, 524304, + }; + static const u32 MinSizeLog = 7; + static const u32 MidSizeLog = 9; + static const u32 MaxSizeLog = 19; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_max_size_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<MaxSizeConfigPass>(NumBitsMessage)); + EXPECT_EQ("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_max_size_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<MaxSizeConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 528\n", + verifySizeMessage); +} +class SizeDecreaseConfigFail { + // The NumBits decreasing causes a failure: + // NumBits not large enough to notice bit difference between numbers. + // NumBits cannot be increased due to MidSizeLog - 1 being the limit. +public: + static constexpr u32 Classes[] = { + 24, 32, 40, 48, 56, 64, 80, 96, 104, 120, 152, + 184, 232, 304, 408, 560, 832, 1056, 1296, 1568, 2064, 2320, + 3568, 4368, 5840, 7120, 8208, 9232, 11536, 14736, 16656, 32784, + }; + static const u32 MinSizeLog = 3; + static const u32 MidSizeLog = 5; + static const u32 MaxSizeLog = 15; + static const u32 NumBits = 6; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_size_decrease_config) { + std::string NumBitsMessage; + EXPECT_FALSE(generateNumBits<SizeDecreaseConfigFail>(NumBitsMessage)); + EXPECT_NE("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_size_decrease_config) { + std::string verifySizeMessage; + EXPECT_FALSE(verifySizeClass<SizeDecreaseConfigFail>(verifySizeMessage)); + EXPECT_EQ( + "MidSizeLog non-szTable formula is used until: 48\n\nNumBits not " + "large enough to distinguish between values. \nHard max NumBits - 1 " + "cannot exceed MidSizeLog.\nIf NumBits is at max then increase " + "Min/Mid/Max sizelogs and increase the sizes accordingly.\n\n\n", + verifySizeMessage); +} +class MidSizeLog10ConfigPass { + // Expands the use of the non-table formula to 1040. + // Shows how to expand the non-table formula by increasing MidSizeLog and + // by ensuring an equal step between sizes up to MidSizeLog. + // Shows the tool's ability to use a larger MidSizeLog and a smaller szTable. + // Demonstrates how many sizes are needed to increase the MidSizeLog. 
+public: + static constexpr u32 Classes[] = { + 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, + 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, + 384, 400, 416, 432, 448, 464, 480, 496, 512, 528, 544, + 560, 576, 592, 608, 624, 640, 656, 672, 688, 704, 720, + 736, 752, 768, 784, 800, 816, 832, 848, 864, 880, 896, + 912, 928, 944, 960, 976, 992, 1008, 1024, 1040, 1104, 1648, + 2096, 2576, 3120, 4112, 4624, 7120, 8720, 11664, 14224, 16400, 18448, + 23056, 29456, 33296, 65552, + }; + static const u32 MinSizeLog = 4; + static const u32 MidSizeLog = 10; + static const u32 MaxSizeLog = 16; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_midsizelog_10_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<MidSizeLog10ConfigPass>(NumBitsMessage)); + EXPECT_EQ("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_midsizelog_10_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<MidSizeLog10ConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 1040\n", + verifySizeMessage); +} +class NumBitsIncreaseConfigPass { + // Demonstrates when to increase NumBits and how to do it. + // Ensure NumBits - 1 <= MidSizeLog, with an equal step until MidSizeLog. + // Increasing NumBits allows more bits to be checked when analyzing sizes + // NumBits 8 checks 7 bits from the most-significant-bit-index. + // Here NumBits 8 is needed for the sizes 288 and 290. + // Shows NumBits increases szTable's flexibility for new sizes. + // Another condition to remember: + // Sizes cannot be just 1 larger than previous size. +public: + static constexpr u32 Classes[] = { + 32, 48, 64, 80, 96, 112, 128, 144, 176, + 192, 224, 288, 290, 352, 448, 592, 800, 1104, + 1648, 2096, 2576, 3120, 4112, 4624, 7120, 8720, 11664, + 14224, 16400, 18448, 23056, 29456, 33296, 65552, + }; + static const u32 MinSizeLog = 4; + static const u32 MidSizeLog = 7; + static const u32 MaxSizeLog = 16; + static const u32 NumBits = 8; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_numbits_increase_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<NumBitsIncreaseConfigPass>(NumBitsMessage)); + EXPECT_EQ("NumBits = 8\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_numbits_increase_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<NumBitsIncreaseConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 144\n", + verifySizeMessage); +} +class MidEqualMaxConfigPass { + // The equality of MidSizeLog and MaxSizeLog shows how the szTable + // does not need to be used, which makes NumBits obselete. + // The test shows that the formula can be used for every size. 
+public: + static constexpr u32 Classes[] = { + 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, + 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, + 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608, 624, 640, + 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, 832, 848, + 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024, 1040, + }; + static const u32 MinSizeLog = 4; + static const u32 MidSizeLog = 10; + static const u32 MaxSizeLog = 10; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 16; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_mid_equal_max_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<MidEqualMaxConfigPass>(NumBitsMessage)); + EXPECT_EQ( + "MidSizeLog = MaxSizeLog, NumBits not used for these sizes. Only uses " + "the formula without szTable.\n", + NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_mid_equal_max_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<MidEqualMaxConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 1040\nMidSizeLog = " + "MaxSizeLog, szTable and NumBits are not used at all.\n", + verifySizeMessage); +} +class SizeDeltaConfigPass { + // Test shows that when changing SizeDelta, min and max have to + // change to match. + // Every size needs to change by whatever the SizeDelta changed by. + // Sizes need to be added to make Mid match. +public: + static constexpr u32 Classes[] = { + 16, 24, 32, 40, 48, 56, 64, 72, 88, + 104, 136, 168, 184, 216, 280, 344, 440, 584, + 792, 1096, 1640, 2088, 2568, 3112, 4104, 4616, 7112, + 8712, 11656, 14216, 16392, 18440, 23048, 29448, 33288, 65544, + }; + static const u32 MinSizeLog = 3; + static const u32 MidSizeLog = 6; + static const u32 MaxSizeLog = 16; + static const u32 NumBits = 7; + + static const u32 SizeDelta = 8; + + static const u32 MaxNumCachedHint = 13; + static const u32 MaxBytesCachedLog = 13; +}; +TEST(ScudoToolSizeMapVerifyTest, generate_size_delta_config) { + std::string NumBitsMessage; + EXPECT_TRUE(generateNumBits<SizeDeltaConfigPass>(NumBitsMessage)); + EXPECT_EQ("NumBits = 7\n", NumBitsMessage); +} +TEST(ScudoToolSizeMapVerifyTest, verify_size_delta_config) { + std::string verifySizeMessage; + EXPECT_TRUE(verifySizeClass<SizeDeltaConfigPass>(verifySizeMessage)); + EXPECT_EQ("MidSizeLog non-szTable formula is used until: 72\n", + verifySizeMessage); +} +} // namespace scudo
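To make the expected strings in these tests concrete: for `SmokeConfigTest` (MinSizeLog 4, MidSizeLog 6, SizeDelta 16), the "formula is used until" value is (1 << MidSizeLog) + SizeDelta = 64 + 16 = 80, i.e. every class up to 80 bytes sits on a fixed 2^MinSizeLog = 16-byte ladder and never touches the szTable. A rough, self-contained sketch of that direct mapping; `formulaClassId` is a hypothetical helper for illustration, not part of `libsize_map_verify.h`:

    #include <cstdint>

    // Constants mirror SmokeConfigTest above.
    constexpr uint32_t MinSizeLog = 4, MidSizeLog = 6, SizeDelta = 16;

    constexpr uint32_t formulaClassId(uint32_t Size) {
      // Classes below MidSize form an even 16-byte ladder: 32, 48, 64, 80.
      return (Size - SizeDelta + (1u << MinSizeLog) - 1) >> MinSizeLog;
    }

    static_assert(formulaClassId(32) == 1, "first class");
    static_assert(formulaClassId(80) == 4, "last size reached by the formula");
    // Anything larger than 80 is classified through the szTable, which is
    // where NumBits and generateNumBits() come into play.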
\ No newline at end of file diff --git a/android/tools/libsize_map_verify.h b/android/tools/libsize_map_verify.h new file mode 100644 index 00000000000..98b1835d48a --- /dev/null +++ b/android/tools/libsize_map_verify.h @@ -0,0 +1,308 @@ +//===-- libsize_map_verify.h -------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_LIBSIZE_MAP_VERIFY_H_ +#define SCUDO_LIBSIZE_MAP_VERIFY_H_ + +#include "common.h" +#include "size_class_map.h" +#include <string> +#include <vector> + +namespace scudo { +typedef u8 szTableT; + +// Returns the index of each size class. +// Attempting to find the smallest size that fits within each size class. +// Example: +// If a size class is 8 then 4,5,6,7,8 return the index of size 8 +// but 9 would return the index of the next size class, 16. +u8 computeClassId(uptr Size, u32 ClassesSize, u32 Classes[]) { + for (uptr i = 0; i != ClassesSize; ++i) + if (Size <= Classes[i]) + return static_cast<u8>(i + 1); + return static_cast<u8>(-1); +} +// Function returns a vector that contains the classIds for all the sizes. +// Needed to check if the NumBits generated assigns the indexes to +// classIds correctly. +std::vector<u8> szTableCreate(u32 NumBits, u32 MidSizeLog, u32 MaxSizeLog, + u32 SizeDelta, u32 ClassesSize, u32 Classes[]) { + std::vector<u8> Tab((MaxSizeLog - MidSizeLog) << NumBits); + // Pos starts at the MidSize, which ignores the sizes not used in szTable. + // Inc uses NumBits - 1 to determine the starting incrementing value. + // Tab gets the classId of each size based on computeClassId. + uptr Pos = 1 << MidSizeLog; + uptr Inc = 1 << (MidSizeLog - NumBits); + for (uptr i = 0; i != Tab.size(); ++i) { + Pos += Inc; + if ((Pos & (Pos - 1)) == 0) + Inc *= 2; + Tab[i] = computeClassId(Pos + SizeDelta, ClassesSize, Classes); + } + return Tab; +}; + +// Function returns the index of the first value greater than MidSizeLog. +template <typename Config> uptr findMidSizeIndex() { + const u32 len = sizeof(Config::Classes) / sizeof(Config::Classes[0]); + u32 largerMid = 0; + for (uptr i = 0; i < len; ++i) { + if (Config::Classes[i] > (1 << Config::MidSizeLog) + Config::SizeDelta) { + largerMid = i; + break; + } + } + return largerMid; +} + +// Calculates the minimum NumBits that can be used for the given sizes and +// Min/Mid/Max. Smaller NumBits creates a szTable nearly half the size and +// quickens navigation of the table. The sizes smaller than MidSizeLog do not +// use NumBits or szTable, instead using a formula. This method is faster but +// requires sizes to have the exact same spacing of 2^MinSizeLog; therefore, +// having an efficient NumBits allows for the table to be more flexible than the +// formula while still moving at a reasonable speed. +template <typename Config> bool generateNumBits(std::string &manipMessage) { + // In size_class_map S is used, so it is used for consistency. + u32 S = Config::NumBits - 1; + const u32 len = sizeof(Config::Classes) / sizeof(Config::Classes[0]); + // This is used to display the NumBits calculated + u32 minNumBits = S; + // largerMid equals the index of the first value greater than MidSizeLog. + // These sizes are the only ones used with NumBits, smaller sizes are + // ignored. 
+ const u32 largerMid = findMidSizeIndex<Config>(); + if (largerMid == 0) { + manipMessage += + "MidSizeLog = MaxSizeLog, NumBits not used for these sizes. " + "Only uses the formula without szTable.\n"; + return true; + } + + // Create Classes array that can be inputed into functions and referenced. + u32 ClassesFunc[len]; + for (uptr i = 0; i < len; ++i) + ClassesFunc[i] = Config::Classes[i]; + // Create smaller Classes array that can be manipulated to calculate NumBits. + u32 ClassesManip[len - largerMid]; + for (uptr i = 0; i < len - largerMid; ++i) + ClassesManip[i] = ClassesFunc[i + largerMid] - Config::SizeDelta; + + u32 holdIndex[len - largerMid]; + bool failed = false; + // Starting at intial S, it decreases to find the smallest working S + // for the current config. + for (; S > 0; --S) { + // For each size it calls the on the algorithm which retuns an index. + for (uptr i = 0; i < len - largerMid; ++i) + holdIndex[i] = scaledLog2(ClassesManip[i] - 1, Config::MidSizeLog, S); + + // Vector that holds classIds for sizes that is navigated using indexes + // stored in holdIndex. + std::vector szTableT = + szTableCreate(S, Config::MidSizeLog, Config::MaxSizeLog, + Config::SizeDelta, len, ClassesFunc); + + // Checks that each index in holdIndex should refer to a unique classId, + // therefore a unique size a duplicate means that the calculated index + // for two different sizes refers to the same classId. + for (uptr i = 1; i < len - largerMid; ++i) { + if (szTableT[holdIndex[i]] == szTableT[holdIndex[i - 1]]) { + failed = true; + break; + } + } + if (failed == true) + break; + } + // Setting minNumBits to the last working NumBits and Numbits = S + 1. + minNumBits = S + 2; + // Adds a check to ensure NumBits calculated is not too large. + if (minNumBits - 1 > Config::MidSizeLog) { + manipMessage += + "Calculated Numbits too large. The max size for NumBits is: " + "NumBits - 1 = MidSizeLog.\n" + "NumBits = " + + std::to_string(minNumBits) + "\n"; + return false; + } + manipMessage += "NumBits = " + std::to_string(minNumBits) + "\n"; + return true; +} + +// Verify the sizes and variables entered are functional. +// If not, gives a brief explaination of the error. +template <typename Config> bool verifySizeClass(std::string &manipMessage) { + const u32 len = sizeof(Config::Classes) / sizeof(Config::Classes[0]); + u32 ClassesFunc[len]; + for (uptr i = 0; i < len; ++i) + ClassesFunc[i] = Config::Classes[i]; + + // Verify smallest size = MinSizeLog and largest size = MaxSizeLog. + // Log base 2 of (smallest size - SizeDelta) and + // Log base 2 of (largest size - SizeDelta). + const u32 MinSize = (1 << Config::MinSizeLog); + const u32 MaxSize = (1 << Config::MaxSizeLog); + if (ClassesFunc[0] - Config::SizeDelta != MinSize) { + manipMessage += "MinSizeLog + SizeDelta not equal to the smallest size. " + + std::to_string(MinSize) + + " != " + std::to_string(ClassesFunc[0]) + "\n\n"; + return false; + } + if (ClassesFunc[len - 1] - Config::SizeDelta != MaxSize) { + manipMessage += "Largest size (" + std::to_string(ClassesFunc[len - 1]) + + ") - SizeDelta (" + std::to_string(Config::SizeDelta) + + ") != MaxSize (" + std::to_string(MaxSize) + ")\n\n"; + return false; + } + // Verify MidSizeLog is greater than MinSizeLog. 
+ const u32 MidSize = (1 << Config::MidSizeLog); + if (MidSize <= MinSize) { + manipMessage += + "MidSizeLog needs to be greater than MinSizeLog\n" + "If the MidSizeLog is equal to MinSizeLog then the szTable will be " + "used for every size.\nMin size = " + + std::to_string(MinSize) + "\tMid size = " + std::to_string(MidSize) + + "\n\n"; + return false; + } + + // Displays why MidSizeLog is not working. + for (uptr i = 1; i < len; ++i) { + // If the step ends prior to MidSize, the step needs extending. + if (ClassesFunc[i] - ClassesFunc[i - 1] != 1 << Config::MinSizeLog && + ClassesFunc[i - 1] - Config::SizeDelta < MidSize) { + manipMessage += + "MidSizeLog non-table formula can be used until: " + + std::to_string(ClassesFunc[i - 1]) + + "\n\nCurrently stops at: " + std::to_string(MidSize) + + "\nFor size_map to work, formula must work for a number >= " + "the current MidSize.\nMidSizeLog is either too large or their " + "is not an equal step between desired sizes." + "\nThe step between sizes should equal 2^MinSizeLog.\n\n"; + return false; + } else if (ClassesFunc[i] - ClassesFunc[i - 1] != 1 << Config::MinSizeLog || + MidSize == MaxSize) { + manipMessage += "MidSizeLog non-szTable formula is used until: " + + std::to_string(MidSize + Config::SizeDelta) + "\n"; + break; + } + } + // Verifying if the MidSizeLog and MaxSizeLog. + if (MidSize == MaxSize) { + manipMessage += + "MidSizeLog = MaxSizeLog, szTable and NumBits are not used at " + "all.\n"; + return true; + } + + // Recreates NumBits arrays/vectors to verify the NumBits. + // Explained in generateNumBits. + u32 S = Config::NumBits - 1; + std::vector szTableT = + szTableCreate(S, Config::MidSizeLog, Config::MaxSizeLog, + Config::SizeDelta, len, ClassesFunc); + const u32 largerMid = findMidSizeIndex<Config>(); + u32 ClassesManip[len - largerMid]; + for (uptr i = 0; i < len - largerMid; ++i) + ClassesManip[i] = Config::Classes[i + largerMid] - Config::SizeDelta; + u32 holdIndex[len - largerMid]; + for (uptr i = 0; i < len - largerMid; ++i) + holdIndex[i] = scaledLog2(ClassesManip[i] - 1, Config::MidSizeLog, S); + + for (uptr i = 1; i < len - largerMid; ++i) { + if (szTableT[holdIndex[i]] == szTableT[holdIndex[i - 1]]) { + manipMessage += + "\nNumBits not large enough to distinguish between values. " + "\nHard max NumBits - 1 cannot exceed MidSizeLog.\n" + "If NumBits is at max then increase Min/Mid/Max sizelogs and " + "increase the sizes accordingly.\n\n\n"; + return false; + } + } + return true; +} + +// Display to what size MidSizeLog will work with and most efficient numbers. +// MidSizeLog uses a formula, not a table. 
+template <typename Config> void optimizeMidSizeLog(std::string &manipMessage) { + const u32 len = sizeof(Config::Classes) / sizeof(Config::Classes[0]); + u32 ClassesFunc[len]; + for (uptr i = 0; i < len; ++i) + ClassesFunc[i] = Config::Classes[i]; + + const u32 MaxSize = (1 << Config::MaxSizeLog); + const u32 MidSize = (1 << Config::MidSizeLog); + for (uptr i = 1; i < len; ++i) { + if (ClassesFunc[i] - ClassesFunc[i - 1] == 1 << Config::MinSizeLog) + continue; + manipMessage += + "MidSizeLog non-table formula can be used until: " + + std::to_string(ClassesFunc[i - 1]) + + "\nCurrently stops at: " + std::to_string(MidSize + Config::SizeDelta) + + "\n"; + if (MidSize == ClassesFunc[i - 1] - Config::SizeDelta) { + manipMessage += + "MidSizeLog is used efficiently and fully for current config\n"; + } else { + manipMessage += + "For size_map to work, formula must work for a number " + ">= the current MidSize.\nMax efficiency is achieved if they " + "are equal.\n"; + if (ClassesFunc[i - 1] - Config::SizeDelta > MidSize) { + manipMessage += + "In order to match numbers, increase MidSizeLog.\nEnsure " + "each size up to the new MidSize has an equal step between " + "each size.\nThe step equals 2^MinSizeLog.\n"; + } else if (ClassesFunc[i - 1] - Config::SizeDelta < MidSize) { + manipMessage += + "MidSizeLog is either too large or their is not an equal " + "step between desired sizes.\nThe step between sizes " + "should equal 2^MinSizeLog.\n"; + } + } + break; + } + if (ClassesFunc[len - 1] - Config::SizeDelta == MidSize) { + manipMessage += + "MidSizeLog non-table formula can be used until: " + + std::to_string(ClassesFunc[len - 1]) + + "\nCurrently stops at: " + std::to_string(MidSize) + + "\n" + "MidSizeLog is used efficiently and fully for current config\n"; + } +} + +// Dumps the size of szTable in elements and bits. +template <typename Config> bool dumpszTableInfo(std::string &manipMessage) { + u32 S = Config::NumBits - 1; + const u32 len = sizeof(Config::Classes) / sizeof(Config::Classes[0]); + u32 minNumBits = S; + const u32 largerMid = findMidSizeIndex<Config>(); + bool failed = false; + + if (largerMid == 0) { + manipMessage += "Does not use NumBits. MidSizeLog = MaxsizeLog."; + return true; + } + u32 ClassesFunc[len]; + for (uptr i = 0; i < len; ++i) + ClassesFunc[i] = Config::Classes[i]; + std::vector szTableT = + szTableCreate(S, Config::MidSizeLog, Config::MaxSizeLog, + Config::SizeDelta, len, ClassesFunc); + manipMessage += + "szTable Number of Elements: " + std::to_string(szTableT.size()) + + "\nSize of szTable in Bits: " + + std::to_string(szTableT.size() * sizeof(u8)) + "\n"; + return true; +} +} // namespace scudo + +#endif // SCUDO_LIBSIZE_MAP_VERIFY_H_ diff --git a/android/tools/size_map_gen.cpp b/android/tools/size_map_gen.cpp new file mode 100644 index 00000000000..177b8556624 --- /dev/null +++ b/android/tools/size_map_gen.cpp @@ -0,0 +1,57 @@ +//===-- size_map_gen.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "allocator_config.h" +#include "libsize_map_verify.h" +#include "size_class_map.h" +#include <iostream> + +int main() { + bool fullyPassed = true; + std::string NumBitsMessage; + std::string verifySizeMessage; + std::string optimizeMessage; + std::string dumpMessage; + + fullyPassed = fullyPassed && + scudo::generateNumBits<scudo::AndroidNormalSizeClassConfig>( + NumBitsMessage); + fullyPassed = fullyPassed && + scudo::verifySizeClass<scudo::AndroidNormalSizeClassConfig>( + verifySizeMessage); + scudo::optimizeMidSizeLog<scudo::AndroidNormalSizeClassConfig>( + optimizeMessage); + scudo::dumpszTableInfo<scudo::AndroidNormalSizeClassConfig>(dumpMessage); + + if (!NumBitsMessage.empty()) { + std::cout << "NumBits Calculator:" << std::endl; + std::cout << NumBitsMessage << std::endl; + } + if (!verifySizeMessage.empty()) { + std::cout << "Sizes Verification:" << std::endl; + std::cout << verifySizeMessage << std::endl; + } + if (!verifySizeMessage.empty()) { + std::cout << "Optimizations:" << std::endl; + std::cout << optimizeMessage << std::endl; + } + if (!verifySizeMessage.empty()) { + std::cout << "szTable Dump:" << std::endl; + std::cout << dumpMessage << std::endl; + } + + if (fullyPassed == true) + std::cout << "All Parameters Passed.\n\n"; + else + std::cout << "Errors Detected. Check NumBits Calculator or Size " + "Verification\n\n"; + + scudo::validateMap<scudo::AndroidNormalSizeClassMap>(); + + return fullyPassed ? 0 : 1; +} diff --git a/config/config_build_check.cpp b/config/config_build_check.cpp new file mode 100644 index 00000000000..3f97fc7514e --- /dev/null +++ b/config/config_build_check.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <stddef.h> +#include <stdint.h> + +#include <type_traits> + +#include "allocator_config.h" + +#if defined(SCUDO_LOW_MEMORY_CHECK) +static_assert( + std::is_same<scudo::Config, scudo::AndroidLowMemoryConfig>() == true, + "Low Memory is enabled, but AndroidLowMemoryConfig is not the default"); +#else +static_assert(std::is_same<scudo::Config, scudo::AndroidNormalConfig>() == true, + "Not using AndrodNormalConfig as the default"); +#endif + +static_assert(std::is_same<scudo::Config, scudo::DefaultConfig>() == true, + "DefaultConfig and Config are not the same"); diff --git a/config/custom_scudo_config.h b/config/custom_scudo_config.h new file mode 100644 index 00000000000..a0fa59230ce --- /dev/null +++ b/config/custom_scudo_config.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +//===-- custom_scudo-config.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// Use a custom config instead of the config found in allocator_config.h +namespace scudo { + +struct AndroidNormalSizeClassConfig { +#if SCUDO_WORDSIZE == 64U + static const uptr NumBits = 7; + static const uptr MinSizeLog = 4; + static const uptr MidSizeLog = 6; + static const uptr MaxSizeLog = 16; + static const u16 MaxNumCachedHint = 13; + static const uptr MaxBytesCachedLog = 13; + + static constexpr uptr Classes[] = { + 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000b0, + 0x000c0, 0x000e0, 0x00120, 0x00160, 0x001c0, 0x00250, 0x00320, 0x00450, + 0x00670, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01210, 0x01bd0, 0x02210, + 0x02d90, 0x03790, 0x04010, 0x04810, 0x05a10, 0x07310, 0x08210, 0x10010, + }; + static const uptr SizeDelta = 16; +#else + static const uptr NumBits = 8; + static const uptr MinSizeLog = 4; + static const uptr MidSizeLog = 7; + static const uptr MaxSizeLog = 16; + static const u16 MaxNumCachedHint = 14; + static const uptr MaxBytesCachedLog = 13; + + static constexpr uptr Classes[] = { + 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090, + 0x000a0, 0x000b0, 0x000c0, 0x000e0, 0x000f0, 0x00110, 0x00120, 0x00130, + 0x00150, 0x00160, 0x00170, 0x00190, 0x001d0, 0x00210, 0x00240, 0x002a0, + 0x00330, 0x00370, 0x003a0, 0x00400, 0x00430, 0x004a0, 0x00530, 0x00610, + 0x00730, 0x00840, 0x00910, 0x009c0, 0x00a60, 0x00b10, 0x00ca0, 0x00e00, + 0x00fb0, 0x01030, 0x01130, 0x011f0, 0x01490, 0x01650, 0x01930, 0x02010, + 0x02190, 0x02490, 0x02850, 0x02d50, 0x03010, 0x03210, 0x03c90, 0x04090, + 0x04510, 0x04810, 0x05c10, 0x06f10, 0x07310, 0x08010, 0x0c010, 0x10010, + }; + static const uptr SizeDelta = 16; +#endif +}; + +typedef TableSizeClassMap<AndroidNormalSizeClassConfig> + AndroidNormalSizeClassMap; + +#if defined(__LP64__) +static_assert(AndroidNormalSizeClassMap::usesCompressedLSBFormat(), ""); +#endif + +struct AndroidNormalConfig { +#if defined(__aarch64__) + static const bool MaySupportMemoryTagging = true; +#else + static const bool MaySupportMemoryTagging = false; +#endif + template <class A> + using TSDRegistryT = TSDRegistrySharedT<A, 8U, 2U>; // Shared, max 8 TSDs. 
+ + struct Primary { + using SizeClassMap = AndroidNormalSizeClassMap; +#if SCUDO_CAN_USE_PRIMARY64 + static const uptr RegionSizeLog = 28U; + typedef u32 CompactPtrT; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr GroupSizeLog = 20U; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; +#else + static const uptr RegionSizeLog = 18U; + static const uptr GroupSizeLog = 18U; + typedef uptr CompactPtrT; +#endif + static const s32 MinReleaseToOsIntervalMs = -1; + static const s32 MaxReleaseToOsIntervalMs = 1000; + static const s32 DefaultReleaseToOsIntervalMs = 1000; + }; +#if SCUDO_CAN_USE_PRIMARY64 + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; +#else + template <typename Config> using PrimaryT = SizeClassAllocator32<Config>; +#endif + + struct Secondary { + struct Cache { + static const u32 EntriesArraySize = 256U; + static const u32 QuarantineSize = 32U; + static const u32 DefaultMaxEntriesCount = 32U; + static const uptr DefaultMaxEntrySize = 2UL << 20; + static const s32 MinReleaseToOsIntervalMs = -1; + static const s32 MaxReleaseToOsIntervalMs = 1000; + static const s32 DefaultReleaseToOsIntervalMs = 0; + }; + template <typename Config> using CacheT = MapAllocatorCache<Config>; + }; + + template <typename Config> using SecondaryT = MapAllocator<Config>; +}; + +struct AndroidLowMemoryConfig { +#if defined(__aarch64__) + static const bool MaySupportMemoryTagging = true; +#else + static const bool MaySupportMemoryTagging = false; +#endif + template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 1U, 1U>; + + struct Primary { + // Use the same size class map as the normal config. + using SizeClassMap = AndroidNormalSizeClassMap; +#if SCUDO_CAN_USE_PRIMARY64 + static const uptr RegionSizeLog = 28U; + typedef u32 CompactPtrT; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr GroupSizeLog = 18U; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; +#else + static const uptr RegionSizeLog = 20U; + static const uptr GroupSizeLog = 20U; + typedef uptr CompactPtrT; +#endif + static const s32 MinReleaseToOsIntervalMs = 100; + static const s32 MaxReleaseToOsIntervalMs = 1000; + }; +#if SCUDO_CAN_USE_PRIMARY64 + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; +#else + template <typename Config> using PrimaryT = SizeClassAllocator32<Config>; +#endif + + struct Secondary { + // TODO(cferris): After secondary caching tuned, re-add a cache config. + template <typename Config> using CacheT = MapAllocatorNoCache<Config>; + }; + + template <typename Config> using SecondaryT = MapAllocator<Config>; +}; + +#if defined(SCUDO_LOW_MEMORY) +typedef AndroidLowMemoryConfig Config; +#else +typedef AndroidNormalConfig Config; +#endif + +typedef Config DefaultConfig; + +} // namespace scudo diff --git a/standalone/allocator_common.h b/standalone/allocator_common.h new file mode 100644 index 00000000000..2b77516ad11 --- /dev/null +++ b/standalone/allocator_common.h @@ -0,0 +1,92 @@ +//===-- allocator_common.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_ALLOCATOR_COMMON_H_ +#define SCUDO_ALLOCATOR_COMMON_H_ + +#include "common.h" +#include "list.h" + +namespace scudo { + +template <class SizeClassAllocator> struct TransferBatch { + typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; + typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; + + static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint; + void setFromArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, MaxNumCached); + Count = N; + memcpy(Batch, Array, sizeof(Batch[0]) * Count); + } + void appendFromArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, MaxNumCached - Count); + memcpy(Batch + Count, Array, sizeof(Batch[0]) * N); + // u16 will be promoted to int by arithmetic type conversion. + Count = static_cast<u16>(Count + N); + } + void appendFromTransferBatch(TransferBatch *B, u16 N) { + DCHECK_LE(N, MaxNumCached - Count); + DCHECK_GE(B->Count, N); + // Append from the back of `B`. + memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N); + // u16 will be promoted to int by arithmetic type conversion. + Count = static_cast<u16>(Count + N); + B->Count = static_cast<u16>(B->Count - N); + } + void clear() { Count = 0; } + bool empty() { return Count == 0; } + void add(CompactPtrT P) { + DCHECK_LT(Count, MaxNumCached); + Batch[Count++] = P; + } + void moveToArray(CompactPtrT *Array) { + memcpy(Array, Batch, sizeof(Batch[0]) * Count); + clear(); + } + + void moveNToArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, Count); + memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N); + Count = static_cast<u16>(Count - N); + } + u16 getCount() const { return Count; } + bool isEmpty() const { return Count == 0U; } + CompactPtrT get(u16 I) const { + DCHECK_LE(I, Count); + return Batch[I]; + } + TransferBatch *Next; + +private: + CompactPtrT Batch[MaxNumCached]; + u16 Count; +}; + +// A BatchGroup is used to collect blocks. Each group has a group id to +// identify the group kind of contained blocks. +template <class SizeClassAllocator> struct BatchGroup { + // `Next` is used by IntrusiveList. + BatchGroup *Next; + // The compact base address of each group + uptr CompactPtrGroupBase; + // Cache value of SizeClassAllocatorLocalCache::getMaxCached() + u16 MaxCachedPerBatch; + // Number of blocks pushed into this group. This is an increment-only + // counter. + uptr PushedBlocks; + // This is used to track how many bytes are not in-use since last time we + // tried to release pages. + uptr BytesInBGAtLastCheckpoint; + // Blocks are managed by TransferBatch in a list. + SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches; +}; + +} // namespace scudo + +#endif // SCUDO_ALLOCATOR_COMMON_H_ diff --git a/standalone/allocator_config.def b/standalone/allocator_config.def new file mode 100644 index 00000000000..dcd130ac449 --- /dev/null +++ b/standalone/allocator_config.def @@ -0,0 +1,131 @@ +//===-- allocator_config.def ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all the flags and types supported in Scudo. 
For optional +// flags and types, only explicitly define them when interested (i.e., unused +// optional flags or types can be skipped). + +#ifndef BASE_REQUIRED_TEMPLATE_TYPE +#define BASE_REQUIRED_TEMPLATE_TYPE(...) +#endif +#ifndef BASE_OPTIONAL +#define BASE_OPTIONAL(...) +#endif +#ifndef PRIMARY_REQUIRED_TYPE +#define PRIMARY_REQUIRED_TYPE(...) +#endif +#ifndef PRIMARY_REQUIRED +#define PRIMARY_REQUIRED(...) +#endif +#ifndef PRIMARY_OPTIONAL +#define PRIMARY_OPTIONAL(...) +#endif +#ifndef PRIMARY_OPTIONAL_TYPE +#define PRIMARY_OPTIONAL_TYPE(...) +#endif +#ifndef SECONDARY_REQUIRED_TEMPLATE_TYPE +#define SECONDARY_REQUIRED_TEMPLATE_TYPE(...) +#endif +#ifndef SECONDARY_CACHE_OPTIONAL +#define SECONDARY_CACHE_OPTIONAL(...) +#endif + +// BASE_REQUIRED_TEMPLATE_TYPE(NAME) +// +// Thread-Specific Data Registry used, shared or exclusive. +BASE_REQUIRED_TEMPLATE_TYPE(TSDRegistryT) + +// Defines the type of Primary allocator to use. +BASE_REQUIRED_TEMPLATE_TYPE(PrimaryT) + +// Defines the type of Secondary allocator to use. +BASE_REQUIRED_TEMPLATE_TYPE(SecondaryT) + +// BASE_OPTIONAL(TYPE, NAME, DEFAULT) +// +// Indicates possible support for Memory Tagging. +BASE_OPTIONAL(const bool, MaySupportMemoryTagging, false) + +// PRIMARY_REQUIRED_TYPE(NAME) +// +// SizeClassMap to use with the Primary. +PRIMARY_REQUIRED_TYPE(SizeClassMap) + +// Defines the type and scale of a compact pointer. A compact pointer can +// be understood as the offset of a pointer within the region it belongs +// to, in increments of a power-of-2 scale. See `CompactPtrScale` also. +PRIMARY_REQUIRED_TYPE(CompactPtrT) + +// PRIMARY_REQUIRED(TYPE, NAME) +// +// The scale of a compact pointer. E.g., Ptr = Base + (CompactPtr << Scale). +PRIMARY_REQUIRED(const uptr, CompactPtrScale) + +// Log2 of the size of a size class region, as used by the Primary. +PRIMARY_REQUIRED(const uptr, RegionSizeLog) + +// Conceptually, a region will be divided into groups based on the address +// range. Each allocation consumes blocks in the same group until exhaustion +// then it pops out blocks in a new group. Therefore, `GroupSizeLog` is always +// smaller or equal to `RegionSizeLog`. Note that `GroupSizeLog` needs to be +// equal to `RegionSizeLog` for SizeClassAllocator32 because of certain +// constraints. +PRIMARY_REQUIRED(const uptr, GroupSizeLog) + +// Call map for user memory with at least this size. Only used with primary64. +PRIMARY_REQUIRED(const uptr, MapSizeIncrement) + +// Defines the minimal & maximal release interval that can be set. +PRIMARY_REQUIRED(const s32, MinReleaseToOsIntervalMs) +PRIMARY_REQUIRED(const s32, MaxReleaseToOsIntervalMs) + +// PRIMARY_OPTIONAL(TYPE, NAME, DEFAULT) +// +// Indicates support for offsetting the start of a region by a random number of +// pages. This is only used if `EnableContiguousRegions` is enabled. +PRIMARY_OPTIONAL(const bool, EnableRandomOffset, false) +PRIMARY_OPTIONAL(const s32, DefaultReleaseToOsIntervalMs, INT32_MIN) + +// When `EnableContiguousRegions` is true, all regions will be be arranged in +// adjacency. This will reduce the fragmentation caused by region allocations +// but may require a huge amount of contiguous pages at initialization. +PRIMARY_OPTIONAL(const bool, EnableContiguousRegions, true) + +// PRIMARY_OPTIONAL_TYPE(NAME, DEFAULT) +// +// Use condition variable to shorten the waiting time of refillment of +// freelist. 
Note that this depends on the implementation of condition +// variable on each platform and the performance may vary so that it does not +// guarantee a performance benefit. +PRIMARY_OPTIONAL_TYPE(ConditionVariableT, ConditionVariableDummy) + +// SECONDARY_REQUIRED_TEMPLATE_TYPE(NAME) +// +// Defines the type of Secondary Cache to use. +SECONDARY_REQUIRED_TEMPLATE_TYPE(CacheT) + +// SECONDARY_CACHE_OPTIONAL(TYPE, NAME, DEFAULT) +// +// Defines the type of cache used by the Secondary. Some additional +// configuration entries can be necessary depending on the Cache. +SECONDARY_CACHE_OPTIONAL(const u32, EntriesArraySize, 0) +SECONDARY_CACHE_OPTIONAL(const u32, QuarantineSize, 0) +SECONDARY_CACHE_OPTIONAL(const u32, DefaultMaxEntriesCount, 0) +SECONDARY_CACHE_OPTIONAL(const uptr, DefaultMaxEntrySize, 0) +SECONDARY_CACHE_OPTIONAL(const s32, MinReleaseToOsIntervalMs, INT32_MIN) +SECONDARY_CACHE_OPTIONAL(const s32, MaxReleaseToOsIntervalMs, INT32_MAX) +SECONDARY_CACHE_OPTIONAL(const s32, DefaultReleaseToOsIntervalMs, INT32_MIN) + +#undef SECONDARY_CACHE_OPTIONAL +#undef SECONDARY_REQUIRED_TEMPLATE_TYPE +#undef PRIMARY_OPTIONAL_TYPE +#undef PRIMARY_OPTIONAL +#undef PRIMARY_REQUIRED +#undef PRIMARY_REQUIRED_TYPE +#undef BASE_OPTIONAL +#undef BASE_REQUIRED_TEMPLATE_TYPE diff --git a/standalone/allocator_config.h b/standalone/allocator_config.h index 64306066123..60f59bdd2f4 100644 --- a/standalone/allocator_config.h +++ b/standalone/allocator_config.h @@ -11,6 +11,7 @@ #include "combined.h" #include "common.h" +#include "condition_variable.h" #include "flags.h" #include "primary32.h" #include "primary64.h" @@ -19,192 +20,180 @@ #include "tsd_exclusive.h" #include "tsd_shared.h" +// To import a custom configuration, define `SCUDO_USE_CUSTOM_CONFIG` and +// aliasing the `Config` like: +// +// namespace scudo { +// // The instance of Scudo will be initiated with `Config`. +// typedef CustomConfig Config; +// // Aliasing as default configuration to run the tests with this config. +// typedef CustomConfig DefaultConfig; +// } // namespace scudo +// +// Put them in the header `custom_scudo_config.h` then you will be using the +// custom configuration and able to run all the tests as well. +#ifdef SCUDO_USE_CUSTOM_CONFIG +#include "custom_scudo_config.h" +#endif + namespace scudo { -// The combined allocator uses a structure as a template argument that -// specifies the configuration options for the various subcomponents of the -// allocator. -// -// struct ExampleConfig { -// // SizeClassMap to use with the Primary. -// using SizeClassMap = DefaultSizeClassMap; -// // Indicates possible support for Memory Tagging. -// static const bool MaySupportMemoryTagging = false; -// // Defines the Primary allocator to use. -// typedef SizeClassAllocator64<ExampleConfig> Primary; -// // Log2 of the size of a size class region, as used by the Primary. -// static const uptr PrimaryRegionSizeLog = 30U; -// // Log2 of the size of block group, as used by the Primary. Each group -// // contains a range of memory addresses, blocks in the range will belong to -// // the same group. In general, single region may have 1 or 2MB group size. -// // Multiple regions will have the group size equal to the region size -// // because the region size is usually smaller than 1 MB. -// // Smaller value gives fine-grained control of memory usage but the trade -// // off is that it may take longer time of deallocation. -// static const uptr PrimaryGroupSizeLog = 20U; -// // Defines the type and scale of a compact pointer. 
A compact pointer can -// // be understood as the offset of a pointer within the region it belongs -// // to, in increments of a power-of-2 scale. -// // eg: Ptr = Base + (CompactPtr << Scale). -// typedef u32 PrimaryCompactPtrT; -// static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; -// // Indicates support for offsetting the start of a region by -// // a random number of pages. Only used with primary64. -// static const bool PrimaryEnableRandomOffset = true; -// // Call map for user memory with at least this size. Only used with -// // primary64. -// static const uptr PrimaryMapSizeIncrement = 1UL << 18; -// // Defines the minimal & maximal release interval that can be set. -// static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; -// static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; -// // Defines the type of cache used by the Secondary. Some additional -// // configuration entries can be necessary depending on the Cache. -// typedef MapAllocatorNoCache SecondaryCache; -// // Thread-Specific Data Registry used, shared or exclusive. -// template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; -// }; - -// Default configurations for various platforms. +// Scudo uses a structure as a template argument that specifies the +// configuration options for the various subcomponents of the allocator. See the +// following configs as examples and check `allocator_config.def` for all the +// available options. +#ifndef SCUDO_USE_CUSTOM_CONFIG + +// Default configurations for various platforms. Note this is only enabled when +// there's no custom configuration in the build system. struct DefaultConfig { - using SizeClassMap = DefaultSizeClassMap; static const bool MaySupportMemoryTagging = true; + template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive + struct Primary { + using SizeClassMap = DefaultSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64<DefaultConfig> Primary; - static const uptr PrimaryRegionSizeLog = 32U; - static const uptr PrimaryGroupSizeLog = 21U; - typedef uptr PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + static const uptr RegionSizeLog = 32U; + static const uptr GroupSizeLog = 21U; + typedef uptr CompactPtrT; + static const uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; #else - typedef SizeClassAllocator32<DefaultConfig> Primary; - static const uptr PrimaryRegionSizeLog = 19U; - static const uptr PrimaryGroupSizeLog = 19U; - typedef uptr PrimaryCompactPtrT; + static const uptr RegionSizeLog = 19U; + static const uptr GroupSizeLog = 19U; + typedef uptr CompactPtrT; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - - typedef MapAllocatorCache<DefaultConfig> SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 32U; - static const u32 SecondaryCacheQuarantineSize = 0U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 19; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = INT32_MAX; - - template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive -}; -struct AndroidConfig { - using SizeClassMap = AndroidSizeClassMap; - 
static const bool MaySupportMemoryTagging = true; - + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64<AndroidConfig> Primary; - static const uptr PrimaryRegionSizeLog = 28U; - typedef u32 PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const uptr PrimaryGroupSizeLog = 20U; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; #else - typedef SizeClassAllocator32<AndroidConfig> Primary; - static const uptr PrimaryRegionSizeLog = 18U; - static const uptr PrimaryGroupSizeLog = 18U; - typedef uptr PrimaryCompactPtrT; + template <typename Config> using PrimaryT = SizeClassAllocator32<Config>; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; - static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; - typedef MapAllocatorCache<AndroidConfig> SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 256U; - static const u32 SecondaryCacheQuarantineSize = 32U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 2UL << 20; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 1000; + struct Secondary { + struct Cache { + static const u32 EntriesArraySize = 32U; + static const u32 QuarantineSize = 0U; + static const u32 DefaultMaxEntriesCount = 32U; + static const uptr DefaultMaxEntrySize = 1UL << 19; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template <typename Config> using CacheT = MapAllocatorCache<Config>; + }; + + template <typename Config> using SecondaryT = MapAllocator<Config>; +}; + +#endif // SCUDO_USE_CUSTOM_CONFIG +struct AndroidConfig { + static const bool MaySupportMemoryTagging = true; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 2U>; // Shared, max 8 TSDs. 
-}; - -struct AndroidSvelteConfig { - using SizeClassMap = SvelteSizeClassMap; - static const bool MaySupportMemoryTagging = false; + struct Primary { + using SizeClassMap = AndroidSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64<AndroidSvelteConfig> Primary; - static const uptr PrimaryRegionSizeLog = 27U; - typedef u32 PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const uptr PrimaryGroupSizeLog = 18U; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + static const uptr RegionSizeLog = 28U; + typedef u32 CompactPtrT; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr GroupSizeLog = 20U; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; #else - typedef SizeClassAllocator32<AndroidSvelteConfig> Primary; - static const uptr PrimaryRegionSizeLog = 16U; - static const uptr PrimaryGroupSizeLog = 16U; - typedef uptr PrimaryCompactPtrT; + static const uptr RegionSizeLog = 18U; + static const uptr GroupSizeLog = 18U; + typedef uptr CompactPtrT; +#endif + static const s32 MinReleaseToOsIntervalMs = 1000; + static const s32 MaxReleaseToOsIntervalMs = 1000; + }; +#if SCUDO_CAN_USE_PRIMARY64 + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; +#else + template <typename Config> using PrimaryT = SizeClassAllocator32<Config>; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; - static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; - - typedef MapAllocatorCache<AndroidSvelteConfig> SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 16U; - static const u32 SecondaryCacheQuarantineSize = 32U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 18; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 0; - template <class A> - using TSDRegistryT = TSDRegistrySharedT<A, 2U, 1U>; // Shared, max 2 TSDs. + struct Secondary { + struct Cache { + static const u32 EntriesArraySize = 256U; + static const u32 QuarantineSize = 32U; + static const u32 DefaultMaxEntriesCount = 32U; + static const uptr DefaultMaxEntrySize = 2UL << 20; + static const s32 MinReleaseToOsIntervalMs = 0; + static const s32 MaxReleaseToOsIntervalMs = 1000; + }; + template <typename Config> using CacheT = MapAllocatorCache<Config>; + }; + + template <typename Config> using SecondaryT = MapAllocator<Config>; }; #if SCUDO_CAN_USE_PRIMARY64 struct FuchsiaConfig { - using SizeClassMap = FuchsiaSizeClassMap; static const bool MaySupportMemoryTagging = false; - - typedef SizeClassAllocator64<FuchsiaConfig> Primary; - static const uptr PrimaryRegionSizeLog = 30U; - static const uptr PrimaryGroupSizeLog = 21U; - typedef u32 PrimaryCompactPtrT; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - - typedef MapAllocatorNoCache SecondaryCache; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs. 
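For readers wiring up SCUDO_USE_CUSTOM_CONFIG as described earlier in this file, the following is a minimal sketch of what a tree-local custom_scudo_config.h could look like under the new nested layout. The struct name MyPlatformConfig and every constant below are illustrative placeholders rather than tuning recommendations, and the header is assumed to be included from allocator_config.h, which already pulls in the size class maps, the primaries, the secondary and the TSD registries.

namespace scudo {

struct MyPlatformConfig {
  static const bool MaySupportMemoryTagging = false;
  template <class A>
  using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs.

  struct Primary {
    using SizeClassMap = DefaultSizeClassMap;
    static const uptr RegionSizeLog = 30U;
    static const uptr GroupSizeLog = 20U;
    typedef uptr CompactPtrT;
    static const uptr CompactPtrScale = 0;
    static const bool EnableRandomOffset = true;
    static const uptr MapSizeIncrement = 1UL << 18;
    static const s32 MinReleaseToOsIntervalMs = INT32_MIN;
    static const s32 MaxReleaseToOsIntervalMs = INT32_MAX;
  };
  template <typename Config> using PrimaryT = SizeClassAllocator64<Config>;

  struct Secondary {
    template <typename Config> using CacheT = MapAllocatorNoCache<Config>;
  };
  template <typename Config> using SecondaryT = MapAllocator<Config>;
};

// The allocator instance is built from `Config`; aliasing `DefaultConfig` as
// well lets the unit tests exercise the same configuration.
typedef MyPlatformConfig Config;
typedef MyPlatformConfig DefaultConfig;

} // namespace scudo

Optional entries that a custom config omits fall back to the defaults listed in allocator_config.def, via the wrapper header introduced below.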
+ + struct Primary { + using SizeClassMap = FuchsiaSizeClassMap; +#if SCUDO_RISCV64 + // Support 39-bit VMA for riscv-64 + static const uptr RegionSizeLog = 28U; + static const uptr GroupSizeLog = 19U; + static const bool EnableContiguousRegions = false; +#else + static const uptr RegionSizeLog = 30U; + static const uptr GroupSizeLog = 21U; +#endif + typedef u32 CompactPtrT; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; + + struct Secondary { + template <typename Config> using CacheT = MapAllocatorNoCache<Config>; + }; + template <typename Config> using SecondaryT = MapAllocator<Config>; }; struct TrustyConfig { - using SizeClassMap = TrustySizeClassMap; - static const bool MaySupportMemoryTagging = false; - - typedef SizeClassAllocator64<TrustyConfig> Primary; - // Some apps have 1 page of heap total so small regions are necessary. - static const uptr PrimaryRegionSizeLog = 10U; - static const uptr PrimaryGroupSizeLog = 10U; - typedef u32 PrimaryCompactPtrT; - static const bool PrimaryEnableRandomOffset = false; - // Trusty is extremely memory-constrained so minimally round up map calls. - static const uptr PrimaryMapSizeIncrement = 1UL << 4; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - - typedef MapAllocatorNoCache SecondaryCache; + static const bool MaySupportMemoryTagging = true; template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 1U, 1U>; // Shared, max 1 TSD. + + struct Primary { + using SizeClassMap = TrustySizeClassMap; + static const uptr RegionSizeLog = 28U; + static const uptr GroupSizeLog = 20U; + typedef u32 CompactPtrT; + static const bool EnableRandomOffset = false; + static const uptr MapSizeIncrement = 1UL << 12; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template <typename Config> using PrimaryT = SizeClassAllocator64<Config>; + + struct Secondary { + template <typename Config> using CacheT = MapAllocatorNoCache<Config>; + }; + + template <typename Config> using SecondaryT = MapAllocator<Config>; }; #endif +#ifndef SCUDO_USE_CUSTOM_CONFIG + #if SCUDO_ANDROID typedef AndroidConfig Config; #elif SCUDO_FUCHSIA @@ -215,6 +204,8 @@ typedef TrustyConfig Config; typedef DefaultConfig Config; #endif +#endif // SCUDO_USE_CUSTOM_CONFIG + } // namespace scudo #endif // SCUDO_ALLOCATOR_CONFIG_H_ diff --git a/standalone/allocator_config_wrapper.h b/standalone/allocator_config_wrapper.h new file mode 100644 index 00000000000..5477236ac1f --- /dev/null +++ b/standalone/allocator_config_wrapper.h @@ -0,0 +1,149 @@ +//===-- allocator_config_wrapper.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_ALLOCATOR_CONFIG_WRAPPER_H_ +#define SCUDO_ALLOCATOR_CONFIG_WRAPPER_H_ + +#include "condition_variable.h" +#include "internal_defs.h" +#include "secondary.h" + +namespace { + +template <typename T> struct removeConst { + using type = T; +}; +template <typename T> struct removeConst<const T> { + using type = T; +}; + +// This is only used for SFINAE when detecting if a type is defined. +template <typename T> struct voidAdaptor { + using type = void; +}; + +// This is used for detecting the case that defines the flag with wrong type and +// it'll be viewed as undefined optional flag. +template <typename L, typename R> struct assertSameType { + template <typename, typename> struct isSame { + static constexpr bool value = false; + }; + template <typename T> struct isSame<T, T> { + static constexpr bool value = true; + }; + static_assert(isSame<L, R>::value, "Flag type mismatches"); + using type = R; +}; + +} // namespace + +namespace scudo { + +#define OPTIONAL_TEMPLATE(TYPE, NAME, DEFAULT, MEMBER) \ + template <typename Config, typename = TYPE> struct NAME##State { \ + static constexpr removeConst<TYPE>::type getValue() { return DEFAULT; } \ + }; \ + template <typename Config> \ + struct NAME##State< \ + Config, typename assertSameType<decltype(Config::MEMBER), TYPE>::type> { \ + static constexpr removeConst<TYPE>::type getValue() { \ + return Config::MEMBER; \ + } \ + }; + +#define OPTIONAL_TYPE_TEMPLATE(NAME, DEFAULT, MEMBER) \ + template <typename Config, typename Void = void> struct NAME##Type { \ + static constexpr bool enabled() { return false; } \ + using NAME = DEFAULT; \ + }; \ + template <typename Config> \ + struct NAME##Type<Config, \ + typename voidAdaptor<typename Config::MEMBER>::type> { \ + static constexpr bool enabled() { return true; } \ + using NAME = typename Config::MEMBER; \ + }; + +template <typename AllocatorConfig> struct BaseConfig { +#define BASE_REQUIRED_TEMPLATE_TYPE(NAME) \ + template <typename T> using NAME = typename AllocatorConfig::template NAME<T>; + +#define BASE_OPTIONAL(TYPE, NAME, DEFAULT) \ + OPTIONAL_TEMPLATE(TYPE, NAME, DEFAULT, NAME) \ + static constexpr removeConst<TYPE>::type get##NAME() { \ + return NAME##State<AllocatorConfig>::getValue(); \ + } + +#include "allocator_config.def" +}; // BaseConfig + +template <typename AllocatorConfig> struct PrimaryConfig { + // TODO: Pass this flag through template argument to remove this hard-coded + // function. 
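The OPTIONAL_TEMPLATE machinery above is dense, so here is a standalone sketch of the same detection idiom outside of Scudo (all names are hypothetical, only the standard library is used): the partial specialization is selected when the config defines the member with the expected type, and SFINAE falls back to the primary template, that is, to the default value, when the member is absent.

#include <cstdint>

template <typename L, typename R> struct AssertSameType {
  template <typename, typename> struct IsSame {
    static constexpr bool value = false;
  };
  template <typename T> struct IsSame<T, T> {
    static constexpr bool value = true;
  };
  static_assert(IsSame<L, R>::value, "Flag type mismatches");
  using type = R;
};

// Primary template: used when `Config::EntriesArraySize` does not exist.
template <typename Config, typename = const std::uint32_t>
struct EntriesArraySizeState {
  static constexpr std::uint32_t getValue() { return 0U; } // default value
};
// Specialization: only viable when the member exists with the expected type.
template <typename Config>
struct EntriesArraySizeState<
    Config, typename AssertSameType<decltype(Config::EntriesArraySize),
                                    const std::uint32_t>::type> {
  static constexpr std::uint32_t getValue() { return Config::EntriesArraySize; }
};

struct WithEntries { static const std::uint32_t EntriesArraySize = 32U; };
struct WithoutEntries {};

static_assert(EntriesArraySizeState<WithEntries>::getValue() == 32U, "");
static_assert(EntriesArraySizeState<WithoutEntries>::getValue() == 0U, "");

OPTIONAL_TYPE_TEMPLATE plays the same trick for nested types, using voidAdaptor instead of a value comparison.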
+ static constexpr bool getMaySupportMemoryTagging() { + return BaseConfig<AllocatorConfig>::getMaySupportMemoryTagging(); + } + +#define PRIMARY_REQUIRED_TYPE(NAME) \ + using NAME = typename AllocatorConfig::Primary::NAME; + +#define PRIMARY_REQUIRED(TYPE, NAME) \ + static constexpr removeConst<TYPE>::type get##NAME() { \ + return AllocatorConfig::Primary::NAME; \ + } + +#define PRIMARY_OPTIONAL(TYPE, NAME, DEFAULT) \ + OPTIONAL_TEMPLATE(TYPE, NAME, DEFAULT, NAME) \ + static constexpr removeConst<TYPE>::type get##NAME() { \ + return NAME##State<typename AllocatorConfig::Primary>::getValue(); \ + } + +#define PRIMARY_OPTIONAL_TYPE(NAME, DEFAULT) \ + OPTIONAL_TYPE_TEMPLATE(NAME, DEFAULT, NAME) \ + static constexpr bool has##NAME() { \ + return NAME##Type<typename AllocatorConfig::Primary>::enabled(); \ + } \ + using NAME = typename NAME##Type<typename AllocatorConfig::Primary>::NAME; + +#include "allocator_config.def" + +}; // PrimaryConfig + +template <typename AllocatorConfig> struct SecondaryConfig { + // TODO: Pass this flag through template argument to remove this hard-coded + // function. + static constexpr bool getMaySupportMemoryTagging() { + return BaseConfig<AllocatorConfig>::getMaySupportMemoryTagging(); + } + +#define SECONDARY_REQUIRED_TEMPLATE_TYPE(NAME) \ + template <typename T> \ + using NAME = typename AllocatorConfig::Secondary::template NAME<T>; +#include "allocator_config.def" + + struct CacheConfig { + // TODO: Pass this flag through template argument to remove this hard-coded + // function. + static constexpr bool getMaySupportMemoryTagging() { + return BaseConfig<AllocatorConfig>::getMaySupportMemoryTagging(); + } + +#define SECONDARY_CACHE_OPTIONAL(TYPE, NAME, DEFAULT) \ + OPTIONAL_TEMPLATE(TYPE, NAME, DEFAULT, Cache::NAME) \ + static constexpr removeConst<TYPE>::type get##NAME() { \ + return NAME##State<typename AllocatorConfig::Secondary>::getValue(); \ + } +#include "allocator_config.def" + }; // CacheConfig +}; // SecondaryConfig + +#undef OPTIONAL_TEMPLATE +#undef OPTIONAL_TEMPLATE_TYPE + +} // namespace scudo + +#endif // SCUDO_ALLOCATOR_CONFIG_WRAPPER_H_ diff --git a/standalone/atomic_helpers.h b/standalone/atomic_helpers.h index d88f5d7be64..a68ffd16291 100644 --- a/standalone/atomic_helpers.h +++ b/standalone/atomic_helpers.h @@ -133,10 +133,10 @@ inline void atomic_store_relaxed(volatile T *A, typename T::Type V) { } template <typename T> -inline typename T::Type atomic_compare_exchange(volatile T *A, - typename T::Type Cmp, - typename T::Type Xchg) { - atomic_compare_exchange_strong(A, &Cmp, Xchg, memory_order_acquire); +inline typename T::Type +atomic_compare_exchange_strong(volatile T *A, typename T::Type Cmp, + typename T::Type Xchg, memory_order MO) { + atomic_compare_exchange_strong(A, &Cmp, Xchg, MO); return Cmp; } diff --git a/standalone/benchmarks/malloc_benchmark.cpp b/standalone/benchmarks/malloc_benchmark.cpp index 2adec88da3e..4fb05b7614c 100644 --- a/standalone/benchmarks/malloc_benchmark.cpp +++ b/standalone/benchmarks/malloc_benchmark.cpp @@ -52,8 +52,6 @@ static const size_t MaxSize = 128 * 1024; // cleanly. BENCHMARK_TEMPLATE(BM_malloc_free, scudo::AndroidConfig) ->Range(MinSize, MaxSize); -BENCHMARK_TEMPLATE(BM_malloc_free, scudo::AndroidSvelteConfig) - ->Range(MinSize, MaxSize); #if SCUDO_CAN_USE_PRIMARY64 BENCHMARK_TEMPLATE(BM_malloc_free, scudo::FuchsiaConfig) ->Range(MinSize, MaxSize); @@ -99,8 +97,6 @@ static const size_t MaxIters = 32 * 1024; // cleanly. 
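Stepping back to the atomic_helpers.h hunk above: the renamed three-operand helper now takes an explicit memory order and still returns the value that was observed at the target, so callers detect success by comparing the return value against the expected one. A hedged usage sketch, assuming the types and enumerators from Scudo's atomic_helpers.h:

scudo::atomic_u32 Flag = {};
// Try to move Flag from 0 to 1; Old is whatever was actually stored there.
const scudo::u32 Old = scudo::atomic_compare_exchange_strong(
    &Flag, /*Cmp=*/0U, /*Xchg=*/1U, scudo::memory_order_acquire);
const bool Exchanged = (Old == 0U); // true only if this caller won the race.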
BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::AndroidConfig) ->Range(MinIters, MaxIters); -BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::AndroidSvelteConfig) - ->Range(MinIters, MaxIters); #if SCUDO_CAN_USE_PRIMARY64 BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::FuchsiaConfig) ->Range(MinIters, MaxIters); diff --git a/standalone/checksum.cpp b/standalone/checksum.cpp index 2c277391a2e..efa4055bcbc 100644 --- a/standalone/checksum.cpp +++ b/standalone/checksum.cpp @@ -19,6 +19,8 @@ #else #include <sys/auxv.h> #endif +#elif defined(__loongarch__) +#include <sys/auxv.h> #endif namespace scudo { @@ -75,6 +77,20 @@ bool hasHardwareCRC32() { return !!(getauxval(AT_HWCAP) & HWCAP_CRC32); #endif // SCUDO_FUCHSIA } +#elif defined(__loongarch__) +// The definition is only pulled in by <sys/auxv.h> since glibc 2.38, so +// supply it if missing. +#ifndef HWCAP_LOONGARCH_CRC32 +#define HWCAP_LOONGARCH_CRC32 (1 << 6) +#endif +// Query HWCAP for platform capability, according to *Software Development and +// Build Convention for LoongArch Architectures* v0.1, Section 9.1. +// +// Link: +// https://github.com/loongson/la-softdev-convention/blob/v0.1/la-softdev-convention.adoc#kernel-development +bool hasHardwareCRC32() { + return !!(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_CRC32); +} #else // No hardware CRC32 implemented in Scudo for other architectures. bool hasHardwareCRC32() { return false; } diff --git a/standalone/checksum.h b/standalone/checksum.h index f8eda81fd91..32ca372b097 100644 --- a/standalone/checksum.h +++ b/standalone/checksum.h @@ -30,6 +30,10 @@ #include <arm_acle.h> #define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) #endif +#ifdef __loongarch__ +#include <larchintrin.h> +#define CRC32_INTRINSIC FIRST_32_SECOND_64(__crcc_w_w_w, __crcc_w_d_w) +#endif namespace scudo { diff --git a/standalone/chunk.h b/standalone/chunk.h index 32874a8df64..9228df04718 100644 --- a/standalone/chunk.h +++ b/standalone/chunk.h @@ -128,19 +128,6 @@ inline void loadHeader(u32 Cookie, const void *Ptr, reportHeaderCorruption(const_cast<void *>(Ptr)); } -inline void compareExchangeHeader(u32 Cookie, void *Ptr, - UnpackedHeader *NewUnpackedHeader, - UnpackedHeader *OldUnpackedHeader) { - NewUnpackedHeader->Checksum = - computeHeaderChecksum(Cookie, Ptr, NewUnpackedHeader); - PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader); - PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader); - if (UNLIKELY(!atomic_compare_exchange_strong( - getAtomicHeader(Ptr), &OldPackedHeader, NewPackedHeader, - memory_order_relaxed))) - reportHeaderRace(Ptr); -} - inline bool isValid(u32 Cookie, const void *Ptr, UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); diff --git a/standalone/combined.h b/standalone/combined.h index 52dbf6b1452..927513dea92 100644 --- a/standalone/combined.h +++ b/standalone/combined.h @@ -9,16 +9,19 @@ #ifndef SCUDO_COMBINED_H_ #define SCUDO_COMBINED_H_ +#include "allocator_config_wrapper.h" +#include "atomic_helpers.h" #include "chunk.h" #include "common.h" #include "flags.h" #include "flags_parser.h" #include "local_cache.h" +#include "mem_map.h" #include "memtag.h" +#include "mutex.h" #include "options.h" #include "quarantine.h" #include "report.h" -#include "rss_limit_checker.h" #include "secondary.h" #include "stack_depot.h" #include "string_utils.h" @@ -43,13 +46,17 @@ extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf, namespace scudo { -template <class Params, 
void (*PostInitCallback)(void) = EmptyCallback> +template <class Config, void (*PostInitCallback)(void) = EmptyCallback> class Allocator { public: - using PrimaryT = typename Params::Primary; + using AllocatorConfig = BaseConfig<Config>; + using PrimaryT = + typename AllocatorConfig::template PrimaryT<PrimaryConfig<Config>>; + using SecondaryT = + typename AllocatorConfig::template SecondaryT<SecondaryConfig<Config>>; using CacheT = typename PrimaryT::CacheT; - typedef Allocator<Params, PostInitCallback> ThisT; - typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT; + typedef Allocator<Config, PostInitCallback> ThisT; + typedef typename AllocatorConfig::template TSDRegistryT<ThisT> TSDRegistryT; void callPostInitCallback() { pthread_once(&PostInitNonce, PostInitCallback); @@ -67,14 +74,13 @@ public: if (UNLIKELY(Header.State != Chunk::State::Quarantined)) reportInvalidChunkState(AllocatorAction::Recycling, Ptr); - Chunk::UnpackedHeader NewHeader = Header; - NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + Header.State = Chunk::State::Available; + Chunk::storeHeader(Allocator.Cookie, Ptr, &Header); - if (allocatorSupportsMemoryTagging<Params>()) + if (allocatorSupportsMemoryTagging<AllocatorConfig>()) Ptr = untagPointer(Ptr); - void *BlockBegin = Allocator::getBlockBegin(Ptr, &NewHeader); - Cache.deallocate(NewHeader.ClassId, BlockBegin); + void *BlockBegin = Allocator::getBlockBegin(Ptr, &Header); + Cache.deallocate(Header.ClassId, BlockBegin); } // We take a shortcut when allocating a quarantine batch by working with the @@ -98,7 +104,8 @@ public: // Reset tag to 0 as this chunk may have been previously used for a tagged // user allocation. - if (UNLIKELY(useMemoryTagging<Params>(Allocator.Primary.Options.load()))) + if (UNLIKELY(useMemoryTagging<AllocatorConfig>( + Allocator.Primary.Options.load()))) storeTags(reinterpret_cast<uptr>(Ptr), reinterpret_cast<uptr>(Ptr) + sizeof(QuarantineBatch)); @@ -117,9 +124,8 @@ public: DCHECK_EQ(Header.Offset, 0); DCHECK_EQ(Header.SizeOrUnusedBytes, sizeof(QuarantineBatch)); - Chunk::UnpackedHeader NewHeader = Header; - NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + Header.State = Chunk::State::Available; + Chunk::storeHeader(Allocator.Cookie, Ptr, &Header); Cache.deallocate(QuarantineClassId, reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) - Chunk::getHeaderSize())); @@ -148,9 +154,6 @@ public: initFlags(); reportUnrecognizedFlags(); - RssChecker.init(scudo::getFlags()->soft_rss_limit_mb, - scudo::getFlags()->hard_rss_limit_mb); - // Store some flags locally. if (getFlags()->may_return_null) Primary.Options.set(OptionBit::MayReturnNull); @@ -162,23 +165,37 @@ public: Primary.Options.set(OptionBit::DeallocTypeMismatch); if (getFlags()->delete_size_mismatch) Primary.Options.set(OptionBit::DeleteSizeMismatch); - if (allocatorSupportsMemoryTagging<Params>() && + if (allocatorSupportsMemoryTagging<AllocatorConfig>() && systemSupportsMemoryTagging()) Primary.Options.set(OptionBit::UseMemoryTagging); - Primary.Options.set(OptionBit::UseOddEvenTags); QuarantineMaxChunkSize = static_cast<u32>(getFlags()->quarantine_max_chunk_size); Stats.init(); + // TODO(chiahungduan): Given that we support setting the default value in + // the PrimaryConfig and CacheConfig, consider to deprecate the use of + // `release_to_os_interval_ms` flag. 
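To make the new plumbing above concrete: the combined allocator is now parameterized by a single config struct, and the BaseConfig/PrimaryConfig/SecondaryConfig wrappers carve out the slice that each subcomponent sees. Below is a hedged usage sketch only; the production C wrappers use a global, zero-initialized instance rather than the free function shown here, and DefaultConfig is the stock configuration when no custom config is supplied.

#include "combined.h"

using AllocatorT = scudo::Allocator<scudo::DefaultConfig>;
static AllocatorT Instance; // Static storage: zero-initialized, as the wrappers rely on.

void exampleRoundTrip() {
  // allocate() lazily initializes the thread-specific data on first use.
  void *P = Instance.allocate(128, scudo::Chunk::Origin::Malloc);
  if (P)
    Instance.deallocate(P, scudo::Chunk::Origin::Malloc);
  // ForceAll drains the per-thread caches before releasing pages to the OS.
  Instance.releaseToOS(scudo::ReleaseToOS::ForceAll);
}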
const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms; Primary.init(ReleaseToOsIntervalMs); Secondary.init(&Stats, ReleaseToOsIntervalMs); Quarantine.init( static_cast<uptr>(getFlags()->quarantine_size_kb << 10), static_cast<uptr>(getFlags()->thread_local_quarantine_size_kb << 10)); + } + + void enableRingBuffer() NO_THREAD_SAFETY_ANALYSIS { + AllocationRingBuffer *RB = getRingBuffer(); + if (RB) + RB->Depot->enable(); + RingBufferInitLock.unlock(); + } - initRingBuffer(); + void disableRingBuffer() NO_THREAD_SAFETY_ANALYSIS { + RingBufferInitLock.lock(); + AllocationRingBuffer *RB = getRingBuffer(); + if (RB) + RB->Depot->disable(); } // Initialize the embedded GWP-ASan instance. Requires the main allocator to @@ -228,6 +245,7 @@ public: } void unmapTestOnly() { + unmapRingBuffer(); TSDRegistry.unmapTestOnly(this); Primary.unmapTestOnly(); Secondary.unmapTestOnly(); @@ -239,6 +257,7 @@ public: } TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } + QuarantineT *getQuarantine() { return &Quarantine; } // The Cache must be provided zero-initialized. void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); } @@ -249,13 +268,22 @@ public: // - unlinking the local stats from the global ones (destroying the cache does // the last two items). void commitBack(TSD<ThisT> *TSD) { + TSD->assertLocked(/*BypassCheck=*/true); Quarantine.drain(&TSD->getQuarantineCache(), QuarantineCallback(*this, TSD->getCache())); TSD->getCache().destroy(&Stats); } + void drainCache(TSD<ThisT> *TSD) { + TSD->assertLocked(/*BypassCheck=*/true); + Quarantine.drainAndRecycle(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().drain(); + } + void drainCaches() { TSDRegistry.drainCaches(this); } + ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { - if (!allocatorSupportsMemoryTagging<Params>()) + if (!allocatorSupportsMemoryTagging<AllocatorConfig>()) return Ptr; auto UntaggedPtr = untagPointer(Ptr); if (UntaggedPtr != Ptr) @@ -267,7 +295,7 @@ public: } ALWAYS_INLINE uptr addHeaderTag(uptr Ptr) { - if (!allocatorSupportsMemoryTagging<Params>()) + if (!allocatorSupportsMemoryTagging<AllocatorConfig>()) return Ptr; return addFixedTag(Ptr, 2); } @@ -276,7 +304,7 @@ public: return reinterpret_cast<void *>(addHeaderTag(reinterpret_cast<uptr>(Ptr))); } - NOINLINE u32 collectStackTrace() { + NOINLINE u32 collectStackTrace(UNUSED StackDepot *Depot) { #ifdef HAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE // Discard collectStackTrace() frame and allocator function frame. 
constexpr uptr DiscardFrames = 2; @@ -284,13 +312,13 @@ public: uptr Size = android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames); Size = Min<uptr>(Size, MaxTraceSize + DiscardFrames); - return Depot.insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size); + return Depot->insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size); #else return 0; #endif } - uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, + uptr computeOddEvenMaskForPointerMaybe(const Options &Options, uptr Ptr, uptr ClassId) { if (!Options.get(OptionBit::UseOddEvenTags)) return 0; @@ -320,8 +348,6 @@ public: #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.shouldSample())) { if (void *Ptr = GuardedAlloc.allocate(Size, Alignment)) { - if (UNLIKELY(&__scudo_allocate_hook)) - __scudo_allocate_hook(Ptr, Size); Stats.lock(); Stats.add(StatAllocated, GuardedAllocSlotSize); Stats.sub(StatFree, GuardedAllocSlotSize); @@ -354,40 +380,23 @@ public: } DCHECK_LE(Size, NeededSize); - switch (RssChecker.getRssLimitExceeded()) { - case RssLimitChecker::Neither: - break; - case RssLimitChecker::Soft: - if (Options.get(OptionBit::MayReturnNull)) - return nullptr; - reportSoftRSSLimit(RssChecker.getSoftRssLimit()); - break; - case RssLimitChecker::Hard: - reportHardRSSLimit(RssChecker.getHardRssLimit()); - break; - } - void *Block = nullptr; uptr ClassId = 0; uptr SecondaryBlockEnd = 0; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); - bool UnlockRequired; - auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); + typename TSDRegistryT::ScopedTSD TSD(TSDRegistry); Block = TSD->getCache().allocate(ClassId); - // If the allocation failed, the most likely reason with a 32-bit primary - // is the region being full. In that event, retry in each successively - // larger class until it fits. If it fails to fit in the largest class, - // fallback to the Secondary. + // If the allocation failed, retry in each successively larger class until + // it fits. If it fails to fit in the largest class, fallback to the + // Secondary. if (UNLIKELY(!Block)) { while (ClassId < SizeClassMap::LargestClassId && !Block) Block = TSD->getCache().allocate(++ClassId); if (!Block) ClassId = 0; } - if (UnlockRequired) - TSD->unlock(); } if (UNLIKELY(ClassId == 0)) { Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, @@ -397,6 +406,7 @@ public: if (UNLIKELY(!Block)) { if (Options.get(OptionBit::MayReturnNull)) return nullptr; + printStats(); reportOutOfMemory(NeededSize); } @@ -418,7 +428,7 @@ public: // // When memory tagging is enabled, zeroing the contents is done as part of // setting the tag. 
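The allocate() hunk above swaps the manual getTSDAndLock()/unlock() pairing for `typename TSDRegistryT::ScopedTSD TSD(TSDRegistry)`. The real guard lives in the TSD registry headers; the following is only a sketch of the RAII shape it provides, with assumed names and signatures, to show why the UnlockRequired bookkeeping can disappear from the fast path.

// Sketch only: acquire a locked TSD on construction, release it on scope exit,
// so early returns between the two points can no longer leak the lock.
template <typename RegistryT, typename TSDT> class ScopedTSDSketch {
public:
  explicit ScopedTSDSketch(RegistryT &Registry) {
    CurrentTSD = Registry.getTSDAndLock(&UnlockRequired); // assumed signature
  }
  ~ScopedTSDSketch() {
    if (UnlockRequired)
      CurrentTSD->unlock();
  }
  TSDT *operator->() { return CurrentTSD; }

private:
  TSDT *CurrentTSD = nullptr;
  bool UnlockRequired = false;
};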
- if (UNLIKELY(useMemoryTagging<Params>(Options))) { + if (UNLIKELY(useMemoryTagging<AllocatorConfig>(Options))) { uptr PrevUserPtr; Chunk::UnpackedHeader Header; const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); @@ -500,7 +510,7 @@ public: } else { Block = addHeaderTag(Block); Ptr = addHeaderTag(Ptr); - if (UNLIKELY(useMemoryTagging<Params>(Options))) { + if (UNLIKELY(useMemoryTagging<AllocatorConfig>(Options))) { storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr)); storeSecondaryAllocationStackMaybe(Options, Ptr, Size); } @@ -526,14 +536,14 @@ public: Chunk::SizeOrUnusedBytesMask; Chunk::storeHeader(Cookie, Ptr, &Header); - if (UNLIKELY(&__scudo_allocate_hook)) - __scudo_allocate_hook(TaggedPtr, Size); - return TaggedPtr; } NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0, UNUSED uptr Alignment = MinAlignment) { + if (UNLIKELY(!Ptr)) + return; + // For a deallocation, we only ensure minimal initialization, meaning thread // local data will be left uninitialized for now (when using ELF TLS). The // fallback cache will be used instead. This is a workaround for a situation @@ -542,12 +552,6 @@ public: // being destroyed properly. Any other heap operation will do a full init. initThreadMaybe(/*MinimalInit=*/true); - if (UNLIKELY(&__scudo_deallocate_hook)) - __scudo_deallocate_hook(Ptr); - - if (UNLIKELY(!Ptr)) - return; - #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) { GuardedAlloc.deallocate(Ptr); @@ -626,48 +630,47 @@ public: if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(OldPtr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr); - Chunk::UnpackedHeader OldHeader; - Chunk::loadHeader(Cookie, OldPtr, &OldHeader); + Chunk::UnpackedHeader Header; + Chunk::loadHeader(Cookie, OldPtr, &Header); - if (UNLIKELY(OldHeader.State != Chunk::State::Allocated)) + if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Reallocating, OldPtr); // Pointer has to be allocated with a malloc-type function. Some // applications think that it is OK to realloc a memalign'ed pointer, which // will trigger this check. It really isn't. if (Options.get(OptionBit::DeallocTypeMismatch)) { - if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc)) + if (UNLIKELY(Header.OriginOrWasZeroed != Chunk::Origin::Malloc)) reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr, - OldHeader.OriginOrWasZeroed, + Header.OriginOrWasZeroed, Chunk::Origin::Malloc); } - void *BlockBegin = getBlockBegin(OldTaggedPtr, &OldHeader); + void *BlockBegin = getBlockBegin(OldTaggedPtr, &Header); uptr BlockEnd; uptr OldSize; - const uptr ClassId = OldHeader.ClassId; + const uptr ClassId = Header.ClassId; if (LIKELY(ClassId)) { BlockEnd = reinterpret_cast<uptr>(BlockBegin) + SizeClassMap::getSizeByClassId(ClassId); - OldSize = OldHeader.SizeOrUnusedBytes; + OldSize = Header.SizeOrUnusedBytes; } else { BlockEnd = SecondaryT::getBlockEnd(BlockBegin); OldSize = BlockEnd - (reinterpret_cast<uptr>(OldTaggedPtr) + - OldHeader.SizeOrUnusedBytes); + Header.SizeOrUnusedBytes); } // If the new chunk still fits in the previously allocated block (with a // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. 
if (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize <= BlockEnd) { if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { - Chunk::UnpackedHeader NewHeader = OldHeader; - NewHeader.SizeOrUnusedBytes = + Header.SizeOrUnusedBytes = (ClassId ? NewSize : BlockEnd - (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; - Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(useMemoryTagging<Params>(Options))) { + Chunk::storeHeader(Cookie, OldPtr, &Header); + if (UNLIKELY(useMemoryTagging<AllocatorConfig>(Options))) { if (ClassId) { resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, @@ -688,9 +691,7 @@ public: void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (LIKELY(NewPtr)) { memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); - if (UNLIKELY(&__scudo_deallocate_hook)) - __scudo_deallocate_hook(OldTaggedPtr); - quarantineOrDeallocateChunk(Options, OldTaggedPtr, &OldHeader, OldSize); + quarantineOrDeallocateChunk(Options, OldTaggedPtr, &Header, OldSize); } return NewPtr; } @@ -708,10 +709,12 @@ public: Quarantine.disable(); Primary.disable(); Secondary.disable(); + disableRingBuffer(); } void enable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); + enableRingBuffer(); Secondary.enable(); Primary.enable(); Quarantine.enable(); @@ -745,9 +748,18 @@ public: Str.output(); } - void releaseToOS() { + void printFragmentationInfo() { + ScopedString Str; + Primary.getFragmentationInfo(&Str); + // Secondary allocator dumps the fragmentation data in getStats(). + Str.output(); + } + + void releaseToOS(ReleaseToOS ReleaseType) { initThreadMaybe(); - Primary.releaseToOS(); + if (ReleaseType == ReleaseToOS::ForceAll) + drainCaches(); + Primary.releaseToOS(ReleaseType); Secondary.releaseToOS(); } @@ -761,8 +773,9 @@ public: Base = untagPointer(Base); const uptr From = Base; const uptr To = Base + Size; - bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging<Params>() && - systemSupportsMemoryTagging(); + bool MayHaveTaggedPrimary = + allocatorSupportsMemoryTagging<AllocatorConfig>() && + systemSupportsMemoryTagging(); auto Lambda = [this, From, To, MayHaveTaggedPrimary, Callback, Arg](uptr Block) { if (Block < From || Block >= To) @@ -783,9 +796,9 @@ public: } if (Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; - if (allocatorSupportsMemoryTagging<Params>()) + if (allocatorSupportsMemoryTagging<AllocatorConfig>()) TaggedChunk = untagPointer(TaggedChunk); - if (useMemoryTagging<Params>(Primary.Options.load())) + if (useMemoryTagging<AllocatorConfig>(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header), Arg); @@ -836,10 +849,15 @@ public: // for it, which then forces realloc to copy the usable size of a chunk as // opposed to its actual size. uptr getUsableSize(const void *Ptr) { - initThreadMaybe(); if (UNLIKELY(!Ptr)) return 0; + return getAllocSize(Ptr); + } + + uptr getAllocSize(const void *Ptr) { + initThreadMaybe(); + #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) return GuardedAlloc.getSize(Ptr); @@ -848,9 +866,11 @@ public: Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); - // Getting the usable size of a chunk only makes sense if it's allocated. + + // Getting the alloc size of a chunk only makes sense if it's allocated. 
if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Sizing, const_cast<void *>(Ptr)); + return getSize(Ptr, &Header); } @@ -876,15 +896,8 @@ public: Header.State == Chunk::State::Allocated; } - void setRssLimitsTestOnly(int SoftRssLimitMb, int HardRssLimitMb, - bool MayReturnNull) { - RssChecker.init(SoftRssLimitMb, HardRssLimitMb); - if (MayReturnNull) - Primary.Options.set(OptionBit::MayReturnNull); - } - bool useMemoryTaggingTestOnly() const { - return useMemoryTagging<Params>(Primary.Options.load()); + return useMemoryTagging<AllocatorConfig>(Primary.Options.load()); } void disableMemoryTagging() { // If we haven't been initialized yet, we need to initialize now in order to @@ -894,7 +907,7 @@ public: // callback), which may cause mappings to be created with memory tagging // enabled. TSDRegistry.initOnceMaybe(this); - if (allocatorSupportsMemoryTagging<Params>()) { + if (allocatorSupportsMemoryTagging<AllocatorConfig>()) { Secondary.disableMemoryTagging(); Primary.Options.clear(OptionBit::UseMemoryTagging); } @@ -902,13 +915,15 @@ public: void setTrackAllocationStacks(bool Track) { initThreadMaybe(); - if (getFlags()->allocation_ring_buffer_size == 0) { + if (getFlags()->allocation_ring_buffer_size <= 0) { DCHECK(!Primary.Options.load().get(OptionBit::TrackAllocationStacks)); return; } - if (Track) + + if (Track) { + initRingBufferMaybe(); Primary.Options.set(OptionBit::TrackAllocationStacks); - else + } else Primary.Options.clear(OptionBit::TrackAllocationStacks); } @@ -925,8 +940,16 @@ public: Primary.Options.clear(OptionBit::AddLargeAllocationSlack); } - const char *getStackDepotAddress() const { - return reinterpret_cast<const char *>(&Depot); + const char *getStackDepotAddress() { + initThreadMaybe(); + AllocationRingBuffer *RB = getRingBuffer(); + return RB ? reinterpret_cast<char *>(RB->Depot) : nullptr; + } + + uptr getStackDepotSize() { + initThreadMaybe(); + AllocationRingBuffer *RB = getRingBuffer(); + return RB ? RB->StackDepotSize : 0; } const char *getRegionInfoArrayAddress() const { @@ -939,26 +962,15 @@ public: const char *getRingBufferAddress() { initThreadMaybe(); - return RawRingBuffer; + return reinterpret_cast<char *>(getRingBuffer()); } uptr getRingBufferSize() { initThreadMaybe(); - auto *RingBuffer = getRingBuffer(); - return RingBuffer ? ringBufferSizeInBytes(RingBuffer->Size) : 0; - } - - static bool setRingBufferSizeForBuffer(char *Buffer, size_t Size) { - // Need at least one entry. - if (Size < sizeof(AllocationRingBuffer) + - sizeof(typename AllocationRingBuffer::Entry)) { - return false; - } - AllocationRingBuffer *RingBuffer = - reinterpret_cast<AllocationRingBuffer *>(Buffer); - RingBuffer->Size = (Size - sizeof(AllocationRingBuffer)) / - sizeof(typename AllocationRingBuffer::Entry); - return true; + AllocationRingBuffer *RB = getRingBuffer(); + return RB && RB->RingBufferElements + ? 
ringBufferSizeInBytes(RB->RingBufferElements) + : 0; } static const uptr MaxTraceSize = 64; @@ -969,20 +981,35 @@ public: if (!Depot->find(Hash, &RingPos, &Size)) return; for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) - Trace[I] = static_cast<uintptr_t>((*Depot)[RingPos + I]); + Trace[I] = static_cast<uintptr_t>(Depot->at(RingPos + I)); } static void getErrorInfo(struct scudo_error_info *ErrorInfo, uintptr_t FaultAddr, const char *DepotPtr, - const char *RegionInfoPtr, const char *RingBufferPtr, + size_t DepotSize, const char *RegionInfoPtr, + const char *RingBufferPtr, size_t RingBufferSize, const char *Memory, const char *MemoryTags, uintptr_t MemoryAddr, size_t MemorySize) { + // N.B. we need to support corrupted data in any of the buffers here. We get + // this information from an external process (the crashing process) that + // should not be able to crash the crash dumper (crash_dump on Android). + // See also the get_error_info_fuzzer. *ErrorInfo = {}; - if (!allocatorSupportsMemoryTagging<Params>() || + if (!allocatorSupportsMemoryTagging<AllocatorConfig>() || MemoryAddr + MemorySize < MemoryAddr) return; - auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr); + const StackDepot *Depot = nullptr; + if (DepotPtr) { + // check for corrupted StackDepot. First we need to check whether we can + // read the metadata, then whether the metadata matches the size. + if (DepotSize < sizeof(*Depot)) + return; + Depot = reinterpret_cast<const StackDepot *>(DepotPtr); + if (!Depot->isValid(DepotSize)) + return; + } + size_t NextErrorReport = 0; // Check for OOB in the current block and the two surrounding blocks. Beyond @@ -995,7 +1022,7 @@ public: // Check the ring buffer. For primary allocations this will only find UAF; // for secondary allocations we can find either UAF or OOB. getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, - RingBufferPtr); + RingBufferPtr, RingBufferSize); // Check for OOB in the 28 blocks surrounding the 3 we checked earlier. // Beyond that we are likely to hit false positives. @@ -1006,7 +1033,6 @@ public: } private: - using SecondaryT = MapAllocator<Params>; typedef typename PrimaryT::SizeClassMap SizeClassMap; static const uptr MinAlignmentLog = SCUDO_MIN_ALIGNMENT_LOG; @@ -1018,7 +1044,7 @@ private: static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); - static_assert(!allocatorSupportsMemoryTagging<Params>() || + static_assert(!allocatorSupportsMemoryTagging<AllocatorConfig>() || MinAlignment >= archMemoryTagGranuleSize(), ""); @@ -1043,15 +1069,12 @@ private: QuarantineT Quarantine; TSDRegistryT TSDRegistry; pthread_once_t PostInitNonce = PTHREAD_ONCE_INIT; - RssLimitChecker RssChecker; #ifdef GWP_ASAN_HOOKS gwp_asan::GuardedPoolAllocator GuardedAlloc; uptr GuardedAllocSlotSize = 0; #endif // GWP_ASAN_HOOKS - StackDepot Depot; - struct AllocationRingBuffer { struct Entry { atomic_uptr Ptr; @@ -1061,15 +1084,31 @@ private: atomic_u32 DeallocationTrace; atomic_u32 DeallocationTid; }; - + StackDepot *Depot = nullptr; + uptr StackDepotSize = 0; + MemMapT RawRingBufferMap; + MemMapT RawStackDepotMap; + u32 RingBufferElements = 0; atomic_uptr Pos; - u32 Size; // An array of Size (at least one) elements of type Entry is immediately // following to this struct. 
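As the comment above notes, the entries are not a declared member: the header struct and its entries live in a single mapping, with the entry array starting right past the header. A small illustrative sketch of the size and addressing math this layout implies; it mirrors ringBufferSizeInBytes() and getRingBufferEntry() defined further down, and RB, N and I are placeholders.

// Layout of the scudo:ring_buffer mapping, for RingBufferElements == N:
//
//   [ AllocationRingBuffer header | Entry 0 | Entry 1 | ... | Entry N-1 ]
//
// Total bytes needed for N elements, before rounding up to a page:
//   sizeof(AllocationRingBuffer) + N * sizeof(AllocationRingBuffer::Entry)
//
// Address of entry I, with RB pointing at the header and I already reduced
// modulo N by the caller:
char *EntryBase = reinterpret_cast<char *>(RB) + sizeof(AllocationRingBuffer);
auto *Entry =
    reinterpret_cast<AllocationRingBuffer::Entry *>(EntryBase) + I;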
}; + static_assert(sizeof(AllocationRingBuffer) % + alignof(typename AllocationRingBuffer::Entry) == + 0, + "invalid alignment"); + + // Lock to initialize the RingBuffer + HybridMutex RingBufferInitLock; + // Pointer to memory mapped area starting with AllocationRingBuffer struct, // and immediately followed by Size elements of type Entry. - char *RawRingBuffer = {}; + atomic_uptr RingBufferAddress = {}; + + AllocationRingBuffer *getRingBuffer() { + return reinterpret_cast<AllocationRingBuffer *>( + atomic_load(&RingBufferAddress, memory_order_acquire)); + } // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { @@ -1118,41 +1157,40 @@ private: const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; - if (allocatorSupportsMemoryTagging<Params>()) + if (allocatorSupportsMemoryTagging<AllocatorConfig>()) Ptr = untagPointer(const_cast<void *>(Ptr)); return SecondaryT::getBlockEnd(getBlockBegin(Ptr, Header)) - reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes; } - void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, + void quarantineOrDeallocateChunk(const Options &Options, void *TaggedPtr, Chunk::UnpackedHeader *Header, uptr Size) NO_THREAD_SAFETY_ANALYSIS { void *Ptr = getHeaderTaggedPointer(TaggedPtr); - Chunk::UnpackedHeader NewHeader = *Header; // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. // This purposefully underflows for Size == 0. const bool BypassQuarantine = !Quarantine.getCacheSize() || ((Size - 1) >= QuarantineMaxChunkSize) || - !NewHeader.ClassId; + !Header->ClassId; if (BypassQuarantine) - NewHeader.State = Chunk::State::Available; + Header->State = Chunk::State::Available; else - NewHeader.State = Chunk::State::Quarantined; - NewHeader.OriginOrWasZeroed = useMemoryTagging<Params>(Options) && - NewHeader.ClassId && - !TSDRegistry.getDisableMemInit(); - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + Header->State = Chunk::State::Quarantined; + Header->OriginOrWasZeroed = useMemoryTagging<AllocatorConfig>(Options) && + Header->ClassId && + !TSDRegistry.getDisableMemInit(); + Chunk::storeHeader(Cookie, Ptr, Header); - if (UNLIKELY(useMemoryTagging<Params>(Options))) { + if (UNLIKELY(useMemoryTagging<AllocatorConfig>(Options))) { u8 PrevTag = extractTag(reinterpret_cast<uptr>(TaggedPtr)); storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); - if (NewHeader.ClassId) { + if (Header->ClassId) { if (!TSDRegistry.getDisableMemInit()) { uptr TaggedBegin, TaggedEnd; const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( - Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)), - NewHeader.ClassId); + Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, Header)), + Header->ClassId); // Exclude the previous tag so that immediate use after free is // detected 100% of the time. 
setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, @@ -1161,29 +1199,32 @@ private: } } if (BypassQuarantine) { - if (allocatorSupportsMemoryTagging<Params>()) + if (allocatorSupportsMemoryTagging<AllocatorConfig>()) Ptr = untagPointer(Ptr); - void *BlockBegin = getBlockBegin(Ptr, &NewHeader); - const uptr ClassId = NewHeader.ClassId; + void *BlockBegin = getBlockBegin(Ptr, Header); + const uptr ClassId = Header->ClassId; if (LIKELY(ClassId)) { - bool UnlockRequired; - auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - TSD->getCache().deallocate(ClassId, BlockBegin); - if (UnlockRequired) - TSD->unlock(); + bool CacheDrained; + { + typename TSDRegistryT::ScopedTSD TSD(TSDRegistry); + CacheDrained = TSD->getCache().deallocate(ClassId, BlockBegin); + } + // When we have drained some blocks back to the Primary from TSD, that + // implies that we may have the chance to release some pages as well. + // Note that in order not to block other thread's accessing the TSD, + // release the TSD first then try the page release. + if (CacheDrained) + Primary.tryReleaseToOS(ClassId, ReleaseToOS::Normal); } else { - if (UNLIKELY(useMemoryTagging<Params>(Options))) + if (UNLIKELY(useMemoryTagging<AllocatorConfig>(Options))) storeTags(reinterpret_cast<uptr>(BlockBegin), reinterpret_cast<uptr>(Ptr)); Secondary.deallocate(Options, BlockBegin); } } else { - bool UnlockRequired; - auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); + typename TSDRegistryT::ScopedTSD TSD(TSDRegistry); Quarantine.put(&TSD->getQuarantineCache(), QuarantineCallback(*this, TSD->getCache()), Ptr, Size); - if (UnlockRequired) - TSD->unlock(); } } @@ -1256,20 +1297,24 @@ private: storeEndMarker(RoundNewPtr, NewSize, BlockEnd); } - void storePrimaryAllocationStackMaybe(Options Options, void *Ptr) { + void storePrimaryAllocationStackMaybe(const Options &Options, void *Ptr) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; + AllocationRingBuffer *RB = getRingBuffer(); + if (!RB) + return; auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); - Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(RB->Depot); Ptr32[MemTagAllocationTidIndex] = getThreadID(); } - void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid, + void storeRingBufferEntry(AllocationRingBuffer *RB, void *Ptr, + u32 AllocationTrace, u32 AllocationTid, uptr AllocationSize, u32 DeallocationTrace, u32 DeallocationTid) { - uptr Pos = atomic_fetch_add(&getRingBuffer()->Pos, 1, memory_order_relaxed); + uptr Pos = atomic_fetch_add(&RB->Pos, 1, memory_order_relaxed); typename AllocationRingBuffer::Entry *Entry = - getRingBufferEntry(RawRingBuffer, Pos % getRingBuffer()->Size); + getRingBufferEntry(RB, Pos % RB->RingBufferElements); // First invalidate our entry so that we don't attempt to interpret a // partially written state in getSecondaryErrorInfo(). 
The fences below @@ -1288,34 +1333,38 @@ private: atomic_store_relaxed(&Entry->Ptr, reinterpret_cast<uptr>(Ptr)); } - void storeSecondaryAllocationStackMaybe(Options Options, void *Ptr, + void storeSecondaryAllocationStackMaybe(const Options &Options, void *Ptr, uptr Size) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; - - u32 Trace = collectStackTrace(); + AllocationRingBuffer *RB = getRingBuffer(); + if (!RB) + return; + u32 Trace = collectStackTrace(RB->Depot); u32 Tid = getThreadID(); auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); Ptr32[MemTagAllocationTraceIndex] = Trace; Ptr32[MemTagAllocationTidIndex] = Tid; - storeRingBufferEntry(untagPointer(Ptr), Trace, Tid, Size, 0, 0); + storeRingBufferEntry(RB, untagPointer(Ptr), Trace, Tid, Size, 0, 0); } - void storeDeallocationStackMaybe(Options Options, void *Ptr, u8 PrevTag, - uptr Size) { + void storeDeallocationStackMaybe(const Options &Options, void *Ptr, + u8 PrevTag, uptr Size) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; - + AllocationRingBuffer *RB = getRingBuffer(); + if (!RB) + return; auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); u32 AllocationTrace = Ptr32[MemTagAllocationTraceIndex]; u32 AllocationTid = Ptr32[MemTagAllocationTidIndex]; - u32 DeallocationTrace = collectStackTrace(); + u32 DeallocationTrace = collectStackTrace(RB->Depot); u32 DeallocationTid = getThreadID(); - storeRingBufferEntry(addFixedTag(untagPointer(Ptr), PrevTag), + storeRingBufferEntry(RB, addFixedTag(untagPointer(Ptr), PrevTag), AllocationTrace, AllocationTid, Size, DeallocationTrace, DeallocationTid); } @@ -1391,8 +1440,10 @@ private: UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW; R->allocation_address = ChunkAddr; R->allocation_size = Header.SizeOrUnusedBytes; - collectTraceMaybe(Depot, R->allocation_trace, - Data[MemTagAllocationTraceIndex]); + if (Depot) { + collectTraceMaybe(Depot, R->allocation_trace, + Data[MemTagAllocationTraceIndex]); + } R->allocation_tid = Data[MemTagAllocationTidIndex]; return NextErrorReport == NumErrorReports; }; @@ -1410,17 +1461,19 @@ private: size_t &NextErrorReport, uintptr_t FaultAddr, const StackDepot *Depot, - const char *RingBufferPtr) { + const char *RingBufferPtr, + size_t RingBufferSize) { auto *RingBuffer = reinterpret_cast<const AllocationRingBuffer *>(RingBufferPtr); - if (!RingBuffer || RingBuffer->Size == 0) + size_t RingBufferElements = ringBufferElementsFromBytes(RingBufferSize); + if (!RingBuffer || RingBufferElements == 0 || !Depot) return; uptr Pos = atomic_load_relaxed(&RingBuffer->Pos); - for (uptr I = Pos - 1; - I != Pos - 1 - RingBuffer->Size && NextErrorReport != NumErrorReports; + for (uptr I = Pos - 1; I != Pos - 1 - RingBufferElements && + NextErrorReport != NumErrorReports; --I) { - auto *Entry = getRingBufferEntry(RingBufferPtr, I % RingBuffer->Size); + auto *Entry = getRingBufferEntry(RingBuffer, I % RingBufferElements); uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr); if (!EntryPtr) continue; @@ -1483,47 +1536,110 @@ private: Primary.getStats(Str); Secondary.getStats(Str); Quarantine.getStats(Str); + TSDRegistry.getStats(Str); return Str->length(); } static typename AllocationRingBuffer::Entry * - getRingBufferEntry(char *RawRingBuffer, uptr N) { + getRingBufferEntry(AllocationRingBuffer *RB, uptr N) { + char *RBEntryStart = + &reinterpret_cast<char *>(RB)[sizeof(AllocationRingBuffer)]; return &reinterpret_cast<typename AllocationRingBuffer::Entry *>( - &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + RBEntryStart)[N]; 
} static const typename AllocationRingBuffer::Entry * - getRingBufferEntry(const char *RawRingBuffer, uptr N) { + getRingBufferEntry(const AllocationRingBuffer *RB, uptr N) { + const char *RBEntryStart = + &reinterpret_cast<const char *>(RB)[sizeof(AllocationRingBuffer)]; return &reinterpret_cast<const typename AllocationRingBuffer::Entry *>( - &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + RBEntryStart)[N]; + } + + void initRingBufferMaybe() { + ScopedLock L(RingBufferInitLock); + if (getRingBuffer() != nullptr) + return; + + int ring_buffer_size = getFlags()->allocation_ring_buffer_size; + if (ring_buffer_size <= 0) + return; + + u32 AllocationRingBufferSize = static_cast<u32>(ring_buffer_size); + + // We store alloc and free stacks for each entry. + constexpr u32 kStacksPerRingBufferEntry = 2; + constexpr u32 kMaxU32Pow2 = ~(UINT32_MAX >> 1); + static_assert(isPowerOfTwo(kMaxU32Pow2)); + // On Android we always have 3 frames at the bottom: __start_main, + // __libc_init, main, and 3 at the top: malloc, scudo_malloc and + // Allocator::allocate. This leaves 10 frames for the user app. The next + // smallest power of two (8) would only leave 2, which is clearly too + // little. + constexpr u32 kFramesPerStack = 16; + static_assert(isPowerOfTwo(kFramesPerStack)); + + if (AllocationRingBufferSize > kMaxU32Pow2 / kStacksPerRingBufferEntry) + return; + u32 TabSize = static_cast<u32>(roundUpPowerOfTwo(kStacksPerRingBufferEntry * + AllocationRingBufferSize)); + if (TabSize > UINT32_MAX / kFramesPerStack) + return; + u32 RingSize = static_cast<u32>(TabSize * kFramesPerStack); + + uptr StackDepotSize = sizeof(StackDepot) + sizeof(atomic_u64) * RingSize + + sizeof(atomic_u32) * TabSize; + MemMapT DepotMap; + DepotMap.map( + /*Addr=*/0U, roundUp(StackDepotSize, getPageSizeCached()), + "scudo:stack_depot"); + auto *Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase()); + Depot->init(RingSize, TabSize); + + MemMapT MemMap; + MemMap.map( + /*Addr=*/0U, + roundUp(ringBufferSizeInBytes(AllocationRingBufferSize), + getPageSizeCached()), + "scudo:ring_buffer"); + auto *RB = reinterpret_cast<AllocationRingBuffer *>(MemMap.getBase()); + RB->RawRingBufferMap = MemMap; + RB->RingBufferElements = AllocationRingBufferSize; + RB->Depot = Depot; + RB->StackDepotSize = StackDepotSize; + RB->RawStackDepotMap = DepotMap; + + atomic_store(&RingBufferAddress, reinterpret_cast<uptr>(RB), + memory_order_release); } - void initRingBuffer() { - u32 AllocationRingBufferSize = - static_cast<u32>(getFlags()->allocation_ring_buffer_size); - if (AllocationRingBufferSize < 1) + void unmapRingBuffer() { + AllocationRingBuffer *RB = getRingBuffer(); + if (RB == nullptr) return; - MapPlatformData Data = {}; - RawRingBuffer = static_cast<char *>( - map(/*Addr=*/nullptr, - roundUp(ringBufferSizeInBytes(AllocationRingBufferSize), - getPageSizeCached()), - "AllocatorRingBuffer", /*Flags=*/0, &Data)); - auto *RingBuffer = reinterpret_cast<AllocationRingBuffer *>(RawRingBuffer); - RingBuffer->Size = AllocationRingBufferSize; - static_assert(sizeof(AllocationRingBuffer) % - alignof(typename AllocationRingBuffer::Entry) == - 0, - "invalid alignment"); - } - - static constexpr size_t ringBufferSizeInBytes(u32 AllocationRingBufferSize) { + // N.B. because RawStackDepotMap is part of RawRingBufferMap, the order + // is very important. + RB->RawStackDepotMap.unmap(RB->RawStackDepotMap.getBase(), + RB->RawStackDepotMap.getCapacity()); + // Note that the `RB->RawRingBufferMap` is stored on the pages managed by + // itself. 
Take over the ownership before calling unmap() so that any + // operation along with unmap() won't touch inaccessible pages. + MemMapT RawRingBufferMap = RB->RawRingBufferMap; + RawRingBufferMap.unmap(RawRingBufferMap.getBase(), + RawRingBufferMap.getCapacity()); + atomic_store(&RingBufferAddress, 0, memory_order_release); + } + + static constexpr size_t ringBufferSizeInBytes(u32 RingBufferElements) { return sizeof(AllocationRingBuffer) + - AllocationRingBufferSize * - sizeof(typename AllocationRingBuffer::Entry); + RingBufferElements * sizeof(typename AllocationRingBuffer::Entry); } - inline AllocationRingBuffer *getRingBuffer() { - return reinterpret_cast<AllocationRingBuffer *>(RawRingBuffer); + static constexpr size_t ringBufferElementsFromBytes(size_t Bytes) { + if (Bytes < sizeof(AllocationRingBuffer)) { + return 0; + } + return (Bytes - sizeof(AllocationRingBuffer)) / + sizeof(typename AllocationRingBuffer::Entry); } }; diff --git a/standalone/common.cpp b/standalone/common.cpp index 9f14faeef28..06e930638f6 100644 --- a/standalone/common.cpp +++ b/standalone/common.cpp @@ -21,22 +21,4 @@ uptr getPageSizeSlow() { return PageSizeCached; } -// Fatal internal map() or unmap() error (potentially OOM related). -void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { - char Error[128] = "Scudo ERROR: internal map or unmap failure\n"; - if (SizeIfOOM) { - formatString( - Error, sizeof(Error), - "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", - SizeIfOOM >> 10); - } - outputRaw(Error); - setAbortMessage(Error); - die(); -} - -#if !SCUDO_LINUX -uptr GetRSS() { return 0; } -#endif - } // namespace scudo diff --git a/standalone/common.h b/standalone/common.h index aa15e9e787e..151fbd317e7 100644 --- a/standalone/common.h +++ b/standalone/common.h @@ -17,6 +17,7 @@ #include <stddef.h> #include <string.h> +#include <unistd.h> namespace scudo { @@ -27,7 +28,11 @@ template <class Dest, class Source> inline Dest bit_cast(const Source &S) { return D; } -inline constexpr bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } +inline constexpr bool isPowerOfTwo(uptr X) { + if (X == 0) + return false; + return (X & (X - 1)) == 0; +} inline constexpr uptr roundUp(uptr X, uptr Boundary) { DCHECK(isPowerOfTwo(Boundary)); @@ -111,19 +116,19 @@ template <typename T> inline void shuffle(T *A, u32 N, u32 *RandState) { *RandState = State; } -// Hardware specific inlinable functions. +inline void computePercentage(uptr Numerator, uptr Denominator, uptr *Integral, + uptr *Fractional) { + constexpr uptr Digits = 100; + if (Denominator == 0) { + *Integral = 100; + *Fractional = 0; + return; + } -inline void yieldProcessor(UNUSED u8 Count) { -#if defined(__i386__) || defined(__x86_64__) - __asm__ __volatile__("" ::: "memory"); - for (u8 I = 0; I < Count; I++) - __asm__ __volatile__("pause"); -#elif defined(__aarch64__) || defined(__arm__) - __asm__ __volatile__("" ::: "memory"); - for (u8 I = 0; I < Count; I++) - __asm__ __volatile__("yield"); -#endif - __asm__ __volatile__("" ::: "memory"); + *Integral = Numerator * Digits / Denominator; + *Fractional = + (((Numerator * Digits) % Denominator) * Digits + Denominator / 2) / + Denominator; } // Platform specific functions. @@ -131,9 +136,10 @@ inline void yieldProcessor(UNUSED u8 Count) { extern uptr PageSizeCached; uptr getPageSizeSlow(); inline uptr getPageSizeCached() { - // Bionic uses a hardcoded value. - if (SCUDO_ANDROID) - return 4096U; +#if SCUDO_ANDROID && defined(PAGE_SIZE) + // Most Android builds have a build-time constant page size. 
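The computePercentage() helper added to common.h above splits a ratio into an integral part and a two-digit fractional part without floating point, rounding the fraction to the nearest unit; callers typically print the two values as a percentage. A small worked example of the intended use:

scudo::uptr Integral, Fractional;
// 3 pages in use out of 8: 3 / 8 = 0.375, reported as 37.50%.
scudo::computePercentage(/*Numerator=*/3, /*Denominator=*/8, &Integral,
                         &Fractional);
// Integral == 37, Fractional == 50.

// A zero denominator is reported as 100.0% instead of dividing by zero.
scudo::computePercentage(1, 0, &Integral, &Fractional);
// Integral == 100, Fractional == 0.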
+ return PAGE_SIZE; +#endif if (LIKELY(PageSizeCached)) return PageSizeCached; return getPageSizeSlow(); @@ -144,9 +150,10 @@ u32 getNumberOfCPUs(); const char *getEnv(const char *Name); -uptr GetRSS(); - u64 getMonotonicTime(); +// Gets the time faster but with less accuracy. Can call getMonotonicTime +// if no fast version is available. +u64 getMonotonicTimeFast(); u32 getThreadID(); @@ -187,10 +194,6 @@ void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data = nullptr); -// Internal map & unmap fatal error. This must not call map(). SizeIfOOM shall -// hold the requested size on an out-of-memory error, 0 otherwise. -void NORETURN dieOnMapUnmapError(uptr SizeIfOOM = 0); - // Logging related functions. void setAbortMessage(const char *Message); @@ -212,6 +215,13 @@ enum class Option : u8 { MaxTSDsCount, // Number of usable TSDs for the shared registry. }; +enum class ReleaseToOS : u8 { + Normal, // Follow the normal rules for releasing pages to the OS + Force, // Force release pages to the OS, but avoid cases that take too long. + ForceAll, // Force release every page possible regardless of how long it will + // take. +}; + constexpr unsigned char PatternFillByte = 0xAB; enum FillContentsMode { diff --git a/standalone/condition_variable.h b/standalone/condition_variable.h new file mode 100644 index 00000000000..3f16c86651e --- /dev/null +++ b/standalone/condition_variable.h @@ -0,0 +1,44 @@ +//===-- condition_variable.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_H_ +#define SCUDO_CONDITION_VARIABLE_H_ + +#include "condition_variable_base.h" + +#include "common.h" +#include "platform.h" + +#include "condition_variable_linux.h" + +namespace scudo { + +// A default implementation of default condition variable. It doesn't do a real +// `wait`, instead it spins a short amount of time only. +class ConditionVariableDummy + : public ConditionVariableBase<ConditionVariableDummy> { +public: + void notifyAllImpl(UNUSED HybridMutex &M) REQUIRES(M) {} + + void waitImpl(UNUSED HybridMutex &M) REQUIRES(M) { + M.unlock(); + + constexpr u32 SpinTimes = 64; + volatile u32 V = 0; + for (u32 I = 0; I < SpinTimes; ++I) { + u32 Tmp = V + 1; + V = Tmp; + } + + M.lock(); + } +}; + +} // namespace scudo + +#endif // SCUDO_CONDITION_VARIABLE_H_ diff --git a/standalone/condition_variable_base.h b/standalone/condition_variable_base.h new file mode 100644 index 00000000000..416c327fed4 --- /dev/null +++ b/standalone/condition_variable_base.h @@ -0,0 +1,56 @@ +//===-- condition_variable_base.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_BASE_H_ +#define SCUDO_CONDITION_VARIABLE_BASE_H_ + +#include "mutex.h" +#include "thread_annotations.h" + +namespace scudo { + +template <typename Derived> class ConditionVariableBase { +public: + constexpr ConditionVariableBase() = default; + + void bindTestOnly(HybridMutex &Mutex) { +#if SCUDO_DEBUG + boundMutex = &Mutex; +#else + (void)Mutex; +#endif + } + + void notifyAll(HybridMutex &M) REQUIRES(M) { +#if SCUDO_DEBUG + CHECK_EQ(&M, boundMutex); +#endif + getDerived()->notifyAllImpl(M); + } + + void wait(HybridMutex &M) REQUIRES(M) { +#if SCUDO_DEBUG + CHECK_EQ(&M, boundMutex); +#endif + getDerived()->waitImpl(M); + } + +protected: + Derived *getDerived() { return static_cast<Derived *>(this); } + +#if SCUDO_DEBUG + // Because thread-safety analysis doesn't support pointer aliasing, we are not + // able to mark the proper annotations without false positive. Instead, we + // pass the lock and do the same-lock check separately. + HybridMutex *boundMutex = nullptr; +#endif +}; + +} // namespace scudo + +#endif // SCUDO_CONDITION_VARIABLE_BASE_H_ diff --git a/standalone/condition_variable_linux.cpp b/standalone/condition_variable_linux.cpp new file mode 100644 index 00000000000..e6d9bd1771a --- /dev/null +++ b/standalone/condition_variable_linux.cpp @@ -0,0 +1,52 @@ +//===-- condition_variable_linux.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX + +#include "condition_variable_linux.h" + +#include "atomic_helpers.h" + +#include <limits.h> +#include <linux/futex.h> +#include <sys/syscall.h> +#include <unistd.h> + +namespace scudo { + +void ConditionVariableLinux::notifyAllImpl(UNUSED HybridMutex &M) { + const u32 V = atomic_load_relaxed(&Counter); + atomic_store_relaxed(&Counter, V + 1); + + // TODO(chiahungduan): Move the waiters from the futex waiting queue + // `Counter` to futex waiting queue `M` so that the awoken threads won't be + // blocked again due to locked `M` by current thread. + if (LastNotifyAll != V) { + syscall(SYS_futex, reinterpret_cast<uptr>(&Counter), FUTEX_WAKE_PRIVATE, + INT_MAX, nullptr, nullptr, 0); + } + + LastNotifyAll = V + 1; +} + +void ConditionVariableLinux::waitImpl(HybridMutex &M) { + const u32 V = atomic_load_relaxed(&Counter) + 1; + atomic_store_relaxed(&Counter, V); + + // TODO: Use ScopedUnlock when it's supported. + M.unlock(); + syscall(SYS_futex, reinterpret_cast<uptr>(&Counter), FUTEX_WAIT_PRIVATE, V, + nullptr, nullptr, 0); + M.lock(); +} + +} // namespace scudo + +#endif // SCUDO_LINUX diff --git a/standalone/condition_variable_linux.h b/standalone/condition_variable_linux.h new file mode 100644 index 00000000000..cd073287326 --- /dev/null +++ b/standalone/condition_variable_linux.h @@ -0,0 +1,38 @@ +//===-- condition_variable_linux.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_LINUX_H_ +#define SCUDO_CONDITION_VARIABLE_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX + +#include "atomic_helpers.h" +#include "condition_variable_base.h" +#include "thread_annotations.h" + +namespace scudo { + +class ConditionVariableLinux + : public ConditionVariableBase<ConditionVariableLinux> { +public: + void notifyAllImpl(HybridMutex &M) REQUIRES(M); + + void waitImpl(HybridMutex &M) REQUIRES(M); + +private: + u32 LastNotifyAll = 0; + atomic_u32 Counter = {}; +}; + +} // namespace scudo + +#endif // SCUDO_LINUX + +#endif // SCUDO_CONDITION_VARIABLE_LINUX_H_ diff --git a/standalone/crc32_hw.cpp b/standalone/crc32_hw.cpp index 73f2ae000c6..910cf946031 100644 --- a/standalone/crc32_hw.cpp +++ b/standalone/crc32_hw.cpp @@ -17,4 +17,13 @@ u32 computeHardwareCRC32(u32 Crc, uptr Data) { #endif // defined(__CRC32__) || defined(__SSE4_2__) || // defined(__ARM_FEATURE_CRC32) +#if defined(__loongarch__) +u32 computeHardwareCRC32(u32 Crc, uptr Data) { + // The LoongArch CRC intrinsics have the two input arguments swapped, and + // expect them to be signed. + return static_cast<u32>( + CRC32_INTRINSIC(static_cast<long>(Data), static_cast<int>(Crc))); +} +#endif // defined(__loongarch__) + } // namespace scudo diff --git a/standalone/flags.cpp b/standalone/flags.cpp index de5153b288b..f498edfbd32 100644 --- a/standalone/flags.cpp +++ b/standalone/flags.cpp @@ -68,6 +68,9 @@ void initFlags() { Parser.parseString(getCompileDefinitionScudoDefaultOptions()); Parser.parseString(getScudoDefaultOptions()); Parser.parseString(getEnv("SCUDO_OPTIONS")); + if (const char *V = getEnv("SCUDO_ALLOCATION_RING_BUFFER_SIZE")) { + Parser.parseStringPair("allocation_ring_buffer_size", V); + } } } // namespace scudo diff --git a/standalone/flags.inc b/standalone/flags.inc index c1f153bafdd..ff0c28e1db7 100644 --- a/standalone/flags.inc +++ b/standalone/flags.inc @@ -42,18 +42,10 @@ SCUDO_FLAG(bool, may_return_null, true, "returning NULL in otherwise non-fatal error scenarios, eg: OOM, " "invalid allocation alignments, etc.") -SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? INT32_MIN : 5000, +SCUDO_FLAG(int, release_to_os_interval_ms, 5000, "Interval (in milliseconds) at which to attempt release of unused " "memory to the OS. Negative values disable the feature.") -SCUDO_FLAG(int, hard_rss_limit_mb, 0, - "Hard RSS Limit in Mb. If non-zero, once the limit is achieved, " - "abort the process") - -SCUDO_FLAG(int, soft_rss_limit_mb, 0, - "Soft RSS Limit in Mb. If non-zero, once the limit is reached, all " - "subsequent calls will fail or return NULL until the RSS goes below " - "the soft limit") - SCUDO_FLAG(int, allocation_ring_buffer_size, 32768, - "Entries to keep in the allocation ring buffer for scudo.") + "Entries to keep in the allocation ring buffer for scudo. 
" + "Values less or equal to zero disable the buffer.") diff --git a/standalone/flags_parser.cpp b/standalone/flags_parser.cpp index be39fcd4f88..3d8c6f3789b 100644 --- a/standalone/flags_parser.cpp +++ b/standalone/flags_parser.cpp @@ -10,6 +10,8 @@ #include "common.h" #include "report.h" +#include <errno.h> +#include <limits.h> #include <stdlib.h> #include <string.h> @@ -80,7 +82,7 @@ void FlagParser::parseFlag() { ++Pos; Value = Buffer + ValueStart; } - if (!runHandler(Name, Value)) + if (!runHandler(Name, Value, '=')) reportError("flag parsing failed."); } @@ -122,10 +124,16 @@ inline bool parseBool(const char *Value, bool *b) { return false; } -bool FlagParser::runHandler(const char *Name, const char *Value) { +void FlagParser::parseStringPair(const char *Name, const char *Value) { + if (!runHandler(Name, Value, '\0')) + reportError("flag parsing failed."); +} + +bool FlagParser::runHandler(const char *Name, const char *Value, + const char Sep) { for (u32 I = 0; I < NumberOfFlags; ++I) { const uptr Len = strlen(Flags[I].Name); - if (strncmp(Name, Flags[I].Name, Len) != 0 || Name[Len] != '=') + if (strncmp(Name, Flags[I].Name, Len) != 0 || Name[Len] != Sep) continue; bool Ok = false; switch (Flags[I].Type) { @@ -136,12 +144,18 @@ bool FlagParser::runHandler(const char *Name, const char *Value) { break; case FlagType::FT_int: char *ValueEnd; - *reinterpret_cast<int *>(Flags[I].Var) = - static_cast<int>(strtol(Value, &ValueEnd, 10)); - Ok = - *ValueEnd == '"' || *ValueEnd == '\'' || isSeparatorOrNull(*ValueEnd); - if (!Ok) + errno = 0; + long V = strtol(Value, &ValueEnd, 10); + if (errno != 0 || // strtol failed (over or underflow) + V > INT_MAX || V < INT_MIN || // overflows integer + // contains unexpected characters + (*ValueEnd != '"' && *ValueEnd != '\'' && + !isSeparatorOrNull(*ValueEnd))) { reportInvalidFlag("int", Value); + break; + } + *reinterpret_cast<int *>(Flags[I].Var) = static_cast<int>(V); + Ok = true; break; } return Ok; diff --git a/standalone/flags_parser.h b/standalone/flags_parser.h index ba832adbd90..ded496fda3b 100644 --- a/standalone/flags_parser.h +++ b/standalone/flags_parser.h @@ -27,6 +27,7 @@ public: void *Var); void parseString(const char *S); void printFlagDescriptions(); + void parseStringPair(const char *Name, const char *Value); private: static const u32 MaxFlags = 20; @@ -45,7 +46,7 @@ private: void skipWhitespace(); void parseFlags(); void parseFlag(); - bool runHandler(const char *Name, const char *Value); + bool runHandler(const char *Name, const char *Value, char Sep); }; void reportUnrecognizedFlags(); diff --git a/standalone/fuchsia.cpp b/standalone/fuchsia.cpp index da684e7f1de..2144f1b63f8 100644 --- a/standalone/fuchsia.cpp +++ b/standalone/fuchsia.cpp @@ -19,6 +19,7 @@ #include <zircon/compiler.h> #include <zircon/process.h> #include <zircon/sanitizer.h> +#include <zircon/status.h> #include <zircon/syscalls.h> namespace scudo { @@ -31,6 +32,15 @@ void NORETURN die() { __builtin_trap(); } // with ZX_HANDLE_INVALID. static_assert(ZX_HANDLE_INVALID == 0, ""); +static void NORETURN dieOnError(zx_status_t Status, const char *FnName, + uptr Size) { + ScopedString Error; + Error.append("SCUDO ERROR: %s failed with size %zuKB (%s)", FnName, + Size >> 10, zx_status_get_string(Status)); + outputRaw(Error.data()); + die(); +} + static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { // Only scenario so far. 
DCHECK(Data); @@ -42,7 +52,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { Size, &Data->Vmar, &Data->VmarBase); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_allocate", Size); return nullptr; } return reinterpret_cast<void *>(Data->VmarBase); @@ -73,7 +83,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_set_size(Vmo, VmoSize + Size); if (Status != ZX_OK) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmo_set_size", VmoSize + Size); return nullptr; } } else { @@ -81,7 +91,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_create(Size, ZX_VMO_RESIZABLE, &Vmo); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmo_create", Size); return nullptr; } _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, strlen(Name)); @@ -99,7 +109,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmar_map(Vmar, MapFlags, Offset, Vmo, VmoSize, Size, &P); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_map", Size); return nullptr; } @@ -120,7 +130,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, } if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_op_range", Size); return nullptr; } @@ -145,7 +155,7 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { const zx_status_t Status = _zx_vmar_unmap(Vmar, reinterpret_cast<uintptr_t>(Addr), Size); if (UNLIKELY(Status != ZX_OK)) - dieOnMapUnmapError(); + dieOnError(Status, "zx_vmar_unmap", Size); } if (Data) { if (Data->Vmo != ZX_HANDLE_INVALID) @@ -160,12 +170,15 @@ void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, (Flags & MAP_NOACCESS) ? 0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE); DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); - if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK) - dieOnMapUnmapError(); + const zx_status_t Status = _zx_vmar_protect(Data->Vmar, Prot, Addr, Size); + if (Status != ZX_OK) + dieOnError(Status, "zx_vmar_protect", Size); } void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data) { + // TODO: DCHECK the BaseAddress is consistent with the data in + // MapPlatformData. 
DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); DCHECK_NE(Data->Vmo, ZX_HANDLE_INVALID); @@ -198,6 +211,7 @@ void HybridMutex::unlock() __TA_NO_THREAD_SAFETY_ANALYSIS { void HybridMutex::assertHeldImpl() __TA_NO_THREAD_SAFETY_ANALYSIS {} u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } +u64 getMonotonicTimeFast() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } diff --git a/standalone/fuzz/get_error_info_fuzzer.cpp b/standalone/fuzz/get_error_info_fuzzer.cpp index 74456450a47..2cef1c44fad 100644 --- a/standalone/fuzz/get_error_info_fuzzer.cpp +++ b/standalone/fuzz/get_error_info_fuzzer.cpp @@ -9,6 +9,7 @@ #define SCUDO_FUZZ #include "allocator_config.h" #include "combined.h" +#include "common.h" #include <fuzzer/FuzzedDataProvider.h> @@ -31,11 +32,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { std::string StackDepotBytes = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); - std::vector<char> StackDepot(sizeof(scudo::StackDepot), 0); - for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size(); - ++i) { - StackDepot[i] = StackDepotBytes[i]; - } std::string RegionInfoBytes = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); @@ -46,14 +42,11 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { } std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString(); - // RingBuffer is too short. - if (!AllocatorT::setRingBufferSizeForBuffer(RingBufferBytes.data(), - RingBufferBytes.size())) - return 0; scudo_error_info ErrorInfo; - AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), - RegionInfo.data(), RingBufferBytes.data(), Memory, - MemoryTags, MemoryAddr, MemorySize); + AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepotBytes.data(), + StackDepotBytes.size(), RegionInfo.data(), + RingBufferBytes.data(), RingBufferBytes.size(), + Memory, MemoryTags, MemoryAddr, MemorySize); return 0; } diff --git a/standalone/include/scudo/interface.h b/standalone/include/scudo/interface.h index 23bcfba3982..a2dedea910c 100644 --- a/standalone/include/scudo/interface.h +++ b/standalone/include/scudo/interface.h @@ -17,10 +17,22 @@ extern "C" { __attribute__((weak)) const char *__scudo_default_options(void); // Post-allocation & pre-deallocation hooks. -// They must be thread-safe and not use heap related functions. __attribute__((weak)) void __scudo_allocate_hook(void *ptr, size_t size); __attribute__((weak)) void __scudo_deallocate_hook(void *ptr); +// `realloc` involves both deallocation and allocation but they are not reported +// atomically. In one specific case which may keep taking a snapshot right in +// the middle of `realloc` reporting the deallocation and allocation, it may +// confuse the user by missing memory from `realloc`. To alleviate that case, +// define the two `realloc` hooks to get the knowledge of the bundled hook +// calls. These hooks are optional and should only be used when a hooks user +// wants to track reallocs more closely. +// +// See more details in the comment of `realloc` in wrapper_c.inc. +__attribute__((weak)) void +__scudo_realloc_allocate_hook(void *old_ptr, void *new_ptr, size_t size); +__attribute__((weak)) void __scudo_realloc_deallocate_hook(void *old_ptr); + void __scudo_print_stats(void); typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); @@ -73,7 +85,8 @@ typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); // pointer. 
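// The new stack_depot_size and ring_buffer_size parameters let the caller,
// which has typically copied the depot and ring buffer out of the target
// process, pass the byte counts explicitly; the implementation derives the
// usable element count from them (see ringBufferElementsFromBytes() above)
// rather than relying on a size previously installed into the buffer via
// setRingBufferSizeForBuffer().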
void __scudo_get_error_info(struct scudo_error_info *error_info, uintptr_t fault_addr, const char *stack_depot, - const char *region_info, const char *ring_buffer, + size_t stack_depot_size, const char *region_info, + const char *ring_buffer, size_t ring_buffer_size, const char *memory, const char *memory_tags, uintptr_t memory_addr, size_t memory_size); @@ -118,6 +131,10 @@ size_t __scudo_get_ring_buffer_size(void); #define M_PURGE -101 #endif +#ifndef M_PURGE_ALL +#define M_PURGE_ALL -104 +#endif + // Tune the allocator's choice of memory tags to make it more likely that // a certain class of memory errors will be detected. The value argument should // be one of the M_MEMTAG_TUNING_* constants below. @@ -155,6 +172,11 @@ size_t __scudo_get_ring_buffer_size(void); #define M_MEMTAG_TUNING_UAF 1 #endif +// Print internal stats to the log. +#ifndef M_LOG_STATS +#define M_LOG_STATS -205 +#endif + } // extern "C" #endif // SCUDO_INTERFACE_H_ diff --git a/standalone/linux.cpp b/standalone/linux.cpp index 33757e292f2..27469510810 100644 --- a/standalone/linux.cpp +++ b/standalone/linux.cpp @@ -14,6 +14,7 @@ #include "internal_defs.h" #include "linux.h" #include "mutex.h" +#include "report_linux.h" #include "string_utils.h" #include <errno.h> @@ -43,6 +44,7 @@ uptr getPageSize() { return static_cast<uptr>(sysconf(_SC_PAGESIZE)); } void NORETURN die() { abort(); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, UNUSED MapPlatformData *Data) { int MmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; @@ -65,7 +67,7 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM ? Size : 0); + reportMapError(errno == ENOMEM ? Size : 0); return nullptr; } #if SCUDO_ANDROID @@ -75,19 +77,22 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, return P; } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void unmap(void *Addr, uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) { if (munmap(Addr, Size) != 0) - dieOnMapUnmapError(); + reportUnmapError(reinterpret_cast<uptr>(Addr), Size); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, UNUSED MapPlatformData *Data) { int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); if (mprotect(reinterpret_cast<void *>(Addr), Size, Prot) != 0) - dieOnMapUnmapError(); + reportProtectError(Addr, Size, Prot); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, UNUSED MapPlatformData *Data) { void *Addr = reinterpret_cast<void *>(BaseAddress + Offset); @@ -104,12 +109,14 @@ enum State : u32 { Unlocked = 0, Locked = 1, Sleeping = 2 }; } bool HybridMutex::tryLock() { - return atomic_compare_exchange(&M, Unlocked, Locked) == Unlocked; + return atomic_compare_exchange_strong(&M, Unlocked, Locked, + memory_order_acquire) == Unlocked; } // The following is based on https://akkadia.org/drepper/futex.pdf. 
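// (A compressed sketch of the scheme from that paper, using the
// Unlocked/Locked/Sleeping states above; lockSlow() below is the actual
// implementation, this is only for orientation:
//
//   // Contended lock path: mark the word Sleeping and park on it.
//   while (atomic_exchange(&M, Sleeping, memory_order_acquire) != Unlocked)
//     syscall(SYS_futex, reinterpret_cast<uptr>(&M), FUTEX_WAIT_PRIVATE,
//             Sleeping, nullptr, nullptr, 0);
//
//   // Unlock: only issue a wake if someone may be parked.
//   if (atomic_exchange(&M, Unlocked, memory_order_release) == Sleeping)
//     syscall(SYS_futex, reinterpret_cast<uptr>(&M), FUTEX_WAKE_PRIVATE, 1,
//             nullptr, nullptr, 0);
// )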
void HybridMutex::lockSlow() { - u32 V = atomic_compare_exchange(&M, Unlocked, Locked); + u32 V = atomic_compare_exchange_strong(&M, Unlocked, Locked, + memory_order_acquire); if (V == Unlocked) return; if (V != Sleeping) @@ -140,6 +147,17 @@ u64 getMonotonicTime() { static_cast<u64>(TS.tv_nsec); } +u64 getMonotonicTimeFast() { +#if defined(CLOCK_MONOTONIC_COARSE) + timespec TS; + clock_gettime(CLOCK_MONOTONIC_COARSE, &TS); + return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) + + static_cast<u64>(TS.tv_nsec); +#else + return getMonotonicTime(); +#endif +} + u32 getNumberOfCPUs() { cpu_set_t CPUs; // sched_getaffinity can fail for a variety of legitimate reasons (lack of @@ -186,39 +204,6 @@ bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { extern "C" WEAK int async_safe_write_log(int pri, const char *tag, const char *msg); -static uptr GetRSSFromBuffer(const char *Buf) { - // The format of the file is: - // 1084 89 69 11 0 79 0 - // We need the second number which is RSS in pages. - const char *Pos = Buf; - // Skip the first number. - while (*Pos >= '0' && *Pos <= '9') - Pos++; - // Skip whitespaces. - while (!(*Pos >= '0' && *Pos <= '9') && *Pos != 0) - Pos++; - // Read the number. - u64 Rss = 0; - for (; *Pos >= '0' && *Pos <= '9'; Pos++) - Rss = Rss * 10 + static_cast<u64>(*Pos) - '0'; - return static_cast<uptr>(Rss * getPageSizeCached()); -} - -uptr GetRSS() { - // TODO: We currently use sanitizer_common's GetRSS which reads the - // RSS from /proc/self/statm by default. We might want to - // call getrusage directly, even if it's less accurate. - auto Fd = open("/proc/self/statm", O_RDONLY); - char Buf[64]; - s64 Len = read(Fd, Buf, sizeof(Buf) - 1); - close(Fd); - if (Len <= 0) - return 0; - Buf[Len] = 0; - - return GetRSSFromBuffer(Buf); -} - void outputRaw(const char *Buffer) { if (&async_safe_write_log) { constexpr s32 AndroidLogInfo = 4; diff --git a/standalone/local_cache.h b/standalone/local_cache.h index 6e84158659a..46d6affdc03 100644 --- a/standalone/local_cache.h +++ b/standalone/local_cache.h @@ -14,6 +14,7 @@ #include "platform.h" #include "report.h" #include "stats.h" +#include "string_utils.h" namespace scudo { @@ -21,71 +22,13 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; - struct TransferBatch { - static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint; - void setFromArray(CompactPtrT *Array, u16 N) { - DCHECK_LE(N, MaxNumCached); - Count = N; - memcpy(Batch, Array, sizeof(Batch[0]) * Count); - } - void appendFromArray(CompactPtrT *Array, u16 N) { - DCHECK_LE(N, MaxNumCached - Count); - memcpy(Batch + Count, Array, sizeof(Batch[0]) * N); - // u16 will be promoted to int by arithmetic type conversion. - Count = static_cast<u16>(Count + N); - } - void clear() { Count = 0; } - void add(CompactPtrT P) { - DCHECK_LT(Count, MaxNumCached); - Batch[Count++] = P; - } - void copyToArray(CompactPtrT *Array) const { - memcpy(Array, Batch, sizeof(Batch[0]) * Count); - } - u16 getCount() const { return Count; } - CompactPtrT get(u16 I) const { - DCHECK_LE(I, Count); - return Batch[I]; - } - static u16 getMaxCached(uptr Size) { - return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size)); - } - TransferBatch *Next; - - private: - CompactPtrT Batch[MaxNumCached]; - u16 Count; - }; - - // A BatchGroup is used to collect blocks. 
Each group has a group id to - // identify the group kind of contained blocks. - struct BatchGroup { - // `Next` is used by IntrusiveList. - BatchGroup *Next; - // The identifier of each group - uptr GroupId; - // Cache value of TransferBatch::getMaxCached() - u16 MaxCachedPerBatch; - // Number of blocks pushed into this group. This is an increment-only - // counter. - uptr PushedBlocks; - // This is used to track how many blocks are pushed since last time we - // checked `PushedBlocks`. It's useful for page releasing to determine the - // usage of a BatchGroup. - uptr PushedBlocksAtLastCheckpoint; - // Blocks are managed by TransferBatch in a list. - SinglyLinkedList<TransferBatch> Batches; - }; - - static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch), - "BatchGroup uses the same class size as TransferBatch"); - void init(GlobalStats *S, SizeClassAllocator *A) { DCHECK(isEmpty()); Stats.init(); if (LIKELY(S)) S->link(&Stats); Allocator = A; + initCache(); } void destroy(GlobalStats *S) { @@ -98,7 +41,9 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { DCHECK_LT(ClassId, NumClasses); PerClass *C = &PerClassArray[ClassId]; if (C->Count == 0) { - if (UNLIKELY(!refill(C, ClassId))) + // Refill half of the number of max cached. + DCHECK_GT(C->MaxCount / 2, 0U); + if (UNLIKELY(!refill(C, ClassId, C->MaxCount / 2))) return nullptr; DCHECK_GT(C->Count, 0); } @@ -112,13 +57,13 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { return Allocator->decompactPtr(ClassId, CompactP); } - void deallocate(uptr ClassId, void *P) { + bool deallocate(uptr ClassId, void *P) { CHECK_LT(ClassId, NumClasses); PerClass *C = &PerClassArray[ClassId]; - // We still have to initialize the cache in the event that the first heap - // operation in a thread is a deallocation. - initCacheMaybe(C); - if (C->Count == C->MaxCount) + + // If the cache is full, drain half of blocks back to the main allocator. + const bool NeedToDrainCache = C->Count == C->MaxCount; + if (NeedToDrainCache) drain(C, ClassId); // See comment in allocate() about memory accesses. const uptr ClassSize = C->ClassSize; @@ -126,6 +71,8 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { Allocator->compactPtr(ClassId, reinterpret_cast<uptr>(P)); Stats.sub(StatAllocated, ClassSize); Stats.add(StatFree, ClassSize); + + return NeedToDrainCache; } bool isEmpty() const { @@ -136,7 +83,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { } void drain() { - // Drain BatchClassId last as createBatch can refill it. + // Drain BatchClassId last as it may be needed while draining normal blocks. 
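    // (Rationale: pushing blocks of a normal class back to the primary may
    // require fresh TransferBatch/BatchGroup headers, and those headers are
    // themselves allocated from BatchClassId, possibly through this very
    // cache (see getBatchClassBlock() below), so draining BatchClassId first
    // could leave it populated again.)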
for (uptr I = 0; I < NumClasses; ++I) { if (I == BatchClassId) continue; @@ -148,22 +95,42 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { DCHECK(isEmpty()); } - TransferBatch *createBatch(uptr ClassId, void *B) { - if (ClassId != BatchClassId) - B = allocate(BatchClassId); + void *getBatchClassBlock() { + void *B = allocate(BatchClassId); if (UNLIKELY(!B)) reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); - return reinterpret_cast<TransferBatch *>(B); + return B; } - BatchGroup *createGroup() { - void *Ptr = allocate(BatchClassId); - if (UNLIKELY(!Ptr)) - reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); - return reinterpret_cast<BatchGroup *>(Ptr); + LocalStats &getStats() { return Stats; } + + void getStats(ScopedString *Str) { + bool EmptyCache = true; + for (uptr I = 0; I < NumClasses; ++I) { + if (PerClassArray[I].Count == 0) + continue; + + EmptyCache = false; + // The size of BatchClass is set to 0 intentionally. See the comment in + // initCache() for more details. + const uptr ClassSize = I == BatchClassId + ? SizeClassAllocator::getSizeByClassId(I) + : PerClassArray[I].ClassSize; + // Note that the string utils don't support printing u16 thus we cast it + // to a common use type uptr. + Str->append(" %02zu (%6zu): cached: %4zu max: %4zu\n", I, ClassSize, + static_cast<uptr>(PerClassArray[I].Count), + static_cast<uptr>(PerClassArray[I].MaxCount)); + } + + if (EmptyCache) + Str->append(" No block is cached.\n"); } - LocalStats &getStats() { return Stats; } + static u16 getMaxCached(uptr Size) { + return Min(SizeClassMap::MaxNumCachedHint, + SizeClassMap::getMaxCachedHint(Size)); + } private: static const uptr NumClasses = SizeClassMap::NumClasses; @@ -173,24 +140,17 @@ private: u16 MaxCount; // Note: ClassSize is zero for the transfer batch. 
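    // (ClassSize here only feeds the StatAllocated/StatFree updates in
    // allocate()/deallocate(); the transfer batch class holds allocator
    // internal metadata rather than user allocations, so it is deliberately
    // kept at zero, and getStats() above prints its real size via
    // getSizeByClassId() instead.)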
uptr ClassSize; - CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; + CompactPtrT Chunks[2 * SizeClassMap::MaxNumCachedHint]; }; PerClass PerClassArray[NumClasses] = {}; LocalStats Stats; SizeClassAllocator *Allocator = nullptr; - ALWAYS_INLINE void initCacheMaybe(PerClass *C) { - if (LIKELY(C->MaxCount)) - return; - initCache(); - DCHECK_NE(C->MaxCount, 0U); - } - NOINLINE void initCache() { for (uptr I = 0; I < NumClasses; I++) { PerClass *P = &PerClassArray[I]; const uptr Size = SizeClassAllocator::getSizeByClassId(I); - P->MaxCount = static_cast<u16>(2 * TransferBatch::getMaxCached(Size)); + P->MaxCount = static_cast<u16>(2 * getMaxCached(Size)); if (I != BatchClassId) { P->ClassSize = Size; } else { @@ -206,17 +166,12 @@ private: deallocate(BatchClassId, B); } - NOINLINE bool refill(PerClass *C, uptr ClassId) { - initCacheMaybe(C); - TransferBatch *B = Allocator->popBatch(this, ClassId); - if (UNLIKELY(!B)) - return false; - DCHECK_GT(B->getCount(), 0); - C->Count = B->getCount(); - B->copyToArray(C->Chunks); - B->clear(); - destroyBatch(ClassId, B); - return true; + NOINLINE bool refill(PerClass *C, uptr ClassId, u16 MaxRefill) { + const u16 NumBlocksRefilled = + Allocator->popBlocks(this, ClassId, C->Chunks, MaxRefill); + DCHECK_LE(NumBlocksRefilled, MaxRefill); + C->Count = static_cast<u16>(C->Count + NumBlocksRefilled); + return NumBlocksRefilled != 0; } NOINLINE void drain(PerClass *C, uptr ClassId) { diff --git a/standalone/mem_map.cpp b/standalone/mem_map.cpp new file mode 100644 index 00000000000..115cc34e706 --- /dev/null +++ b/standalone/mem_map.cpp @@ -0,0 +1,84 @@ +//===-- mem_map.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mem_map.h" + +#include "common.h" + +namespace scudo { + +bool MemMapDefault::mapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *MappedAddr = + ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, Flags, &Data); + if (MappedAddr == nullptr) + return false; + Base = reinterpret_cast<uptr>(MappedAddr); + MappedBase = Base; + Capacity = Size; + return true; +} + +void MemMapDefault::unmapImpl(uptr Addr, uptr Size) { + if (Size == Capacity) { + Base = MappedBase = Capacity = 0; + } else { + if (Base == Addr) { + Base = Addr + Size; + MappedBase = MappedBase == 0 ? Base : Max(MappedBase, Base); + } + Capacity -= Size; + } + + ::scudo::unmap(reinterpret_cast<void *>(Addr), Size, UNMAP_ALL, &Data); +} + +bool MemMapDefault::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *RemappedPtr = + ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, Flags, &Data); + const uptr RemappedAddr = reinterpret_cast<uptr>(RemappedPtr); + MappedBase = MappedBase == 0 ? 
RemappedAddr : Min(MappedBase, RemappedAddr); + return RemappedAddr == Addr; +} + +void MemMapDefault::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + DCHECK_NE(MappedBase, 0U); + DCHECK_GE(From, MappedBase); + return ::scudo::releasePagesToOS(MappedBase, From - MappedBase, Size, &Data); +} + +void MemMapDefault::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + return ::scudo::setMemoryPermission(Addr, Size, Flags); +} + +void ReservedMemoryDefault::releaseImpl() { + ::scudo::unmap(reinterpret_cast<void *>(Base), Capacity, UNMAP_ALL, &Data); +} + +bool ReservedMemoryDefault::createImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *Reserved = ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, + Flags | MAP_NOACCESS, &Data); + if (Reserved == nullptr) + return false; + + Base = reinterpret_cast<uptr>(Reserved); + Capacity = Size; + + return true; +} + +ReservedMemoryDefault::MemMapT ReservedMemoryDefault::dispatchImpl(uptr Addr, + uptr Size) { + ReservedMemoryDefault::MemMapT NewMap(Addr, Size); + NewMap.setMapPlatformData(Data); + return NewMap; +} + +} // namespace scudo diff --git a/standalone/mem_map.h b/standalone/mem_map.h new file mode 100644 index 00000000000..b92216cf271 --- /dev/null +++ b/standalone/mem_map.h @@ -0,0 +1,92 @@ +//===-- mem_map.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_H_ +#define SCUDO_MEM_MAP_H_ + +#include "mem_map_base.h" + +#include "common.h" +#include "internal_defs.h" + +// TODO: This is only used for `MapPlatformData`. Remove these includes when we +// have all three platform specific `MemMap` and `ReservedMemory` +// implementations. +#include "fuchsia.h" +#include "linux.h" +#include "trusty.h" + +#include "mem_map_fuchsia.h" +#include "mem_map_linux.h" + +namespace scudo { + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class MemMapDefault final : public MemMapBase<MemMapDefault> { +public: + constexpr MemMapDefault() = default; + MemMapDefault(uptr Base, uptr Capacity) : Base(Base), Capacity(Capacity) {} + + // Impls for base functions. + bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + + void setMapPlatformData(MapPlatformData &NewData) { Data = NewData; } + +private: + uptr Base = 0; + uptr Capacity = 0; + uptr MappedBase = 0; + MapPlatformData Data = {}; +}; + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. 
+class ReservedMemoryDefault final + : public ReservedMemory<ReservedMemoryDefault, MemMapDefault> { +public: + constexpr ReservedMemoryDefault() = default; + + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + +private: + uptr Base = 0; + uptr Capacity = 0; + MapPlatformData Data = {}; +}; + +#if SCUDO_LINUX +using ReservedMemoryT = ReservedMemoryLinux; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_FUCHSIA +using ReservedMemoryT = ReservedMemoryFuchsia; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_TRUSTY +using ReservedMemoryT = ReservedMemoryDefault; +using MemMapT = ReservedMemoryT::MemMapT; +#else +#error \ + "Unsupported platform, please implement the ReservedMemory for your platform!" +#endif + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_H_ diff --git a/standalone/mem_map_base.h b/standalone/mem_map_base.h new file mode 100644 index 00000000000..99ab0cba604 --- /dev/null +++ b/standalone/mem_map_base.h @@ -0,0 +1,129 @@ +//===-- mem_map_base.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_BASE_H_ +#define SCUDO_MEM_MAP_BASE_H_ + +#include "common.h" + +namespace scudo { + +// In Scudo, every memory operation will be fulfilled through a +// platform-specific `MemMap` instance. The essential APIs are listed in the +// `MemMapBase` below. This is implemented in CRTP, so for each implementation, +// it has to implement all of the 'Impl' named functions. +template <class Derived> class MemMapBase { +public: + constexpr MemMapBase() = default; + + // This is used to map a new set of contiguous pages. Note that the `Addr` is + // only a suggestion to the system. + bool map(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isAllocated()); + return invokeImpl(&Derived::mapImpl, Addr, Size, Name, Flags); + } + + // This is used to unmap partial/full pages from the beginning or the end. + // I.e., the result pages are expected to be still contiguous. + void unmap(uptr Addr, uptr Size) { + DCHECK(isAllocated()); + DCHECK((Addr == getBase()) || (Addr + Size == getBase() + getCapacity())); + invokeImpl(&Derived::unmapImpl, Addr, Size); + } + + // This is used to remap a mapped range (either from map() or dispatched from + // ReservedMemory). For example, we have reserved several pages and then we + // want to remap them with different accessibility. + bool remap(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::remapImpl, Addr, Size, Name, Flags); + } + + // This is used to update the pages' access permission. For example, mark + // pages as no read/write permission. + void setMemoryPermission(uptr Addr, uptr Size, uptr Flags) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::setMemoryPermissionImpl, Addr, Size, Flags); + } + + // Suggest releasing a set of contiguous physical pages back to the OS. 
Note + // that only physical pages are supposed to be released. Any release of + // virtual pages may lead to undefined behavior. + void releasePagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) && (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releasePagesToOSImpl, From, Size); + } + // This is similar to the above one except that any subsequent access to the + // released pages will return with zero-filled pages. + void releaseAndZeroPagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) && (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releaseAndZeroPagesToOSImpl, From, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isAllocated() { return getBase() != 0U; } + +protected: + template <typename R, typename... Args> + R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) { + return (static_cast<Derived *>(this)->*MemFn)(args...); + } +}; + +// `ReservedMemory` is a special memory handle which can be viewed as a page +// allocator. `ReservedMemory` will reserve a contiguous pages and the later +// page request can be fulfilled at the designated address. This is used when +// we want to ensure the virtual address of the MemMap will be in a known range. +// This is implemented in CRTP, so for each +// implementation, it has to implement all of the 'Impl' named functions. +template <class Derived, typename MemMapTy> class ReservedMemory { +public: + using MemMapT = MemMapTy; + constexpr ReservedMemory() = default; + + // Reserve a chunk of memory at a suggested address. + bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isCreated()); + return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags); + } + + // Release the entire reserved memory. + void release() { + DCHECK(isCreated()); + invokeImpl(&Derived::releaseImpl); + } + + // Dispatch a sub-range of reserved memory. Note that any fragmentation of + // the reserved pages is managed by each implementation. + MemMapT dispatch(uptr Addr, uptr Size) { + DCHECK(isCreated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::dispatchImpl, Addr, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isCreated() { return getBase() != 0U; } + +protected: + template <typename R, typename... Args> + R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) { + return (static_cast<Derived *>(this)->*MemFn)(args...); + } +}; + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_BASE_H_ diff --git a/standalone/mem_map_fuchsia.cpp b/standalone/mem_map_fuchsia.cpp new file mode 100644 index 00000000000..fc793abf44c --- /dev/null +++ b/standalone/mem_map_fuchsia.cpp @@ -0,0 +1,258 @@ +//===-- mem_map_fuchsia.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mem_map_fuchsia.h" + +#include "atomic_helpers.h" +#include "common.h" +#include "string_utils.h" + +#if SCUDO_FUCHSIA + +#include <zircon/process.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> + +namespace scudo { + +static void NORETURN dieOnError(zx_status_t Status, const char *FnName, + uptr Size) { + ScopedString Error; + Error.append("SCUDO ERROR: %s failed with size %zuKB (%s)", FnName, + Size >> 10, _zx_status_get_string(Status)); + outputRaw(Error.data()); + die(); +} + +static void setVmoName(zx_handle_t Vmo, const char *Name) { + size_t Len = strlen(Name); + DCHECK_LT(Len, ZX_MAX_NAME_LEN); + zx_status_t Status = _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, Len); + CHECK_EQ(Status, ZX_OK); +} + +// Returns the (cached) base address of the root VMAR. +static uptr getRootVmarBase() { + static atomic_uptr CachedResult = {0}; + + uptr Result = atomic_load(&CachedResult, memory_order_acquire); + if (UNLIKELY(!Result)) { + zx_info_vmar_t VmarInfo; + zx_status_t Status = + _zx_object_get_info(_zx_vmar_root_self(), ZX_INFO_VMAR, &VmarInfo, + sizeof(VmarInfo), nullptr, nullptr); + CHECK_EQ(Status, ZX_OK); + CHECK_NE(VmarInfo.base, 0); + + atomic_store(&CachedResult, VmarInfo.base, memory_order_release); + Result = VmarInfo.base; + } + + return Result; +} + +// Lazily creates and then always returns the same zero-sized VMO. +static zx_handle_t getPlaceholderVmo() { + static atomic_u32 StoredVmo = {ZX_HANDLE_INVALID}; + + zx_handle_t Vmo = atomic_load(&StoredVmo, memory_order_acquire); + if (UNLIKELY(Vmo == ZX_HANDLE_INVALID)) { + // Create a zero-sized placeholder VMO. + zx_status_t Status = _zx_vmo_create(0, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) + dieOnError(Status, "zx_vmo_create", 0); + + setVmoName(Vmo, "scudo:reserved"); + + // Atomically store its handle. If some other thread wins the race, use its + // handle and discard ours. + zx_handle_t OldValue = atomic_compare_exchange_strong( + &StoredVmo, ZX_HANDLE_INVALID, Vmo, memory_order_acq_rel); + if (UNLIKELY(OldValue != ZX_HANDLE_INVALID)) { + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + + Vmo = OldValue; + } + } + + return Vmo; +} + +// Checks if MAP_ALLOWNOMEM allows the given error code. +static bool IsNoMemError(zx_status_t Status) { + // Note: _zx_vmar_map returns ZX_ERR_NO_RESOURCES if the VMAR does not contain + // a suitable free spot. + return Status == ZX_ERR_NO_MEMORY || Status == ZX_ERR_NO_RESOURCES; +} + +MemMapFuchsia::MemMapFuchsia(uptr Base, uptr Capacity) + : MapAddr(Base), WindowBase(Base), WindowSize(Capacity) { + // Create the VMO. + zx_status_t Status = _zx_vmo_create(Capacity, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) + dieOnError(Status, "zx_vmo_create", Capacity); +} + +bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, + uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + const bool PreCommit = !!(Flags & MAP_PRECOMMIT); + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + // Create the VMO. + zx_status_t Status = _zx_vmo_create(Size, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) { + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmo_create", Size); + } + + if (Name != nullptr) + setVmoName(Vmo, Name); + + // Map it. 
+ zx_vm_option_t MapFlags = ZX_VM_ALLOW_FAULTS; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + Status = + _zx_vmar_map(_zx_vmar_root_self(), MapFlags, 0, Vmo, 0, Size, &MapAddr); + if (UNLIKELY(Status != ZX_OK)) { + if (AllowNoMem && IsNoMemError(Status)) { + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + + MapAddr = 0; + Vmo = ZX_HANDLE_INVALID; + return false; + } + dieOnError(Status, "zx_vmar_map", Size); + } + + if (PreCommit) { + Status = _zx_vmar_op_range(_zx_vmar_root_self(), ZX_VMAR_OP_COMMIT, MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); + } + + WindowBase = MapAddr; + WindowSize = Size; + return true; +} + +void MemMapFuchsia::unmapImpl(uptr Addr, uptr Size) { + zx_status_t Status; + + if (Size == WindowSize) { + // NOTE: Closing first and then unmapping seems slightly faster than doing + // the same operations in the opposite order. + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + Status = _zx_vmar_unmap(_zx_vmar_root_self(), Addr, Size); + CHECK_EQ(Status, ZX_OK); + + MapAddr = WindowBase = WindowSize = 0; + Vmo = ZX_HANDLE_INVALID; + } else { + // Unmap the subrange. + Status = _zx_vmar_unmap(_zx_vmar_root_self(), Addr, Size); + CHECK_EQ(Status, ZX_OK); + + // Decommit the pages that we just unmapped. + Status = _zx_vmo_op_range(Vmo, ZX_VMO_OP_DECOMMIT, Addr - MapAddr, Size, + nullptr, 0); + CHECK_EQ(Status, ZX_OK); + + if (Addr == WindowBase) + WindowBase += Size; + WindowSize -= Size; + } +} + +bool MemMapFuchsia::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + const bool PreCommit = !!(Flags & MAP_PRECOMMIT); + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + // NOTE: This will rename the *whole* VMO, not only the requested portion of + // it. But we cannot do better than this given the MemMap API. In practice, + // the upper layers of Scudo always pass the same Name for a given MemMap. + if (Name != nullptr) + setVmoName(Vmo, Name); + + uptr MappedAddr; + zx_vm_option_t MapFlags = ZX_VM_ALLOW_FAULTS | ZX_VM_SPECIFIC_OVERWRITE; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + zx_status_t Status = + _zx_vmar_map(_zx_vmar_root_self(), MapFlags, Addr - getRootVmarBase(), + Vmo, Addr - MapAddr, Size, &MappedAddr); + if (UNLIKELY(Status != ZX_OK)) { + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); + } + DCHECK_EQ(Addr, MappedAddr); + + if (PreCommit) { + Status = _zx_vmar_op_range(_zx_vmar_root_self(), ZX_VMAR_OP_COMMIT, MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); + } + + return true; +} + +void MemMapFuchsia::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + zx_status_t Status = _zx_vmo_op_range(Vmo, ZX_VMO_OP_DECOMMIT, From - MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); +} + +void MemMapFuchsia::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + zx_vm_option_t MapFlags = 0; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + zx_status_t Status = + _zx_vmar_protect(_zx_vmar_root_self(), MapFlags, Addr, Size); + CHECK_EQ(Status, ZX_OK); +} + +bool ReservedMemoryFuchsia::createImpl(UNUSED uptr Addr, uptr Size, + UNUSED const char *Name, uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + + // Reserve memory by mapping the placeholder VMO without any permission. 
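  // (Mapping the shared zero-sized placeholder VMO with no permission bits
  // simply claims a contiguous range of the root VMAR without committing any
  // memory. dispatchImpl() below then hands out MemMapFuchsia windows over
  // sub-ranges of that reservation, and their remapImpl() overlays a real VMO
  // at the requested address via ZX_VM_SPECIFIC_OVERWRITE.)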
+ zx_status_t Status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_ALLOW_FAULTS, 0, + getPlaceholderVmo(), 0, Size, &Base); + if (UNLIKELY(Status != ZX_OK)) { + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); + } + + Capacity = Size; + return true; +} + +void ReservedMemoryFuchsia::releaseImpl() { + zx_status_t Status = _zx_vmar_unmap(_zx_vmar_root_self(), Base, Capacity); + CHECK_EQ(Status, ZX_OK); +} + +ReservedMemoryFuchsia::MemMapT ReservedMemoryFuchsia::dispatchImpl(uptr Addr, + uptr Size) { + return ReservedMemoryFuchsia::MemMapT(Addr, Size); +} + +} // namespace scudo + +#endif // SCUDO_FUCHSIA diff --git a/standalone/mem_map_fuchsia.h b/standalone/mem_map_fuchsia.h new file mode 100644 index 00000000000..2e66f89cfca --- /dev/null +++ b/standalone/mem_map_fuchsia.h @@ -0,0 +1,75 @@ +//===-- mem_map_fuchsia.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_FUCHSIA_H_ +#define SCUDO_MEM_MAP_FUCHSIA_H_ + +#include "mem_map_base.h" + +#if SCUDO_FUCHSIA + +#include <stdint.h> +#include <zircon/types.h> + +namespace scudo { + +class MemMapFuchsia final : public MemMapBase<MemMapFuchsia> { +public: + constexpr MemMapFuchsia() = default; + + // Impls for base functions. + bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return WindowBase; } + uptr getCapacityImpl() { return WindowSize; } + +private: + friend class ReservedMemoryFuchsia; + + // Used by ReservedMemoryFuchsia::dispatch. + MemMapFuchsia(uptr Base, uptr Capacity); + + // Virtual memory address corresponding to VMO offset 0. + uptr MapAddr = 0; + + // Virtual memory base address and size of the VMO subrange that is still in + // use. unmapImpl() can shrink this range, either at the beginning or at the + // end. + uptr WindowBase = 0; + uptr WindowSize = 0; + + zx_handle_t Vmo = ZX_HANDLE_INVALID; +}; + +class ReservedMemoryFuchsia final + : public ReservedMemory<ReservedMemoryFuchsia, MemMapFuchsia> { +public: + constexpr ReservedMemoryFuchsia() = default; + + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + +private: + uptr Base = 0; + uptr Capacity = 0; +}; + +} // namespace scudo + +#endif // SCUDO_FUCHSIA + +#endif // SCUDO_MEM_MAP_FUCHSIA_H_ diff --git a/standalone/mem_map_linux.cpp b/standalone/mem_map_linux.cpp new file mode 100644 index 00000000000..783c4f0d9ab --- /dev/null +++ b/standalone/mem_map_linux.cpp @@ -0,0 +1,153 @@ +//===-- mem_map_linux.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX + +#include "mem_map_linux.h" + +#include "common.h" +#include "internal_defs.h" +#include "linux.h" +#include "mutex.h" +#include "report_linux.h" +#include "string_utils.h" + +#include <errno.h> +#include <fcntl.h> +#include <linux/futex.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#if SCUDO_ANDROID +// TODO(chiahungduan): Review if we still need the followings macros. +#include <sys/prctl.h> +// Definitions of prctl arguments to set a vma name in Android kernels. +#define ANDROID_PR_SET_VMA 0x53564d41 +#define ANDROID_PR_SET_VMA_ANON_NAME 0 +#endif + +namespace scudo { + +static void *mmapWrapper(uptr Addr, uptr Size, const char *Name, uptr Flags) { + int MmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + int MmapProt; + if (Flags & MAP_NOACCESS) { + MmapFlags |= MAP_NORESERVE; + MmapProt = PROT_NONE; + } else { + MmapProt = PROT_READ | PROT_WRITE; + } +#if defined(__aarch64__) +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; +#endif + if (Addr) + MmapFlags |= MAP_FIXED; + void *P = + mmap(reinterpret_cast<void *>(Addr), Size, MmapProt, MmapFlags, -1, 0); + if (P == MAP_FAILED) { + if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) + reportMapError(errno == ENOMEM ? Size : 0); + return nullptr; + } +#if SCUDO_ANDROID + if (Name) + prctl(ANDROID_PR_SET_VMA, ANDROID_PR_SET_VMA_ANON_NAME, P, Size, Name); +#else + (void)Name; +#endif + + return P; +} + +bool MemMapLinux::mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags) { + void *P = mmapWrapper(Addr, Size, Name, Flags); + if (P == nullptr) + return false; + + MapBase = reinterpret_cast<uptr>(P); + MapCapacity = Size; + return true; +} + +void MemMapLinux::unmapImpl(uptr Addr, uptr Size) { + // If we unmap all the pages, also mark `MapBase` to 0 to indicate invalid + // status. + if (Size == MapCapacity) { + MapBase = MapCapacity = 0; + } else { + // This is partial unmap and is unmapping the pages from the beginning, + // shift `MapBase` to the new base. + if (MapBase == Addr) + MapBase = Addr + Size; + MapCapacity -= Size; + } + + if (munmap(reinterpret_cast<void *>(Addr), Size) != 0) + reportUnmapError(Addr, Size); +} + +bool MemMapLinux::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *P = mmapWrapper(Addr, Size, Name, Flags); + if (reinterpret_cast<uptr>(P) != Addr) + reportMapError(); + return true; +} + +void MemMapLinux::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + int Prot = (Flags & MAP_NOACCESS) ? 
PROT_NONE : (PROT_READ | PROT_WRITE); + if (mprotect(reinterpret_cast<void *>(Addr), Size, Prot) != 0) + reportProtectError(Addr, Size, Prot); +} + +void MemMapLinux::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + void *Addr = reinterpret_cast<void *>(From); + + while (madvise(Addr, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) { + } +} + +bool ReservedMemoryLinux::createImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + ReservedMemoryLinux::MemMapT MemMap; + if (!MemMap.map(Addr, Size, Name, Flags | MAP_NOACCESS)) + return false; + + MapBase = MemMap.getBase(); + MapCapacity = MemMap.getCapacity(); + + return true; +} + +void ReservedMemoryLinux::releaseImpl() { + if (munmap(reinterpret_cast<void *>(getBase()), getCapacity()) != 0) + reportUnmapError(getBase(), getCapacity()); +} + +ReservedMemoryLinux::MemMapT ReservedMemoryLinux::dispatchImpl(uptr Addr, + uptr Size) { + return ReservedMemoryLinux::MemMapT(Addr, Size); +} + +} // namespace scudo + +#endif // SCUDO_LINUX diff --git a/standalone/mem_map_linux.h b/standalone/mem_map_linux.h new file mode 100644 index 00000000000..7a89b3bff5e --- /dev/null +++ b/standalone/mem_map_linux.h @@ -0,0 +1,67 @@ +//===-- mem_map_linux.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_LINUX_H_ +#define SCUDO_MEM_MAP_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX + +#include "common.h" +#include "mem_map_base.h" + +namespace scudo { + +class MemMapLinux final : public MemMapBase<MemMapLinux> { +public: + constexpr MemMapLinux() = default; + MemMapLinux(uptr Base, uptr Capacity) + : MapBase(Base), MapCapacity(Capacity) {} + + // Impls for base functions. + bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags = 0); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags = 0); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return MapBase; } + uptr getCapacityImpl() { return MapCapacity; } + +private: + uptr MapBase = 0; + uptr MapCapacity = 0; +}; + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class ReservedMemoryLinux final + : public ReservedMemory<ReservedMemoryLinux, MemMapLinux> { +public: + // The following two are the Impls for function in `MemMapBase`. + uptr getBaseImpl() { return MapBase; } + uptr getCapacityImpl() { return MapCapacity; } + + // These threes are specific to `ReservedMemory`. 
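  // (Illustrative call sequence for the public API these implement, assuming
  // SCUDO_LINUX, a page-aligned Size, and a made-up "scudo:example" mapping
  // name; a sketch of the reserve-then-dispatch pattern, not code from this
  // patch:
  //
  //   ReservedMemoryT Reserved;
  //   if (!Reserved.create(/*Addr=*/0U, Size, "scudo:example"))
  //     return false;
  //   MemMapT MemMap = Reserved.dispatch(Reserved.getBase(), Size);
  //   // The reservation was created MAP_NOACCESS, so remap() is what makes
  //   // the range readable/writable.
  //   MemMap.remap(MemMap.getBase(), Size, "scudo:example");
  //   ...
  //   MemMap.releasePagesToOS(MemMap.getBase(), Size);
  //   MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
  // )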
+ bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + +private: + uptr MapBase = 0; + uptr MapCapacity = 0; +}; + +} // namespace scudo + +#endif // SCUDO_LINUX + +#endif // SCUDO_MEM_MAP_LINUX_H_ diff --git a/standalone/memtag.h b/standalone/memtag.h index 7f14a30fee1..1f6983e9940 100644 --- a/standalone/memtag.h +++ b/standalone/memtag.h @@ -11,7 +11,7 @@ #include "internal_defs.h" -#if SCUDO_LINUX +#if SCUDO_CAN_USE_MTE #include <sys/auxv.h> #include <sys/prctl.h> #endif @@ -25,7 +25,7 @@ namespace scudo { // tagging. Not all operating systems enable TBI, so we only claim architectural // support for memory tagging if the operating system enables TBI. // HWASan uses the top byte for its own purpose and Scudo should not touch it. -#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \ +#if SCUDO_CAN_USE_MTE && !defined(SCUDO_DISABLE_TBI) && \ !__has_feature(hwaddress_sanitizer) inline constexpr bool archSupportsMemoryTagging() { return true; } #else @@ -60,7 +60,7 @@ inline NORETURN uint8_t extractTag(uptr Ptr) { #if __clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) -#if SCUDO_LINUX +#if SCUDO_CAN_USE_MTE inline bool systemSupportsMemoryTagging() { #ifndef HWCAP2_MTE @@ -106,7 +106,7 @@ inline void enableSystemMemoryTaggingTestOnly() { 0, 0, 0); } -#else // !SCUDO_LINUX +#else // !SCUDO_CAN_USE_MTE inline bool systemSupportsMemoryTagging() { return false; } @@ -118,7 +118,7 @@ inline NORETURN void enableSystemMemoryTaggingTestOnly() { UNREACHABLE("memory tagging not supported"); } -#endif // SCUDO_LINUX +#endif // SCUDO_CAN_USE_MTE class ScopedDisableMemoryTagChecks { uptr PrevTCO; @@ -326,7 +326,7 @@ inline void *addFixedTag(void *Ptr, uptr Tag) { template <typename Config> inline constexpr bool allocatorSupportsMemoryTagging() { - return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging && + return archSupportsMemoryTagging() && Config::getMaySupportMemoryTagging() && (1 << SCUDO_MIN_ALIGNMENT_LOG) >= archMemoryTagGranuleSize(); } diff --git a/standalone/mutex.h b/standalone/mutex.h index 05340de3e12..4caa945219b 100644 --- a/standalone/mutex.h +++ b/standalone/mutex.h @@ -35,7 +35,7 @@ public: #pragma nounroll #endif for (u8 I = 0U; I < NumberOfTries; I++) { - yieldProcessor(NumberOfYields); + delayLoop(); if (tryLock()) return; } @@ -53,10 +53,23 @@ public: } private: + void delayLoop() { + // The value comes from the average time spent in accessing caches (which + // are the fastest operations) so that we are unlikely to wait too long for + // fast operations. + constexpr u32 SpinTimes = 16; + volatile u32 V = 0; + for (u32 I = 0; I < SpinTimes; ++I) { + u32 Tmp = V + 1; + V = Tmp; + } + } + void assertHeldImpl(); - static constexpr u8 NumberOfTries = 8U; - static constexpr u8 NumberOfYields = 8U; + // TODO(chiahungduan): Adapt this value based on scenarios. E.g., primary and + // secondary allocator have different allocation times. 
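For a rough sense of how much spinning the new constants allow before the mutex falls back to blocking, the arithmetic below (an editorial sketch, values copied from the HybridMutex changes in this patch) multiplies the retry count by the iterations in delayLoop().

// Spin budget implied by the constants above (sketch only).
constexpr unsigned NumberOfTries = 32;   // retries before the blocking path
constexpr unsigned SpinTimes = 16;       // volatile increments per delayLoop() call
constexpr unsigned MaxSpinWork = NumberOfTries * SpinTimes; // 512 trivial iterations
static_assert(MaxSpinWork == 512, "32 tries x 16 delay iterations");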
+ static constexpr u8 NumberOfTries = 32U; #if SCUDO_LINUX atomic_u32 M = {}; diff --git a/standalone/options.h b/standalone/options.h index 4e678651333..b20142a4159 100644 --- a/standalone/options.h +++ b/standalone/options.h @@ -38,7 +38,7 @@ struct Options { } }; -template <typename Config> bool useMemoryTagging(Options Options) { +template <typename Config> bool useMemoryTagging(const Options &Options) { return allocatorSupportsMemoryTagging<Config>() && Options.get(OptionBit::UseMemoryTagging); } diff --git a/standalone/platform.h b/standalone/platform.h index db4217ddab9..5af1275e32d 100644 --- a/standalone/platform.h +++ b/standalone/platform.h @@ -37,6 +37,12 @@ #define SCUDO_TRUSTY 0 #endif +#if defined(__riscv) && (__riscv_xlen == 64) +#define SCUDO_RISCV64 1 +#else +#define SCUDO_RISCV64 0 +#endif + #if defined(__LP64__) #define SCUDO_WORDSIZE 64U #else @@ -53,6 +59,14 @@ #define SCUDO_CAN_USE_PRIMARY64 (SCUDO_WORDSIZE == 64U) #endif +#ifndef SCUDO_CAN_USE_MTE +#define SCUDO_CAN_USE_MTE (SCUDO_LINUX || SCUDO_TRUSTY) +#endif + +#ifndef SCUDO_ENABLE_HOOKS +#define SCUDO_ENABLE_HOOKS 0 +#endif + #ifndef SCUDO_MIN_ALIGNMENT_LOG // We force malloc-type functions to be aligned to std::max_align_t, but there // is no reason why the minimum alignment for all other functions can't be 8 diff --git a/standalone/primary32.h b/standalone/primary32.h index 0edc40d7e6c..ebfb8dfe0a3 100644 --- a/standalone/primary32.h +++ b/standalone/primary32.h @@ -9,6 +9,7 @@ #ifndef SCUDO_PRIMARY32_H_ #define SCUDO_PRIMARY32_H_ +#include "allocator_common.h" #include "bytemap.h" #include "common.h" #include "list.h" @@ -42,22 +43,25 @@ namespace scudo { template <typename Config> class SizeClassAllocator32 { public: - typedef typename Config::PrimaryCompactPtrT CompactPtrT; + typedef typename Config::CompactPtrT CompactPtrT; typedef typename Config::SizeClassMap SizeClassMap; - static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog; + static const uptr GroupSizeLog = Config::getGroupSizeLog(); // The bytemap can only track UINT8_MAX - 1 classes. static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. - static_assert((1UL << Config::PrimaryRegionSizeLog) >= SizeClassMap::MaxSize, + static_assert((1UL << Config::getRegionSizeLog()) >= SizeClassMap::MaxSize, ""); typedef SizeClassAllocator32<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; - typedef typename CacheT::TransferBatch TransferBatch; - typedef typename CacheT::BatchGroup BatchGroup; + typedef TransferBatch<ThisT> TransferBatchT; + typedef BatchGroup<ThisT> BatchGroupT; + + static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT), + "BatchGroupT uses the same class size as TransferBatchT"); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? sizeof(TransferBatch) + ? sizeof(TransferBatchT) : SizeClassMap::getSizeByClassId(ClassId); } @@ -73,7 +77,7 @@ public: DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); PossibleRegions.init(); u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>( Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); @@ -84,6 +88,10 @@ public: Sci->MinRegionIndex = NumRegions; Sci->ReleaseInfo.LastReleaseAtNs = Time; } + + // The default value in the primary config has the higher priority. 
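The precedence rule stated in the comment above is simply that any config-provided default other than INT32_MIN (the "no default configured" sentinel) overrides the interval passed to init(). A minimal sketch of that rule, with a hypothetical helper name:

// Sketch of the release-interval precedence (hypothetical helper, not scudo API).
#include <cstdint>

static int pickReleaseIntervalMs(int FromInit, int FromConfig) {
  // INT32_MIN means the config supplies no default of its own.
  return FromConfig != INT32_MIN ? FromConfig : FromInit;
}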
+ if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) + ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } @@ -108,12 +116,57 @@ public: } ScopedLock L(ByteMapMutex); - for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++) + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) if (PossibleRegions[I]) unmap(reinterpret_cast<void *>(I * RegionSize), RegionSize); PossibleRegions.unmapTestOnly(); } + // When all blocks are freed, it has to be the same size as `AllocatedUser`. + void verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`. + BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } + + const uptr BlockSize = getSizeByClassId(I); + DCHECK_EQ(TotalBlocks, Sci->AllocatedUser / BlockSize); + DCHECK_EQ(Sci->FreeListInfo.PushedBlocks, Sci->FreeListInfo.PoppedBlocks); + } + + SizeClassInfo *Sci = getSizeClassInfo(SizeClassMap::BatchClassId); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } else { + // `BatchGroup` with empty freelist doesn't have `TransferBatch` record + // itself. 
+ ++TotalBlocks; + } + } + + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Sci->AllocatedUser / BlockSize); + const uptr BlocksInUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); + } + CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const { return static_cast<CompactPtrT>(Ptr); } @@ -122,26 +175,40 @@ public: return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr)); } - uptr compactPtrGroup(CompactPtrT CompactPtr) { - return CompactPtr >> GroupSizeLog; + uptr compactPtrGroupBase(CompactPtrT CompactPtr) { + const uptr Mask = (static_cast<uptr>(1) << GroupSizeLog) - 1; + return CompactPtr & ~Mask; + } + + uptr decompactGroupBase(uptr CompactPtrGroupBase) { + return CompactPtrGroupBase; + } + + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; } - uptr batchGroupBase(uptr GroupId) { return GroupId << GroupSizeLog; } + ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize > PageSize; + } - TransferBatch *popBatch(CacheT *C, uptr ClassId) { + u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, + const u16 MaxBlockCount) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); ScopedLock L(Sci->Mutex); - TransferBatch *B = popBatchImpl(C, ClassId, Sci); - if (UNLIKELY(!B)) { + + u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount); + if (UNLIKELY(PopCount == 0)) { if (UNLIKELY(!populateFreeList(C, ClassId, Sci))) - return nullptr; - B = popBatchImpl(C, ClassId, Sci); - // if `populateFreeList` succeeded, we are supposed to get free blocks. - DCHECK_NE(B, nullptr); + return 0U; + PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); } - Sci->Stats.PoppedBlocks += B->getCount(); - return B; + + return PopCount; } // Push the array of free blocks to the designated batch group. @@ -152,16 +219,7 @@ public: SizeClassInfo *Sci = getSizeClassInfo(ClassId); if (ClassId == SizeClassMap::BatchClassId) { ScopedLock L(Sci->Mutex); - // Constructing a batch group in the free list will use two blocks in - // BatchClassId. If we are pushing BatchClassId blocks, we will use the - // blocks in the array directly (can't delegate local cache which will - // cause a recursive allocation). However, The number of free blocks may - // be less than two. Therefore, populate the free list before inserting - // the blocks. - if (Size == 1 && !populateFreeList(C, ClassId, Sci)) - return; - pushBlocksImpl(C, ClassId, Sci, Array, Size); - Sci->Stats.PushedBlocks += Size; + pushBatchClassBlocks(Sci, Array, Size); return; } @@ -172,11 +230,12 @@ public: // together. 
bool SameGroup = true; for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) SameGroup = false; CompactPtrT Cur = Array[I]; u32 J = I; - while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { + while (J > 0 && + compactPtrGroupBase(Cur) < compactPtrGroupBase(Array[J - 1])) { Array[J] = Array[J - 1]; --J; } @@ -185,10 +244,6 @@ public: ScopedLock L(Sci->Mutex); pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup); - - Sci->Stats.PushedBlocks += Size; - if (ClassId != SizeClassMap::BatchClassId) - releaseToOSMaybe(Sci, ClassId); } void disable() NO_THREAD_SAFETY_ANALYSIS { @@ -252,8 +307,8 @@ public: SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); TotalMapped += Sci->AllocatedUser; - PoppedBlocks += Sci->Stats.PoppedBlocks; - PushedBlocks += Sci->Stats.PushedBlocks; + PoppedBlocks += Sci->FreeListInfo.PoppedBlocks; + PushedBlocks += Sci->FreeListInfo.PushedBlocks; } Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " "remains %zu\n", @@ -261,15 +316,27 @@ public: for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); - getStats(Str, I, Sci, 0); + getStats(Str, I, Sci); + } + } + + void getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator32: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getSizeClassFragmentationInfo(Sci, I, Str); } } bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { const s32 Interval = Max( - Min(static_cast<s32>(Value), Config::PrimaryMaxReleaseToOsIntervalMs), - Config::PrimaryMinReleaseToOsIntervalMs); + Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()), + Config::getMinReleaseToOsIntervalMs()); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -277,14 +344,22 @@ public: return true; } - uptr releaseToOS() { + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + // TODO: Once we have separate locks like primary64, we may consider using + // tryLock() as well. + ScopedLock L(Sci->Mutex); + return releaseToOSMaybe(Sci, ClassId, ReleaseType); + } + + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Sci, I, ReleaseType); } return TotalReleasedBytes; } @@ -301,30 +376,30 @@ public: private: static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; - static const uptr NumRegions = - SCUDO_MMAP_RANGE_SIZE >> Config::PrimaryRegionSizeLog; + static const uptr RegionSize = 1UL << Config::getRegionSizeLog(); + static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> + Config::getRegionSizeLog(); static const u32 MaxNumBatches = SCUDO_ANDROID ? 
4U : 8U; typedef FlatByteMap<NumRegions> ByteMap; - struct SizeClassStats { - uptr PoppedBlocks; - uptr PushedBlocks; - }; - struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; }; + struct BlocksInfo { + SinglyLinkedList<BatchGroupT> BlockList = {}; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + }; + struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; - SinglyLinkedList<BatchGroup> FreeList GUARDED_BY(Mutex); + BlocksInfo FreeListInfo GUARDED_BY(Mutex); uptr CurrentRegion GUARDED_BY(Mutex); uptr CurrentRegionAllocated GUARDED_BY(Mutex); - SizeClassStats Stats GUARDED_BY(Mutex); u32 RandState; uptr AllocatedUser GUARDED_BY(Mutex); // Lowest & highest region index allocated for this size class, to avoid @@ -336,7 +411,7 @@ private: static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { - const uptr Id = Mem >> Config::PrimaryRegionSizeLog; + const uptr Id = Mem >> Config::getRegionSizeLog(); CHECK_LT(Id, NumRegions); return Id; } @@ -363,6 +438,11 @@ private: const uptr End = Region + MapSize; if (End != MapEnd) unmap(reinterpret_cast<void *>(End), MapEnd - End); + + DCHECK_EQ(Region % RegionSize, 0U); + static_assert(Config::getRegionSizeLog() == GroupSizeLog, + "Memory group should be the same size as Region"); + return Region; } @@ -394,15 +474,125 @@ private: return &SizeClassInfoArray[ClassId]; } + void pushBatchClassBlocks(SizeClassInfo *Sci, CompactPtrT *Array, u32 Size) + REQUIRES(Sci->Mutex) { + DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by TransferBatch in freelist for all + // size-classes. In addition, TransferBatch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A TransferBatch records the + // block address of itself. See the figure below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the TransferBatch, the block used + // by TransferBatch is also free for use. We don't need to recycle the + // TransferBatch. Note that the correctness is maintained by the invariant, + // + // Each popBlocks() request returns the entire TransferBatch. Returning + // part of the blocks in a TransferBatch is invalid. + // + // This ensures that TransferBatch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the TransferBatch too. To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last TransferBatch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. 
+ BG = reinterpret_cast<BatchGroupT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. + BG->CompactPtrGroupBase = 0; + // `BG` is also the block of BatchClassId. Note that this is different + // from `CreateGroup` in `pushBlocksImpl` + BG->PushedBlocks = 1; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId)); + + Sci->FreeListInfo.BlockList.push_front(BG); + } + + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `TransferBatch` on the last element. + TransferBatchT *TB = reinterpret_cast<TransferBatchT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are + // recorded in the TransferBatch. + TB->add(Array[Size - 1]); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG))); + --Size; + DCHECK_EQ(BG->PushedBlocks, 1U); + // `TB` is also the block of BatchClassId. + BG->PushedBlocks += 1; + BG->Batches.push_front(TB); + } + + TransferBatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast<TransferBatchT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - 1); + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + } // Push the blocks to their batch group. The layout will be like, // - // FreeList - > BG -> BG -> BG - // | | | - // v v v - // TB TB TB - // | - // v - // TB + // FreeListInfo.BlockList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB // // Each BlockGroup(BG) will associate with unique group id and the free blocks // are managed by a list of TransferBatch(TB). To reduce the time of inserting @@ -415,81 +605,29 @@ private: void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, CompactPtrT *Array, u32 Size, bool SameGroup = false) REQUIRES(Sci->Mutex) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); DCHECK_GT(Size, 0U); - auto CreateGroup = [&](uptr GroupId) { - BatchGroup *BG = nullptr; - TransferBatch *TB = nullptr; - if (ClassId == SizeClassMap::BatchClassId) { - DCHECK_GE(Size, 2U); - - // Free blocks are recorded by TransferBatch in freelist, blocks of - // BatchClassId are included. In order not to use additional memory to - // record blocks of BatchClassId, they are self-contained. I.e., A - // TransferBatch may record the block address of itself. See the figure - // below: - // - // TransferBatch at 0xABCD - // +----------------------------+ - // | Free blocks' addr | - // | +------+------+------+ | - // | |0xABCD|... |... 
| | - // | +------+------+------+ | - // +----------------------------+ - // - // The safeness of manipulating TransferBatch is kept by the invariant, - // - // The unit of each pop-block request is a TransferBatch. Return - // part of the blocks in a TransferBatch is not allowed. - // - // This ensures that TransferBatch won't leak the address itself while - // it's still holding other valid data. - // - // Besides, BatchGroup uses the same size-class as TransferBatch does - // and its address is recorded in the TransferBatch too. To maintain the - // safeness, the invariant to keep is, - // - // The address of itself is always recorded in the last TransferBatch - // of the freelist (also imply that the freelist should only be - // updated with push_front). Once the last TransferBatch is popped, - // the BatchGroup becomes invalid. - // - // As a result, the blocks used by BatchGroup and TransferBatch are - // reusable and don't need additional space for them. - BG = reinterpret_cast<BatchGroup *>( - decompactPtr(ClassId, Array[Size - 1])); - BG->Batches.clear(); - - TB = reinterpret_cast<TransferBatch *>( - decompactPtr(ClassId, Array[Size - 2])); - TB->clear(); - - // Append the blocks used by BatchGroup and TransferBatch immediately so - // that we ensure that they are in the last TransBatch. - TB->appendFromArray(Array + Size - 2, 2); - Size -= 2; - } else { - BG = C->createGroup(); - BG->Batches.clear(); + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = + reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock()); + BG->Batches.clear(); + TransferBatchT *TB = + reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); + TB->clear(); - TB = C->createBatch(ClassId, nullptr); - TB->clear(); - } - - BG->GroupId = GroupId; - // TODO(chiahungduan): Avoid the use of push_back() in `Batches`. + BG->CompactPtrGroupBase = CompactPtrGroupBase; BG->Batches.push_front(TB); BG->PushedBlocks = 0; - BG->PushedBlocksAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = - TransferBatch::getMaxCached(getSizeByClassId(ClassId)); + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached; return BG; }; - auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) { - SinglyLinkedList<TransferBatch> &Batches = BG->Batches; - TransferBatch *CurBatch = Batches.front(); + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList<TransferBatchT> &Batches = BG->Batches; + TransferBatchT *CurBatch = Batches.front(); DCHECK_NE(CurBatch, nullptr); for (u32 I = 0; I < Size;) { @@ -497,9 +635,8 @@ private: u16 UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); if (UnusedSlots == 0) { - CurBatch = C->createBatch( - ClassId, - reinterpret_cast<void *>(decompactPtr(ClassId, Array[I]))); + CurBatch = + reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); CurBatch->clear(); Batches.push_front(CurBatch); UnusedSlots = BG->MaxCachedPerBatch; @@ -513,40 +650,33 @@ private: BG->PushedBlocks += Size; }; - BatchGroup *Cur = Sci->FreeList.front(); - - if (ClassId == SizeClassMap::BatchClassId) { - if (Cur == nullptr) { - // Don't need to classify BatchClassId. - Cur = CreateGroup(/*GroupId=*/0); - Sci->FreeList.push_front(Cur); - } - InsertBlocks(Cur, Array, Size); - return; - } + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Sci->FreeListInfo.BlockList.front(); // In the following, `Cur` always points to the BatchGroup for blocks that // will be pushed next. 
`Prev` is the element right before `Cur`. - BatchGroup *Prev = nullptr; + BatchGroupT *Prev = nullptr; - while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) { + while (Cur != nullptr && + compactPtrGroupBase(Array[0]) > Cur->CompactPtrGroupBase) { Prev = Cur; Cur = Cur->Next; } - if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) { - Cur = CreateGroup(compactPtrGroup(Array[0])); + if (Cur == nullptr || + compactPtrGroupBase(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[0])); if (Prev == nullptr) - Sci->FreeList.push_front(Cur); + Sci->FreeListInfo.BlockList.push_front(Cur); else - Sci->FreeList.insert(Prev, Cur); + Sci->FreeListInfo.BlockList.insert(Prev, Cur); } // All the blocks are from the same group, just push without checking group // id. if (SameGroup) { for (u32 I = 0; I < Size; ++I) - DCHECK_EQ(compactPtrGroup(Array[I]), Cur->GroupId); + DCHECK_EQ(compactPtrGroupBase(Array[I]), Cur->CompactPtrGroupBase); InsertBlocks(Cur, Array, Size); return; @@ -556,19 +686,21 @@ private: // push them to their group together. u32 Count = 1; for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { - DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId); + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) { + DCHECK_EQ(compactPtrGroupBase(Array[I - 1]), Cur->CompactPtrGroupBase); InsertBlocks(Cur, Array + I - Count, Count); - while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) { + while (Cur != nullptr && + compactPtrGroupBase(Array[I]) > Cur->CompactPtrGroupBase) { Prev = Cur; Cur = Cur->Next; } - if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) { - Cur = CreateGroup(compactPtrGroup(Array[I])); + if (Cur == nullptr || + compactPtrGroupBase(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[I])); DCHECK_NE(Prev, nullptr); - Sci->FreeList.insert(Prev, Cur); + Sci->FreeListInfo.BlockList.insert(Prev, Cur); } Count = 1; @@ -580,37 +712,74 @@ private: InsertBlocks(Cur, Array + Size - Count, Count); } - // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest - // group id will be considered first. - // - // The region mutex needs to be held while calling this method. - TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Sci->Mutex) { - if (Sci->FreeList.empty()) - return nullptr; + if (Sci->FreeListInfo.BlockList.empty()) + return 0U; - SinglyLinkedList<TransferBatch> &Batches = Sci->FreeList.front()->Batches; - DCHECK(!Batches.empty()); + SinglyLinkedList<TransferBatchT> &Batches = + Sci->FreeListInfo.BlockList.front()->Batches; - TransferBatch *B = Batches.front(); - Batches.pop_front(); + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `TransferBatch` with single block. 
+ TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)); + Sci->FreeListInfo.PoppedBlocks += 1; + return 1U; + } + + // So far, instead of always filling the blocks to `MaxBlockCount`, we only + // examine single `TransferBatch` to minimize the time spent on the primary + // allocator. Besides, the sizes of `TransferBatch` and + // `CacheT::getMaxCached()` may also impact the time spent on accessing the + // primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `TransferBatch` according to + // `CacheT::getMaxCached()`. + TransferBatchT *B = Batches.front(); DCHECK_NE(B, nullptr); DCHECK_GT(B->getCount(), 0U); - if (Batches.empty()) { - BatchGroup *BG = Sci->FreeList.front(); - Sci->FreeList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking while - // allocating. Note that block used by constructing BatchGroup is recorded - // as free blocks in the last element of BatchGroup::Batches. Which means, - // once we pop the last TransferBatch, the block is implicitly - // deallocated. + // BachClassId should always take all blocks in the TransferBatch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `Mutex`. + if (B->empty()) { + Batches.pop_front(); + // `TransferBatch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, BG); + C->deallocate(SizeClassMap::BatchClassId, B); + + if (Batches.empty()) { + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last TransferBatch, the block is + // implicitly deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } } - return B; + Sci->FreeListInfo.PoppedBlocks += PopCount; + return PopCount; } NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) @@ -636,7 +805,7 @@ private: } const uptr Size = getSizeByClassId(ClassId); - const u16 MaxCount = TransferBatch::getMaxCached(Size); + const u16 MaxCount = CacheT::getMaxCached(Size); DCHECK_GT(MaxCount, 0U); // The maximum number of blocks we should carve in the region is dictated // by the maximum number of batches we want to fill, and the amount of @@ -649,7 +818,7 @@ private: DCHECK_GT(NumberOfBlocks, 0U); constexpr u32 ShuffleArraySize = - MaxNumBatches * TransferBatch::MaxNumCached; + MaxNumBatches * TransferBatchT::MaxNumCached; // Fill the transfer batches and put them in the size-class freelist. We // need to randomize the blocks for security purposes, so we first fill a // local array that we then shuffle before populating the batches. 
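The randomization mentioned in the comment above is done by scudo's shuffle() helper (defined in standalone/common.h, outside this diff). As an illustration of the idea only, a generic Fisher-Yates pass over the local block array looks like this; the RNG step here is a stand-in, not scudo's:

// Illustrative Fisher-Yates shuffle of a block array (sketch only; the real
// helper is scudo::shuffle() and uses scudo's own RandState logic).
#include <cstdint>
#include <utility>

template <typename T>
void shuffleSketch(T *Array, uint32_t N, uint32_t *RandState) {
  if (N <= 1)
    return;
  for (uint32_t I = N - 1; I > 0; I--) {
    // Stand-in LCG step; only meant to show where the randomness comes in.
    *RandState = *RandState * 1664525u + 1013904223u;
    const uint32_t J = *RandState % (I + 1);
    std::swap(Array[I], Array[J]);
  }
}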
@@ -662,14 +831,14 @@ private: if (ClassId != SizeClassMap::BatchClassId) { u32 N = 1; - uptr CurGroup = compactPtrGroup(ShuffleArray[0]); + uptr CurGroup = compactPtrGroupBase(ShuffleArray[0]); for (u32 I = 1; I < NumberOfBlocks; I++) { - if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { + if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) { shuffle(ShuffleArray + I - N, N, &Sci->RandState); pushBlocksImpl(C, ClassId, Sci, ShuffleArray + I - N, N, /*SameGroup=*/true); N = 1; - CurGroup = compactPtrGroup(ShuffleArray[I]); + CurGroup = compactPtrGroupBase(ShuffleArray[I]); } else { ++N; } @@ -679,10 +848,15 @@ private: pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N, /*SameGroup=*/true); } else { - pushBlocksImpl(C, ClassId, Sci, ShuffleArray, NumberOfBlocks, - /*SameGroup=*/true); + pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks); } + // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record + // the requests from `PushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeList` is the internal interface so we should set + // the values back to avoid incorrectly setting the stats. + Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks; + const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize); @@ -700,54 +874,102 @@ private: return true; } - void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci, uptr Rss) + void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci) REQUIRES(Sci->Mutex) { if (Sci->AllocatedUser == 0) return; - const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; - const uptr AvailableChunks = Sci->AllocatedUser / getSizeByClassId(ClassId); + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = Sci->AllocatedUser - InUse * BlockSize; + uptr PushedBytesDelta = 0; + if (BytesInFreeList >= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr AvailableChunks = Sci->AllocatedUser / BlockSize; Str->append(" %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu avail: %6zu rss: %6zuK releases: %6zu\n", + "inuse: %6zu avail: %6zu releases: %6zu last released: %6zuK " + "latest pushed bytes: %6zuK\n", ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10, - Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks, InUse, - AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased); + Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks, + InUse, AvailableChunks, Sci->ReleaseInfo.RangesReleased, + Sci->ReleaseInfo.LastReleasedBytes >> 10, + PushedBytesDelta >> 10); } - NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, - bool Force = false) REQUIRES(Sci->Mutex) { + void getSizeClassFragmentationInfo(SizeClassInfo *Sci, uptr ClassId, + ScopedString *Str) REQUIRES(Sci->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); + const uptr First = Sci->MinRegionIndex; + const uptr Last = Sci->MaxRegionIndex; + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + + FragmentationRecorder Recorder; + if 
(!Sci->FreeListInfo.BlockList.empty()) { + PageReleaseContext Context = + markFreeBlocks(Sci, ClassId, BlockSize, Base, NumberOfRegions, + ReleaseToOS::ForceAll); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + } + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Sci->AllocatedUser / BlockSize; + const uptr InUseBlocks = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + uptr AllocatedPagesCount = 0; + if (TotalBlocks != 0U) { + for (uptr I = 0; I < NumberOfRegions; ++I) { + if (SkipRegion(I)) + continue; + AllocatedPagesCount += RegionSize / PageSize; + } + + DCHECK_NE(AllocatedPagesCount, 0U); + } + + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUsePages * PageSize, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); + } + + NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Sci->Mutex) { + const uptr BlockSize = getSizeByClassId(ClassId); - DCHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks); + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); const uptr BytesInFreeList = Sci->AllocatedUser - - (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = - (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. - - const bool CheckDensity = BlockSize < PageSize / 16U; - // Releasing smaller blocks is expensive, so we want to make sure that a - // significant amount of bytes are free, and that there has been a good - // amount of batches pushed to the freelist before attempting to release. - if (CheckDensity) { - if (!Force && BytesPushed < Sci->AllocatedUser / 16U) - return 0; - } + (Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks) * + BlockSize; - if (!Force) { - const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); - if (IntervalMs < 0) - return 0; - if (Sci->ReleaseInfo.LastReleaseAtNs + - static_cast<u64>(IntervalMs) * 1000000 > - getMonotonicTime()) { - return 0; // Memory was returned recently. - } + if (UNLIKELY(BytesInFreeList == 0)) + return 0; + + // ====================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. 
+ // ====================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Sci, BlockSize, BytesInFreeList, + ReleaseType)) { + return 0; } const uptr First = Sci->MinRegionIndex; @@ -757,26 +979,122 @@ private: uptr TotalReleasedBytes = 0; const uptr Base = First * RegionSize; const uptr NumberOfRegions = Last - First + 1U; - const uptr GroupSize = (1U << GroupSizeLog); - const uptr CurRegionGroupId = - compactPtrGroup(compactPtr(ClassId, Sci->CurrentRegion)); + // ==================================================================== // + // 2. Mark the free blocks and we can tell which pages are in-use by + // querying `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = markFreeBlocks(Sci, ClassId, BlockSize, Base, + NumberOfRegions, ReleaseType); + if (!Context.hasBlockMarked()) + return 0; + + // ==================================================================== // + // 3. Release the unused physical pages back to the OS. + // ==================================================================== // ReleaseRecorder Recorder(Base); - PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions, + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + + if (Recorder.getReleasedRangesCount() > 0) { + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); + Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; + } + Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + + return TotalReleasedBytes; + } + + bool hasChanceToReleasePages(SizeClassInfo *Sci, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); + const uptr PageSize = getPageSizeCached(); + + if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). + const uptr PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + return false; + + // Releasing smaller blocks is expensive, so we want to make sure that a + // significant amount of bytes are free, and that there has been a good + // amount of batches pushed to the freelist before attempting to release. 
+ if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal) + if (PushedBytesDelta < Sci->AllocatedUser / 16U) + return false; + + if (ReleaseType == ReleaseToOS::Normal) { + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); + if (IntervalMs < 0) + return false; + + // The constant 8 here is selected from profiling some apps and the number + // of unreleased pages in the large size classes is around 16 pages or + // more. Choose half of it as a heuristic and which also avoids page + // release every time for every pushBlocks() attempt by large blocks. + const bool ByPassReleaseInterval = + isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize; + if (!ByPassReleaseInterval) { + if (Sci->ReleaseInfo.LastReleaseAtNs + + static_cast<u64>(IntervalMs) * 1000000 > + getMonotonicTimeFast()) { + // Memory was returned recently. + return false; + } + } + } // if (ReleaseType == ReleaseToOS::Normal) + + return true; + } + + PageReleaseContext markFreeBlocks(SizeClassInfo *Sci, const uptr ClassId, + const uptr BlockSize, const uptr Base, + const uptr NumberOfRegions, + ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr CurGroupBase = + compactPtrGroupBase(compactPtr(ClassId, Sci->CurrentRegion)); + + PageReleaseContext Context(BlockSize, NumberOfRegions, /*ReleaseSize=*/RegionSize); auto DecompactPtr = [](CompactPtrT CompactPtr) { return reinterpret_cast<uptr>(CompactPtr); }; - for (BatchGroup &BG : Sci->FreeList) { - const uptr PushedBytesDelta = - BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint; - if (PushedBytesDelta * BlockSize < PageSize) - continue; - - uptr AllocatedGroupSize = BG.GroupId == CurRegionGroupId + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase); + // The `GroupSize` may not be divided by `BlockSize`, which means there is + // an unused space at the end of Region. Exclude that space to avoid + // unused page map entry. + uptr AllocatedGroupSize = GroupBase == CurGroupBase ? Sci->CurrentRegionAllocated - : GroupSize; + : roundDownSlow(GroupSize, BlockSize); if (AllocatedGroupSize == 0) continue; @@ -785,65 +1103,59 @@ private: const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + BG.Batches.front()->getCount(); const uptr BytesInBG = NumBlocks * BlockSize; - // Given the randomness property, we try to release the pages only if the - // bytes used by free blocks exceed certain proportion of allocated - // spaces. - if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize < - (100U - 1U - BlockSize / 16U)) { - continue; + + if (ReleaseType != ReleaseToOS::ForceAll) { + if (BytesInBG <= BG.BytesInBGAtLastCheckpoint) { + BG.BytesInBGAtLastCheckpoint = BytesInBG; + continue; + } + + const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + continue; + + // Given the randomness property, we try to release the pages only if + // the bytes used by free blocks exceed certain proportion of allocated + // spaces. + if (isSmallBlock(BlockSize) && (BytesInBG * 100U) / AllocatedGroupSize < + (100U - 1U - BlockSize / 16U)) { + continue; + } } - BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks; + // TODO: Consider updating this after page release if `ReleaseRecorder` + // can tell the released bytes in each group. 
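To make the small-block density check above concrete: a group is only considered for release once its free bytes reach 100 - 1 - BlockSize / 16 percent of the allocated group size. A couple of worked values, as an editorial sketch:

// Worked examples of the release-density threshold used above (sketch only).
constexpr unsigned releaseThresholdPercent(unsigned BlockSize) {
  return 100u - 1u - BlockSize / 16u;
}
static_assert(releaseThresholdPercent(32) == 97,
              "32-byte blocks: ~97% of the group's bytes must be free");
static_assert(releaseThresholdPercent(128) == 91,
              "128-byte blocks: ~91% of the group's bytes must be free");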
+ BG.BytesInBGAtLastCheckpoint = BytesInBG; const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; - // The first condition to do range marking is that all the blocks in the - // range need to be from the same region. In SizeClassAllocator32, this is - // true when GroupSize and RegionSize are the same. Another tricky case, - // while range marking, the last block in a region needs the logic to mark - // the last page. However, in SizeClassAllocator32, the RegionSize - // recorded in PageReleaseContext may be different from - // `CurrentRegionAllocated` of the current region. This exception excludes - // the chance of doing range marking for the current region. - const bool CanDoRangeMark = - GroupSize == RegionSize && BG.GroupId != CurRegionGroupId; - - if (CanDoRangeMark && NumBlocks == MaxContainedBlocks) { + const uptr RegionIndex = (GroupBase - Base) / RegionSize; + + if (NumBlocks == MaxContainedBlocks) { for (const auto &It : BG.Batches) for (u16 I = 0; I < It.getCount(); ++I) - DCHECK_EQ(compactPtrGroup(It.get(I)), BG.GroupId); + DCHECK_EQ(compactPtrGroupBase(It.get(I)), BG.CompactPtrGroupBase); - const uptr From = batchGroupBase(BG.GroupId); - const uptr To = batchGroupBase(BG.GroupId) + AllocatedGroupSize; - Context.markRangeAsAllCounted(From, To, Base); + const uptr To = GroupBase + AllocatedGroupSize; + Context.markRangeAsAllCounted(GroupBase, To, GroupBase, RegionIndex, + AllocatedGroupSize); } else { - if (CanDoRangeMark) - DCHECK_LT(NumBlocks, MaxContainedBlocks); + DCHECK_LT(NumBlocks, MaxContainedBlocks); // Note that we don't always visit blocks in each BatchGroup so that we // may miss the chance of releasing certain pages that cross // BatchGroups. - Context.markFreeBlocks(BG.Batches, DecompactPtr, Base); + Context.markFreeBlocksInRegion(BG.Batches, DecompactPtr, GroupBase, + RegionIndex, AllocatedGroupSize, + /*MayContainLastBlockInRegion=*/true); } - } - - if (!Context.hasBlockMarked()) - return 0; - auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { - ScopedLock L(ByteMapMutex); - return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; - }; - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); - - if (Recorder.getReleasedRangesCount() > 0) { - Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; - Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); - Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; + // We may not be able to do the page release In a rare case that we may + // fail on PageMap allocation. 
+ if (UNLIKELY(!Context.hasBlockMarked())) + break; } - Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); - return TotalReleasedBytes; + return Context; } SizeClassInfo SizeClassInfoArray[NumClasses] = {}; diff --git a/standalone/primary64.h b/standalone/primary64.h index 4caf8eb1751..bed2ccb8b99 100644 --- a/standalone/primary64.h +++ b/standalone/primary64.h @@ -9,10 +9,13 @@ #ifndef SCUDO_PRIMARY64_H_ #define SCUDO_PRIMARY64_H_ +#include "allocator_common.h" #include "bytemap.h" #include "common.h" +#include "condition_variable.h" #include "list.h" #include "local_cache.h" +#include "mem_map.h" #include "memtag.h" #include "options.h" #include "release.h" @@ -44,96 +47,226 @@ namespace scudo { template <typename Config> class SizeClassAllocator64 { public: - typedef typename Config::PrimaryCompactPtrT CompactPtrT; - static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale; - static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog; + typedef typename Config::CompactPtrT CompactPtrT; typedef typename Config::SizeClassMap SizeClassMap; + typedef typename Config::ConditionVariableT ConditionVariableT; + static const uptr CompactPtrScale = Config::getCompactPtrScale(); + static const uptr RegionSizeLog = Config::getRegionSizeLog(); + static const uptr GroupSizeLog = Config::getGroupSizeLog(); + static_assert(RegionSizeLog >= GroupSizeLog, + "Group size shouldn't be greater than the region size"); + static const uptr GroupScale = GroupSizeLog - CompactPtrScale; typedef SizeClassAllocator64<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; - typedef typename CacheT::TransferBatch TransferBatch; - typedef typename CacheT::BatchGroup BatchGroup; + typedef TransferBatch<ThisT> TransferBatchT; + typedef BatchGroup<ThisT> BatchGroupT; + + static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT), + "BatchGroupT uses the same class size as TransferBatchT"); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? roundUp(sizeof(TransferBatch), 1U << CompactPtrScale) + ? roundUp(sizeof(TransferBatchT), 1U << CompactPtrScale) : SizeClassMap::getSizeByClassId(ClassId); } static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } + static bool conditionVariableEnabled() { + return Config::hasConditionVariableT(); + } + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); - DCHECK_EQ(PrimaryBase, 0U); - // Reserve the space required for the Primary. - PrimaryBase = reinterpret_cast<uptr>(map( - nullptr, PrimarySize, "scudo:primary_reserve", MAP_NOACCESS, &Data)); + + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PagesInGroup = GroupSize / PageSize; + const uptr MinSizeClass = getSizeByClassId(1); + // When trying to release pages back to memory, visiting smaller size + // classes is expensive. Therefore, we only try to release smaller size + // classes when the amount of free blocks goes over a certain threshold (See + // the comment in releaseToOSMaybe() for more details). For example, for + // size class 32, we only do the release when the size of free blocks is + // greater than 97% of pages in a group. However, this may introduce another + // issue that if the number of free blocks is bouncing between 97% ~ 100%. + // Which means we may try many page releases but only release very few of + // them (less than 3% in a group). 
Even though we have + // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these + // calls but it will be better to have another guard to mitigate this issue. + // + // Here we add another constraint on the minimum size requirement. The + // constraint is determined by the size of in-use blocks in the minimal size + // class. Take size class 32 as an example, + // + // +- one memory group -+ + // +----------------------+------+ + // | 97% of free blocks | | + // +----------------------+------+ + // \ / + // 3% in-use blocks + // + // * The release size threshold is 97%. + // + // The 3% size in a group is about 7 pages. For two consecutive + // releaseToOSMaybe(), we require the difference between `PushedBlocks` + // should be greater than 7 pages. This mitigates the page releasing + // thrashing which is caused by memory usage bouncing around the threshold. + // The smallest size class takes longest time to do the page release so we + // use its size of in-use blocks as a heuristic. + SmallerBlockReleasePageDelta = + PagesInGroup * (1 + MinSizeClass / 16U) / 100; u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) - Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); - const uptr PageSize = getPageSizeCached(); - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - // The actual start of a region is offset by a random number of pages - // when PrimaryEnableRandomOffset is set. - Region->RegionBeg = getRegionBaseByClassId(I) + - (Config::PrimaryEnableRandomOffset - ? ((getRandomModN(&Seed, 16) + 1) * PageSize) - : 0); - Region->RandState = getRandomU32(&Seed); - Region->ReleaseInfo.LastReleaseAtNs = Time; + Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12)); + + for (uptr I = 0; I < NumClasses; I++) + getRegionInfo(I)->RandState = getRandomU32(&Seed); + + if (Config::getEnableContiguousRegions()) { + ReservedMemoryT ReservedMemory = {}; + // Reserve the space required for the Primary. + CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses, + "scudo:primary_reserve")); + const uptr PrimaryBase = ReservedMemory.getBase(); + + for (uptr I = 0; I < NumClasses; I++) { + MemMapT RegionMemMap = ReservedMemory.dispatch( + PrimaryBase + (I << RegionSizeLog), RegionSize); + RegionInfo *Region = getRegionInfo(I); + + initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset()); + } + shuffle(RegionInfoArray, NumClasses, &Seed); } + + // The binding should be done after region shuffling so that it won't bind + // the FLLock from the wrong region. + for (uptr I = 0; I < NumClasses; I++) + getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock); + + // The default value in the primary config has the higher priority. 
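The "about 7 pages" figure in the SmallerBlockReleasePageDelta comment above follows from the formula under a typical configuration. The numbers below are an editorial sketch assuming a 4 KiB page size, a 1 MiB memory group, and a smallest size class of 32 bytes; other configurations give different values.

// Sketch of the SmallerBlockReleasePageDelta arithmetic (assumed parameters,
// not taken from the patch: 4 KiB pages, 1 MiB groups, MinSizeClass = 32).
constexpr unsigned long PageSize = 4096;
constexpr unsigned long GroupSize = 1UL << 20;
constexpr unsigned long PagesInGroup = GroupSize / PageSize;            // 256
constexpr unsigned long MinSizeClass = 32;
constexpr unsigned long Delta = PagesInGroup * (1 + MinSizeClass / 16) / 100;
static_assert(Delta == 7, "matches the 'about 7 pages' figure in the comment");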
+ if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) + ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { + void unmapTestOnly() { for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); + { + ScopedLock ML(Region->MMLock); + MemMapT MemMap = Region->MemMapInfo.MemMap; + if (MemMap.isAllocated()) + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } *Region = {}; } - if (PrimaryBase) - unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, - &Data); - PrimaryBase = 0U; } - TransferBatch *popBatch(CacheT *C, uptr ClassId) { + // When all blocks are freed, it has to be the same size as `AllocatedUser`. + void verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + RegionInfo *Region = getRegionInfo(I); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(I); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`. + BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } + + DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_EQ(Region->FreeListInfo.PushedBlocks, + Region->FreeListInfo.PoppedBlocks); + } + + RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } else { + // `BatchGroup` with empty freelist doesn't have `TransferBatch` record + // itself. + ++TotalBlocks; + } + } + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); + const uptr BlocksInUse = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); + } + + u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, + const u16 MaxBlockCount) { DCHECK_LT(ClassId, NumClasses); RegionInfo *Region = getRegionInfo(ClassId); - bool PrintStats = false; + u16 PopCount = 0; + { - ScopedLock L(Region->Mutex); - TransferBatch *B = popBatchImpl(C, ClassId, Region); - if (LIKELY(B)) { - Region->Stats.PoppedBlocks += B->getCount(); - return B; - } + ScopedLock L(Region->FLLock); + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + return PopCount; + } - const bool RegionIsExhausted = Region->Exhausted; - if (UNLIKELY(RegionIsExhausted || - !populateFreeList(C, ClassId, Region))) { - PrintStats = !RegionIsExhausted && Region->Exhausted; - } else { - B = popBatchImpl(C, ClassId, Region); - // if `populateFreeList` succeeded, we are supposed to get free blocks. 
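The replacement pop path in the lines that follow relies on a double-checked pattern across the two new locks: the freelist is consulted under FLLock first, and only the thread holding MMLock maps new pages, re-checking the freelist before doing so. A self-contained sketch of that shape using std::mutex, with hypothetical names, for illustration only:

// Double-checked pop pattern, sketched with std::mutex (names are illustrative;
// the real code uses RegionInfo::FLLock/MMLock and scudo's ScopedLock).
#include <mutex>
#include <vector>

struct RegionSketch {
  std::mutex MMLock;            // serializes mapping new pages
  std::mutex FLLock;            // guards the freelist
  std::vector<int> FreeList;
};

static int popOne(RegionSketch &R) {
  if (R.FreeList.empty())
    return -1;
  const int Block = R.FreeList.back();
  R.FreeList.pop_back();
  return Block;
}

int popBlockSketch(RegionSketch &R) {
  {
    std::scoped_lock FL(R.FLLock);      // fast path: freelist only
    if (int B = popOne(R); B >= 0)
      return B;
  }
  std::scoped_lock ML(R.MMLock);        // slow path: at most one mapper at a time
  {
    std::scoped_lock FL(R.FLLock);      // another thread may have refilled it
    if (int B = popOne(R); B >= 0)
      return B;
  }
  std::scoped_lock FL(R.FLLock);        // stand-in for populating fresh blocks
  R.FreeList = {1, 2, 3};
  return popOne(R);
}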
- DCHECK_NE(B, nullptr); - Region->Stats.PoppedBlocks += B->getCount(); - return B; + bool ReportRegionExhausted = false; + + if (conditionVariableEnabled()) { + PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount, + ReportRegionExhausted); + } else { + while (true) { + // When two threads compete for `Region->MMLock`, we only want one of + // them to call populateFreeListAndPopBatch(). To avoid both of them + // doing that, always check the freelist before mapping new pages. + ScopedLock ML(Region->MMLock); + { + ScopedLock FL(Region->FLLock); + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + return PopCount; + } + + const bool RegionIsExhausted = Region->Exhausted; + if (!RegionIsExhausted) { + PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, + MaxBlockCount); + } + ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; + break; } } - // Note that `getStats()` requires locking each region so we can't call it - // while locking the Region->Mutex in the above. - if (UNLIKELY(PrintStats)) { - ScopedString Str; - getStats(&Str); - Str.append( - "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", - RegionSize >> 20, getSizeByClassId(ClassId)); - Str.output(); + if (UNLIKELY(ReportRegionExhausted)) { + Printf("Can't populate more pages for size class %zu.\n", + getSizeByClassId(ClassId)); + + // Theoretically, BatchClass shouldn't be used up. Abort immediately when + // it happens. + if (ClassId == SizeClassMap::BatchClassId) + reportOutOfBatchClass(); } - return nullptr; + + return PopCount; } // Push the array of free blocks to the designated batch group. @@ -143,68 +276,39 @@ public: RegionInfo *Region = getRegionInfo(ClassId); if (ClassId == SizeClassMap::BatchClassId) { - bool PrintStats = false; - { - ScopedLock L(Region->Mutex); - // Constructing a batch group in the free list will use two blocks in - // BatchClassId. If we are pushing BatchClassId blocks, we will use the - // blocks in the array directly (can't delegate local cache which will - // cause a recursive allocation). However, The number of free blocks may - // be less than two. Therefore, populate the free list before inserting - // the blocks. - if (Size >= 2U) { - pushBlocksImpl(C, SizeClassMap::BatchClassId, Region, Array, Size); - Region->Stats.PushedBlocks += Size; - } else { - const bool RegionIsExhausted = Region->Exhausted; - if (UNLIKELY( - RegionIsExhausted || - !populateFreeList(C, SizeClassMap::BatchClassId, Region))) { - PrintStats = !RegionIsExhausted && Region->Exhausted; - } - } - } - - // Note that `getStats()` requires the lock of each region so we can't - // call it while locking the Region->Mutex in the above. - if (UNLIKELY(PrintStats)) { - ScopedString Str; - getStats(&Str); - Str.append( - "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", - RegionSize >> 20, getSizeByClassId(ClassId)); - Str.output(); - // Theoretically, BatchClass shouldn't be used up. Abort immediately - // when it happens. - reportOutOfBatchClass(); - } + ScopedLock L(Region->FLLock); + pushBatchClassBlocks(Region, Array, Size); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); return; } // TODO(chiahungduan): Consider not doing grouping if the group size is not // greater than the block size with a certain scale. - // Sort the blocks so that blocks belonging to the same group can be pushed - // together. 
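[Illustrative sketch, not part of the patch] The new popBlocks() path above follows a two-lock shape: try the freelist under the freelist lock first, and only fall back to the memory-map lock, re-checking the freelist before populating, when the fast path finds nothing. A minimal model of that shape with standard mutexes; scudo's HybridMutex and block types are replaced with placeholders.

#include <cstdio>
#include <mutex>
#include <vector>

struct ToyRegion {
  std::mutex FLLock;          // guards the freelist
  std::mutex MMLock;          // guards (simulated) mapping work
  std::vector<int> FreeList;  // toy freelist of block ids
  int NextBlockId = 0;
};

static int tryPopLocked(ToyRegion &R) {  // caller holds FLLock
  if (R.FreeList.empty())
    return -1;
  int B = R.FreeList.back();
  R.FreeList.pop_back();
  return B;
}

static int popBlock(ToyRegion &R) {
  {
    std::lock_guard<std::mutex> FL(R.FLLock);  // fast path
    if (int B = tryPopLocked(R); B >= 0)
      return B;
  }
  std::lock_guard<std::mutex> MM(R.MMLock);    // slow path: serialize populators
  {
    std::lock_guard<std::mutex> FL(R.FLLock);  // re-check before mapping pages
    if (int B = tryPopLocked(R); B >= 0)
      return B;
  }
  std::lock_guard<std::mutex> FL(R.FLLock);    // simulate refilling the freelist
  for (int I = 0; I < 4; ++I)
    R.FreeList.push_back(R.NextBlockId++);
  return tryPopLocked(R);
}

int main() {
  ToyRegion R;
  std::printf("got block %d\n", popBlock(R));
  return 0;
}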
bool SameGroup = true; - for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) - SameGroup = false; - CompactPtrT Cur = Array[I]; - u32 J = I; - while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { - Array[J] = Array[J - 1]; - --J; + if (GroupSizeLog < RegionSizeLog) { + // Sort the blocks so that blocks belonging to the same group can be + // pushed together. + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; } - Array[J] = Cur; } - ScopedLock L(Region->Mutex); - pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); - - Region->Stats.PushedBlocks += Size; - if (ClassId != SizeClassMap::BatchClassId) - releaseToOSMaybe(Region, ClassId); + { + ScopedLock L(Region->FLLock); + pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); + } } void disable() NO_THREAD_SAFETY_ANALYSIS { @@ -212,17 +316,21 @@ public: for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) { if (static_cast<uptr>(I) == SizeClassMap::BatchClassId) continue; - getRegionInfo(static_cast<uptr>(I))->Mutex.lock(); + getRegionInfo(static_cast<uptr>(I))->MMLock.lock(); + getRegionInfo(static_cast<uptr>(I))->FLLock.lock(); } - getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock(); + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock(); + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock(); } void enable() NO_THREAD_SAFETY_ANALYSIS { - getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock(); + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock(); for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; - getRegionInfo(I)->Mutex.unlock(); + getRegionInfo(I)->FLLock.unlock(); + getRegionInfo(I)->MMLock.unlock(); } } @@ -234,10 +342,11 @@ public: // TODO: The call of `iterateOverBlocks` requires disabling // SizeClassAllocator64. We may consider locking each region on demand // only. - Region->Mutex.assertHeld(); + Region->FLLock.assertHeld(); + Region->MMLock.assertHeld(); const uptr BlockSize = getSizeByClassId(I); const uptr From = Region->RegionBeg; - const uptr To = From + Region->AllocatedUser; + const uptr To = From + Region->MemMapInfo.AllocatedUser; for (uptr Block = From; Block < To; Block += BlockSize) Callback(Block); } @@ -250,29 +359,47 @@ public: uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->Mutex); - if (Region->MappedUser) - TotalMapped += Region->MappedUser; - PoppedBlocks += Region->Stats.PoppedBlocks; - PushedBlocks += Region->Stats.PushedBlocks; + { + ScopedLock L(Region->MMLock); + TotalMapped += Region->MemMapInfo.MappedUser; + } + { + ScopedLock L(Region->FLLock); + PoppedBlocks += Region->FreeListInfo.PoppedBlocks; + PushedBlocks += Region->FreeListInfo.PushedBlocks; + } } + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu " - "allocations; remains %zu\n", + "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n", TotalMapped >> 20, 0U, PoppedBlocks, - PoppedBlocks - PushedBlocks); + PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? 
IntervalMs : -1); for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->Mutex); - getStats(Str, I, Region, 0); + ScopedLock L1(Region->MMLock); + ScopedLock L2(Region->FLLock); + getStats(Str, I, Region); + } + } + + void getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->MMLock); + getRegionFragmentationInfo(Region, I, Str); } } bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { const s32 Interval = Max( - Min(static_cast<s32>(Value), Config::PrimaryMaxReleaseToOsIntervalMs), - Config::PrimaryMinReleaseToOsIntervalMs); + Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()), + Config::getMinReleaseToOsIntervalMs()); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -280,14 +407,27 @@ public: return true; } - uptr releaseToOS() { + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { + RegionInfo *Region = getRegionInfo(ClassId); + // Note that the tryLock() may fail spuriously, given that it should rarely + // happen and page releasing is fine to skip, we don't take certain + // approaches to ensure one page release is done. + if (Region->MMLock.tryLock()) { + uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType); + Region->MMLock.unlock(); + return BytesReleased; + } + return 0; + } + + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); + ScopedLock L(Region->MMLock); + TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); } return TotalReleasedBytes; } @@ -299,9 +439,6 @@ public: static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } uptr getCompactPtrBaseByClassId(uptr ClassId) { - // If we are not compacting pointers, base everything off of 0. - if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0) - return 0; return getRegionInfo(ClassId)->RegionBeg; } @@ -327,13 +464,13 @@ public: if (I == SizeClassMap::BatchClassId) continue; uptr Begin = RegionInfoArray[I].RegionBeg; - // TODO(chiahungduan): In fact, We need to lock the RegionInfo::Mutex. + // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock. // However, the RegionInfoData is passed with const qualifier and lock the // mutex requires modifying RegionInfoData, which means we need to remove // the const qualifier. This may lead to another undefined behavior (The // first one is accessing `AllocatedUser` without locking. It's better to // pass `RegionInfoData` as `void *` then we can lock the mutex properly. 
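[Illustrative sketch, not part of the patch] Two details from the hunk above in isolation: setOption() clamps the requested release interval between the config's minimum and maximum, and tryReleaseToOS() simply skips the release when the region's map lock is contended rather than blocking on it. Sketched with standard primitives and placeholder names.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <mutex>

// Clamp a requested release interval the way setOption() does.
static int clampIntervalMs(int Requested, int MinMs, int MaxMs) {
  return std::max(std::min(Requested, MaxMs), MinMs);
}

// Opportunistic release: if another thread holds the lock, skip this round;
// missing one page-release attempt is harmless.
static std::size_t tryRelease(std::mutex &MMLock, std::size_t (*DoRelease)()) {
  if (!MMLock.try_lock())
    return 0;
  const std::size_t Released = DoRelease();
  MMLock.unlock();
  return Released;
}

int main() {
  std::printf("clamped = %d\n", clampIntervalMs(5000, 0, 1000));  // 1000
  std::mutex M;
  std::printf("released = %zu\n", tryRelease(M, [] { return std::size_t(4096); }));
  return 0;
}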
- uptr End = Begin + RegionInfoArray[I].AllocatedUser; + uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser; if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) continue; uptr RegionDistance; @@ -355,7 +492,8 @@ public: BlockInfo B = {}; if (MinDistance <= 8192) { B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; - B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser; + B.RegionEnd = + B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser; B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); B.BlockBegin = B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * @@ -371,40 +509,50 @@ public: AtomicOptions Options; private: - static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; + static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr PrimarySize = RegionSize * NumClasses; - static const uptr MapSizeIncrement = Config::PrimaryMapSizeIncrement; + static const uptr MapSizeIncrement = Config::getMapSizeIncrement(); // Fill at most this number of batches from the newly map'd memory. static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; - struct RegionStats { - uptr PoppedBlocks; - uptr PushedBlocks; - }; - struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; }; - struct UnpaddedRegionInfo { - HybridMutex Mutex; - SinglyLinkedList<BatchGroup> FreeList GUARDED_BY(Mutex); - // This is initialized before thread creation. - uptr RegionBeg = 0; - RegionStats Stats GUARDED_BY(Mutex) = {}; - u32 RandState GUARDED_BY(Mutex) = 0; + struct BlocksInfo { + SinglyLinkedList<BatchGroupT> BlockList = {}; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + }; + + struct PagesInfo { + MemMapT MemMap = {}; // Bytes mapped for user memory. - uptr MappedUser GUARDED_BY(Mutex) = 0; + uptr MappedUser = 0; // Bytes allocated for user memory. - uptr AllocatedUser GUARDED_BY(Mutex) = 0; - MapPlatformData Data GUARDED_BY(Mutex) = {}; - ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {}; - bool Exhausted GUARDED_BY(Mutex) = false; + uptr AllocatedUser = 0; + }; + + struct UnpaddedRegionInfo { + // Mutex for operations on freelist + HybridMutex FLLock; + ConditionVariableT FLLockCV GUARDED_BY(FLLock); + // Mutex for memmap operations + HybridMutex MMLock ACQUIRED_BEFORE(FLLock); + // `RegionBeg` is initialized before thread creation and won't be changed. + uptr RegionBeg = 0; + u32 RandState GUARDED_BY(MMLock) = 0; + BlocksInfo FreeListInfo GUARDED_BY(FLLock); + PagesInfo MemMapInfo GUARDED_BY(MMLock); + // The minimum size of pushed blocks to trigger page release. 
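[Illustrative sketch, not part of the patch] The reworked RegionInfo groups its fields by which mutex guards them: freelist counters and the block list under FLLock, mapping and release state under MMLock, with MMLock documented as acquired before FLLock, and the whole struct padded out to a cache-line multiple (as the padding that follows shows). A stripped-down model of that layout with standard types:

#include <cstddef>
#include <cstdint>
#include <mutex>

constexpr std::size_t kCacheLineSize = 64;  // assumed cache-line size

// The real code keeps an UnpaddedRegionInfo plus an explicit Padding[] member
// sized from SCUDO_CACHE_LINE_SIZE; alignas() is used here only to keep the
// toy version short.
struct alignas(kCacheLineSize) ToyRegionInfo {
  // Freelist state, guarded by FLLock.
  std::mutex FLLock;
  std::uint64_t PoppedBlocks = 0;
  std::uint64_t PushedBlocks = 0;

  // Mapping/release state, guarded by MMLock.
  // Documented lock order: MMLock is acquired before FLLock.
  std::mutex MMLock;
  std::uint64_t MappedUser = 0;
  std::uint64_t AllocatedUser = 0;
};

static_assert(sizeof(ToyRegionInfo) % kCacheLineSize == 0,
              "each region occupies whole cache lines");

int main() {
  ToyRegionInfo Regions[4];
  (void)Regions;
  return 0;
}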
+ uptr TryReleaseThreshold GUARDED_BY(MMLock) = 0; + ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {}; + bool Exhausted GUARDED_BY(MMLock) = false; + bool isPopulatingFreeList GUARDED_BY(FLLock) = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - @@ -412,18 +560,20 @@ private: }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); - uptr PrimaryBase = 0; - MapPlatformData Data = {}; - atomic_s32 ReleaseToOsIntervalMs = {}; - alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; - RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); return &RegionInfoArray[ClassId]; } - uptr getRegionBaseByClassId(uptr ClassId) const { - return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); + uptr getRegionBaseByClassId(uptr ClassId) { + RegionInfo *Region = getRegionInfo(ClassId); + Region->MMLock.assertHeld(); + + if (!Config::getEnableContiguousRegions() && + !Region->MemMapInfo.MemMap.isAllocated()) { + return 0U; + } + return Region->MemMapInfo.MemMap.getBase(); } static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { @@ -435,21 +585,168 @@ private: } static uptr compactPtrGroup(CompactPtrT CompactPtr) { - return static_cast<uptr>(CompactPtr) >> (GroupSizeLog - CompactPtrScale); + const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1; + return static_cast<uptr>(CompactPtr) & ~Mask; + } + static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) { + DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U); + return Base + (CompactPtrGroupBase << CompactPtrScale); + } + + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; } - static uptr batchGroupBase(uptr Base, uptr GroupId) { - return (GroupId << GroupSizeLog) + Base; + + ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize > PageSize; + } + + ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId, + MemMapT MemMap, bool EnableRandomOffset) + REQUIRES(Region->MMLock) { + DCHECK(!Region->MemMapInfo.MemMap.isAllocated()); + DCHECK(MemMap.isAllocated()); + + const uptr PageSize = getPageSizeCached(); + + Region->MemMapInfo.MemMap = MemMap; + + Region->RegionBeg = MemMap.getBase(); + if (EnableRandomOffset) { + Region->RegionBeg += + (getRandomModN(&Region->RandState, 16) + 1) * PageSize; + } + + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. + if (isSmallBlock(getSizeByClassId(ClassId))) + Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; + else + Region->TryReleaseThreshold = PageSize; + } + + void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) + REQUIRES(Region->FLLock) { + DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by TransferBatch in freelist for all + // size-classes. In addition, TransferBatch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A TransferBatch records the + // block address of itself. See the figure below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... 
| | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the TransferBatch, the block used + // by TransferBatch is also free for use. We don't need to recycle the + // TransferBatch. Note that the correctness is maintained by the invariant, + // + // Each popBlocks() request returns the entire TransferBatch. Returning + // part of the blocks in a TransferBatch is invalid. + // + // This ensures that TransferBatch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the TransferBatch too. To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last TransferBatch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. + BG = reinterpret_cast<BatchGroupT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. + BG->CompactPtrGroupBase = 0; + // `BG` is also the block of BatchClassId. Note that this is different + // from `CreateGroup` in `pushBlocksImpl` + BG->PushedBlocks = 1; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId)); + + Region->FreeListInfo.BlockList.push_front(BG); + } + + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `TransferBatch` on the last element. + TransferBatchT *TB = reinterpret_cast<TransferBatchT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are + // recorded in the TransferBatch. + TB->add(Array[Size - 1]); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG))); + --Size; + DCHECK_EQ(BG->PushedBlocks, 1U); + // `TB` is also the block of BatchClassId. + BG->PushedBlocks += 1; + BG->Batches.push_front(TB); + } + + TransferBatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast<TransferBatchT *>( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - 1); + } + // `UnusedSlots` is u16 so the result will be also fit in u16. 
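[Illustrative sketch, not part of the patch] The "self-contained" invariant described above can be seen in a toy model: the batch object is constructed inside one of the free blocks it records and lists that block's own address as an entry, so draining the batch also hands out the batch's own storage and nothing needs a separate deallocation. Made-up types, not scudo's TransferBatch:

#include <cstdint>
#include <cstdio>
#include <new>

struct ToyBatch {
  static constexpr int MaxCached = 4;
  std::uintptr_t Blocks[MaxCached] = {};
  int Count = 0;
  void add(std::uintptr_t B) { Blocks[Count++] = B; }
};

int main() {
  // Pretend these are three free blocks of the batch size class.
  alignas(ToyBatch) unsigned char Slab[sizeof(ToyBatch) * 3] = {};
  const std::uintptr_t B0 = reinterpret_cast<std::uintptr_t>(&Slab[0]);
  const std::uintptr_t B1 = B0 + sizeof(ToyBatch);
  const std::uintptr_t B2 = B1 + sizeof(ToyBatch);

  // Construct the batch inside block B2 and record B2 itself as a free block.
  ToyBatch *TB = new (reinterpret_cast<void *>(B2)) ToyBatch();
  TB->add(B2);  // self-contained: the batch records its own storage
  TB->add(B0);
  TB->add(B1);

  // Draining the whole batch also gives out B2, i.e. the batch's own storage,
  // which is why the real TransferBatch never needs to be recycled separately.
  for (int I = TB->Count - 1; I >= 0; --I)
    std::printf("popped block at %p\n", reinterpret_cast<void *>(TB->Blocks[I]));
  return 0;
}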
+ const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; } // Push the blocks to their batch group. The layout will be like, // - // FreeList - > BG -> BG -> BG - // | | | - // v v v - // TB TB TB - // | - // v - // TB + // FreeListInfo.BlockList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB // // Each BlockGroup(BG) will associate with unique group id and the free blocks // are managed by a list of TransferBatch(TB). To reduce the time of inserting @@ -457,86 +754,32 @@ private: // that we can get better performance of maintaining sorted property. // Use `SameGroup=true` to indicate that all blocks in the array are from the // same group then we will skip checking the group id of each block. - // - // The region mutex needs to be held while calling this method. void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, CompactPtrT *Array, u32 Size, bool SameGroup = false) - REQUIRES(Region->Mutex) { + REQUIRES(Region->FLLock) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); DCHECK_GT(Size, 0U); - auto CreateGroup = [&](uptr GroupId) { - BatchGroup *BG = nullptr; - TransferBatch *TB = nullptr; - if (ClassId == SizeClassMap::BatchClassId) { - DCHECK_GE(Size, 2U); - - // Free blocks are recorded by TransferBatch in freelist, blocks of - // BatchClassId are included. In order not to use additional memory to - // record blocks of BatchClassId, they are self-contained. I.e., A - // TransferBatch may record the block address of itself. See the figure - // below: - // - // TransferBatch at 0xABCD - // +----------------------------+ - // | Free blocks' addr | - // | +------+------+------+ | - // | |0xABCD|... |... | | - // | +------+------+------+ | - // +----------------------------+ - // - // The safeness of manipulating TransferBatch is kept by the invariant, - // - // The unit of each pop-block request is a TransferBatch. Return - // part of the blocks in a TransferBatch is not allowed. - // - // This ensures that TransferBatch won't leak the address itself while - // it's still holding other valid data. - // - // Besides, BatchGroup uses the same size-class as TransferBatch does - // and its address is recorded in the TransferBatch too. To maintain the - // safeness, the invariant to keep is, - // - // The address of itself is always recorded in the last TransferBatch - // of the freelist (also imply that the freelist should only be - // updated with push_front). Once the last TransferBatch is popped, - // the BatchGroup becomes invalid. - // - // As a result, the blocks used by BatchGroup and TransferBatch are - // reusable and don't need additional space for them. - BG = reinterpret_cast<BatchGroup *>( - decompactPtr(ClassId, Array[Size - 1])); - BG->Batches.clear(); - - TB = reinterpret_cast<TransferBatch *>( - decompactPtr(ClassId, Array[Size - 2])); - TB->clear(); - - // Append the blocks used by BatchGroup and TransferBatch immediately so - // that we ensure that they are in the last TransBatch. 
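[Illustrative sketch, not part of the patch] The layout in the comment above, a per-region block list of BatchGroups, each owning a list of TransferBatches, written out as a plain skeleton with simplified stand-in types:

#include <cstdint>
#include <forward_list>
#include <vector>

struct ToyTransferBatch {
  std::vector<std::uintptr_t> Blocks;  // up to MaxCachedPerBatch entries
};

struct ToyBatchGroup {
  std::uintptr_t CompactPtrGroupBase = 0;       // which memory group this is
  std::uint32_t PushedBlocks = 0;               // blocks pushed into this group
  std::uint32_t MaxCachedPerBatch = 0;
  std::forward_list<ToyTransferBatch> Batches;  // newest batch at the front
};

struct ToyRegionFreeList {
  // Kept sorted by CompactPtrGroupBase so pushes can walk to the right group.
  std::forward_list<ToyBatchGroup> BlockList;
  std::uint64_t PoppedBlocks = 0;
  std::uint64_t PushedBlocks = 0;
};

int main() {
  ToyRegionFreeList FL;
  FL.BlockList.push_front(ToyBatchGroup{});
  return 0;
}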
- TB->appendFromArray(Array + Size - 2, 2); - Size -= 2; - } else { - BG = C->createGroup(); - BG->Batches.clear(); - - TB = C->createBatch(ClassId, nullptr); - TB->clear(); - } + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = + reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock()); + BG->Batches.clear(); + TransferBatchT *TB = + reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); + TB->clear(); - BG->GroupId = GroupId; - // TODO(chiahungduan): Avoid the use of push_back() in `Batches`. + BG->CompactPtrGroupBase = CompactPtrGroupBase; BG->Batches.push_front(TB); BG->PushedBlocks = 0; - BG->PushedBlocksAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = - TransferBatch::getMaxCached(getSizeByClassId(ClassId)); + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached; return BG; }; - auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) { - SinglyLinkedList<TransferBatch> &Batches = BG->Batches; - TransferBatch *CurBatch = Batches.front(); + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList<TransferBatchT> &Batches = BG->Batches; + TransferBatchT *CurBatch = Batches.front(); DCHECK_NE(CurBatch, nullptr); for (u32 I = 0; I < Size;) { @@ -544,9 +787,8 @@ private: u16 UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); if (UnusedSlots == 0) { - CurBatch = C->createBatch( - ClassId, - reinterpret_cast<void *>(decompactPtr(ClassId, Array[I]))); + CurBatch = + reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); CurBatch->clear(); Batches.push_front(CurBatch); UnusedSlots = BG->MaxCachedPerBatch; @@ -560,40 +802,33 @@ private: BG->PushedBlocks += Size; }; - BatchGroup *Cur = Region->FreeList.front(); - - if (ClassId == SizeClassMap::BatchClassId) { - if (Cur == nullptr) { - // Don't need to classify BatchClassId. - Cur = CreateGroup(/*GroupId=*/0); - Region->FreeList.push_front(Cur); - } - InsertBlocks(Cur, Array, Size); - return; - } + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Region->FreeListInfo.BlockList.front(); // In the following, `Cur` always points to the BatchGroup for blocks that // will be pushed next. `Prev` is the element right before `Cur`. - BatchGroup *Prev = nullptr; + BatchGroupT *Prev = nullptr; - while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) { + while (Cur != nullptr && + compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { Prev = Cur; Cur = Cur->Next; } - if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) { + if (Cur == nullptr || + compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { Cur = CreateGroup(compactPtrGroup(Array[0])); if (Prev == nullptr) - Region->FreeList.push_front(Cur); + Region->FreeListInfo.BlockList.push_front(Cur); else - Region->FreeList.insert(Prev, Cur); + Region->FreeListInfo.BlockList.insert(Prev, Cur); } // All the blocks are from the same group, just push without checking group // id. 
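[Illustrative sketch, not part of the patch] The push path above walks the sorted group list with a Prev/Cur pair and splices in a new group when no existing one matches the incoming block's group base. The same walk condensed over a toy singly linked list:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ToyGroup {
  std::uintptr_t GroupBase;
  ToyGroup *Next;
};

// Find the group for `Base` in a list sorted by GroupBase, creating and
// splicing in a new node when it's missing. Returns the matching group.
static ToyGroup *findOrCreate(ToyGroup *&Head, std::uintptr_t Base) {
  ToyGroup *Prev = nullptr;
  ToyGroup *Cur = Head;
  while (Cur != nullptr && Base > Cur->GroupBase) {
    Prev = Cur;
    Cur = Cur->Next;
  }
  if (Cur == nullptr || Base != Cur->GroupBase) {
    ToyGroup *New = new ToyGroup{Base, Cur};
    if (Prev == nullptr)
      Head = New;        // becomes the new front
    else
      Prev->Next = New;  // spliced in after Prev
    Cur = New;
  }
  return Cur;
}

int main() {
  ToyGroup *Head = nullptr;
  findOrCreate(Head, 0x200000);
  findOrCreate(Head, 0x100000);
  for (ToyGroup *G = Head; G; G = G->Next)
    std::printf("group base %#zx\n", static_cast<std::size_t>(G->GroupBase));
  // Nodes intentionally leaked in this tiny illustration.
  return 0;
}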
if (SameGroup) { for (u32 I = 0; I < Size; ++I) - DCHECK_EQ(compactPtrGroup(Array[I]), Cur->GroupId); + DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); InsertBlocks(Cur, Array, Size); return; @@ -604,18 +839,20 @@ private: u32 Count = 1; for (u32 I = 1; I < Size; ++I) { if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { - DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId); + DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); InsertBlocks(Cur, Array + I - Count, Count); - while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) { + while (Cur != nullptr && + compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { Prev = Cur; Cur = Cur->Next; } - if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) { + if (Cur == nullptr || + compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { Cur = CreateGroup(compactPtrGroup(Array[I])); DCHECK_NE(Prev, nullptr); - Region->FreeList.insert(Prev, Cur); + Region->FreeListInfo.BlockList.insert(Prev, Cur); } Count = 1; @@ -627,48 +864,180 @@ private: InsertBlocks(Cur, Array + Size - Count, Count); } - // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest - // group id will be considered first. - // - // The region mutex needs to be held while calling this method. - TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region) - REQUIRES(Region->Mutex) { - if (Region->FreeList.empty()) - return nullptr; - - SinglyLinkedList<TransferBatch> &Batches = - Region->FreeList.front()->Batches; - DCHECK(!Batches.empty()); - - TransferBatch *B = Batches.front(); - Batches.pop_front(); + u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount, + bool &ReportRegionExhausted) { + u16 PopCount = 0; + + while (true) { + // We only expect one thread doing the freelist refillment and other + // threads will be waiting for either the completion of the + // `populateFreeListAndPopBatch()` or `pushBlocks()` called by other + // threads. + bool PopulateFreeList = false; + { + ScopedLock FL(Region->FLLock); + if (!Region->isPopulatingFreeList) { + Region->isPopulatingFreeList = true; + PopulateFreeList = true; + } + } + + if (PopulateFreeList) { + ScopedLock ML(Region->MMLock); + + const bool RegionIsExhausted = Region->Exhausted; + if (!RegionIsExhausted) { + PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, + MaxBlockCount); + } + ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; + + { + // Before reacquiring the `FLLock`, the freelist may be used up again + // and some threads are waiting for the freelist refillment by the + // current thread. It's important to set + // `Region->isPopulatingFreeList` to false so the threads about to + // sleep will notice the status change. + ScopedLock FL(Region->FLLock); + Region->isPopulatingFreeList = false; + Region->FLLockCV.notifyAll(Region->FLLock); + } + + break; + } + + // At here, there are two preconditions to be met before waiting, + // 1. The freelist is empty. + // 2. Region->isPopulatingFreeList == true, i.e, someone is still doing + // `populateFreeListAndPopBatch()`. + // + // Note that it has the chance that freelist is empty but + // Region->isPopulatingFreeList == false because all the new populated + // blocks were used up right after the refillment. Therefore, we have to + // check if someone is still populating the freelist. 
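[Illustrative sketch, not part of the patch] popBlocksWithCV() above elects a single refiller via the isPopulatingFreeList flag while other threads wait on the freelist condition variable and re-check after every wake-up. The same coordination pattern with std::condition_variable; the real refill drops FLLock and takes MMLock to map pages, which is collapsed into the loop body here.

#include <condition_variable>
#include <deque>
#include <mutex>

struct ToyRegion {
  std::mutex FLLock;
  std::condition_variable FLLockCV;
  std::deque<int> FreeList;
  bool IsPopulatingFreeList = false;
  int NextId = 0;
};

static int popBlock(ToyRegion &R) {
  while (true) {
    std::unique_lock<std::mutex> FL(R.FLLock);
    if (!R.FreeList.empty()) {
      int B = R.FreeList.front();
      R.FreeList.pop_front();
      return B;
    }
    if (!R.IsPopulatingFreeList) {
      // Become the single refiller; in the real code this is where FLLock is
      // released and MMLock taken to map pages before refilling.
      R.IsPopulatingFreeList = true;
      for (int I = 0; I < 4; ++I)
        R.FreeList.push_back(R.NextId++);
      R.IsPopulatingFreeList = false;
      R.FLLockCV.notify_all();
      continue;  // loop around and pop from the refilled list
    }
    // Someone else is refilling: sleep until the freelist changes, then
    // re-check from the top of the loop.
    R.FLLockCV.wait(FL);
  }
}

int main() { ToyRegion R; return popBlock(R) >= 0 ? 0 : 1; }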
+ ScopedLock FL(Region->FLLock); + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + break; + + if (!Region->isPopulatingFreeList) + continue; + + // Now the freelist is empty and someone's doing the refillment. We will + // wait until anyone refills the freelist or someone finishes doing + // `populateFreeListAndPopBatch()`. The refillment can be done by + // `populateFreeListAndPopBatch()`, `pushBlocks()`, + // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`. + Region->FLLockCV.wait(Region->FLLock); + + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + break; + } + + return PopCount; + } + + u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount) + REQUIRES(Region->FLLock) { + if (Region->FreeListInfo.BlockList.empty()) + return 0U; + + SinglyLinkedList<TransferBatchT> &Batches = + Region->FreeListInfo.BlockList.front()->Batches; + + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `TransferBatch` with single block. + TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)); + Region->FreeListInfo.PoppedBlocks += 1; + return 1U; + } + + // So far, instead of always filling blocks to `MaxBlockCount`, we only + // examine single `TransferBatch` to minimize the time spent in the primary + // allocator. Besides, the sizes of `TransferBatch` and + // `CacheT::getMaxCached()` may also impact the time spent on accessing the + // primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `TransferBatch` according to + // `CacheT::getMaxCached()`. + TransferBatchT *B = Batches.front(); DCHECK_NE(B, nullptr); DCHECK_GT(B->getCount(), 0U); - if (Batches.empty()) { - BatchGroup *BG = Region->FreeList.front(); - Region->FreeList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking while - // allocating. Note that block used by constructing BatchGroup is recorded - // as free blocks in the last element of BatchGroup::Batches. Which means, - // once we pop the last TransferBatch, the block is implicitly - // deallocated. + // BachClassId should always take all blocks in the TransferBatch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `FLLock`. + if (B->empty()) { + Batches.pop_front(); + // `TransferBatch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, BG); + C->deallocate(SizeClassMap::BatchClassId, B); + + if (Batches.empty()) { + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. 
Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last TransferBatch, the block is + // implicitly deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } } - return B; + Region->FreeListInfo.PoppedBlocks += PopCount; + + return PopCount; } - NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) - REQUIRES(Region->Mutex) { - const uptr Size = getSizeByClassId(ClassId); - const u16 MaxCount = TransferBatch::getMaxCached(Size); + NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId, + RegionInfo *Region, + CompactPtrT *ToArray, + const u16 MaxBlockCount) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + if (!Config::getEnableContiguousRegions() && + !Region->MemMapInfo.MemMap.isAllocated()) { + ReservedMemoryT ReservedMemory; + if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize, + "scudo:primary_reserve", + MAP_ALLOWNOMEM))) { + Printf("Can't reserve pages for size class %zu.\n", + getSizeByClassId(ClassId)); + return 0U; + } + initRegion(Region, ClassId, + ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()), + /*EnableRandomOffset=*/false); + } + DCHECK(Region->MemMapInfo.MemMap.isAllocated()); + const uptr Size = getSizeByClassId(ClassId); + const u16 MaxCount = CacheT::getMaxCached(Size); const uptr RegionBeg = Region->RegionBeg; - const uptr MappedUser = Region->MappedUser; - const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size; + const uptr MappedUser = Region->MemMapInfo.MappedUser; + const uptr TotalUserBytes = + Region->MemMapInfo.AllocatedUser + MaxCount * Size; // Map more space for blocks, if necessary. if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. @@ -677,37 +1046,39 @@ private: const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { Region->Exhausted = true; - return false; + return 0U; } - if (MappedUser == 0) - Region->Data = Data; - if (UNLIKELY(!map( - reinterpret_cast<void *>(RegionBeg + MappedUser), MapSize, - "scudo:primary", + + if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( + RegionBeg + MappedUser, MapSize, "scudo:primary", MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0), - &Region->Data))) { - return false; + (useMemoryTagging<Config>(Options.load()) ? 
MAP_MEMTAG + : 0)))) { + return 0U; } - Region->MappedUser += MapSize; + Region->MemMapInfo.MappedUser += MapSize; C->getStats().add(StatMapped, MapSize); } - const u32 NumberOfBlocks = Min( - MaxNumBatches * MaxCount, - static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size)); + const u32 NumberOfBlocks = + Min(MaxNumBatches * MaxCount, + static_cast<u32>((Region->MemMapInfo.MappedUser - + Region->MemMapInfo.AllocatedUser) / + Size)); DCHECK_GT(NumberOfBlocks, 0); constexpr u32 ShuffleArraySize = - MaxNumBatches * TransferBatch::MaxNumCached; + MaxNumBatches * TransferBatchT::MaxNumCached; CompactPtrT ShuffleArray[ShuffleArraySize]; DCHECK_LE(NumberOfBlocks, ShuffleArraySize); const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); - uptr P = RegionBeg + Region->AllocatedUser; + uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); + ScopedLock L(Region->FLLock); + if (ClassId != SizeClassMap::BatchClassId) { u32 N = 1; uptr CurGroup = compactPtrGroup(ShuffleArray[0]); @@ -727,161 +1098,354 @@ private: pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N, /*SameGroup=*/true); } else { - pushBlocksImpl(C, ClassId, Region, ShuffleArray, NumberOfBlocks, - /*SameGroup=*/true); + pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); } + const u16 PopCount = + popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); + + // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record + // the requests from `PushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeListAndPopBatch` is the internal interface so we + // should set the values back to avoid incorrectly setting the stats. + Region->FreeListInfo.PushedBlocks -= NumberOfBlocks; + const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); - Region->AllocatedUser += AllocatedUser; + Region->MemMapInfo.AllocatedUser += AllocatedUser; - return true; + return PopCount; } - void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region, uptr Rss) - REQUIRES(Region->Mutex) { - if (Region->MappedUser == 0) + void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->MMLock, Region->FLLock) { + if (Region->MemMapInfo.MappedUser == 0) return; - const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; - const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId); - Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu total: %6zu rss: %6zuK releases: %6zu last " - "released: %6zuK region: 0x%zx (0x%zx)\n", - Region->Exhausted ? 
"F" : " ", ClassId, - getSizeByClassId(ClassId), Region->MappedUser >> 10, - Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks, InUse, - TotalChunks, Rss >> 10, Region->ReleaseInfo.RangesReleased, - Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg, - getRegionBaseByClassId(ClassId)); + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = + Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize; + uptr RegionPushedBytesDelta = 0; + if (BytesInFreeList >= + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize; + Str->append( + "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " + "inuse: %6zu total: %6zu releases: %6zu last " + "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx (0x%zx)\n", + Region->Exhausted ? "E" : " ", ClassId, getSizeByClassId(ClassId), + Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks, + Region->ReleaseInfo.RangesReleased, + Region->ReleaseInfo.LastReleasedBytes >> 10, + RegionPushedBytesDelta >> 10, Region->RegionBeg, + getRegionBaseByClassId(ClassId)); + } + + void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId, + ScopedString *Str) REQUIRES(Region->MMLock) { + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + + SinglyLinkedList<BatchGroupT> GroupsToRelease; + { + ScopedLock L(Region->FLLock); + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } + + FragmentationRecorder Recorder; + if (!GroupsToRelease.empty()) { + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + + mergeGroupsToReleaseBack(Region, GroupsToRelease); + } + + ScopedLock L(Region->FLLock); + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize; + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr AllocatedPagesCount = + roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize; + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUsePages * PageSize, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); } NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, - bool Force = false) REQUIRES(Region->Mutex) { + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { const uptr BlockSize = getSizeByClassId(ClassId); + uptr BytesInFreeList; + const uptr AllocatedUserEnd = + 
Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + SinglyLinkedList<BatchGroupT> GroupsToRelease; + + { + ScopedLock L(Region->FLLock); + + BytesInFreeList = Region->MemMapInfo.AllocatedUser - + (Region->FreeListInfo.PoppedBlocks - + Region->FreeListInfo.PushedBlocks) * + BlockSize; + if (UNLIKELY(BytesInFreeList == 0)) + return false; + + // ==================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. + // ==================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList, + ReleaseType)) { + return 0; + } + + // ==================================================================== // + // 2. Determine which groups can release the pages. Use a heuristic to + // gather groups that are candidates for doing a release. + // ==================================================================== // + if (ReleaseType == ReleaseToOS::ForceAll) { + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } else { + GroupsToRelease = + collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId)); + } + if (GroupsToRelease.empty()) + return 0; + } + + // Note that we have extracted the `GroupsToRelease` from region freelist. + // It's safe to let pushBlocks()/popBlocks() access the remaining region + // freelist. In the steps 3 and 4, we will temporarily release the FLLock + // and lock it again before step 5. + + // ==================================================================== // + // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`. + // Then we can tell which pages are in-use by querying + // `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + if (UNLIKELY(!Context.hasBlockMarked())) { + mergeGroupsToReleaseBack(Region, GroupsToRelease); + return 0; + } + + // ==================================================================== // + // 4. Release the unused physical pages back to the OS. + // ==================================================================== // + RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap, + Region->RegionBeg, + Context.getReleaseOffset()); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedRangesCount() > 0) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); + Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + } + Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + + // ====================================================================== // + // 5. Merge the `GroupsToRelease` back to the freelist. 
+ // ====================================================================== // + mergeGroupsToReleaseBack(Region, GroupsToRelease); + + return Recorder.getReleasedBytes(); + } + + bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Region->MMLock, Region->FLLock) { + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); const uptr PageSize = getPageSizeCached(); - DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); - const uptr BytesInFreeList = - Region->AllocatedUser - - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = (Region->Stats.PushedBlocks - - Region->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. - - bool CheckDensity = BlockSize < PageSize / 16U; + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). + if (BytesInFreeList <= + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + } + + const uptr RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (RegionPushedBytesDelta < PageSize) + return false; + // Releasing smaller blocks is expensive, so we want to make sure that a // significant amount of bytes are free, and that there has been a good // amount of batches pushed to the freelist before attempting to release. - if (CheckDensity) { - if (!Force && BytesPushed < Region->AllocatedUser / 16U) - return 0; - } + if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal) + if (RegionPushedBytesDelta < Region->TryReleaseThreshold) + return false; - if (!Force) { + if (ReleaseType == ReleaseToOS::Normal) { const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) - return 0; - if (Region->ReleaseInfo.LastReleaseAtNs + - static_cast<u64>(IntervalMs) * 1000000 > - getMonotonicTime()) { - return 0; // Memory was returned recently. - } - } + return false; - const uptr GroupSize = (1U << GroupSizeLog); - const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg; - const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); - auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { - return decompactPtrInternal(CompactPtrBase, CompactPtr); - }; + // The constant 8 here is selected from profiling some apps and the number + // of unreleased pages in the large size classes is around 16 pages or + // more. Choose half of it as a heuristic and which also avoids page + // release every time for every pushBlocks() attempt by large blocks. 
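[Illustrative sketch, not part of the patch] Condensed into one predicate, the checkpoint update above plus the small-block threshold and the large-block interval bypass that follows look roughly like this; same decision shape, placeholder signature, not a drop-in copy of hasChanceToReleasePages().

#include <cstdint>

enum class ReleaseToOS { Normal, ForceAll };

struct ReleaseState {
  std::uint64_t BytesInFreeListAtLastCheckpoint = 0;
  std::uint64_t TryReleaseThreshold = 0;
  std::uint64_t LastReleaseAtNs = 0;
};

// Rough shape of the "is a page release worth attempting?" decision.
static bool hasChanceToReleasePages(ReleaseState &S, std::uint64_t BytesInFreeList,
                                    bool SmallBlock, bool LargeBlock,
                                    std::uint64_t PageSize, std::int64_t IntervalMs,
                                    std::uint64_t NowNs, ReleaseToOS Type) {
  // Track the smallest BytesInFreeList seen so the delta below is never
  // underestimated.
  if (BytesInFreeList <= S.BytesInFreeListAtLastCheckpoint)
    S.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
  const std::uint64_t Delta = BytesInFreeList - S.BytesInFreeListAtLastCheckpoint;

  if (Delta < PageSize)
    return false;  // not even one page worth of newly freed bytes
  if (SmallBlock && Type == ReleaseToOS::Normal && Delta < S.TryReleaseThreshold)
    return false;  // small blocks use the adaptive threshold
  if (Type == ReleaseToOS::Normal) {
    if (IntervalMs < 0)
      return false;  // periodic release disabled
    // Large blocks with plenty of newly freed bytes may bypass the interval.
    const bool Bypass = LargeBlock && Delta > 8 * PageSize;
    if (!Bypass && S.LastReleaseAtNs + std::uint64_t(IntervalMs) * 1000000 > NowNs)
      return false;  // released too recently
  }
  return true;
}

int main() {
  ReleaseState S;
  S.TryReleaseThreshold = 7 * 4096;
  const bool Go = hasChanceToReleasePages(S, /*BytesInFreeList=*/64 * 4096,
                                          /*SmallBlock=*/true, /*LargeBlock=*/false,
                                          /*PageSize=*/4096, /*IntervalMs=*/0,
                                          /*NowNs=*/1, ReleaseToOS::Normal);
  return Go ? 0 : 1;
}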
+ const bool ByPassReleaseInterval = + isLargeBlock(BlockSize) && RegionPushedBytesDelta > 8 * PageSize; + if (!ByPassReleaseInterval) { + if (Region->ReleaseInfo.LastReleaseAtNs + + static_cast<u64>(IntervalMs) * 1000000 > + getMonotonicTimeFast()) { + // Memory was returned recently. + return false; + } + } + } // if (ReleaseType == ReleaseToOS::Normal) - // Instead of always preparing PageMap for the entire region, we only do it - // for the range of releasing groups. To do that, the free-block marking - // process includes visiting BlockGroups twice. + return true; + } - // The first visit is to determine the range of BatchGroups we are going to - // release. And we will extract those BatchGroups out and push into - // `GroupToRelease`. - SinglyLinkedList<BatchGroup> GroupToRelease; - GroupToRelease.clear(); + SinglyLinkedList<BatchGroupT> + collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase) + REQUIRES(Region->MMLock, Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PageSize = getPageSizeCached(); + SinglyLinkedList<BatchGroupT> GroupsToRelease; - // This is only used for debugging to ensure the consistency of the number - // of groups. - uptr NumberOfBatchGroups = Region->FreeList.size(); + // We are examining each group and will take the minimum distance to the + // release threshold as the next Region::TryReleaseThreshold(). Note that if + // the size of free blocks has reached the release threshold, the distance + // to the next release will be PageSize * SmallerBlockReleasePageDelta. See + // the comment on `SmallerBlockReleasePageDelta` for more details. + uptr MinDistToThreshold = GroupSize; - for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr; + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; BG != nullptr;) { - const uptr PushedBytesDelta = - BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint; - if (PushedBytesDelta * BlockSize < PageSize) { - Prev = BG; - BG = BG->Next; - continue; - } - - // Group boundary does not necessarily have the same alignment as Region. - // It may sit across a Region boundary. Which means that we may have the - // following two cases, + // Group boundary is always GroupSize-aligned from CompactPtr base. The + // layout of memory groups is like, // - // 1. Group boundary sits before RegionBeg. + // (CompactPtrBase) + // #1 CompactPtrGroupBase #2 CompactPtrGroupBase ... + // | | | + // v v v + // +-----------------------+-----------------------+ + // \ / \ / + // --- GroupSize --- --- GroupSize --- // - // (BatchGroupBase) - // batchGroupBase RegionBeg BatchGroupEnd - // | | | - // v v v - // +------------+----------------+ - // \ / - // ------ GroupSize ------ - // - // 2. Group boundary sits after RegionBeg. - // - // (BatchGroupBase) - // RegionBeg batchGroupBase BatchGroupEnd - // | | | - // v v v - // +-----------+-----------------------------+ - // \ / - // ------ GroupSize ------ - // - // Note that in the first case, the group range before RegionBeg is never - // used. Therefore, while calculating the used group size, we should - // exclude that part to get the correct size. + // After decompacting the CompactPtrGroupBase, we expect the alignment + // property is held as well. 
const uptr BatchGroupBase = - Max(batchGroupBase(CompactPtrBase, BG->GroupId), Region->RegionBeg); + decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase); + DCHECK_LE(Region->RegionBeg, BatchGroupBase); DCHECK_GE(AllocatedUserEnd, BatchGroupBase); - const uptr BatchGroupEnd = - batchGroupBase(CompactPtrBase, BG->GroupId) + GroupSize; - const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd - ? BatchGroupEnd - BatchGroupBase - : AllocatedUserEnd - BatchGroupBase; - if (AllocatedGroupSize == 0) { - Prev = BG; - BG = BG->Next; - continue; - } - + DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U); // TransferBatches are pushed in front of BG.Batches. The first one may // not have all caches used. const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch + BG->Batches.front()->getCount(); const uptr BytesInBG = NumBlocks * BlockSize; + + if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) { + BG->BytesInBGAtLastCheckpoint = BytesInBG; + Prev = BG; + BG = BG->Next; + continue; + } + + const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG; + // Given the randomness property, we try to release the pages only if the // bytes used by free blocks exceed certain proportion of group size. Note // that this heuristic only applies when all the spaces in a BatchGroup // are allocated. - if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize < - (100U - 1U - BlockSize / 16U)) { - Prev = BG; - BG = BG->Next; - continue; + if (isSmallBlock(BlockSize)) { + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? GroupSize + : AllocatedUserEnd - BatchGroupBase; + const uptr ReleaseThreshold = + (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; + const bool HighDensity = BytesInBG >= ReleaseThreshold; + const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); + // If all blocks in the group are released, we will do range marking + // which is fast. Otherwise, we will wait until we have accumulated + // a certain amount of free memory. + const bool ReachReleaseDelta = + MayHaveReleasedAll + ? true + : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; + + if (!HighDensity) { + DCHECK_LE(BytesInBG, ReleaseThreshold); + // The following is the usage of a memroy group, + // + // BytesInBG ReleaseThreshold + // / \ v + // +---+---------------------------+-----+ + // | | | | | + // +---+---------------------------+-----+ + // \ / ^ + // PushedBytesDelta GroupEnd + MinDistToThreshold = + Min(MinDistToThreshold, + ReleaseThreshold - BytesInBG + PushedBytesDelta); + } else { + // If it reaches high density at this round, the next time we will try + // to release is based on SmallerBlockReleasePageDelta + MinDistToThreshold = + Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); + } + + if (!HighDensity || !ReachReleaseDelta) { + Prev = BG; + BG = BG->Next; + continue; + } } - // If `BG` is the first BatchGroup in the list, we only need to advance - // `BG` and call FreeList::pop_front(). No update is needed for `Prev`. + // If `BG` is the first BatchGroupT in the list, we only need to advance + // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed + // for `Prev`. // // (BG) (BG->Next) // Prev Cur BG @@ -892,7 +1456,7 @@ private: // +--+ +--+ // // Otherwise, `Prev` will be used to extract the `Cur` from the - // `FreeList`. + // `FreeListInfo.BlockList`. 
// // (BG) (BG->Next) // Prev Cur BG @@ -902,7 +1466,7 @@ private: // | | -> |X | -> | | -> ... // +--+ +--+ +--+ // - // After FreeList::extract(), + // After FreeListInfo.BlockList::extract(), // // Prev Cur BG // | | | @@ -913,26 +1477,53 @@ private: // +--------+ // // Note that we need to advance before pushing this BatchGroup to - // GroupToRelease because it's a destructive operation. + // GroupsToRelease because it's a destructive operation. - BatchGroup *Cur = BG; + BatchGroupT *Cur = BG; BG = BG->Next; + // Ideally, we may want to update this only after successful release. + // However, for smaller blocks, each block marking is a costly operation. + // Therefore, we update it earlier. + // TODO: Consider updating this after releasing pages if `ReleaseRecorder` + // can tell the released bytes in each group. + Cur->BytesInBGAtLastCheckpoint = BytesInBG; + if (Prev != nullptr) - Region->FreeList.extract(Prev, Cur); + Region->FreeListInfo.BlockList.extract(Prev, Cur); else - Region->FreeList.pop_front(); - GroupToRelease.push_back(Cur); + Region->FreeListInfo.BlockList.pop_front(); + GroupsToRelease.push_back(Cur); } - if (GroupToRelease.empty()) - return 0; + // Only small blocks have the adaptive `TryReleaseThreshold`. + if (isSmallBlock(BlockSize)) { + // If the MinDistToThreshold is not updated, that means each memory group + // may have only pushed less than a page size. In that case, just set it + // back to normal. + if (MinDistToThreshold == GroupSize) + MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; + Region->TryReleaseThreshold = MinDistToThreshold; + } - const uptr ReleaseBase = - Max(batchGroupBase(CompactPtrBase, GroupToRelease.front()->GroupId), - Region->RegionBeg); + return GroupsToRelease; + } + + PageReleaseContext + markFreeBlocks(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase, + SinglyLinkedList<BatchGroupT> &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); + auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { + return decompactPtrInternal(CompactPtrBase, CompactPtr); + }; + + const uptr ReleaseBase = decompactGroupBase( + CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase); const uptr LastGroupEnd = - Min(batchGroupBase(CompactPtrBase, GroupToRelease.back()->GroupId) + + Min(decompactGroupBase(CompactPtrBase, + GroupsToRelease.back()->CompactPtrGroupBase) + GroupSize, AllocatedUserEnd); // The last block may straddle the group boundary. Rounding up to BlockSize @@ -941,26 +1532,25 @@ private: roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) + Region->RegionBeg; const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase; + const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg; - ReleaseRecorder Recorder(ReleaseBase, &Region->Data); - PageReleaseContext Context( - BlockSize, Region->AllocatedUser, /*NumberOfRegions=*/1U, - ReleaseRangeSize, /*ReleaseOffset=*/ReleaseBase - Region->RegionBeg); - - for (BatchGroup &BG : GroupToRelease) { - BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks; + PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + ReleaseRangeSize, ReleaseOffset); + // We may not be able to do the page release in a rare case that we may + // fail on PageMap allocation. 
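[Illustrative sketch, not part of the patch] A worked instance of the density check in collectGroupsToRelease(): for a fully allocated group and a 32-byte class the threshold lands at the 97% figure quoted in the earlier SmallerBlockReleasePageDelta comment. The 1 MiB group size below is an assumption for the arithmetic, not a value read from the patch.

#include <cstdio>

int main() {
  const unsigned long AllocatedGroupSize = 1UL << 20;  // assume a fully used 1 MiB group
  const unsigned long BlockSize = 32;

  // Same shape as: (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U
  const unsigned long ReleaseThreshold =
      (AllocatedGroupSize * (100 - 1 - BlockSize / 16)) / 100;

  std::printf("release when free bytes in group >= %lu (%.0f%% of %lu)\n",
              ReleaseThreshold, 100.0 * ReleaseThreshold / AllocatedGroupSize,
              AllocatedGroupSize);
  return 0;
}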
+ if (UNLIKELY(!Context.ensurePageMapAllocated())) + return Context; - // TODO(chiahungduan): Replace GroupId with BatchGroupBase to simplify the - // calculation here and above (where we determine the set of groups to - // release). + for (BatchGroupT &BG : GroupsToRelease) { const uptr BatchGroupBase = - Max(batchGroupBase(CompactPtrBase, BG.GroupId), Region->RegionBeg); - const uptr BatchGroupEnd = - batchGroupBase(CompactPtrBase, BG.GroupId) + GroupSize; + decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase); + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd - ? BatchGroupEnd - BatchGroupBase + ? GroupSize : AllocatedUserEnd - BatchGroupBase; const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize; + const bool MayContainLastBlockInRegion = + BatchGroupUsedEnd == AllocatedUserEnd; const bool BlockAlignedWithUsedEnd = (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0; @@ -972,84 +1562,174 @@ private: BG.Batches.front()->getCount(); if (NumBlocks == MaxContainedBlocks) { - for (const auto &It : BG.Batches) + for (const auto &It : BG.Batches) { + if (&It != BG.Batches.front()) + DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch); for (u16 I = 0; I < It.getCount(); ++I) - DCHECK_EQ(compactPtrGroup(It.get(I)), BG.GroupId); + DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase); + } Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd, - Region->RegionBeg); + Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser); } else { DCHECK_LT(NumBlocks, MaxContainedBlocks); // Note that we don't always visit blocks in each BatchGroup so that we // may miss the chance of releasing certain pages that cross // BatchGroups. - Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg); + Context.markFreeBlocksInRegion( + BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion); } } DCHECK(Context.hasBlockMarked()); - auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + return Context; + } - if (Recorder.getReleasedRangesCount() > 0) { - Region->ReleaseInfo.PushedBlocksAtLastRelease = - Region->Stats.PushedBlocks; - Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); - Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - } - Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); - - // Merge GroupToRelease back to the Region::FreeList. Note that both - // `Region->FreeList` and `GroupToRelease` are sorted. - for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr;;) { - if (BG == nullptr || GroupToRelease.empty()) { - if (!GroupToRelease.empty()) - Region->FreeList.append_back(&GroupToRelease); + void mergeGroupsToReleaseBack(RegionInfo *Region, + SinglyLinkedList<BatchGroupT> &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + ScopedLock L(Region->FLLock); + + // After merging two freelists, we may have redundant `BatchGroup`s that + // need to be recycled. The number of unused `BatchGroup`s is expected to be + // small. Pick a constant which is inferred from real programs. 
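// (Illustrative reading of the recycling below: up to MaxUnusedSize compact pointers are staged in the local Blocks[] array; once one more BatchGroup, plus possibly an emptied TransferBatch, would no longer fit, the staged pointers are flushed to the BatchClass freelist via pushBatchClassBlocks() and the array is reused.)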
+ constexpr uptr MaxUnusedSize = 8; + CompactPtrT Blocks[MaxUnusedSize]; + u32 Idx = 0; + RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId); + // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s + // when we are manipulating the freelist of `BatchClassRegion`. Instead, we + // should just push it back to the freelist when we merge two `BatchGroup`s. + // This logic hasn't been implemented because we haven't supported releasing + // pages in `BatchClassRegion`. + DCHECK_NE(BatchClassRegion, Region); + + // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note + // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are + // sorted. + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; + ;) { + if (BG == nullptr || GroupsToRelease.empty()) { + if (!GroupsToRelease.empty()) + Region->FreeListInfo.BlockList.append_back(&GroupsToRelease); break; } - DCHECK_NE(BG->GroupId, GroupToRelease.front()->GroupId); + DCHECK(!BG->Batches.empty()); + + if (BG->CompactPtrGroupBase < + GroupsToRelease.front()->CompactPtrGroupBase) { + Prev = BG; + BG = BG->Next; + continue; + } + + BatchGroupT *Cur = GroupsToRelease.front(); + TransferBatchT *UnusedTransferBatch = nullptr; + GroupsToRelease.pop_front(); + + if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) { + BG->PushedBlocks += Cur->PushedBlocks; + // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while + // collecting the `GroupsToRelease`. + BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint; + const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch; + + // Note that the first TransferBatches in both `Batches` may not be + // full and only the first TransferBatch can have non-full blocks. Thus + // we have to merge them before appending one to another. + if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) { + BG->Batches.append_back(&Cur->Batches); + } else { + TransferBatchT *NonFullBatch = Cur->Batches.front(); + Cur->Batches.pop_front(); + const u16 NonFullBatchCount = NonFullBatch->getCount(); + // The remaining Batches in `Cur` are full. + BG->Batches.append_back(&Cur->Batches); + + if (BG->Batches.front()->getCount() == MaxCachedPerBatch) { + // Only 1 non-full TransferBatch, push it to the front. + BG->Batches.push_front(NonFullBatch); + } else { + const u16 NumBlocksToMove = static_cast<u16>( + Min(static_cast<u16>(MaxCachedPerBatch - + BG->Batches.front()->getCount()), + NonFullBatchCount)); + BG->Batches.front()->appendFromTransferBatch(NonFullBatch, + NumBlocksToMove); + if (NonFullBatch->isEmpty()) + UnusedTransferBatch = NonFullBatch; + else + BG->Batches.push_front(NonFullBatch); + } + } - if (BG->GroupId < GroupToRelease.front()->GroupId) { + const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U; + if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); + if (conditionVariableEnabled()) + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); + Idx = 0; + } + Blocks[Idx++] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur)); + if (UnusedTransferBatch) { + Blocks[Idx++] = + compactPtr(SizeClassMap::BatchClassId, + reinterpret_cast<uptr>(UnusedTransferBatch)); + } Prev = BG; BG = BG->Next; continue; } - // At here, the `BG` is the first BatchGroup with GroupId larger than the - // first element in `GroupToRelease`. 
We need to insert - // `GroupToRelease::front()` (which is `Cur` below) before `BG`. + // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase + // larger than the first element in `GroupsToRelease`. We need to insert + // `GroupsToRelease::front()` (which is `Cur` below) before `BG`. // // 1. If `Prev` is nullptr, we simply push `Cur` to the front of - // FreeList. + // FreeListInfo.BlockList. // 2. Otherwise, use `insert()` which inserts an element next to `Prev`. // // Afterwards, we don't need to advance `BG` because the order between - // `BG` and the new `GroupToRelease::front()` hasn't been checked. - BatchGroup *Cur = GroupToRelease.front(); - GroupToRelease.pop_front(); + // `BG` and the new `GroupsToRelease::front()` hasn't been checked. if (Prev == nullptr) - Region->FreeList.push_front(Cur); + Region->FreeListInfo.BlockList.push_front(Cur); else - Region->FreeList.insert(Prev, Cur); + Region->FreeListInfo.BlockList.insert(Prev, Cur); DCHECK_EQ(Cur->Next, BG); Prev = Cur; } - DCHECK_EQ(Region->FreeList.size(), NumberOfBatchGroups); - (void)NumberOfBatchGroups; + if (Idx != 0) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); + if (conditionVariableEnabled()) + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); + } if (SCUDO_DEBUG) { - BatchGroup *Prev = Region->FreeList.front(); - for (BatchGroup *Cur = Prev->Next; Cur != nullptr; + BatchGroupT *Prev = Region->FreeListInfo.BlockList.front(); + for (BatchGroupT *Cur = Prev->Next; Cur != nullptr; Prev = Cur, Cur = Cur->Next) { - CHECK_LT(Prev->GroupId, Cur->GroupId); + CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase); } } - return Recorder.getReleasedBytes(); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); } + + // The minimum size of pushed blocks that we will try to release the pages in + // that size class. + uptr SmallerBlockReleasePageDelta = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; }; } // namespace scudo diff --git a/standalone/quarantine.h b/standalone/quarantine.h index e65a733ced7..b5f8db0e87c 100644 --- a/standalone/quarantine.h +++ b/standalone/quarantine.h @@ -192,6 +192,12 @@ public: uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); } uptr getCacheSize() const { return atomic_load_relaxed(&MaxCacheSize); } + // This is supposed to be used in test only. 
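// For example (illustrative, assuming the existing drainAndRecycle() helper and gtest macros), a test can drain the quarantine and then assert: Quarantine.drainAndRecycle(&Cache, Cb); EXPECT_TRUE(Quarantine.isEmpty());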
+ bool isEmpty() { + ScopedLock L(CacheMutex); + return Cache.getSize() == 0U; + } + void put(CacheT *C, Callback Cb, Node *Ptr, uptr Size) { C->enqueue(Cb, Ptr, Size); if (C->getSize() > getCacheSize()) diff --git a/standalone/release.cpp b/standalone/release.cpp index 3f40dbec6d7..875a2b0c1c5 100644 --- a/standalone/release.cpp +++ b/standalone/release.cpp @@ -10,7 +10,8 @@ namespace scudo { -HybridMutex RegionPageMap::Mutex = {}; -uptr RegionPageMap::StaticBuffer[RegionPageMap::StaticBufferCount]; +BufferPool<RegionPageMap::StaticBufferCount, + RegionPageMap::StaticBufferNumElements> + RegionPageMap::Buffers; } // namespace scudo diff --git a/standalone/release.h b/standalone/release.h index d29d1c1f53f..b6f76a4d205 100644 --- a/standalone/release.h +++ b/standalone/release.h @@ -11,15 +11,46 @@ #include "common.h" #include "list.h" +#include "mem_map.h" #include "mutex.h" #include "thread_annotations.h" namespace scudo { +template <typename MemMapT> class RegionReleaseRecorder { +public: + RegionReleaseRecorder(MemMapT *RegionMemMap, uptr Base, uptr Offset = 0) + : RegionMemMap(RegionMemMap), Base(Base), Offset(Offset) {} + + uptr getReleasedRangesCount() const { return ReleasedRangesCount; } + + uptr getReleasedBytes() const { return ReleasedBytes; } + + uptr getBase() const { return Base; } + + // Releases [From, To) range of pages back to OS. Note that `From` and `To` + // are offsets from `Base` + Offset. + void releasePageRangeToOS(uptr From, uptr To) { + const uptr Size = To - From; + RegionMemMap->releasePagesToOS(getBase() + Offset + From, Size); + ReleasedRangesCount++; + ReleasedBytes += Size; + } + +private: + uptr ReleasedRangesCount = 0; + uptr ReleasedBytes = 0; + MemMapT *RegionMemMap = nullptr; + uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. + uptr Offset = 0; +}; + class ReleaseRecorder { public: - ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr) - : Base(Base), Data(Data) {} + ReleaseRecorder(uptr Base, uptr Offset = 0, MapPlatformData *Data = nullptr) + : Base(Base), Offset(Offset), Data(Data) {} uptr getReleasedRangesCount() const { return ReleasedRangesCount; } @@ -30,7 +61,7 @@ public: // Releases [From, To) range of pages back to OS. void releasePageRangeToOS(uptr From, uptr To) { const uptr Size = To - From; - releasePagesToOS(Base, From, Size, Data); + releasePagesToOS(Base, From + Offset, Size, Data); ReleasedRangesCount++; ReleasedBytes += Size; } @@ -38,10 +69,129 @@ public: private: uptr ReleasedRangesCount = 0; uptr ReleasedBytes = 0; + // The starting address to release. Note that we may want to combine (Base + + // Offset) as a new Base. However, the Base is retrieved from + // `MapPlatformData` on Fuchsia, which would not be aware of the offset. + // Therefore, store them separately to make it work on all the platforms. uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. 
+ uptr Offset = 0; MapPlatformData *Data = nullptr; }; +class FragmentationRecorder { +public: + FragmentationRecorder() = default; + + uptr getReleasedPagesCount() const { return ReleasedPagesCount; } + + void releasePageRangeToOS(uptr From, uptr To) { + DCHECK_EQ((To - From) % getPageSizeCached(), 0U); + ReleasedPagesCount += (To - From) / getPageSizeCached(); + } + +private: + uptr ReleasedPagesCount = 0; +}; + +// A buffer pool which holds a fixed number of static buffers of `uptr` elements +// for fast buffer allocation. If the request size is greater than +// `StaticBufferNumElements` or if all the static buffers are in use, it'll +// delegate the allocation to map(). +template <uptr StaticBufferCount, uptr StaticBufferNumElements> +class BufferPool { +public: + // Preserve 1 bit in the `Mask` so that we don't need to do zero-check while + // extracting the least significant bit from the `Mask`. + static_assert(StaticBufferCount < SCUDO_WORDSIZE, ""); + static_assert(isAligned(StaticBufferNumElements * sizeof(uptr), + SCUDO_CACHE_LINE_SIZE), + ""); + + struct Buffer { + // Pointer to the buffer's memory, or nullptr if no buffer was allocated. + uptr *Data = nullptr; + + // The index of the underlying static buffer, or StaticBufferCount if this + // buffer was dynamically allocated. This value is initially set to a poison + // value to aid debugging. + uptr BufferIndex = ~static_cast<uptr>(0); + + // Only valid if BufferIndex == StaticBufferCount. + MemMapT MemMap = {}; + }; + + // Return a zero-initialized buffer which can contain at least the given + // number of elements, or a Buffer with null Data on failure. + Buffer getBuffer(const uptr NumElements) { + if (UNLIKELY(NumElements > StaticBufferNumElements)) + return getDynamicBuffer(NumElements); + + uptr index; + { + // TODO: In general, we expect this operation to be fast, so the + // waiting thread won't be put to sleep. The HybridMutex does implement + // busy-waiting, but we may want to review the performance and see if + // we need an explicit spin lock here. + ScopedLock L(Mutex); + index = getLeastSignificantSetBitIndex(Mask); + if (index < StaticBufferCount) + Mask ^= static_cast<uptr>(1) << index; + } + + if (index >= StaticBufferCount) + return getDynamicBuffer(NumElements); + + Buffer Buf; + Buf.Data = &RawBuffer[index * StaticBufferNumElements]; + Buf.BufferIndex = index; + memset(Buf.Data, 0, StaticBufferNumElements * sizeof(uptr)); + return Buf; + } + + void releaseBuffer(Buffer Buf) { + DCHECK_NE(Buf.Data, nullptr); + DCHECK_LE(Buf.BufferIndex, StaticBufferCount); + if (Buf.BufferIndex != StaticBufferCount) { + ScopedLock L(Mutex); + DCHECK_EQ((Mask & (static_cast<uptr>(1) << Buf.BufferIndex)), 0U); + Mask |= static_cast<uptr>(1) << Buf.BufferIndex; + } else { + Buf.MemMap.unmap(Buf.MemMap.getBase(), Buf.MemMap.getCapacity()); + } + } + + bool isStaticBufferTestOnly(const Buffer &Buf) { + DCHECK_NE(Buf.Data, nullptr); + DCHECK_LE(Buf.BufferIndex, StaticBufferCount); + return Buf.BufferIndex != StaticBufferCount; + } + +private: + Buffer getDynamicBuffer(const uptr NumElements) { + // When using a heap-based buffer, precommit the pages backing the + // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization + // where page fault exceptions are skipped as the allocated memory + // is accessed. So far, this is only enabled on Fuchsia. It hasn't proven a + // performance benefit on other platforms. + const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? 
MAP_PRECOMMIT : 0); + const uptr MappedSize = + roundUp(NumElements * sizeof(uptr), getPageSizeCached()); + Buffer Buf; + if (Buf.MemMap.map(/*Addr=*/0, MappedSize, "scudo:counters", MmapFlags)) { + Buf.Data = reinterpret_cast<uptr *>(Buf.MemMap.getBase()); + Buf.BufferIndex = StaticBufferCount; + } + return Buf; + } + + HybridMutex Mutex; + // '1' means that buffer index is not used. '0' means the buffer is in use. + uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0); + uptr RawBuffer[StaticBufferCount * StaticBufferNumElements] GUARDED_BY(Mutex); +}; + // A Region page map is used to record the usage of pages in the regions. It // implements a packed array of Counters. Each counter occupies 2^N bits, enough // to store counter's MaxValue. Ctor will try to use a static buffer first, and @@ -54,35 +204,24 @@ private: class RegionPageMap { public: RegionPageMap() - : Regions(0), - NumCounters(0), - CounterSizeBitsLog(0), - CounterMask(0), - PackingRatioLog(0), - BitOffsetMask(0), - SizePerRegion(0), - BufferSize(0), - Buffer(nullptr) {} + : Regions(0), NumCounters(0), CounterSizeBitsLog(0), CounterMask(0), + PackingRatioLog(0), BitOffsetMask(0), SizePerRegion(0), + BufferNumElements(0) {} RegionPageMap(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) { reset(NumberOfRegions, CountersPerRegion, MaxValue); } ~RegionPageMap() { if (!isAllocated()) return; - if (Buffer == &StaticBuffer[0]) - Mutex.unlock(); - else - unmap(reinterpret_cast<void *>(Buffer), - roundUp(BufferSize, getPageSizeCached())); - Buffer = nullptr; + Buffers.releaseBuffer(Buffer); + Buffer = {}; } // Lock of `StaticBuffer` is acquired conditionally and there's no easy way to // specify the thread-safety attribute properly in current code structure. // Besides, it's the only place we may want to check thread safety. Therefore, // it's fine to bypass the thread-safety analysis now. - void reset(uptr NumberOfRegion, uptr CountersPerRegion, - uptr MaxValue) NO_THREAD_SAFETY_ANALYSIS { + void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) { DCHECK_GT(NumberOfRegion, 0); DCHECK_GT(CountersPerRegion, 0); DCHECK_GT(MaxValue, 0); @@ -90,7 +229,7 @@ public: Regions = NumberOfRegion; NumCounters = CountersPerRegion; - constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; + constexpr uptr MaxCounterBits = sizeof(*Buffer.Data) * 8UL; // Rounding counter storage size up to the power of two allows for using // bit shifts calculating particular counter's Index and offset. const uptr CounterSizeBits = @@ -107,25 +246,11 @@ public: SizePerRegion = roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >> PackingRatioLog; - BufferSize = SizePerRegion * sizeof(*Buffer) * Regions; - if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) && - Mutex.tryLock()) { - Buffer = &StaticBuffer[0]; - memset(Buffer, 0, BufferSize); - } else { - // When using a heap-based buffer, precommit the pages backing the - // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization - // where page fault exceptions are skipped as the allocated memory - // is accessed. - const uptr MmapFlags = - MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? 
MAP_PRECOMMIT : 0); - Buffer = reinterpret_cast<uptr *>( - map(nullptr, roundUp(BufferSize, getPageSizeCached()), - "scudo:counters", MmapFlags, &MapData)); - } + BufferNumElements = SizePerRegion * Regions; + Buffer = Buffers.getBuffer(BufferNumElements); } - bool isAllocated() const { return !!Buffer; } + bool isAllocated() const { return Buffer.Data != nullptr; } uptr getCount() const { return NumCounters; } @@ -134,7 +259,8 @@ public: DCHECK_LT(I, NumCounters); const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; - return (Buffer[Region * SizePerRegion + Index] >> BitOffset) & CounterMask; + return (Buffer.Data[Region * SizePerRegion + Index] >> BitOffset) & + CounterMask; } void inc(uptr Region, uptr I) const { @@ -143,8 +269,8 @@ public: const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); DCHECK_EQ(isAllCounted(Region, I), false); - Buffer[Region * SizePerRegion + Index] += static_cast<uptr>(1U) - << BitOffset; + Buffer.Data[Region * SizePerRegion + Index] += static_cast<uptr>(1U) + << BitOffset; } void incN(uptr Region, uptr I, uptr N) const { @@ -155,7 +281,7 @@ public: const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); DCHECK_EQ(isAllCounted(Region, I), false); - Buffer[Region * SizePerRegion + Index] += N << BitOffset; + Buffer.Data[Region * SizePerRegion + Index] += N << BitOffset; } void incRange(uptr Region, uptr From, uptr To) const { @@ -174,7 +300,7 @@ public: const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); - Buffer[Region * SizePerRegion + Index] |= CounterMask << BitOffset; + Buffer.Data[Region * SizePerRegion + Index] |= CounterMask << BitOffset; } void setAsAllCountedRange(uptr Region, uptr From, uptr To) const { DCHECK_LE(From, To); @@ -197,11 +323,16 @@ public: return get(Region, I) == CounterMask; } - uptr getBufferSize() const { return BufferSize; } - - static const uptr StaticBufferCount = 2048U; + uptr getBufferNumElements() const { return BufferNumElements; } private: + // We may consider making this configurable if there are cases which may + // benefit from this. 
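// With the defaults below (illustrative sizing, assuming a 64-bit uptr): each static buffer spans 512 * 8 = 4096 bytes, so the pool reserves 2 * 4 KB of scratch space up front; larger requests, or requests arriving while both static buffers are in use, fall back to map().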
+ static const uptr StaticBufferCount = 2U; + static const uptr StaticBufferNumElements = 512U; + using BufferPoolT = BufferPool<StaticBufferCount, StaticBufferNumElements>; + static BufferPoolT Buffers; + uptr Regions; uptr NumCounters; uptr CounterSizeBitsLog; @@ -210,12 +341,8 @@ private: uptr BitOffsetMask; uptr SizePerRegion; - uptr BufferSize; - uptr *Buffer; - [[no_unique_address]] MapPlatformData MapData = {}; - - static HybridMutex Mutex; - static uptr StaticBuffer[StaticBufferCount] GUARDED_BY(Mutex); + uptr BufferNumElements; + BufferPoolT::Buffer Buffer; }; template <class ReleaseRecorderT> class FreePagesRangeTracker { @@ -259,10 +386,9 @@ private: }; struct PageReleaseContext { - PageReleaseContext(uptr BlockSize, uptr RegionSize, uptr NumberOfRegions, - uptr ReleaseSize, uptr ReleaseOffset = 0) - : BlockSize(BlockSize), RegionSize(RegionSize), - NumberOfRegions(NumberOfRegions) { + PageReleaseContext(uptr BlockSize, uptr NumberOfRegions, uptr ReleaseSize, + uptr ReleaseOffset = 0) + : BlockSize(BlockSize), NumberOfRegions(NumberOfRegions) { PageSize = getPageSizeCached(); if (BlockSize <= PageSize) { if (PageSize % BlockSize == 0) { @@ -298,15 +424,11 @@ struct PageReleaseContext { // region marking (which includes the complexity of how to handle the last // block in a region). We may consider this after markFreeBlocks() accepts // only free blocks from the same region. - if (NumberOfRegions != 1) { - DCHECK_EQ(ReleaseSize, RegionSize); + if (NumberOfRegions != 1) DCHECK_EQ(ReleaseOffset, 0U); - } PagesCount = roundUp(ReleaseSize, PageSize) / PageSize; PageSizeLog = getLog2(PageSize); - RoundedRegionSize = roundUp(RegionSize, PageSize); - RoundedSize = NumberOfRegions * RoundedRegionSize; ReleasePageOffset = ReleaseOffset >> PageSizeLog; } @@ -315,41 +437,35 @@ struct PageReleaseContext { return PageMap.isAllocated(); } - void ensurePageMapAllocated() { + bool ensurePageMapAllocated() { if (PageMap.isAllocated()) - return; + return true; PageMap.reset(NumberOfRegions, PagesCount, FullPagesBlockCountMax); - DCHECK(PageMap.isAllocated()); + // TODO: Log some message when we fail on PageMap allocation. + return PageMap.isAllocated(); } // Mark all the blocks in the given range [From, To). Instead of visiting all // the blocks, we will just mark the page as all counted. Note that `From` and // `To` have to be page aligned, with one exception: if `To` is equal to the // RegionSize, it does not need to be page aligned. - void markRangeAsAllCounted(uptr From, uptr To, uptr Base) { + bool markRangeAsAllCounted(uptr From, uptr To, uptr Base, + const uptr RegionIndex, const uptr RegionSize) { DCHECK_LT(From, To); + DCHECK_LE(To, Base + RegionSize); DCHECK_EQ(From % PageSize, 0U); + DCHECK_LE(To - From, RegionSize); - ensurePageMapAllocated(); - - const uptr FromOffset = From - Base; - const uptr ToOffset = To - Base; - - const uptr RegionIndex = - NumberOfRegions == 1U ? 0 : FromOffset / RegionSize; - if (SCUDO_DEBUG) { - const uptr ToRegionIndex = - NumberOfRegions == 1U ? 0 : (ToOffset - 1) / RegionSize; - CHECK_EQ(RegionIndex, ToRegionIndex); - } + if (!ensurePageMapAllocated()) + return false; - uptr FromInRegion = FromOffset - RegionIndex * RegionSize; - uptr ToInRegion = ToOffset - RegionIndex * RegionSize; + uptr FromInRegion = From - Base; + uptr ToInRegion = To - Base; uptr FirstBlockInRange = roundUpSlow(FromInRegion, BlockSize); // The straddling block sits across the entire range. 
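// For instance (illustrative numbers): with BlockSize = 8192, FromInRegion = 4096 and ToInRegion = 8192, the first block boundary at or after 4096 is 8192, so the whole range lies inside a single block and there is nothing to mark as all counted; we simply return below.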
if (FirstBlockInRange >= ToInRegion) - return; + return true; // First block may not sit at the first page in the range, move // `FromInRegion` to the first block page. @@ -380,8 +496,9 @@ struct PageReleaseContext { } uptr LastBlockInRange = roundDownSlow(ToInRegion - 1, BlockSize); - if (LastBlockInRange < FromInRegion) - return; + + // Note that LastBlockInRange may be smaller than `FromInRegion` at this + // point because the range may contain only one block. // When the last block sits across `To`, we can't just mark the pages // occupied by the last block as all counted. Instead, we increment the @@ -415,25 +532,52 @@ struct PageReleaseContext { PageMap.setAsAllCountedRange(RegionIndex, getPageIndex(FromInRegion), getPageIndex(ToInRegion - 1)); } + + return true; } - template<class TransferBatchT, typename DecompactPtrT> - void markFreeBlocks(const IntrusiveList<TransferBatchT> &FreeList, - DecompactPtrT DecompactPtr, uptr Base) { - ensurePageMapAllocated(); - - const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; - - // The last block in a region may not use the entire page, so if it's free, - // we mark the following "pretend" memory block(s) as free. - auto markLastBlock = [this, LastBlockInRegion](const uptr RegionIndex) { - uptr PInRegion = LastBlockInRegion + BlockSize; - while (PInRegion < RoundedRegionSize) { - PageMap.incRange(RegionIndex, getPageIndex(PInRegion), - getPageIndex(PInRegion + BlockSize - 1)); - PInRegion += BlockSize; + template <class TransferBatchT, typename DecompactPtrT> + bool markFreeBlocksInRegion(const IntrusiveList<TransferBatchT> &FreeList, + DecompactPtrT DecompactPtr, const uptr Base, + const uptr RegionIndex, const uptr RegionSize, + bool MayContainLastBlockInRegion) { + if (!ensurePageMapAllocated()) + return false; + + if (MayContainLastBlockInRegion) { + const uptr LastBlockInRegion = + ((RegionSize / BlockSize) - 1U) * BlockSize; + // The last block in a region may not use the entire page, so we mark the + // following "pretend" memory block(s) as free in advance. + // + // Region Boundary + // v + // -----+-----------------------+ + // | Last Page | <- Rounded Region Boundary + // -----+-----------------------+ + // |-----||- trailing blocks -| + // ^ + // last block + const uptr RoundedRegionSize = roundUp(RegionSize, PageSize); + const uptr TrailingBlockBase = LastBlockInRegion + BlockSize; + // If the difference between `RoundedRegionSize` and + // `TrailingBlockBase` is larger than a page, that implies the reported + // `RegionSize` may not be accurate. + DCHECK_LT(RoundedRegionSize - TrailingBlockBase, PageSize); + + // Only the last page touched by the last block needs to have the trailing + // blocks marked. Note that if the last "pretend" block straddles the boundary, + // we still have to count it in so that the logic of counting the number + // of blocks on a page is consistent. + uptr NumTrailingBlocks = + (roundUpSlow(RoundedRegionSize - TrailingBlockBase, BlockSize) + + BlockSize - 1) / + BlockSize; + if (NumTrailingBlocks > 0) { + PageMap.incN(RegionIndex, getPageIndex(TrailingBlockBase), + NumTrailingBlocks); } - }; + } // Iterate over free chunks and count how many free chunks affect each // allocated page. @@ -441,14 +585,9 @@ struct PageReleaseContext { // Each chunk affects one page only. for (const auto &It : FreeList) { for (u16 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Base; - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 
0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; + DCHECK_LT(PInRegion, RegionSize); PageMap.inc(RegionIndex, getPageIndex(PInRegion)); - if (PInRegion == LastBlockInRegion) - markLastBlock(RegionIndex); } } } else { @@ -456,24 +595,20 @@ struct PageReleaseContext { DCHECK_GE(RegionSize, BlockSize); for (const auto &It : FreeList) { for (u16 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Base; - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - uptr PInRegion = P - RegionIndex * RegionSize; + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; PageMap.incRange(RegionIndex, getPageIndex(PInRegion), getPageIndex(PInRegion + BlockSize - 1)); - if (PInRegion == LastBlockInRegion) - markLastBlock(RegionIndex); } } } + + return true; } uptr getPageIndex(uptr P) { return (P >> PageSizeLog) - ReleasePageOffset; } + uptr getReleaseOffset() { return ReleasePageOffset << PageSizeLog; } uptr BlockSize; - uptr RegionSize; uptr NumberOfRegions; // For partial region marking, some pages in front are not needed to be // counted. @@ -481,8 +616,6 @@ struct PageReleaseContext { uptr PageSize; uptr PagesCount; uptr PageSizeLog; - uptr RoundedRegionSize; - uptr RoundedSize; uptr FullPagesBlockCountMax; bool SameBlockCountPerPage; RegionPageMap PageMap; @@ -563,21 +696,6 @@ releaseFreeMemoryToOS(PageReleaseContext &Context, RangeTracker.finish(); } -// An overload releaseFreeMemoryToOS which doesn't require the page usage -// information after releasing. -template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT, - typename SkipRegionT> -NOINLINE void -releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, - uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, - ReleaseRecorderT &Recorder, DecompactPtrT DecompactPtr, - SkipRegionT SkipRegion) { - PageReleaseContext Context(BlockSize, /*ReleaseSize=*/RegionSize, RegionSize, - NumberOfRegions); - Context.markFreeBlocks(FreeList, DecompactPtr, Recorder.getBase()); - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); -} - } // namespace scudo #endif // SCUDO_RELEASE_H_ diff --git a/standalone/report.cpp b/standalone/report.cpp index 16eae8c3136..9cef0adc0bb 100644 --- a/standalone/report.cpp +++ b/standalone/report.cpp @@ -21,14 +21,10 @@ public: void append(const char *Format, ...) { va_list Args; va_start(Args, Format); - Message.append(Format, Args); + Message.vappend(Format, Args); va_end(Args); } - NORETURN ~ScopedErrorReport() { - outputRaw(Message.data()); - setAbortMessage(Message.data()); - die(); - } + NORETURN ~ScopedErrorReport() { reportRawError(Message.data()); } private: ScopedString Message; @@ -36,18 +32,6 @@ private: inline void NORETURN trap() { __builtin_trap(); } -void NORETURN reportSoftRSSLimit(uptr RssLimitMb) { - ScopedErrorReport Report; - Report.append("Soft RSS limit of %zu MB exhausted, current RSS is %zu MB\n", - RssLimitMb, GetRSS() >> 20); -} - -void NORETURN reportHardRSSLimit(uptr RssLimitMb) { - ScopedErrorReport Report; - Report.append("Hard RSS limit of %zu MB exhausted, current RSS is %zu MB\n", - RssLimitMb, GetRSS() >> 20); -} - // This could potentially be called recursively if a CHECK fails in the reports. 
void NORETURN reportCheckFailed(const char *File, int Line, const char *Condition, u64 Value1, u64 Value2) { @@ -67,6 +51,13 @@ void NORETURN reportError(const char *Message) { Report.append("%s\n", Message); } +// Generic fatal error message without ScopedString. +void NORETURN reportRawError(const char *Message) { + outputRaw(Message); + setAbortMessage(Message); + die(); +} + void NORETURN reportInvalidFlag(const char *FlagType, const char *Value) { ScopedErrorReport Report; Report.append("invalid value for %s option: '%s'\n", FlagType, Value); @@ -79,14 +70,6 @@ void NORETURN reportHeaderCorruption(void *Ptr) { Report.append("corrupted chunk header at address %p\n", Ptr); } -// Two threads have attempted to modify a chunk header at the same time. This is -// symptomatic of a race-condition in the application code, or general lack of -// proper locking. -void NORETURN reportHeaderRace(void *Ptr) { - ScopedErrorReport Report; - Report.append("race on chunk header at address %p\n", Ptr); -} - // The allocator was compiled with parameters that conflict with field size // requirements. void NORETURN reportSanityCheckError(const char *Field) { diff --git a/standalone/report.h b/standalone/report.h index 3a78ab64b13..a510fdaebb6 100644 --- a/standalone/report.h +++ b/standalone/report.h @@ -15,15 +15,17 @@ namespace scudo { // Reports are *fatal* unless stated otherwise. -// Generic error. +// Generic error, adds newline to end of message. void NORETURN reportError(const char *Message); +// Generic error, but the message is not modified. +void NORETURN reportRawError(const char *Message); + // Flags related errors. void NORETURN reportInvalidFlag(const char *FlagType, const char *Value); // Chunk header related errors. void NORETURN reportHeaderCorruption(void *Ptr); -void NORETURN reportHeaderRace(void *Ptr); // Sanity checks related error. void NORETURN reportSanityCheckError(const char *Field); @@ -34,8 +36,6 @@ void NORETURN reportAllocationSizeTooBig(uptr UserSize, uptr TotalSize, uptr MaxSize); void NORETURN reportOutOfBatchClass(); void NORETURN reportOutOfMemory(uptr RequestedSize); -void NORETURN reportSoftRSSLimit(uptr RssLimitMb); -void NORETURN reportHardRSSLimit(uptr RssLimitMb); enum class AllocatorAction : u8 { Recycling, Deallocating, diff --git a/standalone/report_linux.cpp b/standalone/report_linux.cpp new file mode 100644 index 00000000000..432f6a01696 --- /dev/null +++ b/standalone/report_linux.cpp @@ -0,0 +1,55 @@ +//===-- report_linux.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "common.h" +#include "internal_defs.h" +#include "report.h" +#include "report_linux.h" +#include "string_utils.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +namespace scudo { + +// Fatal internal map() error (potentially OOM related). 
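// Example message (illustrative, assuming errno == ENOMEM and a 2 MB request): "Scudo ERROR: internal map failure (error desc=Cannot allocate memory) requesting 2048KB"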
+void NORETURN reportMapError(uptr SizeIfOOM) { + ScopedString Error; + Error.append("Scudo ERROR: internal map failure (error desc=%s)", + strerror(errno)); + if (SizeIfOOM) + Error.append(" requesting %zuKB", SizeIfOOM >> 10); + Error.append("\n"); + reportRawError(Error.data()); +} + +void NORETURN reportUnmapError(uptr Addr, uptr Size) { + ScopedString Error; + Error.append("Scudo ERROR: internal unmap failure (error desc=%s) Addr 0x%zx " + "Size %zu\n", + strerror(errno), Addr, Size); + reportRawError(Error.data()); +} + +void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot) { + ScopedString Error; + Error.append( + "Scudo ERROR: internal protect failure (error desc=%s) Addr 0x%zx " + "Size %zu Prot %x\n", + strerror(errno), Addr, Size, Prot); + reportRawError(Error.data()); +} + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY diff --git a/standalone/report_linux.h b/standalone/report_linux.h new file mode 100644 index 00000000000..aa0bb247e67 --- /dev/null +++ b/standalone/report_linux.h @@ -0,0 +1,34 @@ +//===-- report_linux.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_REPORT_LINUX_H_ +#define SCUDO_REPORT_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "internal_defs.h" + +namespace scudo { + +// Report a fatal error when a map call fails. SizeIfOOM shall +// hold the requested size on an out-of-memory error, 0 otherwise. +void NORETURN reportMapError(uptr SizeIfOOM = 0); + +// Report a fatal error when an unmap call fails. +void NORETURN reportUnmapError(uptr Addr, uptr Size); + +// Report a fatal error when a mprotect call fails. +void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot); + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY + +#endif // SCUDO_REPORT_LINUX_H_ diff --git a/standalone/rss_limit_checker.cpp b/standalone/rss_limit_checker.cpp deleted file mode 100644 index f428386b755..00000000000 --- a/standalone/rss_limit_checker.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- common.cpp ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "rss_limit_checker.h" -#include "atomic_helpers.h" -#include "string_utils.h" - -namespace scudo { - -void RssLimitChecker::check(u64 NextCheck) { - // The interval for the checks is 250ms. - static constexpr u64 CheckInterval = 250 * 1000000; - - // Early return in case another thread already did the calculation. 
- if (!atomic_compare_exchange_strong(&RssNextCheckAtNS, &NextCheck, - getMonotonicTime() + CheckInterval, - memory_order_relaxed)) { - return; - } - - const uptr CurrentRssMb = GetRSS() >> 20; - - RssLimitExceeded Result = RssLimitExceeded::Neither; - if (UNLIKELY(HardRssLimitMb && HardRssLimitMb < CurrentRssMb)) - Result = RssLimitExceeded::Hard; - else if (UNLIKELY(SoftRssLimitMb && SoftRssLimitMb < CurrentRssMb)) - Result = RssLimitExceeded::Soft; - - atomic_store_relaxed(&RssLimitStatus, static_cast<u8>(Result)); -} - -} // namespace scudo diff --git a/standalone/rss_limit_checker.h b/standalone/rss_limit_checker.h deleted file mode 100644 index 29dc063f3fc..00000000000 --- a/standalone/rss_limit_checker.h +++ /dev/null @@ -1,63 +0,0 @@ -//===-- common.h ------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SCUDO_RSS_LIMIT_CHECKER_H_ -#define SCUDO_RSS_LIMIT_CHECKER_H_ - -#include "atomic_helpers.h" -#include "common.h" -#include "internal_defs.h" - -namespace scudo { - -class RssLimitChecker { -public: - enum RssLimitExceeded { - Neither, - Soft, - Hard, - }; - - void init(int SoftRssLimitMb, int HardRssLimitMb) { - CHECK_GE(SoftRssLimitMb, 0); - CHECK_GE(HardRssLimitMb, 0); - this->SoftRssLimitMb = static_cast<uptr>(SoftRssLimitMb); - this->HardRssLimitMb = static_cast<uptr>(HardRssLimitMb); - } - - // Opportunistic RSS limit check. This will update the RSS limit status, if - // it can, every 250ms, otherwise it will just return the current one. - RssLimitExceeded getRssLimitExceeded() { - if (!HardRssLimitMb && !SoftRssLimitMb) - return RssLimitExceeded::Neither; - - u64 NextCheck = atomic_load_relaxed(&RssNextCheckAtNS); - u64 Now = getMonotonicTime(); - - if (UNLIKELY(Now >= NextCheck)) - check(NextCheck); - - return static_cast<RssLimitExceeded>(atomic_load_relaxed(&RssLimitStatus)); - } - - uptr getSoftRssLimit() const { return SoftRssLimitMb; } - uptr getHardRssLimit() const { return HardRssLimitMb; } - -private: - void check(u64 NextCheck); - - uptr SoftRssLimitMb = 0; - uptr HardRssLimitMb = 0; - - atomic_u64 RssNextCheckAtNS = {}; - atomic_u8 RssLimitStatus = {}; -}; - -} // namespace scudo - -#endif // SCUDO_RSS_LIMIT_CHECKER_H_ diff --git a/standalone/secondary.h b/standalone/secondary.h index b3128877222..d8c9f5bcfca 100644 --- a/standalone/secondary.h +++ b/standalone/secondary.h @@ -12,6 +12,7 @@ #include "chunk.h" #include "common.h" #include "list.h" +#include "mem_map.h" #include "memtag.h" #include "mutex.h" #include "options.h" @@ -37,9 +38,7 @@ struct alignas(Max<uptr>(archSupportsMemoryTagging() LargeBlock::Header *Next; uptr CommitBase; uptr CommitSize; - uptr MapBase; - uptr MapSize; - [[no_unique_address]] MapPlatformData Data; + MemMapT MemMap; }; static_assert(sizeof(Header) % (1U << SCUDO_MIN_ALIGNMENT_LOG) == 0, ""); @@ -65,16 +64,34 @@ template <typename Config> static Header *getHeader(const void *Ptr) { } // namespace LargeBlock -static void unmap(LargeBlock::Header *H) { - MapPlatformData Data = H->Data; - unmap(reinterpret_cast<void *>(H->MapBase), H->MapSize, UNMAP_ALL, &Data); +static inline void unmap(LargeBlock::Header *H) { + // Note that the `H->MemMap` is stored on the pages it manages. 
Take + // over the ownership before unmap() so that any operation along with unmap() + // won't touch inaccessible pages. + MemMapT MemMap = H->MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } -class MapAllocatorNoCache { +namespace { +struct CachedBlock { + uptr CommitBase = 0; + uptr CommitSize = 0; + uptr BlockBegin = 0; + MemMapT MemMap = {}; + u64 Time = 0; + + bool isValid() { return CommitBase != 0; } + + void invalidate() { CommitBase = 0; } +}; +} // namespace + +template <typename Config> class MapAllocatorNoCache { public: void init(UNUSED s32 ReleaseToOsInterval) {} bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment, - UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) { + UNUSED uptr HeadersSize, UNUSED LargeBlock::Header **H, + UNUSED bool *Zeroed) { return false; } void store(UNUSED Options Options, LargeBlock::Header *H) { unmap(H); } @@ -91,26 +108,53 @@ public: // Not supported by the Secondary Cache, but not an error either. return true; } + + void getStats(UNUSED ScopedString *Str) { + Str->append("Secondary Cache Disabled\n"); + } }; static const uptr MaxUnusedCachePages = 4U; template <typename Config> -void mapSecondary(Options Options, uptr CommitBase, uptr CommitSize, - uptr AllocPos, uptr Flags, MapPlatformData *Data) { - const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached(); +bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, + uptr AllocPos, uptr Flags, MemMapT &MemMap) { + Flags |= MAP_RESIZABLE; + Flags |= MAP_ALLOWNOMEM; + + const uptr PageSize = getPageSizeCached(); + if (SCUDO_TRUSTY) { + /* + * On Trusty we need AllocPos to be usable for shared memory, which cannot + * cross multiple mappings. This means we need to split around AllocPos + * and not over it. We can only do this if the address is page-aligned. + */ + const uptr TaggedSize = AllocPos - CommitBase; + if (useMemoryTagging<Config>(Options) && isAligned(TaggedSize, PageSize)) { + DCHECK_GT(TaggedSize, 0); + return MemMap.remap(CommitBase, TaggedSize, "scudo:secondary", + MAP_MEMTAG | Flags) && + MemMap.remap(AllocPos, CommitSize - TaggedSize, "scudo:secondary", + Flags); + } else { + const uptr RemapFlags = + (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0) | Flags; + return MemMap.remap(CommitBase, CommitSize, "scudo:secondary", + RemapFlags); + } + } + + const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize; if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) { const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); - map(reinterpret_cast<void *>(CommitBase), UntaggedPos - CommitBase, - "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG | Flags, Data); - map(reinterpret_cast<void *>(UntaggedPos), - CommitBase + CommitSize - UntaggedPos, "scudo:secondary", - MAP_RESIZABLE | Flags, Data); + return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", + MAP_MEMTAG | Flags) && + MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, + "scudo:secondary", Flags); } else { - map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary", - MAP_RESIZABLE | (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0) | - Flags, - Data); + const uptr RemapFlags = + (useMemoryTagging<Config>(Options) ? 
MAP_MEMTAG : 0) | Flags; + return MemMap.remap(CommitBase, CommitSize, "scudo:secondary", RemapFlags); } } @@ -129,36 +173,62 @@ public: template <typename Config> class MapAllocatorCache { public: + void getStats(ScopedString *Str) { + ScopedLock L(Mutex); + uptr Integral; + uptr Fractional; + computePercentage(SuccessfulRetrieves, CallsToRetrieve, &Integral, + &Fractional); + const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); + Str->append( + "Stats: MapAllocatorCache: EntriesCount: %d, " + "MaxEntriesCount: %u, MaxEntrySize: %zu, ReleaseToOsIntervalMs = %d\n", + EntriesCount, atomic_load_relaxed(&MaxEntriesCount), + atomic_load_relaxed(&MaxEntrySize), Interval >= 0 ? Interval : -1); + Str->append("Stats: CacheRetrievalStats: SuccessRate: %u/%u " + "(%zu.%02zu%%)\n", + SuccessfulRetrieves, CallsToRetrieve, Integral, Fractional); + for (CachedBlock Entry : Entries) { + if (!Entry.isValid()) + continue; + Str->append("StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " + "BlockSize: %zu %s\n", + Entry.CommitBase, Entry.CommitBase + Entry.CommitSize, + Entry.CommitSize, Entry.Time == 0 ? "[R]" : ""); + } + } + // Ensure the default maximum specified fits the array. - static_assert(Config::SecondaryCacheDefaultMaxEntriesCount <= - Config::SecondaryCacheEntriesArraySize, + static_assert(Config::getDefaultMaxEntriesCount() <= + Config::getEntriesArraySize(), ""); void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(EntriesCount, 0U); setOption(Option::MaxCacheEntriesCount, - static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntriesCount)); + static_cast<sptr>(Config::getDefaultMaxEntriesCount())); setOption(Option::MaxCacheEntrySize, - static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntrySize)); + static_cast<sptr>(Config::getDefaultMaxEntrySize())); + // The default value in the cache config has the higher priority. + if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) + ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void store(Options Options, LargeBlock::Header *H) EXCLUDES(Mutex) { + void store(const Options &Options, LargeBlock::Header *H) EXCLUDES(Mutex) { if (!canCache(H->CommitSize)) return unmap(H); bool EntryCached = false; bool EmptyCache = false; const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); CachedBlock Entry; Entry.CommitBase = H->CommitBase; Entry.CommitSize = H->CommitSize; - Entry.MapBase = H->MapBase; - Entry.MapSize = H->MapSize; Entry.BlockBegin = reinterpret_cast<uptr>(H + 1); - Entry.Data = H->Data; + Entry.MemMap = H->MemMap; Entry.Time = Time; if (useMemoryTagging<Config>(Options)) { if (Interval == 0 && !SCUDO_FUCHSIA) { @@ -168,13 +238,13 @@ public: // on top so we just do the two syscalls there. 
Entry.Time = 0; mapSecondary<Config>(Options, Entry.CommitBase, Entry.CommitSize, - Entry.CommitBase, MAP_NOACCESS, &Entry.Data); + Entry.CommitBase, MAP_NOACCESS, Entry.MemMap); } else { - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS, - &Entry.Data); + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, + MAP_NOACCESS); } } else if (Interval == 0) { - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } do { @@ -186,11 +256,10 @@ public: // just unmap it. break; } - if (Config::SecondaryCacheQuarantineSize && - useMemoryTagging<Config>(Options)) { + if (Config::getQuarantineSize() && useMemoryTagging<Config>(Options)) { QuarantinePos = - (QuarantinePos + 1) % Max(Config::SecondaryCacheQuarantineSize, 1u); - if (!Quarantine[QuarantinePos].CommitBase) { + (QuarantinePos + 1) % Max(Config::getQuarantineSize(), 1u); + if (!Quarantine[QuarantinePos].isValid()) { Quarantine[QuarantinePos] = Entry; return; } @@ -205,7 +274,7 @@ public: EmptyCache = true; } else { for (u32 I = 0; I < MaxCount; I++) { - if (Entries[I].CommitBase) + if (Entries[I].isValid()) continue; if (I != 0) Entries[I] = Entries[0]; @@ -223,30 +292,34 @@ public: else if (Interval >= 0) releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); if (!EntryCached) - unmap(reinterpret_cast<void *>(Entry.MapBase), Entry.MapSize, UNMAP_ALL, - &Entry.Data); + Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity()); } - bool retrieve(Options Options, uptr Size, uptr Alignment, + bool retrieve(Options Options, uptr Size, uptr Alignment, uptr HeadersSize, LargeBlock::Header **H, bool *Zeroed) EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + // 10% of the requested size proved to be the optimal choice for + // retrieving cached blocks after testing several options. + constexpr u32 FragmentedBytesDivisor = 10; bool Found = false; CachedBlock Entry; - uptr HeaderPos = 0; + uptr EntryHeaderPos = 0; { ScopedLock L(Mutex); + CallsToRetrieve++; if (EntriesCount == 0) return false; + u32 OptimalFitIndex = 0; + uptr MinDiff = UINTPTR_MAX; for (u32 I = 0; I < MaxCount; I++) { - const uptr CommitBase = Entries[I].CommitBase; - if (!CommitBase) + if (!Entries[I].isValid()) continue; + const uptr CommitBase = Entries[I].CommitBase; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); - HeaderPos = - AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + const uptr HeaderPos = AllocPos - HeadersSize; if (HeaderPos > CommitBase + CommitSize) continue; if (HeaderPos < CommitBase || @@ -254,38 +327,54 @@ public: continue; } Found = true; - Entry = Entries[I]; - Entries[I].CommitBase = 0; - break; + const uptr Diff = HeaderPos - CommitBase; + // immediately use a cached block if its size is close enough to the + // requested size. 
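// For instance (illustrative numbers): if an entry's span past the header, CommitBase + CommitSize - HeaderPos, is 64 KB, then up to roughly 6.4 KB of leading waste (Diff) is accepted immediately; otherwise the loop keeps scanning for the entry with the smallest Diff.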
+ const uptr MaxAllowedFragmentedBytes = + (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; + if (Diff <= MaxAllowedFragmentedBytes) { + OptimalFitIndex = I; + EntryHeaderPos = HeaderPos; + break; + } + // keep track of the smallest cached block + // that is greater than (AllocSize + HeaderSize) + if (Diff > MinDiff) + continue; + OptimalFitIndex = I; + MinDiff = Diff; + EntryHeaderPos = HeaderPos; } - } - if (Found) { - *H = reinterpret_cast<LargeBlock::Header *>( - LargeBlock::addHeaderTag<Config>(HeaderPos)); - *Zeroed = Entry.Time == 0; - if (useMemoryTagging<Config>(Options)) - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data); - uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); - if (useMemoryTagging<Config>(Options)) { - if (*Zeroed) - storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), - NewBlockBegin); - else if (Entry.BlockBegin < NewBlockBegin) - storeTags(Entry.BlockBegin, NewBlockBegin); - else - storeTags(untagPointer(NewBlockBegin), - untagPointer(Entry.BlockBegin)); + if (Found) { + Entry = Entries[OptimalFitIndex]; + Entries[OptimalFitIndex].invalidate(); + EntriesCount--; + SuccessfulRetrieves++; } - (*H)->CommitBase = Entry.CommitBase; - (*H)->CommitSize = Entry.CommitSize; - (*H)->MapBase = Entry.MapBase; - (*H)->MapSize = Entry.MapSize; - (*H)->Data = Entry.Data; + } + if (!Found) + return false; - ScopedLock L(Mutex); - EntriesCount--; + *H = reinterpret_cast<LargeBlock::Header *>( + LargeBlock::addHeaderTag<Config>(EntryHeaderPos)); + *Zeroed = Entry.Time == 0; + if (useMemoryTagging<Config>(Options)) + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0); + uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); + if (useMemoryTagging<Config>(Options)) { + if (*Zeroed) { + storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), + NewBlockBegin); + } else if (Entry.BlockBegin < NewBlockBegin) { + storeTags(Entry.BlockBegin, NewBlockBegin); + } else { + storeTags(untagPointer(NewBlockBegin), untagPointer(Entry.BlockBegin)); + } } - return Found; + (*H)->CommitBase = Entry.CommitBase; + (*H)->CommitSize = Entry.CommitSize; + (*H)->MemMap = Entry.MemMap; + return true; } bool canCache(uptr Size) { @@ -295,16 +384,15 @@ public: bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = - Max(Min(static_cast<s32>(Value), - Config::SecondaryCacheMaxReleaseToOsIntervalMs), - Config::SecondaryCacheMinReleaseToOsIntervalMs); + const s32 Interval = Max( + Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()), + Config::getMinReleaseToOsIntervalMs()); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } if (O == Option::MaxCacheEntriesCount) { const u32 MaxCount = static_cast<u32>(Value); - if (MaxCount > Config::SecondaryCacheEntriesArraySize) + if (MaxCount > Config::getEntriesArraySize()) return false; atomic_store_relaxed(&MaxEntriesCount, MaxCount); return true; @@ -321,18 +409,20 @@ public: void disableMemoryTagging() EXCLUDES(Mutex) { ScopedLock L(Mutex); - for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { - if (Quarantine[I].CommitBase) { - unmap(reinterpret_cast<void *>(Quarantine[I].MapBase), - Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data); - Quarantine[I].CommitBase = 0; + for (u32 I = 0; I != Config::getQuarantineSize(); ++I) { + if (Quarantine[I].isValid()) { + MemMapT &MemMap = Quarantine[I].MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + Quarantine[I].invalidate(); } } const u32 
MaxCount = atomic_load_relaxed(&MaxEntriesCount); - for (u32 I = 0; I < MaxCount; I++) - if (Entries[I].CommitBase) - setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0, - &Entries[I].Data); + for (u32 I = 0; I < MaxCount; I++) { + if (Entries[I].isValid()) { + Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, + Entries[I].CommitSize, 0); + } + } QuarantinePos = -1U; } @@ -344,50 +434,35 @@ public: private: void empty() { - struct { - void *MapBase; - uptr MapSize; - MapPlatformData Data; - } MapInfo[Config::SecondaryCacheEntriesArraySize]; + MemMapT MapInfo[Config::getEntriesArraySize()]; uptr N = 0; { ScopedLock L(Mutex); - for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { - if (!Entries[I].CommitBase) + for (uptr I = 0; I < Config::getEntriesArraySize(); I++) { + if (!Entries[I].isValid()) continue; - MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase); - MapInfo[N].MapSize = Entries[I].MapSize; - MapInfo[N].Data = Entries[I].Data; - Entries[I].CommitBase = 0; + MapInfo[N] = Entries[I].MemMap; + Entries[I].invalidate(); N++; } EntriesCount = 0; IsFullEvents = 0; } - for (uptr I = 0; I < N; I++) - unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL, - &MapInfo[I].Data); + for (uptr I = 0; I < N; I++) { + MemMapT &MemMap = MapInfo[I]; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } } - struct CachedBlock { - uptr CommitBase; - uptr CommitSize; - uptr MapBase; - uptr MapSize; - uptr BlockBegin; - [[no_unique_address]] MapPlatformData Data; - u64 Time; - }; - void releaseIfOlderThan(CachedBlock &Entry, u64 Time) REQUIRES(Mutex) { - if (!Entry.CommitBase || !Entry.Time) + if (!Entry.isValid() || !Entry.Time) return; if (Entry.Time > Time) { if (OldestTime == 0 || Entry.Time < OldestTime) OldestTime = Entry.Time; return; } - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } @@ -396,9 +471,9 @@ private: if (!EntriesCount || OldestTime == 0 || OldestTime > Time) return; OldestTime = 0; - for (uptr I = 0; I < Config::SecondaryCacheQuarantineSize; I++) + for (uptr I = 0; I < Config::getQuarantineSize(); I++) releaseIfOlderThan(Quarantine[I], Time); - for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) + for (uptr I = 0; I < Config::getEntriesArraySize(); I++) releaseIfOlderThan(Entries[I], Time); } @@ -410,10 +485,11 @@ private: u64 OldestTime GUARDED_BY(Mutex) = 0; u32 IsFullEvents GUARDED_BY(Mutex) = 0; atomic_s32 ReleaseToOsIntervalMs = {}; + u32 CallsToRetrieve GUARDED_BY(Mutex) = 0; + u32 SuccessfulRetrieves GUARDED_BY(Mutex) = 0; - CachedBlock - Entries[Config::SecondaryCacheEntriesArraySize] GUARDED_BY(Mutex) = {}; - NonZeroLengthArray<CachedBlock, Config::SecondaryCacheQuarantineSize> + CachedBlock Entries[Config::getEntriesArraySize()] GUARDED_BY(Mutex) = {}; + NonZeroLengthArray<CachedBlock, Config::getQuarantineSize()> Quarantine GUARDED_BY(Mutex) = {}; }; @@ -429,11 +505,11 @@ public: S->link(&Stats); } - void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0, + void *allocate(const Options &Options, uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, FillContentsMode FillContents = NoFill); - void deallocate(Options Options, void *Ptr); + void deallocate(const Options &Options, void *Ptr); static uptr getBlockEnd(void *Ptr) { auto *B = LargeBlock::getHeader<Config>(Ptr); @@ -444,7 +520,9 @@ public: return getBlockEnd(Ptr) - reinterpret_cast<uptr>(Ptr); } - void 
getStats(ScopedString *Str); + static constexpr uptr getHeadersSize() { + return Chunk::getHeaderSize() + LargeBlock::getHeaderSize(); + } void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); @@ -477,13 +555,16 @@ public: void unmapTestOnly() { Cache.unmapTestOnly(); } + void getStats(ScopedString *Str); + private: - typename Config::SecondaryCache Cache; + typename Config::template CacheT<typename Config::CacheConfig> Cache; mutable HybridMutex Mutex; DoublyLinkedList<LargeBlock::Header> InUseBlocks GUARDED_BY(Mutex); uptr AllocatedBytes GUARDED_BY(Mutex) = 0; uptr FreedBytes GUARDED_BY(Mutex) = 0; + uptr FragmentedBytes GUARDED_BY(Mutex) = 0; uptr LargestSize GUARDED_BY(Mutex) = 0; u32 NumberOfAllocs GUARDED_BY(Mutex) = 0; u32 NumberOfFrees GUARDED_BY(Mutex) = 0; @@ -502,24 +583,23 @@ private: // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). template <typename Config> -void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, - uptr *BlockEndPtr, +void *MapAllocator<Config>::allocate(const Options &Options, uptr Size, + uptr Alignment, uptr *BlockEndPtr, FillContentsMode FillContents) { if (Options.get(OptionBit::AddLargeAllocationSlack)) Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG; Alignment = Max(Alignment, uptr(1U) << SCUDO_MIN_ALIGNMENT_LOG); const uptr PageSize = getPageSizeCached(); - uptr RoundedSize = - roundUp(roundUp(Size, Alignment) + LargeBlock::getHeaderSize() + - Chunk::getHeaderSize(), - PageSize); - if (Alignment > PageSize) - RoundedSize += Alignment - PageSize; - if (Alignment < PageSize && Cache.canCache(RoundedSize)) { + // Note that cached blocks may have aligned address already. Thus we simply + // pass the required size (`Size` + `getHeadersSize()`) to do cache look up. + const uptr MinNeededSizeForCache = roundUp(Size + getHeadersSize(), PageSize); + + if (Alignment < PageSize && Cache.canCache(MinNeededSizeForCache)) { LargeBlock::Header *H; bool Zeroed; - if (Cache.retrieve(Options, Size, Alignment, &H, &Zeroed)) { + if (Cache.retrieve(Options, Size, Alignment, getHeadersSize(), &H, + &Zeroed)) { const uptr BlockEnd = H->CommitBase + H->CommitSize; if (BlockEndPtr) *BlockEndPtr = BlockEnd; @@ -531,25 +611,35 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, if (FillContents && !Zeroed) memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte, BlockEnd - PtrInt); - const uptr BlockSize = BlockEnd - HInt; { ScopedLock L(Mutex); InUseBlocks.push_back(H); - AllocatedBytes += BlockSize; + AllocatedBytes += H->CommitSize; + FragmentedBytes += H->MemMap.getCapacity() - H->CommitSize; NumberOfAllocs++; - Stats.add(StatAllocated, BlockSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatAllocated, H->CommitSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return Ptr; } } - MapPlatformData Data = {}; + uptr RoundedSize = + roundUp(roundUp(Size, Alignment) + getHeadersSize(), PageSize); + if (Alignment > PageSize) + RoundedSize += Alignment - PageSize; + + ReservedMemoryT ReservedMemory; const uptr MapSize = RoundedSize + 2 * PageSize; - uptr MapBase = reinterpret_cast<uptr>( - map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data)); - if (UNLIKELY(!MapBase)) + if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, MapSize, nullptr, + MAP_ALLOWNOMEM))) { return nullptr; + } + + // Take the entire ownership of reserved region. 
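// A worked example of the size arithmetic used in MapAllocator<Config>::allocate():
// the formulas are the ones in this function, while the header sizes and page
// size below are placeholders (the real values are target-dependent).
//
//   getHeadersSize()      == Chunk::getHeaderSize() + LargeBlock::getHeaderSize()
//   MinNeededSizeForCache == roundUp(Size + getHeadersSize(), PageSize)
//   HeaderPos             == AllocPos - getHeadersSize()
//
// With a hypothetical 4 KiB page, a 16-byte Chunk header and a 48-byte
// LargeBlock header, a 100000-byte request gives:
//   getHeadersSize()      = 16 + 48               = 64
//   MinNeededSizeForCache = roundUp(100064, 4096) = 102400
// The cache lookup only needs this rounded size, since a cached block may
// already sit at a suitably aligned address.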
+ MemMapT MemMap = ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()); + uptr MapBase = MemMap.getBase(); uptr CommitBase = MapBase + PageSize; uptr MapEnd = MapBase + MapSize; @@ -565,50 +655,52 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, // We only trim the extra memory on 32-bit platforms: 64-bit platforms // are less constrained memory wise, and that saves us two syscalls. if (SCUDO_WORDSIZE == 32U && NewMapBase != MapBase) { - unmap(reinterpret_cast<void *>(MapBase), NewMapBase - MapBase, 0, &Data); + MemMap.unmap(MapBase, NewMapBase - MapBase); MapBase = NewMapBase; } const uptr NewMapEnd = CommitBase + PageSize + roundUp(Size, PageSize) + PageSize; DCHECK_LE(NewMapEnd, MapEnd); if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) { - unmap(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd, 0, &Data); + MemMap.unmap(NewMapEnd, MapEnd - NewMapEnd); MapEnd = NewMapEnd; } } const uptr CommitSize = MapEnd - PageSize - CommitBase; const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); - mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, &Data); - const uptr HeaderPos = - AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + if (!mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, + MemMap)) { + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + return nullptr; + } + const uptr HeaderPos = AllocPos - getHeadersSize(); LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>( LargeBlock::addHeaderTag<Config>(HeaderPos)); if (useMemoryTagging<Config>(Options)) storeTags(LargeBlock::addHeaderTag<Config>(CommitBase), reinterpret_cast<uptr>(H + 1)); - H->MapBase = MapBase; - H->MapSize = MapEnd - MapBase; H->CommitBase = CommitBase; H->CommitSize = CommitSize; - H->Data = Data; + H->MemMap = MemMap; if (BlockEndPtr) *BlockEndPtr = CommitBase + CommitSize; { ScopedLock L(Mutex); InUseBlocks.push_back(H); AllocatedBytes += CommitSize; + FragmentedBytes += H->MemMap.getCapacity() - CommitSize; if (LargestSize < CommitSize) LargestSize = CommitSize; NumberOfAllocs++; Stats.add(StatAllocated, CommitSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return reinterpret_cast<void *>(HeaderPos + LargeBlock::getHeaderSize()); } template <typename Config> -void MapAllocator<Config>::deallocate(Options Options, void *Ptr) +void MapAllocator<Config>::deallocate(const Options &Options, void *Ptr) EXCLUDES(Mutex) { LargeBlock::Header *H = LargeBlock::getHeader<Config>(Ptr); const uptr CommitSize = H->CommitSize; @@ -616,9 +708,10 @@ void MapAllocator<Config>::deallocate(Options Options, void *Ptr) ScopedLock L(Mutex); InUseBlocks.remove(H); FreedBytes += CommitSize; + FragmentedBytes -= H->MemMap.getCapacity() - CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); - Stats.sub(StatMapped, H->MapSize); + Stats.sub(StatMapped, H->MemMap.getCapacity()); } Cache.store(Options, H); } @@ -627,10 +720,12 @@ template <typename Config> void MapAllocator<Config>::getStats(ScopedString *Str) EXCLUDES(Mutex) { ScopedLock L(Mutex); Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times " - "(%zuK), remains %u (%zuK) max %zuM\n", + "(%zuK), remains %u (%zuK) max %zuM, Fragmented %zuK\n", NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, FreedBytes >> 10, NumberOfAllocs - NumberOfFrees, - (AllocatedBytes - FreedBytes) >> 10, LargestSize >> 20); + (AllocatedBytes - FreedBytes) >> 10, LargestSize 
>> 20, + FragmentedBytes >> 10); + Cache.getStats(Str); } } // namespace scudo diff --git a/standalone/size_class_map.h b/standalone/size_class_map.h index 766562495ec..4138885de33 100644 --- a/standalone/size_class_map.h +++ b/standalone/size_class_map.h @@ -254,7 +254,7 @@ struct AndroidSizeClassConfig { static const u16 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 13; - static constexpr u32 Classes[] = { + static constexpr uptr Classes[] = { 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000b0, 0x000c0, 0x000e0, 0x00120, 0x00160, 0x001c0, 0x00250, 0x00320, 0x00450, 0x00670, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01210, 0x01bd0, 0x02210, @@ -269,7 +269,7 @@ struct AndroidSizeClassConfig { static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 13; - static constexpr u32 Classes[] = { + static constexpr uptr Classes[] = { 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090, 0x000a0, 0x000b0, 0x000c0, 0x000e0, 0x000f0, 0x00110, 0x00120, 0x00130, 0x00150, 0x00160, 0x00170, 0x00190, 0x001d0, 0x00210, 0x00240, 0x002a0, @@ -289,35 +289,11 @@ typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap; static_assert(AndroidSizeClassMap::usesCompressedLSBFormat(), ""); #endif -struct SvelteSizeClassConfig { -#if SCUDO_WORDSIZE == 64U - static const uptr NumBits = 4; - static const uptr MinSizeLog = 4; - static const uptr MidSizeLog = 8; - static const uptr MaxSizeLog = 14; - static const u16 MaxNumCachedHint = 13; - static const uptr MaxBytesCachedLog = 10; - static const uptr SizeDelta = Chunk::getHeaderSize(); -#else - static const uptr NumBits = 4; - static const uptr MinSizeLog = 3; - static const uptr MidSizeLog = 7; - static const uptr MaxSizeLog = 14; - static const u16 MaxNumCachedHint = 14; - static const uptr MaxBytesCachedLog = 10; - static const uptr SizeDelta = Chunk::getHeaderSize(); -#endif -}; - -typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap; - -// Trusty is configured to only have one region containing blocks of size -// 2^7 bytes. struct TrustySizeClassConfig { static const uptr NumBits = 1; - static const uptr MinSizeLog = 7; - static const uptr MidSizeLog = 7; - static const uptr MaxSizeLog = 7; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 5; + static const uptr MaxSizeLog = 15; static const u16 MaxNumCachedHint = 12; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = 0; diff --git a/standalone/stack_depot.h b/standalone/stack_depot.h index 458198fcb7a..0176c40aa89 100644 --- a/standalone/stack_depot.h +++ b/standalone/stack_depot.h @@ -10,6 +10,7 @@ #define SCUDO_STACK_DEPOT_H_ #include "atomic_helpers.h" +#include "common.h" #include "mutex.h" namespace scudo { @@ -38,7 +39,7 @@ public: } }; -class StackDepot { +class alignas(atomic_u64) StackDepot { HybridMutex RingEndMu; u32 RingEnd = 0; @@ -62,29 +63,77 @@ class StackDepot { // This is achieved by re-checking the hash of the stack trace before // returning the trace. -#ifdef SCUDO_FUZZ - // Use smaller table sizes for fuzzing in order to reduce input size. 
- static const uptr TabBits = 4; -#else - static const uptr TabBits = 16; -#endif - static const uptr TabSize = 1 << TabBits; - static const uptr TabMask = TabSize - 1; - atomic_u32 Tab[TabSize] = {}; - -#ifdef SCUDO_FUZZ - static const uptr RingBits = 4; -#else - static const uptr RingBits = 19; -#endif - static const uptr RingSize = 1 << RingBits; - static const uptr RingMask = RingSize - 1; - atomic_u64 Ring[RingSize] = {}; + u32 RingSize = 0; + u32 RingMask = 0; + u32 TabMask = 0; + // This is immediately followed by RingSize atomic_u64 and + // (TabMask + 1) atomic_u32. + + atomic_u64 *getRing() { + return reinterpret_cast<atomic_u64 *>(reinterpret_cast<char *>(this) + + sizeof(StackDepot)); + } + + atomic_u32 *getTab() { + return reinterpret_cast<atomic_u32 *>(reinterpret_cast<char *>(this) + + sizeof(StackDepot) + + sizeof(atomic_u64) * RingSize); + } + + const atomic_u64 *getRing() const { + return reinterpret_cast<const atomic_u64 *>( + reinterpret_cast<const char *>(this) + sizeof(StackDepot)); + } + + const atomic_u32 *getTab() const { + return reinterpret_cast<const atomic_u32 *>( + reinterpret_cast<const char *>(this) + sizeof(StackDepot) + + sizeof(atomic_u64) * RingSize); + } public: + void init(u32 RingSz, u32 TabSz) { + DCHECK(isPowerOfTwo(RingSz)); + DCHECK(isPowerOfTwo(TabSz)); + RingSize = RingSz; + RingMask = RingSz - 1; + TabMask = TabSz - 1; + } + + // Ensure that RingSize, RingMask and TabMask are set up in a way that + // all accesses are within range of BufSize. + bool isValid(uptr BufSize) const { + if (!isPowerOfTwo(RingSize)) + return false; + uptr RingBytes = sizeof(atomic_u64) * RingSize; + if (RingMask + 1 != RingSize) + return false; + + if (TabMask == 0) + return false; + uptr TabSize = TabMask + 1; + if (!isPowerOfTwo(TabSize)) + return false; + uptr TabBytes = sizeof(atomic_u32) * TabSize; + + // Subtract and detect underflow. + if (BufSize < sizeof(StackDepot)) + return false; + BufSize -= sizeof(StackDepot); + if (BufSize < TabBytes) + return false; + BufSize -= TabBytes; + if (BufSize < RingBytes) + return false; + return BufSize == RingBytes; + } + // Insert hash of the stack trace [Begin, End) into the stack depot, and // return the hash. u32 insert(uptr *Begin, uptr *End) { + auto *Tab = getTab(); + auto *Ring = getRing(); + MurMur2HashBuilder B; for (uptr *I = Begin; I != End; ++I) B.add(u32(*I) >> 2); @@ -113,6 +162,9 @@ public: // accessed via operator[] passing indexes between *RingPosPtr and // *RingPosPtr + *SizePtr. bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const { + auto *Tab = getTab(); + auto *Ring = getRing(); + u32 Pos = Hash & TabMask; u32 RingPos = atomic_load_relaxed(&Tab[Pos]); if (RingPos >= RingSize) @@ -134,11 +186,23 @@ public: return B.get() == Hash; } - u64 operator[](uptr RingPos) const { + u64 at(uptr RingPos) const { + auto *Ring = getRing(); return atomic_load_relaxed(&Ring[RingPos & RingMask]); } + + // This is done for the purpose of fork safety in multithreaded programs and + // does not fully disable StackDepot. In particular, find() still works and + // only insert() is blocked. + void disable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.lock(); } + + void enable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.unlock(); } }; +// We need StackDepot to be aligned to 8-bytes so the ring we store after +// is correctly assigned. 
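// Setting up a depot over a caller-provided buffer, following the accessors
// above (this mirrors the StackDepot test added in combined_test.cpp; the
// 1024/1024 sizes are only an example, both must be powers of two):
//
//   constexpr scudo::u32 RingSz = 1024, TabSz = 1024;
//   alignas(scudo::StackDepot) static char Buf[sizeof(scudo::StackDepot) +
//                                               RingSz * sizeof(scudo::atomic_u64) +
//                                               TabSz * sizeof(scudo::atomic_u32)];
//   auto *Depot = reinterpret_cast<scudo::StackDepot *>(Buf);
//   Depot->init(RingSz, TabSz);
//   CHECK(Depot->isValid(sizeof(Buf)));
//
// isValid() demands an exact fit -- header, then RingSz u64 ring slots, then
// TabSz u32 table slots -- so the buffer size has to be computed from exactly
// those terms.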
+static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0); + } // namespace scudo #endif // SCUDO_STACK_DEPOT_H_ diff --git a/standalone/string_utils.cpp b/standalone/string_utils.cpp index 13fdb9c6ca6..e584bd806e5 100644 --- a/standalone/string_utils.cpp +++ b/standalone/string_utils.cpp @@ -14,30 +14,21 @@ namespace scudo { -static int appendChar(char **Buffer, const char *BufferEnd, char C) { - if (*Buffer < BufferEnd) { - **Buffer = C; - (*Buffer)++; - } - return 1; -} - // Appends number in a given Base to buffer. If its length is less than // |MinNumberLength|, it is padded with leading zeroes or spaces, depending // on the value of |PadWithZero|. -static int appendNumber(char **Buffer, const char *BufferEnd, u64 AbsoluteValue, - u8 Base, u8 MinNumberLength, bool PadWithZero, - bool Negative, bool Upper) { +void ScopedString::appendNumber(u64 AbsoluteValue, u8 Base, u8 MinNumberLength, + bool PadWithZero, bool Negative, bool Upper) { constexpr uptr MaxLen = 30; RAW_CHECK(Base == 10 || Base == 16); RAW_CHECK(Base == 10 || !Negative); RAW_CHECK(AbsoluteValue || !Negative); RAW_CHECK(MinNumberLength < MaxLen); - int Res = 0; if (Negative && MinNumberLength) --MinNumberLength; - if (Negative && PadWithZero) - Res += appendChar(Buffer, BufferEnd, '-'); + if (Negative && PadWithZero) { + String.push_back('-'); + } uptr NumBuffer[MaxLen]; int Pos = 0; do { @@ -55,34 +46,32 @@ static int appendNumber(char **Buffer, const char *BufferEnd, u64 AbsoluteValue, Pos--; for (; Pos >= 0 && NumBuffer[Pos] == 0; Pos--) { char c = (PadWithZero || Pos == 0) ? '0' : ' '; - Res += appendChar(Buffer, BufferEnd, c); + String.push_back(c); } if (Negative && !PadWithZero) - Res += appendChar(Buffer, BufferEnd, '-'); + String.push_back('-'); for (; Pos >= 0; Pos--) { char Digit = static_cast<char>(NumBuffer[Pos]); Digit = static_cast<char>((Digit < 10) ? '0' + Digit : (Upper ? 'A' : 'a') + Digit - 10); - Res += appendChar(Buffer, BufferEnd, Digit); + String.push_back(Digit); } - return Res; } -static int appendUnsigned(char **Buffer, const char *BufferEnd, u64 Num, - u8 Base, u8 MinNumberLength, bool PadWithZero, - bool Upper) { - return appendNumber(Buffer, BufferEnd, Num, Base, MinNumberLength, - PadWithZero, /*Negative=*/false, Upper); +void ScopedString::appendUnsigned(u64 Num, u8 Base, u8 MinNumberLength, + bool PadWithZero, bool Upper) { + appendNumber(Num, Base, MinNumberLength, PadWithZero, /*Negative=*/false, + Upper); } -static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num, - u8 MinNumberLength, bool PadWithZero) { +void ScopedString::appendSignedDecimal(s64 Num, u8 MinNumberLength, + bool PadWithZero) { const bool Negative = (Num < 0); const u64 UnsignedNum = (Num == INT64_MIN) ? static_cast<u64>(INT64_MAX) + 1 : static_cast<u64>(Negative ? 
-Num : Num); - return appendNumber(Buffer, BufferEnd, UnsignedNum, 10, MinNumberLength, - PadWithZero, Negative, /*Upper=*/false); + appendNumber(UnsignedNum, 10, MinNumberLength, PadWithZero, Negative, + /*Upper=*/false); } // Use the fact that explicitly requesting 0 Width (%0s) results in UB and @@ -90,44 +79,45 @@ static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num, // Width == 0 - no Width requested // Width < 0 - left-justify S within and pad it to -Width chars, if necessary // Width > 0 - right-justify S, not implemented yet -static int appendString(char **Buffer, const char *BufferEnd, int Width, - int MaxChars, const char *S) { +void ScopedString::appendString(int Width, int MaxChars, const char *S) { if (!S) S = "<null>"; - int Res = 0; + int NumChars = 0; for (; *S; S++) { - if (MaxChars >= 0 && Res >= MaxChars) + if (MaxChars >= 0 && NumChars >= MaxChars) break; - Res += appendChar(Buffer, BufferEnd, *S); + String.push_back(*S); + NumChars++; + } + if (Width < 0) { + // Only left justification supported. + Width = -Width - NumChars; + while (Width-- > 0) + String.push_back(' '); } - // Only the left justified strings are supported. - while (Width < -Res) - Res += appendChar(Buffer, BufferEnd, ' '); - return Res; } -static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) { - int Res = 0; - Res += appendString(Buffer, BufferEnd, 0, -1, "0x"); - Res += appendUnsigned(Buffer, BufferEnd, ptr_value, 16, - SCUDO_POINTER_FORMAT_LENGTH, /*PadWithZero=*/true, - /*Upper=*/false); - return Res; +void ScopedString::appendPointer(u64 ptr_value) { + appendString(0, -1, "0x"); + appendUnsigned(ptr_value, 16, SCUDO_POINTER_FORMAT_LENGTH, + /*PadWithZero=*/true, + /*Upper=*/false); } -static int formatString(char *Buffer, uptr BufferLength, const char *Format, - va_list Args) { +void ScopedString::vappend(const char *Format, va_list &Args) { + // Since the string contains the '\0' terminator, put our size before it + // so that push_back calls work correctly. + DCHECK(String.size() > 0); + String.resize(String.size() - 1); + static const char *PrintfFormatsHelp = - "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; " + "Supported formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; " "%[-]([0-9]*)?(\\.\\*)?s; %c\n"; RAW_CHECK(Format); - RAW_CHECK(BufferLength > 0); - const char *BufferEnd = &Buffer[BufferLength - 1]; const char *Cur = Format; - int Res = 0; for (; *Cur; Cur++) { if (*Cur != '%') { - Res += appendChar(&Buffer, BufferEnd, *Cur); + String.push_back(*Cur); continue; } Cur++; @@ -162,7 +152,7 @@ static int formatString(char *Buffer, uptr BufferLength, const char *Format, DVal = HaveLL ? va_arg(Args, s64) : HaveZ ? va_arg(Args, sptr) : va_arg(Args, int); - Res += appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); + appendSignedDecimal(DVal, Width, PadWithZero); break; } case 'u': @@ -172,32 +162,50 @@ static int formatString(char *Buffer, uptr BufferLength, const char *Format, : HaveZ ? va_arg(Args, uptr) : va_arg(Args, unsigned); const bool Upper = (*Cur == 'X'); - Res += appendUnsigned(&Buffer, BufferEnd, UVal, (*Cur == 'u') ? 10 : 16, - Width, PadWithZero, Upper); + appendUnsigned(UVal, (*Cur == 'u') ? 10 : 16, Width, PadWithZero, Upper); break; } case 'p': { RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); - Res += appendPointer(&Buffer, BufferEnd, va_arg(Args, uptr)); + appendPointer(va_arg(Args, uptr)); break; } case 's': { RAW_CHECK_MSG(!HaveLength, PrintfFormatsHelp); // Only left-justified Width is supported. 
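// A short usage sketch of the formatter (the names and values are made up; the
// directives are the ones PrintfFormatsHelp above allows):
//
//   scudo::ScopedString Str;
//   Str.append("allocated %zu bytes at %p (class %u, name %-8s)\n",
//              Size, Ptr, ClassId, "example");
//   Str.output();                        // flushes via outputRaw()
//   scudo::Printf("free of %p\n", Ptr);  // one-shot variant, same engine
//
// As the CHECK right below enforces, %s only honours left justification
// (negative width); this change also teaches the parser the %ld/%lu spellings
// that PRId64/PRIu64 expand to on 64-bit targets.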
CHECK(!HaveWidth || LeftJustified); - Res += appendString(&Buffer, BufferEnd, LeftJustified ? -Width : Width, - Precision, va_arg(Args, char *)); + appendString(LeftJustified ? -Width : Width, Precision, + va_arg(Args, char *)); break; } case 'c': { RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); - Res += - appendChar(&Buffer, BufferEnd, static_cast<char>(va_arg(Args, int))); + String.push_back(static_cast<char>(va_arg(Args, int))); + break; + } + // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively. + // However, `-Wformat` doesn't know we have a different parser for those + // placeholders and it keeps complaining the type mismatch on 64-bit + // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to + // silence the warning, we turn to use `PRId64`/`PRIu64` for printing + // `s64`/`u64` and handle the `ld`/`lu` here. + case 'l': { + ++Cur; + RAW_CHECK(*Cur == 'd' || *Cur == 'u'); + + if (*Cur == 'd') { + DVal = va_arg(Args, s64); + appendSignedDecimal(DVal, Width, PadWithZero); + } else { + UVal = va_arg(Args, u64); + appendUnsigned(UVal, 10, Width, PadWithZero, false); + } + break; } case '%': { RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); - Res += appendChar(&Buffer, BufferEnd, '%'); + String.push_back('%'); break; } default: { @@ -205,41 +213,19 @@ static int formatString(char *Buffer, uptr BufferLength, const char *Format, } } } - RAW_CHECK(Buffer <= BufferEnd); - appendChar(&Buffer, BufferEnd + 1, '\0'); - return Res; -} - -int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) { - va_list Args; - va_start(Args, Format); - int Res = formatString(Buffer, BufferLength, Format, Args); - va_end(Args); - return Res; -} - -void ScopedString::append(const char *Format, va_list Args) { - va_list ArgsCopy; - va_copy(ArgsCopy, Args); - // formatString doesn't currently support a null buffer or zero buffer length, - // so in order to get the resulting formatted string length, we use a one-char - // buffer. - char C[1]; - const uptr AdditionalLength = - static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1; - const uptr Length = length(); - String.resize(Length + AdditionalLength); - const uptr FormattedLength = static_cast<uptr>(formatString( - String.data() + Length, String.size() - Length, Format, ArgsCopy)); - RAW_CHECK(data()[length()] == '\0'); - RAW_CHECK(FormattedLength + 1 == AdditionalLength); - va_end(ArgsCopy); + String.push_back('\0'); + if (String.back() != '\0') { + // String truncated, make sure the string is terminated properly. + // This can happen if there is no more memory when trying to resize + // the string. + String.back() = '\0'; + } } void ScopedString::append(const char *Format, ...) { va_list Args; va_start(Args, Format); - append(Format, Args); + vappend(Format, Args); va_end(Args); } @@ -247,7 +233,7 @@ void Printf(const char *Format, ...) { va_list Args; va_start(Args, Format); ScopedString Msg; - Msg.append(Format, Args); + Msg.vappend(Format, Args); outputRaw(Msg.data()); va_end(Args); } diff --git a/standalone/string_utils.h b/standalone/string_utils.h index 41901194dfd..6e00b637797 100644 --- a/standalone/string_utils.h +++ b/standalone/string_utils.h @@ -25,17 +25,24 @@ public: String.clear(); String.push_back('\0'); } - void append(const char *Format, va_list Args); + void vappend(const char *Format, va_list &Args); void append(const char *Format, ...) 
FORMAT(2, 3); void output() const { outputRaw(String.data()); } void reserve(size_t Size) { String.reserve(Size + 1); } + uptr capacity() { return String.capacity() - 1; } private: + void appendNumber(u64 AbsoluteValue, u8 Base, u8 MinNumberLength, + bool PadWithZero, bool Negative, bool Upper); + void appendUnsigned(u64 Num, u8 Base, u8 MinNumberLength, bool PadWithZero, + bool Upper); + void appendSignedDecimal(s64 Num, u8 MinNumberLength, bool PadWithZero); + void appendString(int Width, int MaxChars, const char *S); + void appendPointer(u64 ptr_value); + Vector<char> String; }; -int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) - FORMAT(3, 4); void Printf(const char *Format, ...) FORMAT(1, 2); } // namespace scudo diff --git a/standalone/tests/allocator_config_test.cpp b/standalone/tests/allocator_config_test.cpp new file mode 100644 index 00000000000..4c4ceb832e2 --- /dev/null +++ b/standalone/tests/allocator_config_test.cpp @@ -0,0 +1,119 @@ +//===-- allocator_config_test.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "allocator_config.h" +#include "allocator_config_wrapper.h" +#include "common.h" +#include "secondary.h" + +#include <type_traits> + +struct TestBaseConfig { + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; +}; + +struct TestBaseConfigEnableOptionalFlag : public TestBaseConfig { + static const bool MaySupportMemoryTagging = true; + // Use the getter to avoid the test to `use` the address of static const + // variable (which requires additional explicit definition). 
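// The pitfall the comment above refers to, in isolation (a sketch, not code
// from this test): binding a static const data member to a const reference --
// which EXPECT_EQ does -- odr-uses it, and without an out-of-class definition
// the link can fail.
//
//   struct Cfg  { static const bool Flag = true; };
//   EXPECT_EQ(Cfg::Flag, true);        // may require "const bool Cfg::Flag;"
//                                      // at namespace scope
//
//   struct Cfg2 {
//     static const bool Flag = true;
//     static bool getFlag() { return Flag; }
//   };
//   EXPECT_EQ(Cfg2::getFlag(), true);  // returned by value, no definition needed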
+ static bool getMaySupportMemoryTagging() { return MaySupportMemoryTagging; } +}; + +struct TestBasePrimaryConfig { + using SizeClassMap = void; + static const scudo::uptr RegionSizeLog = 18U; + static const scudo::uptr GroupSizeLog = 18U; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const scudo::uptr MapSizeIncrement = 1UL << 18; +}; + +struct TestPrimaryConfig : public TestBaseConfig { + struct Primary : TestBasePrimaryConfig {}; +}; + +struct TestPrimaryConfigEnableOptionalFlag : public TestBaseConfig { + struct Primary : TestBasePrimaryConfig { + static const bool EnableRandomOffset = true; + static bool getEnableRandomOffset() { return EnableRandomOffset; } + }; +}; + +struct TestPrimaryConfigEnableOptionalType : public TestBaseConfig { + struct DummyConditionVariable {}; + + struct Primary : TestBasePrimaryConfig { + using ConditionVariableT = DummyConditionVariable; + }; +}; + +struct TestSecondaryConfig : public TestPrimaryConfig { + struct Secondary { + template <typename Config> + using CacheT = scudo::MapAllocatorNoCache<Config>; + }; +}; + +struct TestSecondaryCacheConfigEnableOptionalFlag : public TestPrimaryConfig { + struct Secondary { + struct Cache { + static const scudo::u32 EntriesArraySize = 256U; + static scudo::u32 getEntriesArraySize() { return EntriesArraySize; } + }; + template <typename T> using CacheT = scudo::MapAllocatorCache<T>; + }; +}; + +TEST(ScudoAllocatorConfigTest, VerifyOptionalFlags) { + // Test the top level allocator optional config. + // + // `MaySupportMemoryTagging` is default off. + EXPECT_FALSE(scudo::BaseConfig<TestBaseConfig>::getMaySupportMemoryTagging()); + EXPECT_EQ(scudo::BaseConfig< + TestBaseConfigEnableOptionalFlag>::getMaySupportMemoryTagging(), + TestBaseConfigEnableOptionalFlag::getMaySupportMemoryTagging()); + + // Test primary optional config. + // + // `EnableRandomeOffset` is default off. + EXPECT_FALSE( + scudo::PrimaryConfig<TestPrimaryConfig>::getEnableRandomOffset()); + EXPECT_EQ( + scudo::PrimaryConfig< + TestPrimaryConfigEnableOptionalFlag>::getEnableRandomOffset(), + TestPrimaryConfigEnableOptionalFlag::Primary::getEnableRandomOffset()); + + // `ConditionVariableT` is default off. + EXPECT_FALSE( + scudo::PrimaryConfig<TestPrimaryConfig>::hasConditionVariableT()); + EXPECT_TRUE(scudo::PrimaryConfig< + TestPrimaryConfigEnableOptionalType>::hasConditionVariableT()); + EXPECT_TRUE((std::is_same_v< + typename scudo::PrimaryConfig< + TestPrimaryConfigEnableOptionalType>::ConditionVariableT, + typename TestPrimaryConfigEnableOptionalType::Primary:: + ConditionVariableT>)); + + // Test secondary cache optional config. + using NoCacheConfig = + scudo::SecondaryConfig<TestSecondaryConfig>::CacheConfig; + // `EntriesArraySize` is default 0. 
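// For contrast with the NoCache configuration checked just below, a
// hypothetical config that opts into the secondary cache could look like this
// (only knobs visible in this change are listed; a real config may need more):
//
//   struct MySecondaryConfig {
//     struct Cache {
//       static const scudo::u32 EntriesArraySize = 32U;
//       static const scudo::u32 QuarantineSize = 0U;
//       static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
//       static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
//     };
//     template <typename Config> using CacheT = scudo::MapAllocatorCache<Config>;
//   };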
+ EXPECT_EQ(NoCacheConfig::getEntriesArraySize(), 0U); + + using CacheConfig = scudo::SecondaryConfig< + TestSecondaryCacheConfigEnableOptionalFlag>::CacheConfig; + EXPECT_EQ(CacheConfig::getEntriesArraySize(), + TestSecondaryCacheConfigEnableOptionalFlag::Secondary::Cache:: + getEntriesArraySize()); +} diff --git a/standalone/tests/chunk_test.cpp b/standalone/tests/chunk_test.cpp index 7a29f3c11b7..1b2c1eb5a7d 100644 --- a/standalone/tests/chunk_test.cpp +++ b/standalone/tests/chunk_test.cpp @@ -37,29 +37,6 @@ TEST(ScudoChunkDeathTest, ChunkBasic) { free(Block); } -TEST(ScudoChunkTest, ChunkCmpXchg) { - initChecksum(); - const scudo::uptr Size = 0x100U; - scudo::Chunk::UnpackedHeader OldHeader = {}; - OldHeader.OriginOrWasZeroed = scudo::Chunk::Origin::Malloc; - OldHeader.ClassId = 0x42U; - OldHeader.SizeOrUnusedBytes = Size; - OldHeader.State = scudo::Chunk::State::Allocated; - void *Block = malloc(HeaderSize + Size); - void *P = reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(Block) + - HeaderSize); - scudo::Chunk::storeHeader(Cookie, P, &OldHeader); - memset(P, 'A', Size); - scudo::Chunk::UnpackedHeader NewHeader = OldHeader; - NewHeader.State = scudo::Chunk::State::Quarantined; - scudo::Chunk::compareExchangeHeader(Cookie, P, &NewHeader, &OldHeader); - NewHeader = {}; - EXPECT_TRUE(scudo::Chunk::isValid(Cookie, P, &NewHeader)); - EXPECT_EQ(NewHeader.State, scudo::Chunk::State::Quarantined); - EXPECT_FALSE(scudo::Chunk::isValid(InvalidCookie, P, &NewHeader)); - free(Block); -} - TEST(ScudoChunkDeathTest, CorruptHeader) { initChecksum(); const scudo::uptr Size = 0x100U; diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp index 6f4fa748ed9..1a36155bcd4 100644 --- a/standalone/tests/combined_test.cpp +++ b/standalone/tests/combined_test.cpp @@ -7,12 +7,17 @@ //===----------------------------------------------------------------------===// #include "memtag.h" +#include "stack_depot.h" #include "tests/scudo_unit_test.h" #include "allocator_config.h" #include "chunk.h" #include "combined.h" +#include "condition_variable.h" +#include "mem_map.h" +#include "size_class_map.h" +#include <algorithm> #include <condition_variable> #include <memory> #include <mutex> @@ -53,7 +58,7 @@ void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, EXPECT_DEATH( { disableDebuggerdMaybe(); - reinterpret_cast<char *>(P)[-1] = 0xaa; + reinterpret_cast<char *>(P)[-1] = 'A'; }, ""); if (isPrimaryAllocation<AllocatorT>(Size, Alignment) @@ -62,7 +67,7 @@ void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, EXPECT_DEATH( { disableDebuggerdMaybe(); - reinterpret_cast<char *>(P)[Size] = 0xaa; + reinterpret_cast<char *>(P)[Size] = 'A'; }, ""); } @@ -77,14 +82,70 @@ template <typename Config> struct TestAllocator : scudo::Allocator<Config> { } ~TestAllocator() { this->unmapTestOnly(); } - void *operator new(size_t size) { + void *operator new(size_t size); + void operator delete(void *ptr); +}; + +constexpr size_t kMaxAlign = std::max({ + alignof(scudo::Allocator<scudo::DefaultConfig>), +#if SCUDO_CAN_USE_PRIMARY64 + alignof(scudo::Allocator<scudo::FuchsiaConfig>), +#endif + alignof(scudo::Allocator<scudo::AndroidConfig>) +}); + +#if SCUDO_RISCV64 +// The allocator is over 4MB large. Rather than creating an instance of this on +// the heap, keep it in a global storage to reduce fragmentation from having to +// mmap this at the start of every test. 
+struct TestAllocatorStorage { + static constexpr size_t kMaxSize = std::max({ + sizeof(scudo::Allocator<scudo::DefaultConfig>), +#if SCUDO_CAN_USE_PRIMARY64 + sizeof(scudo::Allocator<scudo::FuchsiaConfig>), +#endif + sizeof(scudo::Allocator<scudo::AndroidConfig>) + }); + + // To alleviate some problem, let's skip the thread safety analysis here. + static void *get(size_t size) NO_THREAD_SAFETY_ANALYSIS { + CHECK(size <= kMaxSize && + "Allocation size doesn't fit in the allocator storage"); + M.lock(); + return AllocatorStorage; + } + + static void release(void *ptr) NO_THREAD_SAFETY_ANALYSIS { + M.assertHeld(); + M.unlock(); + ASSERT_EQ(ptr, AllocatorStorage); + } + + static scudo::HybridMutex M; + static uint8_t AllocatorStorage[kMaxSize]; +}; +scudo::HybridMutex TestAllocatorStorage::M; +alignas(kMaxAlign) uint8_t TestAllocatorStorage::AllocatorStorage[kMaxSize]; +#else +struct TestAllocatorStorage { + static void *get(size_t size) NO_THREAD_SAFETY_ANALYSIS { void *p = nullptr; - EXPECT_EQ(0, posix_memalign(&p, alignof(TestAllocator), size)); + EXPECT_EQ(0, posix_memalign(&p, kMaxAlign, size)); return p; } - - void operator delete(void *ptr) { free(ptr); } + static void release(void *ptr) NO_THREAD_SAFETY_ANALYSIS { free(ptr); } }; +#endif + +template <typename Config> +void *TestAllocator<Config>::operator new(size_t size) { + return TestAllocatorStorage::get(size); +} + +template <typename Config> +void TestAllocator<Config>::operator delete(void *ptr) { + TestAllocatorStorage::release(ptr); +} template <class TypeParam> struct ScudoCombinedTest : public Test { ScudoCombinedTest() { @@ -92,7 +153,7 @@ template <class TypeParam> struct ScudoCombinedTest : public Test { Allocator = std::make_unique<AllocatorT>(); } ~ScudoCombinedTest() { - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); UseQuarantine = true; } @@ -106,15 +167,59 @@ template <class TypeParam> struct ScudoCombinedTest : public Test { template <typename T> using ScudoCombinedDeathTest = ScudoCombinedTest<T>; +namespace scudo { +struct TestConditionVariableConfig { + static const bool MaySupportMemoryTagging = true; + template <class A> + using TSDRegistryT = + scudo::TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs. 
+ + struct Primary { + using SizeClassMap = scudo::AndroidSizeClassMap; +#if SCUDO_CAN_USE_PRIMARY64 + static const scudo::uptr RegionSizeLog = 28U; + typedef scudo::u32 CompactPtrT; + static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const scudo::uptr GroupSizeLog = 20U; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; +#else + static const scudo::uptr RegionSizeLog = 18U; + static const scudo::uptr GroupSizeLog = 18U; + typedef scudo::uptr CompactPtrT; +#endif + static const scudo::s32 MinReleaseToOsIntervalMs = 1000; + static const scudo::s32 MaxReleaseToOsIntervalMs = 1000; +#if SCUDO_LINUX + using ConditionVariableT = scudo::ConditionVariableLinux; +#else + using ConditionVariableT = scudo::ConditionVariableDummy; +#endif + }; +#if SCUDO_CAN_USE_PRIMARY64 + template <typename Config> + using PrimaryT = scudo::SizeClassAllocator64<Config>; +#else + template <typename Config> + using PrimaryT = scudo::SizeClassAllocator32<Config>; +#endif + + struct Secondary { + template <typename Config> + using CacheT = scudo::MapAllocatorNoCache<Config>; + }; + template <typename Config> using SecondaryT = scudo::MapAllocator<Config>; +}; +} // namespace scudo + #if SCUDO_FUCHSIA #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, FuchsiaConfig) #else #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, DefaultConfig) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig) + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConditionVariableConfig) #endif #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ @@ -124,11 +229,25 @@ template <typename T> using ScudoCombinedDeathTest = ScudoCombinedTest<T>; #define SCUDO_TYPED_TEST(FIXTURE, NAME) \ template <class TypeParam> \ struct FIXTURE##NAME : public FIXTURE<TypeParam> { \ + using BaseT = FIXTURE<TypeParam>; \ void Run(); \ }; \ SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run() +// Accessing `TSD->getCache()` requires `TSD::Mutex` which isn't easy to test +// using thread-safety analysis. Alternatively, we verify the thread safety +// through a runtime check in ScopedTSD and mark the test body with +// NO_THREAD_SAFETY_ANALYSIS. 
+#define SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(FIXTURE, NAME) \ + template <class TypeParam> \ + struct FIXTURE##NAME : public FIXTURE<TypeParam> { \ + using BaseT = FIXTURE<TypeParam>; \ + void Run() NO_THREAD_SAFETY_ANALYSIS; \ + }; \ + SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run() + SCUDO_TYPED_TEST(ScudoCombinedTest, IsOwned) { auto *Allocator = this->Allocator.get(); static scudo::u8 StaticBuffer[scudo::Chunk::getHeaderSize() + 1]; @@ -153,9 +272,10 @@ void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLog) { for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) { const scudo::uptr Align = 1U << AlignLog; for (scudo::sptr Delta = -32; Delta <= 32; Delta++) { - if (static_cast<scudo::sptr>(1U << SizeLog) + Delta < 0) + if ((1LL << SizeLog) + Delta < 0) continue; - const scudo::uptr Size = (1U << SizeLog) + Delta; + const scudo::uptr Size = + static_cast<scudo::uptr>((1LL << SizeLog) + Delta); void *P = Allocator->allocate(Size, Origin, Align); EXPECT_NE(P, nullptr); EXPECT_TRUE(Allocator->isOwned(P)); @@ -166,6 +286,9 @@ void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLog) { Allocator->deallocate(P, Origin, Size); } } + + Allocator->printStats(); + Allocator->printFragmentationInfo(); } #define SCUDO_MAKE_BASIC_TEST(SizeLog) \ @@ -205,7 +328,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroContents) { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - ASSERT_EQ((reinterpret_cast<char *>(P))[I], 0); + ASSERT_EQ((reinterpret_cast<char *>(P))[I], '\0'); memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } @@ -223,7 +346,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroFill) { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - ASSERT_EQ((reinterpret_cast<char *>(P))[I], 0); + ASSERT_EQ((reinterpret_cast<char *>(P))[I], '\0'); memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } @@ -282,7 +405,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeIncreasing) { // we preserve the data in the process. 
scudo::uptr Size = 16; void *P = Allocator->allocate(Size, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, Size); while (Size < TypeParam::Primary::SizeClassMap::MaxSize * 4) { void *NewP = Allocator->reallocate(P, Size * 2); @@ -304,7 +427,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeDecreasing) { scudo::uptr Size = TypeParam::Primary::SizeClassMap::MaxSize * 2; const scudo::uptr DataSize = 2048U; void *P = Allocator->allocate(Size, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, scudo::Min(Size, DataSize)); while (Size > 1U) { Size /= 2U; @@ -327,10 +450,11 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, ReallocateSame) { constexpr scudo::uptr ReallocSize = TypeParam::Primary::SizeClassMap::MaxSize - 64; void *P = Allocator->allocate(ReallocSize, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, ReallocSize); for (scudo::sptr Delta = -32; Delta < 32; Delta += 8) { - const scudo::uptr NewSize = ReallocSize + Delta; + const scudo::uptr NewSize = + static_cast<scudo::uptr>(static_cast<scudo::sptr>(ReallocSize) + Delta); void *NewP = Allocator->reallocate(P, NewSize); EXPECT_EQ(NewP, P); for (scudo::uptr I = 0; I < ReallocSize - 32; I++) @@ -352,11 +476,13 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, IterateOverChunks) { std::vector<void *> V; for (scudo::uptr I = 0; I < 64U; I++) V.push_back(Allocator->allocate( - rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + static_cast<scudo::uptr>(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); Allocator->disable(); Allocator->iterateOverChunks( 0U, static_cast<scudo::uptr>(SCUDO_MMAP_RANGE_SIZE - 1), - [](uintptr_t Base, size_t Size, void *Arg) { + [](uintptr_t Base, UNUSED size_t Size, void *Arg) { std::vector<void *> *V = reinterpret_cast<std::vector<void *> *>(Arg); void *P = reinterpret_cast<void *>(Base); EXPECT_NE(std::find(V->begin(), V->end(), P), V->end()); @@ -381,7 +507,7 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) { disableDebuggerdMaybe(); void *P = Allocator->allocate(Size, Origin); Allocator->deallocate(P, Origin); - reinterpret_cast<char *>(P)[0] = 0xaa; + reinterpret_cast<char *>(P)[0] = 'A'; }, ""); EXPECT_DEATH( @@ -389,7 +515,7 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) { disableDebuggerdMaybe(); void *P = Allocator->allocate(Size, Origin); Allocator->deallocate(P, Origin); - reinterpret_cast<char *>(P)[Size - 1] = 0xaa; + reinterpret_cast<char *>(P)[Size - 1] = 'A'; }, ""); } @@ -401,18 +527,18 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, DisableMemoryTagging) { if (Allocator->useMemoryTaggingTestOnly()) { // Check that disabling memory tagging works correctly. 
void *P = Allocator->allocate(2048, Origin); - EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 0xaa, ""); + EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 'A', ""); scudo::ScopedDisableMemoryTagChecks NoTagChecks; Allocator->disableMemoryTagging(); - reinterpret_cast<char *>(P)[2048] = 0xaa; + reinterpret_cast<char *>(P)[2048] = 'A'; Allocator->deallocate(P, Origin); P = Allocator->allocate(2048, Origin); EXPECT_EQ(scudo::untagPointer(P), P); - reinterpret_cast<char *>(P)[2048] = 0xaa; + reinterpret_cast<char *>(P)[2048] = 'A'; Allocator->deallocate(P, Origin); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } } @@ -435,23 +561,47 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) { EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos); } -SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS { +SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, CacheDrain) { + using AllocatorT = typename BaseT::AllocatorT; auto *Allocator = this->Allocator.get(); std::vector<void *> V; for (scudo::uptr I = 0; I < 64U; I++) V.push_back(Allocator->allocate( - rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + static_cast<scudo::uptr>(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); for (auto P : V) Allocator->deallocate(P, Origin); - bool UnlockRequired; - auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + typename AllocatorT::TSDRegistryT::ScopedTSD TSD( + *Allocator->getTSDRegistry()); EXPECT_TRUE(!TSD->getCache().isEmpty()); TSD->getCache().drain(); EXPECT_TRUE(TSD->getCache().isEmpty()); - if (UnlockRequired) - TSD->unlock(); +} + +SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, ForceCacheDrain) { + using AllocatorT = typename BaseT::AllocatorT; + auto *Allocator = this->Allocator.get(); + + std::vector<void *> V; + for (scudo::uptr I = 0; I < 64U; I++) + V.push_back(Allocator->allocate( + static_cast<scudo::uptr>(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); + for (auto P : V) + Allocator->deallocate(P, Origin); + + // `ForceAll` will also drain the caches. + Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll); + + typename AllocatorT::TSDRegistryT::ScopedTSD TSD( + *Allocator->getTSDRegistry()); + EXPECT_TRUE(TSD->getCache().isEmpty()); + EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U); + EXPECT_TRUE(Allocator->getQuarantine()->isEmpty()); } SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { @@ -469,12 +619,16 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { } std::vector<std::pair<void *, scudo::uptr>> V; for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = std::rand() % 4096U; + const scudo::uptr Size = static_cast<scudo::uptr>(std::rand()) % 4096U; void *P = Allocator->allocate(Size, Origin); // A region could have ran out of memory, resulting in a null P. if (P) V.push_back(std::make_pair(P, Size)); } + + // Try to interleave pushBlocks(), popBatch() and releaseToOS(). + Allocator->releaseToOS(scudo::ReleaseToOS::Force); + while (!V.empty()) { auto Pair = V.back(); Allocator->deallocate(Pair.first, Origin, Pair.second); @@ -488,18 +642,19 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { } for (auto &T : Threads) T.join(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } // Test that multiple instantiations of the allocator have not messed up the // process's signal handlers (GWP-ASan used to do this). 
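// The TSD access pattern used by CacheDrain/ForceCacheDrain above, old versus
// new (both spellings appear in this diff; shown side by side as a sketch):
//
//   // Before: manual lock management.
//   bool UnlockRequired;
//   auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
//   TSD->getCache().drain();
//   if (UnlockRequired)
//     TSD->unlock();
//
//   // After: RAII guard, unlocked automatically when it goes out of scope; the
//   // locking is verified at runtime inside ScopedTSD, which is why the test
//   // bodies can use NO_THREAD_SAFETY_ANALYSIS.
//   typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
//       *Allocator->getTSDRegistry());
//   TSD->getCache().drain();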
TEST(ScudoCombinedDeathTest, SKIP_ON_FUCHSIA(testSEGV)) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); + scudo::ReservedMemoryT ReservedMemory; + ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, "testSEGV")); + void *P = reinterpret_cast<void *>(ReservedMemory.getBase()); + ASSERT_NE(P, nullptr); EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + ReservedMemory.release(); } struct DeathSizeClassConfig { @@ -515,21 +670,29 @@ struct DeathSizeClassConfig { static const scudo::uptr DeathRegionSizeLog = 21U; struct DeathConfig { static const bool MaySupportMemoryTagging = false; - - // Tiny allocator, its Primary only serves chunks of four sizes. - using SizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>; - typedef scudo::SizeClassAllocator64<DeathConfig> Primary; - static const scudo::uptr PrimaryRegionSizeLog = DeathRegionSizeLog; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; - static const scudo::uptr PrimaryGroupSizeLog = 18; - - typedef scudo::MapAllocatorNoCache SecondaryCache; template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>; + + struct Primary { + // Tiny allocator, its Primary only serves chunks of four sizes. + using SizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>; + static const scudo::uptr RegionSizeLog = DeathRegionSizeLog; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + static const scudo::uptr GroupSizeLog = 18; + }; + template <typename Config> + using PrimaryT = scudo::SizeClassAllocator64<Config>; + + struct Secondary { + template <typename Config> + using CacheT = scudo::MapAllocatorNoCache<Config>; + }; + + template <typename Config> using SecondaryT = scudo::MapAllocator<Config>; }; TEST(ScudoCombinedDeathTest, DeathCombined) { @@ -574,13 +737,14 @@ TEST(ScudoCombinedTest, FullRegion) { std::vector<void *> V; scudo::uptr FailedAllocationsCount = 0; for (scudo::uptr ClassId = 1U; - ClassId <= DeathConfig::SizeClassMap::LargestClassId; ClassId++) { + ClassId <= DeathConfig::Primary::SizeClassMap::LargestClassId; + ClassId++) { const scudo::uptr Size = - DeathConfig::SizeClassMap::getSizeByClassId(ClassId); + DeathConfig::Primary::SizeClassMap::getSizeByClassId(ClassId); // Allocate enough to fill all of the regions above this one. const scudo::uptr MaxNumberOfChunks = ((1U << DeathRegionSizeLog) / Size) * - (DeathConfig::SizeClassMap::LargestClassId - ClassId + 1); + (DeathConfig::Primary::SizeClassMap::LargestClassId - ClassId + 1); void *P; for (scudo::uptr I = 0; I <= MaxNumberOfChunks; I++) { P = Allocator->allocate(Size - 64U, Origin); @@ -601,11 +765,12 @@ TEST(ScudoCombinedTest, FullRegion) { // operation without issue. 
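// The DeathConfig rewrite above is a compact illustration of the config
// migration made throughout this change: flat, prefix-named members move into
// nested structs. Roughly (a sketch of the mapping, not exhaustive):
//
//   using SizeClassMap = ...;                    ->  struct Primary { using SizeClassMap = ...; ... };
//   PrimaryRegionSizeLog                         ->  Primary::RegionSizeLog
//   PrimaryMinReleaseToOsIntervalMs              ->  Primary::MinReleaseToOsIntervalMs
//   typedef SizeClassAllocator64<...> Primary;   ->  template <typename C> using PrimaryT = SizeClassAllocator64<C>;
//   typedef MapAllocatorNoCache SecondaryCache;  ->  struct Secondary {
//                                                      template <typename C> using CacheT = MapAllocatorNoCache<C>;
//                                                    };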
SCUDO_TYPED_TEST(ScudoCombinedTest, ReleaseToOS) { auto *Allocator = this->Allocator.get(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } SCUDO_TYPED_TEST(ScudoCombinedTest, OddEven) { auto *Allocator = this->Allocator.get(); + Allocator->setOption(scudo::Option::MemtagTuning, M_MEMTAG_TUNING_BUFFER_OVERFLOW); if (!Allocator->useMemoryTaggingTestOnly()) return; @@ -675,7 +840,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemInit) { for (unsigned I = 0; I != Ptrs.size(); ++I) { Ptrs[I] = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); for (scudo::uptr J = 0; J < Size; ++J) - ASSERT_EQ((reinterpret_cast<char *>(Ptrs[I]))[J], 0); + ASSERT_EQ((reinterpret_cast<char *>(Ptrs[I]))[J], '\0'); } } @@ -687,33 +852,120 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateInPlaceStress) { // Regression test: make realloc-in-place happen at the very right end of a // mapped region. - constexpr int nPtrs = 10000; - for (int i = 1; i < 32; ++i) { + constexpr size_t nPtrs = 10000; + for (scudo::uptr i = 1; i < 32; ++i) { scudo::uptr Size = 16 * i - 1; std::vector<void *> Ptrs; - for (int i = 0; i < nPtrs; ++i) { + for (size_t i = 0; i < nPtrs; ++i) { void *P = Allocator->allocate(Size, Origin); P = Allocator->reallocate(P, Size + 1); Ptrs.push_back(P); } - for (int i = 0; i < nPtrs; ++i) + for (size_t i = 0; i < nPtrs; ++i) Allocator->deallocate(Ptrs[i], Origin); } } +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferDefaultDisabled) { + // The RingBuffer is not initialized until tracking is enabled for the + // first time. + auto *Allocator = this->Allocator.get(); + EXPECT_EQ(0u, Allocator->getRingBufferSize()); + EXPECT_EQ(nullptr, Allocator->getRingBufferAddress()); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferInitOnce) { + auto *Allocator = this->Allocator.get(); + Allocator->setTrackAllocationStacks(true); + + auto RingBufferSize = Allocator->getRingBufferSize(); + ASSERT_GT(RingBufferSize, 0u); + auto *RingBufferAddress = Allocator->getRingBufferAddress(); + EXPECT_NE(nullptr, RingBufferAddress); + + // Enable tracking again to verify that the initialization only happens once. + Allocator->setTrackAllocationStacks(true); + ASSERT_EQ(RingBufferSize, Allocator->getRingBufferSize()); + EXPECT_EQ(RingBufferAddress, Allocator->getRingBufferAddress()); +} + SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferSize) { auto *Allocator = this->Allocator.get(); - auto Size = Allocator->getRingBufferSize(); - if (Size > 0) - EXPECT_EQ(Allocator->getRingBufferAddress()[Size - 1], '\0'); + Allocator->setTrackAllocationStacks(true); + + auto RingBufferSize = Allocator->getRingBufferSize(); + ASSERT_GT(RingBufferSize, 0u); + EXPECT_EQ(Allocator->getRingBufferAddress()[RingBufferSize - 1], '\0'); } SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferAddress) { auto *Allocator = this->Allocator.get(); - auto *Addr = Allocator->getRingBufferAddress(); - EXPECT_NE(Addr, nullptr); - EXPECT_EQ(Addr, Allocator->getRingBufferAddress()); + Allocator->setTrackAllocationStacks(true); + + auto *RingBufferAddress = Allocator->getRingBufferAddress(); + EXPECT_NE(RingBufferAddress, nullptr); + EXPECT_EQ(RingBufferAddress, Allocator->getRingBufferAddress()); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, StackDepotDefaultDisabled) { + // The StackDepot is not initialized until tracking is enabled for the + // first time. 
+ auto *Allocator = this->Allocator.get(); + EXPECT_EQ(0u, Allocator->getStackDepotSize()); + EXPECT_EQ(nullptr, Allocator->getStackDepotAddress()); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, StackDepotInitOnce) { + auto *Allocator = this->Allocator.get(); + Allocator->setTrackAllocationStacks(true); + + auto StackDepotSize = Allocator->getStackDepotSize(); + EXPECT_GT(StackDepotSize, 0u); + auto *StackDepotAddress = Allocator->getStackDepotAddress(); + EXPECT_NE(nullptr, StackDepotAddress); + + // Enable tracking again to verify that the initialization only happens once. + Allocator->setTrackAllocationStacks(true); + EXPECT_EQ(StackDepotSize, Allocator->getStackDepotSize()); + EXPECT_EQ(StackDepotAddress, Allocator->getStackDepotAddress()); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, StackDepotSize) { + auto *Allocator = this->Allocator.get(); + Allocator->setTrackAllocationStacks(true); + + auto StackDepotSize = Allocator->getStackDepotSize(); + EXPECT_GT(StackDepotSize, 0u); + EXPECT_EQ(Allocator->getStackDepotAddress()[StackDepotSize - 1], '\0'); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, StackDepotAddress) { + auto *Allocator = this->Allocator.get(); + Allocator->setTrackAllocationStacks(true); + + auto *StackDepotAddress = Allocator->getStackDepotAddress(); + EXPECT_NE(StackDepotAddress, nullptr); + EXPECT_EQ(StackDepotAddress, Allocator->getStackDepotAddress()); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, StackDepot) { + alignas(scudo::StackDepot) char Buf[sizeof(scudo::StackDepot) + + 1024 * sizeof(scudo::atomic_u64) + + 1024 * sizeof(scudo::atomic_u32)] = {}; + auto *Depot = reinterpret_cast<scudo::StackDepot *>(Buf); + Depot->init(1024, 1024); + ASSERT_TRUE(Depot->isValid(sizeof(Buf))); + ASSERT_FALSE(Depot->isValid(sizeof(Buf) - 1)); + scudo::uptr Stack[] = {1, 2, 3}; + scudo::u32 Elem = Depot->insert(&Stack[0], &Stack[3]); + scudo::uptr RingPosPtr = 0; + scudo::uptr SizePtr = 0; + ASSERT_TRUE(Depot->find(Elem, &RingPosPtr, &SizePtr)); + ASSERT_EQ(SizePtr, 3u); + EXPECT_EQ(Depot->at(RingPosPtr), 1u); + EXPECT_EQ(Depot->at(RingPosPtr + 1), 2u); + EXPECT_EQ(Depot->at(RingPosPtr + 2), 3u); } #if SCUDO_CAN_USE_PRIMARY64 @@ -737,49 +989,12 @@ TEST(ScudoCombinedTest, BasicTrustyConfig) { } bool UnlockRequired; - auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + typename AllocatorT::TSDRegistryT::ScopedTSD TSD( + *Allocator->getTSDRegistry()); TSD->getCache().drain(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } #endif #endif - -#if SCUDO_LINUX - -SCUDO_TYPED_TEST(ScudoCombinedTest, SoftRssLimit) { - auto *Allocator = this->Allocator.get(); - Allocator->setRssLimitsTestOnly(1, 0, true); - - size_t Megabyte = 1024 * 1024; - size_t ChunkSize = 16; - size_t Error = 256; - - std::vector<void *> Ptrs; - for (size_t index = 0; index < Megabyte + Error; index += ChunkSize) { - void *Ptr = Allocator->allocate(ChunkSize, Origin); - Ptrs.push_back(Ptr); - } - - EXPECT_EQ(nullptr, Allocator->allocate(ChunkSize, Origin)); - - for (void *Ptr : Ptrs) - Allocator->deallocate(Ptr, Origin); -} - -SCUDO_TYPED_TEST(ScudoCombinedTest, HardRssLimit) { - auto *Allocator = this->Allocator.get(); - Allocator->setRssLimitsTestOnly(0, 1, false); - - size_t Megabyte = 1024 * 1024; - - EXPECT_DEATH( - { - disableDebuggerdMaybe(); - Allocator->allocate(Megabyte, Origin); - }, - ""); -} - -#endif diff --git a/standalone/tests/common_test.cpp b/standalone/tests/common_test.cpp index a322a01fb93..fff7c662a41 100644 --- a/standalone/tests/common_test.cpp +++ 
b/standalone/tests/common_test.cpp @@ -10,6 +10,7 @@ #include "tests/scudo_unit_test.h" #include "common.h" +#include "mem_map.h" #include <algorithm> #include <fstream> @@ -34,60 +35,41 @@ TEST(ScudoCommonTest, SKIP_ON_FUCHSIA(ResidentMemorySize)) { const uptr Size = 1ull << 30; const uptr Threshold = Size >> 3; - MapPlatformData Data = {}; - void *P = map(nullptr, Size, "ResidentMemorySize", 0, &Data); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "ResidentMemorySize")); + ASSERT_NE(MemMap.getBase(), 0U); + void *P = reinterpret_cast<void *>(MemMap.getBase()); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } TEST(ScudoCommonTest, Zeros) { const uptr Size = 1ull << 20; - MapPlatformData Data = {}; - uptr *P = reinterpret_cast<uptr *>(map(nullptr, Size, "Zeros", 0, &Data)); - const ptrdiff_t N = Size / sizeof(*P); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "Zeros")); + ASSERT_NE(MemMap.getBase(), 0U); + uptr *P = reinterpret_cast<uptr *>(MemMap.getBase()); + const ptrdiff_t N = Size / sizeof(uptr); EXPECT_EQ(std::count(P, P + N, 0), N); memset(P, 1, Size); EXPECT_EQ(std::count(P, P + N, 0), 0); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_EQ(std::count(P, P + N, 0), N); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } -#if SCUDO_LINUX && !defined(__powerpc__) -// This test fails intermediately on PPC, which is why this test is disabled -// for now on this platform. -TEST(ScudoCommonTest, GetRssFromBuffer) { - constexpr int64_t AllocSize = 10000000; - constexpr int64_t Error = 3000000; - constexpr size_t Runs = 10; - - int64_t Rss = scudo::GetRSS(); - EXPECT_GT(Rss, 0); - - std::vector<std::unique_ptr<char[]>> Allocs(Runs); - for (auto &Alloc : Allocs) { - Alloc.reset(new char[AllocSize]()); - int64_t Prev = Rss; - Rss = scudo::GetRSS(); - EXPECT_LE(std::abs(Rss - AllocSize - Prev), Error); - } -} -#endif // SCUDO_LINUX - } // namespace scudo diff --git a/standalone/tests/condition_variable_test.cpp b/standalone/tests/condition_variable_test.cpp new file mode 100644 index 00000000000..caba1f64ab0 --- /dev/null +++ b/standalone/tests/condition_variable_test.cpp @@ -0,0 +1,59 @@ +//===-- condition_variable_test.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "common.h" +#include "condition_variable.h" +#include "mutex.h" + +#include <thread> + +template <typename ConditionVariableT> void simpleWaitAndNotifyAll() { + constexpr scudo::u32 NumThreads = 2; + constexpr scudo::u32 CounterMax = 1024; + std::thread Threads[NumThreads]; + + scudo::HybridMutex M; + ConditionVariableT CV; + CV.bindTestOnly(M); + scudo::u32 Counter = 0; + + for (scudo::u32 I = 0; I < NumThreads; ++I) { + Threads[I] = std::thread( + [&](scudo::u32 Id) { + do { + scudo::ScopedLock L(M); + if (Counter % NumThreads != Id && Counter < CounterMax) + CV.wait(M); + if (Counter >= CounterMax) { + break; + } else { + ++Counter; + CV.notifyAll(M); + } + } while (true); + }, + I); + } + + for (std::thread &T : Threads) + T.join(); + + EXPECT_EQ(Counter, CounterMax); +} + +TEST(ScudoConditionVariableTest, DummyCVWaitAndNotifyAll) { + simpleWaitAndNotifyAll<scudo::ConditionVariableDummy>(); +} + +#ifdef SCUDO_LINUX +TEST(ScudoConditionVariableTest, LinuxCVWaitAndNotifyAll) { + simpleWaitAndNotifyAll<scudo::ConditionVariableLinux>(); +} +#endif diff --git a/standalone/tests/map_test.cpp b/standalone/tests/map_test.cpp index ff05258db58..06a56f84803 100644 --- a/standalone/tests/map_test.cpp +++ b/standalone/tests/map_test.cpp @@ -9,6 +9,7 @@ #include "tests/scudo_unit_test.h" #include "common.h" +#include "mem_map.h" #include <string.h> #include <unistd.h> @@ -22,11 +23,15 @@ TEST(ScudoMapTest, PageSize) { TEST(ScudoMapDeathTest, MapNoAccessUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); - EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + + ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, MappingName)); + EXPECT_NE(ReservedMemory.getBase(), 0U); + EXPECT_DEATH( + memset(reinterpret_cast<void *>(ReservedMemory.getBase()), 0xaa, Size), + ""); + + ReservedMemory.release(); } TEST(ScudoMapDeathTest, MapUnmap) { @@ -36,11 +41,13 @@ TEST(ScudoMapDeathTest, MapUnmap) { // Repeat few time to avoid missing crash if it's mmaped by unrelated // code. 
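The new test above exercises the condition-variable API introduced by this change (bindTestOnly, wait, notifyAll under a scudo::HybridMutex). The following is a minimal sketch of the same protocol outside the test harness, assuming only those calls; waitNotifySketch and the Done flag are illustrative names, not part of the change.

#include "condition_variable.h"
#include "mutex.h"

#include <thread>

// One thread publishes a flag under the mutex, the other waits for it.
static void waitNotifySketch() {
  scudo::HybridMutex M;
  scudo::ConditionVariableDummy CV;
  CV.bindTestOnly(M);
  bool Done = false;

  std::thread Producer([&]() {
    scudo::ScopedLock L(M);
    Done = true;
    CV.notifyAll(M); // As in the test, notifyAll() is called with M held.
  });

  {
    scudo::ScopedLock L(M);
    while (!Done)
      CV.wait(M); // Releases M while blocked, reacquires before returning.
  }
  Producer.join();
}

As in the test, wait(M) has to release the mutex while blocked; otherwise the notifying thread could never acquire it to make progress.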
for (int i = 0; i < 10; ++i) { - void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr); - if (!P) + scudo::MemMapT MemMap; + MemMap.map(/*Addr=*/0U, Size, MappingName); + scudo::uptr P = MemMap.getBase(); + if (P == 0U) continue; - scudo::unmap(P, Size, 0, nullptr); - memset(P, 0xbb, Size); + MemMap.unmap(MemMap.getBase(), Size); + memset(reinterpret_cast<void *>(P), 0xbb, Size); } }, ""); @@ -49,30 +56,36 @@ TEST(ScudoMapDeathTest, MapUnmap) { TEST(ScudoMapDeathTest, MapWithGuardUnmap) { const scudo::uptr PageSize = scudo::getPageSizeCached(); const scudo::uptr Size = 4 * PageSize; - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size + 2 * PageSize, MappingName, MAP_NOACCESS, - &Data); - EXPECT_NE(P, nullptr); - void *Q = - reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize); - EXPECT_EQ(scudo::map(Q, Size, MappingName, 0, &Data), Q); - memset(Q, 0xaa, Size); - EXPECT_DEATH(memset(Q, 0xaa, Size + 1), ""); - scudo::unmap(P, Size + 2 * PageSize, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + ASSERT_TRUE( + ReservedMemory.create(/*Addr=*/0U, Size + 2 * PageSize, MappingName)); + ASSERT_NE(ReservedMemory.getBase(), 0U); + + scudo::MemMapT MemMap = + ReservedMemory.dispatch(ReservedMemory.getBase(), Size + 2 * PageSize); + ASSERT_TRUE(MemMap.isAllocated()); + scudo::uptr Q = MemMap.getBase() + PageSize; + ASSERT_TRUE(MemMap.remap(Q, Size, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xaa, Size); + EXPECT_DEATH(memset(reinterpret_cast<void *>(Q), 0xaa, Size + 1), ""); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } TEST(ScudoMapTest, MapGrowUnmap) { const scudo::uptr PageSize = scudo::getPageSizeCached(); const scudo::uptr Size = 4 * PageSize; - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); - void *Q = - reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize); - EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q); - memset(Q, 0xaa, PageSize); - Q = reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(Q) + PageSize); - EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q); - memset(Q, 0xbb, PageSize); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + ReservedMemory.create(/*Addr=*/0U, Size, MappingName); + ASSERT_TRUE(ReservedMemory.isCreated()); + + scudo::MemMapT MemMap = + ReservedMemory.dispatch(ReservedMemory.getBase(), Size); + ASSERT_TRUE(MemMap.isAllocated()); + scudo::uptr Q = MemMap.getBase() + PageSize; + ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xaa, PageSize); + Q += PageSize; + ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xbb, PageSize); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } diff --git a/standalone/tests/memtag_test.cpp b/standalone/tests/memtag_test.cpp index 8a40eda3a57..37a18858e67 100644 --- a/standalone/tests/memtag_test.cpp +++ b/standalone/tests/memtag_test.cpp @@ -7,16 +7,22 @@ //===----------------------------------------------------------------------===// #include "common.h" +#include "mem_map.h" #include "memtag.h" #include "platform.h" #include "tests/scudo_unit_test.h" +extern "C" void __hwasan_init() __attribute__((weak)); + #if SCUDO_LINUX namespace scudo { TEST(MemtagBasicDeathTest, Unsupported) { if (archSupportsMemoryTagging()) GTEST_SKIP(); + // Skip when running with HWASan. 
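The map_test conversions above all follow the MemMapT lifecycle that replaces the old map()/unmap() free functions. Below is a condensed sketch of that lifecycle using only the calls visible in these tests (map, getBase, releasePagesToOS, unmap, getCapacity); the function name and mapping name are illustrative.

#include "common.h"
#include "mem_map.h"

#include <string.h>

// map -> use -> releasePagesToOS -> unmap, mirroring the converted tests.
static void memMapLifecycleSketch() {
  const scudo::uptr Size = 4 * scudo::getPageSizeCached();
  scudo::MemMapT MemMap;
  if (!MemMap.map(/*Addr=*/0U, Size, "scudo:sketch"))
    return; // The tests above check the return value (or the base) as well.
  memset(reinterpret_cast<void *>(MemMap.getBase()), 0xaa, Size);
  // Return the physical pages to the OS while keeping the reservation.
  MemMap.releasePagesToOS(MemMap.getBase(), Size);
  // Tear down the whole mapping.
  MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
}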
+ if (&__hwasan_init != 0) + GTEST_SKIP(); EXPECT_DEATH(archMemoryTagGranuleSize(), "not supported"); EXPECT_DEATH(untagPointer((uptr)0), "not supported"); @@ -45,20 +51,24 @@ protected: GTEST_SKIP() << "Memory tagging is not supported"; BufferSize = getPageSizeCached(); - Buffer = reinterpret_cast<u8 *>( - map(nullptr, BufferSize, "MemtagTest", MAP_MEMTAG, &Data)); - Addr = reinterpret_cast<uptr>(Buffer); + ASSERT_FALSE(MemMap.isAllocated()); + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, BufferSize, "MemtagTest", MAP_MEMTAG)); + ASSERT_NE(MemMap.getBase(), 0U); + Addr = MemMap.getBase(); + Buffer = reinterpret_cast<u8 *>(Addr); EXPECT_TRUE(isAligned(Addr, archMemoryTagGranuleSize())); EXPECT_EQ(Addr, untagPointer(Addr)); } void TearDown() override { - if (Buffer) - unmap(Buffer, BufferSize, 0, &Data); + if (Buffer) { + ASSERT_TRUE(MemMap.isAllocated()); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } } uptr BufferSize = 0; - MapPlatformData Data = {}; + scudo::MemMapT MemMap = {}; u8 *Buffer = nullptr; uptr Addr = 0; }; @@ -71,20 +81,24 @@ TEST_F(MemtagTest, ArchMemoryTagGranuleSize) { } TEST_F(MemtagTest, ExtractTag) { +// The test is already skipped on anything other than 64 bit. But +// compiling on 32 bit leads to warnings/errors, so skip compiling the test. +#if defined(__LP64__) uptr Tags = 0; // Try all value for the top byte and check the tags values are in the // expected range. for (u64 Top = 0; Top < 0x100; ++Top) Tags = Tags | (1u << extractTag(Addr | (Top << 56))); EXPECT_EQ(0xffffull, Tags); +#endif } TEST_F(MemtagDeathTest, AddFixedTag) { for (uptr Tag = 0; Tag < 0x10; ++Tag) EXPECT_EQ(Tag, extractTag(addFixedTag(Addr, Tag))); if (SCUDO_DEBUG) { - EXPECT_DEBUG_DEATH(addFixedTag(Addr, 16), ""); - EXPECT_DEBUG_DEATH(addFixedTag(~Addr, 0), ""); + EXPECT_DEATH(addFixedTag(Addr, 16), ""); + EXPECT_DEATH(addFixedTag(~Addr, 0), ""); } } @@ -111,23 +125,35 @@ TEST_F(MemtagTest, SelectRandomTag) { uptr Tags = 0; for (uptr I = 0; I < 100000; ++I) Tags = Tags | (1u << extractTag(selectRandomTag(Ptr, 0))); - EXPECT_EQ(0xfffeull, Tags); + // std::popcnt is C++20 + int PopCnt = 0; + while (Tags) { + PopCnt += Tags & 1; + Tags >>= 1; + } + // Random tags are not always very random, and this test is not about PRNG + // quality. Anything above half would be satisfactory. + EXPECT_GE(PopCnt, 8); } } TEST_F(MemtagTest, SelectRandomTagWithMask) { +// The test is already skipped on anything other than 64 bit. But +// compiling on 32 bit leads to warnings/errors, so skip compiling the test. +#if defined(__LP64__) for (uptr j = 0; j < 32; ++j) { for (uptr i = 0; i < 1000; ++i) EXPECT_NE(j, extractTag(selectRandomTag(Addr, 1ull << j))); } +#endif } TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(LoadStoreTagUnaligned)) { for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) { if (P % archMemoryTagGranuleSize() == 0) continue; - EXPECT_DEBUG_DEATH(loadTag(P), ""); - EXPECT_DEBUG_DEATH(storeTag(P), ""); + EXPECT_DEATH(loadTag(P), ""); + EXPECT_DEATH(storeTag(P), ""); } } @@ -148,11 +174,14 @@ TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(StoreTagsUnaligned)) { uptr Tagged = addFixedTag(P, 5); if (Tagged % archMemoryTagGranuleSize() == 0) continue; - EXPECT_DEBUG_DEATH(storeTags(Tagged, Tagged), ""); + EXPECT_DEATH(storeTags(Tagged, Tagged), ""); } } TEST_F(MemtagTest, StoreTags) { +// The test is already skipped on anything other than 64 bit. But +// compiling on 32 bit leads to warnings/errors, so skip compiling the test. 
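The SelectRandomTag check above counts set bits with a hand-written loop because std::popcount is only available in C++20. The same loop as a standalone helper, shown only for clarity:

#include <cstdint>

// Counts set bits the same way the test does, one bit per iteration.
static int countSetBits(std::uint64_t Bits) {
  int Count = 0;
  while (Bits) {
    Count += static_cast<int>(Bits & 1);
    Bits >>= 1;
  }
  return Count;
}

An equivalent that iterates once per set bit is Bits &= Bits - 1 (Kernighan's method); for a 16-bit tag mask either variant is cheap enough.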
+#if defined(__LP64__) const uptr MaxTaggedSize = 4 * archMemoryTagGranuleSize(); for (uptr Size = 0; Size <= MaxTaggedSize; ++Size) { uptr NoTagBegin = Addr + archMemoryTagGranuleSize(); @@ -179,8 +208,9 @@ TEST_F(MemtagTest, StoreTags) { EXPECT_EQ(LoadPtr, loadTag(LoadPtr)); // Reset tags without using StoreTags. - releasePagesToOS(Addr, 0, BufferSize, &Data); + MemMap.releasePagesToOS(Addr, BufferSize); } +#endif } } // namespace scudo diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp index c7ebcc3f82f..1cf3bb51db0 100644 --- a/standalone/tests/primary_test.cpp +++ b/standalone/tests/primary_test.cpp @@ -8,6 +8,9 @@ #include "tests/scudo_unit_test.h" +#include "allocator_config.h" +#include "allocator_config_wrapper.h" +#include "condition_variable.h" #include "primary32.h" #include "primary64.h" #include "size_class_map.h" @@ -25,84 +28,146 @@ // 32-bit architectures. It's not something we want to encourage, but we still // should ensure the tests pass. -struct TestConfig1 { - static const scudo::uptr PrimaryRegionSizeLog = 18U; - static const scudo::uptr PrimaryGroupSizeLog = 18U; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; +template <typename SizeClassMapT> struct TestConfig1 { static const bool MaySupportMemoryTagging = false; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = SizeClassMapT; + static const scudo::uptr RegionSizeLog = 18U; + static const scudo::uptr GroupSizeLog = 18U; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + }; }; -struct TestConfig2 { +template <typename SizeClassMapT> struct TestConfig2 { + static const bool MaySupportMemoryTagging = false; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = SizeClassMapT; #if defined(__mips__) - // Unable to allocate greater size on QEMU-user. - static const scudo::uptr PrimaryRegionSizeLog = 23U; + // Unable to allocate greater size on QEMU-user. 
+ static const scudo::uptr RegionSizeLog = 23U; #else - static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::uptr RegionSizeLog = 24U; #endif - static const scudo::uptr PrimaryGroupSizeLog = 20U; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - static const bool MaySupportMemoryTagging = false; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + static const scudo::uptr GroupSizeLog = 20U; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + }; }; -struct TestConfig3 { +template <typename SizeClassMapT> struct TestConfig3 { + static const bool MaySupportMemoryTagging = true; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = SizeClassMapT; #if defined(__mips__) - // Unable to allocate greater size on QEMU-user. - static const scudo::uptr PrimaryRegionSizeLog = 23U; + // Unable to allocate greater size on QEMU-user. + static const scudo::uptr RegionSizeLog = 23U; #else - static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::uptr RegionSizeLog = 24U; #endif - static const scudo::uptr PrimaryGroupSizeLog = 20U; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - static const bool MaySupportMemoryTagging = true; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + static const scudo::uptr GroupSizeLog = 20U; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableContiguousRegions = false; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + }; }; -struct TestConfig4 { +template <typename SizeClassMapT> struct TestConfig4 { + static const bool MaySupportMemoryTagging = true; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = SizeClassMapT; #if defined(__mips__) - // Unable to allocate greater size on QEMU-user. - static const scudo::uptr PrimaryRegionSizeLog = 23U; + // Unable to allocate greater size on QEMU-user. 
+ static const scudo::uptr RegionSizeLog = 23U; #else - static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::uptr RegionSizeLog = 24U; #endif - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - static const bool MaySupportMemoryTagging = true; - static const scudo::uptr PrimaryCompactPtrScale = 3U; - static const scudo::uptr PrimaryGroupSizeLog = 20U; - typedef scudo::u32 PrimaryCompactPtrT; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + static const scudo::uptr CompactPtrScale = 3U; + static const scudo::uptr GroupSizeLog = 20U; + typedef scudo::u32 CompactPtrT; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + }; }; -template <typename BaseConfig, typename SizeClassMapT> -struct Config : public BaseConfig { - using SizeClassMap = SizeClassMapT; +// This is the only test config that enables the condition variable. +template <typename SizeClassMapT> struct TestConfig5 { + static const bool MaySupportMemoryTagging = true; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = SizeClassMapT; +#if defined(__mips__) + // Unable to allocate greater size on QEMU-user. + static const scudo::uptr RegionSizeLog = 23U; +#else + static const scudo::uptr RegionSizeLog = 24U; +#endif + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const scudo::uptr GroupSizeLog = 18U; + typedef scudo::u32 CompactPtrT; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; +#if SCUDO_LINUX + using ConditionVariableT = scudo::ConditionVariableLinux; +#else + using ConditionVariableT = scudo::ConditionVariableDummy; +#endif + }; }; -template <typename BaseConfig, typename SizeClassMapT> +template <template <typename> class BaseConfig, typename SizeClassMapT> +struct Config : public BaseConfig<SizeClassMapT> {}; + +template <template <typename> class BaseConfig, typename SizeClassMapT> struct SizeClassAllocator - : public scudo::SizeClassAllocator64<Config<BaseConfig, SizeClassMapT>> {}; + : public scudo::SizeClassAllocator64< + scudo::PrimaryConfig<Config<BaseConfig, SizeClassMapT>>> {}; template <typename SizeClassMapT> struct SizeClassAllocator<TestConfig1, SizeClassMapT> - : public scudo::SizeClassAllocator32<Config<TestConfig1, SizeClassMapT>> {}; + : public scudo::SizeClassAllocator32< + scudo::PrimaryConfig<Config<TestConfig1, SizeClassMapT>>> {}; -template <typename BaseConfig, typename SizeClassMapT> +template <template <typename> class BaseConfig, typename SizeClassMapT> struct TestAllocator : public SizeClassAllocator<BaseConfig, SizeClassMapT> { - ~TestAllocator() { this->unmapTestOnly(); } + ~TestAllocator() { + this->verifyAllBlocksAreReleasedTestOnly(); + this->unmapTestOnly(); + } void *operator new(size_t size) { void *p = nullptr; @@ -113,7 +178,8 @@ struct TestAllocator : public SizeClassAllocator<BaseConfig, SizeClassMapT> { void operator delete(void *ptr) { free(ptr); } }; -template <class BaseConfig> 
struct ScudoPrimaryTest : public Test {}; +template <template <typename> class BaseConfig> +struct ScudoPrimaryTest : public Test {}; #if SCUDO_FUCHSIA #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ @@ -124,7 +190,8 @@ template <class BaseConfig> struct ScudoPrimaryTest : public Test {}; SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4) + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig5) #endif #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ @@ -132,12 +199,13 @@ template <class BaseConfig> struct ScudoPrimaryTest : public Test {}; TEST_F(FIXTURE##NAME##_##TYPE, NAME) { FIXTURE##NAME<TYPE>::Run(); } #define SCUDO_TYPED_TEST(FIXTURE, NAME) \ - template <class TypeParam> \ + template <template <typename> class TypeParam> \ struct FIXTURE##NAME : public FIXTURE<TypeParam> { \ void Run(); \ }; \ SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ - template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run() + template <template <typename> class TypeParam> \ + void FIXTURE##NAME<TypeParam>::Run() SCUDO_TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; @@ -161,30 +229,36 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { Cache.deallocate(ClassId, Pointers[J]); } Cache.destroy(nullptr); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); Str.output(); } struct SmallRegionsConfig { - using SizeClassMap = scudo::DefaultSizeClassMap; - static const scudo::uptr PrimaryRegionSizeLog = 21U; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; static const bool MaySupportMemoryTagging = false; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; - static const scudo::uptr PrimaryGroupSizeLog = 20U; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Primary { + using SizeClassMap = scudo::DefaultSizeClassMap; + static const scudo::uptr RegionSizeLog = 21U; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + static const scudo::uptr GroupSizeLog = 20U; + }; }; // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. // For the 32-bit one, it requires actually exhausting memory, so we skip it. 
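Because each TestConfigN is now itself a class template over the size class map, the typed-test fixture and the SCUDO_TYPED_TEST macros take a template template parameter. A stripped-down illustration of the same idiom; ExampleConfig, ExampleFixture and ConfigT are hypothetical names used only to show the shape of the pattern.

#include "size_class_map.h"

// The config is a template over the size class map, as TestConfig1..5 are.
template <typename SizeClassMapT> struct ExampleConfig {
  using SizeClassMap = SizeClassMapT;
};

// The fixture is parameterized by the config *template*, not a concrete type,
// so each test can instantiate it with whichever size class map it needs.
template <template <typename> class BaseConfig> struct ExampleFixture {
  template <typename SizeClassMapT> using ConfigT = BaseConfig<SizeClassMapT>;
};

// Picking a concrete instantiation:
using ExampleDefaultConfig =
    ExampleFixture<ExampleConfig>::ConfigT<scudo::DefaultSizeClassMap>;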
TEST(ScudoPrimaryTest, Primary64OOM) { - using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>; - using TransferBatch = Primary::CacheT::TransferBatch; + using Primary = + scudo::SizeClassAllocator64<scudo::PrimaryConfig<SmallRegionsConfig>>; Primary Allocator; Allocator.init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; @@ -192,30 +266,28 @@ TEST(ScudoPrimaryTest, Primary64OOM) { Stats.init(); Cache.init(&Stats, &Allocator); bool AllocationFailed = false; - std::vector<TransferBatch *> Batches; + std::vector<void *> Blocks; const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId; const scudo::uptr Size = Primary::getSizeByClassId(ClassId); - typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached]; + const scudo::u16 MaxCachedBlockCount = Primary::CacheT::getMaxCached(Size); for (scudo::uptr I = 0; I < 10000U; I++) { - TransferBatch *B = Allocator.popBatch(&Cache, ClassId); - if (!B) { - AllocationFailed = true; - break; + for (scudo::uptr J = 0; J < MaxCachedBlockCount; ++J) { + void *Ptr = Cache.allocate(ClassId); + if (Ptr == nullptr) { + AllocationFailed = true; + break; + } + memset(Ptr, 'B', Size); + Blocks.push_back(Ptr); } - for (scudo::u16 J = 0; J < B->getCount(); J++) - memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size); - Batches.push_back(B); - } - while (!Batches.empty()) { - TransferBatch *B = Batches.back(); - Batches.pop_back(); - B->copyToArray(Blocks); - Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount()); - Cache.deallocate(Primary::SizeClassMap::BatchClassId, B); } + + for (auto *Ptr : Blocks) + Cache.deallocate(ClassId, Ptr); + Cache.destroy(nullptr); - Allocator.releaseToOS(); + Allocator.releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator.getStats(&Str); Str.output(); @@ -231,7 +303,8 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { Cache.init(nullptr, Allocator.get()); std::vector<std::pair<scudo::uptr, void *>> V; for (scudo::uptr I = 0; I < 64U; I++) { - const scudo::uptr Size = std::rand() % Primary::SizeClassMap::MaxSize; + const scudo::uptr Size = + static_cast<scudo::uptr>(std::rand()) % Primary::SizeClassMap::MaxSize; const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); void *P = Cache.allocate(ClassId); V.push_back(std::make_pair(ClassId, P)); @@ -253,21 +326,21 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { V.pop_back(); } Cache.destroy(nullptr); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); Str.output(); } SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { - using Primary = TestAllocator<TypeParam, scudo::SvelteSizeClassMap>; + using Primary = TestAllocator<TypeParam, scudo::Config::Primary::SizeClassMap>; std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); std::mutex Mutex; std::condition_variable Cv; bool Ready = false; std::thread Threads[32]; - for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) + for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) { Threads[I] = std::thread([&]() { static thread_local typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); @@ -278,21 +351,30 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { Cv.wait(Lock); } for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = - std::rand() % Primary::SizeClassMap::MaxSize / 4; + const scudo::uptr Size = static_cast<scudo::uptr>(std::rand()) % + Primary::SizeClassMap::MaxSize / 4; 
const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); void *P = Cache.allocate(ClassId); if (P) V.push_back(std::make_pair(ClassId, P)); } + + // Try to interleave pushBlocks(), popBlocks() and releaseToOS(). + Allocator->releaseToOS(scudo::ReleaseToOS::Force); + while (!V.empty()) { auto Pair = V.back(); Cache.deallocate(Pair.first, Pair.second); V.pop_back(); + // This increases the chance of having non-full TransferBatches and it + // will jump into the code path of merging TransferBatches. + if (std::rand() % 8 == 0) + Cache.drain(); } Cache.destroy(nullptr); }); + } { std::unique_lock<std::mutex> Lock(Mutex); Ready = true; @@ -300,9 +382,10 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { } for (auto &T : Threads) T.join(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); + Allocator->getFragmentationInfo(&Str); Str.output(); } @@ -322,7 +405,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { EXPECT_NE(P, nullptr); Cache.deallocate(ClassId, P); Cache.destroy(nullptr); - EXPECT_GT(Allocator->releaseToOS(), 0U); + EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll), 0U); } SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) { @@ -367,4 +450,10 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) { EXPECT_LE(*std::max_element(Blocks.begin(), Blocks.end()) - *std::min_element(Blocks.begin(), Blocks.end()), GroupSizeMem * 2); + + while (!Blocks.empty()) { + Cache.deallocate(ClassId, reinterpret_cast<void *>(Blocks.back())); + Blocks.pop_back(); + } + Cache.drain(); } diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp index 2e738214700..14b398a91fc 100644 --- a/standalone/tests/release_test.cpp +++ b/standalone/tests/release_test.cpp @@ -22,15 +22,17 @@ TEST(ScudoReleaseTest, RegionPageMap) { for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) { // Various valid counter's max values packed into one word. scudo::RegionPageMap PageMap2N(1U, 1U, 1UL << I); - EXPECT_EQ(sizeof(scudo::uptr), PageMap2N.getBufferSize()); + ASSERT_TRUE(PageMap2N.isAllocated()); + EXPECT_EQ(1U, PageMap2N.getBufferNumElements()); // Check the "all bit set" values too. scudo::RegionPageMap PageMap2N1_1(1U, 1U, ~0UL >> I); - EXPECT_EQ(sizeof(scudo::uptr), PageMap2N1_1.getBufferSize()); + ASSERT_TRUE(PageMap2N1_1.isAllocated()); + EXPECT_EQ(1U, PageMap2N1_1.getBufferNumElements()); // Verify the packing ratio, the counter is Expected to be packed into the // closest power of 2 bits. scudo::RegionPageMap PageMap(1U, SCUDO_WORDSIZE, 1UL << I); - EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpPowerOfTwo(I + 1), - PageMap.getBufferSize()); + ASSERT_TRUE(PageMap.isAllocated()); + EXPECT_EQ(scudo::roundUpPowerOfTwo(I + 1), PageMap.getBufferNumElements()); } // Go through 1, 2, 4, 8, .. {32,64} bits per counter. @@ -40,6 +42,7 @@ TEST(ScudoReleaseTest, RegionPageMap) { (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I); scudo::RegionPageMap PageMap(1U, NumCounters, 1UL << ((1UL << I) - 1)); + ASSERT_TRUE(PageMap.isAllocated()); PageMap.inc(0U, 0U); for (scudo::uptr C = 1; C < NumCounters - 1; C++) { EXPECT_EQ(0UL, PageMap.get(0U, C)); @@ -130,8 +133,9 @@ TEST(ScudoReleaseTest, FreePagesRangeTracker) { // Strip trailing '.'-pages before comparing the results as they are not // going to be reported to range_recorder anyway. const char *LastX = strrchr(TestCase, 'x'); - std::string Expected(TestCase, - LastX == nullptr ? 
0 : (LastX - TestCase + 1)); + std::string Expected( + TestCase, + LastX == nullptr ? 0U : static_cast<size_t>(LastX - TestCase + 1)); EXPECT_STREQ(Expected.c_str(), Recorder.ReportedPages.c_str()); } } @@ -220,12 +224,12 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; }; auto DecompactPtr = [](scudo::uptr P) { return P; }; ReleasedPagesRecorder Recorder; - scudo::PageReleaseContext Context(BlockSize, - /*RegionSize=*/MaxBlocks * BlockSize, - /*NumberOfRegions=*/1U, + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, /*ReleaseSize=*/MaxBlocks * BlockSize); ASSERT_FALSE(Context.hasBlockMarked()); - Context.markFreeBlocks(FreeList, DecompactPtr, Recorder.getBase()); + Context.markFreeBlocksInRegion(FreeList, DecompactPtr, Recorder.getBase(), + /*RegionIndex=*/0, MaxBlocks * BlockSize, + /*MayContainLastBlockInRegion=*/true); ASSERT_TRUE(Context.hasBlockMarked()); releaseFreeMemoryToOS(Context, Recorder, SkipRegion); scudo::RegionPageMap &PageMap = Context.PageMap; @@ -315,12 +319,13 @@ template <class SizeClassMap> void testPageMapMarkRange() { const scudo::uptr RoundedRegionSize = scudo::roundUp(RegionSize, PageSize); std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); - for (scudo::uptr Block = 0; Block + BlockSize <= RoundedRegionSize; - Block += BlockSize) { - for (scudo::uptr page = Block / PageSize; - page <= (Block + BlockSize - 1) / PageSize; ++page) { - ASSERT_LT(page, Pages.size()); - ++Pages[page]; + for (scudo::uptr Block = 0; Block < RoundedRegionSize; Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize && + Page < RoundedRegionSize / PageSize; + ++Page) { + ASSERT_LT(Page, Pages.size()); + ++Pages[Page]; } } @@ -328,10 +333,10 @@ template <class SizeClassMap> void testPageMapMarkRange() { const scudo::uptr GroupBeg = GroupId * GroupSize; const scudo::uptr GroupEnd = GroupBeg + GroupSize; - scudo::PageReleaseContext Context(BlockSize, RegionSize, - /*NumberOfRegions=*/1U, + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, /*ReleaseSize=*/RegionSize); - Context.markRangeAsAllCounted(GroupBeg, GroupEnd, /*Base=*/0U); + Context.markRangeAsAllCounted(GroupBeg, GroupEnd, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize); scudo::uptr FirstBlock = ((GroupBeg + BlockSize - 1) / BlockSize) * BlockSize; @@ -398,10 +403,10 @@ template <class SizeClassMap> void testPageMapMarkRange() { } // Iterate each Group // Release the entire region. This is to ensure the last page is counted. 
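The release tests above now pass the region geometry to the marking calls instead of the PageReleaseContext constructor. The following is a condensed, self-contained sketch of the range-marking path using only calls that appear in these tests; the block and region sizes are arbitrary example values.

#include "common.h"
#include "release.h"

// Mark an entire region as counted, then query the page map.
static void markWholeRegionSketch() {
  const scudo::uptr PageSize = scudo::getPageSizeCached();
  const scudo::uptr BlockSize = 2 * PageSize;   // Example value.
  const scudo::uptr RegionSize = 16 * PageSize; // Example value.

  scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U,
                                    /*ReleaseSize=*/RegionSize);
  Context.markRangeAsAllCounted(/*From=*/0U, /*To=*/RegionSize, /*Base=*/0U,
                                /*RegionIndex=*/0, RegionSize);

  for (scudo::uptr Page = 0; Page < RegionSize / PageSize; ++Page)
    if (!Context.PageMap.isAllCounted(/*Region=*/0, Page))
      break; // After the call above, every page is expected to be counted.
}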
- scudo::PageReleaseContext Context(BlockSize, RegionSize, - /*NumberOfRegions=*/1U, + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, /*ReleaseSize=*/RegionSize); - Context.markRangeAsAllCounted(/*From=*/0U, /*To=*/RegionSize, /*Base=*/0); + Context.markRangeAsAllCounted(/*From=*/0U, /*To=*/RegionSize, /*Base=*/0, + /*RegionIndex=*/0, RegionSize); for (scudo::uptr Page = 0; Page < RoundedRegionSize / PageSize; ++Page) EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, Page)); } // Iterate each size class @@ -410,8 +415,6 @@ template <class SizeClassMap> void testPageMapMarkRange() { template <class SizeClassMap> void testReleasePartialRegion() { typedef FreeBatch<SizeClassMap> Batch; const scudo::uptr PageSize = scudo::getPageSizeCached(); - const scudo::uptr ReleaseBase = PageSize; - const scudo::uptr BasePageOffset = ReleaseBase / PageSize; for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { // In the following, we want to ensure the region includes at least 2 pages @@ -419,8 +422,11 @@ template <class SizeClassMap> void testReleasePartialRegion() { // the last block is tricky, so we always test the case that includes the // last block. const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); + const scudo::uptr ReleaseBase = scudo::roundUp(BlockSize, PageSize); + const scudo::uptr BasePageOffset = ReleaseBase / PageSize; const scudo::uptr RegionSize = - scudo::roundUpSlow(scudo::roundUp(BlockSize, PageSize) * 2, BlockSize) + + scudo::roundUpSlow(scudo::roundUp(BlockSize, PageSize) + ReleaseBase, + BlockSize) + BlockSize; const scudo::uptr RoundedRegionSize = scudo::roundUp(RegionSize, PageSize); @@ -429,7 +435,7 @@ template <class SizeClassMap> void testReleasePartialRegion() { // Skip the blocks in the first page and add the remaining. std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); - for (scudo::uptr Block = scudo::roundUpSlow(PageSize, BlockSize); + for (scudo::uptr Block = scudo::roundUpSlow(ReleaseBase, BlockSize); Block + BlockSize <= RoundedRegionSize; Block += BlockSize) { for (scudo::uptr Page = Block / PageSize; Page <= (Block + BlockSize - 1) / PageSize; ++Page) { @@ -439,12 +445,12 @@ template <class SizeClassMap> void testReleasePartialRegion() { } // This follows the logic how we count the last page. It should be - // consistent with how markFreeBlocks() handles the last block. + // consistent with how markFreeBlocksInRegion() handles the last block. if (RoundedRegionSize % BlockSize != 0) ++Pages.back(); Batch *CurrentBatch = nullptr; - for (scudo::uptr Block = scudo::roundUpSlow(PageSize, BlockSize); + for (scudo::uptr Block = scudo::roundUpSlow(ReleaseBase, BlockSize); Block < RegionSize; Block += BlockSize) { if (CurrentBatch == nullptr || CurrentBatch->getCount() == Batch::MaxCount) { @@ -459,7 +465,7 @@ template <class SizeClassMap> void testReleasePartialRegion() { auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; }; ReleasedPagesRecorder Recorder(ReleaseBase); releaseFreeMemoryToOS(Context, Recorder, SkipRegion); - const scudo::uptr FirstBlock = scudo::roundUpSlow(PageSize, BlockSize); + const scudo::uptr FirstBlock = scudo::roundUpSlow(ReleaseBase, BlockSize); for (scudo::uptr P = 0; P < RoundedRegionSize; P += PageSize) { if (P < FirstBlock) { @@ -477,10 +483,12 @@ template <class SizeClassMap> void testReleasePartialRegion() { // Test marking by visiting each block. 
{ auto DecompactPtr = [](scudo::uptr P) { return P; }; - scudo::PageReleaseContext Context( - BlockSize, RegionSize, /*NumberOfRegions=*/1U, - /*ReleaseSize=*/RegionSize - PageSize, ReleaseBase); - Context.markFreeBlocks(FreeList, DecompactPtr, /*Base=*/0U); + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); + Context.markFreeBlocksInRegion(FreeList, DecompactPtr, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize, + /*MayContainLastBlockInRegion=*/true); for (const Batch &It : FreeList) { for (scudo::u16 I = 0; I < It.getCount(); I++) { scudo::uptr Block = It.get(I); @@ -497,10 +505,11 @@ template <class SizeClassMap> void testReleasePartialRegion() { // Test range marking. { - scudo::PageReleaseContext Context( - BlockSize, RegionSize, /*NumberOfRegions=*/1U, - /*ReleaseSize=*/RegionSize - PageSize, ReleaseBase); - Context.markRangeAsAllCounted(ReleaseBase, RegionSize, /*Base=*/0U); + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); + Context.markRangeAsAllCounted(ReleaseBase, RegionSize, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize); for (scudo::uptr Page = ReleaseBase / PageSize; Page < RoundedRegionSize / PageSize; ++Page) { if (Context.PageMap.get(/*Region=*/0, Page - BasePageOffset) != @@ -515,16 +524,16 @@ template <class SizeClassMap> void testReleasePartialRegion() { // Check the buffer size of PageMap. { - scudo::PageReleaseContext Full(BlockSize, RegionSize, - /*NumberOfRegions=*/1U, + scudo::PageReleaseContext Full(BlockSize, /*NumberOfRegions=*/1U, /*ReleaseSize=*/RegionSize); Full.ensurePageMapAllocated(); - scudo::PageReleaseContext Partial( - BlockSize, RegionSize, /*NumberOfRegions=*/1U, - /*ReleaseSize=*/RegionSize - PageSize, ReleaseBase); + scudo::PageReleaseContext Partial(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); Partial.ensurePageMapAllocated(); - EXPECT_GE(Full.PageMap.getBufferSize(), Partial.PageMap.getBufferSize()); + EXPECT_GE(Full.PageMap.getBufferNumElements(), + Partial.PageMap.getBufferNumElements()); } while (!FreeList.empty()) { @@ -543,20 +552,103 @@ TEST(ScudoReleaseTest, ReleaseFreeMemoryToOSAndroid) { testReleaseFreeMemoryToOS<scudo::AndroidSizeClassMap>(); } -TEST(ScudoReleaseTest, ReleaseFreeMemoryToOSSvelte) { - testReleaseFreeMemoryToOS<scudo::SvelteSizeClassMap>(); -} - TEST(ScudoReleaseTest, PageMapMarkRange) { testPageMapMarkRange<scudo::DefaultSizeClassMap>(); testPageMapMarkRange<scudo::AndroidSizeClassMap>(); testPageMapMarkRange<scudo::FuchsiaSizeClassMap>(); - testPageMapMarkRange<scudo::SvelteSizeClassMap>(); } TEST(ScudoReleaseTest, ReleasePartialRegion) { testReleasePartialRegion<scudo::DefaultSizeClassMap>(); testReleasePartialRegion<scudo::AndroidSizeClassMap>(); testReleasePartialRegion<scudo::FuchsiaSizeClassMap>(); - testReleasePartialRegion<scudo::SvelteSizeClassMap>(); +} + +template <class SizeClassMap> void testReleaseRangeWithSingleBlock() { + const scudo::uptr PageSize = scudo::getPageSizeCached(); + + // We want to test if a memory group only contains single block that will be + // handled properly. The case is like: + // + // From To + // +----------------------+ + // +------------+------------+ + // | | | + // +------------+------------+ + // ^ + // RegionSize + // + // Note that `From` will be page aligned. 
+ // + // If the second from the last block is aligned at `From`, then we expect all + // the pages after `From` will be marked as can-be-released. Otherwise, the + // pages only touched by the last blocks will be marked as can-be-released. + for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { + const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); + const scudo::uptr From = scudo::roundUp(BlockSize, PageSize); + const scudo::uptr To = + From % BlockSize == 0 + ? From + BlockSize + : scudo::roundDownSlow(From + BlockSize, BlockSize) + BlockSize; + const scudo::uptr RoundedRegionSize = scudo::roundUp(To, PageSize); + + std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); + for (scudo::uptr Block = (To - BlockSize); Block < RoundedRegionSize; + Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize && + Page < RoundedRegionSize / PageSize; + ++Page) { + ASSERT_LT(Page, Pages.size()); + ++Pages[Page]; + } + } + + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/To, + /*ReleaseBase=*/0U); + Context.markRangeAsAllCounted(From, To, /*Base=*/0U, /*RegionIndex=*/0, + /*RegionSize=*/To); + + for (scudo::uptr Page = 0; Page < RoundedRegionSize; Page += PageSize) { + if (Context.PageMap.get(/*Region=*/0U, Page / PageSize) != + Pages[Page / PageSize]) { + EXPECT_TRUE( + Context.PageMap.isAllCounted(/*Region=*/0U, Page / PageSize)); + } + } + } // for each size class +} + +TEST(ScudoReleaseTest, RangeReleaseRegionWithSingleBlock) { + testReleaseRangeWithSingleBlock<scudo::DefaultSizeClassMap>(); + testReleaseRangeWithSingleBlock<scudo::AndroidSizeClassMap>(); + testReleaseRangeWithSingleBlock<scudo::FuchsiaSizeClassMap>(); +} + +TEST(ScudoReleaseTest, BufferPool) { + constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1; + constexpr scudo::uptr StaticBufferNumElements = 512U; + + // Allocate the buffer pool on the heap because it is quite large (slightly + // more than StaticBufferCount * StaticBufferNumElements * sizeof(uptr)) and + // it may not fit in the stack on some platforms. + using BufferPool = + scudo::BufferPool<StaticBufferCount, StaticBufferNumElements>; + std::unique_ptr<BufferPool> Pool(new BufferPool()); + + std::vector<BufferPool::Buffer> Buffers; + for (scudo::uptr I = 0; I < StaticBufferCount; ++I) { + BufferPool::Buffer Buffer = Pool->getBuffer(StaticBufferNumElements); + EXPECT_TRUE(Pool->isStaticBufferTestOnly(Buffer)); + Buffers.push_back(Buffer); + } + + // The static buffer is supposed to be used up. 
+ BufferPool::Buffer Buffer = Pool->getBuffer(StaticBufferNumElements); + EXPECT_FALSE(Pool->isStaticBufferTestOnly(Buffer)); + + Pool->releaseBuffer(Buffer); + for (auto &Buffer : Buffers) + Pool->releaseBuffer(Buffer); } diff --git a/standalone/tests/report_test.cpp b/standalone/tests/report_test.cpp index 81587bae6b5..6c46243053d 100644 --- a/standalone/tests/report_test.cpp +++ b/standalone/tests/report_test.cpp @@ -23,7 +23,6 @@ TEST(ScudoReportDeathTest, Generic) { EXPECT_DEATH(scudo::reportError("TEST123"), "Scudo ERROR.*TEST123"); EXPECT_DEATH(scudo::reportInvalidFlag("ABC", "DEF"), "Scudo ERROR.*ABC.*DEF"); EXPECT_DEATH(scudo::reportHeaderCorruption(P), "Scudo ERROR.*42424242"); - EXPECT_DEATH(scudo::reportHeaderRace(P), "Scudo ERROR.*42424242"); EXPECT_DEATH(scudo::reportSanityCheckError("XYZ"), "Scudo ERROR.*XYZ"); EXPECT_DEATH(scudo::reportAlignmentTooBig(123, 456), "Scudo ERROR.*123.*456"); EXPECT_DEATH(scudo::reportAllocationSizeTooBig(123, 456, 789), @@ -54,3 +53,28 @@ TEST(ScudoReportDeathTest, CSpecific) { EXPECT_DEATH(scudo::reportInvalidAlignedAllocAlignment(123, 456), "Scudo ERROR.*123.*456"); } + +#if SCUDO_LINUX || SCUDO_TRUSTY || SCUDO_ANDROID +#include "report_linux.h" + +#include <errno.h> +#include <sys/mman.h> + +TEST(ScudoReportDeathTest, Linux) { + errno = ENOMEM; + EXPECT_DEATH(scudo::reportMapError(), + "Scudo ERROR:.*internal map failure \\(error desc=.*\\)"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportMapError(1024U), + "Scudo ERROR:.*internal map failure \\(error desc=.*\\) " + "requesting 1KB"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportUnmapError(0x1000U, 100U), + "Scudo ERROR:.*internal unmap failure \\(error desc=.*\\) Addr " + "0x1000 Size 100"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportProtectError(0x1000U, 100U, PROT_READ), + "Scudo ERROR:.*internal protect failure \\(error desc=.*\\) " + "Addr 0x1000 Size 100 Prot 1"); +} +#endif diff --git a/standalone/tests/scudo_hooks_test.cpp b/standalone/tests/scudo_hooks_test.cpp deleted file mode 100644 index 7184ec12a8b..00000000000 --- a/standalone/tests/scudo_hooks_test.cpp +++ /dev/null @@ -1,114 +0,0 @@ -//===-- scudo_hooks_test.cpp ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "tests/scudo_unit_test.h" - -#include "allocator_config.h" -#include "combined.h" - -namespace { -void *LastAllocatedPtr = nullptr; -size_t LastRequestSize = 0; -void *LastDeallocatedPtr = nullptr; -} // namespace - -// Scudo defines weak symbols that can be defined by a client binary -// to register callbacks at key points in the allocation timeline. In -// order to enforce those invariants, we provide definitions that -// update some global state every time they are called, so that tests -// can inspect their effects. An unfortunate side effect of this -// setup is that because those symbols are part of the binary, they -// can't be selectively enabled; that means that they will get called -// on unrelated tests in the same compilation unit. To mitigate this -// issue, we insulate those tests in a separate compilation unit. 
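The new Linux death tests above pin down the messages produced by reportMapError, reportUnmapError and reportProtectError. A hypothetical call-site sketch of how such a report would be reached on an mmap failure follows; mapOrReport is an illustrative name, and the real call sites live elsewhere in the allocator, not in this test.

#include "common.h"
#include "report_linux.h"

#include <sys/mman.h>

// On a mapping failure, the allocator reports and aborts with the errno
// description, which is what the death tests above pattern-match on.
static void *mapOrReport(scudo::uptr Size) {
  void *P = mmap(nullptr, Size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (P == MAP_FAILED)
    scudo::reportMapError(Size); // Fatal report; does not return.
  return P;
}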
-extern "C" { -__attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr, - size_t Size) { - LastAllocatedPtr = Ptr; - LastRequestSize = Size; -} -__attribute__((visibility("default"))) void __scudo_deallocate_hook(void *Ptr) { - LastDeallocatedPtr = Ptr; -} -} - -// Simple check that allocation callbacks, when registered, are called: -// 1) __scudo_allocate_hook is called when allocating. -// 2) __scudo_deallocate_hook is called when deallocating. -// 3) Both hooks are called when reallocating. -// 4) Neither are called for a no-op reallocation. -TEST(ScudoHooksTest, AllocateHooks) { - scudo::Allocator<scudo::DefaultConfig> Allocator; - constexpr scudo::uptr DefaultSize = 16U; - constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; - - // Simple allocation and deallocation. - { - LastAllocatedPtr = nullptr; - LastRequestSize = 0; - - void *Ptr = Allocator.allocate(DefaultSize, Origin); - - EXPECT_EQ(Ptr, LastAllocatedPtr); - EXPECT_EQ(DefaultSize, LastRequestSize); - - LastDeallocatedPtr = nullptr; - - Allocator.deallocate(Ptr, Origin); - - EXPECT_EQ(Ptr, LastDeallocatedPtr); - } - - // Simple no-op, same size reallocation. - { - void *Ptr = Allocator.allocate(DefaultSize, Origin); - - LastAllocatedPtr = nullptr; - LastRequestSize = 0; - LastDeallocatedPtr = nullptr; - - void *NewPtr = Allocator.reallocate(Ptr, DefaultSize); - - EXPECT_EQ(Ptr, NewPtr); - EXPECT_EQ(nullptr, LastAllocatedPtr); - EXPECT_EQ(0U, LastRequestSize); - EXPECT_EQ(nullptr, LastDeallocatedPtr); - } - - // Reallocation in increasing size classes. This ensures that at - // least one of the reallocations will be meaningful. - { - void *Ptr = Allocator.allocate(0, Origin); - - for (scudo::uptr ClassId = 1U; - ClassId <= scudo::DefaultConfig::Primary::SizeClassMap::LargestClassId; - ++ClassId) { - const scudo::uptr Size = - scudo::DefaultConfig::Primary::SizeClassMap::getSizeByClassId( - ClassId); - - LastAllocatedPtr = nullptr; - LastRequestSize = 0; - LastDeallocatedPtr = nullptr; - - void *NewPtr = Allocator.reallocate(Ptr, Size); - - if (NewPtr != Ptr) { - EXPECT_EQ(NewPtr, LastAllocatedPtr); - EXPECT_EQ(Size, LastRequestSize); - EXPECT_EQ(Ptr, LastDeallocatedPtr); - } else { - EXPECT_EQ(nullptr, LastAllocatedPtr); - EXPECT_EQ(0U, LastRequestSize); - EXPECT_EQ(nullptr, LastDeallocatedPtr); - } - - Ptr = NewPtr; - } - } -} diff --git a/standalone/tests/scudo_unit_test.h b/standalone/tests/scudo_unit_test.h index 1665fa87e5f..4283416435b 100644 --- a/standalone/tests/scudo_unit_test.h +++ b/standalone/tests/scudo_unit_test.h @@ -45,4 +45,10 @@ using Test = ::testing::Test; #define SKIP_NO_DEBUG(T) DISABLED_##T #endif +#if SCUDO_FUCHSIA +// The zxtest library provides a default main function that does the same thing +// for Fuchsia builds. +#define SCUDO_NO_TEST_MAIN +#endif + extern bool UseQuarantine; diff --git a/standalone/tests/scudo_unit_test_main.cpp b/standalone/tests/scudo_unit_test_main.cpp index fbfefa5c93d..881e0265bb3 100644 --- a/standalone/tests/scudo_unit_test_main.cpp +++ b/standalone/tests/scudo_unit_test_main.cpp @@ -45,9 +45,7 @@ __scudo_default_options() { "dealloc_type_mismatch=" DEALLOC_TYPE_MISMATCH; } -// The zxtest library provides a default main function that does the same thing -// for Fuchsia builds. 
-#if !SCUDO_FUCHSIA +#if !defined(SCUDO_NO_TEST_MAIN) int main(int argc, char **argv) { EnableMemoryTaggingIfSupported(); testing::InitGoogleTest(&argc, argv); diff --git a/standalone/tests/secondary_test.cpp b/standalone/tests/secondary_test.cpp index b0319011771..8f0250e88eb 100644 --- a/standalone/tests/secondary_test.cpp +++ b/standalone/tests/secondary_test.cpp @@ -10,6 +10,7 @@ #include "tests/scudo_unit_test.h" #include "allocator_config.h" +#include "allocator_config_wrapper.h" #include "secondary.h" #include <algorithm> @@ -22,7 +23,8 @@ #include <vector> template <typename Config> static scudo::Options getOptionsForConfig() { - if (!Config::MaySupportMemoryTagging || !scudo::archSupportsMemoryTagging() || + if (!Config::getMaySupportMemoryTagging() || + !scudo::archSupportsMemoryTagging() || !scudo::systemSupportsMemoryTagging()) return {}; scudo::AtomicOptions AO; @@ -31,8 +33,9 @@ template <typename Config> static scudo::Options getOptionsForConfig() { } template <typename Config> static void testSecondaryBasic(void) { - using SecondaryT = scudo::MapAllocator<Config>; - scudo::Options Options = getOptionsForConfig<Config>(); + using SecondaryT = scudo::MapAllocator<scudo::SecondaryConfig<Config>>; + scudo::Options Options = + getOptionsForConfig<scudo::SecondaryConfig<Config>>(); scudo::GlobalStats S; S.init(); @@ -83,19 +86,35 @@ template <typename Config> static void testSecondaryBasic(void) { } struct NoCacheConfig { - typedef scudo::MapAllocatorNoCache SecondaryCache; static const bool MaySupportMemoryTagging = false; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename Config> using SecondaryT = scudo::MapAllocator<Config>; + + struct Secondary { + template <typename Config> + using CacheT = scudo::MapAllocatorNoCache<Config>; + }; }; struct TestConfig { - typedef scudo::MapAllocatorCache<TestConfig> SecondaryCache; static const bool MaySupportMemoryTagging = false; - static const scudo::u32 SecondaryCacheEntriesArraySize = 128U; - static const scudo::u32 SecondaryCacheQuarantineSize = 0U; - static const scudo::u32 SecondaryCacheDefaultMaxEntriesCount = 64U; - static const scudo::uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 20; - static const scudo::s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 SecondaryCacheMaxReleaseToOsIntervalMs = INT32_MAX; + template <typename> using TSDRegistryT = void; + template <typename> using PrimaryT = void; + template <typename> using SecondaryT = void; + + struct Secondary { + struct Cache { + static const scudo::u32 EntriesArraySize = 128U; + static const scudo::u32 QuarantineSize = 0U; + static const scudo::u32 DefaultMaxEntriesCount = 64U; + static const scudo::uptr DefaultMaxEntrySize = 1UL << 20; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + + template <typename Config> using CacheT = scudo::MapAllocatorCache<Config>; + }; }; TEST(ScudoSecondaryTest, SecondaryBasic) { @@ -106,7 +125,7 @@ TEST(ScudoSecondaryTest, SecondaryBasic) { struct MapAllocatorTest : public Test { using Config = scudo::DefaultConfig; - using LargeAllocator = scudo::MapAllocator<Config>; + using LargeAllocator = scudo::MapAllocator<scudo::SecondaryConfig<Config>>; void SetUp() override { Allocator->init(nullptr); } @@ -114,7 +133,8 @@ struct MapAllocatorTest : public Test { std::unique_ptr<LargeAllocator> Allocator = std::make_unique<LargeAllocator>(); - scudo::Options Options = 
getOptionsForConfig<Config>(); + scudo::Options Options = + getOptionsForConfig<scudo::SecondaryConfig<Config>>(); }; // This exercises a variety of combinations of size and alignment for the @@ -128,10 +148,10 @@ TEST_F(MapAllocatorTest, SecondaryCombinations) { AlignLog++) { const scudo::uptr Align = 1U << AlignLog; for (scudo::sptr Delta = -128; Delta <= 128; Delta += 8) { - if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0) + if ((1LL << SizeLog) + Delta <= 0) continue; - const scudo::uptr UserSize = - scudo::roundUp((1U << SizeLog) + Delta, MinAlign); + const scudo::uptr UserSize = scudo::roundUp( + static_cast<scudo::uptr>((1LL << SizeLog) + Delta), MinAlign); const scudo::uptr Size = HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0); void *P = Allocator->allocate(Options, Size, Align); @@ -152,7 +172,8 @@ TEST_F(MapAllocatorTest, SecondaryIterate) { std::vector<void *> V; const scudo::uptr PageSize = scudo::getPageSizeCached(); for (scudo::uptr I = 0; I < 32U; I++) - V.push_back(Allocator->allocate(Options, (std::rand() % 16) * PageSize)); + V.push_back(Allocator->allocate( + Options, (static_cast<scudo::uptr>(std::rand()) % 16U) * PageSize)); auto Lambda = [&V](scudo::uptr Block) { EXPECT_NE(std::find(V.begin(), V.end(), reinterpret_cast<void *>(Block)), V.end()); @@ -207,8 +228,9 @@ struct MapAllocatorWithReleaseTest : public MapAllocatorTest { } for (scudo::uptr I = 0; I < 128U; I++) { // Deallocate 75% of the blocks. - const bool Deallocate = (rand() & 3) != 0; - void *P = Allocator->allocate(Options, (std::rand() % 16) * PageSize); + const bool Deallocate = (std::rand() & 3) != 0; + void *P = Allocator->allocate( + Options, (static_cast<scudo::uptr>(std::rand()) % 16U) * PageSize); if (Deallocate) Allocator->deallocate(Options, P); else diff --git a/standalone/tests/size_class_map_test.cpp b/standalone/tests/size_class_map_test.cpp index b11db1e9f64..05b5835ff0b 100644 --- a/standalone/tests/size_class_map_test.cpp +++ b/standalone/tests/size_class_map_test.cpp @@ -20,10 +20,6 @@ TEST(ScudoSizeClassMapTest, DefaultSizeClassMap) { testSizeClassMap<scudo::DefaultSizeClassMap>(); } -TEST(ScudoSizeClassMapTest, SvelteSizeClassMap) { - testSizeClassMap<scudo::SvelteSizeClassMap>(); -} - TEST(ScudoSizeClassMapTest, AndroidSizeClassMap) { testSizeClassMap<scudo::AndroidSizeClassMap>(); } diff --git a/standalone/tests/strings_test.cpp b/standalone/tests/strings_test.cpp index 7a69ffd9762..abb81803f65 100644 --- a/standalone/tests/strings_test.cpp +++ b/standalone/tests/strings_test.cpp @@ -66,6 +66,10 @@ TEST(ScudoStringsTest, Precision) { Str.append("%-6s", "12345"); EXPECT_EQ(Str.length(), strlen(Str.data())); EXPECT_STREQ("12345 ", Str.data()); + Str.clear(); + Str.append("%-8s", "12345"); + EXPECT_EQ(Str.length(), strlen(Str.data())); + EXPECT_STREQ("12345 ", Str.data()); } static void fillString(scudo::ScopedString &Str, scudo::uptr Size) { @@ -123,3 +127,42 @@ TEST(ScudoStringsTest, Padding) { testAgainstLibc<int>("%03d - %03d", 12, 1234); testAgainstLibc<int>("%03d - %03d", -12, -1234); } + +#if defined(__linux__) + +#include <sys/resource.h> + +TEST(ScudoStringsTest, CapacityIncreaseFails) { + scudo::ScopedString Str; + + rlimit Limit = {}; + EXPECT_EQ(0, getrlimit(RLIMIT_AS, &Limit)); + + rlimit EmptyLimit = {.rlim_cur = 0, .rlim_max = Limit.rlim_max}; + EXPECT_EQ(0, setrlimit(RLIMIT_AS, &EmptyLimit)); + + // qemu does not honor the setrlimit, so verify before proceeding. 
+ scudo::MemMapT MemMap; + if (MemMap.map(/*Addr=*/0U, scudo::getPageSizeCached(), "scudo:test", + MAP_ALLOWNOMEM)) { + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + setrlimit(RLIMIT_AS, &Limit); + GTEST_SKIP() << "Limiting address space does not prevent mmap."; + } + + // Test requires that the default length is at least 6 characters. + scudo::uptr MaxSize = Str.capacity(); + EXPECT_LE(6u, MaxSize); + + for (size_t i = 0; i < MaxSize - 5; i++) { + Str.append("B"); + } + + // Attempt to append past the end of the current capacity. + Str.append("%d", 12345678); + EXPECT_EQ(MaxSize, Str.capacity()); + EXPECT_STREQ("B12345", &Str.data()[MaxSize - 6]); + + EXPECT_EQ(0, setrlimit(RLIMIT_AS, &Limit)); +} +#endif diff --git a/standalone/tests/timing_test.cpp b/standalone/tests/timing_test.cpp new file mode 100644 index 00000000000..09a6c312246 --- /dev/null +++ b/standalone/tests/timing_test.cpp @@ -0,0 +1,86 @@ +//===-- timing_test.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "timing.h" + +#include <string> + +class ScudoTimingTest : public Test { +public: + void testFunc1() { scudo::ScopedTimer ST(Manager, __func__); } + + void testFunc2() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc1(); + } + + void testChainedCalls() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc2(); + } + + void testIgnoredTimer() { + scudo::ScopedTimer ST(Manager, __func__); + ST.ignore(); + } + + void printAllTimersStats() { Manager.printAll(); } + + scudo::TimingManager &getTimingManager() { return Manager; } + +private: + scudo::TimingManager Manager; +}; + +// Given that the output of statistics of timers are dumped through +// `scudo::Printf` which is platform dependent, so we don't have a reliable way +// to catch the output and verify the details. Now we only verify the number of +// invocations on linux. 
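ScopedTimer, added together with timing.cpp, is the RAII type these tests drive. A minimal usage sketch assuming only the constructors, ignore() and printAll() seen above; timeSomethingSketch and the timer names are illustrative.

#include "timing.h"

// Timers record into the manager when they go out of scope; a nested timer
// takes its parent as an extra constructor argument, and ignore() drops a
// measurement instead of recording it.
static void timeSomethingSketch(bool Discard) {
  scudo::TimingManager Manager;
  {
    scudo::ScopedTimer Outer(Manager, "outer_work");
    scudo::ScopedTimer Inner(Manager, Outer, "inner_work");
    if (Discard)
      Inner.ignore();
  }
  Manager.printAll(); // Dumps the collected statistics via scudo::Printf.
}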
+TEST_F(ScudoTimingTest, SimpleTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + testIgnoredTimer(); + testChainedCalls(); + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("testIgnoredTimer (1)") == std::string::npos); + EXPECT_TRUE(output.find("testChainedCalls (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc2 (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc1 (1)") != std::string::npos); +#endif +} + +TEST_F(ScudoTimingTest, NestedTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + { + scudo::ScopedTimer Outer(getTimingManager(), "Outer"); + { + scudo::ScopedTimer Inner1(getTimingManager(), Outer, "Inner1"); + { scudo::ScopedTimer Inner2(getTimingManager(), Inner1, "Inner2"); } + } + } + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("Outer (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner1 (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner2 (1)") != std::string::npos); +#endif +} diff --git a/standalone/tests/tsd_test.cpp b/standalone/tests/tsd_test.cpp index a092fdde904..851ac46b9f0 100644 --- a/standalone/tests/tsd_test.cpp +++ b/standalone/tests/tsd_test.cpp @@ -17,6 +17,7 @@ #include <mutex> #include <set> #include <thread> +#include <type_traits> // We mock out an allocator with a TSD registry, mostly using empty stubs. The // cache contains a single volatile uptr, to be able to test that several @@ -38,7 +39,7 @@ public: void unmapTestOnly() { TSDRegistry.unmapTestOnly(this); } void initCache(CacheT *Cache) { *Cache = {}; } - void commitBack(scudo::TSD<MockAllocator> *TSD) {} + void commitBack(UNUSED scudo::TSD<MockAllocator> *TSD) {} TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } void callPostInitCallback() {} @@ -86,7 +87,8 @@ TEST(ScudoTSDTest, TSDRegistryInit) { EXPECT_TRUE(Allocator->isInitialized()); } -template <class AllocatorT> static void testRegistry() { +template <class AllocatorT> +static void testRegistry() NO_THREAD_SAFETY_ANALYSIS { auto Deleter = [](AllocatorT *A) { A->unmapTestOnly(); delete A; @@ -99,20 +101,17 @@ template <class AllocatorT> static void testRegistry() { Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/true); EXPECT_TRUE(Allocator->isInitialized()); - bool UnlockRequired; - auto TSD = Registry->getTSDAndLock(&UnlockRequired); - EXPECT_NE(TSD, nullptr); - EXPECT_EQ(TSD->getCache().Canary, 0U); - if (UnlockRequired) - TSD->unlock(); + { + typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry); + EXPECT_EQ(TSD->getCache().Canary, 0U); + } Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false); - TSD = Registry->getTSDAndLock(&UnlockRequired); - EXPECT_NE(TSD, nullptr); - EXPECT_EQ(TSD->getCache().Canary, 0U); - memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache())); - if (UnlockRequired) - TSD->unlock(); + { + typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry); + EXPECT_EQ(TSD->getCache().Canary, 0U); + memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache())); + } } TEST(ScudoTSDTest, TSDRegistryBasic) { @@ -127,7 +126,12 @@ static std::mutex Mutex; static std::condition_variable Cv; static bool Ready; -template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { +// Accessing `TSD->getCache()` requires `TSD::Mutex` which isn't easy to test +// using 
thread-safety analysis. Alternatively, we verify the thread safety +// through a runtime check in ScopedTSD and mark the test body with +// NO_THREAD_SAFETY_ANALYSIS. +template <typename AllocatorT> +static void stressCache(AllocatorT *Allocator) NO_THREAD_SAFETY_ANALYSIS { auto Registry = Allocator->getTSDRegistry(); { std::unique_lock<std::mutex> Lock(Mutex); @@ -135,13 +139,13 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { Cv.wait(Lock); } Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false); - bool UnlockRequired; - auto TSD = Registry->getTSDAndLock(&UnlockRequired); - EXPECT_NE(TSD, nullptr); + typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry); // For an exclusive TSD, the cache should be empty. We cannot guarantee the // same for a shared TSD. - if (!UnlockRequired) + if (std::is_same<typename AllocatorT::TSDRegistryT, + scudo::TSDRegistryExT<AllocatorT>>()) { EXPECT_EQ(TSD->getCache().Canary, 0U); + } // Transform the thread id to a uptr to use it as canary. const scudo::uptr Canary = static_cast<scudo::uptr>( std::hash<std::thread::id>{}(std::this_thread::get_id())); @@ -149,8 +153,6 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { // Loop a few times to make sure that a concurrent thread isn't modifying it. for (scudo::uptr I = 0; I < 4096U; I++) EXPECT_EQ(TSD->getCache().Canary, Canary); - if (UnlockRequired) - TSD->unlock(); } template <class AllocatorT> static void testRegistryThreaded() { @@ -192,13 +194,10 @@ static void stressSharedRegistry(MockAllocator<SharedCaches> *Allocator) { Cv.wait(Lock); } Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false); - bool UnlockRequired; for (scudo::uptr I = 0; I < 4096U; I++) { - auto TSD = Registry->getTSDAndLock(&UnlockRequired); - EXPECT_NE(TSD, nullptr); - Set.insert(reinterpret_cast<void *>(TSD)); - if (UnlockRequired) - TSD->unlock(); + typename MockAllocator<SharedCaches>::TSDRegistryT::ScopedTSD TSD( + *Registry); + Set.insert(reinterpret_cast<void *>(&*TSD)); } { std::unique_lock<std::mutex> Lock(Mutex); diff --git a/standalone/tests/vector_test.cpp b/standalone/tests/vector_test.cpp index dc23c2a3471..b612676b7bd 100644 --- a/standalone/tests/vector_test.cpp +++ b/standalone/tests/vector_test.cpp @@ -41,3 +41,47 @@ TEST(ScudoVectorTest, ResizeReduction) { V.resize(1); EXPECT_EQ(V.size(), 1U); } + +#if defined(__linux__) + +#include <sys/resource.h> + +// Verify that if the reallocate fails, nothing new is added. +TEST(ScudoVectorTest, ReallocateFails) { + scudo::Vector<char> V; + scudo::uptr capacity = V.capacity(); + + // Get the current address space size. + rlimit Limit = {}; + EXPECT_EQ(0, getrlimit(RLIMIT_AS, &Limit)); + + rlimit EmptyLimit = {.rlim_cur = 0, .rlim_max = Limit.rlim_max}; + EXPECT_EQ(0, setrlimit(RLIMIT_AS, &EmptyLimit)); + + // qemu does not honor the setrlimit, so verify before proceeding. + scudo::MemMapT MemMap; + if (MemMap.map(/*Addr=*/0U, scudo::getPageSizeCached(), "scudo:test", + MAP_ALLOWNOMEM)) { + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + setrlimit(RLIMIT_AS, &Limit); + GTEST_SKIP() << "Limiting address space does not prevent mmap."; + } + + V.resize(capacity); + // Set the last element so we can check it later. + V.back() = '\0'; + + // The reallocate should fail, so the capacity should not change. + V.reserve(capacity + 1000); + EXPECT_EQ(capacity, V.capacity()); + + // Now try to do a push back and verify that the size does not change. 
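+  // A push_back at full capacity has to go through reallocate(). With
+  // RLIMIT_AS set to zero the new mapping should fail (the buffer is mapped
+  // with MAP_ALLOWNOMEM), so push_back is expected to return without
+  // appending, leaving both the size and the existing contents untouched.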
+ scudo::uptr Size = V.size(); + V.push_back('2'); + EXPECT_EQ(Size, V.size()); + // Verify that the last element in the vector did not change. + EXPECT_EQ('\0', V.back()); + + EXPECT_EQ(0, setrlimit(RLIMIT_AS, &Limit)); +} +#endif diff --git a/standalone/tests/wrappers_c_test.cpp b/standalone/tests/wrappers_c_test.cpp index 616cf5491b5..f5e17d72148 100644 --- a/standalone/tests/wrappers_c_test.cpp +++ b/standalone/tests/wrappers_c_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "common.h" #include "memtag.h" #include "scudo/interface.h" #include "tests/scudo_unit_test.h" @@ -15,11 +16,27 @@ #include <malloc.h> #include <stdlib.h> #include <unistd.h> +#include <vector> #ifndef __GLIBC_PREREQ #define __GLIBC_PREREQ(x, y) 0 #endif +#if SCUDO_FUCHSIA +// Fuchsia only has valloc +#define HAVE_VALLOC 1 +#elif SCUDO_ANDROID +// Android only has pvalloc/valloc on 32 bit +#if !defined(__LP64__) +#define HAVE_PVALLOC 1 +#define HAVE_VALLOC 1 +#endif // !defined(__LP64__) +#else +// All others assumed to support both functions. +#define HAVE_PVALLOC 1 +#define HAVE_VALLOC 1 +#endif + extern "C" { void malloc_enable(void); void malloc_disable(void); @@ -28,7 +45,101 @@ int malloc_iterate(uintptr_t base, size_t size, void *arg); void *valloc(size_t size); void *pvalloc(size_t size); + +#ifndef SCUDO_ENABLE_HOOKS_TESTS +#define SCUDO_ENABLE_HOOKS_TESTS 0 +#endif + +#if (SCUDO_ENABLE_HOOKS_TESTS == 1) && (SCUDO_ENABLE_HOOKS == 0) +#error "Hooks tests should have hooks enabled as well!" +#endif + +struct AllocContext { + void *Ptr; + size_t Size; +}; +struct DeallocContext { + void *Ptr; +}; +struct ReallocContext { + void *AllocPtr; + void *DeallocPtr; + size_t Size; +}; +static AllocContext AC; +static DeallocContext DC; +static ReallocContext RC; + +#if (SCUDO_ENABLE_HOOKS_TESTS == 1) +__attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr, + size_t Size) { + AC.Ptr = Ptr; + AC.Size = Size; +} +__attribute__((visibility("default"))) void __scudo_deallocate_hook(void *Ptr) { + DC.Ptr = Ptr; } +__attribute__((visibility("default"))) void +__scudo_realloc_allocate_hook(void *OldPtr, void *NewPtr, size_t Size) { + // Verify that __scudo_realloc_deallocate_hook is called first and set the + // right pointer. + EXPECT_EQ(OldPtr, RC.DeallocPtr); + RC.AllocPtr = NewPtr; + RC.Size = Size; + + // Note that this is only used for testing. In general, only one pair of hooks + // will be invoked in `realloc`. if __scudo_realloc_*_hook are not defined, + // it'll call the general hooks only. To make the test easier, we call the + // general one here so that either case (whether __scudo_realloc_*_hook are + // defined) will be verified without separating them into different tests. + __scudo_allocate_hook(NewPtr, Size); +} +__attribute__((visibility("default"))) void +__scudo_realloc_deallocate_hook(void *Ptr) { + RC.DeallocPtr = Ptr; + + // See the comment in the __scudo_realloc_allocate_hook above. 
+ __scudo_deallocate_hook(Ptr); +} +#endif // (SCUDO_ENABLE_HOOKS_TESTS == 1) +} + +class ScudoWrappersCTest : public Test { +protected: + void SetUp() override { + if (SCUDO_ENABLE_HOOKS && !SCUDO_ENABLE_HOOKS_TESTS) + printf("Hooks are enabled but hooks tests are disabled.\n"); + } + + void invalidateHookPtrs() { + if (SCUDO_ENABLE_HOOKS_TESTS) { + void *InvalidPtr = reinterpret_cast<void *>(0xdeadbeef); + AC.Ptr = InvalidPtr; + DC.Ptr = InvalidPtr; + RC.AllocPtr = RC.DeallocPtr = InvalidPtr; + } + } + void verifyAllocHookPtr(UNUSED void *Ptr) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Ptr, AC.Ptr); + } + void verifyAllocHookSize(UNUSED size_t Size) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Size, AC.Size); + } + void verifyDeallocHookPtr(UNUSED void *Ptr) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Ptr, DC.Ptr); + } + void verifyReallocHookPtrs(UNUSED void *OldPtr, void *NewPtr, size_t Size) { + if (SCUDO_ENABLE_HOOKS_TESTS) { + EXPECT_EQ(OldPtr, RC.DeallocPtr); + EXPECT_EQ(NewPtr, RC.AllocPtr); + EXPECT_EQ(Size, RC.Size); + } + } +}; +using ScudoWrappersCDeathTest = ScudoWrappersCTest; // Note that every C allocation function in the test binary will be fulfilled // by Scudo (this includes the gtest APIs, etc.), which is a test by itself. @@ -42,11 +153,13 @@ void *pvalloc(size_t size); static const size_t Size = 100U; -TEST(ScudoWrappersCDeathTest, Malloc) { +TEST_F(ScudoWrappersCDeathTest, Malloc) { void *P = malloc(Size); EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) % FIRST_32_SECOND_64(8U, 16U), 0U); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); // An update to this warning in Clang now triggers in this line, but it's ok // because the check is expecting a bad pointer and should fail. 
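The verify helpers above lean on Scudo's optional allocation hooks: when the allocator is built with SCUDO_ENABLE_HOOKS, the wrappers invoke __scudo_allocate_hook/__scudo_deallocate_hook whenever the embedding program defines them (see the reportAllocation/reportDeallocation helpers added to wrappers_c.inc further down in this change). A minimal sketch of client-defined hooks, assuming only the hook signatures used in this patch; the counter and its name are illustrative, not part of Scudo:

// Hypothetical client-side hooks; only the __scudo_*_hook signatures are
// taken from this patch.
#include <atomic>
#include <cstddef>

static std::atomic<size_t> LiveChunks{0};

extern "C" __attribute__((visibility("default"))) void
__scudo_allocate_hook(void *Ptr, size_t Size) {
  (void)Size;
  if (Ptr)
    LiveChunks.fetch_add(1, std::memory_order_relaxed);
}

extern "C" __attribute__((visibility("default"))) void
__scudo_deallocate_hook(void *Ptr) {
  if (Ptr)
    LiveChunks.fetch_sub(1, std::memory_order_relaxed);
}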
@@ -61,6 +174,7 @@ TEST(ScudoWrappersCDeathTest, Malloc) { #endif free(P); + verifyDeallocHookPtr(P); EXPECT_DEATH(free(P), ""); P = malloc(0U); @@ -72,13 +186,16 @@ TEST(ScudoWrappersCDeathTest, Malloc) { EXPECT_EQ(errno, ENOMEM); } -TEST(ScudoWrappersCTest, Calloc) { +TEST_F(ScudoWrappersCTest, Calloc) { void *P = calloc(1U, Size); EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); for (size_t I = 0; I < Size; I++) EXPECT_EQ((reinterpret_cast<uint8_t *>(P))[I], 0U); free(P); + verifyDeallocHookPtr(P); P = calloc(1U, 0U); EXPECT_NE(P, nullptr); @@ -99,19 +216,28 @@ TEST(ScudoWrappersCTest, Calloc) { EXPECT_EQ(errno, ENOMEM); } -TEST(ScudoWrappersCTest, SmallAlign) { - void *P; - for (size_t Size = 1; Size <= 0x10000; Size <<= 1) { - for (size_t Align = 1; Align <= 0x10000; Align <<= 1) { +TEST_F(ScudoWrappersCTest, SmallAlign) { + // Allocating pointers by the powers of 2 from 1 to 0x10000 + // Using powers of 2 due to memalign using powers of 2 and test more sizes + constexpr size_t MaxSize = 0x10000; + std::vector<void *> ptrs; + // Reserving space to prevent further allocation during the test + ptrs.reserve((scudo::getLeastSignificantSetBitIndex(MaxSize) + 1) * + (scudo::getLeastSignificantSetBitIndex(MaxSize) + 1) * 3); + for (size_t Size = 1; Size <= MaxSize; Size <<= 1) { + for (size_t Align = 1; Align <= MaxSize; Align <<= 1) { for (size_t Count = 0; Count < 3; ++Count) { - P = memalign(Align, Size); + void *P = memalign(Align, Size); EXPECT_TRUE(reinterpret_cast<uintptr_t>(P) % Align == 0); + ptrs.push_back(P); } } } + for (void *ptr : ptrs) + free(ptr); } -TEST(ScudoWrappersCTest, Memalign) { +TEST_F(ScudoWrappersCTest, Memalign) { void *P; for (size_t I = FIRST_32_SECOND_64(2U, 3U); I <= 18U; I++) { const size_t Alignment = 1U << I; @@ -120,14 +246,20 @@ TEST(ScudoWrappersCTest, Memalign) { EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) % Alignment, 0U); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); free(P); + verifyDeallocHookPtr(P); P = nullptr; EXPECT_EQ(posix_memalign(&P, Alignment, Size), 0); EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) % Alignment, 0U); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); free(P); + verifyDeallocHookPtr(P); } EXPECT_EQ(memalign(4096U, SIZE_MAX), nullptr); @@ -139,18 +271,24 @@ TEST(ScudoWrappersCTest, Memalign) { for (size_t Alignment = 0U; Alignment <= 128U; Alignment++) { P = memalign(Alignment, 1024U); EXPECT_NE(P, nullptr); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); free(P); + verifyDeallocHookPtr(P); } } } -TEST(ScudoWrappersCTest, AlignedAlloc) { +TEST_F(ScudoWrappersCTest, AlignedAlloc) { const size_t Alignment = 4096U; void *P = aligned_alloc(Alignment, Alignment * 4U); EXPECT_NE(P, nullptr); EXPECT_LE(Alignment * 4U, malloc_usable_size(P)); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) % Alignment, 0U); + verifyAllocHookPtr(P); + verifyAllocHookSize(Alignment * 4U); free(P); + verifyDeallocHookPtr(P); errno = 0; P = aligned_alloc(Alignment, Size); @@ -158,33 +296,60 @@ TEST(ScudoWrappersCTest, AlignedAlloc) { EXPECT_EQ(errno, EINVAL); } -TEST(ScudoWrappersCDeathTest, Realloc) { +TEST_F(ScudoWrappersCDeathTest, Realloc) { + invalidateHookPtrs(); // realloc(nullptr, N) is malloc(N) - void *P = realloc(nullptr, 0U); + void *P = realloc(nullptr, Size); EXPECT_NE(P, nullptr); + verifyAllocHookPtr(P); + verifyAllocHookSize(Size); free(P); + 
verifyDeallocHookPtr(P); + invalidateHookPtrs(); P = malloc(Size); EXPECT_NE(P, nullptr); // realloc(P, 0U) is free(P) and returns nullptr EXPECT_EQ(realloc(P, 0U), nullptr); + verifyDeallocHookPtr(P); P = malloc(Size); EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); memset(P, 0x42, Size); + invalidateHookPtrs(); + void *OldP = P; P = realloc(P, Size * 2U); EXPECT_NE(P, nullptr); EXPECT_LE(Size * 2U, malloc_usable_size(P)); for (size_t I = 0; I < Size; I++) EXPECT_EQ(0x42, (reinterpret_cast<uint8_t *>(P))[I]); + if (OldP == P) { + verifyDeallocHookPtr(OldP); + verifyAllocHookPtr(OldP); + } else { + verifyAllocHookPtr(P); + verifyAllocHookSize(Size * 2U); + verifyDeallocHookPtr(OldP); + } + verifyReallocHookPtrs(OldP, P, Size * 2U); + invalidateHookPtrs(); + OldP = P; P = realloc(P, Size / 2U); EXPECT_NE(P, nullptr); EXPECT_LE(Size / 2U, malloc_usable_size(P)); for (size_t I = 0; I < Size / 2U; I++) EXPECT_EQ(0x42, (reinterpret_cast<uint8_t *>(P))[I]); + if (OldP == P) { + verifyDeallocHookPtr(OldP); + verifyAllocHookPtr(OldP); + } else { + verifyAllocHookPtr(P); + verifyAllocHookSize(Size / 2U); + } + verifyReallocHookPtrs(OldP, P, Size / 2U); free(P); EXPECT_DEATH(P = realloc(P, Size), ""); @@ -218,7 +383,7 @@ TEST(ScudoWrappersCDeathTest, Realloc) { } #if !SCUDO_FUCHSIA -TEST(ScudoWrappersCTest, MallOpt) { +TEST_F(ScudoWrappersCTest, MallOpt) { errno = 0; EXPECT_EQ(mallopt(-1000, 1), 0); // mallopt doesn't set errno. @@ -239,15 +404,19 @@ TEST(ScudoWrappersCTest, MallOpt) { } #endif -TEST(ScudoWrappersCTest, OtherAlloc) { -#if !SCUDO_FUCHSIA - const size_t PageSize = sysconf(_SC_PAGESIZE); +TEST_F(ScudoWrappersCTest, OtherAlloc) { +#if HAVE_PVALLOC + const size_t PageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); void *P = pvalloc(Size); EXPECT_NE(P, nullptr); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) & (PageSize - 1), 0U); EXPECT_LE(PageSize, malloc_usable_size(P)); + verifyAllocHookPtr(P); + // Size will be rounded up to PageSize. + verifyAllocHookSize(PageSize); free(P); + verifyDeallocHookPtr(P); EXPECT_EQ(pvalloc(SIZE_MAX), nullptr); @@ -257,32 +426,44 @@ TEST(ScudoWrappersCTest, OtherAlloc) { free(P); #endif +#if HAVE_VALLOC EXPECT_EQ(valloc(SIZE_MAX), nullptr); +#endif } -#if !SCUDO_FUCHSIA -TEST(ScudoWrappersCTest, MallInfo) { +template<typename FieldType> +void MallInfoTest() { // mallinfo is deprecated. #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" - const size_t BypassQuarantineSize = 1024U; + const FieldType BypassQuarantineSize = 1024U; struct mallinfo MI = mallinfo(); - size_t Allocated = MI.uordblks; + FieldType Allocated = MI.uordblks; void *P = malloc(BypassQuarantineSize); EXPECT_NE(P, nullptr); MI = mallinfo(); - EXPECT_GE(static_cast<size_t>(MI.uordblks), Allocated + BypassQuarantineSize); - EXPECT_GT(static_cast<size_t>(MI.hblkhd), 0U); - size_t Free = MI.fordblks; + EXPECT_GE(MI.uordblks, Allocated + BypassQuarantineSize); + EXPECT_GT(MI.hblkhd, static_cast<FieldType>(0)); + FieldType Free = MI.fordblks; free(P); MI = mallinfo(); - EXPECT_GE(static_cast<size_t>(MI.fordblks), Free + BypassQuarantineSize); + EXPECT_GE(MI.fordblks, Free + BypassQuarantineSize); #pragma clang diagnostic pop } + +#if !SCUDO_FUCHSIA +TEST_F(ScudoWrappersCTest, MallInfo) { +#if SCUDO_ANDROID + // Android accidentally set the fields to size_t instead of int. 
+ MallInfoTest<size_t>(); +#else + MallInfoTest<int>(); +#endif +} #endif -#if __GLIBC_PREREQ(2, 33) -TEST(ScudoWrappersCTest, MallInfo2) { +#if __GLIBC_PREREQ(2, 33) || SCUDO_ANDROID +TEST_F(ScudoWrappersCTest, MallInfo2) { const size_t BypassQuarantineSize = 1024U; struct mallinfo2 MI = mallinfo2(); size_t Allocated = MI.uordblks; @@ -301,7 +482,7 @@ TEST(ScudoWrappersCTest, MallInfo2) { static uintptr_t BoundaryP; static size_t Count; -static void callback(uintptr_t Base, size_t Size, void *Arg) { +static void callback(uintptr_t Base, UNUSED size_t Size, UNUSED void *Arg) { if (scudo::archSupportsMemoryTagging()) { Base = scudo::untagPointer(Base); BoundaryP = scudo::untagPointer(BoundaryP); @@ -314,13 +495,22 @@ static void callback(uintptr_t Base, size_t Size, void *Arg) { // To achieve this, we allocate a chunk for which the backing block will be // aligned on a page, then run the malloc_iterate on both the pages that the // block is a boundary for. It must only be seen once by the callback function. -TEST(ScudoWrappersCTest, MallocIterateBoundary) { - const size_t PageSize = sysconf(_SC_PAGESIZE); +TEST_F(ScudoWrappersCTest, MallocIterateBoundary) { + const size_t PageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); +#if SCUDO_ANDROID + // Android uses a 16 byte alignment for both 32 bit and 64 bit. + const size_t BlockDelta = 16U; +#else const size_t BlockDelta = FIRST_32_SECOND_64(8U, 16U); +#endif const size_t SpecialSize = PageSize - BlockDelta; // We aren't guaranteed that any size class is exactly a page wide. So we need - // to keep making allocations until we succeed. + // to keep making allocations until we get an allocation that starts exactly + // on a page boundary. The BlockDelta value is expected to be the number of + // bytes to subtract from a returned pointer to get to the actual start of + // the pointer in the size class. In practice, this means BlockDelta should + // be set to the minimum alignment in bytes for the allocation. // // With a 16-byte block alignment and 4096-byte page size, each allocation has // a probability of (1 - (16/4096)) of failing to meet the alignment @@ -357,7 +547,7 @@ TEST(ScudoWrappersCTest, MallocIterateBoundary) { // Fuchsia doesn't have alarm, fork or malloc_info. #if !SCUDO_FUCHSIA -TEST(ScudoWrappersCDeathTest, MallocDisableDeadlock) { +TEST_F(ScudoWrappersCDeathTest, MallocDisableDeadlock) { // We expect heap operations within a disable/enable scope to deadlock. EXPECT_DEATH( { @@ -372,7 +562,7 @@ TEST(ScudoWrappersCDeathTest, MallocDisableDeadlock) { ""); } -TEST(ScudoWrappersCTest, MallocInfo) { +TEST_F(ScudoWrappersCTest, MallocInfo) { // Use volatile so that the allocations don't get optimized away. void *volatile P1 = malloc(1234); void *volatile P2 = malloc(4321); @@ -392,7 +582,7 @@ TEST(ScudoWrappersCTest, MallocInfo) { free(P2); } -TEST(ScudoWrappersCDeathTest, Fork) { +TEST_F(ScudoWrappersCDeathTest, Fork) { void *P; pid_t Pid = fork(); EXPECT_GE(Pid, 0) << strerror(errno); @@ -424,7 +614,7 @@ static pthread_mutex_t Mutex; static pthread_cond_t Conditional = PTHREAD_COND_INITIALIZER; static bool Ready; -static void *enableMalloc(void *Unused) { +static void *enableMalloc(UNUSED void *Unused) { // Initialize the allocator for this thread. 
void *P = malloc(Size); EXPECT_NE(P, nullptr); @@ -444,7 +634,7 @@ static void *enableMalloc(void *Unused) { return nullptr; } -TEST(ScudoWrappersCTest, DisableForkEnable) { +TEST_F(ScudoWrappersCTest, DisableForkEnable) { pthread_t ThreadId; Ready = false; EXPECT_EQ(pthread_create(&ThreadId, nullptr, &enableMalloc, nullptr), 0); diff --git a/standalone/tests/wrappers_cpp_test.cpp b/standalone/tests/wrappers_cpp_test.cpp index a88dc4aacd5..c802ed22fba 100644 --- a/standalone/tests/wrappers_cpp_test.cpp +++ b/standalone/tests/wrappers_cpp_test.cpp @@ -17,49 +17,120 @@ #include <thread> #include <vector> +// Android does not support checking for new/delete mismatches. +#if SCUDO_ANDROID +#define SKIP_MISMATCH_TESTS 1 +#else +#define SKIP_MISMATCH_TESTS 0 +#endif + void operator delete(void *, size_t) noexcept; void operator delete[](void *, size_t) noexcept; -// Note that every Cxx allocation function in the test binary will be fulfilled -// by Scudo. See the comment in the C counterpart of this file. +extern "C" { +#ifndef SCUDO_ENABLE_HOOKS_TESTS +#define SCUDO_ENABLE_HOOKS_TESTS 0 +#endif + +#if (SCUDO_ENABLE_HOOKS_TESTS == 1) && (SCUDO_ENABLE_HOOKS == 0) +#error "Hooks tests should have hooks enabled as well!" +#endif -template <typename T> static void testCxxNew() { - T *P = new T; - EXPECT_NE(P, nullptr); - memset(P, 0x42, sizeof(T)); - EXPECT_DEATH(delete[] P, ""); - delete P; - EXPECT_DEATH(delete P, ""); - - P = new T; - EXPECT_NE(P, nullptr); - memset(P, 0x42, sizeof(T)); - operator delete(P, sizeof(T)); - - P = new (std::nothrow) T; - EXPECT_NE(P, nullptr); - memset(P, 0x42, sizeof(T)); - delete P; - - const size_t N = 16U; - T *A = new T[N]; - EXPECT_NE(A, nullptr); - memset(A, 0x42, sizeof(T) * N); - EXPECT_DEATH(delete A, ""); - delete[] A; - EXPECT_DEATH(delete[] A, ""); - - A = new T[N]; - EXPECT_NE(A, nullptr); - memset(A, 0x42, sizeof(T) * N); - operator delete[](A, sizeof(T) * N); - - A = new (std::nothrow) T[N]; - EXPECT_NE(A, nullptr); - memset(A, 0x42, sizeof(T) * N); - delete[] A; +struct AllocContext { + void *Ptr; + size_t Size; +}; +struct DeallocContext { + void *Ptr; +}; +static AllocContext AC; +static DeallocContext DC; + +#if (SCUDO_ENABLE_HOOKS_TESTS == 1) +__attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr, + size_t Size) { + AC.Ptr = Ptr; + AC.Size = Size; +} +__attribute__((visibility("default"))) void __scudo_deallocate_hook(void *Ptr) { + DC.Ptr = Ptr; +} +#endif // (SCUDO_ENABLE_HOOKS_TESTS == 1) } +class ScudoWrappersCppTest : public Test { +protected: + void SetUp() override { + if (SCUDO_ENABLE_HOOKS && !SCUDO_ENABLE_HOOKS_TESTS) + printf("Hooks are enabled but hooks tests are disabled.\n"); + } + + void verifyAllocHookPtr(UNUSED void *Ptr) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Ptr, AC.Ptr); + } + void verifyAllocHookSize(UNUSED size_t Size) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Size, AC.Size); + } + void verifyDeallocHookPtr(UNUSED void *Ptr) { + if (SCUDO_ENABLE_HOOKS_TESTS) + EXPECT_EQ(Ptr, DC.Ptr); + } + + template <typename T> void testCxxNew() { + T *P = new T; + EXPECT_NE(P, nullptr); + verifyAllocHookPtr(P); + verifyAllocHookSize(sizeof(T)); + memset(P, 0x42, sizeof(T)); + EXPECT_DEATH(delete[] P, ""); + delete P; + verifyDeallocHookPtr(P); + EXPECT_DEATH(delete P, ""); + + P = new T; + EXPECT_NE(P, nullptr); + memset(P, 0x42, sizeof(T)); + operator delete(P, sizeof(T)); + verifyDeallocHookPtr(P); + + P = new (std::nothrow) T; + verifyAllocHookPtr(P); + verifyAllocHookSize(sizeof(T)); + EXPECT_NE(P, 
nullptr); + memset(P, 0x42, sizeof(T)); + delete P; + verifyDeallocHookPtr(P); + + const size_t N = 16U; + T *A = new T[N]; + EXPECT_NE(A, nullptr); + verifyAllocHookPtr(A); + verifyAllocHookSize(sizeof(T) * N); + memset(A, 0x42, sizeof(T) * N); + EXPECT_DEATH(delete A, ""); + delete[] A; + verifyDeallocHookPtr(A); + EXPECT_DEATH(delete[] A, ""); + + A = new T[N]; + EXPECT_NE(A, nullptr); + memset(A, 0x42, sizeof(T) * N); + operator delete[](A, sizeof(T) * N); + verifyDeallocHookPtr(A); + + A = new (std::nothrow) T[N]; + verifyAllocHookPtr(A); + verifyAllocHookSize(sizeof(T) * N); + EXPECT_NE(A, nullptr); + memset(A, 0x42, sizeof(T) * N); + delete[] A; + verifyDeallocHookPtr(A); + } +}; +using ScudoWrappersCppDeathTest = ScudoWrappersCppTest; + class Pixel { public: enum class Color { Red, Green, Blue }; @@ -68,8 +139,11 @@ public: Color C = Color::Red; }; -TEST(ScudoWrappersCppDeathTest, New) { - if (getenv("SKIP_TYPE_MISMATCH")) { +// Note that every Cxx allocation function in the test binary will be fulfilled +// by Scudo. See the comment in the C counterpart of this file. + +TEST_F(ScudoWrappersCppDeathTest, New) { + if (getenv("SKIP_TYPE_MISMATCH") || SKIP_MISMATCH_TESTS) { printf("Skipped type mismatch tests.\n"); return; } @@ -96,7 +170,7 @@ static void stressNew() { Cv.wait(Lock); } for (size_t I = 0; I < 256U; I++) { - const size_t N = std::rand() % 128U; + const size_t N = static_cast<size_t>(std::rand()) % 128U; uintptr_t *P = new uintptr_t[N]; if (P) { memset(P, 0x42, sizeof(uintptr_t) * N); @@ -109,7 +183,7 @@ static void stressNew() { } } -TEST(ScudoWrappersCppTest, ThreadedNew) { +TEST_F(ScudoWrappersCppTest, ThreadedNew) { // TODO: Investigate why libc sometimes crashes with tag missmatch in // __pthread_clockjoin_ex. std::unique_ptr<scudo::ScopedDisableMemoryTagChecks> NoTags; @@ -131,7 +205,7 @@ TEST(ScudoWrappersCppTest, ThreadedNew) { } #if !SCUDO_FUCHSIA -TEST(ScudoWrappersCppTest, AllocAfterFork) { +TEST_F(ScudoWrappersCppTest, AllocAfterFork) { // This test can fail flakily when ran as a part of large number of // other tests if the maxmimum number of mappings allowed is low. // We tried to reduce the number of iterations of the loops with diff --git a/standalone/timing.cpp b/standalone/timing.cpp new file mode 100644 index 00000000000..59ae21d10f0 --- /dev/null +++ b/standalone/timing.cpp @@ -0,0 +1,29 @@ +//===-- timing.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "timing.h" + +namespace scudo { + +Timer::~Timer() { + if (Manager) + Manager->report(*this); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const char *Name) + : Timer(Manager.getOrCreateTimer(Name)) { + start(); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const Timer &Nest, + const char *Name) + : Timer(Manager.nest(Nest, Name)) { + start(); +} + +} // namespace scudo diff --git a/standalone/timing.h b/standalone/timing.h new file mode 100644 index 00000000000..84caa79e5c3 --- /dev/null +++ b/standalone/timing.h @@ -0,0 +1,221 @@ +//===-- timing.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_TIMING_H_ +#define SCUDO_TIMING_H_ + +#include "common.h" +#include "mutex.h" +#include "string_utils.h" +#include "thread_annotations.h" + +#include <inttypes.h> +#include <string.h> + +namespace scudo { + +class TimingManager; + +// A simple timer for evaluating execution time of code snippets. It can be used +// along with TimingManager or standalone. +class Timer { +public: + // The use of Timer without binding to a TimingManager is supposed to do the + // timer logging manually. Otherwise, TimingManager will do the logging stuff + // for you. + Timer() = default; + Timer(Timer &&Other) + : StartTime(0), AccTime(Other.AccTime), Manager(Other.Manager), + HandleId(Other.HandleId) { + Other.Manager = nullptr; + } + + Timer(const Timer &) = delete; + + ~Timer(); + + void start() { + CHECK_EQ(StartTime, 0U); + StartTime = getMonotonicTime(); + } + void stop() { + AccTime += getMonotonicTime() - StartTime; + StartTime = 0; + } + u64 getAccumulatedTime() const { return AccTime; } + + // Unset the bound TimingManager so that we don't report the data back. This + // is useful if we only want to track subset of certain scope events. + void ignore() { + StartTime = 0; + AccTime = 0; + Manager = nullptr; + } + +protected: + friend class TimingManager; + Timer(TimingManager &Manager, u32 HandleId) + : Manager(&Manager), HandleId(HandleId) {} + + u64 StartTime = 0; + u64 AccTime = 0; + TimingManager *Manager = nullptr; + u32 HandleId; +}; + +// A RAII-style wrapper for easy scope execution measurement. Note that in order +// not to take additional space for the message like `Name`. It only works with +// TimingManager. +class ScopedTimer : public Timer { +public: + ScopedTimer(TimingManager &Manager, const char *Name); + ScopedTimer(TimingManager &Manager, const Timer &Nest, const char *Name); + ~ScopedTimer() { stop(); } +}; + +// In Scudo, the execution time of single run of code snippets may not be +// useful, we are more interested in the average time from several runs. +// TimingManager lets the registered timer report their data and reports the +// average execution time for each timer periodically. +class TimingManager { +public: + TimingManager(u32 PrintingInterval = DefaultPrintingInterval) + : PrintingInterval(PrintingInterval) {} + ~TimingManager() { + if (NumAllocatedTimers != 0) + printAll(); + } + + Timer getOrCreateTimer(const char *Name) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + + CHECK_LT(strlen(Name), MaxLenOfTimerName); + for (u32 I = 0; I < NumAllocatedTimers; ++I) { + if (strncmp(Name, Timers[I].Name, MaxLenOfTimerName) == 0) + return Timer(*this, I); + } + + CHECK_LT(NumAllocatedTimers, MaxNumberOfTimers); + strncpy(Timers[NumAllocatedTimers].Name, Name, MaxLenOfTimerName); + TimerRecords[NumAllocatedTimers].AccumulatedTime = 0; + TimerRecords[NumAllocatedTimers].Occurrence = 0; + return Timer(*this, NumAllocatedTimers++); + } + + // Add a sub-Timer associated with another Timer. This is used when we want to + // detail the execution time in the scope of a Timer. + // For example, + // void Foo() { + // // T1 records the time spent in both first and second tasks. + // ScopedTimer T1(getTimingManager(), "Task1"); + // { + // // T2 records the time spent in first task + // ScopedTimer T2(getTimingManager, T1, "Task2"); + // // Do first task. + // } + // // Do second task. 
+ // } + // + // The report will show proper indents to indicate the nested relation like, + // -- Average Operation Time -- -- Name (# of Calls) -- + // 10.0(ns) Task1 (1) + // 5.0(ns) Task2 (1) + Timer nest(const Timer &T, const char *Name) EXCLUDES(Mutex) { + CHECK_EQ(T.Manager, this); + Timer Nesting = getOrCreateTimer(Name); + + ScopedLock L(Mutex); + CHECK_NE(Nesting.HandleId, T.HandleId); + Timers[Nesting.HandleId].Nesting = T.HandleId; + return Nesting; + } + + void report(const Timer &T) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + + const u32 HandleId = T.HandleId; + CHECK_LT(HandleId, MaxNumberOfTimers); + TimerRecords[HandleId].AccumulatedTime += T.getAccumulatedTime(); + ++TimerRecords[HandleId].Occurrence; + ++NumEventsReported; + if (NumEventsReported % PrintingInterval == 0) + printAllImpl(); + } + + void printAll() EXCLUDES(Mutex) { + ScopedLock L(Mutex); + printAllImpl(); + } + +private: + void printAllImpl() REQUIRES(Mutex) { + static char NameHeader[] = "-- Name (# of Calls) --"; + static char AvgHeader[] = "-- Average Operation Time --"; + ScopedString Str; + Str.append("%-15s %-15s\n", AvgHeader, NameHeader); + + for (u32 I = 0; I < NumAllocatedTimers; ++I) { + if (Timers[I].Nesting != MaxNumberOfTimers) + continue; + printImpl(Str, I); + } + + Str.output(); + } + + void printImpl(ScopedString &Str, const u32 HandleId, + const u32 ExtraIndent = 0) REQUIRES(Mutex) { + const u64 AccumulatedTime = TimerRecords[HandleId].AccumulatedTime; + const u64 Occurrence = TimerRecords[HandleId].Occurrence; + const u64 Integral = Occurrence == 0 ? 0 : AccumulatedTime / Occurrence; + // Only keep single digit of fraction is enough and it enables easier layout + // maintenance. + const u64 Fraction = + Occurrence == 0 ? 0 + : ((AccumulatedTime % Occurrence) * 10) / Occurrence; + + Str.append("%14" PRId64 ".%" PRId64 "(ns) %-11s", Integral, Fraction, " "); + + for (u32 I = 0; I < ExtraIndent; ++I) + Str.append("%s", " "); + Str.append("%s (%" PRId64 ")\n", Timers[HandleId].Name, Occurrence); + + for (u32 I = 0; I < NumAllocatedTimers; ++I) + if (Timers[I].Nesting == HandleId) + printImpl(Str, I, ExtraIndent + 1); + } + + // Instead of maintaining pages for timer registration, a static buffer is + // sufficient for most use cases in Scudo. + static constexpr u32 MaxNumberOfTimers = 50; + static constexpr u32 MaxLenOfTimerName = 50; + static constexpr u32 DefaultPrintingInterval = 100; + + struct Record { + u64 AccumulatedTime = 0; + u64 Occurrence = 0; + }; + + struct TimerInfo { + char Name[MaxLenOfTimerName + 1]; + u32 Nesting = MaxNumberOfTimers; + }; + + HybridMutex Mutex; + // The frequency of proactively dumping the timer statistics. For example, the + // default setting is to dump the statistics every 100 reported events. 
+ u32 PrintingInterval GUARDED_BY(Mutex); + u64 NumEventsReported GUARDED_BY(Mutex) = 0; + u32 NumAllocatedTimers GUARDED_BY(Mutex) = 0; + TimerInfo Timers[MaxNumberOfTimers] GUARDED_BY(Mutex); + Record TimerRecords[MaxNumberOfTimers] GUARDED_BY(Mutex); +}; + +} // namespace scudo + +#endif // SCUDO_TIMING_H_ diff --git a/standalone/trusty.cpp b/standalone/trusty.cpp index 592514d4c3a..26b349c6e50 100644 --- a/standalone/trusty.cpp +++ b/standalone/trusty.cpp @@ -12,17 +12,18 @@ #include "common.h" #include "mutex.h" -#include "string_utils.h" +#include "report_linux.h" #include "trusty.h" #include <errno.h> // for errno +#include <lk/err_ptr.h> // for PTR_ERR and IS_ERR #include <stdio.h> // for printf() #include <stdlib.h> // for getenv() #include <sys/auxv.h> // for getauxval() #include <time.h> // for clock_gettime() +#include <trusty_err.h> // for lk_err_to_errno() #include <trusty_syscalls.h> // for _trusty_brk() - -#define SBRK_ALIGN 32 +#include <uapi/mm.h> // for MMAP flags namespace scudo { @@ -30,35 +31,39 @@ uptr getPageSize() { return getauxval(AT_PAGESZ); } void NORETURN die() { abort(); } -void *map(UNUSED void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, +void *map(void *Addr, uptr Size, const char *Name, uptr Flags, UNUSED MapPlatformData *Data) { - // Calling _trusty_brk(0) returns the current program break. - uptr ProgramBreak = reinterpret_cast<uptr>(_trusty_brk(0)); - uptr Start; - uptr End; - - Start = roundUp(ProgramBreak, SBRK_ALIGN); - // Don't actually extend the heap if MAP_NOACCESS flag is set since this is - // the case where Scudo tries to reserve a memory region without mapping - // physical pages. + uint32_t MmapFlags = + MMAP_FLAG_ANONYMOUS | MMAP_FLAG_PROT_READ | MMAP_FLAG_PROT_WRITE; + + // If the MAP_NOACCESS flag is set, Scudo tries to reserve + // a memory region without mapping physical pages. This corresponds + // to MMAP_FLAG_NO_PHYSICAL in Trusty. if (Flags & MAP_NOACCESS) - return reinterpret_cast<void *>(Start); - - // Attempt to extend the heap by Size bytes using _trusty_brk. - End = roundUp(Start + Size, SBRK_ALIGN); - ProgramBreak = - reinterpret_cast<uptr>(_trusty_brk(reinterpret_cast<void *>(End))); - if (ProgramBreak < End) { - errno = ENOMEM; - dieOnMapUnmapError(Size); + MmapFlags |= MMAP_FLAG_NO_PHYSICAL; + if (Addr) + MmapFlags |= MMAP_FLAG_FIXED_NOREPLACE; + + if (Flags & MAP_MEMTAG) + MmapFlags |= MMAP_FLAG_PROT_MTE; + + void *P = (void *)_trusty_mmap(Addr, Size, MmapFlags, 0); + + if (IS_ERR(P)) { + errno = lk_err_to_errno(PTR_ERR(P)); + if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) + reportMapError(Size); return nullptr; } - return reinterpret_cast<void *>(Start); // Base of new reserved region. + + return P; } -// Unmap is a no-op since Trusty uses sbrk instead of memory mapping. 
void unmap(UNUSED void *Addr, UNUSED uptr Size, UNUSED uptr Flags, - UNUSED MapPlatformData *Data) {} + UNUSED MapPlatformData *Data) { + if (_trusty_munmap(Addr, Size) != 0) + reportUnmapError(reinterpret_cast<uptr>(Addr), Size); +} void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) {} @@ -85,6 +90,17 @@ u64 getMonotonicTime() { static_cast<u64>(TS.tv_nsec); } +u64 getMonotonicTimeFast() { +#if defined(CLOCK_MONOTONIC_COARSE) + timespec TS; + clock_gettime(CLOCK_MONOTONIC_COARSE, &TS); + return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) + + static_cast<u64>(TS.tv_nsec); +#else + return getMonotonicTime(); +#endif +} + u32 getNumberOfCPUs() { return 0; } u32 getThreadID() { return 0; } diff --git a/standalone/tsd.h b/standalone/tsd.h index c5ed6ddfa12..72773f2f72b 100644 --- a/standalone/tsd.h +++ b/standalone/tsd.h @@ -53,8 +53,14 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { inline void unlock() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } - void commitBack(Allocator *Instance) ASSERT_CAPABILITY(Mutex) { - Instance->commitBack(this); + void commitBack(Allocator *Instance) { Instance->commitBack(this); } + + // As the comments attached to `getCache()`, the TSD doesn't always need to be + // locked. In that case, we would only skip the check before we have all TSDs + // locked in all paths. + void assertLocked(bool BypassCheck) ASSERT_CAPABILITY(Mutex) { + if (SCUDO_DEBUG && !BypassCheck) + Mutex.assertHeld(); } // Ideally, we may want to assert that all the operations on @@ -66,11 +72,8 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { // TODO(chiahungduan): Ideally, we want to do `Mutex.assertHeld` but acquiring // TSD doesn't always require holding the lock. Add this assertion while the // lock is always acquired. 
- typename Allocator::CacheT &getCache() ASSERT_CAPABILITY(Mutex) { - return Cache; - } - typename Allocator::QuarantineCacheT &getQuarantineCache() - ASSERT_CAPABILITY(Mutex) { + typename Allocator::CacheT &getCache() REQUIRES(Mutex) { return Cache; } + typename Allocator::QuarantineCacheT &getQuarantineCache() REQUIRES(Mutex) { return QuarantineCache; } diff --git a/standalone/tsd_exclusive.h b/standalone/tsd_exclusive.h index 62da8aeb537..a58ba650508 100644 --- a/standalone/tsd_exclusive.h +++ b/standalone/tsd_exclusive.h @@ -11,6 +11,8 @@ #include "tsd.h" +#include "string_utils.h" + namespace scudo { struct ThreadState { @@ -25,6 +27,31 @@ struct ThreadState { template <class Allocator> void teardownThread(void *Ptr); template <class Allocator> struct TSDRegistryExT { + using ThisT = TSDRegistryExT<Allocator>; + + struct ScopedTSD { + ALWAYS_INLINE ScopedTSD(ThisT &TSDRegistry) { + CurrentTSD = TSDRegistry.getTSDAndLock(&UnlockRequired); + DCHECK_NE(CurrentTSD, nullptr); + } + + ~ScopedTSD() { + if (UNLIKELY(UnlockRequired)) + CurrentTSD->unlock(); + } + + TSD<Allocator> &operator*() { return *CurrentTSD; } + + TSD<Allocator> *operator->() { + CurrentTSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + return CurrentTSD; + } + + private: + TSD<Allocator> *CurrentTSD; + bool UnlockRequired; + }; + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); @@ -57,29 +84,21 @@ template <class Allocator> struct TSDRegistryExT { Initialized = false; } + void drainCaches(Allocator *Instance) { + // We don't have a way to iterate all thread local `ThreadTSD`s. Simply + // drain the `ThreadTSD` of current thread and `FallbackTSD`. + Instance->drainCache(&ThreadTSD); + FallbackTSD.lock(); + Instance->drainCache(&FallbackTSD); + FallbackTSD.unlock(); + } + ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { if (LIKELY(State.InitState != ThreadState::NotInitialized)) return; initThread(Instance, MinimalInit); } - // TODO(chiahungduan): Consider removing the argument `UnlockRequired` by - // embedding the logic into TSD or always locking the TSD. It will enable us - // to properly mark thread annotation here and adding proper runtime - // assertions in the member functions of TSD. For example, assert the lock is - // acquired before calling TSD::commitBack(). - ALWAYS_INLINE TSD<Allocator> * - getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { - if (LIKELY(State.InitState == ThreadState::Initialized && - !atomic_load(&Disabled, memory_order_acquire))) { - *UnlockRequired = false; - return &ThreadTSD; - } - FallbackTSD.lock(); - *UnlockRequired = true; - return &FallbackTSD; - } - // To disable the exclusive TSD registry, we effectively lock the fallback TSD // and force all threads to attempt to use it instead of their local one. void disable() NO_THREAD_SAFETY_ANALYSIS { @@ -104,7 +123,26 @@ template <class Allocator> struct TSDRegistryExT { bool getDisableMemInit() { return State.DisableMemInit; } + void getStats(ScopedString *Str) { + // We don't have a way to iterate all thread local `ThreadTSD`s. Instead of + // printing only self `ThreadTSD` which may mislead the usage, we just skip + // it. 
+ Str->append("Exclusive TSD don't support iterating each TSD\n"); + } + private: + ALWAYS_INLINE TSD<Allocator> * + getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { + if (LIKELY(State.InitState == ThreadState::Initialized && + !atomic_load(&Disabled, memory_order_acquire))) { + *UnlockRequired = false; + return &ThreadTSD; + } + FallbackTSD.lock(); + *UnlockRequired = true; + return &FallbackTSD; + } + // Using minimal initialization allows for global initialization while keeping // the thread specific structure untouched. The fallback structure will be // used instead. diff --git a/standalone/tsd_shared.h b/standalone/tsd_shared.h index 64b3bd844b0..dade16dad9f 100644 --- a/standalone/tsd_shared.h +++ b/standalone/tsd_shared.h @@ -11,6 +11,8 @@ #include "tsd.h" +#include "string_utils.h" + #if SCUDO_HAS_PLATFORM_TLS_SLOT // This is a platform-provided header that needs to be on the include path when // Scudo is compiled. It must declare a function with the prototype: @@ -24,6 +26,27 @@ namespace scudo { template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount> struct TSDRegistrySharedT { + using ThisT = TSDRegistrySharedT<Allocator, TSDsArraySize, DefaultTSDCount>; + + struct ScopedTSD { + ALWAYS_INLINE ScopedTSD(ThisT &TSDRegistry) { + CurrentTSD = TSDRegistry.getTSDAndLock(); + DCHECK_NE(CurrentTSD, nullptr); + } + + ~ScopedTSD() { CurrentTSD->unlock(); } + + TSD<Allocator> &operator*() { return *CurrentTSD; } + + TSD<Allocator> *operator->() { + CurrentTSD->assertLocked(/*BypassCheck=*/false); + return CurrentTSD; + } + + private: + TSD<Allocator> *CurrentTSD; + }; + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); @@ -52,6 +75,15 @@ struct TSDRegistrySharedT { Initialized = false; } + void drainCaches(Allocator *Instance) { + ScopedLock L(MutexTSDs); + for (uptr I = 0; I < NumberOfTSDs; ++I) { + TSDs[I].lock(); + Instance->drainCache(&TSDs[I]); + TSDs[I].unlock(); + } + } + ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, UNUSED bool MinimalInit) { if (LIKELY(getCurrentTSD())) @@ -59,26 +91,6 @@ struct TSDRegistrySharedT { initThread(Instance); } - // TSDs is an array of locks and which is not supported for marking - // thread-safety capability. - ALWAYS_INLINE TSD<Allocator> * - getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { - TSD<Allocator> *TSD = getCurrentTSD(); - DCHECK(TSD); - *UnlockRequired = true; - // Try to lock the currently associated context. - if (TSD->tryLock()) - return TSD; - // If that fails, go down the slow path. - if (TSDsArraySize == 1U) { - // Only 1 TSD, not need to go any further. - // The compiler will optimize this one way or the other. - TSD->lock(); - return TSD; - } - return getTSDAndLockSlow(TSD); - } - void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); for (u32 I = 0; I < TSDsArraySize; I++) @@ -102,7 +114,41 @@ struct TSDRegistrySharedT { bool getDisableMemInit() const { return *getTlsPtr() & 1; } + void getStats(ScopedString *Str) EXCLUDES(MutexTSDs) { + ScopedLock L(MutexTSDs); + + Str->append("Stats: SharedTSDs: %u available; total %u\n", NumberOfTSDs, + TSDsArraySize); + for (uptr I = 0; I < NumberOfTSDs; ++I) { + TSDs[I].lock(); + // Theoretically, we want to mark TSD::lock()/TSD::unlock() with proper + // thread annotations. However, given the TSD is only locked on shared + // path, do the assertion in a separate path to avoid confusing the + // analyzer. 
+ TSDs[I].assertLocked(/*BypassCheck=*/true); + Str->append(" Shared TSD[%zu]:\n", I); + TSDs[I].getCache().getStats(Str); + TSDs[I].unlock(); + } + } + private: + ALWAYS_INLINE TSD<Allocator> *getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS { + TSD<Allocator> *TSD = getCurrentTSD(); + DCHECK(TSD); + // Try to lock the currently associated context. + if (TSD->tryLock()) + return TSD; + // If that fails, go down the slow path. + if (TSDsArraySize == 1U) { + // Only 1 TSD, not need to go any further. + // The compiler will optimize this one way or the other. + TSD->lock(); + return TSD; + } + return getTSDAndLockSlow(TSD); + } + ALWAYS_INLINE uptr *getTlsPtr() const { #if SCUDO_HAS_PLATFORM_TLS_SLOT return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot()); diff --git a/standalone/vector.h b/standalone/vector.h index 9f2c200958f..ca10cc281d7 100644 --- a/standalone/vector.h +++ b/standalone/vector.h @@ -9,26 +9,20 @@ #ifndef SCUDO_VECTOR_H_ #define SCUDO_VECTOR_H_ -#include "common.h" +#include "mem_map.h" #include <string.h> namespace scudo { -// A low-level vector based on map. May incur a significant memory overhead for -// small vectors. The current implementation supports only POD types. +// A low-level vector based on map. It stores the contents inline up to a fixed +// capacity, or in an external memory buffer if it grows bigger than that. May +// incur a significant memory overhead for small vectors. The current +// implementation supports only POD types. +// +// NOTE: This class is not meant to be used directly, use Vector<T> instead. template <typename T> class VectorNoCtor { public: - constexpr void init(uptr InitialCapacity = 0) { - Data = &LocalData[0]; - CapacityBytes = sizeof(LocalData); - if (InitialCapacity > capacity()) - reserve(InitialCapacity); - } - void destroy() { - if (Data != &LocalData[0]) - unmap(Data, CapacityBytes, 0, &MapData); - } T &operator[](uptr I) { DCHECK_LT(I, Size); return Data[I]; @@ -41,7 +35,9 @@ public: DCHECK_LE(Size, capacity()); if (Size == capacity()) { const uptr NewCapacity = roundUpPowerOfTwo(Size + 1); - reallocate(NewCapacity); + if (!reallocate(NewCapacity)) { + return; + } } memcpy(&Data[Size++], &Element, sizeof(T)); } @@ -57,14 +53,17 @@ public: const T *data() const { return Data; } T *data() { return Data; } constexpr uptr capacity() const { return CapacityBytes / sizeof(T); } - void reserve(uptr NewSize) { + bool reserve(uptr NewSize) { // Never downsize internal buffer. 
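    // Growth can fail now that the backing store is a MemMapT mapped with
    // MAP_ALLOWNOMEM: reserve() forwards reallocate()'s result, and callers
    // (resize() here, push_back() via reallocate() directly) bail out without
    // touching the existing contents when the mapping cannot be grown.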
if (NewSize > capacity()) - reallocate(NewSize); + return reallocate(NewSize); + return true; } void resize(uptr NewSize) { if (NewSize > Size) { - reserve(NewSize); + if (!reserve(NewSize)) { + return; + } memset(&Data[Size], 0, sizeof(T) * (NewSize - Size)); } Size = NewSize; @@ -78,24 +77,47 @@ public: const T *end() const { return data() + size(); } T *end() { return data() + size(); } +protected: + constexpr void init(uptr InitialCapacity = 0) { + Data = &LocalData[0]; + CapacityBytes = sizeof(LocalData); + if (InitialCapacity > capacity()) + reserve(InitialCapacity); + } + void destroy() { + if (Data != &LocalData[0]) + ExternalBuffer.unmap(ExternalBuffer.getBase(), + ExternalBuffer.getCapacity()); + } + private: - void reallocate(uptr NewCapacity) { + bool reallocate(uptr NewCapacity) { DCHECK_GT(NewCapacity, 0); DCHECK_LE(Size, NewCapacity); + + MemMapT NewExternalBuffer; NewCapacity = roundUp(NewCapacity * sizeof(T), getPageSizeCached()); - T *NewData = reinterpret_cast<T *>( - map(nullptr, NewCapacity, "scudo:vector", 0, &MapData)); - memcpy(NewData, Data, Size * sizeof(T)); + if (!NewExternalBuffer.map(/*Addr=*/0U, NewCapacity, "scudo:vector", + MAP_ALLOWNOMEM)) { + return false; + } + T *NewExternalData = reinterpret_cast<T *>(NewExternalBuffer.getBase()); + + memcpy(NewExternalData, Data, Size * sizeof(T)); destroy(); - Data = NewData; + + Data = NewExternalData; CapacityBytes = NewCapacity; + ExternalBuffer = NewExternalBuffer; + return true; } T *Data = nullptr; - T LocalData[256 / sizeof(T)] = {}; uptr CapacityBytes = 0; uptr Size = 0; - [[no_unique_address]] MapPlatformData MapData = {}; + + T LocalData[256 / sizeof(T)] = {}; + MemMapT ExternalBuffer; }; template <typename T> class Vector : public VectorNoCtor<T> { diff --git a/standalone/wrappers_c.cpp b/standalone/wrappers_c.cpp index b4d51be716c..60014a0f66b 100644 --- a/standalone/wrappers_c.cpp +++ b/standalone/wrappers_c.cpp @@ -12,6 +12,9 @@ #if !SCUDO_ANDROID || !_BIONIC #include "allocator_config.h" +#include "internal_defs.h" +#include "platform.h" +#include "scudo/interface.h" #include "wrappers_c.h" #include "wrappers_c_checks.h" diff --git a/standalone/wrappers_c.inc b/standalone/wrappers_c.inc index 37e336ee09d..59f3fb0962f 100644 --- a/standalone/wrappers_c.inc +++ b/standalone/wrappers_c.inc @@ -17,6 +17,35 @@ #define SCUDO_MALLOC_ALIGNMENT FIRST_32_SECOND_64(8U, 16U) #endif +static void reportAllocation(void *ptr, size_t size) { + if (SCUDO_ENABLE_HOOKS) + if (__scudo_allocate_hook && ptr) + __scudo_allocate_hook(ptr, size); +} +static void reportDeallocation(void *ptr) { + if (SCUDO_ENABLE_HOOKS) + if (__scudo_deallocate_hook) + __scudo_deallocate_hook(ptr); +} +static void reportReallocAllocation(void *old_ptr, void *new_ptr, size_t size) { + DCHECK_NE(new_ptr, nullptr); + + if (SCUDO_ENABLE_HOOKS) { + if (__scudo_realloc_allocate_hook) + __scudo_realloc_allocate_hook(old_ptr, new_ptr, size); + else if (__scudo_allocate_hook) + __scudo_allocate_hook(new_ptr, size); + } +} +static void reportReallocDeallocation(void *old_ptr) { + if (SCUDO_ENABLE_HOOKS) { + if (__scudo_realloc_deallocate_hook) + __scudo_realloc_deallocate_hook(old_ptr); + else if (__scudo_deallocate_hook) + __scudo_deallocate_hook(old_ptr); + } +} + extern "C" { INTERFACE WEAK void *SCUDO_PREFIX(calloc)(size_t nmemb, size_t size) { @@ -28,11 +57,14 @@ INTERFACE WEAK void *SCUDO_PREFIX(calloc)(size_t nmemb, size_t size) { } scudo::reportCallocOverflow(nmemb, size); } - return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( - Product, 
scudo::Chunk::Origin::Malloc, SCUDO_MALLOC_ALIGNMENT, true)); + void *Ptr = SCUDO_ALLOCATOR.allocate(Product, scudo::Chunk::Origin::Malloc, + SCUDO_MALLOC_ALIGNMENT, true); + reportAllocation(Ptr, Product); + return scudo::setErrnoOnNull(Ptr); } INTERFACE WEAK void SCUDO_PREFIX(free)(void *ptr) { + reportDeallocation(ptr); SCUDO_ALLOCATOR.deallocate(ptr, scudo::Chunk::Origin::Malloc); } @@ -54,6 +86,8 @@ INTERFACE WEAK struct SCUDO_MALLINFO SCUDO_PREFIX(mallinfo)(void) { return Info; } +// On Android, mallinfo2 is an alias of mallinfo, so don't define both. +#if !SCUDO_ANDROID INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) { struct __scudo_mallinfo2 Info = {}; scudo::StatCounters Stats; @@ -70,10 +104,13 @@ INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) { Info.fordblks = Info.fsmblks; return Info; } +#endif INTERFACE WEAK void *SCUDO_PREFIX(malloc)(size_t size) { - return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( - size, scudo::Chunk::Origin::Malloc, SCUDO_MALLOC_ALIGNMENT)); + void *Ptr = SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc, + SCUDO_MALLOC_ALIGNMENT); + reportAllocation(Ptr, size); + return scudo::setErrnoOnNull(Ptr); } #if SCUDO_ANDROID @@ -102,8 +139,10 @@ INTERFACE WEAK void *SCUDO_PREFIX(memalign)(size_t alignment, size_t size) { scudo::reportAlignmentNotPowerOfTwo(alignment); } } - return SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Memalign, - alignment); + void *Ptr = + SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Memalign, alignment); + reportAllocation(Ptr, size); + return Ptr; } INTERFACE WEAK int SCUDO_PREFIX(posix_memalign)(void **memptr, size_t alignment, @@ -117,6 +156,8 @@ INTERFACE WEAK int SCUDO_PREFIX(posix_memalign)(void **memptr, size_t alignment, SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Memalign, alignment); if (UNLIKELY(!Ptr)) return ENOMEM; + reportAllocation(Ptr, size); + *memptr = Ptr; return 0; } @@ -131,26 +172,57 @@ INTERFACE WEAK void *SCUDO_PREFIX(pvalloc)(size_t size) { scudo::reportPvallocOverflow(size); } // pvalloc(0) should allocate one page. - return scudo::setErrnoOnNull( + void *Ptr = SCUDO_ALLOCATOR.allocate(size ? scudo::roundUp(size, PageSize) : PageSize, - scudo::Chunk::Origin::Memalign, PageSize)); + scudo::Chunk::Origin::Memalign, PageSize); + reportAllocation(Ptr, scudo::roundUp(size, PageSize)); + + return scudo::setErrnoOnNull(Ptr); } INTERFACE WEAK void *SCUDO_PREFIX(realloc)(void *ptr, size_t size) { - if (!ptr) - return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( - size, scudo::Chunk::Origin::Malloc, SCUDO_MALLOC_ALIGNMENT)); + if (!ptr) { + void *Ptr = SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc, + SCUDO_MALLOC_ALIGNMENT); + reportAllocation(Ptr, size); + return scudo::setErrnoOnNull(Ptr); + } if (size == 0) { + reportDeallocation(ptr); SCUDO_ALLOCATOR.deallocate(ptr, scudo::Chunk::Origin::Malloc); return nullptr; } - return scudo::setErrnoOnNull( - SCUDO_ALLOCATOR.reallocate(ptr, size, SCUDO_MALLOC_ALIGNMENT)); + + // Given that the reporting of deallocation and allocation are not atomic, we + // always pretend the old pointer will be released so that the user doesn't + // need to worry about the false double-use case from the view of hooks. + // + // For example, assume that `realloc` releases the old pointer and allocates a + // new pointer. Before the reporting of both operations has been done, another + // thread may get the old pointer from `malloc`. 
It may be misinterpreted as + // double-use if it's not handled properly on the hook side. + reportReallocDeallocation(ptr); + void *NewPtr = SCUDO_ALLOCATOR.reallocate(ptr, size, SCUDO_MALLOC_ALIGNMENT); + if (NewPtr != nullptr) { + // Note that even if NewPtr == ptr, the size has changed. We still need to + // report the new size. + reportReallocAllocation(/*OldPtr=*/ptr, NewPtr, size); + } else { + // If `realloc` fails, the old pointer is not released. Report the old + // pointer as allocated again. + reportReallocAllocation(/*OldPtr=*/ptr, /*NewPtr=*/ptr, + SCUDO_ALLOCATOR.getAllocSize(ptr)); + } + + return scudo::setErrnoOnNull(NewPtr); } INTERFACE WEAK void *SCUDO_PREFIX(valloc)(size_t size) { - return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( - size, scudo::Chunk::Origin::Memalign, scudo::getPageSizeCached())); + void *Ptr = SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Memalign, + scudo::getPageSizeCached()); + reportAllocation(Ptr, size); + + return scudo::setErrnoOnNull(Ptr); } INTERFACE WEAK int SCUDO_PREFIX(malloc_iterate)( @@ -175,20 +247,30 @@ void SCUDO_PREFIX(malloc_postinit)() { INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, int value) { if (param == M_DECAY_TIME) { if (SCUDO_ANDROID) { - if (value == 0) { - // Will set the release values to their minimum values. - value = INT32_MIN; - } else { - // Will set the release values to their maximum values. + // Before changing the interval, reset the memory usage status by doing a + // M_PURGE call so that we can minimize the impact of any unreleased pages + // introduced by interval transition. + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::Force); + + // The values allowed on Android are {-1, 0, 1}. "1" means the longest + // interval. + CHECK(value >= -1 && value <= 1); + if (value == 1) value = INT32_MAX; - } } SCUDO_ALLOCATOR.setOption(scudo::Option::ReleaseInterval, static_cast<scudo::sptr>(value)); return 1; } else if (param == M_PURGE) { - SCUDO_ALLOCATOR.releaseToOS(); + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::Force); + return 1; + } else if (param == M_PURGE_ALL) { + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::ForceAll); + return 1; + } else if (param == M_LOG_STATS) { + SCUDO_ALLOCATOR.printStats(); + SCUDO_ALLOCATOR.printFragmentationInfo(); return 1; } else { scudo::Option option; @@ -224,8 +306,12 @@ INTERFACE WEAK void *SCUDO_PREFIX(aligned_alloc)(size_t alignment, } scudo::reportInvalidAlignedAllocAlignment(alignment, size); } - return scudo::setErrnoOnNull( - SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc, alignment)); + + void *Ptr = + SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc, alignment); + reportAllocation(Ptr, size); + + return scudo::setErrnoOnNull(Ptr); } INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { diff --git a/standalone/wrappers_c_bionic.cpp b/standalone/wrappers_c_bionic.cpp index 18c3bf2c0ed..e9d8c1e8d3d 100644 --- a/standalone/wrappers_c_bionic.cpp +++ b/standalone/wrappers_c_bionic.cpp @@ -12,6 +12,9 @@ #if SCUDO_ANDROID && _BIONIC #include "allocator_config.h" +#include "internal_defs.h" +#include "platform.h" +#include "scudo/interface.h" #include "wrappers_c.h" #include "wrappers_c_checks.h" @@ -24,22 +27,7 @@ extern "C" void SCUDO_PREFIX(malloc_postinit)(); SCUDO_REQUIRE_CONSTANT_INITIALIZATION -static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> - SCUDO_ALLOCATOR; - -#include "wrappers_c.inc" - -#undef SCUDO_ALLOCATOR -#undef SCUDO_PREFIX - -// Svelte MallocDispatch 
definitions. -#define SCUDO_PREFIX(name) CONCATENATE(scudo_svelte_, name) -#define SCUDO_ALLOCATOR SvelteAllocator - -extern "C" void SCUDO_PREFIX(malloc_postinit)(); -SCUDO_REQUIRE_CONSTANT_INITIALIZATION -static scudo::Allocator<scudo::AndroidSvelteConfig, - SCUDO_PREFIX(malloc_postinit)> +static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; #include "wrappers_c.inc" @@ -50,15 +38,14 @@ static scudo::Allocator<scudo::AndroidSvelteConfig, // TODO(kostyak): support both allocators. INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); } -INTERFACE void -__scudo_get_error_info(struct scudo_error_info *error_info, - uintptr_t fault_addr, const char *stack_depot, - const char *region_info, const char *ring_buffer, - const char *memory, const char *memory_tags, - uintptr_t memory_addr, size_t memory_size) { - Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info, - ring_buffer, memory, memory_tags, memory_addr, - memory_size); +INTERFACE void __scudo_get_error_info( + struct scudo_error_info *error_info, uintptr_t fault_addr, + const char *stack_depot, size_t stack_depot_size, const char *region_info, + const char *ring_buffer, size_t ring_buffer_size, const char *memory, + const char *memory_tags, uintptr_t memory_addr, size_t memory_size) { + Allocator.getErrorInfo(error_info, fault_addr, stack_depot, stack_depot_size, + region_info, ring_buffer, ring_buffer_size, memory, + memory_tags, memory_addr, memory_size); } INTERFACE const char *__scudo_get_stack_depot_addr() { @@ -66,7 +53,7 @@ INTERFACE const char *__scudo_get_stack_depot_addr() { } INTERFACE size_t __scudo_get_stack_depot_size() { - return sizeof(scudo::StackDepot); + return Allocator.getStackDepotSize(); } INTERFACE const char *__scudo_get_region_info_addr() { diff --git a/standalone/wrappers_c_checks.h b/standalone/wrappers_c_checks.h index 9cd48e82792..d0288699cf1 100644 --- a/standalone/wrappers_c_checks.h +++ b/standalone/wrappers_c_checks.h @@ -31,15 +31,13 @@ inline void *setErrnoOnNull(void *Ptr) { // Checks aligned_alloc() parameters, verifies that the alignment is a power of // two and that the size is a multiple of alignment. inline bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { - return Alignment == 0 || !isPowerOfTwo(Alignment) || - !isAligned(Size, Alignment); + return !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment); } // Checks posix_memalign() parameters, verifies that alignment is a power of two // and a multiple of sizeof(void *). inline bool checkPosixMemalignAlignment(uptr Alignment) { - return Alignment == 0 || !isPowerOfTwo(Alignment) || - !isAligned(Alignment, sizeof(void *)); + return !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *)); } // Returns true if calloc(Size, N) overflows on Size*N calculation. 
diff --git a/standalone/wrappers_c_checks.h b/standalone/wrappers_c_checks.h
index 9cd48e82792..d0288699cf1 100644
--- a/standalone/wrappers_c_checks.h
+++ b/standalone/wrappers_c_checks.h
@@ -31,15 +31,13 @@ inline void *setErrnoOnNull(void *Ptr) {
 // Checks aligned_alloc() parameters, verifies that the alignment is a power of
 // two and that the size is a multiple of alignment.
 inline bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) {
-  return Alignment == 0 || !isPowerOfTwo(Alignment) ||
-         !isAligned(Size, Alignment);
+  return !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment);
 }
 
 // Checks posix_memalign() parameters, verifies that alignment is a power of two
 // and a multiple of sizeof(void *).
 inline bool checkPosixMemalignAlignment(uptr Alignment) {
-  return Alignment == 0 || !isPowerOfTwo(Alignment) ||
-         !isAligned(Alignment, sizeof(void *));
+  return !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *));
 }
 
 // Returns true if calloc(Size, N) overflows on Size*N calculation. Use a
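The simplified checks drop the explicit Alignment == 0 test; that is only sound if isPowerOfTwo(0) itself returns false, which the stand-alone sketch below assumes (the helper definitions here are stand-ins, not Scudo's actual ones):

#include <cstdint>

using uptr = uintptr_t;

// Stand-in helpers, assuming isPowerOfTwo() rejects zero so the removed
// "Alignment == 0" test stays covered by the first clause.
static bool isPowerOfTwo(uptr X) { return X != 0 && (X & (X - 1)) == 0; }
static bool isAligned(uptr X, uptr Alignment) {
  return (X & (Alignment - 1)) == 0;
}

// Mirrors checkAlignedAllocAlignmentAndSize(): aligned_alloc(64, 128) passes,
// while alignments of 0 or 48 and a size of 100 with alignment 64 are rejected.
static bool alignedAllocArgsAreInvalid(uptr Alignment, uptr Size) {
  return !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment);
}

// Mirrors checkPosixMemalignAlignment(): the alignment must additionally be a
// multiple of sizeof(void *), so posix_memalign(&p, 2, n) is rejected.
static bool posixMemalignAlignmentIsInvalid(uptr Alignment) {
  return !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *));
}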
diff --git a/standalone/wrappers_cpp.cpp b/standalone/wrappers_cpp.cpp
index 374e36d72b3..098d4f71acc 100644
--- a/standalone/wrappers_cpp.cpp
+++ b/standalone/wrappers_cpp.cpp
@@ -12,6 +12,9 @@
 #if !SCUDO_ANDROID || !_BIONIC
 
 #include "allocator_config.h"
+#include "internal_defs.h"
+#include "platform.h"
+#include "scudo/interface.h"
 #include "wrappers_c.h"
 
 #include <stdint.h>
@@ -21,86 +24,125 @@ struct nothrow_t {};
 enum class align_val_t : size_t {};
 } // namespace std
 
+static void reportAllocation(void *ptr, size_t size) {
+  if (SCUDO_ENABLE_HOOKS)
+    if (__scudo_allocate_hook && ptr)
+      __scudo_allocate_hook(ptr, size);
+}
+static void reportDeallocation(void *ptr) {
+  if (SCUDO_ENABLE_HOOKS)
+    if (__scudo_deallocate_hook)
+      __scudo_deallocate_hook(ptr);
+}
+
 INTERFACE WEAK void *operator new(size_t size) {
-  return Allocator.allocate(size, scudo::Chunk::Origin::New);
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::New);
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new[](size_t size) {
-  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new(size_t size, std::nothrow_t const &) NOEXCEPT {
-  return Allocator.allocate(size, scudo::Chunk::Origin::New);
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::New);
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new[](size_t size, std::nothrow_t const &) NOEXCEPT {
-  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new(size_t size, std::align_val_t align) {
-  return Allocator.allocate(size, scudo::Chunk::Origin::New,
-                            static_cast<scudo::uptr>(align));
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::New,
+                                 static_cast<scudo::uptr>(align));
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align) {
-  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
-                            static_cast<scudo::uptr>(align));
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
+                                 static_cast<scudo::uptr>(align));
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new(size_t size, std::align_val_t align,
                                   std::nothrow_t const &) NOEXCEPT {
-  return Allocator.allocate(size, scudo::Chunk::Origin::New,
-                            static_cast<scudo::uptr>(align));
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::New,
+                                 static_cast<scudo::uptr>(align));
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align,
                                     std::nothrow_t const &) NOEXCEPT {
-  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
-                            static_cast<scudo::uptr>(align));
+  void *Ptr = Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
+                                 static_cast<scudo::uptr>(align));
+  reportAllocation(Ptr, size);
+  return Ptr;
 }
 
 INTERFACE WEAK void operator delete(void *ptr) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void operator delete[](void *ptr) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void operator delete(void *ptr, std::nothrow_t const &) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void operator delete[](void *ptr, std::nothrow_t const &) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void operator delete(void *ptr, size_t size) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size);
 }
 INTERFACE WEAK void operator delete[](void *ptr, size_t size) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size);
 }
 INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0,
                        static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
                        static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align,
                                     std::nothrow_t const &) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0,
                        static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align,
                                       std::nothrow_t const &) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
                        static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete(void *ptr, size_t size,
                                     std::align_val_t align) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size,
                        static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr, size_t size,
                                       std::align_val_t align) NOEXCEPT {
+  reportDeallocation(ptr);
   Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size,
                        static_cast<scudo::uptr>(align));
 }
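To show what the new reporting plumbing is for, here is a hypothetical out-of-tree consumer (not part of this patch): when Scudo is built with SCUDO_ENABLE_HOOKS, defining __scudo_allocate_hook / __scudo_deallocate_hook in the final binary makes the weak references non-null, and the report*() helpers above then call them on every allocation and deallocation, including the operator new/delete paths changed here.

#include <atomic>
#include <cstddef>

// Running count of live allocations, as a trivial example payload.
static std::atomic<size_t> LiveAllocations{0};

// Definitions picked up through Scudo's weak hook declarations. They should
// not allocate themselves, since they run inside the allocation wrappers and
// would re-enter them.
extern "C" void __scudo_allocate_hook(void *Ptr, size_t Size) {
  (void)Ptr;
  (void)Size;
  LiveAllocations.fetch_add(1, std::memory_order_relaxed);
}

extern "C" void __scudo_deallocate_hook(void *Ptr) {
  // The deallocation hook can be invoked with a null pointer (e.g. delete of
  // nullptr), so only count real frees.
  if (Ptr != nullptr)
    LiveAllocations.fetch_sub(1, std::memory_order_relaxed);
}

Note that the allocation hook is only invoked for successful allocations (the ptr check in reportAllocation), while the deallocation hook sees every call, so a tracker like this stays balanced.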