diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-03-30 01:28:05 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-03-30 01:28:05 +0000 |
commit | bd6b50c892cf95904a8221dbc2aedb6910d34cdb (patch) | |
tree | 38e5e39a238bff0418f9fae0a2660058f265a93d | |
parent | 1631b4955a63a0b6c5468d40c9291931301d0092 (diff) | |
parent | ab98d0240123ebb6f25be7112be05839eee9bfb2 (diff) | |
download | art-android13-mainline-tethering-release.tar.gz |
Snap for 9847046 from ab98d0240123ebb6f25be7112be05839eee9bfb2 to mainline-tethering-release aml_tet_331820050 android13-mainline-tethering-release
Change-Id: I31006921e1972dcf8adf62a2bdeeaeefc8fd2502
36 files changed, 2627 insertions, 1248 deletions
diff --git a/libartbase/base/metrics/metrics.h b/libartbase/base/metrics/metrics.h index fd0ae54dac..0ae8e69c40 100644 --- a/libartbase/base/metrics/metrics.h +++ b/libartbase/base/metrics/metrics.h @@ -113,8 +113,6 @@ class MetricsBase; namespace gc { class HeapTest_GCMetrics_Test; -template <typename T> -bool AnyIsNonNull(const metrics::MetricsBase<T>* x, const metrics::MetricsBase<T>* y); } // namespace gc namespace metrics { @@ -304,8 +302,6 @@ class MetricsBase { virtual bool IsNull() const = 0; ART_FRIEND_TEST(gc::HeapTest, GCMetrics); - template <typename T> - friend bool gc::AnyIsNonNull(const MetricsBase<T>* x, const MetricsBase<T>* y); }; template <DatumId counter_type, typename T = uint64_t> diff --git a/odrefresh/odr_config.h b/odrefresh/odr_config.h index 5c3f4eedad..d0fec4ff6a 100644 --- a/odrefresh/odr_config.h +++ b/odrefresh/odr_config.h @@ -41,6 +41,11 @@ namespace odrefresh { // everything if any property matching a prefix changes. constexpr const char* kCheckedSystemPropertyPrefixes[]{"dalvik.vm.", "ro.dalvik.vm."}; +// System property for the phenotype flag to override the device or default-configured +// system server compiler filter setting. +static constexpr char kSystemPropertySystemServerCompilerFilterOverride[] = + "persist.device_config.runtime_native_boot.systemservercompilerfilter_override"; + // The list of system properties that odrefresh ignores. They don't affect compilation results. const std::unordered_set<std::string> kIgnoredSystemProperties{ "dalvik.vm.dex2oat-cpu-set", @@ -67,8 +72,10 @@ struct SystemPropertyConfig { // requirement (go/platform-experiments-flags#pre-requisites). 
const android::base::NoDestructor<std::vector<SystemPropertyConfig>> kSystemProperties{ {SystemPropertyConfig{.name = "persist.device_config.runtime_native_boot.enable_uffd_gc", - .default_value = "false"}, - SystemPropertyConfig{.name = kPhDisableCompactDex, .default_value = "false"}}}; + .default_value = ""}, + SystemPropertyConfig{.name = kPhDisableCompactDex, .default_value = "false"}, + SystemPropertyConfig{.name = kSystemPropertySystemServerCompilerFilterOverride, + .default_value = ""}}}; // An enumeration of the possible zygote configurations on Android. enum class ZygoteKind : uint8_t { diff --git a/odrefresh/odrefresh.cc b/odrefresh/odrefresh.cc index 4c8b8769e8..10525b2a34 100644 --- a/odrefresh/odrefresh.cc +++ b/odrefresh/odrefresh.cc @@ -56,6 +56,7 @@ #include "android-base/file.h" #include "android-base/logging.h" #include "android-base/macros.h" +#include "android-base/parsebool.h" #include "android-base/parseint.h" #include "android-base/properties.h" #include "android-base/result.h" @@ -76,6 +77,8 @@ #include "dex/art_dex_file_loader.h" #include "dexoptanalyzer.h" #include "exec_utils.h" +#include "fmt/format.h" +#include "gc/collector/mark_compact.h" #include "log/log.h" #include "odr_artifacts.h" #include "odr_common.h" @@ -86,16 +89,21 @@ #include "odrefresh/odrefresh.h" #include "palette/palette.h" #include "palette/palette_types.h" +#include "read_barrier_config.h" namespace art { namespace odrefresh { +namespace { + namespace apex = com::android::apex; namespace art_apex = com::android::art; -using android::base::Result; +using ::android::base::ParseBool; +using ::android::base::ParseBoolResult; +using ::android::base::Result; -namespace { +using ::fmt::literals::operator""_format; // NOLINT // Name of cache info file in the ART Apex artifact cache. 
constexpr const char* kCacheInfoFile = "cache-info.xml"; @@ -632,8 +640,16 @@ std::optional<std::vector<apex::ApexInfo>> OnDeviceRefresh::GetApexInfoList() co return filtered_info_list; } -std::optional<art_apex::CacheInfo> OnDeviceRefresh::ReadCacheInfo() const { - return art_apex::read(cache_info_filename_.c_str()); +Result<art_apex::CacheInfo> OnDeviceRefresh::ReadCacheInfo() const { + std::optional<art_apex::CacheInfo> cache_info = art_apex::read(cache_info_filename_.c_str()); + if (!cache_info.has_value()) { + if (errno != 0) { + return ErrnoErrorf("Failed to load {}", QuotePath(cache_info_filename_)); + } else { + return Errorf("Failed to parse {}", QuotePath(cache_info_filename_)); + } + } + return cache_info.value(); } Result<void> OnDeviceRefresh::WriteCacheInfo() const { @@ -823,7 +839,7 @@ WARN_UNUSED bool OnDeviceRefresh::BootClasspathArtifactsExist( return true; } -WARN_UNUSED bool OnDeviceRefresh::SystemServerArtifactsExist( +bool OnDeviceRefresh::SystemServerArtifactsExist( bool on_system, /*out*/ std::string* error_msg, /*out*/ std::set<std::string>* jars_missing_artifacts, @@ -907,106 +923,124 @@ WARN_UNUSED bool OnDeviceRefresh::CheckSystemPropertiesHaveNotChanged( return true; } -WARN_UNUSED bool OnDeviceRefresh::BootClasspathArtifactsOnSystemUsable( - const apex::ApexInfo& art_apex_info) const { - if (!art_apex_info.getIsFactory()) { - return false; - } - LOG(INFO) << "Factory ART APEX mounted."; - - if (!CheckSystemPropertiesAreDefault()) { +WARN_UNUSED bool OnDeviceRefresh::CheckBuildUserfaultFdGc() const { + auto it = config_.GetSystemProperties().find("ro.dalvik.vm.enable_uffd_gc"); + bool build_enable_uffd_gc = it != config_.GetSystemProperties().end() ? + ParseBool(it->second) == ParseBoolResult::kTrue : + false; + bool kernel_supports_uffd = KernelSupportsUffd(); + if (build_enable_uffd_gc && !kernel_supports_uffd) { + // Normally, this should not happen. 
If this happens, the system image was probably built with a + // wrong PRODUCT_ENABLE_UFFD_GC flag. + LOG(WARNING) << "Userfaultfd GC check failed (build-time: {}, runtime: {})."_format( + build_enable_uffd_gc, kernel_supports_uffd); return false; } - LOG(INFO) << "System properties are set to default values."; - return true; } -WARN_UNUSED bool OnDeviceRefresh::SystemServerArtifactsOnSystemUsable( +WARN_UNUSED PreconditionCheckResult OnDeviceRefresh::CheckPreconditionForSystem( const std::vector<apex::ApexInfo>& apex_info_list) const { - if (std::any_of(apex_info_list.begin(), - apex_info_list.end(), - [](const apex::ApexInfo& apex_info) { return !apex_info.getIsFactory(); })) { - return false; - } - LOG(INFO) << "Factory APEXes mounted."; - if (!CheckSystemPropertiesAreDefault()) { - return false; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); } - LOG(INFO) << "System properties are set to default values."; - return true; -} + if (!CheckBuildUserfaultFdGc()) { + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); + } -WARN_UNUSED bool OnDeviceRefresh::CheckBootClasspathArtifactsAreUpToDate( - OdrMetrics& metrics, - const InstructionSet isa, - const apex::ApexInfo& art_apex_info, - const std::optional<art_apex::CacheInfo>& cache_info, - /*out*/ std::vector<std::string>* checked_artifacts) const { - if (BootClasspathArtifactsOnSystemUsable(art_apex_info)) { - // We can use the artifacts on /system. Check if they exist. - std::string error_msg; - if (BootClasspathArtifactsExist(/*on_system=*/true, /*minimal=*/false, isa, &error_msg)) { - return true; - } + std::optional<apex::ApexInfo> art_apex_info = GetArtApexInfo(apex_info_list); + if (!art_apex_info.has_value()) { + // This should never happen, further up-to-date checks are not possible if it does. 
+ LOG(ERROR) << "Could not get ART APEX info."; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kUnknown); + } - LOG(INFO) << "Incomplete boot classpath artifacts on /system. " << error_msg; - LOG(INFO) << "Checking cache."; + if (!art_apex_info->getIsFactory()) { + LOG(INFO) << "Updated ART APEX mounted"; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); } - if (!cache_info.has_value()) { - // If the cache info file does not exist, it usually means on-device compilation has not been - // done before because the device was using the factory version of modules, or artifacts were - // cleared because an updated version was uninstalled. Set the trigger to be - // `kApexVersionMismatch` so that compilation will always be performed. - PLOG(INFO) << "No prior cache-info file: " << QuotePath(cache_info_filename_); - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return false; + if (std::any_of(apex_info_list.begin(), + apex_info_list.end(), + [](const apex::ApexInfo& apex_info) { return !apex_info.getIsFactory(); })) { + LOG(INFO) << "Updated APEXes mounted"; + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } - // Check whether the current cache ART module info differs from the current ART module info. 
- const art_apex::ModuleInfo* cached_art_info = cache_info->getFirstArtModuleInfo(); + return PreconditionCheckResult::AllOk(); +} - if (cached_art_info == nullptr) { - LOG(INFO) << "Missing ART APEX info from cache-info."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); +WARN_UNUSED static bool CheckModuleInfo(const art_apex::ModuleInfo& cached_info, + const apex::ApexInfo& current_info) { + if (cached_info.getVersionCode() != current_info.getVersionCode()) { + LOG(INFO) << "APEX ({}) version code mismatch (before: {}, now: {})"_format( + current_info.getModuleName(), cached_info.getVersionCode(), current_info.getVersionCode()); return false; } - if (cached_art_info->getVersionCode() != art_apex_info.getVersionCode()) { - LOG(INFO) << "ART APEX version code mismatch (" << cached_art_info->getVersionCode() - << " != " << art_apex_info.getVersionCode() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); + if (cached_info.getVersionName() != current_info.getVersionName()) { + LOG(INFO) << "APEX ({}) version name mismatch (before: {}, now: {})"_format( + current_info.getModuleName(), cached_info.getVersionName(), current_info.getVersionName()); return false; } - if (cached_art_info->getVersionName() != art_apex_info.getVersionName()) { - LOG(INFO) << "ART APEX version name mismatch (" << cached_art_info->getVersionName() - << " != " << art_apex_info.getVersionName() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); + // Check lastUpdateMillis for samegrade installs. If `cached_info` is missing the lastUpdateMillis + // field then it is not current with the schema used by this binary so treat it as a samegrade + // update. Otherwise check whether the lastUpdateMillis changed. + const int64_t cached_last_update_millis = + cached_info.hasLastUpdateMillis() ? 
cached_info.getLastUpdateMillis() : -1; + if (cached_last_update_millis != current_info.getLastUpdateMillis()) { + LOG(INFO) << "APEX ({}) last update time mismatch (before: {}, now: {})"_format( + current_info.getModuleName(), + cached_info.getLastUpdateMillis(), + current_info.getLastUpdateMillis()); return false; } - // Check lastUpdateMillis for samegrade installs. If `cached_art_info` is missing the - // lastUpdateMillis field then it is not current with the schema used by this binary so treat - // it as a samegrade update. Otherwise check whether the lastUpdateMillis changed. - const int64_t cached_art_last_update_millis = - cached_art_info->hasLastUpdateMillis() ? cached_art_info->getLastUpdateMillis() : -1; - if (cached_art_last_update_millis != art_apex_info.getLastUpdateMillis()) { - LOG(INFO) << "ART APEX last update time mismatch (" << cached_art_last_update_millis - << " != " << art_apex_info.getLastUpdateMillis() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return false; + return true; +} + +WARN_UNUSED PreconditionCheckResult OnDeviceRefresh::CheckPreconditionForData( + const std::vector<com::android::apex::ApexInfo>& apex_info_list) const { + Result<art_apex::CacheInfo> cache_info = ReadCacheInfo(); + if (!cache_info.ok()) { + if (cache_info.error().code() == ENOENT) { + // If the cache info file does not exist, it usually means it's the first boot, or the + // dalvik-cache directory is cleared by odsign due to corrupted files. Set the trigger to be + // `kApexVersionMismatch` to force generate the cache info file and compile if necessary. + LOG(INFO) << "No prior cache-info file: " << QuotePath(cache_info_filename_); + } else { + // This should not happen unless odrefresh is updated to a new version that is not compatible + // with an old cache-info file. Further up-to-date checks are not possible if it does. 
+ LOG(ERROR) << cache_info.error().message(); + } + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); } if (!CheckSystemPropertiesHaveNotChanged(cache_info.value())) { // We don't have a trigger kind for system property changes. For now, we reuse // `kApexVersionMismatch` as it implies the expected behavior: re-compile regardless of the last // compilation attempt. - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return false; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); + } + + // Check whether the current cache ART module info differs from the current ART module info. + const art_apex::ModuleInfo* cached_art_info = cache_info->getFirstArtModuleInfo(); + if (cached_art_info == nullptr) { + LOG(ERROR) << "Missing ART APEX info from cache-info."; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); + } + + std::optional<apex::ApexInfo> current_art_info = GetArtApexInfo(apex_info_list); + if (!current_art_info.has_value()) { + // This should never happen, further up-to-date checks are not possible if it does. + LOG(ERROR) << "Could not get ART APEX info."; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kUnknown); + } + + if (!CheckModuleInfo(*cached_art_info, *current_art_info)) { + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); } // Check boot class components. @@ -1017,102 +1051,32 @@ WARN_UNUSED bool OnDeviceRefresh::CheckBootClasspathArtifactsAreUpToDate( // // The boot class components may change unexpectedly, for example an OTA could update // framework.jar. 
- const std::vector<art_apex::Component> expected_bcp_compilable_components = + const std::vector<art_apex::Component> current_bcp_compilable_components = GenerateBootClasspathCompilableComponents(); - if (expected_bcp_compilable_components.size() != 0 && - (!cache_info->hasDex2oatBootClasspath() || - !cache_info->getFirstDex2oatBootClasspath()->hasComponent())) { + + const art_apex::Classpath* cached_bcp_compilable_components = + cache_info->getFirstDex2oatBootClasspath(); + if (cached_bcp_compilable_components == nullptr) { LOG(INFO) << "Missing Dex2oatBootClasspath components."; - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); - return false; + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kApexVersionMismatch); } - const std::vector<art_apex::Component>& bcp_compilable_components = - cache_info->getFirstDex2oatBootClasspath()->getComponent(); - Result<void> result = - CheckComponents(expected_bcp_compilable_components, bcp_compilable_components); + Result<void> result = CheckComponents(current_bcp_compilable_components, + cached_bcp_compilable_components->getComponent()); if (!result.ok()) { LOG(INFO) << "Dex2OatClasspath components mismatch: " << result.error(); - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); - return false; - } - - // Cache info looks good, check all compilation artifacts exist. - std::string error_msg; - if (!BootClasspathArtifactsExist( - /*on_system=*/false, /*minimal=*/false, isa, &error_msg, checked_artifacts)) { - LOG(INFO) << "Incomplete boot classpath artifacts. " << error_msg; - metrics.SetTrigger(OdrMetrics::Trigger::kMissingArtifacts); - // Add the minimal boot image to `checked_artifacts` if exists. This is to prevent the minimal - // boot image from being deleted. It does not affect the return value because we should still - // attempt to generate a full boot image even if the minimal one exists. 
- if (BootClasspathArtifactsExist( - /*on_system=*/false, /*minimal=*/true, isa, &error_msg, checked_artifacts)) { - LOG(INFO) << "Found minimal boot classpath artifacts."; - } - return false; - } - - return true; -} - -bool OnDeviceRefresh::CheckSystemServerArtifactsAreUpToDate( - OdrMetrics& metrics, - const std::vector<apex::ApexInfo>& apex_info_list, - const std::optional<art_apex::CacheInfo>& cache_info, - /*out*/ std::set<std::string>* jars_to_compile, - /*out*/ std::vector<std::string>* checked_artifacts) const { - auto compile_all = [&, this]() { - *jars_to_compile = AllSystemServerJars(); - return false; - }; - - std::set<std::string> jars_missing_artifacts_on_system; - bool artifacts_on_system_up_to_date = false; - - if (SystemServerArtifactsOnSystemUsable(apex_info_list)) { - // We can use the artifacts on /system. Check if they exist. - std::string error_msg; - if (SystemServerArtifactsExist( - /*on_system=*/true, &error_msg, &jars_missing_artifacts_on_system)) { - return true; - } - - LOG(INFO) << "Incomplete system server artifacts on /system. " << error_msg; - LOG(INFO) << "Checking cache."; - artifacts_on_system_up_to_date = true; - } - - if (!cache_info.has_value()) { - // If the cache info file does not exist, it usually means on-device compilation has not been - // done before because the device was using the factory version of modules, or artifacts were - // cleared because an updated version was uninstalled. Set the trigger to be - // `kApexVersionMismatch` so that compilation will always be performed. 
- PLOG(INFO) << "No prior cache-info file: " << QuotePath(cache_info_filename_); - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - if (artifacts_on_system_up_to_date) { - *jars_to_compile = jars_missing_artifacts_on_system; - return false; - } - return compile_all(); + return PreconditionCheckResult::NoneOk(OdrMetrics::Trigger::kDexFilesChanged); } // Check whether the current cached module info differs from the current module info. const art_apex::ModuleInfoList* cached_module_info_list = cache_info->getFirstModuleInfoList(); - if (cached_module_info_list == nullptr) { - LOG(INFO) << "Missing APEX info list from cache-info."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); + LOG(ERROR) << "Missing APEX info list from cache-info."; + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } std::unordered_map<std::string, const art_apex::ModuleInfo*> cached_module_info_map; for (const art_apex::ModuleInfo& module_info : cached_module_info_list->getModuleInfo()) { - if (!module_info.hasName()) { - LOG(INFO) << "Unexpected module info from cache-info. 
Missing module name."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); - } cached_module_info_map[module_info.getName()] = &module_info; } @@ -1125,44 +1089,13 @@ bool OnDeviceRefresh::CheckSystemServerArtifactsAreUpToDate( auto it = cached_module_info_map.find(apex_name); if (it == cached_module_info_map.end()) { LOG(INFO) << "Missing APEX info from cache-info (" << apex_name << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } const art_apex::ModuleInfo* cached_module_info = it->second; - - if (cached_module_info->getVersionCode() != current_apex_info.getVersionCode()) { - LOG(INFO) << "APEX (" << apex_name << ") version code mismatch (" - << cached_module_info->getVersionCode() - << " != " << current_apex_info.getVersionCode() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); - } - - if (cached_module_info->getVersionName() != current_apex_info.getVersionName()) { - LOG(INFO) << "APEX (" << apex_name << ") version name mismatch (" - << cached_module_info->getVersionName() - << " != " << current_apex_info.getVersionName() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); + if (!CheckModuleInfo(*cached_module_info, current_apex_info)) { + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } - - if (!cached_module_info->hasLastUpdateMillis() || - cached_module_info->getLastUpdateMillis() != current_apex_info.getLastUpdateMillis()) { - LOG(INFO) << "APEX (" << apex_name << ") last update time mismatch (" - << cached_module_info->getLastUpdateMillis() - << " != " << current_apex_info.getLastUpdateMillis() << ")."; - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return compile_all(); - } - } - - if 
(!CheckSystemPropertiesHaveNotChanged(cache_info.value())) { - // We don't have a trigger kind for system property changes. For now, we reuse - // `kApexVersionMismatch` as it implies the expected behavior: re-compile regardless of the last - // compilation attempt. - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return false; } // Check system server components. @@ -1174,73 +1107,130 @@ bool OnDeviceRefresh::CheckSystemServerArtifactsAreUpToDate( // // The system_server components may change unexpectedly, for example an OTA could update // services.jar. - const std::vector<art_apex::SystemServerComponent> expected_system_server_components = + const std::vector<art_apex::SystemServerComponent> current_system_server_components = GenerateSystemServerComponents(); - if (expected_system_server_components.size() != 0 && - (!cache_info->hasSystemServerComponents() || - !cache_info->getFirstSystemServerComponents()->hasComponent())) { + + const art_apex::SystemServerComponents* cached_system_server_components = + cache_info->getFirstSystemServerComponents(); + if (cached_system_server_components == nullptr) { LOG(INFO) << "Missing SystemServerComponents."; - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); - return compile_all(); + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } - const std::vector<art_apex::SystemServerComponent>& system_server_components = - cache_info->getFirstSystemServerComponents()->getComponent(); - Result<void> result = - CheckSystemServerComponents(expected_system_server_components, system_server_components); + result = CheckSystemServerComponents(current_system_server_components, + cached_system_server_components->getComponent()); if (!result.ok()) { LOG(INFO) << "SystemServerComponents mismatch: " << result.error(); - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); - return compile_all(); + return 
PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kDexFilesChanged); } - const std::vector<art_apex::Component> expected_bcp_components = - GenerateBootClasspathComponents(); - if (expected_bcp_components.size() != 0 && - (!cache_info->hasBootClasspath() || !cache_info->getFirstBootClasspath()->hasComponent())) { + const std::vector<art_apex::Component> current_bcp_components = GenerateBootClasspathComponents(); + + const art_apex::Classpath* cached_bcp_components = cache_info->getFirstBootClasspath(); + if (cached_bcp_components == nullptr) { LOG(INFO) << "Missing BootClasspath components."; - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); - return false; + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kApexVersionMismatch); } - const std::vector<art_apex::Component>& bcp_components = - cache_info->getFirstBootClasspath()->getComponent(); - result = CheckComponents(expected_bcp_components, bcp_components); + result = CheckComponents(current_bcp_components, cached_bcp_components->getComponent()); if (!result.ok()) { LOG(INFO) << "BootClasspath components mismatch: " << result.error(); - metrics.SetTrigger(OdrMetrics::Trigger::kDexFilesChanged); // Boot classpath components can be dependencies of system_server components, so system_server // components need to be recompiled if boot classpath components are changed. - return compile_all(); + return PreconditionCheckResult::SystemServerNotOk(OdrMetrics::Trigger::kDexFilesChanged); } - std::string error_msg; - std::set<std::string> jars_missing_artifacts_on_data; - if (!SystemServerArtifactsExist( - /*on_system=*/false, &error_msg, &jars_missing_artifacts_on_data, checked_artifacts)) { - if (artifacts_on_system_up_to_date) { - // Check if the remaining system_server artifacts are on /data. 
- std::set_intersection(jars_missing_artifacts_on_system.begin(), - jars_missing_artifacts_on_system.end(), - jars_missing_artifacts_on_data.begin(), - jars_missing_artifacts_on_data.end(), - std::inserter(*jars_to_compile, jars_to_compile->end())); - if (!jars_to_compile->empty()) { - LOG(INFO) << "Incomplete system_server artifacts on /data. " << error_msg; - metrics.SetTrigger(OdrMetrics::Trigger::kMissingArtifacts); - return false; - } + return PreconditionCheckResult::AllOk(); +} - LOG(INFO) << "Found the remaining system_server artifacts on /data."; +WARN_UNUSED bool OnDeviceRefresh::CheckBootClasspathArtifactsAreUpToDate( + OdrMetrics& metrics, + const InstructionSet isa, + const PreconditionCheckResult& system_result, + const PreconditionCheckResult& data_result, + /*out*/ std::vector<std::string>* checked_artifacts) const { + if (system_result.IsBootClasspathOk()) { + // We can use the artifacts on /system. Check if they exist. + std::string error_msg; + if (BootClasspathArtifactsExist(/*on_system=*/true, /*minimal=*/false, isa, &error_msg)) { return true; } - LOG(INFO) << "Incomplete system_server artifacts. " << error_msg; + LOG(INFO) << "Incomplete boot classpath artifacts on /system: " << error_msg; + LOG(INFO) << "Checking /data"; + } + + if (!data_result.IsBootClasspathOk()) { + metrics.SetTrigger(data_result.GetTrigger()); + return false; + } + + // Cache info looks good, check all compilation artifacts exist. + std::string error_msg; + if (!BootClasspathArtifactsExist( + /*on_system=*/false, /*minimal=*/false, isa, &error_msg, checked_artifacts)) { + LOG(INFO) << "Incomplete boot classpath artifacts on /data: " << error_msg; metrics.SetTrigger(OdrMetrics::Trigger::kMissingArtifacts); - *jars_to_compile = jars_missing_artifacts_on_data; + // Add the minimal boot image to `checked_artifacts` if exists. This is to prevent the minimal + // boot image from being deleted. 
It does not affect the return value because we should still + // attempt to generate a full boot image even if the minimal one exists. + if (BootClasspathArtifactsExist( + /*on_system=*/false, /*minimal=*/true, isa, &error_msg, checked_artifacts)) { + LOG(INFO) << "Found minimal boot classpath artifacts"; + } return false; } + LOG(INFO) << "Boot classpath artifacts on /data OK"; + return true; +} + +bool OnDeviceRefresh::CheckSystemServerArtifactsAreUpToDate( + OdrMetrics& metrics, + const PreconditionCheckResult& system_result, + const PreconditionCheckResult& data_result, + /*out*/ std::set<std::string>* jars_to_compile, + /*out*/ std::vector<std::string>* checked_artifacts) const { + std::set<std::string> jars_missing_artifacts_on_system; + if (system_result.IsSystemServerOk()) { + // We can use the artifacts on /system. Check if they exist. + std::string error_msg; + if (SystemServerArtifactsExist( + /*on_system=*/true, &error_msg, &jars_missing_artifacts_on_system)) { + return true; + } + + LOG(INFO) << "Incomplete system server artifacts on /system: " << error_msg; + LOG(INFO) << "Checking /data"; + } else { + jars_missing_artifacts_on_system = AllSystemServerJars(); + } + + std::set<std::string> jars_missing_artifacts_on_data; + std::string error_msg; + if (data_result.IsSystemServerOk()) { + SystemServerArtifactsExist( + /*on_system=*/false, &error_msg, &jars_missing_artifacts_on_data, checked_artifacts); + } else { + jars_missing_artifacts_on_data = AllSystemServerJars(); + } + + std::set_intersection(jars_missing_artifacts_on_system.begin(), + jars_missing_artifacts_on_system.end(), + jars_missing_artifacts_on_data.begin(), + jars_missing_artifacts_on_data.end(), + std::inserter(*jars_to_compile, jars_to_compile->end())); + if (!jars_to_compile->empty()) { + if (data_result.IsSystemServerOk()) { + LOG(INFO) << "Incomplete system_server artifacts on /data: " << error_msg; + metrics.SetTrigger(OdrMetrics::Trigger::kMissingArtifacts); + } else { + 
metrics.SetTrigger(data_result.GetTrigger()); + } + return false; + } + + LOG(INFO) << "system_server artifacts on /data OK"; return true; } @@ -1376,22 +1366,15 @@ OnDeviceRefresh::CheckArtifactsAreUpToDate(OdrMetrics& metrics, // Record ART APEX last update milliseconds (used in compilation log). metrics.SetArtApexLastUpdateMillis(art_apex_info->getLastUpdateMillis()); - std::optional<art_apex::CacheInfo> cache_info = ReadCacheInfo(); - if (!cache_info.has_value() && OS::FileExists(cache_info_filename_.c_str())) { - // This should not happen unless odrefresh is updated to a new version that is not - // compatible with an old cache-info file. Further up-to-date checks are not possible if it - // does. - PLOG(ERROR) << "Failed to parse cache-info file: " << QuotePath(cache_info_filename_); - metrics.SetTrigger(OdrMetrics::Trigger::kApexVersionMismatch); - return cleanup_and_compile_all(); - } - InstructionSet system_server_isa = config_.GetSystemServerIsa(); std::vector<std::string> checked_artifacts; + PreconditionCheckResult system_result = CheckPreconditionForSystem(apex_info_list.value()); + PreconditionCheckResult data_result = CheckPreconditionForData(apex_info_list.value()); + for (const InstructionSet isa : config_.GetBootClasspathIsas()) { if (!CheckBootClasspathArtifactsAreUpToDate( - metrics, isa, art_apex_info.value(), cache_info, &checked_artifacts)) { + metrics, isa, system_result, data_result, &checked_artifacts)) { compilation_options->compile_boot_classpath_for_isas.push_back(isa); // system_server artifacts are invalid without valid boot classpath artifacts. 
if (isa == system_server_isa) { @@ -1402,14 +1385,20 @@ OnDeviceRefresh::CheckArtifactsAreUpToDate(OdrMetrics& metrics, if (compilation_options->system_server_jars_to_compile.empty()) { CheckSystemServerArtifactsAreUpToDate(metrics, - apex_info_list.value(), - cache_info, + system_result, + data_result, &compilation_options->system_server_jars_to_compile, &checked_artifacts); } - bool compilation_required = (!compilation_options->compile_boot_classpath_for_isas.empty() || - !compilation_options->system_server_jars_to_compile.empty()); + bool compilation_required = !compilation_options->compile_boot_classpath_for_isas.empty() || + !compilation_options->system_server_jars_to_compile.empty(); + + if (!compilation_required && !data_result.IsAllOk()) { + // Return kCompilationRequired to generate the cache info even if there's nothing to compile. + compilation_required = true; + metrics.SetTrigger(data_result.GetTrigger()); + } // If partial compilation is disabled, we should compile everything regardless of what's in // `compilation_options`. @@ -1417,10 +1406,8 @@ OnDeviceRefresh::CheckArtifactsAreUpToDate(OdrMetrics& metrics, return cleanup_and_compile_all(); } - // We should only keep the cache info if we have artifacts on /data. - if (!checked_artifacts.empty()) { - checked_artifacts.push_back(cache_info_filename_); - } + // Always keep the cache info. 
+ checked_artifacts.push_back(cache_info_filename_); Result<void> result = CleanupArtifactDirectory(metrics, checked_artifacts); if (!result.ok()) { diff --git a/odrefresh/odrefresh.h b/odrefresh/odrefresh.h index b14aa41260..c43528e9b5 100644 --- a/odrefresh/odrefresh.h +++ b/odrefresh/odrefresh.h @@ -46,6 +46,43 @@ struct CompilationOptions { std::set<std::string> system_server_jars_to_compile; }; +class PreconditionCheckResult { + public: + static PreconditionCheckResult NoneOk(OdrMetrics::Trigger trigger) { + return PreconditionCheckResult(trigger, + /*boot_classpath_ok=*/false, + /*system_server_ok=*/false); + } + static PreconditionCheckResult SystemServerNotOk(OdrMetrics::Trigger trigger) { + return PreconditionCheckResult(trigger, + /*boot_classpath_ok=*/true, + /*system_server_ok=*/false); + } + static PreconditionCheckResult AllOk() { + return PreconditionCheckResult(/*trigger=*/std::nullopt, + /*boot_classpath_ok=*/true, + /*system_server_ok=*/true); + } + bool IsAllOk() const { return !trigger_.has_value(); } + OdrMetrics::Trigger GetTrigger() const { return trigger_.value(); } + bool IsBootClasspathOk() const { return boot_classpath_ok_; } + bool IsSystemServerOk() const { return system_server_ok_; } + + private: + // Use static factory methods instead. + PreconditionCheckResult(std::optional<OdrMetrics::Trigger> trigger, + bool boot_classpath_ok, + bool system_server_ok) + : trigger_(trigger), + boot_classpath_ok_(boot_classpath_ok), + system_server_ok_(system_server_ok) {} + + // Indicates why the precondition is not okay, or `std::nullopt` if it's okay. 
+ std::optional<OdrMetrics::Trigger> trigger_; + bool boot_classpath_ok_; + bool system_server_ok_; +}; + class OnDeviceRefresh final { public: explicit OnDeviceRefresh(const OdrConfig& config); @@ -81,7 +118,7 @@ class OnDeviceRefresh final { std::optional<std::vector<com::android::apex::ApexInfo>> GetApexInfoList() const; // Reads the ART APEX cache information (if any) found in the output artifact directory. - std::optional<com::android::art::CacheInfo> ReadCacheInfo() const; + android::base::Result<com::android::art::CacheInfo> ReadCacheInfo() const; // Writes ART APEX cache information to `kOnDeviceRefreshOdrefreshArtifactDirectory`. android::base::Result<void> WriteCacheInfo() const; @@ -134,7 +171,7 @@ class OnDeviceRefresh final { // order of compilation. Returns true if all are present, false otherwise. // Adds the paths to the jars that are missing artifacts in `jars_with_missing_artifacts`. // If `checked_artifacts` is present, adds checked artifacts to `checked_artifacts`. - WARN_UNUSED bool SystemServerArtifactsExist( + bool SystemServerArtifactsExist( bool on_system, /*out*/ std::string* error_msg, /*out*/ std::set<std::string>* jars_missing_artifacts, @@ -150,15 +187,18 @@ class OnDeviceRefresh final { WARN_UNUSED bool CheckSystemPropertiesHaveNotChanged( const com::android::art::CacheInfo& cache_info) const; - // Returns true if boot classpath artifacts on /system are usable if they exist. Note that this - // function does not check file existence. - WARN_UNUSED bool BootClasspathArtifactsOnSystemUsable( - const com::android::apex::ApexInfo& art_apex_info) const; + // Returns true if the system image is built with the right userfaultfd GC flag. + WARN_UNUSED bool CheckBuildUserfaultFdGc() const; + + // Returns whether the precondition for using artifacts on /system is met. Note that this function + // does not check the artifacts. 
+ WARN_UNUSED PreconditionCheckResult + CheckPreconditionForSystem(const std::vector<com::android::apex::ApexInfo>& apex_info_list) const; - // Returns true if system_server artifacts on /system are usable if they exist. Note that this - // function does not check file existence. - WARN_UNUSED bool SystemServerArtifactsOnSystemUsable( - const std::vector<com::android::apex::ApexInfo>& apex_info_list) const; + // Returns whether the precondition for using artifacts on /data is met. Note that this function + // does not check the artifacts. + WARN_UNUSED PreconditionCheckResult + CheckPreconditionForData(const std::vector<com::android::apex::ApexInfo>& apex_info_list) const; // Checks whether all boot classpath artifacts are up to date. Returns true if all are present, // false otherwise. @@ -166,8 +206,8 @@ class OnDeviceRefresh final { WARN_UNUSED bool CheckBootClasspathArtifactsAreUpToDate( OdrMetrics& metrics, const InstructionSet isa, - const com::android::apex::ApexInfo& art_apex_info, - const std::optional<com::android::art::CacheInfo>& cache_info, + const PreconditionCheckResult& system_result, + const PreconditionCheckResult& data_result, /*out*/ std::vector<std::string>* checked_artifacts) const; // Checks whether all system_server artifacts are up to date. The artifacts are checked in their @@ -176,8 +216,8 @@ class OnDeviceRefresh final { // If `checked_artifacts` is present, adds checked artifacts to `checked_artifacts`. 
bool CheckSystemServerArtifactsAreUpToDate( OdrMetrics& metrics, - const std::vector<com::android::apex::ApexInfo>& apex_info_list, - const std::optional<com::android::art::CacheInfo>& cache_info, + const PreconditionCheckResult& system_result, + const PreconditionCheckResult& data_result, /*out*/ std::set<std::string>* jars_to_compile, /*out*/ std::vector<std::string>* checked_artifacts) const; diff --git a/odrefresh/odrefresh_main.cc b/odrefresh/odrefresh_main.cc index a3761ef913..378b9aa808 100644 --- a/odrefresh/odrefresh_main.cc +++ b/odrefresh/odrefresh_main.cc @@ -43,6 +43,7 @@ using ::art::odrefresh::ExitCode; using ::art::odrefresh::kCheckedSystemPropertyPrefixes; using ::art::odrefresh::kIgnoredSystemProperties; using ::art::odrefresh::kSystemProperties; +using ::art::odrefresh::kSystemPropertySystemServerCompilerFilterOverride; using ::art::odrefresh::OdrCompilationLog; using ::art::odrefresh::OdrConfig; using ::art::odrefresh::OdrMetrics; @@ -175,6 +176,7 @@ int InitializeConfig(int argc, char** argv, OdrConfig* config) { if (config->GetSystemServerCompilerFilter().empty()) { std::string filter = GetProperty("dalvik.vm.systemservercompilerfilter", "speed"); + filter = GetProperty(kSystemPropertySystemServerCompilerFilterOverride, filter); config->SetSystemServerCompilerFilter(filter); } diff --git a/perfetto_hprof/Android.bp b/perfetto_hprof/Android.bp index a81a4fa5e0..2a2d35e4eb 100644 --- a/perfetto_hprof/Android.bp +++ b/perfetto_hprof/Android.bp @@ -50,6 +50,7 @@ cc_defaults { compile_multilib: "both", shared_libs: [ + "libartpalette", "libbase", "liblog", ], diff --git a/perfetto_hprof/perfetto_hprof.cc b/perfetto_hprof/perfetto_hprof.cc index 669fb0cac8..d5f7f5344d 100644 --- a/perfetto_hprof/perfetto_hprof.cc +++ b/perfetto_hprof/perfetto_hprof.cc @@ -18,9 +18,8 @@ #include "perfetto_hprof.h" -#include <android-base/logging.h> -#include <base/fast_exit.h> #include <fcntl.h> +#include <fnmatch.h> #include <inttypes.h> #include <sched.h> #include 
<signal.h> @@ -36,6 +35,11 @@ #include <optional> #include <type_traits> +#include "android-base/file.h" +#include "android-base/logging.h" +#include "android-base/properties.h" +#include "base/fast_exit.h" +#include "base/systrace.h" #include "gc/heap-visit-objects-inl.h" #include "gc/heap.h" #include "gc/scoped_gc_critical_section.h" @@ -86,6 +90,8 @@ static art::ConditionVariable& GetStateCV() { static int requested_tracing_session_id = 0; static State g_state = State::kUninitialized; +static bool g_oome_triggered = false; +static uint32_t g_oome_sessions_pending = 0; // Pipe to signal from the signal handler into a worker thread that handles the // dump requests. @@ -151,19 +157,52 @@ bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) { return false; } +uint64_t GetCurrentBootClockNs() { + struct timespec ts = {}; + if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) { + LOG(FATAL) << "Failed to get boottime."; + } + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +bool IsDebugBuild() { + std::string build_type = android::base::GetProperty("ro.build.type", ""); + return !build_type.empty() && build_type != "user"; +} + +// Verifies the manifest restrictions are respected. +// For regular heap dumps this is already handled by heapprofd. 
+bool IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig& ds_config) { + if (art::Runtime::Current()->IsJavaDebuggable() || IsDebugBuild()) { + return true; + } + + if (ds_config.session_initiator() == + perfetto::DataSourceConfig::SESSION_INITIATOR_TRUSTED_SYSTEM) { + return art::Runtime::Current()->IsProfileable() || art::Runtime::Current()->IsSystemServer(); + } else { + return art::Runtime::Current()->IsProfileableFromShell(); + } +} + class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { public: constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy = perfetto::BufferExhaustedPolicy::kStall; + + explicit JavaHprofDataSource(bool is_oome_heap) : is_oome_heap_(is_oome_heap) {} + void OnSetup(const SetupArgs& args) override { - uint64_t normalized_cfg_tracing_session_id = - args.config->tracing_session_id() % std::numeric_limits<int32_t>::max(); - if (requested_tracing_session_id < 0) { - LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id; - return; - } - if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_cfg_tracing_session_id) { - return; + if (!is_oome_heap_) { + uint64_t normalized_tracing_session_id = + args.config->tracing_session_id() % std::numeric_limits<int32_t>::max(); + if (requested_tracing_session_id < 0) { + LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id; + return; + } + if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_tracing_session_id) { + return; + } } // This is on the heap as it triggers -Wframe-larger-than. @@ -178,20 +217,31 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { } // This tracing session ID matches the requesting tracing session ID, so we know heapprofd // has verified it targets this process. 
- enabled_ = true; + enabled_ = + !is_oome_heap_ || (IsOomeHeapDumpAllowed(*args.config) && IsOomeDumpEnabled(*cfg.get())); } bool dump_smaps() { return dump_smaps_; } + + // Per-DataSource enable bit. Invoked by the ::Trace method. bool enabled() { return enabled_; } void OnStart(const StartArgs&) override { - if (!enabled()) { - return; - } art::MutexLock lk(art_thread(), GetStateMutex()); + // In case there are multiple tracing sessions waiting for an OOME error, + // there will be a data source instance for each of them. Before the + // transition to kStart and signaling the dumping thread, we need to make + // sure all the data sources are ready. + if (is_oome_heap_ && g_oome_sessions_pending > 0) { + --g_oome_sessions_pending; + } if (g_state == State::kWaitForStart) { - g_state = State::kStart; - GetStateCV().Broadcast(art_thread()); + // WriteHeapPackets is responsible for checking whether the DataSource is\ + // actually enabled. + if (!is_oome_heap_ || g_oome_sessions_pending == 0) { + g_state = State::kStart; + GetStateCV().Broadcast(art_thread()); + } } } @@ -232,10 +282,26 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { } private: + static bool IsOomeDumpEnabled(const perfetto::protos::pbzero::JavaHprofConfig::Decoder& cfg) { + std::string cmdline; + if (!android::base::ReadFileToString("/proc/self/cmdline", &cmdline)) { + return false; + } + const char* argv0 = cmdline.c_str(); + + for (auto it = cfg.process_cmdline(); it; ++it) { + std::string pattern = (*it).ToStdString(); + if (fnmatch(pattern.c_str(), argv0, FNM_NOESCAPE) == 0) { + return true; + } + } + return false; + } + + bool is_oome_heap_ = false; bool enabled_ = false; bool dump_smaps_ = false; std::vector<std::string> ignored_types_; - static art::Thread* self_; art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock}; bool is_finished_ = false; @@ -243,27 +309,40 @@ class JavaHprofDataSource : public 
perfetto::DataSource<JavaHprofDataSource> { std::function<void()> async_stop_; }; -art::Thread* JavaHprofDataSource::self_ = nullptr; - - -void WaitForDataSource(art::Thread* self) { +void SetupDataSource(const std::string& ds_name, bool is_oome_heap) { perfetto::TracingInitArgs args; args.backends = perfetto::BackendType::kSystemBackend; perfetto::Tracing::Initialize(args); perfetto::DataSourceDescriptor dsd; - dsd.set_name("android.java_hprof"); + dsd.set_name(ds_name); dsd.set_will_notify_on_stop(true); - JavaHprofDataSource::Register(dsd); - - LOG(INFO) << "waiting for data source"; + JavaHprofDataSource::Register(dsd, is_oome_heap); + LOG(INFO) << "registered data source " << ds_name; +} +// Waits for the data source OnStart +void WaitForDataSource(art::Thread* self) { art::MutexLock lk(self, GetStateMutex()); while (g_state != State::kStart) { GetStateCV().Wait(self); } } +// Waits for the data source OnStart with a timeout. Returns false on timeout. +bool TimedWaitForDataSource(art::Thread* self, int64_t timeout_ms) { + const uint64_t cutoff_ns = GetCurrentBootClockNs() + timeout_ms * 1000000; + art::MutexLock lk(self, GetStateMutex()); + while (g_state != State::kStart) { + const uint64_t current_ns = GetCurrentBootClockNs(); + if (current_ns >= cutoff_ns) { + return false; + } + GetStateCV().TimedWait(self, (cutoff_ns - current_ns) / 1000000, 0); + } + return true; +} + // Helper class to write Java heap dumps to `ctx`. The whole heap dump can be // split into more perfetto.protos.HeapGraph messages, to avoid making each // message too big. @@ -831,10 +910,46 @@ class HeapGraphDumper { uint64_t prev_object_id_ = 0; }; -void DumpPerfetto(art::Thread* self) { - pid_t parent_pid = getpid(); - LOG(INFO) << "preparing to dump heap for " << parent_pid; +// waitpid with a timeout implemented by ~busy-waiting +// See b/181031512 for rationale. 
+void BusyWaitpid(pid_t pid, uint32_t timeout_ms) { + for (size_t i = 0;; ++i) { + if (i == timeout_ms) { + // The child hasn't exited. + // Give up and SIGKILL it. The next waitpid should succeed. + LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL."; + kill(pid, SIGKILL); + } + int stat_loc; + pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG); + if (wait_result == -1 && errno != EINTR) { + if (errno != ECHILD) { + // This hopefully never happens (should only be EINVAL). + PLOG(FATAL_WITHOUT_ABORT) << "waitpid"; + } + // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait. + // The child is no longer here either way, so that's good enough for us. + break; + } else if (wait_result > 0) { + break; + } else { // wait_result == 0 || errno == EINTR. + usleep(1000); + } + } +} + +enum class ResumeParentPolicy { + IMMEDIATELY, + DEFERRED +}; +void ForkAndRun( + art::Thread* self, + ResumeParentPolicy resume_parent_policy, + std::function<void(pid_t child)> parent_runnable, + std::function<void(pid_t parent, uint64_t timestamp)> child_runnable) { + pid_t parent_pid = getpid(); + LOG(INFO) << "forking for " << parent_pid; // Need to take a heap dump while GC isn't running. See the comment in // Heap::VisitObjects(). Also we need the critical section to avoid visiting // the same object twice. See b/34967844. @@ -859,41 +974,20 @@ void DumpPerfetto(art::Thread* self) { } if (pid != 0) { // Parent - // Stop the thread suspension as soon as possible to allow the rest of the application to - // continue while we waitpid here. - ssa.reset(); - gcs.reset(); - for (size_t i = 0;; ++i) { - if (i == 1000) { - // The child hasn't exited for 1 second (and all it was supposed to do was fork itself). - // Give up and SIGKILL it. The next waitpid should succeed. - LOG(ERROR) << "perfetto_hprof child timed out. 
Sending SIGKILL."; - kill(pid, SIGKILL); - } - // Busy waiting here will introduce some extra latency, but that is okay because we have - // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which - // is not needed for progress of the app itself. - int stat_loc; - pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG); - if (wait_result == -1 && errno != EINTR) { - if (errno != ECHILD) { - // This hopefully never happens (should only be EINVAL). - PLOG(FATAL_WITHOUT_ABORT) << "waitpid"; - } - // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait. - // The child is no longer here either way, so that's good enough for us. - break; - } else if (wait_result > 0) { - break; - } else { // wait_result == 0 || errno == EINTR. - usleep(1000); - } + if (resume_parent_policy == ResumeParentPolicy::IMMEDIATELY) { + // Stop the thread suspension as soon as possible to allow the rest of the application to + // continue while we waitpid here. + ssa.reset(); + gcs.reset(); + } + parent_runnable(pid); + if (resume_parent_policy != ResumeParentPolicy::IMMEDIATELY) { + ssa.reset(); + gcs.reset(); } return; } - // The following code is only executed by the child of the original process. - // Uninstall signal handler, so we don't trigger a profile on it. if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) { close(g_signal_pipe_fds[0]); @@ -902,25 +996,14 @@ void DumpPerfetto(art::Thread* self) { return; } - // Daemon creates a new process that is the grand-child of the original process, and exits. - if (daemon(0, 0) == -1) { - PLOG(FATAL) << "daemon"; - } - - // The following code is only executed by the grand-child of the original process. - - // Make sure that this is the first thing we do after forking, so if anything - // below hangs, the fork will go away from the watchdog. 
- ArmWatchdogOrDie(); - - struct timespec ts = {}; - if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) { - LOG(FATAL) << "Failed to get boottime."; - } - uint64_t timestamp = ts.tv_sec * 1000000000LL + ts.tv_nsec; - - WaitForDataSource(self); + uint64_t ts = GetCurrentBootClockNs(); + child_runnable(parent_pid, ts); + // Prevent the `atexit` handlers from running. We do not want to call cleanup + // functions the parent process has registered. + art::FastExit(0); +} +void WriteHeapPackets(pid_t parent_pid, uint64_t timestamp) { JavaHprofDataSource::Trace( [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx) NO_THREAD_SAFETY_ANALYSIS { @@ -968,11 +1051,101 @@ void DumpPerfetto(art::Thread* self) { } } }); +} - LOG(INFO) << "finished dumping heap for " << parent_pid; - // Prevent the `atexit` handlers from running. We do not want to call cleanup - // functions the parent process has registered. - art::FastExit(0); +void DumpPerfetto(art::Thread* self) { + ForkAndRun( + self, + ResumeParentPolicy::IMMEDIATELY, + // parent thread + [](pid_t child) { + // Busy waiting here will introduce some extra latency, but that is okay because we have + // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which + // is not needed for progress of the app itself. + // We daemonize the child process, so effectively we only need to wait + // for it to fork and exit. + BusyWaitpid(child, 1000); + }, + // child thread + [self](pid_t dumped_pid, uint64_t timestamp) { + // Daemon creates a new process that is the grand-child of the original process, and exits. + if (daemon(0, 0) == -1) { + PLOG(FATAL) << "daemon"; + } + // The following code is only executed by the grand-child of the original process. + + // Make sure that this is the first thing we do after forking, so if anything + // below hangs, the fork will go away from the watchdog. 
+ ArmWatchdogOrDie(); + SetupDataSource("android.java_hprof", false); + WaitForDataSource(self); + WriteHeapPackets(dumped_pid, timestamp); + LOG(INFO) << "finished dumping heap for " << dumped_pid; + }); +} + +void DumpPerfettoOutOfMemory() REQUIRES_SHARED(art::Locks::mutator_lock_) { + art::Thread* self = art::Thread::Current(); + if (!self) { + LOG(FATAL_WITHOUT_ABORT) << "no thread in DumpPerfettoOutOfMemory"; + return; + } + + // Ensure that there is an active, armed tracing session + uint32_t session_cnt = + android::base::GetUintProperty<uint32_t>("traced.oome_heap_session.count", 0); + if (session_cnt == 0) { + return; + } + { + // OutOfMemoryErrors are reentrant, make sure we do not fork and process + // more than once. + art::MutexLock lk(self, GetStateMutex()); + if (g_oome_triggered) { + return; + } + g_oome_triggered = true; + g_oome_sessions_pending = session_cnt; + } + + art::ScopedThreadSuspension sts(self, art::ThreadState::kSuspended); + // If we fork & resume the original process execution it will most likely exit + // ~immediately due to the OOME error thrown. When the system detects that + // that, it will cleanup by killing all processes in the cgroup (including + // the process we just forked). + // We need to avoid the race between the heap dump and the process group + // cleanup, and the only way to do this is to avoid resuming the original + // process until the heap dump is complete. + // Given we are already about to crash anyway, the diagnostic data we get + // outweighs the cost of introducing some latency. + ForkAndRun( + self, + ResumeParentPolicy::DEFERRED, + // parent process + [](pid_t child) { + // waitpid to reap the zombie + // we are explicitly waiting for the child to exit + // The reason for the timeout on top of the watchdog is that it is + // possible (albeit unlikely) that even the watchdog will fail to be + // activated in the case of an atfork handler. 
+ BusyWaitpid(child, kWatchdogTimeoutSec * 1000); + }, + // child process + [self](pid_t dumped_pid, uint64_t timestamp) { + ArmWatchdogOrDie(); + art::ScopedTrace trace("perfetto_hprof oome"); + SetupDataSource("android.java_hprof.oom", true); + perfetto::Tracing::ActivateTriggers({"com.android.telemetry.art-outofmemory"}, 500); + + // A pre-armed tracing session might not exist, so we should wait for a + // limited amount of time before we decide to let the execution continue. + if (!TimedWaitForDataSource(self, 1000)) { + LOG(INFO) << "OOME hprof timeout (state " << g_state << ")"; + return; + } + WriteHeapPackets(dumped_pid, timestamp); + LOG(INFO) << "OOME hprof complete for " << dumped_pid; + }); } // The plugin initialization function. @@ -1062,10 +1235,15 @@ extern "C" bool ArtPlugin_Initialize() { }); th.detach(); + // Register the OOM error handler. + art::Runtime::Current()->SetOutOfMemoryErrorHook(perfetto_hprof::DumpPerfettoOutOfMemory); + return true; } extern "C" bool ArtPlugin_Deinitialize() { + art::Runtime::Current()->SetOutOfMemoryErrorHook(nullptr); + if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) { PLOG(ERROR) << "failed to reset signal handler"; // We cannot close the pipe if the signal handler wasn't unregistered, diff --git a/runtime/Android.bp b/runtime/Android.bp index bbc625e09c..f21d199241 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -419,7 +419,9 @@ libart_cc_defaults { ], generated_sources: [ "apex-info-list-tinyxml", + "art-apex-cache-info", ], + tidy_disabled_srcs: [":art-apex-cache-info"], }, android_arm: { ldflags: JIT_DEBUG_REGISTER_CODE_LDFLAGS, diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc index 6bf52ce438..52b3829401 100644 --- a/runtime/base/gc_visited_arena_pool.cc +++ b/runtime/base/gc_visited_arena_pool.cc @@ -273,6 +273,7 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) { } std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = true; 
while (first != nullptr) { FreeRangeLocked(first->Begin(), first->Size()); // In other implementations of ArenaPool this is calculated when asked for, diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h index 4f176ef3df..e307147c9e 100644 --- a/runtime/base/gc_visited_arena_pool.h +++ b/runtime/base/gc_visited_arena_pool.h @@ -142,6 +142,29 @@ class GcVisitedArenaPool final : public ArenaPool { pre_zygote_fork_ = false; } + // For userfaultfd GC to be able to acquire the lock to avoid concurrent + // release of arenas when it is visiting them. + std::mutex& GetLock() { return lock_; } + + // Find the given arena in allocated_arenas_. The function is called with + // lock_ acquired. + bool FindAllocatedArena(const TrackedArena* arena) const NO_THREAD_SAFETY_ANALYSIS { + for (auto& allocated_arena : allocated_arenas_) { + if (arena == &allocated_arena) { + return true; + } + } + return false; + } + + void ClearArenasFreed() { + std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = false; + } + + // The function is called with lock_ acquired. + bool AreArenasFreed() const NO_THREAD_SAFETY_ANALYSIS { return arenas_freed_; } + private: void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_); // Add a map (to be visited by userfaultfd) to the pool of at least min_size @@ -194,6 +217,11 @@ class GcVisitedArenaPool final : public ArenaPool { // Number of bytes allocated so far. size_t bytes_allocated_ GUARDED_BY(lock_); const char* name_; + // Flag to indicate that some arenas have been freed. This flag is used as an + // optimization by GC to know if it needs to find if the arena being visited + // has been freed or not. The flag is cleared in the compaction pause and read + // when linear-alloc space is concurrently visited updated to update GC roots. 
+ bool arenas_freed_ GUARDED_BY(lock_); const bool low_4gb_; // Set to true in zygote process so that all linear-alloc allocations are in // private-anonymous mappings and not on userfaultfd visited pages. At diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 444cc6307e..4ff97a0191 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2151,8 +2151,7 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Don't visit class-loaders if compacting with userfaultfd GC as these // weaks are updated using Runtime::SweepSystemWeaks() and the GC doesn't // tolerate double updates. - if (!gUseUserfaultfd - || !heap->MarkCompactCollector()->IsCompacting(self)) { + if (!heap->IsPerformingUffdCompaction()) { for (const ClassLoaderData& data : class_loaders_) { GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); root.VisitRoot(visitor, RootInfo(kRootVMInternal)); diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc index c6940fa17b..35b71fd829 100644 --- a/runtime/fault_handler.cc +++ b/runtime/fault_handler.cc @@ -20,11 +20,14 @@ #include <sys/mman.h> #include <sys/ucontext.h> +#include <atomic> + #include "art_method-inl.h" #include "base/logging.h" // For VLOG #include "base/safe_copy.h" #include "base/stl_util.h" #include "dex/dex_file_types.h" +#include "gc/heap.h" #include "gc/space/bump_pointer_space.h" #include "jit/jit.h" #include "jit/jit_code_cache.h" @@ -48,8 +51,13 @@ extern "C" NO_INLINE __attribute__((visibility("default"))) void art_sigsegv_fau } // Signal handler called on SIGSEGV. -static bool art_fault_handler(int sig, siginfo_t* info, void* context) { - return fault_manager.HandleFault(sig, info, context); +static bool art_sigsegv_handler(int sig, siginfo_t* info, void* context) { + return fault_manager.HandleSigsegvFault(sig, info, context); +} + +// Signal handler called on SIGBUS. 
+static bool art_sigbus_handler(int sig, siginfo_t* info, void* context) { + return fault_manager.HandleSigbusFault(sig, info, context); } #if defined(__linux__) @@ -148,36 +156,94 @@ static bool SafeVerifyClassClass(mirror::Class* cls) REQUIRES_SHARED(Locks::muta #endif -FaultManager::FaultManager() : initialized_(false) { - sigaction(SIGSEGV, nullptr, &oldaction_); +FaultManager::FaultManager() : initialized_(false) {} + +FaultManager::~FaultManager() {} + +static const char* SignalCodeName(int sig, int code) { + if (sig == SIGSEGV) { + switch (code) { + case SEGV_MAPERR: return "SEGV_MAPERR"; + case SEGV_ACCERR: return "SEGV_ACCERR"; + case 8: return "SEGV_MTEAERR"; + case 9: return "SEGV_MTESERR"; + default: return "SEGV_UNKNOWN"; + } + } else if (sig == SIGBUS) { + switch (code) { + case BUS_ADRALN: return "BUS_ADRALN"; + case BUS_ADRERR: return "BUS_ADRERR"; + case BUS_OBJERR: return "BUS_OBJERR"; + default: return "BUS_UNKNOWN"; + } + } else { + return "UNKNOWN"; + } } -FaultManager::~FaultManager() { +static std::ostream& PrintSignalInfo(std::ostream& os, siginfo_t* info) { + os << " si_signo: " << info->si_signo << " (" << strsignal(info->si_signo) << ")\n" + << " si_code: " << info->si_code + << " (" << SignalCodeName(info->si_signo, info->si_code) << ")"; + if (info->si_signo == SIGSEGV || info->si_signo == SIGBUS) { + os << "\n" << " si_addr: " << info->si_addr; + } + return os; } -void FaultManager::Init() { +static bool InstallSigbusHandler() { + return gUseUserfaultfd && + Runtime::Current()->GetHeap()->MarkCompactCollector()->IsUsingSigbusFeature(); +} + +void FaultManager::Init(bool use_sig_chain) { CHECK(!initialized_); - sigset_t mask; - sigfillset(&mask); - sigdelset(&mask, SIGABRT); - sigdelset(&mask, SIGBUS); - sigdelset(&mask, SIGFPE); - sigdelset(&mask, SIGILL); - sigdelset(&mask, SIGSEGV); - - SigchainAction sa = { - .sc_sigaction = art_fault_handler, - .sc_mask = mask, - .sc_flags = 0UL, - }; - - AddSpecialSignalHandlerFn(SIGSEGV, &sa); - 
initialized_ = true; + if (use_sig_chain) { + sigset_t mask; + sigfillset(&mask); + sigdelset(&mask, SIGABRT); + sigdelset(&mask, SIGBUS); + sigdelset(&mask, SIGFPE); + sigdelset(&mask, SIGILL); + sigdelset(&mask, SIGSEGV); + + SigchainAction sa = { + .sc_sigaction = art_sigsegv_handler, + .sc_mask = mask, + .sc_flags = 0UL, + }; + + AddSpecialSignalHandlerFn(SIGSEGV, &sa); + if (InstallSigbusHandler()) { + sa.sc_sigaction = art_sigbus_handler; + AddSpecialSignalHandlerFn(SIGBUS, &sa); + } + initialized_ = true; + } else if (InstallSigbusHandler()) { + struct sigaction act; + std::memset(&act, '\0', sizeof(act)); + act.sa_flags = SA_SIGINFO | SA_RESTART; + act.sa_sigaction = [](int sig, siginfo_t* info, void* context) { + if (!art_sigbus_handler(sig, info, context)) { + std::ostringstream oss; + PrintSignalInfo(oss, info); + LOG(FATAL) << "Couldn't handle SIGBUS fault:" + << "\n" + << oss.str(); + } + }; + if (sigaction(SIGBUS, &act, nullptr)) { + LOG(FATAL) << "Fault handler for SIGBUS couldn't be setup: " << strerror(errno); + } + } } void FaultManager::Release() { if (initialized_) { - RemoveSpecialSignalHandlerFn(SIGSEGV, art_fault_handler); + RemoveSpecialSignalHandlerFn(SIGSEGV, art_sigsegv_handler); + if (InstallSigbusHandler()) { + RemoveSpecialSignalHandlerFn(SIGBUS, art_sigbus_handler); + } initialized_ = false; } } @@ -210,32 +276,22 @@ bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* co return false; } -static const char* SignalCodeName(int sig, int code) { - if (sig != SIGSEGV) { - return "UNKNOWN"; - } else { - switch (code) { - case SEGV_MAPERR: return "SEGV_MAPERR"; - case SEGV_ACCERR: return "SEGV_ACCERR"; - case 8: return "SEGV_MTEAERR"; - case 9: return "SEGV_MTESERR"; - default: return "UNKNOWN"; - } - } -} -static std::ostream& PrintSignalInfo(std::ostream& os, siginfo_t* info) { - os << " si_signo: " << info->si_signo << " (" << strsignal(info->si_signo) << ")\n" - << " si_code: " << info->si_code - << " (" << 
SignalCodeName(info->si_signo, info->si_code) << ")"; - if (info->si_signo == SIGSEGV) { - os << "\n" << " si_addr: " << info->si_addr; +bool FaultManager::HandleSigbusFault(int sig, siginfo_t* info, void* context ATTRIBUTE_UNUSED) { + DCHECK_EQ(sig, SIGBUS); + if (VLOG_IS_ON(signals)) { + PrintSignalInfo(VLOG_STREAM(signals) << "Handling SIGBUS fault:\n", info); } - return os; + +#ifdef TEST_NESTED_SIGNAL + // Simulate a crash in a handler. + raise(SIGBUS); +#endif + return Runtime::Current()->GetHeap()->MarkCompactCollector()->SigbusHandler(info); } -bool FaultManager::HandleFault(int sig, siginfo_t* info, void* context) { +bool FaultManager::HandleSigsegvFault(int sig, siginfo_t* info, void* context) { if (VLOG_IS_ON(signals)) { - PrintSignalInfo(VLOG_STREAM(signals) << "Handling fault:" << "\n", info); + PrintSignalInfo(VLOG_STREAM(signals) << "Handling SIGSEGV fault:\n", info); } #ifdef TEST_NESTED_SIGNAL diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h index 8b89c22a0f..6ffdbaba61 100644 --- a/runtime/fault_handler.h +++ b/runtime/fault_handler.h @@ -36,7 +36,9 @@ class FaultManager { FaultManager(); ~FaultManager(); - void Init(); + // Use libsigchain if use_sig_chain is true. Otherwise, setup SIGBUS directly + // using sigaction(). + void Init(bool use_sig_chain); // Unclaim signals. void Release(); @@ -44,8 +46,11 @@ class FaultManager { // Unclaim signals and delete registered handlers. void Shutdown(); - // Try to handle a fault, returns true if successful. - bool HandleFault(int sig, siginfo_t* info, void* context); + // Try to handle a SIGSEGV fault, returns true if successful. + bool HandleSigsegvFault(int sig, siginfo_t* info, void* context); + + // Try to handle a SIGBUS fault, returns true if successful. + bool HandleSigbusFault(int sig, siginfo_t* info, void* context); // Added handlers are owned by the fault handler and will be freed on Shutdown(). 
void AddHandler(FaultHandler* handler, bool generated_code); @@ -72,7 +77,6 @@ class FaultManager { std::vector<FaultHandler*> generated_code_handlers_; std::vector<FaultHandler*> other_handlers_; - struct sigaction oldaction_; bool initialized_; DISALLOW_COPY_AND_ASSIGN(FaultManager); }; diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index 9586e9d70a..f0d379fde6 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -59,11 +59,9 @@ AllocRecordObjectMap::~AllocRecordObjectMap() { } void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) { - gc::Heap* const heap = Runtime::Current()->GetHeap(); // When we are compacting in userfaultfd GC, the class GC-roots are already // updated in SweepAllocationRecords()->SweepClassObject(). - if (heap->CurrentCollectorType() == gc::CollectorType::kCollectorTypeCMC - && heap->MarkCompactCollector()->IsCompacting(Thread::Current())) { + if (Runtime::Current()->GetHeap()->IsPerformingUffdCompaction()) { return; } CHECK_LE(recent_record_max_, alloc_record_max_); diff --git a/runtime/gc/collector/mark_compact-inl.h b/runtime/gc/collector/mark_compact-inl.h index 57517d54fc..c9b792e8f6 100644 --- a/runtime/gc/collector/mark_compact-inl.h +++ b/runtime/gc/collector/mark_compact-inl.h @@ -270,12 +270,13 @@ inline bool MarkCompact::VerifyRootSingleUpdate(void* root, if (!live_words_bitmap_->HasAddress(old_ref)) { return false; } + Thread* self = Thread::Current(); if (UNLIKELY(stack_low_addr == nullptr)) { - Thread* self = Thread::Current(); stack_low_addr = self->GetStackEnd(); stack_high_addr = reinterpret_cast<char*>(stack_low_addr) + self->GetStackSize(); } if (root < stack_low_addr || root > stack_high_addr) { + MutexLock mu(self, lock_); auto ret = updated_roots_->insert(root); DCHECK(ret.second) << "root=" << root << " old_ref=" << old_ref << " stack_low_addr=" << stack_low_addr diff --git a/runtime/gc/collector/mark_compact.cc 
b/runtime/gc/collector/mark_compact.cc index 8aba47fb3f..b61bc0e327 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -31,7 +31,9 @@ #include <numeric> #include "android-base/file.h" +#include "android-base/parsebool.h" #include "android-base/properties.h" +#include "base/file_utils.h" #include "base/memfd.h" #include "base/quasi_atomic.h" #include "base/systrace.h" @@ -50,6 +52,10 @@ #include "sigchain.h" #include "thread_list.h" +#ifdef ART_TARGET_ANDROID +#include "com_android_art.h" +#endif + #ifndef __BIONIC__ #ifndef MREMAP_DONTUNMAP #define MREMAP_DONTUNMAP 4 @@ -75,8 +81,10 @@ namespace { using ::android::base::GetBoolProperty; +using ::android::base::ParseBool; +using ::android::base::ParseBoolResult; -} +} // namespace namespace art { @@ -100,8 +108,19 @@ static uint64_t gUffdFeatures = 0; // Both, missing and minor faults on shmem are needed only for minor-fault mode. static constexpr uint64_t kUffdFeaturesForMinorFault = UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM; - -static bool KernelSupportsUffd() { +static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS; +// We consider SIGBUS feature necessary to enable this GC as it's superior than +// threading-based implementation for janks. However, since we have the latter +// already implemented, for testing purposes, we allow choosing either of the +// two at boot time in the constructor below. +// Note that having minor-fault feature implies having SIGBUS feature as the +// latter was introduced earlier than the former. In other words, having +// minor-fault feature implies having SIGBUS. We still want minor-fault to be +// available for making jit-code-cache updation concurrent, which uses shmem. 
+static constexpr uint64_t kUffdFeaturesRequired = + kUffdFeaturesForMinorFault | kUffdFeaturesForSigbus; + +bool KernelSupportsUffd() { #ifdef __linux__ if (gHaveMremapDontunmap) { int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY); @@ -118,8 +137,8 @@ static bool KernelSupportsUffd() { CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno); gUffdFeatures = api.features; close(fd); - // Allow this GC to be used only if minor-fault feature is available. - return (api.features & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault; + // Allow this GC to be used only if minor-fault and sigbus feature is available. + return (api.features & kUffdFeaturesRequired) == kUffdFeaturesRequired; } } #endif @@ -142,10 +161,48 @@ static gc::CollectorType FetchCmdlineGcType() { return gc_type; } +#ifdef ART_TARGET_ANDROID +static bool GetCachedBoolProperty(const std::string& key, bool default_value) { + std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml"; + std::optional<com::android::art::CacheInfo> cache_info = com::android::art::read(path.c_str()); + if (!cache_info.has_value()) { + // We are in chroot or in a standalone runtime process (e.g., IncidentHelper), or + // odsign/odrefresh failed to generate and sign the cache info. There's nothing we can do. + return default_value; + } + const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties(); + if (list == nullptr) { + // This should never happen. 
+ LOG(ERROR) << "Missing system properties from cache-info."; + return default_value; + } + const std::vector<com::android::art::KeyValuePair>& properties = list->getItem(); + for (const com::android::art::KeyValuePair& pair : properties) { + if (pair.getK() == key) { + ParseBoolResult result = ParseBool(pair.getV()); + switch (result) { + case ParseBoolResult::kTrue: + return true; + case ParseBoolResult::kFalse: + return false; + case ParseBoolResult::kError: + return default_value; + } + } + } + return default_value; +} + static bool SysPropSaysUffdGc() { - return GetBoolProperty("persist.device_config.runtime_native_boot.enable_uffd_gc", - GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false)); + // The phenotype flag can change at time time after boot, but it shouldn't take effect until a + // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot. + return GetCachedBoolProperty("persist.device_config.runtime_native_boot.enable_uffd_gc", + GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false)); } +#else +// Never called. +static bool SysPropSaysUffdGc() { return false; } +#endif static bool ShouldUseUserfaultfd() { static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier); @@ -177,6 +234,12 @@ static constexpr bool kCheckLocks = kDebugLocking; static constexpr bool kVerifyRootsMarked = kIsDebugBuild; // Two threads should suffice on devices. static constexpr size_t kMaxNumUffdWorkers = 2; +// Number of compaction buffers reserved for mutator threads in SIGBUS feature +// case. It's extremely unlikely that we will ever have more than these number +// of mutator threads trying to access the moving-space during one compaction +// phase. Using a lower number in debug builds to hopefully catch the issue +// before it becomes a problem on user builds. +static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 512; // Minimum from-space chunk to be madvised (during concurrent compaction) in one go. 
static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; // Concurrent compaction termination logic is different (and slightly more efficient) if the @@ -222,8 +285,8 @@ bool MarkCompact::CreateUserfaultfd(bool post_fork) { } else { DCHECK(IsValidFd(uffd_)); // Initialize uffd with the features which are required and available. - struct uffdio_api api = { - .api = UFFD_API, .features = gUffdFeatures & kUffdFeaturesForMinorFault, .ioctls = 0}; + struct uffdio_api api = {.api = UFFD_API, .features = gUffdFeatures, .ioctls = 0}; + api.features &= use_uffd_sigbus_ ? kUffdFeaturesRequired : kUffdFeaturesForMinorFault; CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0) << "ioctl_userfaultfd: API: " << strerror(errno); } } @@ -238,25 +301,41 @@ MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignmen MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end)); } +static bool IsSigbusFeatureAvailable() { + MarkCompact::GetUffdAndMinorFault(); + return gUffdFeatures & UFFD_FEATURE_SIGBUS; +} + MarkCompact::MarkCompact(Heap* heap) : GarbageCollector(heap, "concurrent mark compact"), gc_barrier_(0), - mark_stack_lock_("mark compact mark stack lock", kMarkSweepMarkStackLock), + lock_("mark compact lock", kGenericBottomLock), bump_pointer_space_(heap->GetBumpPointerSpace()), moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()), moving_to_space_fd_(kFdUnused), moving_from_space_fd_(kFdUnused), uffd_(kFdUnused), - thread_pool_counter_(0), + sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask), compaction_in_progress_count_(0), + thread_pool_counter_(0), compacting_(false), uffd_initialized_(false), uffd_minor_fault_supported_(false), + use_uffd_sigbus_(IsSigbusFeatureAvailable()), minor_fault_initialized_(false), map_linear_alloc_shared_(false) { if (kIsDebugBuild) { updated_roots_.reset(new std::unordered_set<void*>()); } + // TODO: When using minor-fault feature, the first GC after zygote-fork + // requires mapping the 
linear-alloc again with MAP_SHARED. This leaves a + // gap for suspended threads to access linear-alloc when it's empty (after + // mremap) and not yet userfaultfd registered. This cannot be fixed by merely + // doing uffd registration first. For now, just assert that we are not using + // minor-fault. Eventually, a cleanup of linear-alloc update logic to only + // use private anonymous would be ideal. + CHECK(!uffd_minor_fault_supported_); + // TODO: Depending on how the bump-pointer space move is implemented. If we // switch between two virtual memories each time, then we will have to // initialize live_words_bitmap_ accordingly. @@ -337,7 +416,9 @@ MarkCompact::MarkCompact(Heap* heap) LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg; } } - const size_t num_pages = 1 + std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers); + const size_t num_pages = + 1 + (use_uffd_sigbus_ ? kMutatorCompactionBufferCount : + std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers", kPageSize * num_pages, PROT_READ | PROT_WRITE, @@ -350,7 +431,8 @@ MarkCompact::MarkCompact(Heap* heap) conc_compaction_termination_page_ = compaction_buffers_map_.Begin(); // Touch the page deliberately to avoid userfaults on it. We madvise it in // CompactionPhase() before using it to terminate concurrent compaction. - CHECK_EQ(*conc_compaction_termination_page_, 0); + ForceRead(conc_compaction_termination_page_); + // In most of the cases, we don't expect more than one LinearAlloc space. linear_alloc_spaces_data_.reserve(1); @@ -496,19 +578,56 @@ void MarkCompact::InitializePhase() { moving_first_objs_count_ = 0; non_moving_first_objs_count_ = 0; black_page_count_ = 0; + bytes_scanned_ = 0; freed_objects_ = 0; + // The first buffer is used by gc-thread. 
+ compaction_buffer_counter_ = 1; from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin(); black_allocations_begin_ = bump_pointer_space_->Limit(); walk_super_class_cache_ = nullptr; - compacting_ = false; // TODO: Would it suffice to read it once in the constructor, which is called // in zygote process? pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); } +class MarkCompact::ThreadFlipVisitor : public Closure { + public: + explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + CHECK(thread == self || thread->GetState() != ThreadState::kRunnable) + << thread->GetState() << " thread " << thread << " self " << self; + thread->VisitRoots(collector_, kVisitRootFlagAllRoots); + // Interpreter cache is thread-local so it needs to be swept either in a + // flip, or a stop-the-world pause. 
+ CHECK(collector_->compacting_); + thread->SweepInterpreterCache(collector_); + thread->AdjustTlab(collector_->black_objs_slide_diff_); + collector_->GetBarrier().Pass(self); + } + + private: + MarkCompact* const collector_; +}; + +class MarkCompact::FlipCallback : public Closure { + public: + explicit FlipCallback(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread ATTRIBUTE_UNUSED) override REQUIRES(Locks::mutator_lock_) { + collector_->CompactionPause(); + } + + private: + MarkCompact* const collector_; +}; + void MarkCompact::RunPhases() { Thread* self = Thread::Current(); thread_running_gc_ = self; + Runtime* runtime = Runtime::Current(); InitializePhase(); GetHeap()->PreGcVerification(this); { @@ -516,6 +635,7 @@ void MarkCompact::RunPhases() { MarkingPhase(); } { + // Marking pause ScopedPause pause(this); MarkingPause(); if (kIsDebugBuild) { @@ -531,16 +651,21 @@ void MarkCompact::RunPhases() { ReclaimPhase(); PrepareForCompaction(); } - if (uffd_ != kFallbackMode) { + if (uffd_ != kFallbackMode && !use_uffd_sigbus_) { heap_->GetThreadPool()->WaitForWorkersToBeCreated(); } + { - heap_->ThreadFlipBegin(self); + // Compaction pause + gc_barrier_.Init(self, 0); + ThreadFlipVisitor visitor(this); + FlipCallback callback(this); + size_t barrier_count = runtime->GetThreadList()->FlipThreadRoots( + &visitor, &callback, this, GetHeap()->GetGcPauseListener()); { - ScopedPause pause(this); - PreCompactionPhase(); + ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun); + gc_barrier_.Increment(self, barrier_count); } - heap_->ThreadFlipEnd(self); } if (IsValidFd(uffd_)) { @@ -801,14 +926,15 @@ void MarkCompact::PrepareForCompaction() { bool is_zygote = Runtime::Current()->IsZygote(); if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) { - // Register the buffer that we use for terminating concurrent compaction - struct uffdio_register uffd_register; - uffd_register.range.start = 
reinterpret_cast<uintptr_t>(conc_compaction_termination_page_); - uffd_register.range.len = kPageSize; - uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; - CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) + if (!use_uffd_sigbus_) { + // Register the buffer that we use for terminating concurrent compaction + struct uffdio_register uffd_register; + uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_); + uffd_register.range.len = kPageSize; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno); - + } if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) { // A valid shadow-map for moving space is only possible if we // were able to map it in the constructor. That also means that its size @@ -823,20 +949,21 @@ void MarkCompact::PrepareForCompaction() { // and get rid of it when finished. This is expected to happen rarely as // zygote spends most of the time in native fork loop. if (uffd_ != kFallbackMode) { - ThreadPool* pool = heap_->GetThreadPool(); - if (UNLIKELY(pool == nullptr)) { - // On devices with 2 cores, GetParallelGCThreadCount() will return 1, - // which is desired number of workers on such devices. - heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); - pool = heap_->GetThreadPool(); - } - size_t num_threads = pool->GetThreadCount(); - thread_pool_counter_ = num_threads; - for (size_t i = 0; i < num_threads; i++) { - pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1)); + if (!use_uffd_sigbus_) { + ThreadPool* pool = heap_->GetThreadPool(); + if (UNLIKELY(pool == nullptr)) { + // On devices with 2 cores, GetParallelGCThreadCount() will return 1, + // which is desired number of workers on such devices. 
+ heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); + pool = heap_->GetThreadPool(); + } + size_t num_threads = pool->GetThreadCount(); + thread_pool_counter_ = num_threads; + for (size_t i = 0; i < num_threads; i++) { + pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1)); + } + CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads); } - CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads); - /* * Possible scenarios for mappings: * A) All zygote GCs (or if minor-fault feature isn't available): uses @@ -1056,6 +1183,7 @@ void MarkCompact::MarkingPause() { std::list<Thread*> thread_list = runtime->GetThreadList()->GetList(); for (Thread* thread : thread_list) { thread->VisitRoots(this, static_cast<VisitRootFlags>(0)); + DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr); // Need to revoke all the thread-local allocation stacks since we will // swap the allocation stacks (below) and don't want anybody to allocate // into the live stack. @@ -1063,6 +1191,18 @@ void MarkCompact::MarkingPause() { bump_pointer_space_->RevokeThreadLocalBuffers(thread); } } + // Fetch only the accumulated objects-allocated count as it is guaranteed to + // be up-to-date after the TLAB revocation above. + freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated(); + // Capture 'end' of moving-space at this point. Every allocation beyond this + // point will be considered as black. + // Align-up to page boundary so that black allocations happen from next page + // onwards. Also, it ensures that 'end' is aligned for card-table's + // ClearCardRange(). + black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize); + DCHECK(IsAligned<kAlignment>(black_allocations_begin_)); + black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize); + // Re-mark root set. Doesn't include thread-roots as they are already marked // above. 
ReMarkRoots(runtime); @@ -1074,9 +1214,6 @@ void MarkCompact::MarkingPause() { live_stack_freeze_size_ = heap_->GetLiveStack()->Size(); } } - // Fetch only the accumulated objects-allocated count as it is guaranteed to - // be up-to-date after the TLAB revocation above. - freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated(); // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk // objects in bump-pointer space when we have a mark-bitmap to indicate live // objects. At the same time we also need to be able to visit black allocations, @@ -1096,14 +1233,6 @@ void MarkCompact::MarkingPause() { // Enable the reference processing slow path, needs to be done with mutators // paused since there is no lock in the GetReferent fast path. heap_->GetReferenceProcessor()->EnableSlowPath(); - - // Capture 'end' of moving-space at this point. Every allocation beyond this - // point will be considered as black. - // Align-up to page boundary so that black allocations happen from next page - // onwards. 
- black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize); - DCHECK(IsAligned<kAlignment>(black_allocations_begin_)); - black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize); } void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) { @@ -1152,6 +1281,24 @@ void MarkCompact::SweepLargeObjects(bool swap_bitmaps) { } } +class MarkCompact::CheckpointSweepInterpreterCache : public Closure { + public: + explicit CheckpointSweepInterpreterCache(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) { + Thread* const self = Thread::Current(); + CHECK(thread == self + || thread->IsSuspended() + || thread->GetState() == ThreadState::kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + thread->SweepInterpreterCache(collector_); + collector_->GetBarrier().Pass(self); + } + + private: + MarkCompact* collector_; +}; + void MarkCompact::ReclaimPhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); DCHECK(thread_running_gc_ == Thread::Current()); @@ -1176,6 +1323,24 @@ void MarkCompact::ReclaimPhase() { // Unbind the live and mark bitmaps. GetHeap()->UnBindBitmaps(); } + { + // TODO: Once the logic in Runtime::ProcessWeakClass() is streamlined to not + // check for class-loader's liveness, we can remove this as the Sweep during + // compaction pause would suffice. + CHECK(!compacting_); + CheckpointSweepInterpreterCache check_point(this); + gc_barrier_.Init(thread_running_gc_, 0); + size_t barrier_count = runtime->GetThreadList()->RunCheckpoint(&check_point); + // Release locks, then wait for all mutator threads to pass the barrier. If there are + // no threads to wait for, which implies that all the checkpoint functions are finished, + // then no need to release locks. 
+ if (barrier_count != 0) { + Locks::mutator_lock_->SharedUnlock(thread_running_gc_); + ScopedThreadStateChange tsc(thread_running_gc_, ThreadState::kWaitingForCheckPointsToRun); + gc_barrier_.Increment(thread_running_gc_, barrier_count); + Locks::mutator_lock_->SharedLock(thread_running_gc_); + } + } } // We want to avoid checking for every reference if it's within the page or @@ -1516,12 +1681,15 @@ void MarkCompact::SlideBlackPage(mirror::Object* first_obj, to_obj, dest, dest_page_end); - from_obj->VisitRefsForCompaction< - /*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(visitor, - MemberOffset(offset), - MemberOffset(offset - + kPageSize)); - return; + obj_size = from_obj->VisitRefsForCompaction< + /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor, + MemberOffset(offset), + MemberOffset(offset + + kPageSize)); + if (first_obj == next_page_first_obj) { + // First object is the only object on this page. So there's nothing else left to do. + return; + } } obj_size = RoundUp(obj_size, kAlignment); obj_size -= offset; @@ -1715,26 +1883,52 @@ void MarkCompact::MapProcessedPages(uint8_t* to_space_start, DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length)); } } + if (use_uffd_sigbus_) { + // Nobody else would modify these pages' state simultaneously so atomic + // store is sufficient. 
+ for (; uffd_continue.mapped > 0; uffd_continue.mapped -= kPageSize) { + arr_idx--; + DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed), + PageState::kProcessedAndMapping); + state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release); + } + } + } +} + +void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) { + struct uffdio_zeropage uffd_zeropage; + DCHECK(IsAligned<kPageSize>(addr)); + uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); + uffd_zeropage.range.len = kPageSize; + uffd_zeropage.mode = 0; + int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); + if (LIKELY(ret == 0)) { + DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize)); + } else { + CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) + << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr; } } +void MarkCompact::CopyIoctl(void* dst, void* buffer) { + struct uffdio_copy uffd_copy; + uffd_copy.src = reinterpret_cast<uintptr_t>(buffer); + uffd_copy.dst = reinterpret_cast<uintptr_t>(dst); + uffd_copy.len = kPageSize; + uffd_copy.mode = 0; + CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) + << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer + << " dst:" << dst; + DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); +} + template <int kMode, typename CompactionFn> void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, size_t status_arr_len, uint8_t* to_space_page, uint8_t* page, CompactionFn func) { - auto copy_ioctl = [this] (void* dst, void* buffer) { - struct uffdio_copy uffd_copy; - uffd_copy.src = reinterpret_cast<uintptr_t>(buffer); - uffd_copy.dst = reinterpret_cast<uintptr_t>(dst); - uffd_copy.len = kPageSize; - uffd_copy.mode = 0; - CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) - << "ioctl_userfaultfd: copy failed: " << strerror(errno) - << ". 
src:" << buffer << " dst:" << dst; - DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); - }; PageState expected_state = PageState::kUnprocessed; PageState desired_state = kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing; @@ -1742,17 +1936,18 @@ void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, // to moving_spaces_status_[page_idx] is released before the contents of the page are // made accessible to other threads. // - // In minor-fault case, we need acquire ordering here to ensure that when the - // CAS fails, another thread has completed processing the page, which is guaranteed - // by the release below. - // Relaxed memory-order is used in copy mode as the subsequent ioctl syscall acts as a fence. - std::memory_order order = - kMode == kCopyMode ? std::memory_order_relaxed : std::memory_order_acquire; + // We need acquire ordering here to ensure that when the CAS fails, another thread + // has completed processing the page, which is guaranteed by the release below. if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong( - expected_state, desired_state, order)) { + expected_state, desired_state, std::memory_order_acquire)) { func(); if (kMode == kCopyMode) { - copy_ioctl(to_space_page, page); + CopyIoctl(to_space_page, page); + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread would modify the status at this point. 
+ moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); + } } else if (kMode == kMinorFaultMode) { expected_state = PageState::kProcessing; desired_state = PageState::kProcessed; @@ -1772,7 +1967,7 @@ void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, } } -void MarkCompact::FreeFromSpacePages(size_t cur_page_idx) { +void MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode) { // Thanks to sliding compaction, bump-pointer allocations, and reverse // compaction (see CompactMovingSpace) the logic here is pretty simple: find // the to-space page up to which compaction has finished, all the from-space @@ -1788,7 +1983,8 @@ void MarkCompact::FreeFromSpacePages(size_t cur_page_idx) { break; } DCHECK(state >= PageState::kProcessed || - (state == PageState::kUnprocessed && idx > moving_first_objs_count_)); + (state == PageState::kUnprocessed && + (mode == kFallbackMode || idx > moving_first_objs_count_))); } uint8_t* reclaim_begin; @@ -1962,7 +2158,7 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { // We are sliding here, so no point attempting to madvise for every // page. Wait for enough pages to be done. if (idx % (kMinFromSpaceMadviseSize / kPageSize) == 0) { - FreeFromSpacePages(idx); + FreeFromSpacePages(idx, kMode); } } } @@ -1982,7 +2178,7 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode); }); - FreeFromSpacePages(idx); + FreeFromSpacePages(idx, kMode); } DCHECK_EQ(to_space_end, bump_pointer_space_->Begin()); } @@ -2096,6 +2292,12 @@ void MarkCompact::UpdateMovingSpaceBlackAllocations() { // BumpPointerSpace::Walk() also works similarly. 
while (black_allocs < block_end && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + // Try to keep instructions which access class instance together to + // avoid reloading the pointer from object. + size_t obj_size = obj->SizeOf(); + bytes_scanned_ += obj_size; + obj_size = RoundUp(obj_size, kAlignment); + UpdateClassAfterObjectMap(obj); if (first_obj == nullptr) { first_obj = obj; } @@ -2104,8 +2306,6 @@ void MarkCompact::UpdateMovingSpaceBlackAllocations() { if (set_mark_bit) { moving_space_bitmap_->Set(obj); } - UpdateClassAfterObjectMap(obj); - size_t obj_size = RoundUp(obj->SizeOf(), kAlignment); // Handle objects which cross page boundary, including objects larger // than page size. if (remaining_chunk_size + obj_size >= kPageSize) { @@ -2143,11 +2343,11 @@ void MarkCompact::UpdateMovingSpaceBlackAllocations() { // consume the unallocated portion of the block if (black_allocs < block_end) { // first-chunk of the current page ends here. Store it. - if (first_chunk_size > 0) { + if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) { black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; first_objs_moving_space_[black_page_idx].Assign(first_obj); - first_chunk_size = 0; } + first_chunk_size = 0; first_obj = nullptr; size_t page_remaining = kPageSize - remaining_chunk_size; size_t block_remaining = block_end - black_allocs; @@ -2162,6 +2362,16 @@ void MarkCompact::UpdateMovingSpaceBlackAllocations() { black_allocs = block_end; } } + if (black_page_idx < bump_pointer_space_->Size() / kPageSize) { + // Store the leftover first-chunk, if any, and update page index. 
+ if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) { + black_page_idx++; + } else if (first_chunk_size > 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + black_page_idx++; + } + } black_page_count_ = black_page_idx - moving_first_objs_count_; delete block_sizes; } @@ -2266,7 +2476,7 @@ class MarkCompact::LinearAllocPageUpdater { public: explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {} - void operator()(uint8_t* page_begin, uint8_t* first_obj) const ALWAYS_INLINE + void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_ALIGNED(page_begin, kPageSize); uint8_t* page_end = page_begin + kPageSize; @@ -2276,7 +2486,8 @@ class MarkCompact::LinearAllocPageUpdater { obj_size = header->GetSize(); if (UNLIKELY(obj_size == 0)) { // No more objects in this page to visit. - break; + last_page_touched_ = byte >= page_begin; + return; } uint8_t* obj = byte + sizeof(TrackingHeader); uint8_t* obj_end = byte + obj_size; @@ -2293,8 +2504,11 @@ class MarkCompact::LinearAllocPageUpdater { } byte += RoundUp(obj_size, LinearAlloc::kAlignment); } + last_page_touched_ = true; } + bool WasLastPageTouched() const { return last_page_touched_; } + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { if (!root->IsNull()) { @@ -2375,9 +2589,11 @@ class MarkCompact::LinearAllocPageUpdater { } MarkCompact* const collector_; + // Whether the last page was touched or not. 
+ bool last_page_touched_; }; -void MarkCompact::PreCompactionPhase() { +void MarkCompact::CompactionPause() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime* runtime = Runtime::Current(); non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap(); @@ -2387,9 +2603,6 @@ void MarkCompact::PreCompactionPhase() { stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize(); } - - compacting_ = true; - { TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings()); ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_); @@ -2426,24 +2639,12 @@ void MarkCompact::PreCompactionPhase() { // then do so. UpdateNonMovingSpaceBlackAllocations(); + // This store is visible to mutator (or uffd worker threads) as the mutator + // lock's unlock guarantees that. + compacting_ = true; + // Start updating roots and system weaks now. heap_->GetReferenceProcessor()->UpdateRoots(this); } - - { - // Thread roots must be updated first (before space mremap and native root - // updation) to ensure that pre-update content is accessible. - TimingLogger::ScopedTiming t2("(Paused)UpdateThreadRoots", GetTimings()); - MutexLock mu1(thread_running_gc_, *Locks::runtime_shutdown_lock_); - MutexLock mu2(thread_running_gc_, *Locks::thread_list_lock_); - std::list<Thread*> thread_list = runtime->GetThreadList()->GetList(); - for (Thread* thread : thread_list) { - thread->VisitRoots(this, kVisitRootFlagAllRoots); - // Interpreter cache is thread-local so it needs to be swept either in a - // checkpoint, or a stop-the-world pause. 
- thread->SweepInterpreterCache(this); - thread->AdjustTlab(black_objs_slide_diff_); - } - } { TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings()); ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); @@ -2473,6 +2674,9 @@ void MarkCompact::PreCompactionPhase() { LinearAllocPageUpdater updater(this); arena_pool->VisitRoots(updater); } else { + // Clear the flag as we care about this only if arenas are freed during + // concurrent compaction. + arena_pool->ClearArenasFreed(); arena_pool->ForEachAllocatedArena( [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) { // The pre-zygote fork arenas are not visited concurrently in the @@ -2531,6 +2735,10 @@ void MarkCompact::PreCompactionPhase() { } } + if (use_uffd_sigbus_) { + // Release order wrt to mutator threads' SIGBUS handler load. + sigbus_in_progress_count_.store(0, std::memory_order_release); + } KernelPreparation(); UpdateNonMovingSpace(); // fallback mode @@ -2542,19 +2750,19 @@ void MarkCompact::PreCompactionPhase() { RecordFree(ObjectBytePair(freed_objects_, freed_bytes)); } else { DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u); - // We must start worker threads before resuming mutators to avoid deadlocks. - heap_->GetThreadPool()->StartWorkers(thread_running_gc_); + if (!use_uffd_sigbus_) { + // We must start worker threads before resuming mutators to avoid deadlocks. 
+ heap_->GetThreadPool()->StartWorkers(thread_running_gc_); + } } stack_low_addr_ = nullptr; } -void MarkCompact::KernelPrepareRange(uint8_t* to_addr, - uint8_t* from_addr, - size_t map_size, - size_t uffd_size, - int fd, - int uffd_mode, - uint8_t* shadow_addr) { +void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, + uint8_t* from_addr, + size_t map_size, + int fd, + uint8_t* shadow_addr) { int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED; if (gHaveMremapDontunmap) { mremap_flags |= MREMAP_DONTUNMAP; @@ -2593,19 +2801,6 @@ void MarkCompact::KernelPrepareRange(uint8_t* to_addr, CHECK_EQ(ret, static_cast<void*>(to_addr)) << "mmap for moving space failed: " << strerror(errno); } - if (IsValidFd(uffd_)) { - // Userfaultfd registration - struct uffdio_register uffd_register; - uffd_register.range.start = reinterpret_cast<uintptr_t>(to_addr); - uffd_register.range.len = uffd_size; - uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (uffd_mode == kMinorFaultMode) { - uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR; - } - CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) - << "ioctl_userfaultfd: register failed: " << strerror(errno) - << ". start:" << static_cast<void*>(to_addr) << " len:" << PrettySize(uffd_size); - } } void MarkCompact::KernelPreparation() { @@ -2657,24 +2852,37 @@ void MarkCompact::KernelPreparation() { shadow_addr = shadow_to_space_map_.Begin(); } - KernelPrepareRange(moving_space_begin, - from_space_begin_, - moving_space_size, - moving_space_register_sz, - moving_to_space_fd_, - mode, - shadow_addr); + KernelPrepareRangeForUffd(moving_space_begin, + from_space_begin_, + moving_space_size, + moving_to_space_fd_, + shadow_addr); if (IsValidFd(uffd_)) { + // Register the moving space with userfaultfd. + RegisterUffd(moving_space_begin, moving_space_register_sz, mode); + // Prepare linear-alloc for concurrent compaction. 
for (auto& data : linear_alloc_spaces_data_) { - KernelPrepareRange(data.begin_, - data.shadow_.Begin(), - data.shadow_.Size(), - data.shadow_.Size(), - map_shared && !data.already_shared_ ? kFdSharedAnon : kFdUnused, - minor_fault_initialized_ ? kMinorFaultMode : kCopyMode); - if (map_shared) { + bool mmap_again = map_shared && !data.already_shared_; + DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_); + // There could be threads running in suspended mode when the compaction + // pause is being executed. In order to make the userfaultfd setup atomic, + // the registration has to be done *before* moving the pages to shadow map. + if (!mmap_again) { + // See the comment in the constructor as to why it's conditionally done. + RegisterUffd(data.begin_, + data.shadow_.Size(), + minor_fault_initialized_ ? kMinorFaultMode : kCopyMode); + } + KernelPrepareRangeForUffd(data.begin_, + data.shadow_.Begin(), + data.shadow_.Size(), + mmap_again ? kFdSharedAnon : kFdUnused); + if (mmap_again) { data.already_shared_ = true; + RegisterUffd(data.begin_, + data.shadow_.Size(), + minor_fault_initialized_ ? kMinorFaultMode : kCopyMode); } } } @@ -2692,32 +2900,6 @@ template <int kMode> void MarkCompact::ConcurrentCompaction(uint8_t* buf) { DCHECK_NE(kMode, kFallbackMode); DCHECK(kMode != kCopyMode || buf != nullptr); - auto zeropage_ioctl = [this](void* addr, bool tolerate_eexist, bool tolerate_enoent) { - struct uffdio_zeropage uffd_zeropage; - DCHECK(IsAligned<kPageSize>(addr)); - uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); - uffd_zeropage.range.len = kPageSize; - uffd_zeropage.mode = 0; - int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); - if (LIKELY(ret == 0)) { - DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize)); - } else { - CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) - << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". 
addr:" << addr; - } - }; - - auto copy_ioctl = [this] (void* fault_page, void* src) { - struct uffdio_copy uffd_copy; - uffd_copy.src = reinterpret_cast<uintptr_t>(src); - uffd_copy.dst = reinterpret_cast<uintptr_t>(fault_page); - uffd_copy.len = kPageSize; - uffd_copy.mode = 0; - int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy); - CHECK_EQ(ret, 0) << "ioctl_userfaultfd: copy failed: " << strerror(errno) - << ". src:" << src << " fault_page:" << fault_page; - DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); - }; size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_; while (true) { struct uffd_msg msg; @@ -2738,7 +2920,7 @@ void MarkCompact::ConcurrentCompaction(uint8_t* buf) { // zeropage so that the gc-thread can proceed. Otherwise, each thread does // it and the gc-thread will repeat this fault until thread_pool_counter == 0. if (!gKernelHasFaultRetry || ret == 1) { - zeropage_ioctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); } else { struct uffdio_range uffd_range; uffd_range.start = msg.arg.pagefault.address; @@ -2751,28 +2933,123 @@ void MarkCompact::ConcurrentCompaction(uint8_t* buf) { } uint8_t* fault_page = AlignDown(fault_addr, kPageSize); if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) { - ConcurrentlyProcessMovingPage<kMode>( - zeropage_ioctl, copy_ioctl, fault_page, buf, nr_moving_space_used_pages); + ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages); } else if (minor_fault_initialized_) { ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>( - zeropage_ioctl, - copy_ioctl, - fault_page, - (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); + fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); } else { ConcurrentlyProcessLinearAllocPage<kCopyMode>( - zeropage_ioctl, - copy_ioctl, - fault_page, - 
(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); + fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); } } } -template <int kMode, typename ZeropageType, typename CopyType> -void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, - CopyType& copy_ioctl, - uint8_t* fault_page, +bool MarkCompact::SigbusHandler(siginfo_t* info) { + class ScopedInProgressCount { + public: + explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) { + // Increment the count only if compaction is not done yet. + SigbusCounterType prev = + collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed); + while ((prev & kSigbusCounterCompactionDoneMask) == 0) { + if (collector_->sigbus_in_progress_count_.compare_exchange_strong( + prev, prev + 1, std::memory_order_acquire)) { + DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1); + compaction_done_ = false; + return; + } + } + compaction_done_ = true; + } + + bool IsCompactionDone() const { + return compaction_done_; + } + + ~ScopedInProgressCount() { + if (!IsCompactionDone()) { + collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release); + } + } + + private: + MarkCompact* const collector_; + bool compaction_done_; + }; + + DCHECK(use_uffd_sigbus_); + if (info->si_code != BUS_ADRERR) { + // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be + // handled here. 
+ return false; + } + + ScopedInProgressCount spc(this); + uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), kPageSize); + if (!spc.IsCompactionDone()) { + if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) { + Thread* self = Thread::Current(); + Locks::mutator_lock_->AssertSharedHeld(self); + size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_; + if (minor_fault_initialized_) { + ConcurrentlyProcessMovingPage<kMinorFaultMode>( + fault_page, nullptr, nr_moving_space_used_pages); + } else { + uint8_t* buf = self->GetThreadLocalGcBuffer(); + if (buf == nullptr) { + uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed); + // The buffer-map is one page bigger as the first buffer is used by GC-thread. + CHECK_LE(idx, kMutatorCompactionBufferCount); + buf = compaction_buffers_map_.Begin() + idx * kPageSize; + DCHECK(compaction_buffers_map_.HasAddress(buf)); + self->SetThreadLocalGcBuffer(buf); + } + ConcurrentlyProcessMovingPage<kCopyMode>(fault_page, buf, nr_moving_space_used_pages); + } + return true; + } else { + // Find the linear-alloc space containing fault-addr + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= fault_page && data.end_ > fault_page) { + if (minor_fault_initialized_) { + ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false); + } else { + ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false); + } + return true; + } + } + // Fault address doesn't belong to either moving-space or linear-alloc. + return false; + } + } else { + // We may spuriously get SIGBUS fault, which was initiated before the + // compaction was finished, but ends up here. In that case, if the fault + // address is valid then consider it handled. 
+ return bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) || + linear_alloc_spaces_data_.end() != + std::find_if(linear_alloc_spaces_data_.begin(), + linear_alloc_spaces_data_.end(), + [fault_page](const LinearAllocSpaceData& data) { + return data.begin_ <= fault_page && data.end_ > fault_page; + }); + } +} + +static void BackOff(uint32_t i) { + static constexpr uint32_t kYieldMax = 5; + // TODO: Consider adding x86 PAUSE and/or ARM YIELD here. + if (i <= kYieldMax) { + sched_yield(); + } else { + // nanosleep is not in the async-signal-safe list, but bionic implements it + // with a pure system call, so it should be fine. + NanoSleep(10000ull * (i - kYieldMax)); + } +} + +template <int kMode> +void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, uint8_t* buf, size_t nr_moving_space_used_pages) { class ScopedInProgressCount { @@ -2782,7 +3059,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, } ~ScopedInProgressCount() { - collector_->compaction_in_progress_count_.fetch_add(-1, std::memory_order_relaxed); + collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed); } private: @@ -2797,7 +3074,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, // There is a race which allows more than one thread to install a // zero-page. But we can tolerate that. So absorb the EEXIST returned by // the ioctl and move on. - zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); return; } size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize; @@ -2809,14 +3086,16 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, if (moving_pages_status_[page_idx].compare_exchange_strong( expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) { // Note: ioctl acts as an acquire fence. 
- zeropage_ioctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); + ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); } else { DCHECK_EQ(expected_state, PageState::kProcessedAndMapping); } return; } - PageState state = moving_pages_status_[page_idx].load(std::memory_order_relaxed); + PageState state = moving_pages_status_[page_idx].load( + use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed); + uint32_t backoff_count = 0; while (true) { switch (state) { case PageState::kUnprocessed: { @@ -2824,13 +3103,13 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, // the page's state. Otherwise, we will end up leaving a window wherein // the GC-thread could observe that no worker is working on compaction // and could end up unregistering the moving space from userfaultfd. - ScopedInProgressCount in_progress(this); + ScopedInProgressCount spc(this); // Acquire order to ensure we don't start writing to shadow map, which is // shared, before the CAS is successful. Release order to ensure that the // increment to moving_compactions_in_progress above is not re-ordered // after the CAS. if (moving_pages_status_[page_idx].compare_exchange_strong( - state, PageState::kMutatorProcessing, std::memory_order_acquire)) { + state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) { if (kMode == kMinorFaultMode) { DCHECK_EQ(buf, nullptr); buf = shadow_to_space_map_.Begin() + page_idx * kPageSize; @@ -2853,7 +3132,12 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping, std::memory_order_release); if (kMode == kCopyMode) { - copy_ioctl(fault_page, buf); + CopyIoctl(fault_page, buf); + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread modifies the status at this stage. 
+ moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); + } return; } else { break; @@ -2864,7 +3148,8 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, case PageState::kProcessing: DCHECK_EQ(kMode, kMinorFaultMode); if (moving_pages_status_[page_idx].compare_exchange_strong( - state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) { + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { // Somebody else took or will take care of finishing the compaction and // then mapping the page. return; @@ -2873,7 +3158,17 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, case PageState::kProcessed: // The page is processed but not mapped. We should map it. break; - default: + case PageState::kProcessingAndMapping: + case PageState::kMutatorProcessing: + case PageState::kProcessedAndMapping: + if (use_uffd_sigbus_) { + // Wait for the page to be mapped before returning. + BackOff(backoff_count++); + state = moving_pages_status_[page_idx].load(std::memory_order_acquire); + continue; + } + return; + case PageState::kProcessedAndMapped: // Somebody else took care of the page. return; } @@ -2891,11 +3186,32 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, } } -template <int kMode, typename ZeropageType, typename CopyType> -void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl, - CopyType& copy_ioctl, - uint8_t* fault_page, - bool is_minor_fault) { +void MarkCompact::MapUpdatedLinearAllocPage(uint8_t* page, + uint8_t* shadow_page, + Atomic<PageState>& state, + bool page_touched) { + DCHECK(!minor_fault_initialized_); + if (page_touched) { + CopyIoctl(page, shadow_page); + } else { + // If the page wasn't touched, then it means it is empty and + // is most likely not present on the shadow-side. 
Furthermore, + // since the shadow is also userfaultfd registered doing copy + // ioctl fail as the copy-from-user in the kernel will cause + // userfault. Instead, just map a zeropage, which is not only + // correct but also efficient as it avoids unnecessary memcpy + // in the kernel. + ZeropageIoctl(page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + } + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread can modify the + // status of this page at this point. + state.store(PageState::kProcessedAndMapped, std::memory_order_release); + } +} + +template <int kMode> +void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) { DCHECK(!is_minor_fault || kMode == kMinorFaultMode); auto arena_iter = linear_alloc_arenas_.end(); { @@ -2907,7 +3223,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { // Fault page isn't in any of the arenas that existed before we started // compaction. So map zeropage and return. - zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); } else { // fault_page should always belong to some arena. DCHECK(arena_iter != linear_alloc_arenas_.end()) @@ -2925,19 +3241,26 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct size_t page_idx = (fault_page - space_data->begin_) / kPageSize; Atomic<PageState>* state_arr = reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); - PageState state = state_arr[page_idx].load(std::memory_order_relaxed); + PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? 
std::memory_order_acquire : + std::memory_order_relaxed); + uint32_t backoff_count = 0; while (true) { switch (state) { - case PageState::kUnprocessed: - if (state_arr[page_idx].compare_exchange_strong( - state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { + case PageState::kUnprocessed: { + // Acquire order to ensure we don't start writing to shadow map, which is + // shared, before the CAS is successful. + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { if (kMode == kCopyMode || is_minor_fault) { uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); DCHECK_NE(first_obj, nullptr); LinearAllocPageUpdater updater(this); updater(fault_page + diff, first_obj + diff); if (kMode == kCopyMode) { - copy_ioctl(fault_page, fault_page + diff); + MapUpdatedLinearAllocPage(fault_page, + fault_page + diff, + state_arr[page_idx], + updater.WasLastPageTouched()); return; } } else { @@ -2952,23 +3275,36 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct MapProcessedPages</*kFirstPageMapping=*/true>( fault_page, state_arr, page_idx, space_data->page_status_map_.Size()); return; - } - continue; + } + } + continue; case PageState::kProcessing: - DCHECK_EQ(kMode, kMinorFaultMode); - if (state_arr[page_idx].compare_exchange_strong( - state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) { + DCHECK_EQ(kMode, kMinorFaultMode); + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { // Somebody else took or will take care of finishing the updates and // then mapping the page. return; - } - continue; + } + continue; case PageState::kProcessed: - // The page is processed but not mapped. We should map it. - break; - default: - // Somebody else took care of the page. - return; + // The page is processed but not mapped. We should map it. 
+ break; + case PageState::kMutatorProcessing: + UNREACHABLE(); + case PageState::kProcessingAndMapping: + case PageState::kProcessedAndMapping: + if (use_uffd_sigbus_) { + // Wait for the page to be mapped before returning. + BackOff(backoff_count++); + state = state_arr[page_idx].load(std::memory_order_acquire); + continue; + } + return; + case PageState::kProcessedAndMapped: + // Somebody else took care of the page. + return; } break; } @@ -2986,80 +3322,106 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct } void MarkCompact::ProcessLinearAlloc() { + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); for (auto& pair : linear_alloc_arenas_) { const TrackedArena* arena = pair.first; - uint8_t* last_byte = pair.second; - DCHECK_ALIGNED(last_byte, kPageSize); - bool others_processing = false; - // Find the linear-alloc space containing the arena - LinearAllocSpaceData* space_data = nullptr; - for (auto& data : linear_alloc_spaces_data_) { - if (data.begin_ <= arena->Begin() && arena->Begin() < data.end_) { - space_data = &data; - break; - } - } - DCHECK_NE(space_data, nullptr); - ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_; - auto visitor = [space_data, last_byte, diff, this, &others_processing]( - uint8_t* page_begin, - uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { - // No need to process pages past last_byte as they already have updated - // gc-roots, if any. - if (page_begin >= last_byte) { - return; + size_t arena_size; + uint8_t* arena_begin; + ptrdiff_t diff; + bool others_processing; + { + // Acquire arena-pool's lock so that the arena being worked cannot be + // deallocated at the same time. + std::lock_guard<std::mutex> lock(arena_pool->GetLock()); + // If any arenas were freed since compaction pause then skip them from + // visiting. 
+ if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) { + continue; } - LinearAllocPageUpdater updater(this); - size_t page_idx = (page_begin - space_data->begin_) / kPageSize; - DCHECK_LT(page_idx, space_data->page_status_map_.Size()); - Atomic<PageState>* state_arr = - reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); - PageState expected_state = PageState::kUnprocessed; - PageState desired_state = - minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping; - // Acquire order to ensure that we don't start accessing the shadow page, - // which is shared with other threads, prior to CAS. Also, for same - // reason, we used 'release' order for changing the state to 'processed'. - if (state_arr[page_idx].compare_exchange_strong( - expected_state, desired_state, std::memory_order_acquire)) { - updater(page_begin + diff, first_obj + diff); - expected_state = PageState::kProcessing; - if (!minor_fault_initialized_) { - struct uffdio_copy uffd_copy; - uffd_copy.src = reinterpret_cast<uintptr_t>(page_begin + diff); - uffd_copy.dst = reinterpret_cast<uintptr_t>(page_begin); - uffd_copy.len = kPageSize; - uffd_copy.mode = 0; - CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) - << "ioctl_userfaultfd: linear-alloc copy failed:" << strerror(errno) - << ". dst:" << static_cast<void*>(page_begin); - DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); - } else if (!state_arr[page_idx].compare_exchange_strong( - expected_state, PageState::kProcessed, std::memory_order_release)) { - DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); - // Force read in case the page was missing and updater didn't touch it - // as there was nothing to do. This will ensure that a zeropage is - // faulted on the shadow map. 
- ForceRead(page_begin + diff); - MapProcessedPages</*kFirstPageMapping=*/true>( - page_begin, state_arr, page_idx, space_data->page_status_map_.Size()); + uint8_t* last_byte = pair.second; + DCHECK_ALIGNED(last_byte, kPageSize); + others_processing = false; + arena_begin = arena->Begin(); + arena_size = arena->Size(); + // Find the linear-alloc space containing the arena + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= arena_begin && arena_begin < data.end_) { + space_data = &data; + break; } - } else { - others_processing = true; } - }; + DCHECK_NE(space_data, nullptr); + diff = space_data->shadow_.Begin() - space_data->begin_; + auto visitor = [space_data, last_byte, diff, this, &others_processing]( + uint8_t* page_begin, + uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { + // No need to process pages past last_byte as they already have updated + // gc-roots, if any. + if (page_begin >= last_byte) { + return; + } + LinearAllocPageUpdater updater(this); + size_t page_idx = (page_begin - space_data->begin_) / kPageSize; + DCHECK_LT(page_idx, space_data->page_status_map_.Size()); + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping; + // Acquire order to ensure that we don't start accessing the shadow page, + // which is shared with other threads, prior to CAS. Also, for same + // reason, we used 'release' order for changing the state to 'processed'. 
+ if (state_arr[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_acquire)) { + updater(page_begin + diff, first_obj + diff); + expected_state = PageState::kProcessing; + if (!minor_fault_initialized_) { + MapUpdatedLinearAllocPage( + page_begin, page_begin + diff, state_arr[page_idx], updater.WasLastPageTouched()); + } else if (!state_arr[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessed, std::memory_order_release)) { + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + // Force read in case the page was missing and updater didn't touch it + // as there was nothing to do. This will ensure that a zeropage is + // faulted on the shadow map. + ForceRead(page_begin + diff); + MapProcessedPages</*kFirstPageMapping=*/true>( + page_begin, state_arr, page_idx, space_data->page_status_map_.Size()); + } + } else { + others_processing = true; + } + }; - arena->VisitRoots(visitor); + arena->VisitRoots(visitor); + } // If we are not in minor-fault mode and if no other thread was found to be // processing any pages in this arena, then we can madvise the shadow size. // Otherwise, we will double the memory use for linear-alloc. if (!minor_fault_initialized_ && !others_processing) { - ZeroAndReleasePages(arena->Begin() + diff, arena->Size()); + ZeroAndReleasePages(arena_begin + diff, arena_size); } } } +void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) { + DCHECK(IsValidFd(uffd_)); + struct uffdio_register uffd_register; + uffd_register.range.start = reinterpret_cast<uintptr_t>(addr); + uffd_register.range.len = size; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (mode == kMinorFaultMode) { + uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR; + } + CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) + << "ioctl_userfaultfd: register failed: " << strerror(errno) + << ". 
start:" << static_cast<void*>(addr) << " len:" << PrettySize(size); +} + void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) { + DCHECK(IsValidFd(uffd_)); struct uffdio_range range; range.start = reinterpret_cast<uintptr_t>(start); range.len = len; @@ -3092,10 +3454,15 @@ void MarkCompact::CompactionPhase() { CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin()); } - // TODO: add more sophisticated logic here wherein we sleep after attempting - // yield a couple of times. - while (compaction_in_progress_count_.load(std::memory_order_relaxed) > 0) { - sched_yield(); + // Make sure no mutator is reading from the from-space before unregistering + // userfaultfd from moving-space and then zapping from-space. The mutator + // and GC may race to set a page state to processing or further along. The two + // attempts are ordered. If the collector wins, then the mutator will see that + // and not access the from-space page. If the muator wins, then the + // compaction_in_progress_count_ increment by the mutator happens-before the test + // here, and we will not see a zero value until the mutator has completed. + for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) { + BackOff(i); } size_t moving_space_size = bump_pointer_space_->Capacity(); @@ -3144,17 +3511,29 @@ void MarkCompact::CompactionPhase() { ProcessLinearAlloc(); - DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_)); - // We will only iterate once if gKernelHasFaultRetry is true. - do { - // madvise the page so that we can get userfaults on it. - ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); - // The following load triggers 'special' userfaults. When received by the - // thread-pool workers, they will exit out of the compaction task. This fault - // happens because we madvised the page. 
- ForceRead(conc_compaction_termination_page_); - } while (thread_pool_counter_ > 0); - + if (use_uffd_sigbus_) { + // Set compaction-done bit so that no new mutator threads start compaction + // process in the SIGBUS handler. + SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask, + std::memory_order_acq_rel); + // Wait for SIGBUS handlers already in play. + for (uint32_t i = 0; count > 0; i++) { + BackOff(i); + count = sigbus_in_progress_count_.load(std::memory_order_acquire); + count &= ~kSigbusCounterCompactionDoneMask; + } + } else { + DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_)); + // We will only iterate once if gKernelHasFaultRetry is true. + do { + // madvise the page so that we can get userfaults on it. + ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); + // The following load triggers 'special' userfaults. When received by the + // thread-pool workers, they will exit out of the compaction task. This fault + // happens because we madvised the page. + ForceRead(conc_compaction_termination_page_); + } while (thread_pool_counter_ > 0); + } // Unregister linear-alloc spaces for (auto& data : linear_alloc_spaces_data_) { DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size())); @@ -3172,7 +3551,9 @@ void MarkCompact::CompactionPhase() { } } - heap_->GetThreadPool()->StopWorkers(thread_running_gc_); + if (!use_uffd_sigbus_) { + heap_->GetThreadPool()->StopWorkers(thread_running_gc_); + } } template <size_t kBufferSize> @@ -3215,7 +3596,7 @@ class MarkCompact::ThreadRootsVisitor : public RootVisitor { StackReference<mirror::Object>* start; StackReference<mirror::Object>* end; { - MutexLock mu(self_, mark_compact_->mark_stack_lock_); + MutexLock mu(self_, mark_compact_->lock_); // Loop here because even after expanding once it may not be sufficient to // accommodate all references. It's almost impossible, but there is no harm // in implementing it this way. 
@@ -3260,6 +3641,8 @@ class MarkCompact::CheckpointMarkThreadRoots : public Closure { ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self); thread->VisitRoots(&visitor, kVisitRootFlagAllRoots); } + // Clear page-buffer to prepare for compaction phase. + thread->SetThreadLocalGcBuffer(nullptr); // If thread is a running mutator, then act on behalf of the garbage // collector. See the code in ThreadList::RunCheckpoint. @@ -3568,11 +3951,12 @@ size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t ch return words * kAlignment; } -void MarkCompact::UpdateLivenessInfo(mirror::Object* obj) { +void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) { DCHECK(obj != nullptr); + DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>()); uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj); UpdateClassAfterObjectMap(obj); - size_t size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); + size_t size = RoundUp(obj_size, kAlignment); uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size); size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize; // Compute the bit-index within the chunk-info vector word. @@ -3591,10 +3975,16 @@ void MarkCompact::UpdateLivenessInfo(mirror::Object* obj) { template <bool kUpdateLiveWords> void MarkCompact::ScanObject(mirror::Object* obj) { + // The size of `obj` is used both here (to update `bytes_scanned_`) and in + // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once + // here and pass that information to `UpdateLivenessInfo`. 
+ size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>(); + bytes_scanned_ += obj_size; + RefFieldsVisitor visitor(this); DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces(); if (kUpdateLiveWords && moving_space_bitmap_->HasAddress(obj)) { - UpdateLivenessInfo(obj); + UpdateLivenessInfo(obj, obj_size); } obj->VisitReferences(visitor, visitor); } @@ -3779,24 +4169,25 @@ void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass, } void MarkCompact::FinishPhase() { + GetCurrentIteration()->SetScannedBytes(bytes_scanned_); bool is_zygote = Runtime::Current()->IsZygote(); + compacting_ = false; minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_; - // When poisoning ObjPtr, we are forced to use buffers for page compaction in - // lower 4GB. Now that the usage is done, madvise them. But skip the first - // page, which is used by the gc-thread for the next iteration. Otherwise, we - // get into a deadlock due to userfault on it in the next iteration. This page - // is not consuming any physical memory because we already madvised it above - // and then we triggered a read userfault, which maps a special zero-page. - if (!minor_fault_initialized_ || !shadow_to_space_map_.IsValid() || + // Madvise compaction buffers. When using threaded implementation, skip the first page, + // which is used by the gc-thread for the next iteration. Otherwise, we get into a + // deadlock due to userfault on it in the next iteration. This page is not consuming any + // physical memory because we already madvised it above and then we triggered a read + // userfault, which maps a special zero-page. + if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() || shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) { - ZeroAndReleasePages(compaction_buffers_map_.Begin() + kPageSize, - compaction_buffers_map_.Size() - kPageSize); + size_t adjustment = use_uffd_sigbus_ ? 
0 : kPageSize; + ZeroAndReleasePages(compaction_buffers_map_.Begin() + adjustment, + compaction_buffers_map_.Size() - adjustment); } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) { // Now that we are going to use minor-faults from next GC cycle, we can // unmap the buffers used by worker threads. compaction_buffers_map_.SetSize(kPageSize); } - info_map_.MadviseDontNeedAndZero(); live_words_bitmap_->ClearBitmap(); // TODO: We can clear this bitmap right before compaction pause. But in that @@ -3814,14 +4205,17 @@ void MarkCompact::FinishPhase() { } CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. mark_stack_->Reset(); - if (kIsDebugBuild && updated_roots_.get() != nullptr) { - updated_roots_->clear(); + DCHECK_EQ(thread_running_gc_, Thread::Current()); + if (kIsDebugBuild) { + MutexLock mu(thread_running_gc_, lock_); + if (updated_roots_.get() != nullptr) { + updated_roots_->clear(); + } } class_after_obj_ordered_map_.clear(); delete[] moving_pages_status_; linear_alloc_arenas_.clear(); { - DCHECK_EQ(thread_running_gc_, Thread::Current()); ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); heap_->ClearMarkedObjects(); diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 78ee5c5022..a6f7912ed4 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -17,6 +17,8 @@ #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ #define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ +#include <signal.h> + #include <map> #include <memory> #include <unordered_map> @@ -37,6 +39,8 @@ namespace art { +bool KernelSupportsUffd(); + namespace mirror { class DexCache; } // namespace mirror @@ -52,28 +56,36 @@ class BumpPointerSpace; namespace collector { class MarkCompact final : public GarbageCollector { public: + using SigbusCounterType = uint32_t; + static constexpr size_t kAlignment = 
kObjectAlignment; static constexpr int kCopyMode = -1; static constexpr int kMinorFaultMode = -2; // Fake file descriptor for fall back mode (when uffd isn't available) static constexpr int kFallbackMode = -3; - static constexpr int kFdSharedAnon = -1; static constexpr int kFdUnused = -2; + // Bitmask for the compaction-done bit in the sigbus_in_progress_count_. + static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask = + 1u << (BitSizeOf<SigbusCounterType>() - 1); + explicit MarkCompact(Heap* heap); ~MarkCompact() {} - void RunPhases() override REQUIRES(!Locks::mutator_lock_); + void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_); // Updated before (or in) pre-compaction pause and is accessed only in the - // pause or during concurrent compaction. The flag is reset after compaction - // is completed and never accessed by mutators. Therefore, safe to update - // without any memory ordering. - bool IsCompacting(Thread* self) const { - return compacting_ && self == thread_running_gc_; - } + // pause or during concurrent compaction. The flag is reset in next GC cycle's + // InitializePhase(). Therefore, it's safe to update without any memory ordering. + bool IsCompacting() const { return compacting_; } + + bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; } + + // Called by SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for mutator-lock, which + // is asserted in the function. + bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS; GcType GetGcType() const override { return kGcTypeFull; @@ -121,11 +133,6 @@ class MarkCompact final : public GarbageCollector { mirror::Object* IsMarked(mirror::Object* obj) override REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_); - // Perform GC-root updation and heap protection so that during the concurrent - // compaction phase we can receive faults and compact the corresponding pages - // on the fly. This is performed in a STW pause. 
- void CompactionPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_); - mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) { CHECK(compacting_); if (live_words_bitmap_->HasAddress(old_ref)) { @@ -155,7 +162,8 @@ class MarkCompact final : public GarbageCollector { kProcessed = 2, // Processed but not mapped kProcessingAndMapping = 3, // Being processed by GC or mutator and will be mapped kMutatorProcessing = 4, // Being processed by mutator thread - kProcessedAndMapping = 5 // Processed and will be mapped + kProcessedAndMapping = 5, // Processed and will be mapped + kProcessedAndMapped = 6 // Processed and mapped. For SIGBUS. }; private: @@ -241,7 +249,7 @@ class MarkCompact final : public GarbageCollector { // mirror::Class. bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_); void InitializePhase(); - void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_); + void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_); void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_); @@ -294,7 +302,7 @@ class MarkCompact final : public GarbageCollector { // Updates GC-roots and protects heap so that during the concurrent // compaction phase we can receive faults and compact the corresponding pages // on the fly. - void PreCompactionPhase() REQUIRES(Locks::mutator_lock_); + void CompactionPause() REQUIRES(Locks::mutator_lock_); // Compute offsets (in chunk_info_vec_) and other data structures required // during concurrent compaction. void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_); @@ -414,7 +422,8 @@ class MarkCompact final : public GarbageCollector { // Update the live-words bitmap as well as add the object size to the // chunk-info vector. Both are required for computation of post-compact addresses. // Also updates freed_objects_ counter. 
- void UpdateLivenessInfo(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_); + void UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) + REQUIRES_SHARED(Locks::mutator_lock_); void ProcessReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) @@ -446,15 +455,15 @@ class MarkCompact final : public GarbageCollector { // mremap to move pre-compact pages to from-space, followed by userfaultfd // registration on the moving space and linear-alloc. void KernelPreparation(); - // Called by KernelPreparation() for every memory range being prepared. - void KernelPrepareRange(uint8_t* to_addr, - uint8_t* from_addr, - size_t map_size, - size_t uffd_size, - int fd, - int uffd_mode, - uint8_t* shadow_addr = nullptr); - // Unregister given range from userfaultfd. + // Called by KernelPreparation() for every memory range being prepared for + // userfaultfd registration. + void KernelPrepareRangeForUffd(uint8_t* to_addr, + uint8_t* from_addr, + size_t map_size, + int fd, + uint8_t* shadow_addr = nullptr); + + void RegisterUffd(void* addr, size_t size, int mode); void UnregisterUffd(uint8_t* start, size_t len); // Called by thread-pool workers to read uffd_ and process fault events. @@ -462,20 +471,15 @@ class MarkCompact final : public GarbageCollector { void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_); // Called by thread-pool workers to compact and copy/map the fault page in // moving space. - template <int kMode, typename ZeropageType, typename CopyType> - void ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, - CopyType& copy_ioctl, - uint8_t* fault_page, + template <int kMode> + void ConcurrentlyProcessMovingPage(uint8_t* fault_page, uint8_t* buf, size_t nr_moving_space_used_pages) REQUIRES_SHARED(Locks::mutator_lock_); // Called by thread-pool workers to process and copy/map the fault page in // linear-alloc. 
- template <int kMode, typename ZeropageType, typename CopyType> - void ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl, - CopyType& copy_ioctl, - uint8_t* fault_page, - bool is_minor_fault) + template <int kMode> + void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) REQUIRES_SHARED(Locks::mutator_lock_); // Process concurrently all the pages in linear-alloc. Called by gc-thread. @@ -485,7 +489,7 @@ class MarkCompact final : public GarbageCollector { // feature. bool CanCompactMovingSpaceWithMinorFault(); - void FreeFromSpacePages(size_t cur_page_idx) REQUIRES_SHARED(Locks::mutator_lock_); + void FreeFromSpacePages(size_t cur_page_idx, int mode) REQUIRES_SHARED(Locks::mutator_lock_); // Maps processed pages (from moving space and linear-alloc) for uffd's // minor-fault feature. We try to 'claim' all processed (and unmapped) pages @@ -513,20 +517,24 @@ class MarkCompact final : public GarbageCollector { void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); - // Buffers, one per worker thread + gc-thread, to be used when - // kObjPtrPoisoning == true as in that case we can't have the buffer on the - // stack. The first page of the buffer is assigned to - // conc_compaction_termination_page_. A read access to this page signals - // termination of concurrent compaction by making worker threads terminate the - // userfaultfd read loop. - MemMap compaction_buffers_map_; + void ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent); + void CopyIoctl(void* dst, void* buffer); + // Called after updating a linear-alloc page to either map a zero-page if the + // page wasn't touched during updation, or map the page via copy-ioctl. And + // then updates the page's state to indicate the page is mapped. 
+ void MapUpdatedLinearAllocPage(uint8_t* page, + uint8_t* shadow_page, + Atomic<PageState>& state, + bool page_touched); + // For checkpoints Barrier gc_barrier_; // Every object inside the immune spaces is assumed to be marked. ImmuneSpaces immune_spaces_; // Required only when mark-stack is accessed in shared mode, which happens - // when collecting thread-stack roots using checkpoint. - Mutex mark_stack_lock_; + // when collecting thread-stack roots using checkpoint. Otherwise, we use it + // to synchronize on updated_roots_ in debug-builds. + Mutex lock_; accounting::ObjectStack* mark_stack_; // Special bitmap wherein all the bits corresponding to an object are set. // TODO: make LiveWordsBitmap encapsulated in this class rather than a @@ -539,12 +547,18 @@ class MarkCompact final : public GarbageCollector { // GC-root is updated twice. // TODO: Must be replaced with an efficient mechanism eventually. Or ensure // that double updation doesn't happen in the first place. - std::unique_ptr<std::unordered_set<void*>> updated_roots_; + std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_); MemMap from_space_map_; MemMap shadow_to_space_map_; // Any array of live-bytes in logical chunks of kOffsetChunkSize size // in the 'to-be-compacted' space. MemMap info_map_; + // Set of page-sized buffers used for compaction. The first page is used by + // the GC thread. Subsequent pages are used by mutator threads in case of + // SIGBUS feature, and by uffd-worker threads otherwise. In the latter case + // the first page is also used for termination of concurrent compaction by + // making worker threads terminate the userfaultfd read loop. 
+ MemMap compaction_buffers_map_; class LessByArenaAddr { public: @@ -637,11 +651,13 @@ class MarkCompact final : public GarbageCollector { accounting::ContinuousSpaceBitmap* const moving_space_bitmap_; accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; Thread* thread_running_gc_; - // Array of pages' compaction status. + // Array of moving-space's pages' compaction status. Atomic<PageState>* moving_pages_status_; size_t vector_length_; size_t live_stack_freeze_size_; + uint64_t bytes_scanned_; + // For every page in the to-space (post-compact heap) we need to know the // first object from which we must compact and/or update references. This is // for both non-moving and moving space. Additionally, for the moving-space, @@ -709,9 +725,20 @@ class MarkCompact final : public GarbageCollector { // Userfault file descriptor, accessed only by the GC itself. // kFallbackMode value indicates that we are in the fallback mode. int uffd_; + // Number of mutator-threads currently executing SIGBUS handler. When the + // GC-thread is done with compaction, it sets the most significant bit to + // indicate that. Mutator threads check for the flag when incrementing in the + // handler. + std::atomic<SigbusCounterType> sigbus_in_progress_count_; + // Number of mutator-threads/uffd-workers working on moving-space page. It + // must be 0 before gc-thread can unregister the space after it's done + // sequentially compacting all pages of the space. + std::atomic<uint16_t> compaction_in_progress_count_; + // When using SIGBUS feature, this counter is used by mutators to claim a page + // out of compaction buffers to be used for the entire compaction cycle. + std::atomic<uint16_t> compaction_buffer_counter_; // Used to exit from compaction loop at the end of concurrent compaction uint8_t thread_pool_counter_; - std::atomic<uint8_t> compaction_in_progress_count_; // True while compacting. bool compacting_; // Flag indicating whether one-time uffd initialization has been done. 
It will @@ -723,6 +750,9 @@ class MarkCompact final : public GarbageCollector { // Flag indicating if userfaultfd supports minor-faults. Set appropriately in // CreateUserfaultfd(), where we get this information from the kernel. const bool uffd_minor_fault_supported_; + // Flag indicating if we should use sigbus signals instead of threads to + // handle userfaults. + const bool use_uffd_sigbus_; // For non-zygote processes this flag indicates if the spaces are ready to // start using userfaultfd's minor-fault feature. This initialization involves // starting to use shmem (memfd_create) for the userfaultfd protected spaces. @@ -732,9 +762,12 @@ class MarkCompact final : public GarbageCollector { // minor-fault from next GC. bool map_linear_alloc_shared_; + class FlipCallback; + class ThreadFlipVisitor; class VerifyRootMarkedVisitor; class ScanObjectVisitor; class CheckpointMarkThreadRoots; + class CheckpointSweepInterpreterCache; template<size_t kBufferSize> class ThreadRootsVisitor; class CardModifiedVisitor; class RefFieldsVisitor; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index c450676c91..ccba536695 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1488,6 +1488,8 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType Runtime::Current()->GetPreAllocatedOutOfMemoryErrorWhenHandlingStackOverflow()); return; } + // Allow plugins to intercept out of memory errors. + Runtime::Current()->OutOfMemoryErrorHook(); std::ostringstream oss; size_t total_bytes_free = GetFreeMemory(); @@ -1541,15 +1543,15 @@ void Heap::DoPendingCollectorTransition() { } else { VLOG(gc) << "Homogeneous compaction ignored due to jank perceptible process state"; } - } else if (desired_collector_type == kCollectorTypeCCBackground) { - DCHECK(gUseReadBarrier); + } else if (desired_collector_type == kCollectorTypeCCBackground || + desired_collector_type == kCollectorTypeCMC) { if (!CareAboutPauseTimes()) { - // Invoke CC full compaction. 
+ // Invoke full compaction. CollectGarbageInternal(collector::kGcTypeFull, kGcCauseCollectorTransition, /*clear_soft_references=*/false, GC_NUM_ANY); } else { - VLOG(gc) << "CC background compaction ignored due to jank perceptible process state"; + VLOG(gc) << "background compaction ignored due to jank perceptible process state"; } } else { CHECK_EQ(desired_collector_type, collector_type_) << "Unsupported collector transition"; diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 26cb3be635..a6a162ad7a 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -34,6 +34,7 @@ #include "base/time_utils.h" #include "gc/collector/gc_type.h" #include "gc/collector/iteration.h" +#include "gc/collector/mark_compact.h" #include "gc/collector_type.h" #include "gc/gc_cause.h" #include "gc/space/large_object_space.h" @@ -87,7 +88,6 @@ class RememberedSet; namespace collector { class ConcurrentCopying; class GarbageCollector; -class MarkCompact; class MarkSweep; class SemiSpace; } // namespace collector @@ -826,10 +826,14 @@ class Heap { } collector::MarkCompact* MarkCompactCollector() { + DCHECK(!gUseUserfaultfd || mark_compact_ != nullptr); return mark_compact_; } + bool IsPerformingUffdCompaction() { return gUseUserfaultfd && mark_compact_->IsCompacting(); } + CollectorType CurrentCollectorType() { + DCHECK(!gUseUserfaultfd || collector_type_ == kCollectorTypeCMC); return collector_type_; } diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc index 244b294dcc..f8093f9ea3 100644 --- a/runtime/gc/heap_test.cc +++ b/runtime/gc/heap_test.cc @@ -102,10 +102,7 @@ TEST_F(HeapTest, DumpGCPerformanceOnShutdown) { Runtime::Current()->SetDumpGCPerformanceOnShutdown(true); } -template <typename T> -bool AnyIsNonNull(const metrics::MetricsBase<T>* x, const metrics::MetricsBase<T>* y) { - return !x->IsNull() || !y->IsNull(); -} +bool AnyIsFalse(bool x, bool y) { return !x || !y; } TEST_F(HeapTest, GCMetrics) { // Allocate a few string objects (to be collected), then trigger 
garbage @@ -163,19 +160,24 @@ TEST_F(HeapTest, GCMetrics) { if (heap->GetUseGenerationalCC()) { // Check that full-heap and/or young-generation GC metrics are non-null // after trigerring the collection. - EXPECT_PRED2(AnyIsNonNull<int64_t>, full_gc_collection_time, young_gc_collection_time); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_count, young_gc_count); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_count_delta, young_gc_count_delta); - EXPECT_PRED2(AnyIsNonNull<int64_t>, full_gc_throughput, young_gc_throughput); - EXPECT_PRED2(AnyIsNonNull<int64_t>, full_gc_tracing_throughput, young_gc_tracing_throughput); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_throughput_avg, young_gc_throughput_avg); EXPECT_PRED2( - AnyIsNonNull<uint64_t>, full_gc_tracing_throughput_avg, young_gc_tracing_throughput_avg); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_scanned_bytes, young_gc_scanned_bytes); + AnyIsFalse, full_gc_collection_time->IsNull(), young_gc_collection_time->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_count->IsNull(), young_gc_count->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_count_delta->IsNull(), young_gc_count_delta->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_throughput->IsNull(), young_gc_throughput->IsNull()); + EXPECT_PRED2( + AnyIsFalse, full_gc_tracing_throughput->IsNull(), young_gc_tracing_throughput->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_throughput_avg->IsNull(), young_gc_throughput_avg->IsNull()); + EXPECT_PRED2(AnyIsFalse, + full_gc_tracing_throughput_avg->IsNull(), + young_gc_tracing_throughput_avg->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_scanned_bytes->IsNull(), young_gc_scanned_bytes->IsNull()); + EXPECT_PRED2(AnyIsFalse, + full_gc_scanned_bytes_delta->IsNull(), + young_gc_scanned_bytes_delta->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_freed_bytes->IsNull(), young_gc_freed_bytes->IsNull()); EXPECT_PRED2( - AnyIsNonNull<uint64_t>, full_gc_scanned_bytes_delta, young_gc_scanned_bytes_delta); - 
EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_freed_bytes, young_gc_freed_bytes); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_freed_bytes_delta, young_gc_freed_bytes_delta); + AnyIsFalse, full_gc_freed_bytes_delta->IsNull(), young_gc_freed_bytes_delta->IsNull()); // We have observed that sometimes the GC duration (both for full-heap and // young-generation collections) is null (b/271112044). Temporarily // suspend the following checks while we investigate. @@ -183,8 +185,8 @@ TEST_F(HeapTest, GCMetrics) { // TODO(b/271112044): Investigate and adjust these expectations and/or the // corresponding metric logic. #if 0 - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_duration, young_gc_duration); - EXPECT_PRED2(AnyIsNonNull<uint64_t>, full_gc_duration_delta, young_gc_duration_delta); + EXPECT_PRED2(AnyIsFalse, full_gc_duration->IsNull(), young_gc_duration->IsNull()); + EXPECT_PRED2(AnyIsFalse, full_gc_duration_delta->IsNull(), young_gc_duration_delta->IsNull()); #endif } else { // Check that only full-heap GC metrics are non-null after trigerring the collection. @@ -195,12 +197,8 @@ TEST_F(HeapTest, GCMetrics) { EXPECT_FALSE(full_gc_tracing_throughput->IsNull()); EXPECT_FALSE(full_gc_throughput_avg->IsNull()); EXPECT_FALSE(full_gc_tracing_throughput_avg->IsNull()); - if (fg_collector_type != kCollectorTypeCMC) { - // TODO(b/270957146): For some reason, these metrics are still null - // after running the Concurrent Mark-Compact collector; investigate why. 
- EXPECT_FALSE(full_gc_scanned_bytes->IsNull()); - EXPECT_FALSE(full_gc_scanned_bytes_delta->IsNull()); - } + EXPECT_FALSE(full_gc_scanned_bytes->IsNull()); + EXPECT_FALSE(full_gc_scanned_bytes_delta->IsNull()); EXPECT_FALSE(full_gc_freed_bytes->IsNull()); EXPECT_FALSE(full_gc_freed_bytes_delta->IsNull()); EXPECT_FALSE(full_gc_duration->IsNull()); diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index a06fe24b76..3e8c5dc9a7 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -1594,21 +1594,21 @@ bool JitCodeCache::IsOsrCompiled(ArtMethod* method) { } void JitCodeCache::VisitRoots(RootVisitor* visitor) { - Thread* self = Thread::Current(); - gc::Heap* const heap = Runtime::Current()->GetHeap(); - if (heap->CurrentCollectorType() != gc::CollectorType::kCollectorTypeCMC - || !heap->MarkCompactCollector()->IsCompacting(self)) { - MutexLock mu(self, *Locks::jit_lock_); - UnbufferedRootVisitor root_visitor(visitor, RootInfo(kRootStickyClass)); - for (ArtMethod* method : current_optimized_compilations_) { - method->VisitRoots(root_visitor, kRuntimePointerSize); - } - for (ArtMethod* method : current_baseline_compilations_) { - method->VisitRoots(root_visitor, kRuntimePointerSize); - } - for (ArtMethod* method : current_osr_compilations_) { - method->VisitRoots(root_visitor, kRuntimePointerSize); - } + if (Runtime::Current()->GetHeap()->IsPerformingUffdCompaction()) { + // In case of userfaultfd compaction, ArtMethods are updated concurrently + // via linear-alloc. 
+ return; + } + MutexLock mu(Thread::Current(), *Locks::jit_lock_); + UnbufferedRootVisitor root_visitor(visitor, RootInfo(kRootStickyClass)); + for (ArtMethod* method : current_optimized_compilations_) { + method->VisitRoots(root_visitor, kRuntimePointerSize); + } + for (ArtMethod* method : current_baseline_compilations_) { + method->VisitRoots(root_visitor, kRuntimePointerSize); + } + for (ArtMethod* method : current_osr_compilations_) { + method->VisitRoots(root_visitor, kRuntimePointerSize); } } diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index ff4693f55e..c4576640fd 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -93,7 +93,7 @@ inline MirrorType* ReadBarrier::Barrier( UNREACHABLE(); } } else if (kReadBarrierOption == kWithFromSpaceBarrier) { - CHECK(gUseUserfaultfd); + DCHECK(gUseUserfaultfd); MirrorType* old = ref_addr->template AsMirrorPtr<kIsVolatile>(); mirror::Object* ref = Runtime::Current()->GetHeap()->MarkCompactCollector()->GetFromSpaceAddrFromBarrier(old); @@ -143,6 +143,11 @@ inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root, LOG(FATAL) << "Unexpected read barrier type"; UNREACHABLE(); } + } else if (kReadBarrierOption == kWithFromSpaceBarrier) { + DCHECK(gUseUserfaultfd); + mirror::Object* from_ref = + Runtime::Current()->GetHeap()->MarkCompactCollector()->GetFromSpaceAddrFromBarrier(ref); + return reinterpret_cast<MirrorType*>(from_ref); } else { return ref; } @@ -190,6 +195,11 @@ inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<Mirro LOG(FATAL) << "Unexpected read barrier type"; UNREACHABLE(); } + } else if (kReadBarrierOption == kWithFromSpaceBarrier) { + DCHECK(gUseUserfaultfd); + mirror::Object* from_ref = + Runtime::Current()->GetHeap()->MarkCompactCollector()->GetFromSpaceAddrFromBarrier(ref); + return reinterpret_cast<MirrorType*>(from_ref); } else { return ref; } diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 73a337a5e6..195256b21a 
100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -308,7 +308,8 @@ Runtime::Runtime() verifier_logging_threshold_ms_(100), verifier_missing_kthrow_fatal_(false), perfetto_hprof_enabled_(false), - perfetto_javaheapprof_enabled_(false) { + perfetto_javaheapprof_enabled_(false), + out_of_memory_error_hook_(nullptr) { static_assert(Runtime::kCalleeSaveSize == static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType), "Unexpected size"); CheckConstants(); @@ -1747,11 +1748,10 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { break; } + fault_manager.Init(!no_sig_chain_); if (!no_sig_chain_) { // Dex2Oat's Runtime does not need the signal chain or the fault handler. if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) { - fault_manager.Init(); - // These need to be in a specific order. The null point check handler must be // after the suspend check and stack overflow check handlers. // @@ -2529,17 +2529,20 @@ void Runtime::VisitReflectiveTargets(ReflectiveValueVisitor *visitor) { } void Runtime::VisitImageRoots(RootVisitor* visitor) { - for (auto* space : GetHeap()->GetContinuousSpaces()) { - if (space->IsImageSpace()) { - auto* image_space = space->AsImageSpace(); - const auto& image_header = image_space->GetImageHeader(); - for (int32_t i = 0, size = image_header.GetImageRoots()->GetLength(); i != size; ++i) { - mirror::Object* obj = - image_header.GetImageRoot(static_cast<ImageHeader::ImageRoot>(i)).Ptr(); - if (obj != nullptr) { - mirror::Object* after_obj = obj; - visitor->VisitRoot(&after_obj, RootInfo(kRootStickyClass)); - CHECK_EQ(after_obj, obj); + // We only confirm that image roots are unchanged. 
+ if (kIsDebugBuild) { + for (auto* space : GetHeap()->GetContinuousSpaces()) { + if (space->IsImageSpace()) { + auto* image_space = space->AsImageSpace(); + const auto& image_header = image_space->GetImageHeader(); + for (int32_t i = 0, size = image_header.GetImageRoots()->GetLength(); i != size; ++i) { + mirror::Object* obj = + image_header.GetImageRoot(static_cast<ImageHeader::ImageRoot>(i)).Ptr(); + if (obj != nullptr) { + mirror::Object* after_obj = obj; + visitor->VisitRoot(&after_obj, RootInfo(kRootStickyClass)); + CHECK_EQ(after_obj, obj); + } } } } @@ -3386,7 +3389,7 @@ bool Runtime::GetOatFilesExecutable() const { void Runtime::ProcessWeakClass(GcRoot<mirror::Class>* root_ptr, IsMarkedVisitor* visitor, mirror::Class* update) { - // This does not need a read barrier because this is called by GC. + // This does not need a read barrier because this is called by GC. mirror::Class* cls = root_ptr->Read<kWithoutReadBarrier>(); if (cls != nullptr && cls != GetWeakClassSentinel()) { DCHECK((cls->IsClass<kDefaultVerifyFlags>())); diff --git a/runtime/runtime.h b/runtime/runtime.h index 21383f9fcb..a5515945df 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -1098,6 +1098,17 @@ class Runtime { // See Flags::ReloadAllFlags as well. static void ReloadAllFlags(const std::string& caller); + // Used by plugin code to attach a hook for OOME. 
+ void SetOutOfMemoryErrorHook(void (*hook)()) { + out_of_memory_error_hook_ = hook; + } + + void OutOfMemoryErrorHook() { + if (out_of_memory_error_hook_ != nullptr) { + out_of_memory_error_hook_(); + } + } + private: static void InitPlatformSignalHandlers(); @@ -1490,6 +1501,9 @@ class Runtime { bool perfetto_hprof_enabled_; bool perfetto_javaheapprof_enabled_; + // Called on out of memory error + void (*out_of_memory_error_hook_)(); + metrics::ArtMetrics metrics_; std::unique_ptr<metrics::MetricsReporter> metrics_reporter_; diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 4110ed2851..6431acfd3d 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -428,7 +428,8 @@ inline bool Thread::ModifySuspendCount(Thread* self, int delta, AtomicInteger* suspend_barrier, SuspendReason reason) { - if (delta > 0 && ((gUseReadBarrier && this != self) || suspend_barrier != nullptr)) { + if (delta > 0 && + (((gUseUserfaultfd || gUseReadBarrier) && this != self) || suspend_barrier != nullptr)) { // When delta > 0 (requesting a suspend), ModifySuspendCountInternal() may fail either if // active_suspend_barriers is full or we are in the middle of a thread flip. Retry in a loop. while (true) { diff --git a/runtime/thread.cc b/runtime/thread.cc index 920fb7a20c..08552b5334 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1482,7 +1482,7 @@ bool Thread::ModifySuspendCountInternal(Thread* self, return false; } - if (gUseReadBarrier && delta > 0 && this != self && tlsPtr_.flip_function != nullptr) { + if (delta > 0 && this != self && tlsPtr_.flip_function != nullptr) { // Force retry of a suspend request if it's in the middle of a thread flip to avoid a // deadlock. b/31683379. 
return false; @@ -3857,14 +3857,11 @@ class ReferenceMapVisitor : public StackVisitor { public: ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) - // We are visiting the references in compiled frames, so we do not need - // to know the inlined frames. + // We are visiting the references in compiled frames, so we do not need + // to know the inlined frames. : StackVisitor(thread, context, StackVisitor::StackWalkKind::kSkipInlinedFrames), - visitor_(visitor) { - gc::Heap* const heap = Runtime::Current()->GetHeap(); - visit_declaring_class_ = heap->CurrentCollectorType() != gc::CollectorType::kCollectorTypeCMC - || !heap->MarkCompactCollector()->IsCompacting(Thread::Current()); - } + visitor_(visitor), + visit_declaring_class_(!Runtime::Current()->GetHeap()->IsPerformingUffdCompaction()) {} bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) { if (false) { @@ -4297,8 +4294,10 @@ void Thread::VisitRoots(RootVisitor* visitor) { } #pragma GCC diagnostic pop -static void SweepCacheEntry(IsMarkedVisitor* visitor, const Instruction* inst, size_t* value) - REQUIRES_SHARED(Locks::mutator_lock_) { +static void SweepCacheEntry(IsMarkedVisitor* visitor, + const Instruction* inst, + size_t* value, + bool only_update_class) REQUIRES_SHARED(Locks::mutator_lock_) { if (inst == nullptr) { return; } @@ -4310,16 +4309,23 @@ static void SweepCacheEntry(IsMarkedVisitor* visitor, const Instruction* inst, s case Opcode::INSTANCE_OF: case Opcode::NEW_ARRAY: case Opcode::CONST_CLASS: { - mirror::Class* cls = reinterpret_cast<mirror::Class*>(*value); - if (cls == nullptr || cls == Runtime::GetWeakClassSentinel()) { - // Entry got deleted in a previous sweep. + // TODO: There is no reason to process weak-class differently from strings + // (below). Streamline the logic here and jit-code-cache. 
+ if (!only_update_class) { + mirror::Class* cls = reinterpret_cast<mirror::Class*>(*value); + if (cls == nullptr || cls == Runtime::GetWeakClassSentinel()) { + // Entry got deleted in a previous sweep. + return; + } + // Need to fetch from-space pointer for class in case of userfaultfd GC. + Runtime::ProcessWeakClass(reinterpret_cast<GcRoot<mirror::Class>*>(value), + visitor, + Runtime::GetWeakClassSentinel()); + return; + } else if (reinterpret_cast<mirror::Class*>(*value) == Runtime::GetWeakClassSentinel()) { return; } - Runtime::ProcessWeakClass( - reinterpret_cast<GcRoot<mirror::Class>*>(value), - visitor, - Runtime::GetWeakClassSentinel()); - return; + FALLTHROUGH_INTENDED; } case Opcode::CONST_STRING: case Opcode::CONST_STRING_JUMBO: { @@ -4350,8 +4356,12 @@ static void SweepCacheEntry(IsMarkedVisitor* visitor, const Instruction* inst, s } void Thread::SweepInterpreterCache(IsMarkedVisitor* visitor) { + bool only_update_class = Runtime::Current()->GetHeap()->IsPerformingUffdCompaction(); for (InterpreterCache::Entry& entry : GetInterpreterCache()->GetArray()) { - SweepCacheEntry(visitor, reinterpret_cast<const Instruction*>(entry.first), &entry.second); + SweepCacheEntry(visitor, + reinterpret_cast<const Instruction*>(entry.first), + &entry.second, + only_update_class); } } diff --git a/runtime/thread.h b/runtime/thread.h index f9303d80b0..4ee4f6343a 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -384,6 +384,15 @@ class Thread { tlsPtr_.thread_local_mark_stack = stack; } + uint8_t* GetThreadLocalGcBuffer() { + DCHECK(gUseUserfaultfd); + return tlsPtr_.thread_local_gc_buffer; + } + void SetThreadLocalGcBuffer(uint8_t* buf) { + DCHECK(gUseUserfaultfd); + tlsPtr_.thread_local_gc_buffer = buf; + } + // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero. 
void FullSuspendCheck(bool implicit = false) @@ -2036,8 +2045,12 @@ class Thread { // Current method verifier, used for root marking. verifier::MethodVerifier* method_verifier; - // Thread-local mark stack for the concurrent copying collector. - gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack; + union { + // Thread-local mark stack for the concurrent copying collector. + gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack; + // Thread-local page-sized buffer for userfaultfd GC. + uint8_t* thread_local_gc_buffer; + }; // The pending async-exception or null. mirror::Throwable* async_exception; @@ -2194,16 +2207,10 @@ class ScopedTransitioningToRunnable : public ValueObject { explicit ScopedTransitioningToRunnable(Thread* self) : self_(self) { DCHECK_EQ(self, Thread::Current()); - if (gUseReadBarrier) { - self_->SetIsTransitioningToRunnable(true); - } + self_->SetIsTransitioningToRunnable(true); } - ~ScopedTransitioningToRunnable() { - if (gUseReadBarrier) { - self_->SetIsTransitioningToRunnable(false); - } - } + ~ScopedTransitioningToRunnable() { self_->SetIsTransitioningToRunnable(false); } private: Thread* const self_; diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index c522be3d69..43650c58cc 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -514,11 +514,13 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Locks::thread_list_lock_->AssertNotHeld(self); Locks::thread_suspend_count_lock_->AssertNotHeld(self); CHECK_NE(self->GetState(), ThreadState::kRunnable); + size_t runnable_thread_count = 0; + std::vector<Thread*> other_threads; collector->GetHeap()->ThreadFlipBegin(self); // Sync with JNI critical calls. - // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the - // pause. + // ThreadFlipBegin happens before we suspend all the threads, so it does not + // count towards the pause. 
const uint64_t suspend_start_time = NanoTime(); SuspendAllInternal(self, self, nullptr); if (pause_listener != nullptr) { @@ -529,15 +531,28 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Locks::mutator_lock_->ExclusiveLock(self); suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time); flip_callback->Run(self); - Locks::mutator_lock_->ExclusiveUnlock(self); - collector->RegisterPause(NanoTime() - suspend_start_time); - if (pause_listener != nullptr) { - pause_listener->EndPause(); + // Releasing mutator-lock *before* setting up flip function in the threads + // leaves a gap for another thread trying to suspend all threads. That thread + // gets to run with mutator-lock, thereby accessing the heap, without running + // its flip function. It's not a problem with CC as the gc-thread hasn't + // started marking yet and the from-space is accessible. By delaying releasing + // mutator-lock until after the flip function are running on all threads we + // fix that without increasing pause time, except for any thread that might be + // trying to suspend all. Even though the change works irrespective of the GC, + // it has been limited to userfaultfd GC to keep the change behind the flag. + // + // TODO: It's a temporary change as aosp/2377951 is going to clean-up at a + // broad scale, including not allowing concurrent suspend-all. + // + // Compiler complains that the mutator is not held on all paths across this + // function, even though it's not required. Faking it to suppress the error. + auto fake_mutator_lock_acquire = []() ACQUIRE(*Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS {}; + auto fake_mutator_lock_release = []() RELEASE(*Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS {}; + if (!gUseUserfaultfd) { + Locks::mutator_lock_->ExclusiveUnlock(self); + fake_mutator_lock_acquire(); } - // Resume runnable threads. 
- size_t runnable_thread_count = 0; - std::vector<Thread*> other_threads; { TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings()); MutexLock mu(self, *Locks::thread_list_lock_); @@ -568,19 +583,36 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Thread::resume_cond_->Broadcast(self); } + collector->RegisterPause(NanoTime() - suspend_start_time); + if (pause_listener != nullptr) { + pause_listener->EndPause(); + } collector->GetHeap()->ThreadFlipEnd(self); // Try to run the closure on the other threads. { TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings()); - ReaderMutexLock mu(self, *Locks::mutator_lock_); - for (Thread* thread : other_threads) { - thread->EnsureFlipFunctionStarted(self); - DCHECK(!thread->ReadFlag(ThreadFlag::kPendingFlipFunction)); + if (gUseUserfaultfd) { + Locks::mutator_lock_->AssertExclusiveHeld(self); + for (Thread* thread : other_threads) { + thread->EnsureFlipFunctionStarted(self); + DCHECK(!thread->ReadFlag(ThreadFlag::kPendingFlipFunction)); + } + // Try to run the flip function for self. + self->EnsureFlipFunctionStarted(self); + DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction)); + Locks::mutator_lock_->ExclusiveUnlock(self); + } else { + fake_mutator_lock_release(); + ReaderMutexLock mu(self, *Locks::mutator_lock_); + for (Thread* thread : other_threads) { + thread->EnsureFlipFunctionStarted(self); + DCHECK(!thread->ReadFlag(ThreadFlag::kPendingFlipFunction)); + } + // Try to run the flip function for self. + self->EnsureFlipFunctionStarted(self); + DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction)); } - // Try to run the flip function for self. - self->EnsureFlipFunctionStarted(self); - DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction)); } // Resume other threads. 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h index c1ffe9e59d..51fac4a6ed 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -127,7 +127,7 @@ class ThreadList { REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_); // Flip thread roots from from-space refs to to-space refs. Used by - // the concurrent copying collector. + // the concurrent moving collectors. size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback, gc::collector::GarbageCollector* collector, diff --git a/test/odsign/Android.bp b/test/odsign/Android.bp index 511f5a1690..eb09587515 100644 --- a/test/odsign/Android.bp +++ b/test/odsign/Android.bp @@ -50,6 +50,9 @@ java_test_host { data: [ ":odsign_e2e_test_app", ], + java_resources: [ + ":art-gtest-jars-Main", + ], test_config: "odsign-e2e-tests-full.xml", test_suites: [ "general-tests", diff --git a/test/odsign/test-src/com/android/tests/odsign/DeviceState.java b/test/odsign/test-src/com/android/tests/odsign/DeviceState.java new file mode 100644 index 0000000000..92958311f2 --- /dev/null +++ b/test/odsign/test-src/com/android/tests/odsign/DeviceState.java @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.android.tests.odsign; + +import static com.google.common.truth.Truth.assertThat; + +import com.android.tradefed.invoker.TestInformation; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import java.io.File; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +/** A helper class that can mutate the device state and restore it afterwards. */ +public class DeviceState { + private static final String TEST_JAR_RESOURCE_NAME = "/art-gtest-jars-Main.jar"; + private static final String PHENOTYPE_FLAG_NAMESPACE = "runtime_native_boot"; + private static final String ART_APEX_DALVIK_CACHE_BACKUP_DIRNAME = + OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME + ".bak"; + + private final TestInformation mTestInfo; + private final OdsignTestUtils mTestUtils; + + private Set<String> mTempFiles = new HashSet<>(); + private Set<String> mMountPoints = new HashSet<>(); + private Map<String, String> mMutatedProperties = new HashMap<>(); + private Set<String> mMutatedPhenotypeFlags = new HashSet<>(); + private Map<String, String> mDeletedFiles = new HashMap<>(); + private boolean mHasArtifactsBackup = false; + + public DeviceState(TestInformation testInfo) throws Exception { + mTestInfo = testInfo; + mTestUtils = new OdsignTestUtils(testInfo); + } + + /** Restores the device state. 
*/ + public void restore() throws Exception { + for (String mountPoint : mMountPoints) { + mTestInfo.getDevice().executeShellV2Command(String.format("umount '%s'", mountPoint)); + } + + for (String tempFile : mTempFiles) { + mTestInfo.getDevice().deleteFile(tempFile); + } + + for (var entry : mMutatedProperties.entrySet()) { + mTestInfo.getDevice().setProperty( + entry.getKey(), entry.getValue() != null ? entry.getValue() : ""); + } + + for (String flag : mMutatedPhenotypeFlags) { + mTestInfo.getDevice().executeShellV2Command(String.format( + "device_config delete '%s' '%s'", PHENOTYPE_FLAG_NAMESPACE, flag)); + } + + if (!mMutatedPhenotypeFlags.isEmpty()) { + mTestInfo.getDevice().executeShellV2Command( + "device_config set_sync_disabled_for_tests none"); + } + + for (var entry : mDeletedFiles.entrySet()) { + mTestInfo.getDevice().executeShellV2Command( + String.format("cp '%s' '%s'", entry.getValue(), entry.getKey())); + mTestInfo.getDevice().executeShellV2Command(String.format("rm '%s'", entry.getValue())); + mTestInfo.getDevice().executeShellV2Command( + String.format("restorecon '%s'", entry.getKey())); + } + + if (mHasArtifactsBackup) { + mTestInfo.getDevice().executeShellV2Command( + String.format("rm -rf '%s'", OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME)); + mTestInfo.getDevice().executeShellV2Command( + String.format("mv '%s' '%s'", ART_APEX_DALVIK_CACHE_BACKUP_DIRNAME, + OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME)); + } + } + + /** Simulates that the ART APEX has been upgraded. */ + public void simulateArtApexUpgrade() throws Exception { + updateApexInfo("com.android.art", false /* isFactory */); + } + + /** + * Simulates that the new ART APEX has been uninstalled (i.e., the ART module goes back to the + * factory version). + */ + public void simulateArtApexUninstall() throws Exception { + updateApexInfo("com.android.art", true /* isFactory */); + } + + /** + * Simulates that an APEX has been upgraded. 
We could install a real APEX, but that would + * introduce an extra dependency to this test, which we want to avoid. + */ + public void simulateApexUpgrade() throws Exception { + updateApexInfo("com.android.wifi", false /* isFactory */); + } + + /** + * Simulates that the new APEX has been uninstalled (i.e., the module goes back to the factory + * version). + */ + public void simulateApexUninstall() throws Exception { + updateApexInfo("com.android.wifi", true /* isFactory */); + } + + private void updateApexInfo(String moduleName, boolean isFactory) throws Exception { + try (var xmlMutator = new XmlMutator(OdsignTestUtils.APEX_INFO_FILE)) { + NodeList list = xmlMutator.getDocument().getElementsByTagName("apex-info"); + for (int i = 0; i < list.getLength(); i++) { + Element node = (Element) list.item(i); + if (node.getAttribute("moduleName").equals(moduleName) + && node.getAttribute("isActive").equals("true")) { + node.setAttribute("isFactory", String.valueOf(isFactory)); + node.setAttribute( + "lastUpdateMillis", String.valueOf(System.currentTimeMillis())); + } + } + } + } + + /** Simulates that there is an OTA that updates a boot classpath jar. */ + public void simulateBootClasspathOta() throws Exception { + File localFile = mTestUtils.copyResourceToFile(TEST_JAR_RESOURCE_NAME); + pushAndBindMount(localFile, "/system/framework/framework.jar"); + } + + /** Simulates that there is an OTA that updates a system server jar. */ + public void simulateSystemServerOta() throws Exception { + File localFile = mTestUtils.copyResourceToFile(TEST_JAR_RESOURCE_NAME); + pushAndBindMount(localFile, "/system/framework/services.jar"); + } + + public void makeDex2oatFail() throws Exception { + setProperty("dalvik.vm.boot-dex2oat-threads", "-1"); + } + + /** Sets a system property. */ + public void setProperty(String key, String value) throws Exception { + if (!mMutatedProperties.containsKey(key)) { + // Backup the original value. 
+ mMutatedProperties.put(key, mTestInfo.getDevice().getProperty(key)); + } + + mTestInfo.getDevice().setProperty(key, value); + } + + /** Sets a phenotype flag. */ + public void setPhenotypeFlag(String key, String value) throws Exception { + if (!mMutatedPhenotypeFlags.contains(key)) { + // Tests assume that phenotype flags are initially not set. Check if the assumption is + // true. + assertThat(mTestUtils.assertCommandSucceeds(String.format( + "device_config get '%s' '%s'", PHENOTYPE_FLAG_NAMESPACE, key))) + .isEqualTo("null"); + mMutatedPhenotypeFlags.add(key); + } + + // Disable phenotype flag syncing. Potentially, we can set `set_sync_disabled_for_tests` to + // `until_reboot`, but setting it to `persistent` prevents unrelated system crashes/restarts + // from affecting the test. `set_sync_disabled_for_tests` is reset in `restore` anyway. + mTestUtils.assertCommandSucceeds("device_config set_sync_disabled_for_tests persistent"); + + if (value != null) { + mTestUtils.assertCommandSucceeds(String.format( + "device_config put '%s' '%s' '%s'", PHENOTYPE_FLAG_NAMESPACE, key, value)); + } else { + mTestUtils.assertCommandSucceeds( + String.format("device_config delete '%s' '%s'", PHENOTYPE_FLAG_NAMESPACE, key)); + } + } + + public void backupAndDeleteFile(String remotePath) throws Exception { + String tempFile = "/data/local/tmp/odsign_e2e_tests_" + UUID.randomUUID() + ".tmp"; + // Backup the file before deleting it. 
+ mTestUtils.assertCommandSucceeds(String.format("cp '%s' '%s'", remotePath, tempFile)); + mTestUtils.assertCommandSucceeds(String.format("rm '%s'", remotePath)); + mDeletedFiles.put(remotePath, tempFile); + } + + public void backupArtifacts() throws Exception { + mTestInfo.getDevice().executeShellV2Command( + String.format("rm -rf '%s'", ART_APEX_DALVIK_CACHE_BACKUP_DIRNAME)); + mTestUtils.assertCommandSucceeds( + String.format("cp -r '%s' '%s'", OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME, + ART_APEX_DALVIK_CACHE_BACKUP_DIRNAME)); + mHasArtifactsBackup = true; + } + + /** + * Pushes the file to a temporary location and bind-mount it at the given path. This is useful + * when the path is readonly. + */ + private void pushAndBindMount(File localFile, String remotePath) throws Exception { + String tempFile = "/data/local/tmp/odsign_e2e_tests_" + UUID.randomUUID() + ".tmp"; + assertThat(mTestInfo.getDevice().pushFile(localFile, tempFile)).isTrue(); + mTempFiles.add(tempFile); + + // If the path has already been bind-mounted by this method before, unmount it first. + if (mMountPoints.contains(remotePath)) { + mTestUtils.assertCommandSucceeds(String.format("umount '%s'", remotePath)); + mMountPoints.remove(remotePath); + } + + mTestUtils.assertCommandSucceeds( + String.format("mount --bind '%s' '%s'", tempFile, remotePath)); + mMountPoints.add(remotePath); + mTestUtils.assertCommandSucceeds(String.format("restorecon '%s'", remotePath)); + } + + /** A helper class for mutating an XML file. */ + private class XmlMutator implements AutoCloseable { + private final Document mDocument; + private final String mRemoteXmlFile; + private final File mLocalFile; + + public XmlMutator(String remoteXmlFile) throws Exception { + // Load the XML file. 
+ mRemoteXmlFile = remoteXmlFile; + mLocalFile = mTestInfo.getDevice().pullFile(remoteXmlFile); + assertThat(mLocalFile).isNotNull(); + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + mDocument = builder.parse(mLocalFile); + } + + @Override + public void close() throws Exception { + // Save the XML file. + Transformer transformer = TransformerFactory.newInstance().newTransformer(); + transformer.transform(new DOMSource(mDocument), new StreamResult(mLocalFile)); + pushAndBindMount(mLocalFile, mRemoteXmlFile); + } + + /** Returns a mutable XML document. */ + public Document getDocument() { + return mDocument; + } + } +} diff --git a/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryHostTestBase.java b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryHostTestBase.java new file mode 100644 index 0000000000..16e26c5b7f --- /dev/null +++ b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryHostTestBase.java @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.android.tests.odsign; + +import static org.junit.Assume.assumeTrue; + +import com.android.tradefed.invoker.TestInformation; +import com.android.tradefed.testtype.junit4.AfterClassWithInfo; +import com.android.tradefed.testtype.junit4.BaseHostJUnit4Test; +import com.android.tradefed.testtype.junit4.BeforeClassWithInfo; + +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Set; + +/** + * This class tests odrefresh for the cases where all the APEXes are initially factory-installed. + * Similar to OdrefreshHostTest, it does not involve odsign, fs-verity, and ART runtime. + * + * The tests are run by derived classes with different conditions: with and without the cache info. + */ +@Ignore("See derived classes") +abstract public class OdrefreshFactoryHostTestBase extends BaseHostJUnit4Test { + protected OdsignTestUtils mTestUtils; + protected DeviceState mDeviceState; + + @BeforeClassWithInfo + public static void beforeClassWithDeviceBase(TestInformation testInfo) throws Exception { + OdsignTestUtils testUtils = new OdsignTestUtils(testInfo); + assumeTrue(testUtils.areAllApexesFactoryInstalled()); + testUtils.assertCommandSucceeds("disable-verity"); + testUtils.removeCompilationLogToAvoidBackoff(); + testUtils.reboot(); + testUtils.assertCommandSucceeds("remount"); + } + + @AfterClassWithInfo + public static void afterClassWithDeviceBase(TestInformation testInfo) throws Exception { + OdsignTestUtils testUtils = new OdsignTestUtils(testInfo); + testUtils.assertCommandSucceeds("enable-verity"); + testUtils.removeCompilationLogToAvoidBackoff(); + testUtils.reboot(); + } + + @Before + public void setUpBase() throws Exception { + mTestUtils = new OdsignTestUtils(getTestInformation()); + mDeviceState = new DeviceState(getTestInformation()); + mDeviceState.backupArtifacts(); + } + + @After + public void tearDownBase() throws Exception { + mDeviceState.restore(); 
+ } + + @Test + public void verifyArtSamegradeUpdateTriggersCompilation() throws Exception { + mDeviceState.simulateArtApexUpgrade(); + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should recompile everything. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.simulateArtApexUninstall(); + mTestUtils.runOdrefresh(); + + // It should delete all compilation artifacts and update the cache info. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + } + + @Test + public void verifyOtherApexSamegradeUpdateTriggersCompilation() throws Exception { + mDeviceState.simulateApexUpgrade(); + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should only recompile system server. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.simulateApexUninstall(); + mTestUtils.runOdrefresh(); + + // It should delete all compilation artifacts and update the cache info. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + } + + @Test + public void verifyMissingArtifactTriggersCompilation() throws Exception { + // Simulate that an artifact is missing from /system. 
+ mDeviceState.backupAndDeleteFile( + "/system/framework/oat/" + mTestUtils.getSystemServerIsa() + "/services.odex"); + + mTestUtils.removeCompilationLogToAvoidBackoff(); + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + Set<String> expectedArtifacts = OdsignTestUtils.getApexDataDalvikCacheFilenames( + "/system/framework/services.jar", mTestUtils.getSystemServerIsa()); + + Set<String> nonExpectedArtifacts = new HashSet<>(); + nonExpectedArtifacts.addAll(mTestUtils.getZygotesExpectedArtifacts()); + nonExpectedArtifacts.addAll(mTestUtils.getSystemServerExpectedArtifacts()); + nonExpectedArtifacts.removeAll(expectedArtifacts); + + // It should only generate artifacts that are missing from /system. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(nonExpectedArtifacts); + mTestUtils.assertModifiedAfter(expectedArtifacts, timeMs); + + mDeviceState.simulateArtApexUpgrade(); + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should recompile everything. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.simulateArtApexUninstall(); + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should only re-generate artifacts that are missing from /system. 
+ mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(nonExpectedArtifacts); + mTestUtils.assertModifiedAfter(expectedArtifacts, timeMs); + } + + @Test + public void verifyEnableUffdGcChangeTriggersCompilation() throws Exception { + mDeviceState.setPhenotypeFlag("enable_uffd_gc", "true"); + + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should recompile everything. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + // Run odrefresh again with the flag unchanged. + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Nothing should change. + mTestUtils.assertNotModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.setPhenotypeFlag("enable_uffd_gc", null); + + mTestUtils.runOdrefresh(); + + // It should delete all compilation artifacts and update the cache info. 
+ mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + } +} diff --git a/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithCacheInfoHostTest.java b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithCacheInfoHostTest.java new file mode 100644 index 0000000000..d270fab82a --- /dev/null +++ b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithCacheInfoHostTest.java @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.tests.odsign; + +import com.android.tradefed.testtype.DeviceJUnit4ClassRunner; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import java.util.Set; + +/** + * This class tests odrefresh for the cases where all the APEXes are initially factory-installed + * and the cache info exists, which is the normal case. + * + * Both the tests in the base class and the tests in this class are run with the setup of this + * class. + */ +@RunWith(DeviceJUnit4ClassRunner.class) +public class OdrefreshFactoryWithCacheInfoHostTest extends OdrefreshFactoryHostTestBase { + @Test + public void verifyNoCompilationWhenSystemIsGood() throws Exception { + // Only the cache info should exist. 
+ mTestUtils.assertFilesExist(Set.of(OdsignTestUtils.CACHE_INFO_FILE)); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + + // Run again. + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Nothing should change. + mTestUtils.assertNotModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + } +} diff --git a/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithoutCacheInfoHostTest.java b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithoutCacheInfoHostTest.java new file mode 100644 index 0000000000..bec0dd8476 --- /dev/null +++ b/test/odsign/test-src/com/android/tests/odsign/OdrefreshFactoryWithoutCacheInfoHostTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.tests.odsign; + +import com.android.tradefed.testtype.DeviceJUnit4ClassRunner; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; + +import java.util.Set; + +/** + * This class tests odrefresh for the cases where all the APEXes are initially factory-installed + * and the cache info does not exist. 
+ * + * The cache info can be missing due to various reasons (corrupted files deleted by odsign, odsign + * failure, etc.), so this test makes sure that odrefresh doesn't rely on the cache info when + * checking artifacts on /system. + * + * Both the tests in the base class and the tests in this class are run with the setup of this + * class. + */ +@RunWith(DeviceJUnit4ClassRunner.class) +public class OdrefreshFactoryWithoutCacheInfoHostTest extends OdrefreshFactoryHostTestBase { + @Before + public void setUp() throws Exception { + getDevice().deleteFile(OdsignTestUtils.CACHE_INFO_FILE); + } + + @Test + public void verifyNoCompilationWhenSystemIsGood() throws Exception { + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should only generate the missing cache info. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); + } +} diff --git a/test/odsign/test-src/com/android/tests/odsign/OdrefreshHostTest.java b/test/odsign/test-src/com/android/tests/odsign/OdrefreshHostTest.java index 731ea38999..993d7f0aef 100644 --- a/test/odsign/test-src/com/android/tests/odsign/OdrefreshHostTest.java +++ b/test/odsign/test-src/com/android/tests/odsign/OdrefreshHostTest.java @@ -17,9 +17,7 @@ package com.android.tests.odsign; import static com.google.common.truth.Truth.assertThat; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static com.google.common.truth.Truth.assertWithMessage; import com.android.tradefed.invoker.TestInformation; import com.android.tradefed.testtype.DeviceJUnit4ClassRunner; @@ -27,15 +25,13 @@ import com.android.tradefed.testtype.junit4.AfterClassWithInfo; import com.android.tradefed.testtype.junit4.BaseHostJUnit4Test; import com.android.tradefed.testtype.junit4.BeforeClassWithInfo; +import 
org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Test to check end-to-end odrefresh invocations, but without odsign, fs-verity, and ART runtime @@ -43,36 +39,14 @@ import java.util.regex.Pattern; */ @RunWith(DeviceJUnit4ClassRunner.class) public class OdrefreshHostTest extends BaseHostJUnit4Test { - private static final String CACHE_INFO_FILE = - OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME + "/cache-info.xml"; - private static final String ODREFRESH_BIN = "odrefresh"; - private static final String ODREFRESH_COMMAND = - ODREFRESH_BIN + " --partial-compilation --no-refresh --compile"; - private static final String ODREFRESH_MINIMAL_COMMAND = - ODREFRESH_BIN + " --partial-compilation --no-refresh --minimal --compile"; - - private static final String TAG = "OdrefreshHostTest"; - private static final String ZYGOTE_ARTIFACTS_KEY = TAG + ":ZYGOTE_ARTIFACTS"; - private static final String SYSTEM_SERVER_ARTIFACTS_KEY = TAG + ":SYSTEM_SERVER_ARTIFACTS"; - private OdsignTestUtils mTestUtils; + private DeviceState mDeviceState; @BeforeClassWithInfo public static void beforeClassWithDevice(TestInformation testInfo) throws Exception { OdsignTestUtils testUtils = new OdsignTestUtils(testInfo); testUtils.installTestApex(); testUtils.reboot(); - - HashSet<String> zygoteArtifacts = new HashSet<>(); - for (String zygoteName : testUtils.ZYGOTE_NAMES) { - zygoteArtifacts.addAll( - testUtils.getZygoteLoadedArtifacts(zygoteName).orElse(new HashSet<>())); - } - Set<String> systemServerArtifacts = testUtils.getSystemServerLoadedArtifacts(); - - testInfo.properties().put(ZYGOTE_ARTIFACTS_KEY, String.join(":", zygoteArtifacts)); - testInfo.properties() - .put(SYSTEM_SERVER_ARTIFACTS_KEY, String.join(":", systemServerArtifacts)); } @AfterClassWithInfo @@ -85,197 +59,224 @@ public class 
OdrefreshHostTest extends BaseHostJUnit4Test { @Before public void setUp() throws Exception { mTestUtils = new OdsignTestUtils(getTestInformation()); + mDeviceState = new DeviceState(getTestInformation()); + mDeviceState.backupArtifacts(); + } + + @After + public void tearDown() throws Exception { + mDeviceState.restore(); } @Test public void verifyArtSamegradeUpdateTriggersCompilation() throws Exception { - simulateArtApexUpgrade(); + mDeviceState.simulateArtApexUpgrade(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyOtherApexSamegradeUpdateTriggersCompilation() throws Exception { - simulateApexUpgrade(); + mDeviceState.simulateApexUpgrade(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyBootClasspathOtaTriggersCompilation() throws Exception { - simulateBootClasspathOta(); + mDeviceState.simulateBootClasspathOta(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), 
timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifySystemServerOtaTriggersCompilation() throws Exception { - simulateSystemServerOta(); + mDeviceState.simulateSystemServerOta(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyMissingArtifactTriggersCompilation() throws Exception { Set<String> missingArtifacts = simulateMissingArtifacts(); Set<String> remainingArtifacts = new HashSet<>(); - remainingArtifacts.addAll(getZygoteArtifacts()); - remainingArtifacts.addAll(getSystemServerArtifacts()); + remainingArtifacts.addAll(mTestUtils.getZygotesExpectedArtifacts()); + remainingArtifacts.addAll(mTestUtils.getSystemServerExpectedArtifacts()); remainingArtifacts.removeAll(missingArtifacts); mTestUtils.removeCompilationLogToAvoidBackoff(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsNotModifiedAfter(remainingArtifacts, timeMs); - assertArtifactsModifiedAfter(missingArtifacts, timeMs); + mTestUtils.assertNotModifiedAfter(remainingArtifacts, timeMs); + mTestUtils.assertModifiedAfter(missingArtifacts, timeMs); } @Test public void verifyEnableUffdGcChangeTriggersCompilation() throws Exception { - try { - // Disable phenotype flag syncing. Potentially, we can set - // `set_sync_disabled_for_tests` to `until_reboot`, but setting it to - // `persistent` prevents unrelated system crashes/restarts from affecting the - // test. `set_sync_disabled_for_tests` is reset in the `finally` block anyway. 
- getDevice().executeShellV2Command( - "device_config set_sync_disabled_for_tests persistent"); - - // Simulate that the phenotype flag is set to the default value. - getDevice().executeShellV2Command( - "device_config put runtime_native_boot enable_uffd_gc false"); - - long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); - - // Artifacts should not be re-compiled. - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); - - // Simulate that the phenotype flag is set to true. - getDevice().executeShellV2Command( - "device_config put runtime_native_boot enable_uffd_gc true"); - - timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); - - // Artifacts should be re-compiled. - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); - - // Run odrefresh again with the flag unchanged. - timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); - - // Artifacts should not be re-compiled. - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); - - // Simulate that the phenotype flag is set to false. - getDevice().executeShellV2Command( - "device_config put runtime_native_boot enable_uffd_gc false"); - - timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); - - // Artifacts should be re-compiled. 
- assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); - } finally { - getDevice().executeShellV2Command("device_config set_sync_disabled_for_tests none"); - getDevice().executeShellV2Command( - "device_config delete runtime_native_boot enable_uffd_gc"); - } + mDeviceState.setPhenotypeFlag("enable_uffd_gc", "false"); + + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should be re-compiled. + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.setPhenotypeFlag("enable_uffd_gc", "true"); + + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should be re-compiled. + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + // Run odrefresh again with the flag unchanged. + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should not be re-compiled. + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.setPhenotypeFlag("enable_uffd_gc", null); + + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should be re-compiled. 
+ mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + } + + @Test + public void verifySystemServerCompilerFilterOverrideChangeTriggersCompilation() + throws Exception { + mDeviceState.setPhenotypeFlag("systemservercompilerfilter_override", null); + + long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should not be re-compiled. + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.setPhenotypeFlag("systemservercompilerfilter_override", "speed"); + + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should be re-compiled. + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + // Run odrefresh again with the flag unchanged. + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should not be re-compiled. + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); + + mDeviceState.setPhenotypeFlag("systemservercompilerfilter_override", "verify"); + + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // Artifacts should be re-compiled. + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifySystemPropertyMismatchTriggersCompilation() throws Exception { // Change a system property from empty to a value. 
- getDevice().setProperty("dalvik.vm.foo", "1"); + mDeviceState.setProperty("dalvik.vm.foo", "1"); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should be re-compiled. - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); // Run again with the same value. timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should not be re-compiled. - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); // Change the system property to another value. - getDevice().setProperty("dalvik.vm.foo", "2"); + mDeviceState.setProperty("dalvik.vm.foo", "2"); timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should be re-compiled. - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); // Run again with the same value. timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should not be re-compiled. 
- assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); // Change the system property to empty. - getDevice().setProperty("dalvik.vm.foo", ""); + mDeviceState.setProperty("dalvik.vm.foo", ""); timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should be re-compiled. - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); // Run again with the same value. timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // Artifacts should not be re-compiled. 
- assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyNoCompilationWhenCacheIsGood() throws Exception { mTestUtils.removeCompilationLogToAvoidBackoff(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyUnexpectedFilesAreCleanedUp() throws Exception { String unexpected = OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME + "/unexpected"; - getDevice().pushString(/*contents=*/"", unexpected); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + getDevice().pushString("" /* contents */, unexpected); + mTestUtils.runOdrefresh(); - assertFalse(getDevice().doesFileExist(unexpected)); + assertThat(getDevice().doesFileExist(unexpected)).isFalse(); } @Test public void verifyCacheInfoOmitsIrrelevantApexes() throws Exception { - String cacheInfo = getDevice().pullFileContents(CACHE_INFO_FILE); + String cacheInfo = getDevice().pullFileContents(OdsignTestUtils.CACHE_INFO_FILE); // cacheInfo should list all APEXes that have compilable JARs and // none that do not. 
@@ -290,16 +291,14 @@ public class OdrefreshHostTest extends BaseHostJUnit4Test { @Test public void verifyCompilationOsMode() throws Exception { mTestUtils.removeCompilationLogToAvoidBackoff(); - simulateApexUpgrade(); + mDeviceState.simulateApexUpgrade(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command( - ODREFRESH_BIN + " --no-refresh --partial-compilation" - + " --compilation-os-mode --compile"); + mTestUtils.runOdrefresh("--compilation-os-mode"); - assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); - String cacheInfo = getDevice().pullFileContents(CACHE_INFO_FILE); + String cacheInfo = getDevice().pullFileContents(OdsignTestUtils.CACHE_INFO_FILE); assertThat(cacheInfo).contains("compilationOsMode=\"true\""); // Compilation OS does not write the compilation log to the host. @@ -307,185 +306,89 @@ public class OdrefreshHostTest extends BaseHostJUnit4Test { // Simulate the odrefresh invocation on the next boot. timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); // odrefresh should not re-compile anything. 
- assertArtifactsNotModifiedAfter(getZygoteArtifacts(), timeMs); - assertArtifactsNotModifiedAfter(getSystemServerArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); + mTestUtils.assertNotModifiedAfter(mTestUtils.getSystemServerExpectedArtifacts(), timeMs); } @Test public void verifyMinimalCompilation() throws Exception { mTestUtils.removeCompilationLogToAvoidBackoff(); getDevice().executeShellV2Command( - "rm -rf " + OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME); - getDevice().executeShellV2Command(ODREFRESH_MINIMAL_COMMAND); + "rm -rf " + OdsignTestUtils.ART_APEX_DALVIK_CACHE_DIRNAME); + mTestUtils.runOdrefresh("--minimal"); mTestUtils.restartZygote(); // The minimal boot image should be loaded. - Set<String> minimalZygoteArtifacts = - mTestUtils.verifyZygotesLoadedArtifacts("boot_minimal"); + mTestUtils.verifyZygotesLoadedArtifacts("boot_minimal"); // Running the command again should not overwrite the minimal boot image. mTestUtils.removeCompilationLogToAvoidBackoff(); long timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_MINIMAL_COMMAND); - - assertArtifactsNotModifiedAfter(minimalZygoteArtifacts, timeMs); - - // `odrefresh --check` should keep the minimal boot image. - mTestUtils.removeCompilationLogToAvoidBackoff(); - timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_BIN + " --check"); + mTestUtils.runOdrefresh("--minimal"); - assertArtifactsNotModifiedAfter(minimalZygoteArtifacts, timeMs); + Set<String> minimalZygoteArtifacts = mTestUtils.getZygotesExpectedArtifacts("boot_minimal"); + mTestUtils.assertNotModifiedAfter(minimalZygoteArtifacts, timeMs); // A normal odrefresh invocation should replace the minimal boot image with a full one. 
mTestUtils.removeCompilationLogToAvoidBackoff(); timeMs = mTestUtils.getCurrentTimeMs(); - getDevice().executeShellV2Command(ODREFRESH_COMMAND); + mTestUtils.runOdrefresh(); for (String artifact : minimalZygoteArtifacts) { - assertFalse( + assertWithMessage( String.format( - "Artifact %s should be cleaned up while it still exists", artifact), - getDevice().doesFileExist(artifact)); + "Artifact %s should be cleaned up while it still exists", artifact)) + .that(getDevice().doesFileExist(artifact)) + .isFalse(); } - assertArtifactsModifiedAfter(getZygoteArtifacts(), timeMs); + mTestUtils.assertModifiedAfter(mTestUtils.getZygotesExpectedArtifacts(), timeMs); } - /** - * Checks the input line by line and replaces all lines that match the regex with the given - * replacement. - */ - private String replaceLine(String input, String regex, String replacement) { - StringBuffer output = new StringBuffer(); - Pattern p = Pattern.compile(regex); - for (String line : input.split("\n")) { - Matcher m = p.matcher(line); - if (m.matches()) { - m.appendReplacement(output, replacement); - output.append("\n"); - } else { - output.append(line + "\n"); - } - } - return output.toString(); - } + @Test + public void verifyCompilationFailureBackoff() throws Exception { + mDeviceState.makeDex2oatFail(); + mDeviceState.simulateArtApexUpgrade(); - /** - * Simulates that there is an OTA that updates a boot classpath jar. - */ - private void simulateBootClasspathOta() throws Exception { - String cacheInfo = getDevice().pullFileContents(CACHE_INFO_FILE); - // Replace the cached checksum of /system/framework/framework.jar with "aaaaaaaa". - cacheInfo = replaceLine( - cacheInfo, - "(.*/system/framework/framework\\.jar.*checksums=\").*?(\".*)", - "$1aaaaaaaa$2"); - getDevice().pushString(cacheInfo, CACHE_INFO_FILE); - } + // Run odrefresh. It should encounter dex2oat failures. 
+ long timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); - /** - * Simulates that there is an OTA that updates a system server jar. - */ - private void simulateSystemServerOta() throws Exception { - String cacheInfo = getDevice().pullFileContents(CACHE_INFO_FILE); - // Replace the cached checksum of /system/framework/services.jar with "aaaaaaaa". - cacheInfo = replaceLine( - cacheInfo, - "(.*/system/framework/services\\.jar.*checksums=\").*?(\".*)", - "$1aaaaaaaa$2"); - getDevice().pushString(cacheInfo, CACHE_INFO_FILE); - } + // Artifacts don't exist because the compilation failed. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + mTestUtils.assertFilesNotExist(mTestUtils.getZygotesExpectedArtifacts()); + mTestUtils.assertFilesNotExist(mTestUtils.getSystemServerExpectedArtifacts()); - /** - * Simulates that an ART APEX has been upgraded. - */ - private void simulateArtApexUpgrade() throws Exception { - String apexInfo = getDevice().pullFileContents(CACHE_INFO_FILE); - // Replace the lastUpdateMillis of com.android.art with "1". - apexInfo = replaceLine( - apexInfo, - "(.*com\\.android\\.art.*lastUpdateMillis=\").*?(\".*)", - "$11$2"); - getDevice().pushString(apexInfo, CACHE_INFO_FILE); - } + // Run odrefresh again. + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); + + // It should not retry. + mTestUtils.assertNotModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); + + // Simulate that the backoff time has passed. + mTestUtils.removeCompilationLogToAvoidBackoff(); + + // Run odrefresh again. + timeMs = mTestUtils.getCurrentTimeMs(); + mTestUtils.runOdrefresh(); - /** - * Simulates that an APEX has been upgraded. We could install a real APEX, but that would - * introduce an extra dependency to this test, which we want to avoid. 
- */ - private void simulateApexUpgrade() throws Exception { - String apexInfo = getDevice().pullFileContents(CACHE_INFO_FILE); - // Replace the lastUpdateMillis of com.android.wifi with "1". - apexInfo = replaceLine( - apexInfo, - "(.*com\\.android\\.wifi.*lastUpdateMillis=\").*?(\".*)", - "$11$2"); - getDevice().pushString(apexInfo, CACHE_INFO_FILE); + // Now it should retry. + mTestUtils.assertModifiedAfter(Set.of(OdsignTestUtils.CACHE_INFO_FILE), timeMs); } private Set<String> simulateMissingArtifacts() throws Exception { Set<String> missingArtifacts = new HashSet<>(); - String sample = getSystemServerArtifacts().iterator().next(); + String sample = mTestUtils.getSystemServerExpectedArtifacts().iterator().next(); for (String extension : OdsignTestUtils.APP_ARTIFACT_EXTENSIONS) { - String artifact = replaceExtension(sample, extension); + String artifact = OdsignTestUtils.replaceExtension(sample, extension); getDevice().deleteFile(artifact); missingArtifacts.add(artifact); } return missingArtifacts; } - - private void assertArtifactsModifiedAfter(Set<String> artifacts, long timeMs) throws Exception { - for (String artifact : artifacts) { - long modifiedTime = mTestUtils.getModifiedTimeMs(artifact); - assertTrue( - String.format( - "Artifact %s is not re-compiled. Modified time: %d, Reference time: %d", - artifact, - modifiedTime, - timeMs), - modifiedTime > timeMs); - } - } - - private void assertArtifactsNotModifiedAfter(Set<String> artifacts, long timeMs) - throws Exception { - for (String artifact : artifacts) { - long modifiedTime = mTestUtils.getModifiedTimeMs(artifact); - assertTrue( - String.format( - "Artifact %s is unexpectedly re-compiled. 
" + - "Modified time: %d, Reference time: %d", - artifact, - modifiedTime, - timeMs), - modifiedTime < timeMs); - } - } - - private String replaceExtension(String filename, String extension) throws Exception { - int index = filename.lastIndexOf("."); - assertTrue("Extension not found in filename: " + filename, index != -1); - return filename.substring(0, index) + extension; - } - - private Set<String> getColonSeparatedSet(String key) { - String value = getTestInformation().properties().get(key); - if (value == null || value.isEmpty()) { - return new HashSet<>(); - } - return new HashSet<>(Arrays.asList(value.split(":"))); - } - - private Set<String> getZygoteArtifacts() { - return getColonSeparatedSet(ZYGOTE_ARTIFACTS_KEY); - } - - private Set<String> getSystemServerArtifacts() { - return getColonSeparatedSet(SYSTEM_SERVER_ARTIFACTS_KEY); - } } diff --git a/test/odsign/test-src/com/android/tests/odsign/OdsignTestUtils.java b/test/odsign/test-src/com/android/tests/odsign/OdsignTestUtils.java index caf94a783b..c8d2516ca5 100644 --- a/test/odsign/test-src/com/android/tests/odsign/OdsignTestUtils.java +++ b/test/odsign/test-src/com/android/tests/odsign/OdsignTestUtils.java @@ -18,10 +18,9 @@ package com.android.tests.odsign; import static com.android.tradefed.testtype.DeviceJUnit4ClassRunner.TestLogData; +import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; @@ -29,32 +28,49 @@ import android.cts.install.lib.host.InstallUtilsHost; import com.android.tradefed.device.DeviceNotAvailableException; import com.android.tradefed.device.ITestDevice; -import com.android.tradefed.device.ITestDevice.ApexInfo; import com.android.tradefed.device.TestDeviceOptions; import com.android.tradefed.invoker.TestInformation; import 
com.android.tradefed.result.FileInputStreamSource; import com.android.tradefed.result.LogDataType; import com.android.tradefed.util.CommandResult; +import com.google.common.io.ByteStreams; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; import java.time.Duration; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Optional; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; public class OdsignTestUtils { public static final String ART_APEX_DALVIK_CACHE_DIRNAME = "/data/misc/apexdata/com.android.art/dalvik-cache"; + public static final String CACHE_INFO_FILE = ART_APEX_DALVIK_CACHE_DIRNAME + "/cache-info.xml"; + public static final String APEX_INFO_FILE = "/apex/apex-info-list.xml"; - public static final List<String> ZYGOTE_NAMES = List.of("zygote", "zygote64"); + private static final String ODREFRESH_BIN = "odrefresh"; + + public static final String ZYGOTE_32_NAME = "zygote"; + public static final String ZYGOTE_64_NAME = "zygote64"; public static final List<String> APP_ARTIFACT_EXTENSIONS = List.of(".art", ".odex", ".vdex"); public static final List<String> BCP_ARTIFACT_EXTENSIONS = List.of(".art", ".oat", ".vdex"); @@ -68,11 +84,17 @@ public class OdsignTestUtils { private static final String TAG = "OdsignTestUtils"; private static final String PACKAGE_NAME_KEY = TAG + ":PACKAGE_NAME"; + // Keep in sync with `ABI_TO_INSTRUCTION_SET_MAP` in + // 
libcore/libart/src/main/java/dalvik/system/VMRuntime.java. + private static final Map<String, String> ABI_TO_INSTRUCTION_SET_MAP = + Map.of("armeabi", "arm", "armeabi-v7a", "arm", "x86", "x86", "x86_64", "x86_64", + "arm64-v8a", "arm64", "arm64-v8a-hwasan", "arm64", "riscv64", "riscv64"); + private final InstallUtilsHost mInstallUtils; private final TestInformation mTestInfo; public OdsignTestUtils(TestInformation testInfo) throws Exception { - assertNotNull(testInfo.getDevice()); + assertThat(testInfo.getDevice()).isNotNull(); mInstallUtils = new InstallUtilsHost(testInfo); mTestInfo = testInfo; } @@ -86,17 +108,13 @@ public class OdsignTestUtils { String packagesOutput = mTestInfo.getDevice().executeShellCommand("pm list packages -f --apex-only"); Pattern p = Pattern.compile( - "^package:(.*)=(com(?:\\.google)?\\.android(?:\\.go)?\\.art)$", - Pattern.MULTILINE); + "^package:(.*)=(com(?:\\.google)?\\.android(?:\\.go)?\\.art)$", Pattern.MULTILINE); Matcher m = p.matcher(packagesOutput); assertTrue("ART module not found. Packages are:\n" + packagesOutput, m.find()); String artApexPath = m.group(1); String artApexName = m.group(2); - CommandResult result = mTestInfo.getDevice().executeShellV2Command( - "pm install --apex " + artApexPath); - assertWithMessage("Failed to install APEX. 
Reason: " + result.toString()) - .that(result.getExitCode()).isEqualTo(0); + assertCommandSucceeds("pm install --apex " + artApexPath); mTestInfo.properties().put(PACKAGE_NAME_KEY, artApexName); @@ -112,14 +130,14 @@ public class OdsignTestUtils { } public Set<String> getMappedArtifacts(String pid, String grepPattern) throws Exception { - final String grepCommand = String.format("grep \"%s\" /proc/%s/maps", grepPattern, pid); - CommandResult result = mTestInfo.getDevice().executeShellV2Command(grepCommand); - assertTrue(result.toString(), result.getExitCode() == 0); + String grepCommand = String.format("grep \"%s\" /proc/%s/maps", grepPattern, pid); Set<String> mappedFiles = new HashSet<>(); - for (String line : result.getStdout().split("\\R")) { + for (String line : assertCommandSucceeds(grepCommand).split("\\R")) { int start = line.indexOf(ART_APEX_DALVIK_CACHE_DIRNAME); - if (line.contains("[")) { - continue; // ignore anonymously mapped sections which are quoted in square braces. + if (line.contains("[") || line.contains("(deleted)")) { + // Ignore anonymously mapped sections, which are quoted in square braces, and + // deleted mapped files. + continue; } mappedFiles.add(line.substring(start)); } @@ -127,110 +145,88 @@ public class OdsignTestUtils { } /** - * Returns the mapped artifacts of the Zygote process, or {@code Optional.empty()} if the - * process does not exist. + * Returns the mapped artifacts of the Zygote process. */ - public Optional<Set<String>> getZygoteLoadedArtifacts(String zygoteName) throws Exception { - final CommandResult result = - mTestInfo.getDevice().executeShellV2Command("pidof " + zygoteName); - if (result.getExitCode() != 0) { - return Optional.empty(); - } + public Set<String> getZygoteLoadedArtifacts(String zygoteName) throws Exception { // There may be multiple Zygote processes when Zygote just forks and has not executed any // app binary. We can take any of the pids. 
// We can't use the "-s" flag when calling `pidof` because the Toybox's `pidof` // implementation is wrong and it outputs multiple pids regardless of the "-s" flag, so we // split the output and take the first pid ourselves. - final String zygotePid = result.getStdout().trim().split("\\s+")[0]; + String zygotePid = assertCommandSucceeds("pidof " + zygoteName).split("\\s+")[0]; assertTrue(!zygotePid.isEmpty()); - final String grepPattern = ART_APEX_DALVIK_CACHE_DIRNAME + ".*boot"; - return Optional.of(getMappedArtifacts(zygotePid, grepPattern)); + String grepPattern = ART_APEX_DALVIK_CACHE_DIRNAME + "/.*/boot"; + return getMappedArtifacts(zygotePid, grepPattern); } public Set<String> getSystemServerLoadedArtifacts() throws Exception { - final CommandResult result = - mTestInfo.getDevice().executeShellV2Command("pidof system_server"); - assertTrue(result.toString(), result.getExitCode() == 0); - final String systemServerPid = result.getStdout().trim(); + String systemServerPid = assertCommandSucceeds("pidof system_server"); assertTrue(!systemServerPid.isEmpty()); - assertTrue( - "There should be exactly one `system_server` process", + assertTrue("There should be exactly one `system_server` process", systemServerPid.matches("\\d+")); // system_server artifacts are in the APEX data dalvik cache and names all contain // the word "@classes". Look for mapped files that match this pattern in the proc map for // system_server. 
- final String grepPattern = ART_APEX_DALVIK_CACHE_DIRNAME + ".*@classes"; + String grepPattern = ART_APEX_DALVIK_CACHE_DIRNAME + "/.*@classes"; return getMappedArtifacts(systemServerPid, grepPattern); } - public void verifyZygoteLoadedArtifacts(String zygoteName, Set<String> mappedArtifacts, - String bootImageStem) throws Exception { - assertTrue("Expect 3 bootclasspath artifacts", mappedArtifacts.size() == 3); - - String allArtifacts = mappedArtifacts.stream().collect(Collectors.joining(",")); + public Set<String> getZygoteExpectedArtifacts(String bootImageStem, String isa) + throws Exception { + Set<String> artifacts = new HashSet<>(); for (String extension : BCP_ARTIFACT_EXTENSIONS) { - final String artifact = bootImageStem + extension; - final boolean found = mappedArtifacts.stream().anyMatch(a -> a.endsWith(artifact)); - assertTrue(zygoteName + " " + artifact + " not found: '" + allArtifacts + "'", found); + artifacts.add(String.format( + "%s/%s/%s%s", ART_APEX_DALVIK_CACHE_DIRNAME, isa, bootImageStem, extension)); } + return artifacts; } - // Verifies that boot image files with the given stem are loaded by Zygote for each instruction - // set. Returns the verified files. - public HashSet<String> verifyZygotesLoadedArtifacts(String bootImageStem) throws Exception { - // There are potentially two zygote processes "zygote" and "zygote64". These are - // instances 32-bit and 64-bit unspecialized app_process processes. - // (frameworks/base/cmds/app_process). 
- int zygoteCount = 0; - HashSet<String> verifiedArtifacts = new HashSet<>(); - for (String zygoteName : ZYGOTE_NAMES) { - final Optional<Set<String>> mappedArtifacts = getZygoteLoadedArtifacts(zygoteName); - if (!mappedArtifacts.isPresent()) { - continue; - } - verifyZygoteLoadedArtifacts(zygoteName, mappedArtifacts.get(), bootImageStem); - zygoteCount += 1; - verifiedArtifacts.addAll(mappedArtifacts.get()); + public Set<String> getZygotesExpectedArtifacts(String bootImageStem) throws Exception { + Set<String> artifacts = new HashSet<>(); + for (String isa : getZygoteNamesAndIsas().values()) { + artifacts.addAll(getZygoteExpectedArtifacts(bootImageStem, isa)); } - assertTrue("No zygote processes found", zygoteCount > 0); - return verifiedArtifacts; + return artifacts; } - public void verifySystemServerLoadedArtifacts() throws Exception { + public Set<String> getZygotesExpectedArtifacts() throws Exception { + return getZygotesExpectedArtifacts("boot"); + } + + public Set<String> getSystemServerExpectedArtifacts() throws Exception { String[] classpathElements = getListFromEnvironmentVariable("SYSTEMSERVERCLASSPATH"); assertTrue("SYSTEMSERVERCLASSPATH is empty", classpathElements.length > 0); String[] standaloneJars = getListFromEnvironmentVariable("STANDALONE_SYSTEMSERVER_JARS"); - String[] allSystemServerJars = Stream - .concat(Arrays.stream(classpathElements), Arrays.stream(standaloneJars)) - .toArray(String[]::new); - - final Set<String> mappedArtifacts = getSystemServerLoadedArtifacts(); - assertTrue( - "No mapped artifacts under " + ART_APEX_DALVIK_CACHE_DIRNAME, - mappedArtifacts.size() > 0); - final String isa = getSystemServerIsa(mappedArtifacts.iterator().next()); - final String isaCacheDirectory = String.format("%s/%s", ART_APEX_DALVIK_CACHE_DIRNAME, isa); - - // Check components in the system_server classpath have mapped artifacts. 
- for (String element : allSystemServerJars) { - String escapedPath = element.substring(1).replace('/', '@'); - for (String extension : APP_ARTIFACT_EXTENSIONS) { - final String fullArtifactPath = - String.format("%s/%s@classes%s", isaCacheDirectory, escapedPath, extension); - assertTrue("Missing " + fullArtifactPath, mappedArtifacts.contains(fullArtifactPath)); - } + String[] allSystemServerJars = + Stream.concat(Arrays.stream(classpathElements), Arrays.stream(standaloneJars)) + .toArray(String[] ::new); + String isa = getSystemServerIsa(); + + Set<String> artifacts = new HashSet<>(); + for (String jar : allSystemServerJars) { + artifacts.addAll(getApexDataDalvikCacheFilenames(jar, isa)); } - for (String mappedArtifact : mappedArtifacts) { - // Check the mapped artifact has a .art, .odex or .vdex extension. - final boolean knownArtifactKind = - APP_ARTIFACT_EXTENSIONS.stream().anyMatch(e -> mappedArtifact.endsWith(e)); - assertTrue("Unknown artifact kind: " + mappedArtifact, knownArtifactKind); + return artifacts; + } + + // Verifies that boot image files with the given stem are loaded by Zygote for each instruction + // set. 
+ public void verifyZygotesLoadedArtifacts(String bootImageStem) throws Exception { + for (var entry : getZygoteNamesAndIsas().entrySet()) { + assertThat(getZygoteLoadedArtifacts(entry.getKey())) + .containsAtLeastElementsIn( + getZygoteExpectedArtifacts(bootImageStem, entry.getValue())); } } + public void verifySystemServerLoadedArtifacts() throws Exception { + assertThat(getSystemServerLoadedArtifacts()) + .containsAtLeastElementsIn(getSystemServerExpectedArtifacts()); + } + public boolean haveCompilationLog() throws Exception { CommandResult result = mTestInfo.getDevice().executeShellV2Command("stat " + ODREFRESH_COMPILATION_LOG); @@ -246,7 +242,7 @@ public class OdsignTestUtils { // store default value and increase time-out for reboot int rebootTimeout = options.getRebootTimeout(); long onlineTimeout = options.getOnlineTimeout(); - options.setRebootTimeout((int)BOOT_COMPLETE_TIMEOUT.toMillis()); + options.setRebootTimeout((int) BOOT_COMPLETE_TIMEOUT.toMillis()); options.setOnlineTimeout(BOOT_COMPLETE_TIMEOUT.toMillis()); mTestInfo.getDevice().setOptions(options); @@ -266,9 +262,10 @@ public class OdsignTestUtils { // `waitForBootComplete` relies on `dev.bootcomplete`. 
mTestInfo.getDevice().executeShellCommand("setprop dev.bootcomplete 0"); mTestInfo.getDevice().executeShellCommand("setprop ctl.restart zygote"); - boolean success = mTestInfo.getDevice() - .waitForBootComplete(RESTART_ZYGOTE_COMPLETE_TIMEOUT.toMillis()); - assertWithMessage("Zygote didn't start in %s", BOOT_COMPLETE_TIMEOUT).that(success) + boolean success = mTestInfo.getDevice().waitForBootComplete( + RESTART_ZYGOTE_COMPLETE_TIMEOUT.toMillis()); + assertWithMessage("Zygote didn't start in %s", BOOT_COMPLETE_TIMEOUT) + .that(success) .isTrue(); } @@ -296,16 +293,45 @@ public class OdsignTestUtils { return new String[0]; } - private String getSystemServerIsa(String mappedArtifact) { - // Artifact path for system server artifacts has the form: - // ART_APEX_DALVIK_CACHE_DIRNAME + "/<arch>/system@framework@some.jar@classes.odex" - String[] pathComponents = mappedArtifact.split("/"); - return pathComponents[pathComponents.length - 2]; + private static String getInstructionSet(String abi) { + String instructionSet = ABI_TO_INSTRUCTION_SET_MAP.get(abi); + assertThat(instructionSet).isNotNull(); + return instructionSet; + } + + public Map<String, String> getZygoteNamesAndIsas() throws Exception { + Map<String, String> namesAndIsas = new HashMap<>(); + String abiList64 = mTestInfo.getDevice().getProperty("ro.product.cpu.abilist64"); + if (abiList64 != null && !abiList64.isEmpty()) { + namesAndIsas.put(ZYGOTE_64_NAME, getInstructionSet(abiList64.split(",")[0])); + } + String abiList32 = mTestInfo.getDevice().getProperty("ro.product.cpu.abilist32"); + if (abiList32 != null && !abiList32.isEmpty()) { + namesAndIsas.put(ZYGOTE_32_NAME, getInstructionSet(abiList32.split(",")[0])); + } + return namesAndIsas; + } + + public String getSystemServerIsa() throws Exception { + return getInstructionSet( + mTestInfo.getDevice().getProperty("ro.product.cpu.abilist").split(",")[0]); + } + + // Keep in sync with `GetApexDataDalvikCacheFilename` in art/libartbase/base/file_utils.cc. 
+ public static Set<String> getApexDataDalvikCacheFilenames(String dexLocation, String isa) + throws Exception { + Set<String> filenames = new HashSet<>(); + String escapedPath = dexLocation.substring(1).replace('/', '@'); + for (String extension : APP_ARTIFACT_EXTENSIONS) { + filenames.add(String.format("%s/%s/%s@classes%s", ART_APEX_DALVIK_CACHE_DIRNAME, isa, + escapedPath, extension)); + } + return filenames; } private long parseFormattedDateTime(String dateTimeStr) throws Exception { - DateTimeFormatter formatter = DateTimeFormatter.ofPattern( - "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z"); + DateTimeFormatter formatter = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z"); ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateTimeStr, formatter); return zonedDateTime.toInstant().toEpochMilli(); } @@ -314,18 +340,14 @@ public class OdsignTestUtils { // We can't use the "-c '%.3Y'" flag when to get the timestamp because the Toybox's `stat` // implementation truncates the timestamp to seconds, which is not accurate enough, so we // use "-c '%%y'" and parse the time ourselves. - String dateTimeStr = mTestInfo.getDevice() - .executeShellCommand(String.format("stat -c '%%y' '%s'", filename)) - .trim(); + String dateTimeStr = assertCommandSucceeds(String.format("stat -c '%%y' '%s'", filename)); return parseFormattedDateTime(dateTimeStr); } public long getCurrentTimeMs() throws Exception { // We can't use getDevice().getDeviceDate() because it truncates the timestamp to seconds, // which is not accurate enough. 
- String dateTimeStr = mTestInfo.getDevice() - .executeShellCommand("date +'%Y-%m-%d %H:%M:%S.%N %z'") - .trim(); + String dateTimeStr = assertCommandSucceeds("date +'%Y-%m-%d %H:%M:%S.%N %z'"); return parseFormattedDateTime(dateTimeStr); } @@ -368,4 +390,88 @@ public class OdsignTestUtils { } } + public File copyResourceToFile(String resourceName) throws Exception { + File file = File.createTempFile("odsign_e2e_tests", ".tmp"); + file.deleteOnExit(); + try (OutputStream outputStream = new FileOutputStream(file); + InputStream inputStream = getClass().getResourceAsStream(resourceName)) { + assertThat(ByteStreams.copy(inputStream, outputStream)).isGreaterThan(0); + } + return file; + } + + public void assertModifiedAfter(Set<String> artifacts, long timeMs) throws Exception { + for (String artifact : artifacts) { + long modifiedTime = getModifiedTimeMs(artifact); + assertTrue( + String.format( + "Artifact %s is not re-compiled. Modified time: %d, Reference time: %d", + artifact, modifiedTime, timeMs), + modifiedTime > timeMs); + } + } + + public void assertNotModifiedAfter(Set<String> artifacts, long timeMs) throws Exception { + for (String artifact : artifacts) { + long modifiedTime = getModifiedTimeMs(artifact); + assertTrue(String.format("Artifact %s is unexpectedly re-compiled. 
" + + "Modified time: %d, Reference time: %d", + artifact, modifiedTime, timeMs), + modifiedTime < timeMs); + } + } + + public void assertFilesExist(Set<String> files) throws Exception { + assertThat(getExistingFiles(files)).containsExactlyElementsIn(files); + } + + public void assertFilesNotExist(Set<String> files) throws Exception { + assertThat(getExistingFiles(files)).isEmpty(); + } + + private Set<String> getExistingFiles(Set<String> files) throws Exception { + Set<String> existingFiles = new HashSet<>(); + for (String file : files) { + if (mTestInfo.getDevice().doesFileExist(file)) { + existingFiles.add(file); + } + } + return existingFiles; + } + + public static String replaceExtension(String filename, String extension) throws Exception { + int index = filename.lastIndexOf("."); + assertTrue("Extension not found in filename: " + filename, index != -1); + return filename.substring(0, index) + extension; + } + + public void runOdrefresh() throws Exception { + runOdrefresh("" /* extraArgs */); + } + + public void runOdrefresh(String extraArgs) throws Exception { + mTestInfo.getDevice().executeShellV2Command(ODREFRESH_BIN + " --check"); + mTestInfo.getDevice().executeShellV2Command( + ODREFRESH_BIN + " --partial-compilation --no-refresh " + extraArgs + " --compile"); + } + + public boolean areAllApexesFactoryInstalled() throws Exception { + Document doc = loadXml(APEX_INFO_FILE); + NodeList list = doc.getElementsByTagName("apex-info"); + for (int i = 0; i < list.getLength(); i++) { + Element node = (Element) list.item(i); + if (node.getAttribute("isActive").equals("true") + && node.getAttribute("isFactory").equals("false")) { + return false; + } + } + return true; + } + + private Document loadXml(String remoteXmlFile) throws Exception { + File localFile = mTestInfo.getDevice().pullFile(remoteXmlFile); + assertThat(localFile).isNotNull(); + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + return builder.parse(localFile); + } 
} |