diff options
author | Frederick Mayle <fmayle@google.com> | 2024-04-23 18:55:41 -0700 |
---|---|---|
committer | Frederick Mayle <fmayle@google.com> | 2024-04-25 13:04:55 -0700 |
commit | b601a7aeaf2790b0ecf9444ecbd10caa6fad2b8d (patch) | |
tree | 6279d2cb81d11dcd58a5e5464bffba8ebf8d432b | |
parent | 5f933bd71909b14816adc0131572296b39d430b5 (diff) | |
download | cuttlefish-b601a7aeaf2790b0ecf9444ecbd10caa6fad2b8d.tar.gz |
use `crosvm resume` to detect RESTORE_COMPLETE
Instead of waiting for logcat output, run the `crosvm resume` command to
block until the vCPUs have begun.
CvdBootStateMachine starts a thread to run that command. When the
command completes, it notifies the other CvdBootStateMachine thread via
a pipe, which then sends a value to the `--daemon` parent indicating
that the restore is complete (using the same mechanism as for a normal
boot complete).
Bug: 333111958
Test: m && (stop_cvd; launch_cvd --noresume --enable-sandbox=false --daemon) && snapshot_util_cvd --subcmd=snapshot_take --auto_suspend --force --snapshot_path=$HOME/cf-snapshot && stop_cvd && launch_cvd --snapshot_path=$HOME/cf-snapshot
Change-Id: Ia515c0485a7d6ec5cda611a7423674422b9f28d5
-rw-r--r-- | host/commands/logcat_receiver/main.cpp | 33 | ||||
-rw-r--r-- | host/commands/run_cvd/boot_state_machine.cc | 192 | ||||
-rw-r--r-- | host/commands/run_cvd/boot_state_machine.h | 1 | ||||
-rw-r--r-- | host/commands/run_cvd/server_loop_impl.cpp | 1 | ||||
-rw-r--r-- | host/libs/config/cuttlefish_config.h | 1 | ||||
-rw-r--r-- | host/libs/config/cuttlefish_config_instance.cpp | 4 | ||||
-rw-r--r-- | host/libs/vm_manager/crosvm_manager.cpp | 27 | ||||
-rw-r--r-- | host/libs/vm_manager/crosvm_manager.h | 2 | ||||
-rw-r--r-- | host/libs/vm_manager/vm_manager.h | 8 |
9 files changed, 162 insertions, 107 deletions
diff --git a/host/commands/logcat_receiver/main.cpp b/host/commands/logcat_receiver/main.cpp index d7c9dbc94..3e08f750a 100644 --- a/host/commands/logcat_receiver/main.cpp +++ b/host/commands/logcat_receiver/main.cpp @@ -64,7 +64,6 @@ int main(int argc, char** argv) { auto logcat_file = cuttlefish::SharedFD::Open(path.c_str(), O_CREAT | O_APPEND | O_WRONLY, 0666); - bool first_iter = true; // Server loop while (true) { char buff[1024]; @@ -74,35 +73,9 @@ int main(int argc, char** argv) { break; } auto written = cuttlefish::WriteAll(logcat_file, buff, read); - CHECK(written == read) - << "Error writing to log file: " << logcat_file->StrError() - << ". This is unrecoverable."; - if (first_iter) { - first_iter = false; - if (cuttlefish::IsRestoring(*config)) { - cuttlefish::SharedFD restore_pipe = cuttlefish::SharedFD::Open( - instance.restore_pipe_name().c_str(), O_WRONLY); - if (!restore_pipe->IsOpen()) { - LOG(ERROR) << "Error opening restore pipe: " - << restore_pipe->StrError(); - return 2; - } - cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open( - instance.restore_adbd_pipe_name().c_str(), O_WRONLY); - if (!restore_adbd_pipe->IsOpen()) { - LOG(ERROR) << "Error opening restore pipe: " - << restore_adbd_pipe->StrError(); - return 2; - } - - CHECK(cuttlefish::WriteAll(restore_pipe, "1") == 1) - << "Error writing to restore pipe: " << restore_pipe->StrError() - << ". This is unrecoverable."; - CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1) - << "Error writing to adbd restore pipe: " - << restore_adbd_pipe->StrError() << ". This is unrecoverable."; - } - } + CHECK(written == read) << "Error writing to log file: " + << logcat_file->StrError() + << ". This is unrecoverable."; } logcat_file->Close(); diff --git a/host/commands/run_cvd/boot_state_machine.cc b/host/commands/run_cvd/boot_state_machine.cc index 67060e07a..242c0a5e3 100644 --- a/host/commands/run_cvd/boot_state_machine.cc +++ b/host/commands/run_cvd/boot_state_machine.cc @@ -43,72 +43,47 @@ namespace { // Forks run_cvd into a daemonized child process. The current process continues // only until the child has signalled that the boot is finished. // -// How the child signals when the boot is finished depends on whether we are -// restoring from a snapshot. -// -// * When booting normally, `DaemonizeLauncher` returns the write end of a -// pipe. The child is expected to write a `RunnerExitCodes` into the pipe -// when the boot finishes. -// * When restoring from a snapshot, `DaemonizeLauncher` returns an invalid -// `SharedFD`. The child is expected to write an arbitrary byte to the -// instance's "restore_pipe" and then the parent assumes the restore was -// successful. -// -// We should consider unifying these two types of pipes. +// `DaemonizeLauncher` returns the write end of a pipe. The child is expected +// to write a `RunnerExitCodes` into the pipe when the boot finishes. Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) { auto instance = config.ForDefaultInstance(); - SharedFD read_end, write_end, restore_pipe_read; - if (IsRestoring(config)) { - restore_pipe_read = - CF_EXPECT(SharedFD::Fifo(instance.restore_pipe_name(), 0600), - "Unable to create restore fifo"); - } else { - CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe"); - } + SharedFD read_end, write_end; + CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe"); auto pid = fork(); if (pid) { // Explicitly close here, otherwise we may end up reading forever if the // child process dies. write_end->Close(); RunnerExitCodes exit_code; - if (IsRestoring(config)) { - if (!restore_pipe_read->IsOpen()) { - LOG(ERROR) << "Error opening restore pipe: " - << restore_pipe_read->StrError(); - std::exit(RunnerExitCodes::kDaemonizationError); - } - // Try to read from restore pipe. IF successfully reads, that means logcat - // has started, and the VM has resumed. Exit the thread. - char buff[1]; - auto read = restore_pipe_read->Read(buff, 1); - if (read <= 0) { - LOG(ERROR) << "Could not read restore pipe: " - << restore_pipe_read->StrError(); - std::exit(RunnerExitCodes::kDaemonizationError); - } - exit_code = RunnerExitCodes::kSuccess; - LOG(INFO) << "Virtual device restored successfully"; - std::exit(exit_code); - } auto bytes_read = read_end->Read(&exit_code, sizeof(exit_code)); if (bytes_read != sizeof(exit_code)) { LOG(ERROR) << "Failed to read a complete exit code, read " << bytes_read << " bytes only instead of the expected " << sizeof(exit_code); exit_code = RunnerExitCodes::kPipeIOError; } else if (exit_code == RunnerExitCodes::kSuccess) { - LOG(INFO) << "Virtual device booted successfully"; + if (IsRestoring(config)) { + LOG(INFO) << "Virtual device restored successfully"; + } else { + LOG(INFO) << "Virtual device booted successfully"; + } } else if (exit_code == RunnerExitCodes::kVirtualDeviceBootFailed) { - LOG(ERROR) << "Virtual device failed to boot"; + if (IsRestoring(config)) { + LOG(ERROR) << "Virtual device failed to restore"; + } else { + LOG(ERROR) << "Virtual device failed to boot"; + } if (!instance.fail_fast()) { LOG(ERROR) << "Device has been left running for debug"; } } else { LOG(ERROR) << "Unexpected exit code: " << exit_code; } - if (exit_code == RunnerExitCodes::kSuccess) { - LOG(INFO) << kBootCompletedMessage; - } else { - LOG(INFO) << kBootFailedMessage; + if (!IsRestoring(config)) { + if (exit_code == RunnerExitCodes::kSuccess) { + LOG(INFO) << kBootCompletedMessage; + } else { + LOG(INFO) << kBootFailedMessage; + } } std::exit(exit_code); } else { @@ -145,12 +120,8 @@ Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) { std::exit(RunnerExitCodes::kDaemonizationError); } - if (IsRestoring(config)) { - return {}; - } else { - read_end->Close(); - return write_end; - } + read_end->Close(); + return write_end; } } @@ -183,11 +154,15 @@ Result<SharedFD> ProcessLeader( class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { public: INJECT( - CvdBootStateMachine(AutoSetup<ProcessLeader>::Type& process_leader, + CvdBootStateMachine(const CuttlefishConfig& config, + AutoSetup<ProcessLeader>::Type& process_leader, KernelLogPipeProvider& kernel_log_pipe_provider, + const vm_manager::VmManager& vm_manager, const CuttlefishConfig::InstanceSpecific& instance)) - : process_leader_(process_leader), + : config_(config), + process_leader_(process_leader), kernel_log_pipe_provider_(kernel_log_pipe_provider), + vm_manager_(vm_manager), instance_(instance), state_(kBootStarted) {} @@ -200,6 +175,9 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { if (boot_event_handler_.joinable()) { boot_event_handler_.join(); } + if (restore_complete_handler_.joinable()) { + restore_complete_handler_.join(); + } } // SetupFeature @@ -228,12 +206,48 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { SharedFD boot_events_pipe = kernel_log_pipe_provider_.KernelLogPipe(); CF_EXPECTF(boot_events_pipe->IsOpen(), "Could not get boot events pipe: {}", boot_events_pipe->StrError()); - boot_event_handler_ = std::thread( - [this, boot_events_pipe]() { ThreadLoop(boot_events_pipe); }); + + SharedFD restore_complete_pipe, restore_complete_pipe_write; + if (IsRestoring(config_)) { + CF_EXPECT( + SharedFD::Pipe(&restore_complete_pipe, &restore_complete_pipe_write), + "unable to create pipe"); + + // Unlike `boot_event_handler_`, this doesn't support graceful shutdown, + // it blocks until it finishes its work. + restore_complete_handler_ = + std::thread([this, restore_complete_pipe_write]() { + const auto result = vm_manager_.WaitForRestoreComplete(); + CHECK(result.ok()) << "Failed to wait for restore complete: " + << result.error().FormatForEnv(); + + cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open( + config_.ForDefaultInstance().restore_adbd_pipe_name().c_str(), + O_WRONLY); + CHECK(restore_adbd_pipe->IsOpen()) + << "Error opening adbd restore pipe: " + << restore_adbd_pipe->StrError(); + CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1) + << "Error writing to adbd restore pipe: " + << restore_adbd_pipe->StrError() << ". This is unrecoverable."; + + // Done last so that adb is more likely to be ready. + CHECK(cuttlefish::WriteAll(restore_complete_pipe_write, "1") == 1) + << "Error writing to restore complete pipe: " + << restore_complete_pipe_write->StrError() + << ". This is unrecoverable."; + }); + } + + boot_event_handler_ = + std::thread([this, boot_events_pipe, restore_complete_pipe]() { + ThreadLoop(boot_events_pipe, restore_complete_pipe); + }); + return {}; } - void ThreadLoop(SharedFD boot_events_pipe) { + void ThreadLoop(SharedFD boot_events_pipe, SharedFD restore_complete_pipe) { while (true) { std::vector<PollSharedFd> poll_shared_fd = { { @@ -241,17 +255,26 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { .events = POLLIN | POLLHUP, }, { + .fd = restore_complete_pipe, + .events = restore_complete_pipe->IsOpen() + ? (short)(POLLIN | POLLHUP) + : (short)0, + }, + { .fd = interrupt_fd_read_, .events = POLLIN | POLLHUP, - }}; + }, + }; int result = SharedFD::Poll(poll_shared_fd, -1); - if (poll_shared_fd[1].revents & POLLIN) { + // interrupt_fd_read_ + if (poll_shared_fd[2].revents & POLLIN) { return; } if (result < 0) { PLOG(FATAL) << "Failed to call Select"; return; } + // boot_events_pipe if (poll_shared_fd[0].revents & POLLHUP) { LOG(ERROR) << "Failed to read a complete kernel log boot event."; state_ |= kGuestBootFailed; @@ -259,23 +282,46 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { break; } } - if (!(poll_shared_fd[0].revents & POLLIN)) { - continue; + if (poll_shared_fd[0].revents & POLLIN) { + auto sent_code = OnBootEvtReceived(boot_events_pipe); + if (sent_code) { + if (!BootCompleted()) { + if (!instance_.fail_fast()) { + LOG(ERROR) << "Device running, likely in a bad state"; + break; + } + auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5); + CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv(); + auto fail_res = RunLauncherAction( + *monitor_res, LauncherAction::kFail, std::optional<int>()); + CHECK(fail_res.ok()) << fail_res.error().FormatForEnv(); + } + break; + } } - auto sent_code = OnBootEvtReceived(boot_events_pipe); - if (sent_code) { - if (!BootCompleted()) { - if (!instance_.fail_fast()) { - LOG(ERROR) << "Device running, likely in a bad state"; + // restore_complete_pipe + if (poll_shared_fd[1].revents & POLLIN) { + char buff[1]; + auto read = restore_complete_pipe->Read(buff, 1); + if (read <= 0) { + LOG(ERROR) << "Could not read restore pipe: " + << restore_complete_pipe->StrError(); + state_ |= kGuestBootFailed; + if (MaybeWriteNotification()) { break; } - auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5); - CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv(); - auto fail_res = RunLauncherAction(*monitor_res, LauncherAction::kFail, - std::optional<int>()); - CHECK(fail_res.ok()) << fail_res.error().FormatForEnv(); } - break; + state_ |= kGuestBootCompleted; + if (MaybeWriteNotification()) { + break; + } + } + if (poll_shared_fd[1].revents & POLLHUP) { + LOG(ERROR) << "restore_complete_pipe closed unexpectedly"; + state_ |= kGuestBootFailed; + if (MaybeWriteNotification()) { + break; + } } } } @@ -325,11 +371,14 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { return BootCompleted() || (state_ & kGuestBootFailed); } + const CuttlefishConfig& config_; AutoSetup<ProcessLeader>::Type& process_leader_; KernelLogPipeProvider& kernel_log_pipe_provider_; + const vm_manager::VmManager& vm_manager_; const CuttlefishConfig::InstanceSpecific& instance_; std::thread boot_event_handler_; + std::thread restore_complete_handler_; SharedFD fg_launcher_pipe_; SharedFD reboot_notification_; SharedFD interrupt_fd_read_; @@ -344,6 +393,7 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer { fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider, const CuttlefishConfig::InstanceSpecific, + const vm_manager::VmManager, AutoSetup<ValidateTapDevices>::Type>> bootStateMachineComponent() { return fruit::createComponent() diff --git a/host/commands/run_cvd/boot_state_machine.h b/host/commands/run_cvd/boot_state_machine.h index 04de24801..4b742ac4a 100644 --- a/host/commands/run_cvd/boot_state_machine.h +++ b/host/commands/run_cvd/boot_state_machine.h @@ -24,6 +24,7 @@ namespace cuttlefish { fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider, const CuttlefishConfig::InstanceSpecific, + const vm_manager::VmManager, AutoSetup<ValidateTapDevices>::Type>> bootStateMachineComponent(); diff --git a/host/commands/run_cvd/server_loop_impl.cpp b/host/commands/run_cvd/server_loop_impl.cpp index 8506245cf..61f1fa6bb 100644 --- a/host/commands/run_cvd/server_loop_impl.cpp +++ b/host/commands/run_cvd/server_loop_impl.cpp @@ -297,7 +297,6 @@ void ServerLoopImpl::DeleteFifos() { instance_.console_in_pipe_name(), instance_.console_out_pipe_name(), instance_.logcat_pipe_name(), - instance_.restore_pipe_name(), instance_.PerInstanceInternalPath("keymaster_fifo_vm.in"), instance_.PerInstanceInternalPath("keymaster_fifo_vm.out"), instance_.PerInstanceInternalPath("keymint_fifo_vm.in"), diff --git a/host/libs/config/cuttlefish_config.h b/host/libs/config/cuttlefish_config.h index 8de591aa8..71448ea97 100644 --- a/host/libs/config/cuttlefish_config.h +++ b/host/libs/config/cuttlefish_config.h @@ -398,7 +398,6 @@ class CuttlefishConfig { std::string gnss_out_pipe_name() const; std::string logcat_pipe_name() const; - std::string restore_pipe_name() const; std::string restore_adbd_pipe_name() const; std::string launcher_log_path() const; diff --git a/host/libs/config/cuttlefish_config_instance.cpp b/host/libs/config/cuttlefish_config_instance.cpp index 41c319e5b..455f53af5 100644 --- a/host/libs/config/cuttlefish_config_instance.cpp +++ b/host/libs/config/cuttlefish_config_instance.cpp @@ -1187,10 +1187,6 @@ std::string CuttlefishConfig::InstanceSpecific::logcat_pipe_name() const { return AbsolutePath(PerInstanceInternalPath("logcat-pipe")); } -std::string CuttlefishConfig::InstanceSpecific::restore_pipe_name() const { - return AbsolutePath(PerInstanceInternalPath("restore-pipe")); -} - std::string CuttlefishConfig::InstanceSpecific::restore_adbd_pipe_name() const { return AbsolutePath(PerInstanceInternalPath("restore-pipe-adbd")); } diff --git a/host/libs/vm_manager/crosvm_manager.cpp b/host/libs/vm_manager/crosvm_manager.cpp index c26da188e..899e39653 100644 --- a/host/libs/vm_manager/crosvm_manager.cpp +++ b/host/libs/vm_manager/crosvm_manager.cpp @@ -880,5 +880,32 @@ Result<std::vector<MonitorCommand>> CrosvmManager::StartCommands( return commands; } +Result<void> CrosvmManager::WaitForRestoreComplete() const { + auto instance = CF_EXPECT(CuttlefishConfig::Get())->ForDefaultInstance(); + + // Wait for the control socket to exist. It is created early in crosvm's + // startup sequence, but the process may not even have been exec'd by CF at + // this point. + while (!FileExists(instance.CrosvmSocketPath())) { + usleep(50000); // 50 ms, arbitrarily chosen + } + + // Ask crosvm to resume the VM. crosvm promises to not complete this command + // until the vCPUs are started (even if it was never suspended to begin + // with). + auto infop = CF_EXPECT(Execute( + std::vector<std::string>{ + instance.crosvm_binary(), + "resume", + instance.CrosvmSocketPath(), + "--full", + }, + SubprocessOptions(), WEXITED)); + CF_EXPECT_EQ(infop.si_code, CLD_EXITED); + CF_EXPECTF(infop.si_status == 0, "crosvm resume returns non zero code {}", + infop.si_status); + return {}; +} + } // namespace vm_manager } // namespace cuttlefish diff --git a/host/libs/vm_manager/crosvm_manager.h b/host/libs/vm_manager/crosvm_manager.h index a1433d2a2..cb119fefb 100644 --- a/host/libs/vm_manager/crosvm_manager.h +++ b/host/libs/vm_manager/crosvm_manager.h @@ -46,6 +46,8 @@ class CrosvmManager : public VmManager { const CuttlefishConfig& config, std::vector<VmmDependencyCommand*>& dependencyCommands) override; + Result<void> WaitForRestoreComplete() const override; + private: static constexpr int kCrosvmVmResetExitCode = 32; }; diff --git a/host/libs/vm_manager/vm_manager.h b/host/libs/vm_manager/vm_manager.h index b3fa4be6c..29fb68a8e 100644 --- a/host/libs/vm_manager/vm_manager.h +++ b/host/libs/vm_manager/vm_manager.h @@ -101,6 +101,14 @@ class VmManager { virtual Result<std::vector<MonitorCommand>> StartCommands( const CuttlefishConfig& config, std::vector<VmmDependencyCommand*>& dependencyCommands) = 0; + + // Block until the restore work is finished and the guest is running. Only + // called if a snapshot is being restored. + // + // Must be thread safe. + virtual Result<void> WaitForRestoreComplete() const { + return CF_ERR("not implemented"); + } }; fruit::Component<fruit::Required<const CuttlefishConfig, |