aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Mayle <fmayle@google.com>2024-04-23 18:55:41 -0700
committerFrederick Mayle <fmayle@google.com>2024-04-25 13:04:55 -0700
commitb601a7aeaf2790b0ecf9444ecbd10caa6fad2b8d (patch)
tree6279d2cb81d11dcd58a5e5464bffba8ebf8d432b
parent5f933bd71909b14816adc0131572296b39d430b5 (diff)
downloadcuttlefish-b601a7aeaf2790b0ecf9444ecbd10caa6fad2b8d.tar.gz
use `crosvm resume` to detect RESTORE_COMPLETE
Instead of waiting for logcat output, run the `crosvm resume` command to block until the vCPUs have begun. CvdBootStateMachine starts a thread to run that command. When the command completes, it notifies the other CvdBootStateMachine thread via a pipe, which then sends a value to the `--daemon` parent indicating that the restore is complete (using the same mechanism as for a normal boot complete). Bug: 333111958 Test: m && (stop_cvd; launch_cvd --noresume --enable-sandbox=false --daemon) && snapshot_util_cvd --subcmd=snapshot_take --auto_suspend --force --snapshot_path=$HOME/cf-snapshot && stop_cvd && launch_cvd --snapshot_path=$HOME/cf-snapshot Change-Id: Ia515c0485a7d6ec5cda611a7423674422b9f28d5
-rw-r--r--host/commands/logcat_receiver/main.cpp33
-rw-r--r--host/commands/run_cvd/boot_state_machine.cc192
-rw-r--r--host/commands/run_cvd/boot_state_machine.h1
-rw-r--r--host/commands/run_cvd/server_loop_impl.cpp1
-rw-r--r--host/libs/config/cuttlefish_config.h1
-rw-r--r--host/libs/config/cuttlefish_config_instance.cpp4
-rw-r--r--host/libs/vm_manager/crosvm_manager.cpp27
-rw-r--r--host/libs/vm_manager/crosvm_manager.h2
-rw-r--r--host/libs/vm_manager/vm_manager.h8
9 files changed, 162 insertions, 107 deletions
diff --git a/host/commands/logcat_receiver/main.cpp b/host/commands/logcat_receiver/main.cpp
index d7c9dbc94..3e08f750a 100644
--- a/host/commands/logcat_receiver/main.cpp
+++ b/host/commands/logcat_receiver/main.cpp
@@ -64,7 +64,6 @@ int main(int argc, char** argv) {
auto logcat_file =
cuttlefish::SharedFD::Open(path.c_str(), O_CREAT | O_APPEND | O_WRONLY, 0666);
- bool first_iter = true;
// Server loop
while (true) {
char buff[1024];
@@ -74,35 +73,9 @@ int main(int argc, char** argv) {
break;
}
auto written = cuttlefish::WriteAll(logcat_file, buff, read);
- CHECK(written == read)
- << "Error writing to log file: " << logcat_file->StrError()
- << ". This is unrecoverable.";
- if (first_iter) {
- first_iter = false;
- if (cuttlefish::IsRestoring(*config)) {
- cuttlefish::SharedFD restore_pipe = cuttlefish::SharedFD::Open(
- instance.restore_pipe_name().c_str(), O_WRONLY);
- if (!restore_pipe->IsOpen()) {
- LOG(ERROR) << "Error opening restore pipe: "
- << restore_pipe->StrError();
- return 2;
- }
- cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open(
- instance.restore_adbd_pipe_name().c_str(), O_WRONLY);
- if (!restore_adbd_pipe->IsOpen()) {
- LOG(ERROR) << "Error opening restore pipe: "
- << restore_adbd_pipe->StrError();
- return 2;
- }
-
- CHECK(cuttlefish::WriteAll(restore_pipe, "1") == 1)
- << "Error writing to restore pipe: " << restore_pipe->StrError()
- << ". This is unrecoverable.";
- CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1)
- << "Error writing to adbd restore pipe: "
- << restore_adbd_pipe->StrError() << ". This is unrecoverable.";
- }
- }
+ CHECK(written == read) << "Error writing to log file: "
+ << logcat_file->StrError()
+ << ". This is unrecoverable.";
}
logcat_file->Close();
diff --git a/host/commands/run_cvd/boot_state_machine.cc b/host/commands/run_cvd/boot_state_machine.cc
index 67060e07a..242c0a5e3 100644
--- a/host/commands/run_cvd/boot_state_machine.cc
+++ b/host/commands/run_cvd/boot_state_machine.cc
@@ -43,72 +43,47 @@ namespace {
// Forks run_cvd into a daemonized child process. The current process continues
// only until the child has signalled that the boot is finished.
//
-// How the child signals when the boot is finished depends on whether we are
-// restoring from a snapshot.
-//
-// * When booting normally, `DaemonizeLauncher` returns the write end of a
-// pipe. The child is expected to write a `RunnerExitCodes` into the pipe
-// when the boot finishes.
-// * When restoring from a snapshot, `DaemonizeLauncher` returns an invalid
-// `SharedFD`. The child is expected to write an arbitrary byte to the
-// instance's "restore_pipe" and then the parent assumes the restore was
-// successful.
-//
-// We should consider unifying these two types of pipes.
+// `DaemonizeLauncher` returns the write end of a pipe. The child is expected
+// to write a `RunnerExitCodes` into the pipe when the boot finishes.
Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) {
auto instance = config.ForDefaultInstance();
- SharedFD read_end, write_end, restore_pipe_read;
- if (IsRestoring(config)) {
- restore_pipe_read =
- CF_EXPECT(SharedFD::Fifo(instance.restore_pipe_name(), 0600),
- "Unable to create restore fifo");
- } else {
- CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe");
- }
+ SharedFD read_end, write_end;
+ CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe");
auto pid = fork();
if (pid) {
// Explicitly close here, otherwise we may end up reading forever if the
// child process dies.
write_end->Close();
RunnerExitCodes exit_code;
- if (IsRestoring(config)) {
- if (!restore_pipe_read->IsOpen()) {
- LOG(ERROR) << "Error opening restore pipe: "
- << restore_pipe_read->StrError();
- std::exit(RunnerExitCodes::kDaemonizationError);
- }
- // Try to read from restore pipe. IF successfully reads, that means logcat
- // has started, and the VM has resumed. Exit the thread.
- char buff[1];
- auto read = restore_pipe_read->Read(buff, 1);
- if (read <= 0) {
- LOG(ERROR) << "Could not read restore pipe: "
- << restore_pipe_read->StrError();
- std::exit(RunnerExitCodes::kDaemonizationError);
- }
- exit_code = RunnerExitCodes::kSuccess;
- LOG(INFO) << "Virtual device restored successfully";
- std::exit(exit_code);
- }
auto bytes_read = read_end->Read(&exit_code, sizeof(exit_code));
if (bytes_read != sizeof(exit_code)) {
LOG(ERROR) << "Failed to read a complete exit code, read " << bytes_read
<< " bytes only instead of the expected " << sizeof(exit_code);
exit_code = RunnerExitCodes::kPipeIOError;
} else if (exit_code == RunnerExitCodes::kSuccess) {
- LOG(INFO) << "Virtual device booted successfully";
+ if (IsRestoring(config)) {
+ LOG(INFO) << "Virtual device restored successfully";
+ } else {
+ LOG(INFO) << "Virtual device booted successfully";
+ }
} else if (exit_code == RunnerExitCodes::kVirtualDeviceBootFailed) {
- LOG(ERROR) << "Virtual device failed to boot";
+ if (IsRestoring(config)) {
+ LOG(ERROR) << "Virtual device failed to restore";
+ } else {
+ LOG(ERROR) << "Virtual device failed to boot";
+ }
if (!instance.fail_fast()) {
LOG(ERROR) << "Device has been left running for debug";
}
} else {
LOG(ERROR) << "Unexpected exit code: " << exit_code;
}
- if (exit_code == RunnerExitCodes::kSuccess) {
- LOG(INFO) << kBootCompletedMessage;
- } else {
- LOG(INFO) << kBootFailedMessage;
+ if (!IsRestoring(config)) {
+ if (exit_code == RunnerExitCodes::kSuccess) {
+ LOG(INFO) << kBootCompletedMessage;
+ } else {
+ LOG(INFO) << kBootFailedMessage;
+ }
}
std::exit(exit_code);
} else {
@@ -145,12 +120,8 @@ Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) {
std::exit(RunnerExitCodes::kDaemonizationError);
}
- if (IsRestoring(config)) {
- return {};
- } else {
- read_end->Close();
- return write_end;
- }
+ read_end->Close();
+ return write_end;
}
}
@@ -183,11 +154,15 @@ Result<SharedFD> ProcessLeader(
class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
public:
INJECT(
- CvdBootStateMachine(AutoSetup<ProcessLeader>::Type& process_leader,
+ CvdBootStateMachine(const CuttlefishConfig& config,
+ AutoSetup<ProcessLeader>::Type& process_leader,
KernelLogPipeProvider& kernel_log_pipe_provider,
+ const vm_manager::VmManager& vm_manager,
const CuttlefishConfig::InstanceSpecific& instance))
- : process_leader_(process_leader),
+ : config_(config),
+ process_leader_(process_leader),
kernel_log_pipe_provider_(kernel_log_pipe_provider),
+ vm_manager_(vm_manager),
instance_(instance),
state_(kBootStarted) {}
@@ -200,6 +175,9 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
if (boot_event_handler_.joinable()) {
boot_event_handler_.join();
}
+ if (restore_complete_handler_.joinable()) {
+ restore_complete_handler_.join();
+ }
}
// SetupFeature
@@ -228,12 +206,48 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
SharedFD boot_events_pipe = kernel_log_pipe_provider_.KernelLogPipe();
CF_EXPECTF(boot_events_pipe->IsOpen(), "Could not get boot events pipe: {}",
boot_events_pipe->StrError());
- boot_event_handler_ = std::thread(
- [this, boot_events_pipe]() { ThreadLoop(boot_events_pipe); });
+
+ SharedFD restore_complete_pipe, restore_complete_pipe_write;
+ if (IsRestoring(config_)) {
+ CF_EXPECT(
+ SharedFD::Pipe(&restore_complete_pipe, &restore_complete_pipe_write),
+ "unable to create pipe");
+
+ // Unlike `boot_event_handler_`, this doesn't support graceful shutdown,
+ // it blocks until it finishes its work.
+ restore_complete_handler_ =
+ std::thread([this, restore_complete_pipe_write]() {
+ const auto result = vm_manager_.WaitForRestoreComplete();
+ CHECK(result.ok()) << "Failed to wait for restore complete: "
+ << result.error().FormatForEnv();
+
+ cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open(
+ config_.ForDefaultInstance().restore_adbd_pipe_name().c_str(),
+ O_WRONLY);
+ CHECK(restore_adbd_pipe->IsOpen())
+ << "Error opening adbd restore pipe: "
+ << restore_adbd_pipe->StrError();
+ CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1)
+ << "Error writing to adbd restore pipe: "
+ << restore_adbd_pipe->StrError() << ". This is unrecoverable.";
+
+ // Done last so that adb is more likely to be ready.
+ CHECK(cuttlefish::WriteAll(restore_complete_pipe_write, "1") == 1)
+ << "Error writing to restore complete pipe: "
+ << restore_complete_pipe_write->StrError()
+ << ". This is unrecoverable.";
+ });
+ }
+
+ boot_event_handler_ =
+ std::thread([this, boot_events_pipe, restore_complete_pipe]() {
+ ThreadLoop(boot_events_pipe, restore_complete_pipe);
+ });
+
return {};
}
- void ThreadLoop(SharedFD boot_events_pipe) {
+ void ThreadLoop(SharedFD boot_events_pipe, SharedFD restore_complete_pipe) {
while (true) {
std::vector<PollSharedFd> poll_shared_fd = {
{
@@ -241,17 +255,26 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
.events = POLLIN | POLLHUP,
},
{
+ .fd = restore_complete_pipe,
+ .events = restore_complete_pipe->IsOpen()
+ ? (short)(POLLIN | POLLHUP)
+ : (short)0,
+ },
+ {
.fd = interrupt_fd_read_,
.events = POLLIN | POLLHUP,
- }};
+ },
+ };
int result = SharedFD::Poll(poll_shared_fd, -1);
- if (poll_shared_fd[1].revents & POLLIN) {
+ // interrupt_fd_read_
+ if (poll_shared_fd[2].revents & POLLIN) {
return;
}
if (result < 0) {
PLOG(FATAL) << "Failed to call Select";
return;
}
+ // boot_events_pipe
if (poll_shared_fd[0].revents & POLLHUP) {
LOG(ERROR) << "Failed to read a complete kernel log boot event.";
state_ |= kGuestBootFailed;
@@ -259,23 +282,46 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
break;
}
}
- if (!(poll_shared_fd[0].revents & POLLIN)) {
- continue;
+ if (poll_shared_fd[0].revents & POLLIN) {
+ auto sent_code = OnBootEvtReceived(boot_events_pipe);
+ if (sent_code) {
+ if (!BootCompleted()) {
+ if (!instance_.fail_fast()) {
+ LOG(ERROR) << "Device running, likely in a bad state";
+ break;
+ }
+ auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5);
+ CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv();
+ auto fail_res = RunLauncherAction(
+ *monitor_res, LauncherAction::kFail, std::optional<int>());
+ CHECK(fail_res.ok()) << fail_res.error().FormatForEnv();
+ }
+ break;
+ }
}
- auto sent_code = OnBootEvtReceived(boot_events_pipe);
- if (sent_code) {
- if (!BootCompleted()) {
- if (!instance_.fail_fast()) {
- LOG(ERROR) << "Device running, likely in a bad state";
+ // restore_complete_pipe
+ if (poll_shared_fd[1].revents & POLLIN) {
+ char buff[1];
+ auto read = restore_complete_pipe->Read(buff, 1);
+ if (read <= 0) {
+ LOG(ERROR) << "Could not read restore pipe: "
+ << restore_complete_pipe->StrError();
+ state_ |= kGuestBootFailed;
+ if (MaybeWriteNotification()) {
break;
}
- auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5);
- CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv();
- auto fail_res = RunLauncherAction(*monitor_res, LauncherAction::kFail,
- std::optional<int>());
- CHECK(fail_res.ok()) << fail_res.error().FormatForEnv();
}
- break;
+ state_ |= kGuestBootCompleted;
+ if (MaybeWriteNotification()) {
+ break;
+ }
+ }
+ if (poll_shared_fd[1].revents & POLLHUP) {
+ LOG(ERROR) << "restore_complete_pipe closed unexpectedly";
+ state_ |= kGuestBootFailed;
+ if (MaybeWriteNotification()) {
+ break;
+ }
}
}
}
@@ -325,11 +371,14 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
return BootCompleted() || (state_ & kGuestBootFailed);
}
+ const CuttlefishConfig& config_;
AutoSetup<ProcessLeader>::Type& process_leader_;
KernelLogPipeProvider& kernel_log_pipe_provider_;
+ const vm_manager::VmManager& vm_manager_;
const CuttlefishConfig::InstanceSpecific& instance_;
std::thread boot_event_handler_;
+ std::thread restore_complete_handler_;
SharedFD fg_launcher_pipe_;
SharedFD reboot_notification_;
SharedFD interrupt_fd_read_;
@@ -344,6 +393,7 @@ class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider,
const CuttlefishConfig::InstanceSpecific,
+ const vm_manager::VmManager,
AutoSetup<ValidateTapDevices>::Type>>
bootStateMachineComponent() {
return fruit::createComponent()
diff --git a/host/commands/run_cvd/boot_state_machine.h b/host/commands/run_cvd/boot_state_machine.h
index 04de24801..4b742ac4a 100644
--- a/host/commands/run_cvd/boot_state_machine.h
+++ b/host/commands/run_cvd/boot_state_machine.h
@@ -24,6 +24,7 @@ namespace cuttlefish {
fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider,
const CuttlefishConfig::InstanceSpecific,
+ const vm_manager::VmManager,
AutoSetup<ValidateTapDevices>::Type>>
bootStateMachineComponent();
diff --git a/host/commands/run_cvd/server_loop_impl.cpp b/host/commands/run_cvd/server_loop_impl.cpp
index 8506245cf..61f1fa6bb 100644
--- a/host/commands/run_cvd/server_loop_impl.cpp
+++ b/host/commands/run_cvd/server_loop_impl.cpp
@@ -297,7 +297,6 @@ void ServerLoopImpl::DeleteFifos() {
instance_.console_in_pipe_name(),
instance_.console_out_pipe_name(),
instance_.logcat_pipe_name(),
- instance_.restore_pipe_name(),
instance_.PerInstanceInternalPath("keymaster_fifo_vm.in"),
instance_.PerInstanceInternalPath("keymaster_fifo_vm.out"),
instance_.PerInstanceInternalPath("keymint_fifo_vm.in"),
diff --git a/host/libs/config/cuttlefish_config.h b/host/libs/config/cuttlefish_config.h
index 8de591aa8..71448ea97 100644
--- a/host/libs/config/cuttlefish_config.h
+++ b/host/libs/config/cuttlefish_config.h
@@ -398,7 +398,6 @@ class CuttlefishConfig {
std::string gnss_out_pipe_name() const;
std::string logcat_pipe_name() const;
- std::string restore_pipe_name() const;
std::string restore_adbd_pipe_name() const;
std::string launcher_log_path() const;
diff --git a/host/libs/config/cuttlefish_config_instance.cpp b/host/libs/config/cuttlefish_config_instance.cpp
index 41c319e5b..455f53af5 100644
--- a/host/libs/config/cuttlefish_config_instance.cpp
+++ b/host/libs/config/cuttlefish_config_instance.cpp
@@ -1187,10 +1187,6 @@ std::string CuttlefishConfig::InstanceSpecific::logcat_pipe_name() const {
return AbsolutePath(PerInstanceInternalPath("logcat-pipe"));
}
-std::string CuttlefishConfig::InstanceSpecific::restore_pipe_name() const {
- return AbsolutePath(PerInstanceInternalPath("restore-pipe"));
-}
-
std::string CuttlefishConfig::InstanceSpecific::restore_adbd_pipe_name() const {
return AbsolutePath(PerInstanceInternalPath("restore-pipe-adbd"));
}
diff --git a/host/libs/vm_manager/crosvm_manager.cpp b/host/libs/vm_manager/crosvm_manager.cpp
index c26da188e..899e39653 100644
--- a/host/libs/vm_manager/crosvm_manager.cpp
+++ b/host/libs/vm_manager/crosvm_manager.cpp
@@ -880,5 +880,32 @@ Result<std::vector<MonitorCommand>> CrosvmManager::StartCommands(
return commands;
}
+Result<void> CrosvmManager::WaitForRestoreComplete() const {
+ auto instance = CF_EXPECT(CuttlefishConfig::Get())->ForDefaultInstance();
+
+ // Wait for the control socket to exist. It is created early in crosvm's
+ // startup sequence, but the process may not even have been exec'd by CF at
+ // this point.
+ while (!FileExists(instance.CrosvmSocketPath())) {
+ usleep(50000); // 50 ms, arbitrarily chosen
+ }
+
+ // Ask crosvm to resume the VM. crosvm promises to not complete this command
+ // until the vCPUs are started (even if it was never suspended to begin
+ // with).
+ auto infop = CF_EXPECT(Execute(
+ std::vector<std::string>{
+ instance.crosvm_binary(),
+ "resume",
+ instance.CrosvmSocketPath(),
+ "--full",
+ },
+ SubprocessOptions(), WEXITED));
+ CF_EXPECT_EQ(infop.si_code, CLD_EXITED);
+ CF_EXPECTF(infop.si_status == 0, "crosvm resume returns non zero code {}",
+ infop.si_status);
+ return {};
+}
+
} // namespace vm_manager
} // namespace cuttlefish
diff --git a/host/libs/vm_manager/crosvm_manager.h b/host/libs/vm_manager/crosvm_manager.h
index a1433d2a2..cb119fefb 100644
--- a/host/libs/vm_manager/crosvm_manager.h
+++ b/host/libs/vm_manager/crosvm_manager.h
@@ -46,6 +46,8 @@ class CrosvmManager : public VmManager {
const CuttlefishConfig& config,
std::vector<VmmDependencyCommand*>& dependencyCommands) override;
+ Result<void> WaitForRestoreComplete() const override;
+
private:
static constexpr int kCrosvmVmResetExitCode = 32;
};
diff --git a/host/libs/vm_manager/vm_manager.h b/host/libs/vm_manager/vm_manager.h
index b3fa4be6c..29fb68a8e 100644
--- a/host/libs/vm_manager/vm_manager.h
+++ b/host/libs/vm_manager/vm_manager.h
@@ -101,6 +101,14 @@ class VmManager {
virtual Result<std::vector<MonitorCommand>> StartCommands(
const CuttlefishConfig& config,
std::vector<VmmDependencyCommand*>& dependencyCommands) = 0;
+
+ // Block until the restore work is finished and the guest is running. Only
+ // called if a snapshot is being restored.
+ //
+ // Must be thread safe.
+ virtual Result<void> WaitForRestoreComplete() const {
+ return CF_ERR("not implemented");
+ }
};
fruit::Component<fruit::Required<const CuttlefishConfig,