From 02829da5fd99da9965bbb9c6d00962f32f2508a8 Mon Sep 17 00:00:00 2001 From: "A. Cody Schuffelen" Date: Sat, 17 Jun 2023 01:43:27 -0700 Subject: BACKPORT: Add `--core-scheduling` flag gating interaction with PR_SCHED_CORE On Android CI we observe performance regressions when crosvm takes advantage of the PR_SCHED_CORE feature. We are somewhat oversubscribed on CPUs, in some cases running a Cuttlefish Android VM with --cpus=4 and an OpenWRT VM with --cpus=1 next to some other host processes on cloud instances with 2 cores and 4 hyperthreads. In this case we would prefer not to lose cpu time to the scheduler blocking off hyperthreads when either VM claims complete cores to itself. In this case we are intending to fall back to the default state of "all processes trust each other", mentioned under "Trust model" on https://www.kernel.org/doc/html/next/admin-guide/hw-vuln/core-scheduling.html `--core-scheduling` defaults to true, so there is no behavioral change for existing users. Bug: b/280660768 Test: `crosvm start --core-scheduling=false` from Cuttlefish launcher script Change-Id: Id154790c16b7d9f81aff1f189468959fb5fa7259 Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4602908 Reviewed-by: Frederick Mayle Reviewed-by: Dennis Kempin Reviewed-by: Daniel Verkamp Commit-Queue: Cody Schuffelen Merged-In: Id154790c16b7d9f81aff1f189468959fb5fa7259 --- src/crosvm.rs | 2 ++ src/linux/mod.rs | 3 ++- src/linux/vcpu.rs | 5 ++++- src/main.rs | 11 +++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/crosvm.rs b/src/crosvm.rs index c79ef0115..cdba3e0e9 100644 --- a/src/crosvm.rs +++ b/src/crosvm.rs @@ -363,6 +363,7 @@ pub struct Config { pub vcpu_affinity: Option, pub cpu_clusters: Vec>, pub cpu_capacity: BTreeMap, // CPU index -> capacity + pub core_scheduling: bool, pub per_vm_core_scheduling: bool, #[cfg(feature = "audio_cras")] pub cras_snds: Vec, @@ -486,6 +487,7 @@ impl Default for Config { vcpu_affinity: None, cpu_clusters: Vec::new(), cpu_capacity: BTreeMap::new(), + core_scheduling: true, per_vm_core_scheduling: false, #[cfg(feature = "audio_cras")] cras_snds: Vec::new(), diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 444d8c9b4..9ff692e8c 100644 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -1719,7 +1719,7 @@ fn run_control( // shared by all vCPU threads. // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process // itself for even better performance. Only vCPUs need the feature. - if cfg.per_vm_core_scheduling { + if cfg.core_scheduling && cfg.per_vm_core_scheduling { if let Err(e) = enable_core_scheduling() { error!("Failed to enable core scheduling: {}", e); } @@ -1770,6 +1770,7 @@ fn run_control( use_hypervisor_signals, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_channel.clone(), + cfg.core_scheduling, cfg.per_vm_core_scheduling, cfg.host_cpu_topology, cfg.privileged_vm, diff --git a/src/linux/vcpu.rs b/src/linux/vcpu.rs index 562b528ee..f8c854984 100644 --- a/src/linux/vcpu.rs +++ b/src/linux/vcpu.rs @@ -73,6 +73,7 @@ pub fn runnable_vcpu( no_smt: bool, has_bios: bool, use_hypervisor_signals: bool, + core_scheduling: bool, enable_per_vm_core_scheduling: bool, host_cpu_topology: bool, vcpu_cgroup_tasks_file: Option, @@ -119,7 +120,7 @@ where ) .context("failed to configure vcpu")?; - if !enable_per_vm_core_scheduling { + if core_scheduling && !enable_per_vm_core_scheduling { // Do per-vCPU core scheduling by setting a unique cookie to each vCPU. if let Err(e) = enable_core_scheduling() { error!("Failed to enable core scheduling: {}", e); @@ -609,6 +610,7 @@ pub fn run_vcpu( #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option< mpsc::Sender, >, + core_scheduling: bool, enable_per_vm_core_scheduling: bool, host_cpu_topology: bool, privileged_vm: bool, @@ -658,6 +660,7 @@ where no_smt, has_bios, use_hypervisor_signals, + core_scheduling, enable_per_vm_core_scheduling, host_cpu_topology, vcpu_cgroup_tasks_file, diff --git a/src/main.rs b/src/main.rs index c910065f6..48afbb53b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1173,6 +1173,15 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: "cpu-capacity" => { parse_cpu_capacity(value.unwrap(), &mut cfg.cpu_capacity)?; } + "core-scheduling" => { + let val_str = value.unwrap_or("true"); + cfg.core_scheduling = + val_str.parse() + .map_err(|_| argument::Error::InvalidValue { + value: val_str.to_owned(), + expected: String::from("core-scheduling must be a boolean"), + })?; + } "per-vm-core-scheduling" => { cfg.per_vm_core_scheduling = true; } @@ -2574,6 +2583,8 @@ fn run_vm(args: std::env::Args) -> std::result::Result { or colon-separated list of assignments of guest to host CPU assignments (e.g. 0=0:1=1:2=2) (default: no mask)"), Argument::value("cpu-cluster", "CPUSET", "Group the given CPUs into a cluster (default: no clusters)"), Argument::value("cpu-capacity", "CPU=CAP[,CPU=CAP[,...]]", "Set the relative capacity of the given CPU (default: no capacity)"), + Argument::value("core-scheduling", "true", "Enable core scheduling feature to protect against hyperthread attacks. This option is + a prerequisite for per-vm-core-scheduling."), Argument::flag("per-vm-core-scheduling", "Enable per-VM core scheduling intead of the default one (per-vCPU core scheduing) by making all vCPU threads share same cookie for core scheduling. This option is no-op on devices that have neither MDS nor L1TF vulnerability."), -- cgit v1.2.3