From 02829da5fd99da9965bbb9c6d00962f32f2508a8 Mon Sep 17 00:00:00 2001
From: "A. Cody Schuffelen" <schuffelen@google.com>
Date: Sat, 17 Jun 2023 01:43:27 -0700
Subject: BACKPORT: Add `--core-scheduling` flag gating interaction with
 PR_SCHED_CORE

On Android CI we observe performance regressions when crosvm takes
advantage of the PR_SCHED_CORE feature. The hosts are somewhat
oversubscribed on CPUs: in some cases a Cuttlefish Android VM with
--cpus=4 and an OpenWRT VM with --cpus=1 run next to other host
processes on cloud instances with 2 cores and 4 hyperthreads. In this
situation we would prefer not to lose CPU time to the scheduler blocking
off hyperthreads whenever either VM claims complete cores for itself.

With `--core-scheduling=false` we intend to fall back to the default
state of "all processes trust each other", described under "Trust model"
at
https://www.kernel.org/doc/html/next/admin-guide/hw-vuln/core-scheduling.html

`--core-scheduling` defaults to true, so there is no behavioral
change for existing users.

Bug: b/280660768
Test: `crosvm start --core-scheduling=false` from Cuttlefish launcher script
Change-Id: Id154790c16b7d9f81aff1f189468959fb5fa7259
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4602908
Reviewed-by: Frederick Mayle <fmayle@google.com>
Reviewed-by: Dennis Kempin <denniskempin@google.com>
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Commit-Queue: Cody Schuffelen <schuffelen@google.com>
Merged-In: Id154790c16b7d9f81aff1f189468959fb5fa7259
---
 src/crosvm.rs     |  2 ++
 src/linux/mod.rs  |  3 ++-
 src/linux/vcpu.rs |  5 ++++-
 src/main.rs       | 11 +++++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/crosvm.rs b/src/crosvm.rs
index c79ef0115..cdba3e0e9 100644
--- a/src/crosvm.rs
+++ b/src/crosvm.rs
@@ -363,6 +363,7 @@ pub struct Config {
     pub vcpu_affinity: Option<VcpuAffinity>,
     pub cpu_clusters: Vec<Vec<usize>>,
     pub cpu_capacity: BTreeMap<usize, u32>, // CPU index -> capacity
+    pub core_scheduling: bool,
     pub per_vm_core_scheduling: bool,
     #[cfg(feature = "audio_cras")]
     pub cras_snds: Vec<CrasSndParameters>,
@@ -486,6 +487,7 @@ impl Default for Config {
             vcpu_affinity: None,
             cpu_clusters: Vec::new(),
             cpu_capacity: BTreeMap::new(),
+            core_scheduling: true,
             per_vm_core_scheduling: false,
             #[cfg(feature = "audio_cras")]
             cras_snds: Vec::new(),
diff --git a/src/linux/mod.rs b/src/linux/mod.rs
index 444d8c9b4..9ff692e8c 100644
--- a/src/linux/mod.rs
+++ b/src/linux/mod.rs
@@ -1719,7 +1719,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
     // shared by all vCPU threads.
     // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
     // itself for even better performance. Only vCPUs need the feature.
-    if cfg.per_vm_core_scheduling {
+    if cfg.core_scheduling && cfg.per_vm_core_scheduling {
         if let Err(e) = enable_core_scheduling() {
             error!("Failed to enable core scheduling: {}", e);
         }
@@ -1770,6 +1770,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
             use_hypervisor_signals,
             #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
             to_gdb_channel.clone(),
+            cfg.core_scheduling,
             cfg.per_vm_core_scheduling,
             cfg.host_cpu_topology,
             cfg.privileged_vm,
diff --git a/src/linux/vcpu.rs b/src/linux/vcpu.rs
index 562b528ee..f8c854984 100644
--- a/src/linux/vcpu.rs
+++ b/src/linux/vcpu.rs
@@ -73,6 +73,7 @@ pub fn runnable_vcpu<V>(
     no_smt: bool,
     has_bios: bool,
     use_hypervisor_signals: bool,
+    core_scheduling: bool,
     enable_per_vm_core_scheduling: bool,
     host_cpu_topology: bool,
     vcpu_cgroup_tasks_file: Option<File>,
@@ -119,7 +120,7 @@ where
     )
     .context("failed to configure vcpu")?;
 
-    if !enable_per_vm_core_scheduling {
+    if core_scheduling && !enable_per_vm_core_scheduling {
         // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
         if let Err(e) = enable_core_scheduling() {
             error!("Failed to enable core scheduling: {}", e);
@@ -609,6 +610,7 @@ pub fn run_vcpu<V>(
     #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
         mpsc::Sender<VcpuDebugStatusMessage>,
     >,
+    core_scheduling: bool,
     enable_per_vm_core_scheduling: bool,
     host_cpu_topology: bool,
     privileged_vm: bool,
@@ -658,6 +660,7 @@ where
                 no_smt,
                 has_bios,
                 use_hypervisor_signals,
+                core_scheduling,
                 enable_per_vm_core_scheduling,
                 host_cpu_topology,
                 vcpu_cgroup_tasks_file,
diff --git a/src/main.rs b/src/main.rs
index c910065f6..48afbb53b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1173,6 +1173,15 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
         "cpu-capacity" => {
             parse_cpu_capacity(value.unwrap(), &mut cfg.cpu_capacity)?;
         }
+        "core-scheduling" => {
+            let val_str = value.unwrap_or("true");
+            cfg.core_scheduling =
+                val_str.parse()
+                    .map_err(|_| argument::Error::InvalidValue {
+                        value: val_str.to_owned(),
+                        expected: String::from("core-scheduling must be a boolean"),
+                    })?;
+        }
         "per-vm-core-scheduling" => {
             cfg.per_vm_core_scheduling = true;
         }
@@ -2574,6 +2583,8 @@ fn run_vm(args: std::env::Args) -> std::result::Result<CommandStatus, ()> {
                               or colon-separated list of assignments of guest to host CPU assignments (e.g. 0=0:1=1:2=2) (default: no mask)"),
           Argument::value("cpu-cluster", "CPUSET", "Group the given CPUs into a cluster (default: no clusters)"),
           Argument::value("cpu-capacity", "CPU=CAP[,CPU=CAP[,...]]", "Set the relative capacity of the given CPU (default: no capacity)"),
+          Argument::value("core-scheduling", "true", "Enable core scheduling feature to protect against hyperthread attacks. This option is
+                                                      a prerequisite for per-vm-core-scheduling."),
           Argument::flag("per-vm-core-scheduling", "Enable per-VM core scheduling intead of the default one (per-vCPU core scheduing) by
               making all vCPU threads share same cookie for core scheduling.
               This option is no-op on devices that have neither MDS nor L1TF vulnerability."),
-- 
cgit v1.2.3