aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeongik Cha <jeongik@google.com>2023-09-14 16:36:44 +0900
committerJeongik Cha <jeongik@google.com>2023-09-27 03:10:15 +0900
commit69aec0289bec504f3c58997212653c59d4151bc4 (patch)
treec689c04bc0ee16244ba8959d8a5bd02c15e5d909
parent0c08e13e2cbcd3ff5f93b21ea1cd5db8b4ec4973 (diff)
downloadvmm-sys-util-69aec0289bec504f3c58997212653c59d4151bc4.tar.gz
Import vmm-sys-util
Bug: 277909042 Test: build Change-Id: I0454659ed2f64d0845898430352209fda761943b
-rw-r--r--.buildkite/pipeline.windows.yml66
-rw-r--r--.cargo/config2
-rw-r--r--.cargo_vcs_info.json6
-rw-r--r--.github/dependabot.yml7
-rw-r--r--.gitignore5
-rw-r--r--Android.bp17
-rw-r--r--CHANGELOG.md195
-rw-r--r--CODEOWNERS1
-rw-r--r--Cargo.toml39
-rw-r--r--Cargo.toml.orig24
l---------LICENSE1
-rw-r--r--LICENSE-BSD-3-Clause27
-rw-r--r--METADATA19
-rw-r--r--MODULE_LICENSE_BSD_LIKE0
-rw-r--r--OWNERS1
-rw-r--r--README.md24
-rw-r--r--cargo2android.json6
-rw-r--r--coverage_config_aarch64.json5
-rw-r--r--coverage_config_x86_64.json5
-rw-r--r--src/errno.rs193
-rw-r--r--src/fam.rs1048
-rw-r--r--src/lib.rs24
-rw-r--r--src/linux/aio.rs362
-rw-r--r--src/linux/epoll.rs522
-rw-r--r--src/linux/eventfd.rs216
-rw-r--r--src/linux/fallocate.rs93
-rw-r--r--src/linux/ioctl.rs410
-rw-r--r--src/linux/mod.rs15
-rw-r--r--src/linux/poll.rs1010
-rw-r--r--src/linux/seek_hole.rs228
-rw-r--r--src/linux/signal.rs583
-rw-r--r--src/linux/sock_ctrl_msg.rs663
-rw-r--r--src/linux/timerfd.rs281
-rw-r--r--src/linux/write_zeroes.rs330
-rw-r--r--src/metric.rs178
-rw-r--r--src/rand.rs163
-rw-r--r--src/syscall.rs50
-rw-r--r--src/tempfile.rs304
-rw-r--r--src/unix/file_traits.rs101
-rw-r--r--src/unix/mod.rs5
-rw-r--r--src/unix/tempdir.rs205
-rw-r--r--src/unix/terminal.rs190
42 files changed, 7624 insertions, 0 deletions
diff --git a/.buildkite/pipeline.windows.yml b/.buildkite/pipeline.windows.yml
new file mode 100644
index 0000000..ecfdb56
--- /dev/null
+++ b/.buildkite/pipeline.windows.yml
@@ -0,0 +1,66 @@
+steps:
+ - label: "build-msvc-x86"
+ commands:
+ - cargo build --release
+ retry:
+ automatic: true
+ agents:
+ platform: x86_64
+ os: windows
+ plugins:
+ - docker#v3.7.0:
+ image: "lpetrut/rust_win_buildtools"
+ always-pull: true
+
+ - label: "style"
+ command: cargo fmt --all -- --check
+ retry:
+ automatic: true
+ agents:
+ platform: x86_64
+ os: windows
+ plugins:
+ - docker#v3.7.0:
+ image: "lpetrut/rust_win_buildtools"
+ always-pull: true
+
+ - label: "unittests-msvc-x86"
+ commands:
+ - cargo test --all-features
+ retry:
+ automatic: true
+ agents:
+ platform: x86_64
+ os: windows
+ plugins:
+ - docker#v3.7.0:
+ image: "lpetrut/rust_win_buildtools"
+ always-pull: true
+
+ - label: "clippy-x86"
+ commands:
+ - cargo clippy --all
+ retry:
+ automatic: true
+ agents:
+ platform: x86_64
+ os: windows
+ plugins:
+ - docker#v3.7.0:
+ image: "lpetrut/rust_win_buildtools"
+ always-pull: true
+
+ - label: "check-warnings-x86"
+ commands:
+ - cargo check --all-targets
+ retry:
+ automatic: true
+ agents:
+ platform: x86_64
+ os: windows
+ plugins:
+ - docker#v3.7.0:
+ image: "lpetrut/rust_win_buildtools"
+ always-pull: true
+ environment:
+ - "RUSTFLAGS=-D warnings"
diff --git a/.cargo/config b/.cargo/config
new file mode 100644
index 0000000..0c55a7c
--- /dev/null
+++ b/.cargo/config
@@ -0,0 +1,2 @@
+[target.aarch64-unknown-linux-musl]
+rustflags = [ "-C", "target-feature=+crt-static", "-C", "link-arg=-lgcc"]
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..e83a746
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,6 @@
+{
+ "git": {
+ "sha1": "0e10ca98b55797a64319d746d94379f7cdf81d02"
+ },
+ "path_in_vcs": ""
+} \ No newline at end of file
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..97b2020
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+- package-ecosystem: gitsubmodule
+ directory: "/"
+ schedule:
+ interval: monthly
+ open-pull-requests-limit: 10
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f21ea09
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/target
+.idea
+**/*.rs.bk
+Cargo.lock
+
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..e72fe88
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,17 @@
+// This file is generated by cargo2android.py --config cargo2android.json.
+// Do not modify this file as changes will be overridden on upgrade.
+
+
+
+rust_library_host {
+ name: "libvmm_sys_util",
+ crate_name: "vmm_sys_util",
+ cargo_env_compat: true,
+ cargo_pkg_version: "0.11.1",
+ srcs: ["src/lib.rs"],
+ edition: "2021",
+ rustlibs: [
+ "libbitflags-1.3.2",
+ "liblibc",
+ ],
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..2a08682
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,195 @@
+# Changelog
+## v0.11.1
+
+### Changed
+- [[#178](https://github.com/rust-vmm/vmm-sys-util/issues/178)]: Fixed a bug in
+ `rand_bytes` that was triggering a panic when the number of bytes was not a
+ multiple of 4.
+- [[#181](https://github.com/rust-vmm/vmm-sys-util/pull/181)]: Changed
+ `TempFile::new_with_prefix()` on linux to use `mkstemp` to prevent name
+ collisions.
+
+## v0.11.0
+
+### Added
+- Added `rand_bytes` function that generates a pseudo random vector of
+ `len` bytes.
+- Added implementation of `std::error::Error` for `fam::Error`.
+ - Added derive `Eq` and `PartialEq` for error types.
+
+### Changed
+- [[#161](https://github.com/rust-vmm/vmm-sys-util/issues/161)]: Updated the
+ license to BSD-3-Clause.
+- Use edition 2021.
+- [[vm-memory#199](https://github.com/rust-vmm/vm-memory/issues/199)]: Use caret
+ dependencies. This is the idiomatic way of specifying dependencies.
+ With this we reduce the risk of breaking customer code when new releases of
+ the dependencies are published.
+- Renamed `xor_psuedo_rng_u32` to `xor_pseudo_rng_u32` to fix a typo.
+- Renamed `xor_psuedo_rng_u8_alphanumerics` to `xor_pseudo_rng_u8_alphanumerics`
+ to fix a typo.
+
+## v0.10.0
+
+### Added
+- Added Android support by using the appropriate macro configuration when
+ exporting functionality.
+- Derive `Debug` for `FamStructWrapper` & `EventFd`.
+
+### Changed
+- The `ioctl_expr` is now a const function instead of a macro.
+
+## v0.9.0
+
+### Changed
+* Fixed safety for sock_ctrl_msg::raw_recvmsg() and enhanced documentation
+* Fixed sock_cmsg: ensured copy_nonoverlapping safety
+* [[#135](https://github.com/rust-vmm/vmm-sys-util/pull/135)]: sock_ctrl_msg:
+ mark recv_with_fds as unsafe
+
+
+## v0.8.0
+
+* Added set_check_for_hangup() to PollContext.
+* Added writable()/has_error()/raw_events() to PollEvent.
+* Derived Copy/Clone for PollWatchingEvents.
+* Fixed the implementation of `write_zeroes` to use `FALLOC_FL_ZERO_RANGE`
+ instead of `FALLOC_FL_PUNCH_HOLE`.
+* Added `write_all_zeroes` to `WriteZeroes`, which calls `write_zeroes` in a
+ loop until the requested length is met.
+* Added a new trait, `WriteZeroesAt`, which allows giving the offset in file
+ instead of using the current cursor.
+* Removed `max_events` from `Epoll::wait` which removes possible undefined
+ behavior.
+* [[#104](https://github.com/rust-vmm/vmm-sys-util/issues/104)]: Fixed FAM
+ struct `PartialEq` implementation.
+* [[#85](https://github.com/rust-vmm/vmm-sys-util/issues/85)]: Fixed FAM struct
+ `Clone` implementation.
+* [[#99](https://github.com/rust-vmm/vmm-sys-util/issues/99)]: Validate the
+ maximum capacity when initializing FAM Struct.
+
+# v0.7.0
+
+* Switched to Rust edition 2018.
+* Added the `metric` module that provides a `Metric` interface as well as a
+ default implementation for `AtomicU64`.
+
+# v0.6.1
+
+* Implemented `From<io::Error>` for `errno::Error`.
+
+# v0.6.0
+
+* Derived Copy for EpollEvent.
+* Implemented Debug for EpollEvent.
+* Changed `Epoll::ctl` signature such that `EpollEvent` is passed by
+ value and not by reference.
+* Enabled this crate to be used on other Unixes (besides Linux) by using
+ target_os = linux where appropriate.
+
+# v0.5.0
+
+* Added conditionally compiled `serde` compatibility to `FamStructWrapper`,
+ gated by the `with-serde` feature.
+* Implemented `Into<std::io::Error` for `errno::Error`.
+* Added a wrapper over `libc::epoll` used for basic epoll operations.
+
+# v0.4.0
+
+* Added Windows support for TempFile and errno::Error.
+* Added `into_file` for TempFile which enables the TempFile to be used as a
+ regular file.
+* Implemented std::error::Error for errno::Error.
+* Fixed the implementation of `register_signal_handler` by allowing only
+ valid signal numbers.
+
+# v0.3.1
+
+* Advertise functionality for obtaining POSIX real time signal base which is
+ needed to provide absolute signals in the API changed in v0.3.0.
+
+# v0.3.0
+
+* Removed `for_vcpu` argument from `signal::register_signal_handler` and
+ `signal::validate_signal_num`. Users can now pass absolute values for all
+ valid signal numbers.
+* Removed `flag` argument of `signal::register_signal_handler` public methods,
+ which now defaults to `libc::SA_SIGINFO`.
+* Changed `TempFile::new` and `TempDir::new` to create new temporary files/
+ directories inside `$TMPDIR` if set, otherwise inside `/tmp`.
+* Added methods which create temporary files/directories with prefix.
+
+# v0.2.1
+
+* Fixed the FamStructWrapper Clone implementation to avoid UB.
+
+# v0.2.0
+
+* fam: updated the macro that generates implementions of FamStruct to
+ also take a parameter that specifies the name of the flexible array
+ member.
+
+# v0.1.1
+
+* Fixed the Cargo.toml license.
+* Fixed some clippy warnings.
+
+# v0.1.0
+
+This is the first vmm-sys-util crate release.
+
+It is a collection of modules implementing helpers and utilities used by
+multiple rust-vmm components and rust-vmm based VMMs.
+Most of the code in this first release is based on either the crosvm or the
+Firecracker projects, or both.
+
+The first release comes with the following Rust modules:
+
+* aio: Safe wrapper over
+ [`Linux AIO`](http://man7.org/linux/man-pages/man7/aio.7.html).
+
+* errno: Structures, helpers, and type definitions for working with
+ [`errno`](http://man7.org/linux/man-pages/man3/errno.3.html).
+
+* eventfd: Structure and wrapper functions for working with
+ [`eventfd`](http://man7.org/linux/man-pages/man2/eventfd.2.html).
+
+* fallocate: Enum and function for dealing with an allocated disk space
+ by [`fallocate`](http://man7.org/linux/man-pages/man2/fallocate.2.html).
+
+* fam: Trait and wrapper for working with C defined FAM structures.
+
+* file_traits: Traits for handling file synchronization and length.
+
+* ioctls: Macros and functions for working with
+ [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html).
+
+* poll: Traits and structures for working with
+ [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
+
+* rand: Miscellaneous functions related to getting (pseudo) random
+ numbers and strings.
+
+* seek_hole: Traits and implementations over
+ [`lseek64`](https://linux.die.net/man/3/lseek64).
+
+* signal: Enums, traits and functions for working with
+ [`signal`](http://man7.org/linux/man-pages/man7/signal.7.html).
+
+* sockctrl_msg: Wrapper for sending and receiving messages with file
+ descriptors on sockets that accept control messages (e.g. Unix domain
+ sockets).
+
+* tempdir: Structure for handling temporary directories.
+
+* tempfile: Struct for handling temporary files as well as any cleanup
+ required.
+
+* terminal: Trait for working with
+ [`termios`](http://man7.org/linux/man-pages/man3/termios.3.html).
+
+* timerfd: Structure and functions for working with
+ [`timerfd`](http://man7.org/linux/man-pages/man2/timerfd_create.2.html).
+
+* write_zeroes: Traits for replacing a range with a hole and writing
+ zeroes in a file.
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 0000000..b0440e9
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1 @@
+* @liujing2 @sameo @andreeaflorescu @jiangliu
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f1da15c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,39 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "vmm-sys-util"
+version = "0.11.1"
+authors = ["Intel Virtualization Team <vmm-maintainers@intel.com>"]
+description = "A system utility set"
+readme = "README.md"
+keywords = ["utils"]
+license = "BSD-3-Clause"
+repository = "https://github.com/rust-vmm/vmm-sys-util"
+resolver = "2"
+[dependencies.libc]
+version = "0.2.39"
+
+[dependencies.serde]
+version = "1.0.27"
+optional = true
+
+[dependencies.serde_derive]
+version = "1.0.27"
+optional = true
+[dev-dependencies.serde_json]
+version = "1.0.9"
+
+[features]
+with-serde = ["serde", "serde_derive"]
+[target."cfg(any(target_os = \"linux\", target_os = \"android\"))".dependencies.bitflags]
+version = "1.0"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..668a2ac
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,24 @@
+[package]
+name = "vmm-sys-util"
+version = "0.11.1"
+authors = ["Intel Virtualization Team <vmm-maintainers@intel.com>"]
+description = "A system utility set"
+repository = "https://github.com/rust-vmm/vmm-sys-util"
+readme = "README.md"
+keywords = ["utils"]
+edition = "2021"
+license = "BSD-3-Clause"
+
+[features]
+with-serde = ["serde", "serde_derive"]
+
+[dependencies]
+libc = "0.2.39"
+serde = { version = "1.0.27", optional = true }
+serde_derive = { version = "1.0.27", optional = true }
+
+[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
+bitflags = "1.0"
+
+[dev-dependencies]
+serde_json = "1.0.9"
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..d3b3859
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-BSD-3-Clause \ No newline at end of file
diff --git a/LICENSE-BSD-3-Clause b/LICENSE-BSD-3-Clause
new file mode 100644
index 0000000..8bafca3
--- /dev/null
+++ b/LICENSE-BSD-3-Clause
@@ -0,0 +1,27 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..1c8b946
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "vmm-sys-util"
+description: "A system utility set"
+third_party {
+ identifier {
+ type: "crates.io"
+ value: "https://crates.io/crates/vmm-sys-util"
+ }
+ identifier {
+ type: "Archive"
+ value: "https://static.crates.io/crates/vmm-sys-util/vmm-sys-util-0.11.1.crate"
+ }
+ version: "0.11.1"
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2023
+ month: 8
+ day: 23
+ }
+}
diff --git a/MODULE_LICENSE_BSD_LIKE b/MODULE_LICENSE_BSD_LIKE
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_BSD_LIKE
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..45dc4dd
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:master:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..293a11a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+# vmm-sys-util
+
+[![crates.io](https://img.shields.io/crates/v/vmm-sys-util)](https://crates.io/crates/vmm-sys-util)
+[![docs.rs](https://img.shields.io/docsrs/vmm-sys-util)](https://docs.rs/vmm-sys-util/)
+
+This crate is a collection of modules that provides helpers and utilities
+used by multiple [rust-vmm](https://github.com/rust-vmm/community) components.
+
+The crate implements safe wrappers around common utilities for working
+with files, event file descriptors, ioctls and others.
+
+## Support
+
+**Platforms**:
+- x86_64
+- aarch64
+
+**Operating Systems**:
+- Linux
+- Windows (partial support)
+
+## License
+
+This code is licensed under [BSD-3-Clause](LICENSE-BSD-3-Clause).
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..1f2fa38
--- /dev/null
+++ b/cargo2android.json
@@ -0,0 +1,6 @@
+{
+ "run": true,
+ "dep-suffixes": {
+ "bitflags": "-1.3.2"
+ }
+} \ No newline at end of file
diff --git a/coverage_config_aarch64.json b/coverage_config_aarch64.json
new file mode 100644
index 0000000..6b35952
--- /dev/null
+++ b/coverage_config_aarch64.json
@@ -0,0 +1,5 @@
+{
+ "coverage_score": 84.3,
+ "exclude_path": "",
+ "crate_features": ""
+}
diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json
new file mode 100644
index 0000000..4277fc0
--- /dev/null
+++ b/coverage_config_x86_64.json
@@ -0,0 +1,5 @@
+{
+ "coverage_score": 87.1,
+ "exclude_path": "",
+ "crate_features": ""
+}
diff --git a/src/errno.rs b/src/errno.rs
new file mode 100644
index 0000000..27b9bcf
--- /dev/null
+++ b/src/errno.rs
@@ -0,0 +1,193 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Structures, helpers, and type definitions for working with
+//! [`errno`](http://man7.org/linux/man-pages/man3/errno.3.html).
+
+use std::fmt::{Display, Formatter};
+use std::io;
+use std::result;
+
+/// Wrapper over [`errno`](http://man7.org/linux/man-pages/man3/errno.3.html).
+///
+/// The error number is an integer number set by system calls and some libc
+/// functions in case of error.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Error(i32);
+
+/// A specialized [Result](https://doc.rust-lang.org/std/result/enum.Result.html) type
+/// for operations that can return `errno`.
+///
+/// This typedef is generally used to avoid writing out `errno::Error` directly and is
+/// otherwise a direct mapping to `Result`.
+pub type Result<T> = result::Result<T, Error>;
+
+impl Error {
+ /// Creates a new error from the given error number.
+ ///
+ /// # Arguments
+ ///
+ /// * `errno`: error number used for creating the `Error`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # extern crate libc;
+ /// extern crate vmm_sys_util;
+ /// #
+ /// # use libc;
+ /// use vmm_sys_util::errno::Error;
+ ///
+ /// let err = Error::new(libc::EIO);
+ /// ```
+ pub fn new(errno: i32) -> Error {
+ Error(errno)
+ }
+
+ /// Returns the last occurred `errno` wrapped in an `Error`.
+ ///
+ /// Calling `Error::last()` is the equivalent of using
+ /// [`errno`](http://man7.org/linux/man-pages/man3/errno.3.html) in C/C++.
+ /// The result of this function only has meaning after a libc call or syscall
+ /// where `errno` was set.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # extern crate libc;
+ /// extern crate vmm_sys_util;
+ /// #
+ /// # use libc;
+ /// # use std::fs::File;
+ /// # use std::io::{self, Read};
+ /// # use std::env::temp_dir;
+ /// use vmm_sys_util::errno::Error;
+ /// #
+ /// // Reading from a file without permissions returns an error.
+ /// let mut path = temp_dir();
+ /// path.push("test");
+ /// let mut file = File::create(path).unwrap();
+ /// let mut buf: Vec<u8> = Vec::new();
+ /// assert!(file.read_to_end(&mut buf).is_err());
+ ///
+ /// // Retrieve the error number of the previous operation using `Error::last()`:
+ /// let read_err = Error::last();
+ /// #[cfg(unix)]
+ /// assert_eq!(read_err, Error::new(libc::EBADF));
+ /// #[cfg(not(unix))]
+ /// assert_eq!(read_err, Error::new(libc::EIO));
+ /// ```
+ pub fn last() -> Error {
+ // It's safe to unwrap because this `Error` was constructed via `last_os_error`.
+ Error(io::Error::last_os_error().raw_os_error().unwrap())
+ }
+
+ /// Returns the raw integer value (`errno`) corresponding to this Error.
+ ///
+ /// # Examples
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::errno::Error;
+ ///
+ /// let err = Error::new(13);
+ /// assert_eq!(err.errno(), 13);
+ /// ```
+ pub fn errno(self) -> i32 {
+ self.0
+ }
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ io::Error::from_raw_os_error(self.0).fmt(f)
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl From<io::Error> for Error {
+ fn from(e: io::Error) -> Self {
+ Error::new(e.raw_os_error().unwrap_or_default())
+ }
+}
+
+impl From<Error> for io::Error {
+ fn from(err: Error) -> io::Error {
+ io::Error::from_raw_os_error(err.0)
+ }
+}
+
+/// Returns the last `errno` as a [`Result`] that is always an error.
+///
+/// [`Result`]: type.Result.html
+pub fn errno_result<T>() -> Result<T> {
+ Err(Error::last())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::env::temp_dir;
+ use std::error::Error as _;
+ use std::fs::OpenOptions;
+ use std::io::{self, Read};
+
+ #[test]
+ pub fn test_errno() {
+ #[cfg(unix)]
+ let expected_errno = libc::EBADF;
+ #[cfg(not(unix))]
+ let expected_errno = libc::EIO;
+
+ // try to read from a file without read permissions
+ let mut path = temp_dir();
+ path.push("test");
+ let mut file = OpenOptions::new()
+ .read(false)
+ .write(true)
+ .create(true)
+ .truncate(true)
+ .open(path)
+ .unwrap();
+ let mut buf: Vec<u8> = Vec::new();
+ assert!(file.read_to_end(&mut buf).is_err());
+
+ // Test that errno_result returns Err and the error is the expected one.
+ let last_err = errno_result::<i32>().unwrap_err();
+ assert_eq!(last_err, Error::new(expected_errno));
+
+ // Test that the inner value of `Error` corresponds to expected_errno.
+ assert_eq!(last_err.errno(), expected_errno);
+ assert!(last_err.source().is_none());
+
+ // Test creating an `Error` from a `std::io::Error`.
+ assert_eq!(last_err, Error::from(io::Error::last_os_error()));
+
+ // Test that calling `last()` returns the same error as `errno_result()`.
+ assert_eq!(last_err, Error::last());
+
+ let last_err: io::Error = last_err.into();
+ // Test creating a `std::io::Error` from an `Error`
+ assert_eq!(io::Error::last_os_error().kind(), last_err.kind());
+ }
+
+ #[test]
+ pub fn test_display() {
+ // Test the display implementation.
+ #[cfg(target_os = "linux")]
+ assert_eq!(
+ format!("{}", Error::new(libc::EBADF)),
+ "Bad file descriptor (os error 9)"
+ );
+ #[cfg(not(unix))]
+ assert_eq!(
+ format!("{}", Error::new(libc::EIO)),
+ "Access is denied. (os error 5)"
+ );
+ }
+}
diff --git a/src/fam.rs b/src/fam.rs
new file mode 100644
index 0000000..0d62b0f
--- /dev/null
+++ b/src/fam.rs
@@ -0,0 +1,1048 @@
+// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Trait and wrapper for working with C defined FAM structures.
+//!
+//! In C 99 an array of unknown size may appear within a struct definition as the last member
+//! (as long as there is at least one other named member).
+//! This is known as a flexible array member (FAM).
+//! Pre C99, the same behavior could be achieved using zero length arrays.
+//!
+//! Flexible Array Members are the go-to choice for working with large amounts of data
+//! prefixed by header values.
+//!
+//! For example the KVM API has many structures of this kind.
+
+#[cfg(feature = "with-serde")]
+use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
+#[cfg(feature = "with-serde")]
+use serde::{ser::SerializeTuple, Serialize, Serializer};
+use std::fmt;
+#[cfg(feature = "with-serde")]
+use std::marker::PhantomData;
+use std::mem::{self, size_of};
+
+/// Errors associated with the [`FamStructWrapper`](struct.FamStructWrapper.html) struct.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Error {
+ /// The max size has been exceeded
+ SizeLimitExceeded,
+}
+
+impl std::error::Error for Error {}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ Self::SizeLimitExceeded => write!(f, "The max size has been exceeded"),
+ }
+ }
+}
+
+/// Trait for accessing properties of C defined FAM structures.
+///
+/// # Safety
+///
+/// This is unsafe due to the number of constraints that aren't checked:
+/// * the implementer should be a POD
+/// * the implementor should contain a flexible array member of elements of type `Entry`
+/// * `Entry` should be a POD
+///
+/// Violating these may cause problems.
+///
+/// # Example
+///
+/// ```
+/// use vmm_sys_util::fam::*;
+///
+/// #[repr(C)]
+/// #[derive(Default)]
+/// pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]);
+/// impl<T> __IncompleteArrayField<T> {
+/// #[inline]
+/// pub fn new() -> Self {
+/// __IncompleteArrayField(::std::marker::PhantomData, [])
+/// }
+/// #[inline]
+/// pub unsafe fn as_ptr(&self) -> *const T {
+/// ::std::mem::transmute(self)
+/// }
+/// #[inline]
+/// pub unsafe fn as_mut_ptr(&mut self) -> *mut T {
+/// ::std::mem::transmute(self)
+/// }
+/// #[inline]
+/// pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+/// ::std::slice::from_raw_parts(self.as_ptr(), len)
+/// }
+/// #[inline]
+/// pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+/// ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+/// }
+/// }
+///
+/// #[repr(C)]
+/// #[derive(Default)]
+/// struct MockFamStruct {
+/// pub len: u32,
+/// pub padding: u32,
+/// pub entries: __IncompleteArrayField<u32>,
+/// }
+///
+/// unsafe impl FamStruct for MockFamStruct {
+/// type Entry = u32;
+///
+/// fn len(&self) -> usize {
+/// self.len as usize
+/// }
+///
+/// fn set_len(&mut self, len: usize) {
+/// self.len = len as u32
+/// }
+///
+/// fn max_len() -> usize {
+/// 100
+/// }
+///
+/// fn as_slice(&self) -> &[u32] {
+/// let len = self.len();
+/// unsafe { self.entries.as_slice(len) }
+/// }
+///
+/// fn as_mut_slice(&mut self) -> &mut [u32] {
+/// let len = self.len();
+/// unsafe { self.entries.as_mut_slice(len) }
+/// }
+/// }
+///
+/// type MockFamStructWrapper = FamStructWrapper<MockFamStruct>;
+/// ```
+#[allow(clippy::len_without_is_empty)]
+pub unsafe trait FamStruct {
+ /// The type of the FAM entries
+ type Entry: PartialEq + Copy;
+
+ /// Get the FAM length
+ ///
+ /// These type of structures contain a member that holds the FAM length.
+ /// This method will return the value of that member.
+ fn len(&self) -> usize;
+
+ /// Set the FAM length
+ ///
+ /// These type of structures contain a member that holds the FAM length.
+ /// This method will set the value of that member.
+ fn set_len(&mut self, len: usize);
+
+ /// Get max allowed FAM length
+ ///
+ /// This depends on each structure.
+ /// For example a structure representing the cpuid can contain at most 80 entries.
+ fn max_len() -> usize;
+
+ /// Get the FAM entries as slice
+ fn as_slice(&self) -> &[Self::Entry];
+
+ /// Get the FAM entries as mut slice
+ fn as_mut_slice(&mut self) -> &mut [Self::Entry];
+}
+
+/// A wrapper for [`FamStruct`](trait.FamStruct.html).
+///
+/// It helps in treating a [`FamStruct`](trait.FamStruct.html) similarly to an actual `Vec`.
+#[derive(Debug)]
+pub struct FamStructWrapper<T: Default + FamStruct> {
+ // This variable holds the FamStruct structure. We use a `Vec<T>` to make the allocation
+ // large enough while still being aligned for `T`. Only the first element of `Vec<T>`
+ // will actually be used as a `T`. The remaining memory in the `Vec<T>` is for `entries`,
+ // which must be contiguous. Since the entries are of type `FamStruct::Entry` we must
+ // be careful to convert the desired capacity of the `FamStructWrapper`
+ // from `FamStruct::Entry` to `T` when reserving or releasing memory.
+ mem_allocator: Vec<T>,
+}
+
+impl<T: Default + FamStruct> FamStructWrapper<T> {
+ /// Convert FAM len to `mem_allocator` len.
+ ///
+ /// Get the capacity required by mem_allocator in order to hold
+ /// the provided number of [`FamStruct::Entry`](trait.FamStruct.html#associatedtype.Entry).
+ fn mem_allocator_len(fam_len: usize) -> usize {
+ let wrapper_size_in_bytes = size_of::<T>() + fam_len * size_of::<T::Entry>();
+ (wrapper_size_in_bytes + size_of::<T>() - 1) / size_of::<T>()
+ }
+
+ /// Convert `mem_allocator` len to FAM len.
+ ///
+ /// Get the number of elements of type
+ /// [`FamStruct::Entry`](trait.FamStruct.html#associatedtype.Entry)
+ /// that fit in a mem_allocator of provided len.
+ fn fam_len(mem_allocator_len: usize) -> usize {
+ if mem_allocator_len == 0 {
+ return 0;
+ }
+
+ let array_size_in_bytes = (mem_allocator_len - 1) * size_of::<T>();
+ array_size_in_bytes / size_of::<T::Entry>()
+ }
+
+ /// Create a new FamStructWrapper with `num_elements` elements.
+ ///
+ /// The elements will be zero-initialized. The type of the elements will be
+ /// [`FamStruct::Entry`](trait.FamStruct.html#associatedtype.Entry).
+ ///
+ /// # Arguments
+ ///
+ /// * `num_elements` - The number of elements in the FamStructWrapper.
+ ///
+ /// # Errors
+ ///
+ /// When `num_elements` is greater than the max possible len, it returns
+ /// `Error::SizeLimitExceeded`.
+ pub fn new(num_elements: usize) -> Result<FamStructWrapper<T>, Error> {
+ if num_elements > T::max_len() {
+ return Err(Error::SizeLimitExceeded);
+ }
+ let required_mem_allocator_capacity =
+ FamStructWrapper::<T>::mem_allocator_len(num_elements);
+
+ let mut mem_allocator = Vec::with_capacity(required_mem_allocator_capacity);
+ mem_allocator.push(T::default());
+ for _ in 1..required_mem_allocator_capacity {
+ // SAFETY: Safe as long T follows the requirements of being POD.
+ mem_allocator.push(unsafe { mem::zeroed() })
+ }
+ mem_allocator[0].set_len(num_elements);
+
+ Ok(FamStructWrapper { mem_allocator })
+ }
+
+ /// Create a new FamStructWrapper from a slice of elements.
+ ///
+ /// # Arguments
+ ///
+ /// * `entries` - The slice of [`FamStruct::Entry`](trait.FamStruct.html#associatedtype.Entry)
+ /// entries.
+ ///
+ /// # Errors
+ ///
+ /// When the size of `entries` is greater than the max possible len, it returns
+ /// `Error::SizeLimitExceeded`.
+ pub fn from_entries(entries: &[T::Entry]) -> Result<FamStructWrapper<T>, Error> {
+ let mut adapter = FamStructWrapper::<T>::new(entries.len())?;
+
+ {
+ let wrapper_entries = adapter.as_mut_fam_struct().as_mut_slice();
+ wrapper_entries.copy_from_slice(entries);
+ }
+
+ Ok(adapter)
+ }
+
+ /// Create a new FamStructWrapper from the raw content represented as `Vec<T>`.
+ ///
+ /// Sometimes we already have the raw content of an FAM struct represented as `Vec<T>`,
+ /// and want to use the FamStructWrapper as accessors.
+ ///
+ /// # Arguments
+ ///
+ /// * `content` - The raw content represented as `Vec[T]`.
+ ///
+ /// # Safety
+ ///
+ /// This function is unsafe because the caller needs to ensure that the raw content is
+ /// correctly layed out.
+ pub unsafe fn from_raw(content: Vec<T>) -> Self {
+ FamStructWrapper {
+ mem_allocator: content,
+ }
+ }
+
+ /// Consume the FamStructWrapper and return the raw content as `Vec<T>`.
+ pub fn into_raw(self) -> Vec<T> {
+ self.mem_allocator
+ }
+
+ /// Get a reference to the actual [`FamStruct`](trait.FamStruct.html) instance.
+ pub fn as_fam_struct_ref(&self) -> &T {
+ &self.mem_allocator[0]
+ }
+
+ /// Get a mut reference to the actual [`FamStruct`](trait.FamStruct.html) instance.
+ pub fn as_mut_fam_struct(&mut self) -> &mut T {
+ &mut self.mem_allocator[0]
+ }
+
+ /// Get a pointer to the [`FamStruct`](trait.FamStruct.html) instance.
+ ///
+ /// The caller must ensure that the fam_struct outlives the pointer this
+ /// function returns, or else it will end up pointing to garbage.
+ ///
+ /// Modifying the container referenced by this pointer may cause its buffer
+ /// to be reallocated, which would also make any pointers to it invalid.
+ pub fn as_fam_struct_ptr(&self) -> *const T {
+ self.as_fam_struct_ref()
+ }
+
+ /// Get a mutable pointer to the [`FamStruct`](trait.FamStruct.html) instance.
+ ///
+ /// The caller must ensure that the fam_struct outlives the pointer this
+ /// function returns, or else it will end up pointing to garbage.
+ ///
+ /// Modifying the container referenced by this pointer may cause its buffer
+ /// to be reallocated, which would also make any pointers to it invalid.
+ pub fn as_mut_fam_struct_ptr(&mut self) -> *mut T {
+ self.as_mut_fam_struct()
+ }
+
+ /// Get the elements slice.
+ pub fn as_slice(&self) -> &[T::Entry] {
+ self.as_fam_struct_ref().as_slice()
+ }
+
+ /// Get the mutable elements slice.
+ pub fn as_mut_slice(&mut self) -> &mut [T::Entry] {
+ self.as_mut_fam_struct().as_mut_slice()
+ }
+
+ /// Get the number of elements of type `FamStruct::Entry` currently in the vec.
+ fn len(&self) -> usize {
+ self.as_fam_struct_ref().len()
+ }
+
+ /// Get the capacity of the `FamStructWrapper`
+ ///
+ /// The capacity is measured in elements of type `FamStruct::Entry`.
+ fn capacity(&self) -> usize {
+ FamStructWrapper::<T>::fam_len(self.mem_allocator.capacity())
+ }
+
+ /// Reserve additional capacity.
+ ///
+ /// Reserve capacity for at least `additional` more
+ /// [`FamStruct::Entry`](trait.FamStruct.html#associatedtype.Entry) elements.
+ ///
+ /// If the capacity is already reserved, this method doesn't do anything.
+ /// If not this will trigger a reallocation of the underlying buffer.
+ fn reserve(&mut self, additional: usize) {
+ let desired_capacity = self.len() + additional;
+ if desired_capacity <= self.capacity() {
+ return;
+ }
+
+ let current_mem_allocator_len = self.mem_allocator.len();
+ let required_mem_allocator_len = FamStructWrapper::<T>::mem_allocator_len(desired_capacity);
+ let additional_mem_allocator_len = required_mem_allocator_len - current_mem_allocator_len;
+
+ self.mem_allocator.reserve(additional_mem_allocator_len);
+ }
+
+ /// Update the length of the FamStructWrapper.
+ ///
+ /// The length of `self` will be updated to the specified value.
+ /// The length of the `T` structure and of `self.mem_allocator` will be updated accordingly.
+ /// If the len is increased additional capacity will be reserved.
+ /// If the len is decreased the unnecessary memory will be deallocated.
+ ///
+ /// This method might trigger reallocations of the underlying buffer.
+ ///
+ /// # Errors
+ ///
+ /// When len is greater than the max possible len it returns Error::SizeLimitExceeded.
+ fn set_len(&mut self, len: usize) -> Result<(), Error> {
+ let additional_elements: isize = len as isize - self.len() as isize;
+ // If len == self.len there's nothing to do.
+ if additional_elements == 0 {
+ return Ok(());
+ }
+
+ // If the len needs to be increased:
+ if additional_elements > 0 {
+ // Check if the new len is valid.
+ if len > T::max_len() {
+ return Err(Error::SizeLimitExceeded);
+ }
+ // Reserve additional capacity.
+ self.reserve(additional_elements as usize);
+ }
+
+ let current_mem_allocator_len = self.mem_allocator.len();
+ let required_mem_allocator_len = FamStructWrapper::<T>::mem_allocator_len(len);
+ // Update the len of the `mem_allocator`.
+ // SAFETY: This is safe since enough capacity has been reserved.
+ unsafe {
+ self.mem_allocator.set_len(required_mem_allocator_len);
+ }
+ // Zero-initialize the additional elements if any.
+ for i in current_mem_allocator_len..required_mem_allocator_len {
+ // SAFETY: Safe as long as the trait is only implemented for POD. This is a requirement
+ // for the trait implementation.
+ self.mem_allocator[i] = unsafe { mem::zeroed() }
+ }
+ // Update the len of the underlying `FamStruct`.
+ self.as_mut_fam_struct().set_len(len);
+
+ // If the len needs to be decreased, deallocate unnecessary memory
+ if additional_elements < 0 {
+ self.mem_allocator.shrink_to_fit();
+ }
+
+ Ok(())
+ }
+
+ /// Append an element.
+ ///
+ /// # Arguments
+ ///
+ /// * `entry` - The element that will be appended to the end of the collection.
+ ///
+ /// # Errors
+ ///
+ /// When len is already equal to max possible len it returns Error::SizeLimitExceeded.
+ pub fn push(&mut self, entry: T::Entry) -> Result<(), Error> {
+ let new_len = self.len() + 1;
+ self.set_len(new_len)?;
+ self.as_mut_slice()[new_len - 1] = entry;
+
+ Ok(())
+ }
+
+ /// Retain only the elements specified by the predicate.
+ ///
+ /// # Arguments
+ ///
+ /// * `f` - The function used to evaluate whether an entry will be kept or not.
+ /// When `f` returns `true` the entry is kept.
+ pub fn retain<P>(&mut self, mut f: P)
+ where
+ P: FnMut(&T::Entry) -> bool,
+ {
+ let mut num_kept_entries = 0;
+ {
+ let entries = self.as_mut_slice();
+ for entry_idx in 0..entries.len() {
+ let keep = f(&entries[entry_idx]);
+ if keep {
+ entries[num_kept_entries] = entries[entry_idx];
+ num_kept_entries += 1;
+ }
+ }
+ }
+
+ // This is safe since this method is not increasing the len
+ self.set_len(num_kept_entries).expect("invalid length");
+ }
+}
+
+impl<T: Default + FamStruct + PartialEq> PartialEq for FamStructWrapper<T> {
+ fn eq(&self, other: &FamStructWrapper<T>) -> bool {
+ self.as_fam_struct_ref() == other.as_fam_struct_ref() && self.as_slice() == other.as_slice()
+ }
+}
+
+impl<T: Default + FamStruct> Clone for FamStructWrapper<T> {
+ fn clone(&self) -> Self {
+ // The number of entries (self.as_slice().len()) can't be > T::max_len() since `self` is a
+ // valid `FamStructWrapper`.
+ let required_mem_allocator_capacity =
+ FamStructWrapper::<T>::mem_allocator_len(self.as_slice().len());
+
+ let mut mem_allocator = Vec::with_capacity(required_mem_allocator_capacity);
+
+ // SAFETY: This is safe as long as the requirements for the `FamStruct` trait to be safe
+ // are met (the implementing type and the entries elements are POD, therefore `Copy`, so
+ // memory safety can't be violated by the ownership of `fam_struct`). It is also safe
+ // because we're trying to read a T from a `&T` that is pointing to a properly initialized
+ // and aligned T.
+ unsafe {
+ let fam_struct: T = std::ptr::read(self.as_fam_struct_ref());
+ mem_allocator.push(fam_struct);
+ }
+ for _ in 1..required_mem_allocator_capacity {
+ mem_allocator.push(
+ // SAFETY: This is safe as long as T respects the FamStruct trait and is a POD.
+ unsafe { mem::zeroed() },
+ )
+ }
+
+ let mut adapter = FamStructWrapper { mem_allocator };
+ {
+ let wrapper_entries = adapter.as_mut_fam_struct().as_mut_slice();
+ wrapper_entries.copy_from_slice(self.as_slice());
+ }
+ adapter
+ }
+}
+
+impl<T: Default + FamStruct> From<Vec<T>> for FamStructWrapper<T> {
+ fn from(vec: Vec<T>) -> Self {
+ FamStructWrapper { mem_allocator: vec }
+ }
+}
+
+#[cfg(feature = "with-serde")]
+impl<T: Default + FamStruct + Serialize> Serialize for FamStructWrapper<T>
+where
+ <T as FamStruct>::Entry: serde::Serialize,
+{
+ fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ let mut s = serializer.serialize_tuple(2)?;
+ s.serialize_element(self.as_fam_struct_ref())?;
+ s.serialize_element(self.as_slice())?;
+ s.end()
+ }
+}
+
+#[cfg(feature = "with-serde")]
+impl<'de, T: Default + FamStruct + Deserialize<'de>> Deserialize<'de> for FamStructWrapper<T>
+where
+ <T as FamStruct>::Entry: std::marker::Copy + serde::Deserialize<'de>,
+{
+ fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct FamStructWrapperVisitor<X> {
+ dummy: PhantomData<X>,
+ }
+
+ impl<'de, X: Default + FamStruct + Deserialize<'de>> Visitor<'de> for FamStructWrapperVisitor<X>
+ where
+ <X as FamStruct>::Entry: std::marker::Copy + serde::Deserialize<'de>,
+ {
+ type Value = FamStructWrapper<X>;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("FamStructWrapper")
+ }
+
+ fn visit_seq<V>(self, mut seq: V) -> Result<FamStructWrapper<X>, V::Error>
+ where
+ V: SeqAccess<'de>,
+ {
+ use serde::de::Error;
+
+ let header = seq
+ .next_element()?
+ .ok_or_else(|| de::Error::invalid_length(0, &self))?;
+ let entries: Vec<X::Entry> = seq
+ .next_element()?
+ .ok_or_else(|| de::Error::invalid_length(1, &self))?;
+
+ let mut result: Self::Value = FamStructWrapper::from_entries(entries.as_slice())
+ .map_err(|e| V::Error::custom(format!("{:?}", e)))?;
+ result.mem_allocator[0] = header;
+ Ok(result)
+ }
+ }
+
+ deserializer.deserialize_tuple(2, FamStructWrapperVisitor { dummy: PhantomData })
+ }
+}
+
+/// Generate `FamStruct` implementation for structs with flexible array member.
+#[macro_export]
+macro_rules! generate_fam_struct_impl {
+ ($struct_type: ty, $entry_type: ty, $entries_name: ident,
+ $field_type: ty, $field_name: ident, $max: expr) => {
+ unsafe impl FamStruct for $struct_type {
+ type Entry = $entry_type;
+
+ fn len(&self) -> usize {
+ self.$field_name as usize
+ }
+
+ fn set_len(&mut self, len: usize) {
+ self.$field_name = len as $field_type;
+ }
+
+ fn max_len() -> usize {
+ $max
+ }
+
+ fn as_slice(&self) -> &[<Self as FamStruct>::Entry] {
+ let len = self.len();
+ unsafe { self.$entries_name.as_slice(len) }
+ }
+
+ fn as_mut_slice(&mut self) -> &mut [<Self as FamStruct>::Entry] {
+ let len = self.len();
+ unsafe { self.$entries_name.as_mut_slice(len) }
+ }
+ }
+ };
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::undocumented_unsafe_blocks)]
+ #[cfg(feature = "with-serde")]
+ use serde_derive::{Deserialize, Serialize};
+
+ use super::*;
+
+ const MAX_LEN: usize = 100;
+
+ #[repr(C)]
+ #[derive(Default, PartialEq, Eq)]
+ pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]);
+ impl<T> __IncompleteArrayField<T> {
+ #[inline]
+ pub fn new() -> Self {
+ __IncompleteArrayField(::std::marker::PhantomData, [])
+ }
+ #[inline]
+ pub unsafe fn as_ptr(&self) -> *const T {
+ self as *const __IncompleteArrayField<T> as *const T
+ }
+ #[inline]
+ pub unsafe fn as_mut_ptr(&mut self) -> *mut T {
+ self as *mut __IncompleteArrayField<T> as *mut T
+ }
+ #[inline]
+ pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+ ::std::slice::from_raw_parts(self.as_ptr(), len)
+ }
+ #[inline]
+ pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+ ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+ }
+ }
+
+ #[cfg(feature = "with-serde")]
+ impl<T> Serialize for __IncompleteArrayField<T> {
+ fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ [0u8; 0].serialize(serializer)
+ }
+ }
+
+ #[cfg(feature = "with-serde")]
+ impl<'de, T> Deserialize<'de> for __IncompleteArrayField<T> {
+ fn deserialize<D>(_: D) -> std::result::Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ Ok(__IncompleteArrayField::new())
+ }
+ }
+
+ #[repr(C)]
+ #[derive(Default, PartialEq)]
+ struct MockFamStruct {
+ pub len: u32,
+ pub padding: u32,
+ pub entries: __IncompleteArrayField<u32>,
+ }
+
+ generate_fam_struct_impl!(MockFamStruct, u32, entries, u32, len, 100);
+
+ type MockFamStructWrapper = FamStructWrapper<MockFamStruct>;
+
+ const ENTRIES_OFFSET: usize = 2;
+
+ const FAM_LEN_TO_MEM_ALLOCATOR_LEN: &[(usize, usize)] = &[
+ (0, 1),
+ (1, 2),
+ (2, 2),
+ (3, 3),
+ (4, 3),
+ (5, 4),
+ (10, 6),
+ (50, 26),
+ (100, 51),
+ ];
+
+ const MEM_ALLOCATOR_LEN_TO_FAM_LEN: &[(usize, usize)] = &[
+ (0, 0),
+ (1, 0),
+ (2, 2),
+ (3, 4),
+ (4, 6),
+ (5, 8),
+ (10, 18),
+ (50, 98),
+ (100, 198),
+ ];
+
+ #[test]
+ fn test_mem_allocator_len() {
+ for pair in FAM_LEN_TO_MEM_ALLOCATOR_LEN {
+ let fam_len = pair.0;
+ let mem_allocator_len = pair.1;
+ assert_eq!(
+ mem_allocator_len,
+ MockFamStructWrapper::mem_allocator_len(fam_len)
+ );
+ }
+ }
+
+ #[test]
+ fn test_wrapper_len() {
+ for pair in MEM_ALLOCATOR_LEN_TO_FAM_LEN {
+ let mem_allocator_len = pair.0;
+ let fam_len = pair.1;
+ assert_eq!(fam_len, MockFamStructWrapper::fam_len(mem_allocator_len));
+ }
+ }
+
+ #[test]
+ fn test_new() {
+ let num_entries = 10;
+
+ let adapter = MockFamStructWrapper::new(num_entries).unwrap();
+ assert_eq!(num_entries, adapter.capacity());
+
+ let u32_slice = unsafe {
+ std::slice::from_raw_parts(
+ adapter.as_fam_struct_ptr() as *const u32,
+ num_entries + ENTRIES_OFFSET,
+ )
+ };
+ assert_eq!(num_entries, u32_slice[0] as usize);
+ for entry in u32_slice[1..].iter() {
+ assert_eq!(*entry, 0);
+ }
+
+ // It's okay to create a `FamStructWrapper` with the maximum allowed number of entries.
+ let adapter = MockFamStructWrapper::new(MockFamStruct::max_len()).unwrap();
+ assert_eq!(MockFamStruct::max_len(), adapter.capacity());
+
+ assert!(matches!(
+ MockFamStructWrapper::new(MockFamStruct::max_len() + 1),
+ Err(Error::SizeLimitExceeded)
+ ));
+ }
+
+ #[test]
+ fn test_from_entries() {
+ let num_entries: usize = 10;
+
+ let mut entries = Vec::new();
+ for i in 0..num_entries {
+ entries.push(i as u32);
+ }
+
+ let adapter = MockFamStructWrapper::from_entries(entries.as_slice()).unwrap();
+ let u32_slice = unsafe {
+ std::slice::from_raw_parts(
+ adapter.as_fam_struct_ptr() as *const u32,
+ num_entries + ENTRIES_OFFSET,
+ )
+ };
+ assert_eq!(num_entries, u32_slice[0] as usize);
+ for (i, &value) in entries.iter().enumerate().take(num_entries) {
+ assert_eq!(adapter.as_slice()[i], value);
+ }
+
+ let mut entries = Vec::new();
+ for i in 0..MockFamStruct::max_len() + 1 {
+ entries.push(i as u32);
+ }
+
+ // Can't create a `FamStructWrapper` with a number of entries > MockFamStruct::max_len().
+ assert!(matches!(
+ MockFamStructWrapper::from_entries(entries.as_slice()),
+ Err(Error::SizeLimitExceeded)
+ ));
+ }
+
+ #[test]
+ fn test_entries_slice() {
+ let num_entries = 10;
+ let mut adapter = MockFamStructWrapper::new(num_entries).unwrap();
+
+ let expected_slice = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+ {
+ let mut_entries_slice = adapter.as_mut_slice();
+ mut_entries_slice.copy_from_slice(expected_slice);
+ }
+
+ let u32_slice = unsafe {
+ std::slice::from_raw_parts(
+ adapter.as_fam_struct_ptr() as *const u32,
+ num_entries + ENTRIES_OFFSET,
+ )
+ };
+ assert_eq!(expected_slice, &u32_slice[ENTRIES_OFFSET..]);
+ assert_eq!(expected_slice, adapter.as_slice());
+ }
+
+ #[test]
+ fn test_reserve() {
+ let mut adapter = MockFamStructWrapper::new(0).unwrap();
+
+ // test that the right capacity is reserved
+ for pair in FAM_LEN_TO_MEM_ALLOCATOR_LEN {
+ let num_elements = pair.0;
+ let required_mem_allocator_len = pair.1;
+
+ adapter.reserve(num_elements);
+
+ assert!(adapter.mem_allocator.capacity() >= required_mem_allocator_len);
+ assert_eq!(0, adapter.len());
+ assert!(adapter.capacity() >= num_elements);
+ }
+
+ // test that when the capacity is already reserved, the method doesn't do anything
+ let current_capacity = adapter.capacity();
+ adapter.reserve(current_capacity - 1);
+ assert_eq!(current_capacity, adapter.capacity());
+ }
+
+ #[test]
+ fn test_set_len() {
+ let mut desired_len = 0;
+ let mut adapter = MockFamStructWrapper::new(desired_len).unwrap();
+
+ // keep initial len
+ assert!(adapter.set_len(desired_len).is_ok());
+ assert_eq!(adapter.len(), desired_len);
+
+ // increase len
+ desired_len = 10;
+ assert!(adapter.set_len(desired_len).is_ok());
+ // check that the len has been increased and zero-initialized elements have been added
+ assert_eq!(adapter.len(), desired_len);
+ for element in adapter.as_slice() {
+ assert_eq!(*element, 0_u32);
+ }
+
+ // decrease len
+ desired_len = 5;
+ assert!(adapter.set_len(desired_len).is_ok());
+ assert_eq!(adapter.len(), desired_len);
+ }
+
+ #[test]
+ fn test_push() {
+ let mut adapter = MockFamStructWrapper::new(0).unwrap();
+
+ for i in 0..MAX_LEN {
+ assert!(adapter.push(i as u32).is_ok());
+ assert_eq!(adapter.as_slice()[i], i as u32);
+ assert_eq!(adapter.len(), i + 1);
+ assert!(
+ adapter.mem_allocator.capacity() >= MockFamStructWrapper::mem_allocator_len(i + 1)
+ );
+ }
+
+ assert!(adapter.push(0).is_err());
+ }
+
+ #[test]
+ fn test_retain() {
+ let mut adapter = MockFamStructWrapper::new(0).unwrap();
+
+ let mut num_retained_entries = 0;
+ for i in 0..MAX_LEN {
+ assert!(adapter.push(i as u32).is_ok());
+ if i % 2 == 0 {
+ num_retained_entries += 1;
+ }
+ }
+
+ adapter.retain(|entry| entry % 2 == 0);
+
+ for entry in adapter.as_slice().iter() {
+ assert_eq!(0, entry % 2);
+ }
+ assert_eq!(adapter.len(), num_retained_entries);
+ assert!(
+ adapter.mem_allocator.capacity()
+ >= MockFamStructWrapper::mem_allocator_len(num_retained_entries)
+ );
+ }
+
+ #[test]
+ fn test_partial_eq() {
+ let mut wrapper_1 = MockFamStructWrapper::new(0).unwrap();
+ let mut wrapper_2 = MockFamStructWrapper::new(0).unwrap();
+ let mut wrapper_3 = MockFamStructWrapper::new(0).unwrap();
+
+ for i in 0..MAX_LEN {
+ assert!(wrapper_1.push(i as u32).is_ok());
+ assert!(wrapper_2.push(i as u32).is_ok());
+ assert!(wrapper_3.push(0).is_ok());
+ }
+
+ assert!(wrapper_1 == wrapper_2);
+ assert!(wrapper_1 != wrapper_3);
+ }
+
+ #[test]
+ fn test_clone() {
+ let mut adapter = MockFamStructWrapper::new(0).unwrap();
+
+ for i in 0..MAX_LEN {
+ assert!(adapter.push(i as u32).is_ok());
+ }
+
+ assert!(adapter == adapter.clone());
+ }
+
+ #[test]
+ fn test_raw_content() {
+ let data = vec![
+ MockFamStruct {
+ len: 2,
+ padding: 5,
+ entries: __IncompleteArrayField::new(),
+ },
+ MockFamStruct {
+ len: 0xA5,
+ padding: 0x1e,
+ entries: __IncompleteArrayField::new(),
+ },
+ ];
+
+ let mut wrapper = unsafe { MockFamStructWrapper::from_raw(data) };
+ {
+ let payload = wrapper.as_slice();
+ assert_eq!(payload[0], 0xA5);
+ assert_eq!(payload[1], 0x1e);
+ }
+ assert_eq!(wrapper.as_mut_fam_struct().padding, 5);
+ let data = wrapper.into_raw();
+ assert_eq!(data[0].len, 2);
+ assert_eq!(data[0].padding, 5);
+ }
+
+ #[cfg(feature = "with-serde")]
+ #[test]
+ fn test_ser_deser() {
+ #[repr(C)]
+ #[derive(Default, PartialEq)]
+ #[cfg_attr(feature = "with-serde", derive(Deserialize, Serialize))]
+ struct Message {
+ pub len: u32,
+ pub padding: u32,
+ pub value: u32,
+ #[cfg_attr(feature = "with-serde", serde(skip))]
+ pub entries: __IncompleteArrayField<u32>,
+ }
+
+ generate_fam_struct_impl!(Message, u32, entries, u32, len, 100);
+
+ type MessageFamStructWrapper = FamStructWrapper<Message>;
+
+ let data = vec![
+ Message {
+ len: 2,
+ padding: 0,
+ value: 42,
+ entries: __IncompleteArrayField::new(),
+ },
+ Message {
+ len: 0xA5,
+ padding: 0x1e,
+ value: 0,
+ entries: __IncompleteArrayField::new(),
+ },
+ ];
+
+ let wrapper = unsafe { MessageFamStructWrapper::from_raw(data) };
+ let data_ser = serde_json::to_string(&wrapper).unwrap();
+ assert_eq!(
+ data_ser,
+ "[{\"len\":2,\"padding\":0,\"value\":42},[165,30]]"
+ );
+ let data_deser =
+ serde_json::from_str::<MessageFamStructWrapper>(data_ser.as_str()).unwrap();
+ assert!(wrapper.eq(&data_deser));
+
+ let bad_data_ser = r#"{"foo": "bar"}"#;
+ assert!(serde_json::from_str::<MessageFamStructWrapper>(bad_data_ser).is_err());
+
+ #[repr(C)]
+ #[derive(Default)]
+ #[cfg_attr(feature = "with-serde", derive(Deserialize, Serialize))]
+ struct Message2 {
+ pub len: u32,
+ pub padding: u32,
+ pub value: u32,
+ #[cfg_attr(feature = "with-serde", serde(skip))]
+ pub entries: __IncompleteArrayField<u32>,
+ }
+
+ // Maximum number of entries = 1, so the deserialization should fail because of this reason.
+ generate_fam_struct_impl!(Message2, u32, entries, u32, len, 1);
+
+ type Message2FamStructWrapper = FamStructWrapper<Message2>;
+ assert!(serde_json::from_str::<Message2FamStructWrapper>(data_ser.as_str()).is_err());
+ }
+
+ #[test]
+ fn test_clone_multiple_fields() {
+ #[derive(Default, PartialEq)]
+ #[repr(C)]
+ struct Foo {
+ index: u32,
+ length: u16,
+ flags: u32,
+ entries: __IncompleteArrayField<u32>,
+ }
+
+ generate_fam_struct_impl!(Foo, u32, entries, u16, length, 100);
+
+ type FooFamStructWrapper = FamStructWrapper<Foo>;
+
+ let mut wrapper = FooFamStructWrapper::new(0).unwrap();
+ wrapper.as_mut_fam_struct().index = 1;
+ wrapper.as_mut_fam_struct().flags = 2;
+ wrapper.as_mut_fam_struct().length = 3;
+ wrapper.push(3).unwrap();
+ wrapper.push(14).unwrap();
+ assert_eq!(wrapper.as_slice().len(), 3 + 2);
+ assert_eq!(wrapper.as_slice()[3], 3);
+ assert_eq!(wrapper.as_slice()[3 + 1], 14);
+
+ let mut wrapper2 = wrapper.clone();
+ assert_eq!(
+ wrapper.as_mut_fam_struct().index,
+ wrapper2.as_mut_fam_struct().index
+ );
+ assert_eq!(
+ wrapper.as_mut_fam_struct().length,
+ wrapper2.as_mut_fam_struct().length
+ );
+ assert_eq!(
+ wrapper.as_mut_fam_struct().flags,
+ wrapper2.as_mut_fam_struct().flags
+ );
+ assert_eq!(wrapper.as_slice(), wrapper2.as_slice());
+ assert_eq!(
+ wrapper2.as_slice().len(),
+ wrapper2.as_mut_fam_struct().length as usize
+ );
+ assert!(wrapper == wrapper2);
+
+ wrapper.as_mut_fam_struct().index = 3;
+ assert!(wrapper != wrapper2);
+
+ wrapper.as_mut_fam_struct().length = 7;
+ assert!(wrapper != wrapper2);
+
+ wrapper.push(1).unwrap();
+ assert_eq!(wrapper.as_mut_fam_struct().length, 8);
+ assert!(wrapper != wrapper2);
+
+ let mut wrapper2 = wrapper.clone();
+ assert!(wrapper == wrapper2);
+
+ // Dropping the original variable should not affect its clone.
+ drop(wrapper);
+ assert_eq!(wrapper2.as_mut_fam_struct().index, 3);
+ assert_eq!(wrapper2.as_mut_fam_struct().length, 8);
+ assert_eq!(wrapper2.as_mut_fam_struct().flags, 2);
+ assert_eq!(wrapper2.as_slice(), [0, 0, 0, 3, 14, 0, 0, 1]);
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1929816
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,24 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Collection of modules that provides helpers and utilities used by multiple
+//! [rust-vmm](https://github.com/rust-vmm/community) components.
+
+#![deny(missing_docs)]
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+mod linux;
+#[cfg(any(target_os = "linux", target_os = "android"))]
+pub use crate::linux::*;
+
+#[cfg(unix)]
+mod unix;
+#[cfg(unix)]
+pub use crate::unix::*;
+
+pub mod errno;
+pub mod fam;
+pub mod metric;
+pub mod rand;
+pub mod syscall;
+pub mod tempfile;
diff --git a/src/linux/aio.rs b/src/linux/aio.rs
new file mode 100644
index 0000000..1e14ea0
--- /dev/null
+++ b/src/linux/aio.rs
@@ -0,0 +1,362 @@
+// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Safe wrapper over [`Linux AIO`](http://man7.org/linux/man-pages/man7/aio.7.html).
+
+#![allow(non_camel_case_types)]
+
+/* automatically generated by rust-bindgen from file linux/include/uapi/linux/aio_abi.h
+ * of commit 69973b8 and then manually edited */
+
+use std::io::{Error, Result};
+use std::os::raw::{c_int, c_long, c_uint, c_ulong};
+use std::ptr::null_mut;
+
+type __s16 = ::std::os::raw::c_short;
+type __u16 = ::std::os::raw::c_ushort;
+type __u32 = ::std::os::raw::c_uint;
+type __s64 = ::std::os::raw::c_longlong;
+type __u64 = ::std::os::raw::c_ulonglong;
+
+/// Read from a file descriptor at a given offset.
+pub const IOCB_CMD_PREAD: u32 = 0;
+/// Write to a file descriptor at a given offset.
+pub const IOCB_CMD_PWRITE: u32 = 1;
+/// Synchronize a file's in-core metadata and data to storage device.
+pub const IOCB_CMD_FSYNC: u32 = 2;
+/// Synchronize a file's in-core data to storage device.
+pub const IOCB_CMD_FDSYNC: u32 = 3;
+/// Noop, this defined by never used by linux kernel.
+pub const IOCB_CMD_NOOP: u32 = 6;
+/// Read from a file descriptor at a given offset into multiple buffers.
+pub const IOCB_CMD_PREADV: u32 = 7;
+/// Write to a file descriptor at a given offset from multiple buffers.
+pub const IOCB_CMD_PWRITEV: u32 = 8;
+
+/// Valid flags for the "aio_flags" member of the "struct iocb".
+/// Set if the "aio_resfd" member of the "struct iocb" is valid.
+pub const IOCB_FLAG_RESFD: u32 = 1;
+
+/// Maximum number of concurrent requests.
+pub const MAX_REQUESTS: usize = 0x10000;
+
+/// Wrapper over the [`iocb`](https://elixir.bootlin.com/linux/v4.9/source/include/uapi/linux/aio_abi.h#L79) structure.
+#[allow(missing_docs)]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct IoControlBlock {
+ pub aio_data: __u64,
+ pub aio_key: __u32,
+ pub aio_reserved1: __u32,
+ pub aio_lio_opcode: __u16,
+ pub aio_reqprio: __s16,
+ pub aio_fildes: __u32,
+ pub aio_buf: __u64,
+ pub aio_nbytes: __u64,
+ pub aio_offset: __s64,
+ pub aio_reserved2: __u64,
+ pub aio_flags: __u32,
+ pub aio_resfd: __u32,
+}
+
+/// Wrapper over the [`io_event`](https://elixir.bootlin.com/linux/v4.9/source/include/uapi/linux/aio_abi.h#L58) structure.
+#[allow(missing_docs)]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct IoEvent {
+ pub data: __u64,
+ pub obj: __u64,
+ pub res: __s64,
+ pub res2: __s64,
+}
+
+/// Newtype for [`aio_context_t`](https://elixir.bootlin.com/linux/v4.9/source/include/uapi/linux/aio_abi.h#L33).
+#[repr(transparent)]
+#[derive(Debug)]
+pub struct IoContext(::std::os::raw::c_ulong);
+
+impl IoContext {
+ /// Create a new aio context instance.
+ ///
+ /// Refer to Linux [`io_setup`](http://man7.org/linux/man-pages/man2/io_setup.2.html).
+ ///
+ /// # Arguments
+ /// * `nr_events`: maximum number of concurrently processing IO operations.
+ #[allow(clippy::new_ret_no_self)]
+ pub fn new(nr_events: c_uint) -> Result<Self> {
+ if nr_events as usize > MAX_REQUESTS {
+ return Err(Error::from_raw_os_error(libc::EINVAL));
+ }
+
+ let mut ctx = IoContext(0);
+ let rc =
+ // SAFETY: Safe because we use valid parameters and check the result.
+ unsafe { libc::syscall(libc::SYS_io_setup, nr_events, &mut ctx as *mut Self) as c_int };
+ if rc < 0 {
+ Err(Error::last_os_error())
+ } else {
+ Ok(ctx)
+ }
+ }
+
+ /// Submit asynchronous I/O blocks for processing.
+ ///
+ /// Refer to Linux [`io_submit`](http://man7.org/linux/man-pages/man2/io_submit.2.html).
+ ///
+ /// # Arguments
+ /// * `iocbs`: array of AIO control blocks, which will be submitted to the context.
+ ///
+ /// # Examples
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::aio::*;
+ /// # use std::fs::File;
+ /// # use std::os::unix::io::AsRawFd;
+ ///
+ /// let file = File::open("/dev/zero").unwrap();
+ /// let ctx = IoContext::new(128).unwrap();
+ /// let mut buf: [u8; 4096] = unsafe { std::mem::uninitialized() };
+ /// let iocbs = [&mut IoControlBlock {
+ /// aio_fildes: file.as_raw_fd() as u32,
+ /// aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ /// aio_buf: buf.as_mut_ptr() as u64,
+ /// aio_nbytes: buf.len() as u64,
+ /// ..Default::default()
+ /// }];
+ /// assert_eq!(ctx.submit(&iocbs[..]).unwrap(), 1);
+ /// ```
+ pub fn submit(&self, iocbs: &[&mut IoControlBlock]) -> Result<usize> {
+ // SAFETY: It's safe because parameters are valid and we have checked the result.
+ let rc = unsafe {
+ libc::syscall(
+ libc::SYS_io_submit,
+ self.0,
+ iocbs.len() as c_ulong,
+ iocbs.as_ptr(),
+ ) as c_int
+ };
+ if rc < 0 {
+ Err(Error::last_os_error())
+ } else {
+ Ok(rc as usize)
+ }
+ }
+
+ /// Cancel an outstanding asynchronous I/O operation.
+ ///
+ /// Refer to Linux [`io_cancel`](http://man7.org/linux/man-pages/man2/io_cancel.2.html).
+ /// Note: according to current Linux kernel implementation(v4.19), libc::SYS_io_cancel always
+ /// return failure, thus rendering it useless.
+ ///
+ /// # Arguments
+ /// * `iocb`: The iocb for the operation to be canceled.
+ /// * `result`: If the operation is successfully canceled, the event will be copied into the
+ /// memory pointed to by result without being placed into the completion queue.
+ pub fn cancel(&self, iocb: &IoControlBlock, result: &mut IoEvent) -> Result<()> {
+ // SAFETY: It's safe because parameters are valid and we have checked the result.
+ let rc = unsafe {
+ libc::syscall(
+ libc::SYS_io_cancel,
+ self.0,
+ iocb as *const IoControlBlock,
+ result as *mut IoEvent,
+ ) as c_int
+ };
+ if rc < 0 {
+ Err(Error::last_os_error())
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Read asynchronous I/O events from the completion queue.
+ ///
+ /// Refer to Linux [`io_getevents`](http://man7.org/linux/man-pages/man2/io_getevents.2.html).
+ ///
+ /// # Arguments
+ /// * `min_nr`: read at least min_nr events.
+ /// * `events`: array to receive the io operation results.
+ /// * `timeout`: optional amount of time to wait for events.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::aio::*;
+ /// # use std::fs::File;
+ /// # use std::os::unix::io::AsRawFd;
+ ///
+ /// let file = File::open("/dev/zero").unwrap();
+ /// let ctx = IoContext::new(128).unwrap();
+ /// let mut buf: [u8; 4096] = unsafe { std::mem::uninitialized() };
+ /// let iocbs = [
+ /// &mut IoControlBlock {
+ /// aio_fildes: file.as_raw_fd() as u32,
+ /// aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ /// aio_buf: buf.as_mut_ptr() as u64,
+ /// aio_nbytes: buf.len() as u64,
+ /// ..Default::default()
+ /// },
+ /// &mut IoControlBlock {
+ /// aio_fildes: file.as_raw_fd() as u32,
+ /// aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ /// aio_buf: buf.as_mut_ptr() as u64,
+ /// aio_nbytes: buf.len() as u64,
+ /// ..Default::default()
+ /// },
+ /// ];
+ ///
+ /// let mut rc = ctx.submit(&iocbs[..]).unwrap();
+ /// let mut events = [unsafe { std::mem::uninitialized::<IoEvent>() }];
+ /// rc = ctx.get_events(1, &mut events, None).unwrap();
+ /// assert_eq!(rc, 1);
+ /// assert!(events[0].res > 0);
+ /// rc = ctx.get_events(1, &mut events, None).unwrap();
+ /// assert_eq!(rc, 1);
+ /// assert!(events[0].res > 0);
+ /// ```
+ pub fn get_events(
+ &self,
+ min_nr: c_long,
+ events: &mut [IoEvent],
+ timeout: Option<&mut libc::timespec>,
+ ) -> Result<usize> {
+ let to = match timeout {
+ Some(val) => val as *mut libc::timespec,
+ None => null_mut() as *mut libc::timespec,
+ };
+
+ // SAFETY: It's safe because parameters are valid and we have checked the result.
+ let rc = unsafe {
+ libc::syscall(
+ libc::SYS_io_getevents,
+ self.0,
+ min_nr,
+ events.len() as c_long,
+ events.as_mut_ptr(),
+ to,
+ ) as c_int
+ };
+ if rc < 0 {
+ Err(Error::last_os_error())
+ } else {
+ Ok(rc as usize)
+ }
+ }
+}
+
+impl Drop for IoContext {
+ fn drop(&mut self) {
+ if self.0 != 0 {
+ // SAFETY: It's safe because the context is created by us.
+ let _ = unsafe { libc::syscall(libc::SYS_io_destroy, self.0) as c_int };
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::fs::File;
+ use std::os::unix::io::AsRawFd;
+
+ #[test]
+ fn test_new_context() {
+ let _ = IoContext::new(0).unwrap_err();
+ }
+
+ #[test]
+ fn test_cancel_request() {
+ let file = File::open("/dev/zero").unwrap();
+
+ let ctx = IoContext::new(128).unwrap();
+ let mut buf: [u8; 16384] = [0u8; 16384];
+ let iocbs = [&mut IoControlBlock {
+ aio_fildes: file.as_raw_fd() as u32,
+ aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ aio_buf: buf.as_mut_ptr() as u64,
+ aio_nbytes: buf.len() as u64,
+ ..Default::default()
+ }];
+
+ let mut rc = ctx.submit(&iocbs).unwrap();
+ assert_eq!(rc, 1);
+
+ let mut result = Default::default();
+ let err = ctx
+ .cancel(iocbs[0], &mut result)
+ .unwrap_err()
+ .raw_os_error()
+ .unwrap();
+ assert_eq!(err, libc::EINVAL);
+
+ let mut events = [IoEvent::default()];
+ rc = ctx.get_events(1, &mut events, None).unwrap();
+ assert_eq!(rc, 1);
+ assert!(events[0].res > 0);
+ }
+
+ #[test]
+ fn test_read_zero() {
+ let file = File::open("/dev/zero").unwrap();
+
+ let ctx = IoContext::new(128).unwrap();
+ let mut buf: [u8; 4096] = [0u8; 4096];
+ let iocbs = [
+ &mut IoControlBlock {
+ aio_fildes: file.as_raw_fd() as u32,
+ aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ aio_buf: buf.as_mut_ptr() as u64,
+ aio_nbytes: buf.len() as u64,
+ ..Default::default()
+ },
+ &mut IoControlBlock {
+ aio_fildes: file.as_raw_fd() as u32,
+ aio_lio_opcode: IOCB_CMD_PREAD as u16,
+ aio_buf: buf.as_mut_ptr() as u64,
+ aio_nbytes: buf.len() as u64,
+ ..Default::default()
+ },
+ ];
+
+ let mut rc = ctx.submit(&iocbs[..]).unwrap();
+ assert_eq!(rc, 2);
+
+ let mut events = [IoEvent::default()];
+ rc = ctx.get_events(1, &mut events, None).unwrap();
+ assert_eq!(rc, 1);
+ assert!(events[0].res > 0);
+
+ rc = ctx.get_events(1, &mut events, None).unwrap();
+ assert_eq!(rc, 1);
+ assert!(events[0].res > 0);
+ }
+
+ #[test]
+ fn bindgen_test_layout_io_event() {
+ assert_eq!(
+ ::std::mem::size_of::<IoEvent>(),
+ 32usize,
+ concat!("Size of: ", stringify!(IoEvent))
+ );
+ assert_eq!(
+ ::std::mem::align_of::<IoEvent>(),
+ 8usize,
+ concat!("Alignment of", stringify!(IoEvent))
+ );
+ }
+
+ #[test]
+ fn bindgen_test_layout_iocb() {
+ assert_eq!(
+ ::std::mem::size_of::<IoControlBlock>(),
+ 64usize,
+ concat!("Size of:", stringify!(IoControlBlock))
+ );
+ assert_eq!(
+ ::std::mem::align_of::<IoControlBlock>(),
+ 8usize,
+ concat!("Alignment of", stringify!(IoControlBlock))
+ );
+ }
+}
diff --git a/src/linux/epoll.rs b/src/linux/epoll.rs
new file mode 100644
index 0000000..b8e9b7b
--- /dev/null
+++ b/src/linux/epoll.rs
@@ -0,0 +1,522 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Safe wrappers over the
+//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.
+
+use std::io;
+use std::ops::{Deref, Drop};
+use std::os::unix::io::{AsRawFd, RawFd};
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+use bitflags::bitflags;
+use libc::{
+ epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
+ EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
+ EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
+};
+
+use crate::syscall::SyscallReturnCode;
+
+/// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
+#[repr(i32)]
+pub enum ControlOperation {
+ /// Add a file descriptor to the interest list.
+ Add = EPOLL_CTL_ADD,
+ /// Change the settings associated with a file descriptor that is
+ /// already in the interest list.
+ Modify = EPOLL_CTL_MOD,
+ /// Remove a file descriptor from the interest list.
+ Delete = EPOLL_CTL_DEL,
+}
+
+bitflags! {
+ /// The type of events we can monitor a file descriptor for.
+ pub struct EventSet: u32 {
+ /// The associated file descriptor is available for read operations.
+ const IN = EPOLLIN as u32;
+ /// The associated file descriptor is available for write operations.
+ const OUT = EPOLLOUT as u32;
+ /// Error condition happened on the associated file descriptor.
+ const ERROR = EPOLLERR as u32;
+ /// This can be used to detect peer shutdown when using Edge Triggered monitoring.
+ const READ_HANG_UP = EPOLLRDHUP as u32;
+ /// Sets the Edge Triggered behavior for the associated file descriptor.
+ /// The default behavior is Level Triggered.
+ const EDGE_TRIGGERED = EPOLLET as u32;
+ /// Hang up happened on the associated file descriptor. Note that `epoll_wait`
+ /// will always wait for this event and it is not necessary to set it in events.
+ const HANG_UP = EPOLLHUP as u32;
+ /// There is an exceptional condition on that file descriptor. It is mostly used to
+ /// set high priority for some data.
+ const PRIORITY = EPOLLPRI as u32;
+ /// The event is considered as being "processed" from the time when it is returned
+ /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
+ /// epoll file descriptor, the closure of that file descriptor, the removal of the
+ /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
+ /// for the event file descriptor via EPOLL_CTL_MOD.
+ const WAKE_UP = EPOLLWAKEUP as u32;
+ /// Sets the one-shot behavior for the associated file descriptor.
+ const ONE_SHOT = EPOLLONESHOT as u32;
+ /// Sets an exclusive wake up mode for the epoll file descriptor that is being
+ /// attached to the associated file descriptor.
+ /// When a wake up event occurs and multiple epoll file descriptors are attached to
+ /// the same target file using this mode, one or more of the epoll file descriptors
+ /// will receive an event with `epoll_wait`. The default here is for all those file
+ /// descriptors to receive an event.
+ const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
+ }
+}
+
+/// Wrapper over
+/// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
+// We are using `transparent` here to be super sure that this struct and its fields
+// have the same alignment as those from the `epoll_event` struct from C.
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+pub struct EpollEvent(epoll_event);
+
+impl std::fmt::Debug for EpollEvent {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{{ events: {}, data: {} }}", self.events(), self.data())
+ }
+}
+
+impl Deref for EpollEvent {
+ type Target = epoll_event;
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl Default for EpollEvent {
+ fn default() -> Self {
+ EpollEvent(epoll_event {
+ events: 0u32,
+ u64: 0u64,
+ })
+ }
+}
+
+impl EpollEvent {
+ /// Create a new epoll_event instance.
+ ///
+ /// # Arguments
+ ///
+ /// `events` - contains an event mask.
+ /// `data` - a user data variable. `data` field can be a fd on which
+ /// we want to monitor the events specified by `events`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
+ ///
+ /// let event = EpollEvent::new(EventSet::IN, 2);
+ /// ```
+ pub fn new(events: EventSet, data: u64) -> Self {
+ EpollEvent(epoll_event {
+ events: events.bits(),
+ u64: data,
+ })
+ }
+
+ /// Returns the `events` from
+ /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
+ ///
+ /// let event = EpollEvent::new(EventSet::IN, 2);
+ /// assert_eq!(event.events(), 1);
+ /// ```
+ pub fn events(&self) -> u32 {
+ self.events
+ }
+
+ /// Returns the `EventSet` corresponding to `epoll_event.events`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `libc::epoll_event` contains invalid events.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
+ ///
+ /// let event = EpollEvent::new(EventSet::IN, 2);
+ /// assert_eq!(event.event_set(), EventSet::IN);
+ /// ```
+ pub fn event_set(&self) -> EventSet {
+ // This unwrap is safe because `epoll_events` can only be user created or
+ // initialized by the kernel. We trust the kernel to only send us valid
+ // events. The user can only initialize `epoll_events` using valid events.
+ EventSet::from_bits(self.events()).unwrap()
+ }
+
+ /// Returns the `data` from the `libc::epoll_event`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
+ ///
+ /// let event = EpollEvent::new(EventSet::IN, 2);
+ /// assert_eq!(event.data(), 2);
+ /// ```
+ pub fn data(&self) -> u64 {
+ self.u64
+ }
+
+ /// Converts the `libc::epoll_event` data to a RawFd.
+ ///
+ /// This conversion is lossy when the data does not correspond to a RawFd
+ /// (data does not fit in a i32).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
+ ///
+ /// let event = EpollEvent::new(EventSet::IN, 2);
+ /// assert_eq!(event.fd(), 2);
+ /// ```
+ pub fn fd(&self) -> RawFd {
+ self.u64 as i32
+ }
+}
+
+/// Wrapper over epoll functionality.
+#[derive(Debug)]
+pub struct Epoll {
+ epoll_fd: RawFd,
+}
+
+impl Epoll {
+ /// Create a new epoll file descriptor.
+ pub fn new() -> io::Result<Self> {
+ let epoll_fd = SyscallReturnCode(
+ // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`.
+ unsafe { epoll_create1(EPOLL_CLOEXEC) },
+ )
+ .into_result()?;
+ Ok(Epoll { epoll_fd })
+ }
+
+ /// Wrapper for `libc::epoll_ctl`.
+ ///
+ /// This can be used for adding, modifying or removing a file descriptor in the
+ /// interest list of the epoll instance.
+ ///
+ /// # Arguments
+ ///
+ /// * `operation` - refers to the action to be performed on the file descriptor.
+ /// * `fd` - the file descriptor on which we want to perform `operation`.
+ /// * `event` - refers to the `epoll_event` instance that is linked to `fd`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ ///
+ /// use std::os::unix::io::AsRawFd;
+ /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
+ /// use vmm_sys_util::eventfd::EventFd;
+ ///
+ /// let epoll = Epoll::new().unwrap();
+ /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ /// epoll
+ /// .ctl(
+ /// ControlOperation::Add,
+ /// event_fd.as_raw_fd() as i32,
+ /// EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
+ /// )
+ /// .unwrap();
+ /// epoll
+ /// .ctl(
+ /// ControlOperation::Modify,
+ /// event_fd.as_raw_fd() as i32,
+ /// EpollEvent::new(EventSet::IN, 4),
+ /// )
+ /// .unwrap();
+ /// ```
+ pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> {
+ SyscallReturnCode(
+ // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
+ // to watch, as well as a valid epoll_event structure. We also check the return value.
+ unsafe {
+ epoll_ctl(
+ self.epoll_fd,
+ operation as i32,
+ fd,
+ &event as *const EpollEvent as *mut epoll_event,
+ )
+ },
+ )
+ .into_empty_result()
+ }
+
+ /// Wrapper for `libc::epoll_wait`.
+ /// Returns the number of file descriptors in the interest list that became ready
+ /// for I/O or `errno` if an error occurred.
+ ///
+ /// # Arguments
+ ///
+ /// * `timeout` - specifies for how long the `epoll_wait` system call will block
+ /// (measured in milliseconds).
+ /// * `events` - points to a memory area that will be used for storing the events
+ /// returned by `epoll_wait()` call.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ ///
+ /// use std::os::unix::io::AsRawFd;
+ /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
+ /// use vmm_sys_util::eventfd::EventFd;
+ ///
+ /// let epoll = Epoll::new().unwrap();
+ /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ ///
+ /// let mut ready_events = vec![EpollEvent::default(); 10];
+ /// epoll
+ /// .ctl(
+ /// ControlOperation::Add,
+ /// event_fd.as_raw_fd() as i32,
+ /// EpollEvent::new(EventSet::OUT, 4),
+ /// )
+ /// .unwrap();
+ /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
+ /// assert_eq!(ev_count, 1);
+ /// ```
+ pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
+ let events_count = SyscallReturnCode(
+ // SAFETY: Safe because we give a valid epoll file descriptor and an array of
+ // epoll_event structures that will be modified by the kernel to indicate information
+ // about the subset of file descriptors in the interest list.
+ // We also check the return value.
+ unsafe {
+ epoll_wait(
+ self.epoll_fd,
+ events.as_mut_ptr() as *mut epoll_event,
+ events.len() as i32,
+ timeout,
+ )
+ },
+ )
+ .into_result()? as usize;
+
+ Ok(events_count)
+ }
+}
+
+impl AsRawFd for Epoll {
+ fn as_raw_fd(&self) -> RawFd {
+ self.epoll_fd
+ }
+}
+
+impl Drop for Epoll {
+ fn drop(&mut self) {
+ // SAFETY: Safe because this fd is opened with `epoll_create` and we trust
+ // the kernel to give us a valid fd.
+ unsafe {
+ libc::close(self.epoll_fd);
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use crate::eventfd::EventFd;
+
+ #[test]
+ fn test_event_ops() {
+ let mut event = EpollEvent::default();
+ assert_eq!(event.events(), 0);
+ assert_eq!(event.data(), 0);
+
+ event = EpollEvent::new(EventSet::IN, 2);
+ assert_eq!(event.events(), 1);
+ assert_eq!(event.event_set(), EventSet::IN);
+
+ assert_eq!(event.data(), 2);
+ assert_eq!(event.fd(), 2);
+ }
+
+ #[test]
+ fn test_events_debug() {
+ let events = EpollEvent::new(EventSet::IN, 42);
+ assert_eq!(format!("{:?}", events), "{ events: 1, data: 42 }")
+ }
+
+ #[test]
+ fn test_epoll() {
+ const DEFAULT__TIMEOUT: i32 = 250;
+ const EVENT_BUFFER_SIZE: usize = 128;
+
+ let epoll = Epoll::new().unwrap();
+ assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());
+
+ // Let's test different scenarios for `epoll_ctl()` and `epoll_wait()` functionality.
+
+ let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ // For EPOLLOUT to be available it is enough only to be possible to write a value of
+ // at least 1 to the eventfd counter without blocking.
+ // If we write a value greater than 0 to this counter, the fd will be available for
+ // EPOLLIN events too.
+ event_fd_1.write(1).unwrap();
+
+ let mut event_1 =
+ EpollEvent::new(EventSet::IN | EventSet::OUT, event_fd_1.as_raw_fd() as u64);
+
+ // For EPOLL_CTL_ADD behavior we will try to add some fds with different event masks into
+ // the interest list of epoll instance.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Add,
+ event_fd_1.as_raw_fd() as i32,
+ event_1
+ )
+ .is_ok());
+
+ // We can't add twice the same fd to epoll interest list.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Add,
+ event_fd_1.as_raw_fd() as i32,
+ event_1
+ )
+ .is_err());
+
+ let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ event_fd_2.write(1).unwrap();
+ assert!(epoll
+ .ctl(
+ ControlOperation::Add,
+ event_fd_2.as_raw_fd() as i32,
+ // For this fd, we want an Event instance that has `data` field set to other
+ // value than the value of the fd and `events` without EPOLLIN type set.
+ EpollEvent::new(EventSet::OUT, 10)
+ )
+ .is_ok());
+
+ // For the following eventfd we won't write anything to its counter, so we expect EPOLLIN
+ // event to not be available for this fd, even if we say that we want to monitor this type
+ // of event via EPOLL_CTL_ADD operation.
+ let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, event_fd_3.as_raw_fd() as u64);
+ assert!(epoll
+ .ctl(
+ ControlOperation::Add,
+ event_fd_3.as_raw_fd() as i32,
+ event_3
+ )
+ .is_ok());
+
+ // Let's check `epoll_wait()` behavior for our epoll instance.
+ let mut ready_events = vec![EpollEvent::default(); EVENT_BUFFER_SIZE];
+ let mut ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap();
+
+ // We expect to have 3 fds in the ready list of epoll instance.
+ assert_eq!(ev_count, 3);
+
+ // Let's check also the Event values that are now returned in the ready list.
+ assert_eq!(ready_events[0].data(), event_fd_1.as_raw_fd() as u64);
+ // For this fd, `data` field was populated with random data instead of the
+ // corresponding fd value.
+ assert_eq!(ready_events[1].data(), 10);
+ assert_eq!(ready_events[2].data(), event_fd_3.as_raw_fd() as u64);
+
+ // EPOLLIN and EPOLLOUT should be available for this fd.
+ assert_eq!(
+ ready_events[0].events(),
+ (EventSet::IN | EventSet::OUT).bits()
+ );
+ // Only EPOLLOUT is expected because we didn't want to monitor EPOLLIN on this fd.
+ assert_eq!(ready_events[1].events(), EventSet::OUT.bits());
+ // Only EPOLLOUT too because eventfd counter value is 0 (we didn't write a value
+ // greater than 0 to it).
+ assert_eq!(ready_events[2].events(), EventSet::OUT.bits());
+
+ // Now we're gonna modify the Event instance for a fd to test EPOLL_CTL_MOD
+ // behavior.
+ // We create here a new Event with some events, other than those previously set,
+ // that we want to monitor this time on event_fd_1.
+ event_1 = EpollEvent::new(EventSet::OUT, 20);
+ assert!(epoll
+ .ctl(
+ ControlOperation::Modify,
+ event_fd_1.as_raw_fd() as i32,
+ event_1
+ )
+ .is_ok());
+
+ let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
+ // Can't modify a fd that wasn't added to epoll interest list.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Modify,
+ event_fd_4.as_raw_fd() as i32,
+ EpollEvent::default()
+ )
+ .is_err());
+
+ let _ = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap();
+
+ // Let's check that Event fields were indeed changed for the `event_fd_1` fd.
+ assert_eq!(ready_events[0].data(), 20);
+ // EPOLLOUT is now available for this fd as we've intended with EPOLL_CTL_MOD operation.
+ assert_eq!(ready_events[0].events(), EventSet::OUT.bits());
+
+ // Now let's set for a fd to not have any events monitored.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Modify,
+ event_fd_1.as_raw_fd() as i32,
+ EpollEvent::default()
+ )
+ .is_ok());
+
+ // In this particular case we expect to remain only with 2 fds in the ready list.
+ ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap();
+ assert_eq!(ev_count, 2);
+
+ // Let's also delete a fd from the interest list.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Delete,
+ event_fd_2.as_raw_fd() as i32,
+ EpollEvent::default()
+ )
+ .is_ok());
+
+ // We expect to have only one fd remained in the ready list (event_fd_3).
+ ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap();
+
+ assert_eq!(ev_count, 1);
+ assert_eq!(ready_events[0].data(), event_fd_3.as_raw_fd() as u64);
+ assert_eq!(ready_events[0].events(), EventSet::OUT.bits());
+
+ // If we try to remove a fd from epoll interest list that wasn't added before it will fail.
+ assert!(epoll
+ .ctl(
+ ControlOperation::Delete,
+ event_fd_4.as_raw_fd() as i32,
+ EpollEvent::default()
+ )
+ .is_err());
+ }
+}
diff --git a/src/linux/eventfd.rs b/src/linux/eventfd.rs
new file mode 100644
index 0000000..55944f6
--- /dev/null
+++ b/src/linux/eventfd.rs
@@ -0,0 +1,216 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Structure and wrapper functions for working with
+//! [`eventfd`](http://man7.org/linux/man-pages/man2/eventfd.2.html).
+
+use std::fs::File;
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+use std::{io, mem, result};
+
+use libc::{c_void, dup, eventfd, read, write};
+
+// Reexport commonly used flags from libc.
+pub use libc::{EFD_CLOEXEC, EFD_NONBLOCK, EFD_SEMAPHORE};
+
+/// A safe wrapper around Linux
+/// [`eventfd`](http://man7.org/linux/man-pages/man2/eventfd.2.html).
+#[derive(Debug)]
+pub struct EventFd {
+ eventfd: File,
+}
+
+impl EventFd {
+ /// Create a new EventFd with an initial value.
+ ///
+ /// # Arguments
+ ///
+ /// * `flag`: The initial value used for creating the `EventFd`.
+ /// Refer to Linux [`eventfd`](http://man7.org/linux/man-pages/man2/eventfd.2.html).
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
+ ///
+ /// EventFd::new(EFD_NONBLOCK).unwrap();
+ /// ```
+ pub fn new(flag: i32) -> result::Result<EventFd, io::Error> {
+ // SAFETY: This is safe because eventfd merely allocated an eventfd for
+ // our process and we handle the error case.
+ let ret = unsafe { eventfd(0, flag) };
+ if ret < 0 {
+ Err(io::Error::last_os_error())
+ } else {
+ Ok(EventFd {
+ // SAFETY: This is safe because we checked ret for success and know
+ // the kernel gave us an fd that we own.
+ eventfd: unsafe { File::from_raw_fd(ret) },
+ })
+ }
+ }
+
+ /// Add a value to the eventfd's counter.
+ ///
+ /// When the addition causes the counter overflow, this would either block
+ /// until a [`read`](http://man7.org/linux/man-pages/man2/read.2.html) is
+ /// performed on the file descriptor, or fail with the
+ /// error EAGAIN if the file descriptor has been made nonblocking.
+ ///
+ /// # Arguments
+ ///
+ /// * `v`: the value to be added to the eventfd's counter.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
+ ///
+ /// let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ /// evt.write(55).unwrap();
+ /// ```
+ pub fn write(&self, v: u64) -> result::Result<(), io::Error> {
+ // SAFETY: This is safe because we made this fd and the pointer we pass
+ // can not overflow because we give the syscall's size parameter properly.
+ let ret = unsafe {
+ write(
+ self.as_raw_fd(),
+ &v as *const u64 as *const c_void,
+ mem::size_of::<u64>(),
+ )
+ };
+ if ret <= 0 {
+ Err(io::Error::last_os_error())
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Read a value from the eventfd.
+ ///
+ /// If the counter is zero, this would either block
+ /// until the counter becomes nonzero, or fail with the
+ /// error EAGAIN if the file descriptor has been made nonblocking.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
+ ///
+ /// let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ /// evt.write(55).unwrap();
+ /// assert_eq!(evt.read().unwrap(), 55);
+ /// ```
+ pub fn read(&self) -> result::Result<u64, io::Error> {
+ let mut buf: u64 = 0;
+ // SAFETY: This is safe because we made this fd and the pointer we
+ // pass can not overflow because we give the syscall's size parameter properly.
+ let ret = unsafe {
+ read(
+ self.as_raw_fd(),
+ &mut buf as *mut u64 as *mut c_void,
+ mem::size_of::<u64>(),
+ )
+ };
+ if ret < 0 {
+ Err(io::Error::last_os_error())
+ } else {
+ Ok(buf)
+ }
+ }
+
+ /// Clone this EventFd.
+ ///
+ /// This internally creates a new file descriptor and it will share the same
+ /// underlying count within the kernel.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
+ ///
+ /// let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ /// let evt_clone = evt.try_clone().unwrap();
+ /// evt.write(923).unwrap();
+ /// assert_eq!(evt_clone.read().unwrap(), 923);
+ /// ```
+ pub fn try_clone(&self) -> result::Result<EventFd, io::Error> {
+ // SAFETY: This is safe because we made this fd and properly check that it returns
+ // without error.
+ let ret = unsafe { dup(self.as_raw_fd()) };
+ if ret < 0 {
+ Err(io::Error::last_os_error())
+ } else {
+ Ok(EventFd {
+ // SAFETY: This is safe because we checked ret for success and know the kernel
+ // gave us an fd that we own.
+ eventfd: unsafe { File::from_raw_fd(ret) },
+ })
+ }
+ }
+}
+
+impl AsRawFd for EventFd {
+ fn as_raw_fd(&self) -> RawFd {
+ self.eventfd.as_raw_fd()
+ }
+}
+
+impl FromRawFd for EventFd {
+ unsafe fn from_raw_fd(fd: RawFd) -> Self {
+ EventFd {
+ eventfd: File::from_raw_fd(fd),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_new() {
+ EventFd::new(EFD_NONBLOCK).unwrap();
+ EventFd::new(0).unwrap();
+ }
+
+ #[test]
+ fn test_read_write() {
+ let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ evt.write(55).unwrap();
+ assert_eq!(evt.read().unwrap(), 55);
+ }
+
+ #[test]
+ fn test_write_overflow() {
+ let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ evt.write(std::u64::MAX - 1).unwrap();
+ let r = evt.write(1);
+ match r {
+ Err(ref inner) if inner.kind() == io::ErrorKind::WouldBlock => (),
+ _ => panic!("Unexpected"),
+ }
+ }
+ #[test]
+ fn test_read_nothing() {
+ let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ let r = evt.read();
+ match r {
+ Err(ref inner) if inner.kind() == io::ErrorKind::WouldBlock => (),
+ _ => panic!("Unexpected"),
+ }
+ }
+ #[test]
+ fn test_clone() {
+ let evt = EventFd::new(EFD_NONBLOCK).unwrap();
+ let evt_clone = evt.try_clone().unwrap();
+ evt.write(923).unwrap();
+ assert_eq!(evt_clone.read().unwrap(), 923);
+ }
+}
diff --git a/src/linux/fallocate.rs b/src/linux/fallocate.rs
new file mode 100644
index 0000000..e3a7fed
--- /dev/null
+++ b/src/linux/fallocate.rs
@@ -0,0 +1,93 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Enum and function for dealing with an allocated disk space
+//! by [`fallocate`](http://man7.org/linux/man-pages/man2/fallocate.2.html).
+
+use std::os::unix::io::AsRawFd;
+
+use crate::errno::{errno_result, Error, Result};
+
+/// Operation to be performed on a given range when calling [`fallocate`]
+///
+/// [`fallocate`]: fn.fallocate.html
+pub enum FallocateMode {
+ /// Deallocating file space.
+ PunchHole,
+ /// Zeroing file space.
+ ZeroRange,
+}
+
+/// A safe wrapper for [`fallocate`](http://man7.org/linux/man-pages/man2/fallocate.2.html).
+///
+/// Manipulate the file space with specified operation parameters.
+///
+/// # Arguments
+///
+/// * `file`: the file to be manipulate.
+/// * `mode`: specify the operation to be performed on the given range.
+/// * `keep_size`: file size won't be changed even if `offset` + `len` is greater
+/// than the file size.
+/// * `offset`: the position that manipulates the file from.
+/// * `size`: the bytes of the operation range.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// # use std::fs::OpenOptions;
+/// # use std::path::PathBuf;
+/// use vmm_sys_util::fallocate::{fallocate, FallocateMode};
+/// use vmm_sys_util::tempdir::TempDir;
+///
+/// let tempdir = TempDir::new_with_prefix("/tmp/fallocate_test").unwrap();
+/// let mut path = PathBuf::from(tempdir.as_path());
+/// path.push("file");
+/// let mut f = OpenOptions::new()
+/// .read(true)
+/// .write(true)
+/// .create(true)
+/// .open(&path)
+/// .unwrap();
+/// fallocate(&f, FallocateMode::PunchHole, true, 0, 1).unwrap();
+/// ```
+pub fn fallocate(
+ file: &dyn AsRawFd,
+ mode: FallocateMode,
+ keep_size: bool,
+ offset: u64,
+ len: u64,
+) -> Result<()> {
+ let offset = if offset > libc::off64_t::max_value() as u64 {
+ return Err(Error::new(libc::EINVAL));
+ } else {
+ offset as libc::off64_t
+ };
+
+ let len = if len > libc::off64_t::max_value() as u64 {
+ return Err(Error::new(libc::EINVAL));
+ } else {
+ len as libc::off64_t
+ };
+
+ let mut mode = match mode {
+ FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE,
+ FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE,
+ };
+
+ if keep_size {
+ mode |= libc::FALLOC_FL_KEEP_SIZE;
+ }
+
+ // SAFETY: Safe since we pass in a valid fd and fallocate mode, validate offset and len,
+ // and check the return value.
+ let ret = unsafe { libc::fallocate64(file.as_raw_fd(), mode, offset, len) };
+ if ret < 0 {
+ errno_result()
+ } else {
+ Ok(())
+ }
+}
diff --git a/src/linux/ioctl.rs b/src/linux/ioctl.rs
new file mode 100644
index 0000000..a1e29d5
--- /dev/null
+++ b/src/linux/ioctl.rs
@@ -0,0 +1,410 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Macros and functions for working with
+//! [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html).
+
+use std::os::raw::{c_int, c_uint, c_ulong, c_void};
+use std::os::unix::io::AsRawFd;
+
+// The only reason
+// [_IOC](https://elixir.bootlin.com/linux/v5.10.129/source/arch/alpha/include/uapi/asm/ioctl.h#L40)
+// is a macro in C is because C doesn't have const functions, it is always better when possible to
+// use a const function over a macro in Rust.
+/// Function to calculate icotl number. Mimic of
+/// [_IOC](https://elixir.bootlin.com/linux/v5.10.129/source/arch/alpha/include/uapi/asm/ioctl.h#L40)
+/// ```
+/// # use std::os::raw::c_uint;
+/// # use vmm_sys_util::ioctl::{ioctl_expr, _IOC_NONE};
+/// const KVMIO: c_uint = 0xAE;
+/// ioctl_expr(_IOC_NONE, KVMIO, 0x01, 0);
+/// ```
+pub const fn ioctl_expr(
+ dir: c_uint,
+ ty: c_uint,
+ nr: c_uint,
+ size: c_uint,
+) -> ::std::os::raw::c_ulong {
+ (dir << crate::ioctl::_IOC_DIRSHIFT
+ | ty << crate::ioctl::_IOC_TYPESHIFT
+ | nr << crate::ioctl::_IOC_NRSHIFT
+ | size << crate::ioctl::_IOC_SIZESHIFT) as ::std::os::raw::c_ulong
+}
+
+/// Declare a function that returns an ioctl number.
+///
+/// ```
+/// # #[macro_use] extern crate vmm_sys_util;
+/// # use std::os::raw::c_uint;
+/// use vmm_sys_util::ioctl::_IOC_NONE;
+///
+/// const KVMIO: c_uint = 0xAE;
+/// ioctl_ioc_nr!(KVM_CREATE_VM, _IOC_NONE, KVMIO, 0x01, 0);
+/// ```
+#[macro_export]
+macro_rules! ioctl_ioc_nr {
+ ($name:ident, $dir:expr, $ty:expr, $nr:expr, $size:expr) => {
+ #[allow(non_snake_case)]
+ #[allow(clippy::cast_lossless)]
+ pub fn $name() -> ::std::os::raw::c_ulong {
+ $crate::ioctl::ioctl_expr($dir, $ty, $nr, $size)
+ }
+ };
+ ($name:ident, $dir:expr, $ty:expr, $nr:expr, $size:expr, $($v:ident),+) => {
+ #[allow(non_snake_case)]
+ #[allow(clippy::cast_lossless)]
+ pub fn $name($($v: ::std::os::raw::c_uint),+) -> ::std::os::raw::c_ulong {
+ $crate::ioctl::ioctl_expr($dir, $ty, $nr, $size)
+ }
+ };
+}
+
+/// Declare an ioctl that transfers no data.
+///
+/// ```
+/// # #[macro_use] extern crate vmm_sys_util;
+/// # use std::os::raw::c_uint;
+/// const KVMIO: c_uint = 0xAE;
+/// ioctl_io_nr!(KVM_CREATE_VM, KVMIO, 0x01);
+/// ```
+#[macro_export]
+macro_rules! ioctl_io_nr {
+ ($name:ident, $ty:expr, $nr:expr) => {
+ ioctl_ioc_nr!($name, $crate::ioctl::_IOC_NONE, $ty, $nr, 0);
+ };
+ ($name:ident, $ty:expr, $nr:expr, $($v:ident),+) => {
+ ioctl_ioc_nr!($name, $crate::ioctl::_IOC_NONE, $ty, $nr, 0, $($v),+);
+ };
+}
+
+/// Declare an ioctl that reads data.
+///
+/// ```
+/// # #[macro_use] extern crate vmm_sys_util;
+/// const TUNTAP: ::std::os::raw::c_uint = 0x54;
+/// ioctl_ior_nr!(TUNGETFEATURES, TUNTAP, 0xcf, ::std::os::raw::c_uint);
+/// ```
+#[macro_export]
+macro_rules! ioctl_ior_nr {
+ ($name:ident, $ty:expr, $nr:expr, $size:ty) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_READ,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32
+ );
+ };
+ ($name:ident, $ty:expr, $nr:expr, $size:ty, $($v:ident),+) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_READ,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32,
+ $($v),+
+ );
+ };
+}
+
+/// Declare an ioctl that writes data.
+///
+/// ```
+/// # #[macro_use] extern crate vmm_sys_util;
+/// const TUNTAP: ::std::os::raw::c_uint = 0x54;
+/// ioctl_iow_nr!(TUNSETQUEUE, TUNTAP, 0xd9, ::std::os::raw::c_int);
+/// ```
+#[macro_export]
+macro_rules! ioctl_iow_nr {
+ ($name:ident, $ty:expr, $nr:expr, $size:ty) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_WRITE,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32
+ );
+ };
+ ($name:ident, $ty:expr, $nr:expr, $size:ty, $($v:ident),+) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_WRITE,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32,
+ $($v),+
+ );
+ };
+}
+
+/// Declare an ioctl that reads and writes data.
+///
+/// ```
+/// # #[macro_use] extern crate vmm_sys_util;
+/// const VHOST: ::std::os::raw::c_uint = 0xAF;
+/// ioctl_iowr_nr!(VHOST_GET_VRING_BASE, VHOST, 0x12, ::std::os::raw::c_int);
+/// ```
+#[macro_export]
+macro_rules! ioctl_iowr_nr {
+ ($name:ident, $ty:expr, $nr:expr, $size:ty) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_READ | $crate::ioctl::_IOC_WRITE,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32
+ );
+ };
+ ($name:ident, $ty:expr, $nr:expr, $size:ty, $($v:ident),+) => {
+ ioctl_ioc_nr!(
+ $name,
+ $crate::ioctl::_IOC_READ | $crate::ioctl::_IOC_WRITE,
+ $ty,
+ $nr,
+ ::std::mem::size_of::<$size>() as u32,
+ $($v),+
+ );
+ };
+}
+
+// Define IOC_* constants in a module so that we can allow missing docs on it.
+// There is not much value in documenting these as it is code generated from
+// kernel definitions.
+#[allow(missing_docs)]
+mod ioc {
+ use std::os::raw::c_uint;
+
+ pub const _IOC_NRBITS: c_uint = 8;
+ pub const _IOC_TYPEBITS: c_uint = 8;
+ pub const _IOC_SIZEBITS: c_uint = 14;
+ pub const _IOC_DIRBITS: c_uint = 2;
+ pub const _IOC_NRMASK: c_uint = 255;
+ pub const _IOC_TYPEMASK: c_uint = 255;
+ pub const _IOC_SIZEMASK: c_uint = 16383;
+ pub const _IOC_DIRMASK: c_uint = 3;
+ pub const _IOC_NRSHIFT: c_uint = 0;
+ pub const _IOC_TYPESHIFT: c_uint = 8;
+ pub const _IOC_SIZESHIFT: c_uint = 16;
+ pub const _IOC_DIRSHIFT: c_uint = 30;
+ pub const _IOC_NONE: c_uint = 0;
+ pub const _IOC_WRITE: c_uint = 1;
+ pub const _IOC_READ: c_uint = 2;
+ pub const IOC_IN: c_uint = 1_073_741_824;
+ pub const IOC_OUT: c_uint = 2_147_483_648;
+ pub const IOC_INOUT: c_uint = 3_221_225_472;
+ pub const IOCSIZE_MASK: c_uint = 1_073_676_288;
+ pub const IOCSIZE_SHIFT: c_uint = 16;
+}
+pub use self::ioc::*;
+
+// The type of the `req` parameter is different for the `musl` library. This will enable
+// successful build for other non-musl libraries.
+#[cfg(target_env = "musl")]
+type IoctlRequest = c_int;
+#[cfg(all(not(target_env = "musl"), not(target_os = "android")))]
+type IoctlRequest = c_ulong;
+#[cfg(all(not(target_env = "musl"), target_os = "android"))]
+type IoctlRequest = c_int;
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with no arguments.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// # #[macro_use] extern crate vmm_sys_util;
+/// #
+/// # use libc::{open, O_CLOEXEC, O_RDWR};
+/// # use std::fs::File;
+/// # use std::os::raw::{c_char, c_uint};
+/// # use std::os::unix::io::FromRawFd;
+/// use vmm_sys_util::ioctl::ioctl;
+///
+/// const KVMIO: c_uint = 0xAE;
+/// const KVM_API_VERSION: u32 = 12;
+/// ioctl_io_nr!(KVM_GET_API_VERSION, KVMIO, 0x00);
+///
+/// let open_flags = O_RDWR | O_CLOEXEC;
+/// let kvm_fd = unsafe { open("/dev/kvm\0".as_ptr() as *const c_char, open_flags) };
+///
+/// let ret = unsafe { ioctl(&File::from_raw_fd(kvm_fd), KVM_GET_API_VERSION()) };
+///
+/// assert_eq!(ret as u32, KVM_API_VERSION);
+/// ```
+pub unsafe fn ioctl<F: AsRawFd>(fd: &F, req: c_ulong) -> c_int {
+ libc::ioctl(fd.as_raw_fd(), req as IoctlRequest, 0)
+}
+
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with a single value argument.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+/// * `arg`: a single value passed to ioctl.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// # #[macro_use] extern crate vmm_sys_util;
+/// # use libc::{open, O_CLOEXEC, O_RDWR};
+/// # use std::fs::File;
+/// # use std::os::raw::{c_char, c_uint, c_ulong};
+/// # use std::os::unix::io::FromRawFd;
+/// use vmm_sys_util::ioctl::ioctl_with_val;
+///
+/// const KVMIO: c_uint = 0xAE;
+/// const KVM_CAP_USER_MEMORY: u32 = 3;
+/// ioctl_io_nr!(KVM_CHECK_EXTENSION, KVMIO, 0x03);
+///
+/// let open_flags = O_RDWR | O_CLOEXEC;
+/// let kvm_fd = unsafe { open("/dev/kvm\0".as_ptr() as *const c_char, open_flags) };
+///
+/// let ret = unsafe {
+/// ioctl_with_val(
+/// &File::from_raw_fd(kvm_fd),
+/// KVM_CHECK_EXTENSION(),
+/// KVM_CAP_USER_MEMORY as c_ulong,
+/// )
+/// };
+/// assert!(ret > 0);
+/// ```
+pub unsafe fn ioctl_with_val<F: AsRawFd>(fd: &F, req: c_ulong, arg: c_ulong) -> c_int {
+ libc::ioctl(fd.as_raw_fd(), req as IoctlRequest, arg)
+}
+
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with an immutable reference.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+/// * `arg`: an immutable reference passed to ioctl.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+pub unsafe fn ioctl_with_ref<F: AsRawFd, T>(fd: &F, req: c_ulong, arg: &T) -> c_int {
+ libc::ioctl(
+ fd.as_raw_fd(),
+ req as IoctlRequest,
+ arg as *const T as *const c_void,
+ )
+}
+
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with a mutable reference.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+/// * `arg`: a mutable reference passed to ioctl.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+pub unsafe fn ioctl_with_mut_ref<F: AsRawFd, T>(fd: &F, req: c_ulong, arg: &mut T) -> c_int {
+ libc::ioctl(
+ fd.as_raw_fd(),
+ req as IoctlRequest,
+ arg as *mut T as *mut c_void,
+ )
+}
+
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with a raw pointer.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+/// * `arg`: a raw pointer passed to ioctl.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+pub unsafe fn ioctl_with_ptr<F: AsRawFd, T>(fd: &F, req: c_ulong, arg: *const T) -> c_int {
+ libc::ioctl(fd.as_raw_fd(), req as IoctlRequest, arg as *const c_void)
+}
+
+/// Run an [`ioctl`](http://man7.org/linux/man-pages/man2/ioctl.2.html)
+/// with a mutable raw pointer.
+///
+/// # Arguments
+///
+/// * `fd`: an open file descriptor corresponding to the device on which
+/// to call the ioctl.
+/// * `req`: a device-dependent request code.
+/// * `arg`: a mutable raw pointer passed to ioctl.
+///
+/// # Safety
+///
+/// The caller should ensure to pass a valid file descriptor and have the
+/// return value checked.
+pub unsafe fn ioctl_with_mut_ptr<F: AsRawFd, T>(fd: &F, req: c_ulong, arg: *mut T) -> c_int {
+ libc::ioctl(fd.as_raw_fd(), req as IoctlRequest, arg as *mut c_void)
+}
+
+#[cfg(test)]
+mod tests {
+ const TUNTAP: ::std::os::raw::c_uint = 0x54;
+ const VHOST: ::std::os::raw::c_uint = 0xAF;
+ const EVDEV: ::std::os::raw::c_uint = 0x45;
+
+ const KVMIO: ::std::os::raw::c_uint = 0xAE;
+
+ ioctl_io_nr!(KVM_CREATE_VM, KVMIO, 0x01);
+ ioctl_ior_nr!(TUNGETFEATURES, TUNTAP, 0xcf, ::std::os::raw::c_uint);
+ ioctl_iow_nr!(TUNSETQUEUE, TUNTAP, 0xd9, ::std::os::raw::c_int);
+ ioctl_io_nr!(VHOST_SET_OWNER, VHOST, 0x01);
+ ioctl_iowr_nr!(VHOST_GET_VRING_BASE, VHOST, 0x12, ::std::os::raw::c_int);
+ ioctl_iowr_nr!(KVM_GET_MSR_INDEX_LIST, KVMIO, 0x2, ::std::os::raw::c_int);
+
+ ioctl_ior_nr!(EVIOCGBIT, EVDEV, 0x20 + evt, [u8; 128], evt);
+ ioctl_io_nr!(FAKE_IOCTL_2_ARG, EVDEV, 0x01 + x + y, x, y);
+
+ #[test]
+ fn test_ioctl_macros() {
+ assert_eq!(0x0000_AE01, KVM_CREATE_VM());
+ assert_eq!(0x0000_AF01, VHOST_SET_OWNER());
+ assert_eq!(0x8004_54CF, TUNGETFEATURES());
+ assert_eq!(0x4004_54D9, TUNSETQUEUE());
+ assert_eq!(0xC004_AE02, KVM_GET_MSR_INDEX_LIST());
+ assert_eq!(0xC004_AF12, VHOST_GET_VRING_BASE());
+
+ assert_eq!(0x8080_4522, EVIOCGBIT(2));
+ assert_eq!(0x0000_4509, FAKE_IOCTL_2_ARG(3, 5));
+ }
+}
diff --git a/src/linux/mod.rs b/src/linux/mod.rs
new file mode 100644
index 0000000..daf86f5
--- /dev/null
+++ b/src/linux/mod.rs
@@ -0,0 +1,15 @@
+// Copyright 2022 rust-vmm Authors or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+#[macro_use]
+pub mod ioctl;
+pub mod aio;
+pub mod epoll;
+pub mod eventfd;
+pub mod fallocate;
+pub mod poll;
+pub mod seek_hole;
+pub mod signal;
+pub mod sock_ctrl_msg;
+pub mod timerfd;
+pub mod write_zeroes;
diff --git a/src/linux/poll.rs b/src/linux/poll.rs
new file mode 100644
index 0000000..12809f0
--- /dev/null
+++ b/src/linux/poll.rs
@@ -0,0 +1,1010 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Traits and structures for working with
+//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
+
+use std::cell::{Cell, Ref, RefCell};
+use std::cmp::min;
+use std::fs::File;
+use std::i32;
+use std::i64;
+use std::io::{stderr, Cursor, Write};
+use std::marker::PhantomData;
+use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
+use std::ptr::null_mut;
+use std::slice;
+use std::thread;
+use std::time::Duration;
+
+use libc::{
+ c_int, epoll_create1, epoll_ctl, epoll_event, epoll_wait, EINTR, EPOLLERR, EPOLLHUP, EPOLLIN,
+ EPOLLOUT, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
+};
+
+use crate::errno::{errno_result, Error, Result};
+
+macro_rules! handle_eintr_errno {
+ ($x:expr) => {{
+ let mut res;
+ loop {
+ res = $x;
+ if res != -1 || Error::last() != Error::new(EINTR) {
+ break;
+ }
+ }
+ res
+ }};
+}
+
+const POLL_CONTEXT_MAX_EVENTS: usize = 16;
+
+/// A wrapper of raw `libc::epoll_event`.
+///
+/// This should only be used with [`EpollContext`](struct.EpollContext.html).
+pub struct EpollEvents(RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>);
+
+impl EpollEvents {
+ /// Creates a new EpollEvents.
+ pub fn new() -> EpollEvents {
+ EpollEvents(RefCell::new(
+ [epoll_event { events: 0, u64: 0 }; POLL_CONTEXT_MAX_EVENTS],
+ ))
+ }
+}
+
+impl Default for EpollEvents {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Trait for a token that can be associated with an `fd` in a [`PollContext`](struct.PollContext.html).
+///
+/// Simple enums that have no or primitive variant data can use the `#[derive(PollToken)]`
+/// custom derive to implement this trait.
+pub trait PollToken {
+ /// Converts this token into a u64 that can be turned back into a token via `from_raw_token`.
+ fn as_raw_token(&self) -> u64;
+
+ /// Converts a raw token as returned from `as_raw_token` back into a token.
+ ///
+ /// It is invalid to give a raw token that was not returned via `as_raw_token` from the same
+ /// `Self`. The implementation can expect that this will never happen as a result of its usage
+ /// in `PollContext`.
+ fn from_raw_token(data: u64) -> Self;
+}
+
+impl PollToken for usize {
+ fn as_raw_token(&self) -> u64 {
+ *self as u64
+ }
+
+ fn from_raw_token(data: u64) -> Self {
+ data as Self
+ }
+}
+
+impl PollToken for u64 {
+ fn as_raw_token(&self) -> u64 {
+ *self as u64
+ }
+
+ fn from_raw_token(data: u64) -> Self {
+ data as Self
+ }
+}
+
+impl PollToken for u32 {
+ fn as_raw_token(&self) -> u64 {
+ u64::from(*self)
+ }
+
+ fn from_raw_token(data: u64) -> Self {
+ data as Self
+ }
+}
+
+impl PollToken for u16 {
+ fn as_raw_token(&self) -> u64 {
+ u64::from(*self)
+ }
+
+ fn from_raw_token(data: u64) -> Self {
+ data as Self
+ }
+}
+
+impl PollToken for u8 {
+ fn as_raw_token(&self) -> u64 {
+ u64::from(*self)
+ }
+
+ fn from_raw_token(data: u64) -> Self {
+ data as Self
+ }
+}
+
+impl PollToken for () {
+ fn as_raw_token(&self) -> u64 {
+ 0
+ }
+
+ fn from_raw_token(_data: u64) -> Self {}
+}
+
+/// An event returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
+pub struct PollEvent<'a, T> {
+ event: &'a epoll_event,
+ token: PhantomData<T>, // Needed to satisfy usage of T
+}
+
+impl<'a, T: PollToken> PollEvent<'a, T> {
+ /// Gets the token associated in
+ /// [`PollContext::add`](struct.PollContext.html#method.add) with this event.
+ pub fn token(&self) -> T {
+ T::from_raw_token(self.event.u64)
+ }
+
+ /// Get the raw events returned by the kernel.
+ pub fn raw_events(&self) -> u32 {
+ self.event.events
+ }
+
+ /// Checks if the event is readable.
+ ///
+ /// True if the `fd` associated with this token in
+ /// [`PollContext::add`](struct.PollContext.html#method.add) is readable.
+ pub fn readable(&self) -> bool {
+ self.event.events & (EPOLLIN as u32) != 0
+ }
+
+ /// Checks if the event is writable.
+ ///
+ /// True if the `fd` associated with this token in
+ /// [`PollContext::add`](struct.PollContext.html#method.add) is writable.
+ pub fn writable(&self) -> bool {
+ self.event.events & (EPOLLOUT as u32) != 0
+ }
+
+ /// Checks if the event has been hangup on.
+ ///
+ /// True if the `fd` associated with this token in
+ /// [`PollContext::add`](struct.PollContext.html#method.add) has been hungup on.
+ pub fn hungup(&self) -> bool {
+ self.event.events & (EPOLLHUP as u32) != 0
+ }
+
+ /// Checks if the event has associated error conditions.
+ ///
+ /// True if the `fd` associated with this token in
+ /// [`PollContext::add`](struct.PollContext.html#method.add) has associated error conditions.
+ pub fn has_error(&self) -> bool {
+ self.event.events & (EPOLLERR as u32) != 0
+ }
+}
+
+/// An iterator over a subset of events returned by
+/// [`PollContext::wait`](struct.PollContext.html#method.wait).
+pub struct PollEventIter<'a, I, T>
+where
+ I: Iterator<Item = &'a epoll_event>,
+{
+ mask: u32,
+ iter: I,
+ tokens: PhantomData<[T]>, // Needed to satisfy usage of T
+}
+
+impl<'a, I, T> Iterator for PollEventIter<'a, I, T>
+where
+ I: Iterator<Item = &'a epoll_event>,
+ T: PollToken,
+{
+ type Item = PollEvent<'a, T>;
+ fn next(&mut self) -> Option<Self::Item> {
+ let mask = self.mask;
+ self.iter
+ .find(|event| (event.events & mask) != 0)
+ .map(|event| PollEvent {
+ event,
+ token: PhantomData,
+ })
+ }
+}
+
+/// The list of events returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
+pub struct PollEvents<'a, T> {
+ count: usize,
+ events: Ref<'a, [epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
+ tokens: PhantomData<[T]>, // Needed to satisfy usage of T
+}
+
+impl<'a, T: PollToken> PollEvents<'a, T> {
+ /// Creates owned structure from borrowed [`PollEvents`](struct.PollEvents.html).
+ ///
+ /// Copies the events to an owned structure so the reference to this (and by extension
+ /// [`PollContext`](struct.PollContext.html)) can be dropped.
+ pub fn to_owned(&self) -> PollEventsOwned<T> {
+ PollEventsOwned {
+ count: self.count,
+ events: RefCell::new(*self.events),
+ tokens: PhantomData,
+ }
+ }
+
+ /// Iterates over each event.
+ pub fn iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
+ PollEventIter {
+ mask: 0xffff_ffff,
+ iter: self.events[..self.count].iter(),
+ tokens: PhantomData,
+ }
+ }
+
+ /// Iterates over each readable event.
+ pub fn iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
+ PollEventIter {
+ mask: EPOLLIN as u32,
+ iter: self.events[..self.count].iter(),
+ tokens: PhantomData,
+ }
+ }
+
+ /// Iterates over each hungup event.
+ pub fn iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
+ PollEventIter {
+ mask: EPOLLHUP as u32,
+ iter: self.events[..self.count].iter(),
+ tokens: PhantomData,
+ }
+ }
+}
+
+/// A deep copy of the event records from [`PollEvents`](struct.PollEvents.html).
+pub struct PollEventsOwned<T> {
+ count: usize,
+ events: RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
+ tokens: PhantomData<T>, // Needed to satisfy usage of T
+}
+
+impl<T: PollToken> PollEventsOwned<T> {
+ /// Creates borrowed structure from owned structure
+ /// [`PollEventsOwned`](struct.PollEventsOwned.html).
+ ///
+ /// Takes a reference to the events so it can be iterated via methods in
+ /// [`PollEvents`](struct.PollEvents.html).
+ pub fn as_ref(&self) -> PollEvents<'_, T> {
+ PollEvents {
+ count: self.count,
+ events: self.events.borrow(),
+ tokens: PhantomData,
+ }
+ }
+}
+
+/// Watching events taken by [`PollContext`](struct.PollContext.html).
+#[derive(Copy, Clone)]
+pub struct WatchingEvents(u32);
+
+impl WatchingEvents {
+ /// Returns empty `WatchingEvents`.
+ #[inline(always)]
+ pub fn empty() -> WatchingEvents {
+ WatchingEvents(0)
+ }
+
+ /// Creates a new `WatchingEvents` with a specified value.
+ ///
+ /// Builds `WatchingEvents` from raw `epoll_event`.
+ ///
+ /// # Arguments
+ ///
+ /// * `raw`: the events to be created for watching.
+ #[inline(always)]
+ pub fn new(raw: u32) -> WatchingEvents {
+ WatchingEvents(raw)
+ }
+
+ /// Sets read events.
+ ///
+ /// Sets the events to be readable.
+ #[inline(always)]
+ pub fn set_read(self) -> WatchingEvents {
+ WatchingEvents(self.0 | EPOLLIN as u32)
+ }
+
+ /// Sets write events.
+ ///
+ /// Sets the events to be writable.
+ #[inline(always)]
+ pub fn set_write(self) -> WatchingEvents {
+ WatchingEvents(self.0 | EPOLLOUT as u32)
+ }
+
+ /// Gets the underlying epoll events.
+ pub fn get_raw(&self) -> u32 {
+ self.0
+ }
+}
+
+/// A wrapper of linux [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html).
+///
+/// It provides similar interface to [`PollContext`](struct.PollContext.html).
+/// It is thread safe while PollContext is not. It requires user to pass in a reference of
+/// EpollEvents while PollContext does not. Always use PollContext if you don't need to access the
+/// same epoll from different threads.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::eventfd::EventFd;
+/// use vmm_sys_util::poll::{EpollContext, EpollEvents};
+///
+/// let evt = EventFd::new(0).unwrap();
+/// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+/// let events = EpollEvents::new();
+///
+/// evt.write(1).unwrap();
+/// ctx.add(&evt, 1).unwrap();
+///
+/// for event in ctx.wait(&events).unwrap().iter_readable() {
+/// assert_eq!(event.token(), 1);
+/// }
+/// ```
+pub struct EpollContext<T> {
+ epoll_ctx: File,
+ // Needed to satisfy usage of T
+ tokens: PhantomData<[T]>,
+}
+
+impl<T: PollToken> EpollContext<T> {
+ /// Creates a new `EpollContext`.
+ ///
+ /// Uses [`epoll_create1`](http://man7.org/linux/man-pages/man2/epoll_create.2.html)
+ /// to create a new epoll fd.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::poll::EpollContext;
+ ///
+ /// let ctx: EpollContext<usize> = EpollContext::new().unwrap();
+ /// ```
+ pub fn new() -> Result<EpollContext<T>> {
+ // SAFETY: Safe because we check the return value.
+ let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };
+ if epoll_fd < 0 {
+ return errno_result();
+ }
+ Ok(EpollContext {
+ // SAFETY: Safe because we verified that the FD is valid and we trust `epoll_create1`.
+ epoll_ctx: unsafe { File::from_raw_fd(epoll_fd) },
+ tokens: PhantomData,
+ })
+ }
+
+ /// Adds the given `fd` to this context and associates the given
+ /// `token` with the `fd`'s readable events.
+ ///
+ /// A `fd` can only be added once and does not need to be kept open.
+ /// If the `fd` is dropped and there were no duplicated file descriptors
+ /// (i.e. adding the same descriptor with a different FD number) added
+ /// to this context, events will not be reported by `wait` anymore.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be added.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::EpollContext;
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// ctx.add(&evt, 1).unwrap();
+ /// ```
+ pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
+ self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
+ }
+
+ /// Adds the given `fd` to this context, watching for the specified `events`
+ /// and associates the given 'token' with those events.
+ ///
+ /// A `fd` can only be added once and does not need to be kept open. If the `fd`
+ /// is dropped and there were no duplicated file descriptors (i.e. adding the same
+ /// descriptor with a different FD number) added to this context, events will
+ /// not be reported by `wait` anymore.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be added.
+ /// * `events`: specifies the events to be watched.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
+ /// .unwrap();
+ /// ```
+ pub fn add_fd_with_events(
+ &self,
+ fd: &dyn AsRawFd,
+ events: WatchingEvents,
+ token: T,
+ ) -> Result<()> {
+ let mut evt = epoll_event {
+ events: events.get_raw(),
+ u64: token.as_raw_token(),
+ };
+ // SAFETY: Safe because we give a valid epoll FD and FD to watch, as well as a
+ // valid epoll_event structure. Then we check the return value.
+ let ret = unsafe {
+ epoll_ctl(
+ self.epoll_ctx.as_raw_fd(),
+ EPOLL_CTL_ADD,
+ fd.as_raw_fd(),
+ &mut evt,
+ )
+ };
+ if ret < 0 {
+ return errno_result();
+ };
+ Ok(())
+ }
+
+ /// Changes the setting associated with the given `fd` in this context.
+ ///
+ /// If `fd` was previously added to this context, the watched events will be replaced with
+ /// `events` and the token associated with it will be replaced with the given `token`.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be performed.
+ /// * `events`: specifies the events to be watched.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
+ /// .unwrap();
+ /// ctx.modify(&evt, WatchingEvents::empty().set_write(), 2)
+ /// .unwrap();
+ /// ```
+ pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
+ let mut evt = epoll_event {
+ events: events.0,
+ u64: token.as_raw_token(),
+ };
+ // SAFETY: Safe because we give a valid epoll FD and FD to modify, as well as a valid
+ // epoll_event structure. Then we check the return value.
+ let ret = unsafe {
+ epoll_ctl(
+ self.epoll_ctx.as_raw_fd(),
+ EPOLL_CTL_MOD,
+ fd.as_raw_fd(),
+ &mut evt,
+ )
+ };
+ if ret < 0 {
+ return errno_result();
+ };
+ Ok(())
+ }
+
+ /// Deletes the given `fd` from this context.
+ ///
+ /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
+ /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
+ /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
+ /// CPU load.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be removed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::EpollContext;
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// ctx.add(&evt, 1).unwrap();
+ /// ctx.delete(&evt).unwrap();
+ /// ```
+ pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
+ // SAFETY: Safe because we give a valid epoll FD and FD to stop watching. Then we check
+ // the return value.
+ let ret = unsafe {
+ epoll_ctl(
+ self.epoll_ctx.as_raw_fd(),
+ EPOLL_CTL_DEL,
+ fd.as_raw_fd(),
+ null_mut(),
+ )
+ };
+ if ret < 0 {
+ return errno_result();
+ };
+ Ok(())
+ }
+
+ /// Waits for any events to occur in FDs that were previously added to this context.
+ ///
+ /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
+ /// for readable events and not closing for hungup events), subsequent calls to `wait` will
+ /// return immediately. The consequence of not handling an event perpetually while calling
+ /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
+ /// ~100% usage.
+ ///
+ /// # Arguments
+ ///
+ /// * `events`: the events to wait for.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// let events = EpollEvents::new();
+ ///
+ /// evt.write(1).unwrap();
+ /// ctx.add(&evt, 1).unwrap();
+ ///
+ /// for event in ctx.wait(&events).unwrap().iter_readable() {
+ /// assert_eq!(event.token(), 1);
+ /// }
+ /// ```
+ pub fn wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>> {
+ self.wait_timeout(events, Duration::new(i64::MAX as u64, 0))
+ }
+
+ /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
+ /// maximum of the given `timeout`.
+ ///
+ /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
+ /// system limits.
+ ///
+ /// # Arguments
+ ///
+ /// * `events`: the events to wait for.
+ /// * `timeout`: specifies the timeout that will block.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::time::Duration;
+ /// use vmm_sys_util::eventfd::EventFd;
+ /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
+ ///
+ /// let evt = EventFd::new(0).unwrap();
+ /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
+ /// let events = EpollEvents::new();
+ ///
+ /// evt.write(1).unwrap();
+ /// ctx.add(&evt, 1).unwrap();
+ /// for event in ctx
+ /// .wait_timeout(&events, Duration::new(100, 0))
+ /// .unwrap()
+ /// .iter_readable()
+ /// {
+ /// assert_eq!(event.token(), 1);
+ /// }
+ /// ```
+ pub fn wait_timeout<'a>(
+ &self,
+ events: &'a EpollEvents,
+ timeout: Duration,
+ ) -> Result<PollEvents<'a, T>> {
+ let timeout_millis = if timeout.as_secs() as i64 == i64::max_value() {
+ // We make the convenient assumption that 2^63 seconds is an effectively unbounded time
+ // frame. This is meant to mesh with `wait` calling us with no timeout.
+ -1
+ } else {
+ // In cases where we the number of milliseconds would overflow an i32, we substitute the
+ // maximum timeout which is ~24.8 days.
+ let millis = timeout
+ .as_secs()
+ .checked_mul(1_000)
+ .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))
+ .unwrap_or(i32::max_value() as u64);
+ min(i32::max_value() as u64, millis) as i32
+ };
+ let ret = {
+ let mut epoll_events = events.0.borrow_mut();
+ let max_events = epoll_events.len() as c_int;
+ // SAFETY: Safe because we give an epoll context and a properly sized epoll_events
+ // array pointer, which we trust the kernel to fill in properly.
+ unsafe {
+ handle_eintr_errno!(epoll_wait(
+ self.epoll_ctx.as_raw_fd(),
+ &mut epoll_events[0],
+ max_events,
+ timeout_millis
+ ))
+ }
+ };
+ if ret < 0 {
+ return errno_result();
+ }
+ let epoll_events = events.0.borrow();
+ let events = PollEvents {
+ count: ret as usize,
+ events: epoll_events,
+ tokens: PhantomData,
+ };
+ Ok(events)
+ }
+}
+
+impl<T: PollToken> AsRawFd for EpollContext<T> {
+ fn as_raw_fd(&self) -> RawFd {
+ self.epoll_ctx.as_raw_fd()
+ }
+}
+
+impl<T: PollToken> IntoRawFd for EpollContext<T> {
+ fn into_raw_fd(self) -> RawFd {
+ self.epoll_ctx.into_raw_fd()
+ }
+}
+
+/// Used to poll multiple objects that have file descriptors.
+///
+/// # Example
+///
+/// ```
+/// # use vmm_sys_util::errno::Result;
+/// # use vmm_sys_util::eventfd::EventFd;
+/// # use vmm_sys_util::poll::{PollContext, PollEvents};
+/// let evt1 = EventFd::new(0).unwrap();
+/// let evt2 = EventFd::new(0).unwrap();
+/// evt2.write(1).unwrap();
+///
+/// let ctx: PollContext<u32> = PollContext::new().unwrap();
+/// ctx.add(&evt1, 1).unwrap();
+/// ctx.add(&evt2, 2).unwrap();
+///
+/// let pollevents: PollEvents<u32> = ctx.wait().unwrap();
+/// let tokens: Vec<u32> = pollevents.iter_readable().map(|e| e.token()).collect();
+/// assert_eq!(&tokens[..], &[2]);
+/// ```
+pub struct PollContext<T> {
+ epoll_ctx: EpollContext<T>,
+
+ // We use a RefCell here so that the `wait` method only requires an immutable self reference
+ // while returning the events (encapsulated by PollEvents). Without the RefCell, `wait` would
+ // hold a mutable reference that lives as long as its returned reference (i.e. the PollEvents),
+ // even though that reference is immutable. This is terribly inconvenient for the caller because
+ // the borrow checking would prevent them from using `delete` and `add` while the events are in
+ // scope.
+ events: EpollEvents,
+
+ // Hangup busy loop detection variables. See `check_for_hungup_busy_loop`.
+ check_for_hangup: bool,
+ hangups: Cell<usize>,
+ max_hangups: Cell<usize>,
+}
+
+impl<T: PollToken> PollContext<T> {
+ /// Creates a new `PollContext`.
+ pub fn new() -> Result<PollContext<T>> {
+ Ok(PollContext {
+ epoll_ctx: EpollContext::new()?,
+ events: EpollEvents::new(),
+ check_for_hangup: true,
+ hangups: Cell::new(0),
+ max_hangups: Cell::new(0),
+ })
+ }
+
+ /// Enable/disable of checking for unhandled hangup events.
+ pub fn set_check_for_hangup(&mut self, enable: bool) {
+ self.check_for_hangup = enable;
+ }
+
+ /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s
+ /// readable events.
+ ///
+ /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
+ /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
+ /// FD number) added to this context, events will not be reported by `wait` anymore.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be added.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
+ self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
+ }
+
+ /// Adds the given `fd` to this context, watching for the specified events and associates the
+ /// given 'token' with those events.
+ ///
+ /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
+ /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
+ /// FD number) added to this context, events will not be reported by `wait` anymore.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be added.
+ /// * `events`: specifies the events to be watched.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ pub fn add_fd_with_events(
+ &self,
+ fd: &dyn AsRawFd,
+ events: WatchingEvents,
+ token: T,
+ ) -> Result<()> {
+ self.epoll_ctx.add_fd_with_events(fd, events, token)?;
+ self.hangups.set(0);
+ self.max_hangups.set(self.max_hangups.get() + 1);
+ Ok(())
+ }
+
+ /// Changes the setting associated with the given `fd` in this context.
+ ///
+ /// If `fd` was previously added to this context, the watched events will be replaced with
+ /// `events` and the token associated with it will be replaced with the given `token`.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be modified.
+ /// * `events`: specifies the events to be watched.
+ /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
+ pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
+ self.epoll_ctx.modify(fd, events, token)
+ }
+
+ /// Deletes the given `fd` from this context.
+ ///
+ /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
+ /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
+ /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
+ /// CPU load.
+ ///
+ /// # Arguments
+ ///
+ /// * `fd`: the target file descriptor to be removed.
+ pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
+ self.epoll_ctx.delete(fd)?;
+ self.hangups.set(0);
+ self.max_hangups.set(self.max_hangups.get() - 1);
+ Ok(())
+ }
+
+ // This method determines if the the user of wait is misusing the `PollContext` by leaving FDs
+ // in this `PollContext` that have been shutdown or hungup on. Such an FD will cause `wait` to
+ // return instantly with a hungup event. If that FD is perpetually left in this context, a busy
+ // loop burning ~100% of one CPU will silently occur with no human visible malfunction.
+ //
+ // How do we know if the client of this context is ignoring hangups? A naive implementation
+ // would trigger if consecutive wait calls yield hangup events, but there are legitimate cases
+ // for this, such as two distinct sockets becoming hungup across two consecutive wait calls. A
+ // smarter implementation would only trigger if `delete` wasn't called between waits that
+ // yielded hangups. Sadly `delete` isn't the only way to remove an FD from this context. The
+ // other way is for the client to close the hungup FD, which automatically removes it from this
+ // context. Assuming that the client always uses close, this implementation would too eagerly
+ // trigger.
+ //
+ // The implementation used here keeps an upper bound of FDs in this context using a counter
+ // hooked into add/delete (which is imprecise because close can also remove FDs without us
+ // knowing). The number of consecutive (no add or delete in between) hangups yielded by wait
+ // calls is counted and compared to the upper bound. If the upper bound is exceeded by the
+ // consecutive hangups, the implementation triggers the check and logs.
+ //
+ // This implementation has false negatives because the upper bound can be completely too high,
+ // in the worst case caused by only using close instead of delete. However, this method has the
+ // advantage of always triggering eventually genuine busy loop cases, requires no dynamic
+ // allocations, is fast and constant time to compute, and has no false positives.
+ fn check_for_hungup_busy_loop(&self, new_hangups: usize) {
+ let old_hangups = self.hangups.get();
+ let max_hangups = self.max_hangups.get();
+ if old_hangups <= max_hangups && old_hangups + new_hangups > max_hangups {
+ let mut buf = [0u8; 512];
+ let (res, len) = {
+ let mut buf_cursor = Cursor::new(&mut buf[..]);
+ (
+ writeln!(
+ &mut buf_cursor,
+ "[{}:{}] busy poll wait loop with hungup FDs detected on thread {}\n",
+ file!(),
+ line!(),
+ thread::current().name().unwrap_or("")
+ ),
+ buf_cursor.position() as usize,
+ )
+ };
+
+ if res.is_ok() {
+ let _ = stderr().write_all(&buf[..len]);
+ }
+ // This panic is helpful for tests of this functionality.
+ #[cfg(test)]
+ panic!("hungup busy loop detected");
+ }
+ self.hangups.set(old_hangups + new_hangups);
+ }
+
+ /// Waits for any events to occur in FDs that were previously added to this context.
+ ///
+ /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
+ /// for readable events and not closing for hungup events), subsequent calls to `wait` will
+ /// return immediately. The consequence of not handling an event perpetually while calling
+ /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
+ /// ~100% usage.
+ ///
+ /// # Panics
+ /// Panics if the returned `PollEvents` structure is not dropped before subsequent `wait` calls.
+ pub fn wait(&self) -> Result<PollEvents<'_, T>> {
+ self.wait_timeout(Duration::new(i64::MAX as u64, 0))
+ }
+
+ /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
+ /// maximum of the given `timeout`.
+ ///
+ /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
+ /// system limits.
+ ///
+ /// # Arguments
+ ///
+ /// * `timeout`: specify the time that will block.
+ pub fn wait_timeout(&self, timeout: Duration) -> Result<PollEvents<'_, T>> {
+ let events = self.epoll_ctx.wait_timeout(&self.events, timeout)?;
+ let hangups = events.iter_hungup().count();
+ if self.check_for_hangup {
+ self.check_for_hungup_busy_loop(hangups);
+ }
+ Ok(events)
+ }
+}
+
+impl<T: PollToken> AsRawFd for PollContext<T> {
+ fn as_raw_fd(&self) -> RawFd {
+ self.epoll_ctx.as_raw_fd()
+ }
+}
+
+impl<T: PollToken> IntoRawFd for PollContext<T> {
+ fn into_raw_fd(self) -> RawFd {
+ self.epoll_ctx.into_raw_fd()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::eventfd::EventFd;
+ use std::os::unix::net::UnixStream;
+ use std::time::Instant;
+
+ #[test]
+ fn test_poll_context() {
+ let evt1 = EventFd::new(0).unwrap();
+ let evt2 = EventFd::new(0).unwrap();
+ evt1.write(1).unwrap();
+ evt2.write(1).unwrap();
+ let ctx: PollContext<u32> = PollContext::new().unwrap();
+ ctx.add(&evt1, 1).unwrap();
+ ctx.add(&evt2, 2).unwrap();
+
+ let mut evt_count = 0;
+ while evt_count < 2 {
+ for event in ctx.wait().unwrap().iter_readable() {
+ evt_count += 1;
+ match event.token() {
+ 1 => {
+ evt1.read().unwrap();
+ ctx.delete(&evt1).unwrap();
+ }
+ 2 => {
+ evt2.read().unwrap();
+ ctx.delete(&evt2).unwrap();
+ }
+ _ => panic!("unexpected token"),
+ };
+ }
+ }
+ assert_eq!(evt_count, 2);
+ }
+
+ #[test]
+ fn test_poll_context_overflow() {
+ const EVT_COUNT: usize = POLL_CONTEXT_MAX_EVENTS * 2 + 1;
+ let ctx: PollContext<usize> = PollContext::new().unwrap();
+ let mut evts = Vec::with_capacity(EVT_COUNT);
+ for i in 0..EVT_COUNT {
+ let evt = EventFd::new(0).unwrap();
+ evt.write(1).unwrap();
+ ctx.add(&evt, i).unwrap();
+ evts.push(evt);
+ }
+ let mut evt_count = 0;
+ while evt_count < EVT_COUNT {
+ for event in ctx.wait().unwrap().iter_readable() {
+ evts[event.token()].read().unwrap();
+ evt_count += 1;
+ }
+ }
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_poll_context_hungup() {
+ let (s1, s2) = UnixStream::pair().unwrap();
+ let ctx: PollContext<u32> = PollContext::new().unwrap();
+ ctx.add(&s1, 1).unwrap();
+
+ // Causes s1 to receive hangup events, which we purposefully ignore to trip the detection
+ // logic in `PollContext`.
+ drop(s2);
+
+ // Should easily panic within this many iterations.
+ for _ in 0..1000 {
+ ctx.wait().unwrap();
+ }
+ }
+
+ #[test]
+ fn test_poll_context_timeout() {
+ let mut ctx: PollContext<u32> = PollContext::new().unwrap();
+ let dur = Duration::from_millis(10);
+ let start_inst = Instant::now();
+
+ ctx.set_check_for_hangup(false);
+ ctx.wait_timeout(dur).unwrap();
+ assert!(start_inst.elapsed() >= dur);
+ }
+
+ #[test]
+ fn test_poll_event() {
+ let event = epoll_event {
+ events: (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32,
+ u64: 0x10,
+ };
+ let ev = PollEvent::<u32> {
+ event: &event,
+ token: PhantomData,
+ };
+
+ assert_eq!(ev.token(), 0x10);
+ assert!(ev.readable());
+ assert!(ev.writable());
+ assert!(ev.hungup());
+ assert!(ev.has_error());
+ assert_eq!(
+ ev.raw_events(),
+ (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32
+ );
+ }
+}
diff --git a/src/linux/seek_hole.rs b/src/linux/seek_hole.rs
new file mode 100644
index 0000000..1392993
--- /dev/null
+++ b/src/linux/seek_hole.rs
@@ -0,0 +1,228 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Traits and implementations over [lseek64](https://linux.die.net/man/3/lseek64).
+
+use std::fs::File;
+use std::io::{Error, Result};
+use std::os::unix::io::AsRawFd;
+
+#[cfg(target_env = "musl")]
+use libc::{c_int, lseek64, ENXIO};
+
+#[cfg(target_env = "gnu")]
+use libc::{lseek64, ENXIO, SEEK_DATA, SEEK_HOLE};
+
+#[cfg(all(not(target_env = "musl"), target_os = "android"))]
+use libc::{lseek64, ENXIO, SEEK_DATA, SEEK_HOLE};
+
+/// A trait for seeking to the next hole or non-hole position in a file.
+pub trait SeekHole {
+ /// Seek to the first hole in a file.
+ ///
+ /// Seek at a position greater than or equal to `offset`. If no holes exist
+ /// after `offset`, the seek position will be set to the end of the file.
+ /// If `offset` is at or after the end of the file, the seek position is
+ /// unchanged, and None is returned.
+ ///
+ /// Returns the current seek position after the seek or an error.
+ fn seek_hole(&mut self, offset: u64) -> Result<Option<u64>>;
+
+ /// Seek to the first data in a file.
+ ///
+ /// Seek at a position greater than or equal to `offset`.
+ /// If no data exists after `offset`, the seek position is unchanged,
+ /// and None is returned.
+ ///
+ /// Returns the current offset after the seek or an error.
+ fn seek_data(&mut self, offset: u64) -> Result<Option<u64>>;
+}
+
+#[cfg(target_env = "musl")]
+const SEEK_DATA: c_int = 3;
+#[cfg(target_env = "musl")]
+const SEEK_HOLE: c_int = 4;
+
+// Safe wrapper for `libc::lseek64()`
+fn lseek(file: &mut File, offset: i64, whence: i32) -> Result<Option<u64>> {
+ // SAFETY: This is safe because we pass a known-good file descriptor.
+ let res = unsafe { lseek64(file.as_raw_fd(), offset, whence) };
+
+ if res < 0 {
+ // Convert ENXIO into None; pass any other error as-is.
+ let err = Error::last_os_error();
+ if let Some(errno) = Error::raw_os_error(&err) {
+ if errno == ENXIO {
+ return Ok(None);
+ }
+ }
+ Err(err)
+ } else {
+ Ok(Some(res as u64))
+ }
+}
+
+impl SeekHole for File {
+ fn seek_hole(&mut self, offset: u64) -> Result<Option<u64>> {
+ lseek(self, offset as i64, SEEK_HOLE)
+ }
+
+ fn seek_data(&mut self, offset: u64) -> Result<Option<u64>> {
+ lseek(self, offset as i64, SEEK_DATA)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::tempdir::TempDir;
+ use std::fs::File;
+ use std::io::{Seek, SeekFrom, Write};
+ use std::path::PathBuf;
+
+ fn seek_cur(file: &mut File) -> u64 {
+ file.seek(SeekFrom::Current(0)).unwrap()
+ }
+
+ #[test]
+ fn seek_data() {
+ let tempdir = TempDir::new_with_prefix("/tmp/seek_data_test").unwrap();
+ let mut path = PathBuf::from(tempdir.as_path());
+ path.push("test_file");
+ let mut file = File::create(&path).unwrap();
+
+ // Empty file
+ assert_eq!(file.seek_data(0).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // File with non-zero length consisting entirely of a hole
+ file.set_len(0x10000).unwrap();
+ assert_eq!(file.seek_data(0).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // seek_data at or after the end of the file should return None
+ assert_eq!(file.seek_data(0x10000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+ assert_eq!(file.seek_data(0x10001).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // Write some data to [0x10000, 0x20000)
+ let b = [0x55u8; 0x10000];
+ file.seek(SeekFrom::Start(0x10000)).unwrap();
+ file.write_all(&b).unwrap();
+ assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
+ assert_eq!(seek_cur(&mut file), 0x10000);
+
+ // seek_data within data should return the same offset
+ assert_eq!(file.seek_data(0x10000).unwrap(), Some(0x10000));
+ assert_eq!(seek_cur(&mut file), 0x10000);
+ assert_eq!(file.seek_data(0x10001).unwrap(), Some(0x10001));
+ assert_eq!(seek_cur(&mut file), 0x10001);
+ assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
+ assert_eq!(seek_cur(&mut file), 0x1FFFF);
+
+ // Extend the file to add another hole after the data
+ file.set_len(0x30000).unwrap();
+ assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
+ assert_eq!(seek_cur(&mut file), 0x10000);
+ assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
+ assert_eq!(seek_cur(&mut file), 0x1FFFF);
+ assert_eq!(file.seek_data(0x20000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0x1FFFF);
+ }
+
+ #[test]
+ #[allow(clippy::cognitive_complexity)]
+ fn seek_hole() {
+ let tempdir = TempDir::new_with_prefix("/tmp/seek_hole_test").unwrap();
+ let mut path = PathBuf::from(tempdir.as_path());
+ path.push("test_file");
+ let mut file = File::create(&path).unwrap();
+
+ // Empty file
+ assert_eq!(file.seek_hole(0).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // File with non-zero length consisting entirely of a hole
+ file.set_len(0x10000).unwrap();
+ assert_eq!(file.seek_hole(0).unwrap(), Some(0));
+ assert_eq!(seek_cur(&mut file), 0);
+ assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+ assert_eq!(seek_cur(&mut file), 0xFFFF);
+
+ // seek_hole at or after the end of the file should return None
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x10000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+ assert_eq!(file.seek_hole(0x10001).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // Write some data to [0x10000, 0x20000)
+ let b = [0x55u8; 0x10000];
+ file.seek(SeekFrom::Start(0x10000)).unwrap();
+ file.write_all(&b).unwrap();
+
+ // seek_hole within a hole should return the same offset
+ assert_eq!(file.seek_hole(0).unwrap(), Some(0));
+ assert_eq!(seek_cur(&mut file), 0);
+ assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+ assert_eq!(seek_cur(&mut file), 0xFFFF);
+
+ // seek_hole within data should return the next hole (EOF)
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x10001).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+
+ // seek_hole at EOF after data should return None
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x20000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // Extend the file to add another hole after the data
+ file.set_len(0x30000).unwrap();
+ assert_eq!(file.seek_hole(0).unwrap(), Some(0));
+ assert_eq!(seek_cur(&mut file), 0);
+ assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+ assert_eq!(seek_cur(&mut file), 0xFFFF);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x20000));
+ assert_eq!(seek_cur(&mut file), 0x20000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x20001));
+ assert_eq!(seek_cur(&mut file), 0x20001);
+
+ // seek_hole at EOF after a hole should return None
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+
+ // Write some data to [0x20000, 0x30000)
+ file.seek(SeekFrom::Start(0x20000)).unwrap();
+ file.write_all(&b).unwrap();
+
+ // seek_hole within [0x20000, 0x30000) should now find the hole at EOF
+ assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x30000));
+ assert_eq!(seek_cur(&mut file), 0x30000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x30000));
+ assert_eq!(seek_cur(&mut file), 0x30000);
+ file.seek(SeekFrom::Start(0)).unwrap();
+ assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+ assert_eq!(seek_cur(&mut file), 0);
+ }
+}
diff --git a/src/linux/signal.rs b/src/linux/signal.rs
new file mode 100644
index 0000000..45d9009
--- /dev/null
+++ b/src/linux/signal.rs
@@ -0,0 +1,583 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Enums, traits and functions for working with
+//! [`signal`](http://man7.org/linux/man-pages/man7/signal.7.html).
+
+use libc::{
+ c_int, c_void, pthread_kill, pthread_sigmask, pthread_t, sigaction, sigaddset, sigemptyset,
+ sigfillset, siginfo_t, sigismember, sigpending, sigset_t, sigtimedwait, timespec, EAGAIN,
+ EINTR, EINVAL, SIG_BLOCK, SIG_UNBLOCK,
+};
+
+use crate::errno;
+use std::fmt::{self, Display};
+use std::io;
+use std::mem;
+use std::os::unix::thread::JoinHandleExt;
+use std::ptr::{null, null_mut};
+use std::result;
+use std::thread::JoinHandle;
+
+/// The error cases enumeration for signal handling.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Error {
+ /// Couldn't create a sigset.
+ CreateSigset(errno::Error),
+ /// The wrapped signal has already been blocked.
+ SignalAlreadyBlocked(c_int),
+ /// Failed to check if the requested signal is in the blocked set already.
+ CompareBlockedSignals(errno::Error),
+ /// The signal could not be blocked.
+ BlockSignal(errno::Error),
+ /// The signal mask could not be retrieved.
+ RetrieveSignalMask(c_int),
+ /// The signal could not be unblocked.
+ UnblockSignal(errno::Error),
+ /// Failed to wait for given signal.
+ ClearWaitPending(errno::Error),
+ /// Failed to get pending signals.
+ ClearGetPending(errno::Error),
+ /// Failed to check if given signal is in the set of pending signals.
+ ClearCheckPending(errno::Error),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use self::Error::*;
+
+ match self {
+ CreateSigset(e) => write!(f, "couldn't create a sigset: {}", e),
+ SignalAlreadyBlocked(num) => write!(f, "signal {} already blocked", num),
+ CompareBlockedSignals(e) => write!(
+ f,
+ "failed to check whether requested signal is in the blocked set: {}",
+ e,
+ ),
+ BlockSignal(e) => write!(f, "signal could not be blocked: {}", e),
+ RetrieveSignalMask(errno) => write!(
+ f,
+ "failed to retrieve signal mask: {}",
+ io::Error::from_raw_os_error(*errno),
+ ),
+ UnblockSignal(e) => write!(f, "signal could not be unblocked: {}", e),
+ ClearWaitPending(e) => write!(f, "failed to wait for given signal: {}", e),
+ ClearGetPending(e) => write!(f, "failed to get pending signals: {}", e),
+ ClearCheckPending(e) => write!(
+ f,
+ "failed to check whether given signal is in the pending set: {}",
+ e,
+ ),
+ }
+ }
+}
+
+/// A simplified [Result](https://doc.rust-lang.org/std/result/enum.Result.html) type
+/// for operations that can return [`Error`](Enum.error.html).
+pub type SignalResult<T> = result::Result<T, Error>;
+
+/// Public alias for a signal handler.
+/// [`sigaction`](http://man7.org/linux/man-pages/man2/sigaction.2.html).
+pub type SignalHandler =
+ extern "C" fn(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) -> ();
+
+extern "C" {
+ fn __libc_current_sigrtmin() -> c_int;
+ fn __libc_current_sigrtmax() -> c_int;
+}
+
+/// Return the minimum (inclusive) real-time signal number.
+#[allow(non_snake_case)]
+pub fn SIGRTMIN() -> c_int {
+ // SAFETY: We trust this libc function.
+ unsafe { __libc_current_sigrtmin() }
+}
+
+/// Return the maximum (inclusive) real-time signal number.
+#[allow(non_snake_case)]
+pub fn SIGRTMAX() -> c_int {
+ // SAFETY: We trust this libc function.
+ unsafe { __libc_current_sigrtmax() }
+}
+
+/// Verify that a signal number is valid.
+///
+/// Supported signals range from `SIGHUP` to `SIGSYS` and from `SIGRTMIN` to `SIGRTMAX`.
+/// We recommend using realtime signals `[SIGRTMIN(), SIGRTMAX()]` for VCPU threads.
+///
+/// # Arguments
+///
+/// * `num`: the signal number to be verified.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::signal::validate_signal_num;
+///
+/// let num = validate_signal_num(1).unwrap();
+/// ```
+pub fn validate_signal_num(num: c_int) -> errno::Result<()> {
+ if (libc::SIGHUP..=libc::SIGSYS).contains(&num) || (SIGRTMIN() <= num && num <= SIGRTMAX()) {
+ Ok(())
+ } else {
+ Err(errno::Error::new(EINVAL))
+ }
+}
+
+/// Register the signal handler of `signum`.
+///
+/// # Safety
+///
+/// This is considered unsafe because the given handler will be called
+/// asynchronously, interrupting whatever the thread was doing and therefore
+/// must only do async-signal-safe operations.
+///
+/// # Arguments
+///
+/// * `num`: the signal number to be registered.
+/// * `handler`: the signal handler function to register.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// extern crate vmm_sys_util;
+/// # use libc::{c_int, c_void, siginfo_t, SA_SIGINFO};
+/// use vmm_sys_util::signal::{register_signal_handler, SignalHandler};
+///
+/// extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) {}
+/// register_signal_handler(0, handle_signal);
+/// ```
+
+pub fn register_signal_handler(num: c_int, handler: SignalHandler) -> errno::Result<()> {
+ validate_signal_num(num)?;
+
+ // signum specifies the signal and can be any valid signal except
+ // SIGKILL and SIGSTOP.
+ // [`sigaction`](http://man7.org/linux/man-pages/man2/sigaction.2.html).
+ if libc::SIGKILL == num || libc::SIGSTOP == num {
+ return Err(errno::Error::new(EINVAL));
+ }
+
+ // SAFETY: Safe, because this is a POD struct.
+ let mut act: sigaction = unsafe { mem::zeroed() };
+ act.sa_sigaction = handler as *const () as usize;
+ act.sa_flags = libc::SA_SIGINFO;
+
+ // Block all signals while the `handler` is running.
+ // Blocking other signals is needed to make sure the execution of
+ // the handler continues uninterrupted if another signal comes.
+ // SAFETY: The parameters are valid and we trust the sifillset function.
+ if unsafe { sigfillset(&mut act.sa_mask as *mut sigset_t) } < 0 {
+ return errno::errno_result();
+ }
+
+ // SAFETY: Safe because the parameters are valid and we check the return value.
+ match unsafe { sigaction(num, &act, null_mut()) } {
+ 0 => Ok(()),
+ _ => errno::errno_result(),
+ }
+}
+
+/// Create a `sigset` with given signals.
+///
+/// An array of signal numbers are added into the signal set by
+/// [`sigaddset`](http://man7.org/linux/man-pages/man3/sigaddset.3p.html).
+/// This is a helper function used when we want to manipulate signals.
+///
+/// # Arguments
+///
+/// * `signals`: signal numbers to be added to the new `sigset`.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// extern crate vmm_sys_util;
+/// # use libc::sigismember;
+/// use vmm_sys_util::signal::create_sigset;
+///
+/// let sigset = create_sigset(&[1]).unwrap();
+///
+/// unsafe {
+/// assert_eq!(sigismember(&sigset, 1), 1);
+/// }
+/// ```
+pub fn create_sigset(signals: &[c_int]) -> errno::Result<sigset_t> {
+ // SAFETY: sigset will actually be initialized by sigemptyset below.
+ let mut sigset: sigset_t = unsafe { mem::zeroed() };
+
+ // SAFETY: return value is checked.
+ let ret = unsafe { sigemptyset(&mut sigset) };
+ if ret < 0 {
+ return errno::errno_result();
+ }
+
+ for signal in signals {
+ // SAFETY: return value is checked.
+ let ret = unsafe { sigaddset(&mut sigset, *signal) };
+ if ret < 0 {
+ return errno::errno_result();
+ }
+ }
+
+ Ok(sigset)
+}
+
+/// Retrieve the signal mask that is blocked of the current thread.
+///
+/// Use [`pthread_sigmask`](http://man7.org/linux/man-pages/man3/pthread_sigmask.3.html)
+/// to fetch the signal mask which is blocked for the caller, return the signal mask as
+/// a vector of c_int.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::signal::{block_signal, get_blocked_signals};
+///
+/// block_signal(1).unwrap();
+/// assert!(get_blocked_signals().unwrap().contains(&(1)));
+/// ```
+pub fn get_blocked_signals() -> SignalResult<Vec<c_int>> {
+ let mut mask = Vec::new();
+
+ // SAFETY: return values are checked.
+ unsafe {
+ let mut old_sigset: sigset_t = mem::zeroed();
+ let ret = pthread_sigmask(SIG_BLOCK, null(), &mut old_sigset as *mut sigset_t);
+ if ret < 0 {
+ return Err(Error::RetrieveSignalMask(ret));
+ }
+
+ for num in 0..=SIGRTMAX() {
+ if sigismember(&old_sigset, num) > 0 {
+ mask.push(num);
+ }
+ }
+ }
+
+ Ok(mask)
+}
+
+/// Mask a given signal.
+///
+/// Set the given signal `num` as blocked.
+/// If signal is already blocked, the call will fail with
+/// [`SignalAlreadyBlocked`](enum.Error.html#variant.SignalAlreadyBlocked).
+///
+/// # Arguments
+///
+/// * `num`: the signal to be masked.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::signal::block_signal;
+///
+/// block_signal(1).unwrap();
+/// ```
+// Allowing comparison chain because rewriting it with match makes the code less readable.
+// Also, the risk of having non-exhaustive checks is low.
+#[allow(clippy::comparison_chain)]
+pub fn block_signal(num: c_int) -> SignalResult<()> {
+ let sigset = create_sigset(&[num]).map_err(Error::CreateSigset)?;
+
+ // SAFETY: return values are checked.
+ unsafe {
+ let mut old_sigset: sigset_t = mem::zeroed();
+ let ret = pthread_sigmask(SIG_BLOCK, &sigset, &mut old_sigset as *mut sigset_t);
+ if ret < 0 {
+ return Err(Error::BlockSignal(errno::Error::last()));
+ }
+ // Check if the given signal is already blocked.
+ let ret = sigismember(&old_sigset, num);
+ if ret < 0 {
+ return Err(Error::CompareBlockedSignals(errno::Error::last()));
+ } else if ret > 0 {
+ return Err(Error::SignalAlreadyBlocked(num));
+ }
+ }
+ Ok(())
+}
+
+/// Unmask a given signal.
+///
+/// # Arguments
+///
+/// * `num`: the signal to be unmasked.
+///
+/// # Examples
+///
+/// ```
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::signal::{block_signal, get_blocked_signals, unblock_signal};
+///
+/// block_signal(1).unwrap();
+/// assert!(get_blocked_signals().unwrap().contains(&(1)));
+/// unblock_signal(1).unwrap();
+/// ```
+pub fn unblock_signal(num: c_int) -> SignalResult<()> {
+ let sigset = create_sigset(&[num]).map_err(Error::CreateSigset)?;
+
+ // SAFETY: return value is checked.
+ let ret = unsafe { pthread_sigmask(SIG_UNBLOCK, &sigset, null_mut()) };
+ if ret < 0 {
+ return Err(Error::UnblockSignal(errno::Error::last()));
+ }
+ Ok(())
+}
+
+/// Clear a pending signal.
+///
+/// # Arguments
+///
+/// * `num`: the signal to be cleared.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// extern crate vmm_sys_util;
+/// # use libc::{pthread_kill, sigismember, sigpending, sigset_t};
+/// # use std::mem;
+/// # use std::thread;
+/// # use std::time::Duration;
+/// use vmm_sys_util::signal::{block_signal, clear_signal, Killable};
+///
+/// block_signal(1).unwrap();
+/// let killable = thread::spawn(move || {
+/// thread::sleep(Duration::from_millis(100));
+/// unsafe {
+/// let mut chkset: sigset_t = mem::zeroed();
+/// sigpending(&mut chkset);
+/// assert_eq!(sigismember(&chkset, 1), 1);
+/// }
+/// });
+/// unsafe {
+/// pthread_kill(killable.pthread_handle(), 1);
+/// }
+/// clear_signal(1).unwrap();
+/// ```
+pub fn clear_signal(num: c_int) -> SignalResult<()> {
+ let sigset = create_sigset(&[num]).map_err(Error::CreateSigset)?;
+
+ while {
+ // SAFETY: This is safe as we are rigorously checking return values
+ // of libc calls.
+ unsafe {
+ let mut siginfo: siginfo_t = mem::zeroed();
+ let ts = timespec {
+ tv_sec: 0,
+ tv_nsec: 0,
+ };
+ // Attempt to consume one instance of pending signal. If signal
+ // is not pending, the call will fail with EAGAIN or EINTR.
+ let ret = sigtimedwait(&sigset, &mut siginfo, &ts);
+ if ret < 0 {
+ let e = errno::Error::last();
+ match e.errno() {
+ EAGAIN | EINTR => {}
+ _ => {
+ return Err(Error::ClearWaitPending(errno::Error::last()));
+ }
+ }
+ }
+
+ // This sigset will be actually filled with `sigpending` call.
+ let mut chkset: sigset_t = mem::zeroed();
+ // See if more instances of the signal are pending.
+ let ret = sigpending(&mut chkset);
+ if ret < 0 {
+ return Err(Error::ClearGetPending(errno::Error::last()));
+ }
+
+ let ret = sigismember(&chkset, num);
+ if ret < 0 {
+ return Err(Error::ClearCheckPending(errno::Error::last()));
+ }
+
+ // This is do-while loop condition.
+ ret != 0
+ }
+ } {}
+
+ Ok(())
+}
+
+/// Trait for threads that can be signalled via `pthread_kill`.
+///
+/// Note that this is only useful for signals between `SIGRTMIN()` and
+/// `SIGRTMAX()` because these are guaranteed to not be used by the C
+/// runtime.
+///
+/// # Safety
+///
+/// This is marked unsafe because the implementation of this trait must
+/// guarantee that the returned `pthread_t` is valid and has a lifetime at
+/// least that of the trait object.
+pub unsafe trait Killable {
+ /// Cast this killable thread as `pthread_t`.
+ fn pthread_handle(&self) -> pthread_t;
+
+ /// Send a signal to this killable thread.
+ ///
+ /// # Arguments
+ ///
+ /// * `num`: specify the signal
+ fn kill(&self, num: c_int) -> errno::Result<()> {
+ validate_signal_num(num)?;
+
+ // SAFETY: Safe because we ensure we are using a valid pthread handle,
+ // a valid signal number, and check the return result.
+ let ret = unsafe { pthread_kill(self.pthread_handle(), num) };
+ if ret < 0 {
+ return errno::errno_result();
+ }
+ Ok(())
+ }
+}
+
+// SAFETY: Safe because we fulfill our contract of returning a genuine pthread handle.
+unsafe impl<T> Killable for JoinHandle<T> {
+ fn pthread_handle(&self) -> pthread_t {
+ // JoinHandleExt::as_pthread_t gives c_ulong, convert it to the
+ // type that the libc crate expects
+ assert_eq!(mem::size_of::<pthread_t>(), mem::size_of::<usize>());
+ self.as_pthread_t() as usize as pthread_t
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::undocumented_unsafe_blocks)]
+ use super::*;
+ use std::thread;
+ use std::time::Duration;
+
+ // Reserve for each vcpu signal.
+ static mut SIGNAL_HANDLER_CALLED: bool = false;
+
+ extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) {
+ unsafe {
+ // In the tests, there only uses vcpu signal.
+ SIGNAL_HANDLER_CALLED = true;
+ }
+ }
+
+ fn is_pending(signal: c_int) -> bool {
+ unsafe {
+ let mut chkset: sigset_t = mem::zeroed();
+ sigpending(&mut chkset);
+ sigismember(&chkset, signal) == 1
+ }
+ }
+
+ #[test]
+ fn test_register_signal_handler() {
+ // testing bad value
+ assert!(register_signal_handler(libc::SIGKILL, handle_signal).is_err());
+ assert!(register_signal_handler(libc::SIGSTOP, handle_signal).is_err());
+ assert!(register_signal_handler(SIGRTMAX() + 1, handle_signal).is_err());
+ format!("{:?}", register_signal_handler(SIGRTMAX(), handle_signal));
+ assert!(register_signal_handler(SIGRTMIN(), handle_signal).is_ok());
+ assert!(register_signal_handler(libc::SIGSYS, handle_signal).is_ok());
+ }
+
+ #[test]
+ #[allow(clippy::empty_loop)]
+ fn test_killing_thread() {
+ let killable = thread::spawn(|| thread::current().id());
+ let killable_id = killable.join().unwrap();
+ assert_ne!(killable_id, thread::current().id());
+
+ // We install a signal handler for the specified signal; otherwise the whole process will
+ // be brought down when the signal is received, as part of the default behaviour. Signal
+ // handlers are global, so we install this before starting the thread.
+ register_signal_handler(SIGRTMIN(), handle_signal)
+ .expect("failed to register vcpu signal handler");
+
+ let killable = thread::spawn(|| loop {});
+
+ let res = killable.kill(SIGRTMAX() + 1);
+ assert!(res.is_err());
+ format!("{:?}", res);
+
+ unsafe {
+ assert!(!SIGNAL_HANDLER_CALLED);
+ }
+
+ assert!(killable.kill(SIGRTMIN()).is_ok());
+
+ // We're waiting to detect that the signal handler has been called.
+ const MAX_WAIT_ITERS: u32 = 20;
+ let mut iter_count = 0;
+ loop {
+ thread::sleep(Duration::from_millis(100));
+
+ if unsafe { SIGNAL_HANDLER_CALLED } {
+ break;
+ }
+
+ iter_count += 1;
+ // timeout if we wait too long
+ assert!(iter_count <= MAX_WAIT_ITERS);
+ }
+
+ // Our signal handler doesn't do anything which influences the killable thread, so the
+ // previous signal is effectively ignored. If we were to join killable here, we would block
+ // forever as the loop keeps running. Since we don't join, the thread will become detached
+ // as the handle is dropped, and will be killed when the process/main thread exits.
+ }
+
+ #[test]
+ fn test_block_unblock_signal() {
+ let signal = SIGRTMIN();
+
+ // Check if it is blocked.
+ unsafe {
+ let mut sigset: sigset_t = mem::zeroed();
+ pthread_sigmask(SIG_BLOCK, null(), &mut sigset as *mut sigset_t);
+ assert_eq!(sigismember(&sigset, signal), 0);
+ }
+
+ block_signal(signal).unwrap();
+ assert!(get_blocked_signals().unwrap().contains(&(signal)));
+
+ unblock_signal(signal).unwrap();
+ assert!(!get_blocked_signals().unwrap().contains(&(signal)));
+ }
+
+ #[test]
+ fn test_clear_pending() {
+ let signal = SIGRTMIN() + 1;
+
+ block_signal(signal).unwrap();
+
+ // Block the signal, which means it won't be delivered until it is
+ // unblocked. Pending between the time when the signal which is set as blocked
+ // is generated and when is delivered.
+ let killable = thread::spawn(move || {
+ loop {
+ // Wait for the signal being killed.
+ thread::sleep(Duration::from_millis(100));
+ if is_pending(signal) {
+ clear_signal(signal).unwrap();
+ assert!(!is_pending(signal));
+ break;
+ }
+ }
+ });
+
+ // Send a signal to the thread.
+ assert!(killable.kill(SIGRTMIN() + 1).is_ok());
+ killable.join().unwrap();
+ }
+}
diff --git a/src/linux/sock_ctrl_msg.rs b/src/linux/sock_ctrl_msg.rs
new file mode 100644
index 0000000..0c19a11
--- /dev/null
+++ b/src/linux/sock_ctrl_msg.rs
@@ -0,0 +1,663 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+// SPDX-License-Identifier: BSD-3-Clause
+
+/* Copied from the crosvm Project, commit 186eb8b */
+
+//! Wrapper for sending and receiving messages with file descriptors on sockets that accept
+//! control messages (e.g. Unix domain sockets).
+
+use std::fs::File;
+use std::mem::size_of;
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+use std::os::unix::net::{UnixDatagram, UnixStream};
+use std::ptr::{copy_nonoverlapping, null_mut, write_unaligned};
+
+use crate::errno::{Error, Result};
+use libc::{
+ c_long, c_void, cmsghdr, iovec, msghdr, recvmsg, sendmsg, MSG_NOSIGNAL, SCM_RIGHTS, SOL_SOCKET,
+};
+use std::os::raw::c_int;
+
+// Each of the following macros performs the same function as their C counterparts. They are each
+// macros because they are used to size statically allocated arrays.
+
+macro_rules! CMSG_ALIGN {
+ ($len:expr) => {
+ (($len) as usize + size_of::<c_long>() - 1) & !(size_of::<c_long>() - 1)
+ };
+}
+
+macro_rules! CMSG_SPACE {
+ ($len:expr) => {
+ size_of::<cmsghdr>() + CMSG_ALIGN!($len)
+ };
+}
+
+// This function (macro in the C version) is not used in any compile time constant slots, so is just
+// an ordinary function. The returned pointer is hard coded to be RawFd because that's all that this
+// module supports.
+#[allow(non_snake_case)]
+#[inline(always)]
+fn CMSG_DATA(cmsg_buffer: *mut cmsghdr) -> *mut RawFd {
+ // Essentially returns a pointer to just past the header.
+ cmsg_buffer.wrapping_offset(1) as *mut RawFd
+}
+
+#[cfg(not(target_env = "musl"))]
+macro_rules! CMSG_LEN {
+ ($len:expr) => {
+ size_of::<cmsghdr>() + ($len)
+ };
+}
+
+#[cfg(target_env = "musl")]
+macro_rules! CMSG_LEN {
+ ($len:expr) => {{
+ let sz = size_of::<cmsghdr>() + ($len);
+ assert!(sz <= (std::u32::MAX as usize));
+ sz as u32
+ }};
+}
+
+#[cfg(not(target_env = "musl"))]
+fn new_msghdr(iovecs: &mut [iovec]) -> msghdr {
+ msghdr {
+ msg_name: null_mut(),
+ msg_namelen: 0,
+ msg_iov: iovecs.as_mut_ptr(),
+ msg_iovlen: iovecs.len(),
+ msg_control: null_mut(),
+ msg_controllen: 0,
+ msg_flags: 0,
+ }
+}
+
+#[cfg(target_env = "musl")]
+fn new_msghdr(iovecs: &mut [iovec]) -> msghdr {
+ assert!(iovecs.len() <= (std::i32::MAX as usize));
+ let mut msg: msghdr = unsafe { std::mem::zeroed() };
+ msg.msg_name = null_mut();
+ msg.msg_iov = iovecs.as_mut_ptr();
+ msg.msg_iovlen = iovecs.len() as i32;
+ msg.msg_control = null_mut();
+ msg
+}
+
+#[cfg(not(target_env = "musl"))]
+fn set_msg_controllen(msg: &mut msghdr, cmsg_capacity: usize) {
+ msg.msg_controllen = cmsg_capacity;
+}
+
+#[cfg(target_env = "musl")]
+fn set_msg_controllen(msg: &mut msghdr, cmsg_capacity: usize) {
+ assert!(cmsg_capacity <= (std::u32::MAX as usize));
+ msg.msg_controllen = cmsg_capacity as u32;
+}
+
+// This function is like CMSG_NEXT, but safer because it reads only from references, although it
+// does some pointer arithmetic on cmsg_ptr.
+#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+fn get_next_cmsg(msghdr: &msghdr, cmsg: &cmsghdr, cmsg_ptr: *mut cmsghdr) -> *mut cmsghdr {
+ let next_cmsg = (cmsg_ptr as *mut u8).wrapping_add(CMSG_ALIGN!(cmsg.cmsg_len)) as *mut cmsghdr;
+ if next_cmsg
+ .wrapping_offset(1)
+ .wrapping_sub(msghdr.msg_control as usize) as usize
+ > msghdr.msg_controllen as usize
+ {
+ null_mut()
+ } else {
+ next_cmsg
+ }
+}
+
+const CMSG_BUFFER_INLINE_CAPACITY: usize = CMSG_SPACE!(size_of::<RawFd>() * 32);
+
+enum CmsgBuffer {
+ Inline([u64; (CMSG_BUFFER_INLINE_CAPACITY + 7) / 8]),
+ Heap(Box<[cmsghdr]>),
+}
+
+impl CmsgBuffer {
+ fn with_capacity(capacity: usize) -> CmsgBuffer {
+ let cap_in_cmsghdr_units =
+ (capacity.checked_add(size_of::<cmsghdr>()).unwrap() - 1) / size_of::<cmsghdr>();
+ if capacity <= CMSG_BUFFER_INLINE_CAPACITY {
+ CmsgBuffer::Inline([0u64; (CMSG_BUFFER_INLINE_CAPACITY + 7) / 8])
+ } else {
+ CmsgBuffer::Heap(
+ vec![
+ cmsghdr {
+ cmsg_len: 0,
+ cmsg_level: 0,
+ cmsg_type: 0,
+ #[cfg(all(target_env = "musl", target_pointer_width = "64"))]
+ __pad1: 0,
+ };
+ cap_in_cmsghdr_units
+ ]
+ .into_boxed_slice(),
+ )
+ }
+ }
+
+ fn as_mut_ptr(&mut self) -> *mut cmsghdr {
+ match self {
+ CmsgBuffer::Inline(a) => a.as_mut_ptr() as *mut cmsghdr,
+ CmsgBuffer::Heap(a) => a.as_mut_ptr(),
+ }
+ }
+}
+
+fn raw_sendmsg<D: IntoIovec>(fd: RawFd, out_data: &[D], out_fds: &[RawFd]) -> Result<usize> {
+ let cmsg_capacity = CMSG_SPACE!(size_of::<RawFd>() * out_fds.len());
+ let mut cmsg_buffer = CmsgBuffer::with_capacity(cmsg_capacity);
+
+ let mut iovecs = Vec::with_capacity(out_data.len());
+ for data in out_data {
+ iovecs.push(iovec {
+ iov_base: data.as_ptr() as *mut c_void,
+ iov_len: data.size(),
+ });
+ }
+
+ let mut msg = new_msghdr(&mut iovecs);
+
+ if !out_fds.is_empty() {
+ let cmsg = cmsghdr {
+ cmsg_len: CMSG_LEN!(size_of::<RawFd>() * out_fds.len()),
+ cmsg_level: SOL_SOCKET,
+ cmsg_type: SCM_RIGHTS,
+ #[cfg(all(target_env = "musl", target_pointer_width = "64"))]
+ __pad1: 0,
+ };
+ // SAFETY: Check comments below for each call.
+ unsafe {
+ // Safe because cmsg_buffer was allocated to be large enough to contain cmsghdr.
+ write_unaligned(cmsg_buffer.as_mut_ptr() as *mut cmsghdr, cmsg);
+ // Safe because the cmsg_buffer was allocated to be large enough to hold out_fds.len()
+ // file descriptors.
+ copy_nonoverlapping(
+ out_fds.as_ptr(),
+ CMSG_DATA(cmsg_buffer.as_mut_ptr()),
+ out_fds.len(),
+ );
+ }
+
+ msg.msg_control = cmsg_buffer.as_mut_ptr() as *mut c_void;
+ set_msg_controllen(&mut msg, cmsg_capacity);
+ }
+
+ // SAFETY: Safe because the msghdr was properly constructed from valid (or null) pointers of
+ // the indicated length and we check the return value.
+ let write_count = unsafe { sendmsg(fd, &msg, MSG_NOSIGNAL) };
+
+ if write_count == -1 {
+ Err(Error::last())
+ } else {
+ Ok(write_count as usize)
+ }
+}
+
+unsafe fn raw_recvmsg(
+ fd: RawFd,
+ iovecs: &mut [iovec],
+ in_fds: &mut [RawFd],
+) -> Result<(usize, usize)> {
+ let cmsg_capacity = CMSG_SPACE!(size_of::<RawFd>() * in_fds.len());
+ let mut cmsg_buffer = CmsgBuffer::with_capacity(cmsg_capacity);
+ let mut msg = new_msghdr(iovecs);
+
+ if !in_fds.is_empty() {
+ // MSG control len is size_of(cmsghdr) + size_of(RawFd) * in_fds.len().
+ msg.msg_control = cmsg_buffer.as_mut_ptr() as *mut c_void;
+ set_msg_controllen(&mut msg, cmsg_capacity);
+ }
+
+ // Safe because the msghdr was properly constructed from valid (or null) pointers of the
+ // indicated length and we check the return value.
+ // TODO: Should we handle MSG_TRUNC in a specific way?
+ let total_read = recvmsg(fd, &mut msg, 0);
+ if total_read == -1 {
+ return Err(Error::last());
+ }
+
+ if total_read == 0 && (msg.msg_controllen as usize) < size_of::<cmsghdr>() {
+ return Ok((0, 0));
+ }
+
+ // Reference to a memory area with a CmsgBuffer, which contains a `cmsghdr` struct followed
+ // by a sequence of `in_fds.len()` count RawFds.
+ let mut cmsg_ptr = msg.msg_control as *mut cmsghdr;
+ let mut copied_fds_count = 0;
+ // If the control data was truncated, then this might be a sign of incorrect communication
+ // protocol. If MSG_CTRUNC was set we must close the fds from the control data.
+ let mut teardown_control_data = msg.msg_flags & libc::MSG_CTRUNC != 0;
+
+ while !cmsg_ptr.is_null() {
+ // Safe because we checked that cmsg_ptr was non-null, and the loop is constructed such
+ // that it only happens when there is at least sizeof(cmsghdr) space after the pointer to
+ // read.
+ let cmsg = (cmsg_ptr as *mut cmsghdr).read_unaligned();
+ if cmsg.cmsg_level == SOL_SOCKET && cmsg.cmsg_type == SCM_RIGHTS {
+ let fds_count = ((cmsg.cmsg_len - CMSG_LEN!(0)) as usize) / size_of::<RawFd>();
+ // The sender can transmit more data than we can buffer. If a message is too long to
+ // fit in the supplied buffer, excess bytes may be discarded depending on the type of
+ // socket the message is received from.
+ let fds_to_be_copied_count = std::cmp::min(in_fds.len() - copied_fds_count, fds_count);
+ teardown_control_data |= fds_count > fds_to_be_copied_count;
+ if teardown_control_data {
+ // Allocating space for cmesg buffer might provide extra space for fds, due to
+ // alignment. If these fds can not be stored in `in_fds` buffer, then all the control
+ // data must be dropped to insufficient buffer space for returning them to outer
+ // scope. This might be a sign of incorrect protocol communication.
+ for fd_offset in 0..fds_count {
+ let raw_fds_ptr = CMSG_DATA(cmsg_ptr);
+ // The cmsg_ptr is valid here because is checked at the beginning of the
+ // loop and it is assured to have `fds_count` fds available.
+ let raw_fd = *(raw_fds_ptr.wrapping_add(fd_offset)) as c_int;
+ libc::close(raw_fd);
+ }
+ } else {
+ // Safe because `cmsg_ptr` is checked against null and we copy from `cmesg_buffer` to
+ // `in_fds` according to their current capacity.
+ copy_nonoverlapping(
+ CMSG_DATA(cmsg_ptr),
+ in_fds[copied_fds_count..(copied_fds_count + fds_to_be_copied_count)]
+ .as_mut_ptr(),
+ fds_to_be_copied_count,
+ );
+
+ copied_fds_count += fds_to_be_copied_count;
+ }
+ }
+
+ // Remove the previously copied fds.
+ if teardown_control_data {
+ for fd in in_fds.iter().take(copied_fds_count) {
+ // This is safe because we close only the previously copied fds. We do not care
+ // about `close` return code.
+ libc::close(*fd);
+ }
+
+ return Err(Error::new(libc::ENOBUFS));
+ }
+
+ cmsg_ptr = get_next_cmsg(&msg, &cmsg, cmsg_ptr);
+ }
+
+ Ok((total_read as usize, copied_fds_count))
+}
+
+/// Trait for file descriptors can send and receive socket control messages via `sendmsg` and
+/// `recvmsg`.
+///
+/// # Examples
+///
+/// ```
+/// # extern crate libc;
+/// extern crate vmm_sys_util;
+/// use vmm_sys_util::sock_ctrl_msg::ScmSocket;
+/// # use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
+/// # use std::fs::File;
+/// # use std::io::Write;
+/// # use std::os::unix::io::{AsRawFd, FromRawFd};
+/// # use std::os::unix::net::UnixDatagram;
+/// # use std::slice::from_raw_parts;
+///
+/// # use libc::{c_void, iovec};
+///
+/// let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+/// let evt = EventFd::new(0).expect("failed to create eventfd");
+///
+/// let write_count = s1
+/// .send_with_fds(&[[237].as_ref()], &[evt.as_raw_fd()])
+/// .expect("failed to send fd");
+///
+/// let mut files = [0; 2];
+/// let mut buf = [0u8];
+/// let mut iovecs = [iovec {
+/// iov_base: buf.as_mut_ptr() as *mut c_void,
+/// iov_len: buf.len(),
+/// }];
+/// let (read_count, file_count) = unsafe {
+/// s2.recv_with_fds(&mut iovecs[..], &mut files)
+/// .expect("failed to recv fd")
+/// };
+///
+/// let mut file = unsafe { File::from_raw_fd(files[0]) };
+/// file.write(unsafe { from_raw_parts(&1203u64 as *const u64 as *const u8, 8) })
+/// .expect("failed to write to sent fd");
+/// assert_eq!(evt.read().expect("failed to read from eventfd"), 1203);
+/// ```
+pub trait ScmSocket {
+ /// Gets the file descriptor of this socket.
+ fn socket_fd(&self) -> RawFd;
+
+ /// Sends the given data and file descriptor over the socket.
+ ///
+ /// On success, returns the number of bytes sent.
+ ///
+ /// # Arguments
+ ///
+ /// * `buf` - A buffer of data to send on the `socket`.
+ /// * `fd` - A file descriptors to be sent.
+ fn send_with_fd<D: IntoIovec>(&self, buf: D, fd: RawFd) -> Result<usize> {
+ self.send_with_fds(&[buf], &[fd])
+ }
+
+ /// Sends the given data and file descriptors over the socket.
+ ///
+ /// On success, returns the number of bytes sent.
+ ///
+ /// # Arguments
+ ///
+ /// * `bufs` - A list of data buffer to send on the `socket`.
+ /// * `fds` - A list of file descriptors to be sent.
+ fn send_with_fds<D: IntoIovec>(&self, bufs: &[D], fds: &[RawFd]) -> Result<usize> {
+ raw_sendmsg(self.socket_fd(), bufs, fds)
+ }
+
+ /// Receives data and potentially a file descriptor from the socket.
+ ///
+ /// On success, returns the number of bytes and an optional file descriptor.
+ ///
+ /// # Arguments
+ ///
+ /// * `buf` - A buffer to receive data from the socket.
+ fn recv_with_fd(&self, buf: &mut [u8]) -> Result<(usize, Option<File>)> {
+ let mut fd = [0];
+ let mut iovecs = [iovec {
+ iov_base: buf.as_mut_ptr() as *mut c_void,
+ iov_len: buf.len(),
+ }];
+
+ // SAFETY: Safe because we have mutably borrowed buf and it's safe to write arbitrary data
+ // to a slice.
+ let (read_count, fd_count) = unsafe { self.recv_with_fds(&mut iovecs[..], &mut fd)? };
+ let file = if fd_count == 0 {
+ None
+ } else {
+ // SAFETY: Safe because the first fd from recv_with_fds is owned by us and valid
+ // because this branch was taken.
+ Some(unsafe { File::from_raw_fd(fd[0]) })
+ };
+ Ok((read_count, file))
+ }
+
+ /// Receives data and file descriptors from the socket.
+ ///
+ /// On success, returns the number of bytes and file descriptors received as a tuple
+ /// `(bytes count, files count)`.
+ ///
+ /// # Arguments
+ ///
+ /// * `iovecs` - A list of iovec to receive data from the socket.
+ /// * `fds` - A slice of `RawFd`s to put the received file descriptors into. On success, the
+ /// number of valid file descriptors is indicated by the second element of the
+ /// returned tuple. The caller owns these file descriptors, but they will not be
+ /// closed on drop like a `File`-like type would be. It is recommended that each valid
+ /// file descriptor gets wrapped in a drop type that closes it after this returns.
+ ///
+ /// # Safety
+ ///
+ /// It is the callers responsibility to ensure it is safe for arbitrary data to be
+ /// written to the iovec pointers.
+ unsafe fn recv_with_fds(
+ &self,
+ iovecs: &mut [iovec],
+ fds: &mut [RawFd],
+ ) -> Result<(usize, usize)> {
+ raw_recvmsg(self.socket_fd(), iovecs, fds)
+ }
+}
+
+impl ScmSocket for UnixDatagram {
+ fn socket_fd(&self) -> RawFd {
+ self.as_raw_fd()
+ }
+}
+
+impl ScmSocket for UnixStream {
+ fn socket_fd(&self) -> RawFd {
+ self.as_raw_fd()
+ }
+}
+
+/// Trait for types that can be converted into an `iovec` that can be referenced by a syscall for
+/// the lifetime of this object.
+///
+/// # Safety
+///
+/// This is marked unsafe because the implementation must ensure that the returned pointer and size
+/// is valid and that the lifetime of the returned pointer is at least that of the trait object.
+pub unsafe trait IntoIovec {
+ /// Gets the base pointer of this `iovec`.
+ fn as_ptr(&self) -> *const c_void;
+
+ /// Gets the size in bytes of this `iovec`.
+ fn size(&self) -> usize;
+}
+
+// SAFETY: Safe because this slice can not have another mutable reference and it's pointer and
+// size are guaranteed to be valid.
+unsafe impl<'a> IntoIovec for &'a [u8] {
+ // Clippy false positive: https://github.com/rust-lang/rust-clippy/issues/3480
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_asref))]
+ fn as_ptr(&self) -> *const c_void {
+ self.as_ref().as_ptr() as *const c_void
+ }
+
+ fn size(&self) -> usize {
+ self.len()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::undocumented_unsafe_blocks)]
+ use super::*;
+ use crate::eventfd::EventFd;
+
+ use std::io::Write;
+ use std::mem::size_of;
+ use std::os::raw::c_long;
+ use std::os::unix::net::UnixDatagram;
+ use std::slice::from_raw_parts;
+
+ use libc::cmsghdr;
+
+ #[test]
+ fn buffer_len() {
+ assert_eq!(CMSG_SPACE!(0), size_of::<cmsghdr>());
+ assert_eq!(
+ CMSG_SPACE!(size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>()
+ );
+ if size_of::<RawFd>() == 4 {
+ assert_eq!(
+ CMSG_SPACE!(2 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>()
+ );
+ assert_eq!(
+ CMSG_SPACE!(3 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>() * 2
+ );
+ assert_eq!(
+ CMSG_SPACE!(4 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>() * 2
+ );
+ } else if size_of::<RawFd>() == 8 {
+ assert_eq!(
+ CMSG_SPACE!(2 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>() * 2
+ );
+ assert_eq!(
+ CMSG_SPACE!(3 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>() * 3
+ );
+ assert_eq!(
+ CMSG_SPACE!(4 * size_of::<RawFd>()),
+ size_of::<cmsghdr>() + size_of::<c_long>() * 4
+ );
+ }
+ }
+
+ #[test]
+ fn send_recv_no_fd() {
+ let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+
+ let write_count = s1
+ .send_with_fds(&[[1u8, 1, 2].as_ref(), [21u8, 34, 55].as_ref()], &[])
+ .expect("failed to send data");
+
+ assert_eq!(write_count, 6);
+
+ let mut buf = [0u8; 6];
+ let mut files = [0; 1];
+ let mut iovecs = [iovec {
+ iov_base: buf.as_mut_ptr() as *mut c_void,
+ iov_len: buf.len(),
+ }];
+ let (read_count, file_count) = unsafe {
+ s2.recv_with_fds(&mut iovecs[..], &mut files)
+ .expect("failed to recv data")
+ };
+
+ assert_eq!(read_count, 6);
+ assert_eq!(file_count, 0);
+ assert_eq!(buf, [1, 1, 2, 21, 34, 55]);
+ }
+
+ #[test]
+ fn send_recv_only_fd() {
+ let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+
+ let evt = EventFd::new(0).expect("failed to create eventfd");
+ let write_count = s1
+ .send_with_fd([].as_ref(), evt.as_raw_fd())
+ .expect("failed to send fd");
+
+ assert_eq!(write_count, 0);
+
+ let (read_count, file_opt) = s2.recv_with_fd(&mut []).expect("failed to recv fd");
+
+ let mut file = file_opt.unwrap();
+
+ assert_eq!(read_count, 0);
+ assert!(file.as_raw_fd() >= 0);
+ assert_ne!(file.as_raw_fd(), s1.as_raw_fd());
+ assert_ne!(file.as_raw_fd(), s2.as_raw_fd());
+ assert_ne!(file.as_raw_fd(), evt.as_raw_fd());
+
+ file.write_all(unsafe { from_raw_parts(&1203u64 as *const u64 as *const u8, 8) })
+ .expect("failed to write to sent fd");
+
+ assert_eq!(evt.read().expect("failed to read from eventfd"), 1203);
+ }
+
+ #[test]
+ fn send_recv_with_fd() {
+ let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+
+ let evt = EventFd::new(0).expect("failed to create eventfd");
+ let write_count = s1
+ .send_with_fds(&[[237].as_ref()], &[evt.as_raw_fd()])
+ .expect("failed to send fd");
+
+ assert_eq!(write_count, 1);
+
+ let mut files = [0; 2];
+ let mut buf = [0u8];
+ let mut iovecs = [iovec {
+ iov_base: buf.as_mut_ptr() as *mut c_void,
+ iov_len: buf.len(),
+ }];
+ let (read_count, file_count) = unsafe {
+ s2.recv_with_fds(&mut iovecs[..], &mut files)
+ .expect("failed to recv fd")
+ };
+
+ assert_eq!(read_count, 1);
+ assert_eq!(buf[0], 237);
+ assert_eq!(file_count, 1);
+ assert!(files[0] >= 0);
+ assert_ne!(files[0], s1.as_raw_fd());
+ assert_ne!(files[0], s2.as_raw_fd());
+ assert_ne!(files[0], evt.as_raw_fd());
+
+ let mut file = unsafe { File::from_raw_fd(files[0]) };
+
+ file.write_all(unsafe { from_raw_parts(&1203u64 as *const u64 as *const u8, 8) })
+ .expect("failed to write to sent fd");
+
+ assert_eq!(evt.read().expect("failed to read from eventfd"), 1203);
+ }
+
+ #[test]
+ // Exercise the code paths that activate the issue of receiving the all the ancillary data,
+ // but missing to provide enough buffer space to store it.
+ fn send_more_recv_less1() {
+ let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+
+ let evt1 = EventFd::new(0).expect("failed to create eventfd");
+ let evt2 = EventFd::new(0).expect("failed to create eventfd");
+ let evt3 = EventFd::new(0).expect("failed to create eventfd");
+ let evt4 = EventFd::new(0).expect("failed to create eventfd");
+ let write_count = s1
+ .send_with_fds(
+ &[[237].as_ref()],
+ &[
+ evt1.as_raw_fd(),
+ evt2.as_raw_fd(),
+ evt3.as_raw_fd(),
+ evt4.as_raw_fd(),
+ ],
+ )
+ .expect("failed to send fd");
+
+ assert_eq!(write_count, 1);
+
+ let mut files = [0; 2];
+ let mut buf = [0u8];
+ let mut iovecs = [iovec {
+ iov_base: buf.as_mut_ptr() as *mut c_void,
+ iov_len: buf.len(),
+ }];
+ assert!(unsafe { s2.recv_with_fds(&mut iovecs[..], &mut files).is_err() });
+ }
+
+ // Exercise the code paths that activate the issue of receiving part of the sent ancillary
+ // data due to insufficient buffer space, activating `msg_flags` `MSG_CTRUNC` flag.
+ #[test]
+ fn send_more_recv_less2() {
+ let (s1, s2) = UnixDatagram::pair().expect("failed to create socket pair");
+
+ let evt1 = EventFd::new(0).expect("failed to create eventfd");
+ let evt2 = EventFd::new(0).expect("failed to create eventfd");
+ let evt3 = EventFd::new(0).expect("failed to create eventfd");
+ let evt4 = EventFd::new(0).expect("failed to create eventfd");
+ let write_count = s1
+ .send_with_fds(
+ &[[237].as_ref()],
+ &[
+ evt1.as_raw_fd(),
+ evt2.as_raw_fd(),
+ evt3.as_raw_fd(),
+ evt4.as_raw_fd(),
+ ],
+ )
+ .expect("failed to send fd");
+
+ assert_eq!(write_count, 1);
+
+ let mut files = [0; 1];
+ let mut buf = [0u8];
+ let mut iovecs = [iovec {
+ iov_base: buf.as_mut_ptr() as *mut c_void,
+ iov_len: buf.len(),
+ }];
+ assert!(unsafe { s2.recv_with_fds(&mut iovecs[..], &mut files).is_err() });
+ }
+}
diff --git a/src/linux/timerfd.rs b/src/linux/timerfd.rs
new file mode 100644
index 0000000..80f0789
--- /dev/null
+++ b/src/linux/timerfd.rs
@@ -0,0 +1,281 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Structure and functions for working with
+//! [`timerfd`](http://man7.org/linux/man-pages/man2/timerfd_create.2.html).
+
+use std::fs::File;
+use std::mem;
+use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
+use std::ptr;
+use std::time::Duration;
+
+use libc::{self, timerfd_create, timerfd_gettime, timerfd_settime, CLOCK_MONOTONIC, TFD_CLOEXEC};
+
+use crate::errno::{errno_result, Result};
+
+/// A safe wrapper around a Linux
+/// [`timerfd`](http://man7.org/linux/man-pages/man2/timerfd_create.2.html).
+pub struct TimerFd(File);
+
+impl TimerFd {
+ /// Create a new [`TimerFd`](struct.TimerFd.html).
+ ///
+ /// This creates a nonsettable monotonically increasing clock that does not
+ /// change after system startup. The timer is initally disarmed and must be
+ /// armed by calling [`reset`](fn.reset.html).
+ pub fn new() -> Result<TimerFd> {
+ // SAFETY: Safe because this doesn't modify any memory and we check the return value.
+ let ret = unsafe { timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ // SAFETY: Safe because we uniquely own the file descriptor.
+ Ok(TimerFd(unsafe { File::from_raw_fd(ret) }))
+ }
+
+ /// Arm the [`TimerFd`](struct.TimerFd.html).
+ ///
+ /// Set the timer to expire after `dur`.
+ ///
+ /// # Arguments
+ ///
+ /// * `dur`: Specify the initial expiration of the timer.
+ /// * `interval`: Specify the period for repeated expirations, depending on the
+ /// value passed. If `interval` is not `None`, it represents the period after
+ /// the initial expiration. Otherwise the timer will expire just once. Cancels
+ /// any existing duration and repeating interval.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::time::Duration;
+ /// use vmm_sys_util::timerfd::TimerFd;
+ ///
+ /// let mut timer = TimerFd::new().unwrap();
+ /// let dur = Duration::from_millis(100);
+ /// let interval = Duration::from_millis(100);
+ ///
+ /// timer.reset(dur, Some(interval)).unwrap();
+ /// ```
+ pub fn reset(&mut self, dur: Duration, interval: Option<Duration>) -> Result<()> {
+ // SAFETY: Safe because we are zero-initializing a struct with only primitive member fields.
+ let mut spec: libc::itimerspec = unsafe { mem::zeroed() };
+ // https://github.com/rust-lang/libc/issues/1848
+ #[cfg_attr(target_env = "musl", allow(deprecated))]
+ {
+ spec.it_value.tv_sec = dur.as_secs() as libc::time_t;
+ }
+ // nsec always fits in i32 because subsec_nanos is defined to be less than one billion.
+ let nsec = dur.subsec_nanos() as i32;
+ spec.it_value.tv_nsec = libc::c_long::from(nsec);
+
+ if let Some(int) = interval {
+ // https://github.com/rust-lang/libc/issues/1848
+ #[cfg_attr(target_env = "musl", allow(deprecated))]
+ {
+ spec.it_interval.tv_sec = int.as_secs() as libc::time_t;
+ }
+ // nsec always fits in i32 because subsec_nanos is defined to be less than one billion.
+ let nsec = int.subsec_nanos() as i32;
+ spec.it_interval.tv_nsec = libc::c_long::from(nsec);
+ }
+
+ // SAFETY: Safe because this doesn't modify any memory and we check the return value.
+ let ret = unsafe { timerfd_settime(self.as_raw_fd(), 0, &spec, ptr::null_mut()) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ Ok(())
+ }
+
+ /// Wait until the timer expires.
+ ///
+ /// The return value represents the number of times the timer has expired since
+ /// the last time `wait` was called. If the timer has not yet expired once,
+ /// this call will block until it does.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::time::Duration;
+ /// # use std::thread::sleep;
+ /// use vmm_sys_util::timerfd::TimerFd;
+ ///
+ /// let mut timer = TimerFd::new().unwrap();
+ /// let dur = Duration::from_millis(100);
+ /// let interval = Duration::from_millis(100);
+ /// timer.reset(dur, Some(interval)).unwrap();
+ ///
+ /// sleep(dur * 3);
+ /// let count = timer.wait().unwrap();
+ /// assert!(count >= 3);
+ /// ```
+ pub fn wait(&mut self) -> Result<u64> {
+ let mut count = 0u64;
+
+ // SAFETY: Safe because this will only modify |buf| and we check the return value.
+ let ret = unsafe {
+ libc::read(
+ self.as_raw_fd(),
+ &mut count as *mut _ as *mut libc::c_void,
+ mem::size_of_val(&count),
+ )
+ };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ // The bytes in the buffer are guaranteed to be in native byte-order so we don't need to
+ // use from_le or from_be.
+ Ok(count)
+ }
+
+ /// Tell if the timer is armed.
+ ///
+ /// Returns `Ok(true)` if the timer is currently armed, otherwise the errno set by
+ /// [`timerfd_gettime`](http://man7.org/linux/man-pages/man2/timerfd_create.2.html).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::time::Duration;
+ /// use vmm_sys_util::timerfd::TimerFd;
+ ///
+ /// let mut timer = TimerFd::new().unwrap();
+ /// let dur = Duration::from_millis(100);
+ ///
+ /// timer.reset(dur, None).unwrap();
+ /// assert!(timer.is_armed().unwrap());
+ /// ```
+ pub fn is_armed(&self) -> Result<bool> {
+ // SAFETY: Safe because we are zero-initializing a struct with only primitive member fields.
+ let mut spec: libc::itimerspec = unsafe { mem::zeroed() };
+
+ // SAFETY: Safe because timerfd_gettime is trusted to only modify `spec`.
+ let ret = unsafe { timerfd_gettime(self.as_raw_fd(), &mut spec) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ Ok(spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0)
+ }
+
+ /// Disarm the timer.
+ ///
+ /// Set zero to disarm the timer, referring to
+ /// [`timerfd_settime`](http://man7.org/linux/man-pages/man2/timerfd_create.2.html).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::time::Duration;
+ /// use vmm_sys_util::timerfd::TimerFd;
+ ///
+ /// let mut timer = TimerFd::new().unwrap();
+ /// let dur = Duration::from_millis(100);
+ ///
+ /// timer.reset(dur, None).unwrap();
+ /// timer.clear().unwrap();
+ /// ```
+ pub fn clear(&mut self) -> Result<()> {
+ // SAFETY: Safe because we are zero-initializing a struct with only primitive member fields.
+ let spec: libc::itimerspec = unsafe { mem::zeroed() };
+
+ // SAFETY: Safe because this doesn't modify any memory and we check the return value.
+ let ret = unsafe { timerfd_settime(self.as_raw_fd(), 0, &spec, ptr::null_mut()) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ Ok(())
+ }
+}
+
+impl AsRawFd for TimerFd {
+ fn as_raw_fd(&self) -> RawFd {
+ self.0.as_raw_fd()
+ }
+}
+
+impl FromRawFd for TimerFd {
+ /// This function is unsafe as the primitives currently returned
+ /// have the contract that they are the sole owner of the file
+ /// descriptor they are wrapping. Usage of this function could
+ /// accidentally allow violating this contract which can cause memory
+ /// unsafety in code that relies on it being true.
+ unsafe fn from_raw_fd(fd: RawFd) -> Self {
+ TimerFd(File::from_raw_fd(fd))
+ }
+}
+
+impl IntoRawFd for TimerFd {
+ fn into_raw_fd(self) -> RawFd {
+ self.0.into_raw_fd()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::undocumented_unsafe_blocks)]
+ use super::*;
+ use std::thread::sleep;
+ use std::time::{Duration, Instant};
+
+ #[test]
+ fn test_from_raw_fd() {
+ let ret = unsafe { timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC) };
+ let tfd = unsafe { TimerFd::from_raw_fd(ret) };
+ assert!(!tfd.is_armed().unwrap());
+ }
+
+ #[test]
+ fn test_into_raw_fd() {
+ let tfd = TimerFd::new().expect("failed to create timerfd");
+ let fd = tfd.into_raw_fd();
+ assert!(fd > 0);
+ }
+ #[test]
+ fn test_one_shot() {
+ let mut tfd = TimerFd::new().expect("failed to create timerfd");
+ assert!(!tfd.is_armed().unwrap());
+
+ let dur = Duration::from_millis(200);
+ let now = Instant::now();
+ tfd.reset(dur, None).expect("failed to arm timer");
+
+ assert!(tfd.is_armed().unwrap());
+
+ let count = tfd.wait().expect("unable to wait for timer");
+
+ assert_eq!(count, 1);
+ assert!(now.elapsed() >= dur);
+ tfd.clear().expect("unable to clear the timer");
+ assert!(!tfd.is_armed().unwrap());
+ }
+
+ #[test]
+ fn test_repeating() {
+ let mut tfd = TimerFd::new().expect("failed to create timerfd");
+
+ let dur = Duration::from_millis(200);
+ let interval = Duration::from_millis(100);
+ tfd.reset(dur, Some(interval)).expect("failed to arm timer");
+
+ sleep(dur * 3);
+
+ let count = tfd.wait().expect("unable to wait for timer");
+ assert!(count >= 5, "count = {}", count);
+ tfd.clear().expect("unable to clear the timer");
+ assert!(!tfd.is_armed().unwrap());
+ }
+}
diff --git a/src/linux/write_zeroes.rs b/src/linux/write_zeroes.rs
new file mode 100644
index 0000000..e6084da
--- /dev/null
+++ b/src/linux/write_zeroes.rs
@@ -0,0 +1,330 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Traits for replacing a range with a hole and writing zeroes in a file.
+
+use std::cmp::min;
+use std::fs::File;
+use std::io::{Error, ErrorKind, Result, Seek, SeekFrom};
+use std::os::unix::fs::FileExt;
+
+use crate::fallocate::{fallocate, FallocateMode};
+
+/// A trait for deallocating space in a file.
+pub trait PunchHole {
+ /// Replace a range of bytes with a hole.
+ ///
+ /// # Arguments
+ ///
+ /// * `offset`: offset of the file where to replace with a hole.
+ /// * `length`: the number of bytes of the hole to replace with.
+ fn punch_hole(&mut self, offset: u64, length: u64) -> Result<()>;
+}
+
+impl PunchHole for File {
+ fn punch_hole(&mut self, offset: u64, length: u64) -> Result<()> {
+ fallocate(self, FallocateMode::PunchHole, true, offset, length as u64)
+ .map_err(|e| Error::from_raw_os_error(e.errno()))
+ }
+}
+
+/// A trait for writing zeroes to a stream.
+pub trait WriteZeroes {
+ /// Write up to `length` bytes of zeroes to the stream, returning how many bytes were written.
+ ///
+ /// # Arguments
+ ///
+ /// * `length`: the number of bytes of zeroes to write to the stream.
+ fn write_zeroes(&mut self, length: usize) -> Result<usize>;
+
+ /// Write zeroes to the stream until `length` bytes have been written.
+ ///
+ /// This method will continuously write zeroes until the requested `length` is satisfied or an
+ /// unrecoverable error is encountered.
+ ///
+ /// # Arguments
+ ///
+ /// * `length`: the exact number of bytes of zeroes to write to the stream.
+ fn write_all_zeroes(&mut self, mut length: usize) -> Result<()> {
+ while length > 0 {
+ match self.write_zeroes(length) {
+ Ok(0) => return Err(Error::from(ErrorKind::WriteZero)),
+ Ok(bytes_written) => {
+ length = length
+ .checked_sub(bytes_written)
+ .ok_or_else(|| Error::from(ErrorKind::Other))?
+ }
+ // If the operation was interrupted, we should retry it.
+ Err(e) => {
+ if e.kind() != ErrorKind::Interrupted {
+ return Err(e);
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+}
+
+/// A trait for writing zeroes to an arbitrary position in a file.
+pub trait WriteZeroesAt {
+ /// Write up to `length` bytes of zeroes starting at `offset`, returning how many bytes were
+ /// written.
+ ///
+ /// # Arguments
+ ///
+ /// * `offset`: offset of the file where to write zeroes.
+ /// * `length`: the number of bytes of zeroes to write to the stream.
+ fn write_zeroes_at(&mut self, offset: u64, length: usize) -> Result<usize>;
+
+ /// Write zeroes starting at `offset` until `length` bytes have been written.
+ ///
+ /// This method will continuously write zeroes until the requested `length` is satisfied or an
+ /// unrecoverable error is encountered.
+ ///
+ /// # Arguments
+ ///
+ /// * `offset`: offset of the file where to write zeroes.
+ /// * `length`: the exact number of bytes of zeroes to write to the stream.
+ fn write_all_zeroes_at(&mut self, mut offset: u64, mut length: usize) -> Result<()> {
+ while length > 0 {
+ match self.write_zeroes_at(offset, length) {
+ Ok(0) => return Err(Error::from(ErrorKind::WriteZero)),
+ Ok(bytes_written) => {
+ length = length
+ .checked_sub(bytes_written)
+ .ok_or_else(|| Error::from(ErrorKind::Other))?;
+ offset = offset
+ .checked_add(bytes_written as u64)
+ .ok_or_else(|| Error::from(ErrorKind::Other))?;
+ }
+ Err(e) => {
+ // If the operation was interrupted, we should retry it.
+ if e.kind() != ErrorKind::Interrupted {
+ return Err(e);
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+}
+
+impl WriteZeroesAt for File {
+ fn write_zeroes_at(&mut self, offset: u64, length: usize) -> Result<usize> {
+ // Try to use fallocate() first, since it is more efficient than writing zeroes with
+ // write().
+ if fallocate(self, FallocateMode::ZeroRange, true, offset, length as u64).is_ok() {
+ return Ok(length);
+ }
+
+ // Fall back to write().
+ // fallocate() failed; fall back to writing a buffer of zeroes until we have written up
+ // to `length`.
+ let buf_size = min(length, 0x10000);
+ let buf = vec![0u8; buf_size];
+ let mut num_written: usize = 0;
+ while num_written < length {
+ let remaining = length - num_written;
+ let write_size = min(remaining, buf_size);
+ num_written += self.write_at(&buf[0..write_size], offset + num_written as u64)?;
+ }
+ Ok(length)
+ }
+}
+
+impl<T: WriteZeroesAt + Seek> WriteZeroes for T {
+ fn write_zeroes(&mut self, length: usize) -> Result<usize> {
+ let offset = self.seek(SeekFrom::Current(0))?;
+ let num_written = self.write_zeroes_at(offset, length)?;
+ // Advance the seek cursor as if we had done a real write().
+ self.seek(SeekFrom::Current(num_written as i64))?;
+ Ok(length)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use std::io::{Read, Seek, SeekFrom, Write};
+
+ use crate::tempfile::TempFile;
+
+ #[test]
+ fn test_small_write_zeroes() {
+ const NON_ZERO_VALUE: u8 = 0x55;
+ const BUF_SIZE: usize = 5678;
+
+ let mut f = TempFile::new().unwrap().into_file();
+ f.set_len(16384).unwrap();
+
+ // Write buffer of non-zero bytes to offset 1234.
+ let orig_data = [NON_ZERO_VALUE; BUF_SIZE];
+ f.seek(SeekFrom::Start(1234)).unwrap();
+ f.write_all(&orig_data).unwrap();
+
+ // Read back the data plus some overlap on each side.
+ let mut readback = [0u8; 16384];
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+ // Bytes before the write should still be 0.
+ for read in &readback[0..1234] {
+ assert_eq!(*read, 0);
+ }
+ // Bytes that were just written should have `NON_ZERO_VALUE` value.
+ for read in &readback[1234..(1234 + BUF_SIZE)] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ // Bytes after the written area should still be 0.
+ for read in &readback[(1234 + BUF_SIZE)..] {
+ assert_eq!(*read, 0);
+ }
+
+ // Overwrite some of the data with zeroes.
+ f.seek(SeekFrom::Start(2345)).unwrap();
+ f.write_all_zeroes(4321).unwrap();
+ // Verify seek position after `write_all_zeroes()`.
+ assert_eq!(f.seek(SeekFrom::Current(0)).unwrap(), 2345 + 4321);
+
+ // Read back the data and verify that it is now zero.
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+ // Bytes before the write should still be 0.
+ for read in &readback[0..1234] {
+ assert_eq!(*read, 0);
+ }
+ // Original data should still exist before the zeroed region.
+ for read in &readback[1234..2345] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ // Verify that `write_all_zeroes()` zeroed the intended region.
+ for read in &readback[2345..(2345 + 4321)] {
+ assert_eq!(*read, 0);
+ }
+ // Original data should still exist after the zeroed region.
+ for read in &readback[(2345 + 4321)..(1234 + BUF_SIZE)] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ // The rest of the file should still be 0.
+ for read in &readback[(1234 + BUF_SIZE)..] {
+ assert_eq!(*read, 0);
+ }
+ }
+
+ #[test]
+ fn test_large_write_zeroes() {
+ const NON_ZERO_VALUE: u8 = 0x55;
+ const SIZE: usize = 0x2_0000;
+
+ let mut f = TempFile::new().unwrap().into_file();
+ f.set_len(16384).unwrap();
+
+ // Write buffer of non-zero bytes. The size of the buffer will be the new
+ // size of the file.
+ let orig_data = [NON_ZERO_VALUE; SIZE];
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.write_all(&orig_data).unwrap();
+ assert_eq!(f.metadata().unwrap().len(), SIZE as u64);
+
+ // Overwrite some of the data with zeroes.
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.write_all_zeroes(0x1_0001).unwrap();
+ // Verify seek position after `write_all_zeroes()`.
+ assert_eq!(f.seek(SeekFrom::Current(0)).unwrap(), 0x1_0001);
+
+ // Read back the data and verify that it is now zero.
+ let mut readback = [0u8; SIZE];
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+ // Verify that `write_all_zeroes()` zeroed the intended region.
+ for read in &readback[0..0x1_0001] {
+ assert_eq!(*read, 0);
+ }
+ // Original data should still exist after the zeroed region.
+ for read in &readback[0x1_0001..SIZE] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+
+ // Now let's zero a certain region by using `write_all_zeroes_at()`.
+ f.write_all_zeroes_at(0x1_8001, 0x200).unwrap();
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+
+ // Original data should still exist before the zeroed region.
+ for read in &readback[0x1_0001..0x1_8001] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ // Verify that `write_all_zeroes_at()` zeroed the intended region.
+ for read in &readback[0x1_8001..(0x1_8001 + 0x200)] {
+ assert_eq!(*read, 0);
+ }
+ // Original data should still exist after the zeroed region.
+ for read in &readback[(0x1_8001 + 0x200)..SIZE] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ }
+
+ #[test]
+ fn test_punch_hole() {
+ const NON_ZERO_VALUE: u8 = 0x55;
+ const SIZE: usize = 0x2_0000;
+
+ let mut f = TempFile::new().unwrap().into_file();
+ f.set_len(16384).unwrap();
+
+ // Write buffer of non-zero bytes. The size of the buffer will be the new
+ // size of the file.
+ let orig_data = [NON_ZERO_VALUE; SIZE];
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.write_all(&orig_data).unwrap();
+ assert_eq!(f.metadata().unwrap().len(), SIZE as u64);
+
+ // Punch a hole at offset 0x10001.
+ // Subsequent reads from this range will return zeros.
+ f.punch_hole(0x1_0001, 0x200).unwrap();
+
+ // Read back the data.
+ let mut readback = [0u8; SIZE];
+ f.seek(SeekFrom::Start(0)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+ // Original data should still exist before the hole.
+ for read in &readback[0..0x1_0001] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+ // Verify that `punch_hole()` zeroed the intended region.
+ for read in &readback[0x1_0001..(0x1_0001 + 0x200)] {
+ assert_eq!(*read, 0);
+ }
+ // Original data should still exist after the hole.
+ for read in &readback[(0x1_0001 + 0x200)..] {
+ assert_eq!(*read, NON_ZERO_VALUE);
+ }
+
+ // Punch a hole at the end of the file.
+ // Subsequent reads from this range should return zeros.
+ f.punch_hole(SIZE as u64 - 0x400, 0x400).unwrap();
+ // Even though we punched a hole at the end of the file, the file size should remain the
+ // same since FALLOC_FL_PUNCH_HOLE must be used with FALLOC_FL_KEEP_SIZE.
+ assert_eq!(f.metadata().unwrap().len(), SIZE as u64);
+
+ let mut readback = [0u8; 0x400];
+ f.seek(SeekFrom::Start(SIZE as u64 - 0x400)).unwrap();
+ f.read_exact(&mut readback).unwrap();
+ // Verify that `punch_hole()` zeroed the intended region.
+ for read in &readback[0..0x400] {
+ assert_eq!(*read, 0);
+ }
+
+ // Punching a hole of len 0 should return an error.
+ assert!(f.punch_hole(0x200, 0x0).is_err());
+ // Zeroing a region of len 0 should not return an error since we have a fallback path
+ // in `write_zeroes_at()` for `fallocate()` failure.
+ assert!(f.write_zeroes_at(0x200, 0x0).is_ok());
+ }
+}
diff --git a/src/metric.rs b/src/metric.rs
new file mode 100644
index 0000000..c728161
--- /dev/null
+++ b/src/metric.rs
@@ -0,0 +1,178 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//! The purpose of this module is to provide abstractions for working with
+//! metrics in the context of rust-vmm components where there is a strong need
+//! to have metrics as an optional feature.
+//!
+//! As multiple stakeholders are using these components, there are also
+//! questions regarding the serialization format, as metrics are expected to be
+//! flexible enough to allow different formatting, serialization and writers.
+//! When using the rust-vmm metrics, the expectation is that VMMs built on top
+//! of these components can choose what metrics they’re interested in and also
+//! can add their own custom metrics without the need to maintain forks.
+
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Abstraction over the common metric operations.
+///
+/// An object implementing `Metric` is expected to have an inner counter that
+/// can be incremented and reset. The `Metric` trait can be used for
+/// implementing a metric system backend (or an aggregator).
+pub trait Metric {
+ /// Adds `value` to the current counter.
+ fn add(&self, value: u64);
+ /// Increments by 1 unit the current counter.
+ fn inc(&self) {
+ self.add(1);
+ }
+ /// Returns current value of the counter.
+ fn count(&self) -> u64;
+ /// Resets the metric counter.
+ fn reset(&self);
+ /// Set the metric counter `value`.
+ fn set(&self, value: u64);
+}
+
+impl Metric for AtomicU64 {
+ /// Adds `value` to the current counter.
+ ///
+ /// According to
+ /// [`fetch_add` documentation](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicU64.html#method.fetch_add),
+ /// in case of an integer overflow, the counter starts over from 0.
+ fn add(&self, value: u64) {
+ self.fetch_add(value, Ordering::Relaxed);
+ }
+
+ /// Returns current value of the counter.
+ fn count(&self) -> u64 {
+ self.load(Ordering::Relaxed)
+ }
+
+ /// Resets the metric counter to 0.
+ fn reset(&self) {
+ self.store(0, Ordering::Relaxed)
+ }
+
+ /// Set the metric counter `value`.
+ fn set(&self, value: u64) {
+ self.store(value, Ordering::Relaxed);
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::metric::Metric;
+
+ use std::sync::atomic::AtomicU64;
+ use std::sync::Arc;
+
+ struct Dog<T: DogEvents> {
+ metrics: T,
+ }
+
+ // Trait that declares events that can happen during the lifetime of the
+ // `Dog` which should also have associated events (such as metrics).
+ trait DogEvents {
+ // Event to be called when the dog `bark`s.
+ fn inc_bark(&self);
+ // Event to be called when the dog `eat`s.
+ fn inc_eat(&self);
+ // Event to be called when the dog `eat`s a lot.
+ fn set_eat(&self, no_times: u64);
+ }
+
+ impl<T: DogEvents> Dog<T> {
+ fn bark(&self) {
+ println!("bark! bark!");
+ self.metrics.inc_bark();
+ }
+
+ fn eat(&self) {
+ println!("nom! nom!");
+ self.metrics.inc_eat();
+ }
+
+ fn eat_more_times(&self, no_times: u64) {
+ self.metrics.set_eat(no_times);
+ }
+ }
+
+ impl<T: DogEvents> Dog<T> {
+ fn new_with_metrics(metrics: T) -> Self {
+ Self { metrics }
+ }
+ }
+
+ #[test]
+ fn test_main() {
+ // The `Metric` trait is implemented for `AtomicUsize` so we can easily use it as the
+ // counter for the dog events.
+ #[derive(Default, Debug)]
+ struct DogEventMetrics {
+ bark: AtomicU64,
+ eat: AtomicU64,
+ }
+
+ impl DogEvents for Arc<DogEventMetrics> {
+ fn inc_bark(&self) {
+ self.bark.inc();
+ }
+
+ fn inc_eat(&self) {
+ self.eat.inc();
+ }
+
+ fn set_eat(&self, no_times: u64) {
+ self.eat.set(no_times);
+ }
+ }
+
+ impl DogEventMetrics {
+ fn reset(&self) {
+ self.bark.reset();
+ self.eat.reset();
+ }
+ }
+
+ // This is the central object of mini-app built in this example.
+ // All the metrics that might be needed by the app are referenced through the
+ // `SystemMetrics` object. The `SystemMetric` also decides how to format the metrics.
+ // In this simple example, the metrics are formatted with the dummy Debug formatter.
+ #[derive(Default)]
+ struct SystemMetrics {
+ pub(crate) dog_metrics: Arc<DogEventMetrics>,
+ }
+
+ impl SystemMetrics {
+ fn serialize(&self) -> String {
+ let mut serialized_metrics = format!("{:#?}", &self.dog_metrics);
+ // We can choose to reset the metrics right after we format them for serialization.
+ self.dog_metrics.reset();
+
+ serialized_metrics.retain(|c| !c.is_whitespace());
+ serialized_metrics
+ }
+ }
+
+ let system_metrics = SystemMetrics::default();
+ let dog = Dog::new_with_metrics(system_metrics.dog_metrics.clone());
+ dog.bark();
+ dog.bark();
+ dog.eat();
+
+ let expected_metrics = String::from("DogEventMetrics{bark:2,eat:1,}");
+ let actual_metrics = system_metrics.serialize();
+ assert_eq!(expected_metrics, actual_metrics);
+
+ assert_eq!(system_metrics.dog_metrics.eat.count(), 0);
+ assert_eq!(system_metrics.dog_metrics.bark.count(), 0);
+
+ // Set `std::u64::MAX` value to `eat` metric.
+ dog.eat_more_times(std::u64::MAX);
+ assert_eq!(system_metrics.dog_metrics.eat.count(), std::u64::MAX);
+ // Check that `add()` wraps around on overflow.
+ dog.eat();
+ dog.eat();
+ assert_eq!(system_metrics.dog_metrics.eat.count(), 1);
+ }
+}
diff --git a/src/rand.rs b/src/rand.rs
new file mode 100644
index 0000000..097341f
--- /dev/null
+++ b/src/rand.rs
@@ -0,0 +1,163 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Miscellaneous functions related to getting (pseudo) random numbers and
+//! strings.
+//!
+//! NOTE! This should not be used when you do need __real__ random numbers such
+//! as for encryption but will probably be suitable when you want locally
+//! unique ID's that will not be shared over the network.
+
+use std::ffi::OsString;
+use std::str;
+
+/// Gets an ever increasing u64 (at least for this process).
+///
+/// The number retrieved will be based upon the time of the last reboot (x86_64)
+/// and something undefined for other architectures.
+pub fn timestamp_cycles() -> u64 {
+ #[cfg(target_arch = "x86_64")]
+ // SAFETY: Safe because there's nothing that can go wrong with this call.
+ unsafe {
+ std::arch::x86_64::_rdtsc() as u64
+ }
+
+ #[cfg(not(target_arch = "x86_64"))]
+ {
+ const MONOTONIC_CLOCK_MULTPIPLIER: u64 = 1_000_000_000;
+
+ let mut ts = libc::timespec {
+ tv_sec: 0,
+ tv_nsec: 0,
+ };
+ // SAFETY: We initialized the parameters correctly and we trust the function.
+ unsafe {
+ libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut ts);
+ }
+ (ts.tv_sec as u64) * MONOTONIC_CLOCK_MULTPIPLIER + (ts.tv_nsec as u64)
+ }
+}
+
+/// Generate pseudo random u32 numbers based on the current timestamp.
+pub fn xor_pseudo_rng_u32() -> u32 {
+ let mut t: u32 = timestamp_cycles() as u32;
+ // Taken from https://en.wikipedia.org/wiki/Xorshift
+ t ^= t << 13;
+ t ^= t >> 17;
+ t ^ (t << 5)
+}
+
+// This will get an array of numbers that can safely be converted to strings
+// because they will be in the range [a-zA-Z0-9]. The return vector could be any
+// size between 0 and 4.
+fn xor_pseudo_rng_u8_alphanumerics(rand_fn: &dyn Fn() -> u32) -> Vec<u8> {
+ rand_fn()
+ .to_ne_bytes()
+ .to_vec()
+ .drain(..)
+ .filter(|val| {
+ (48..=57).contains(val) || (65..=90).contains(val) || (97..=122).contains(val)
+ })
+ .collect()
+}
+
+fn xor_pseudo_rng_u8_bytes(rand_fn: &dyn Fn() -> u32) -> Vec<u8> {
+ rand_fn().to_ne_bytes().to_vec()
+}
+
+fn rand_alphanumerics_impl(rand_fn: &dyn Fn() -> u32, len: usize) -> OsString {
+ let mut buf = OsString::new();
+ let mut done = 0;
+ loop {
+ for n in xor_pseudo_rng_u8_alphanumerics(rand_fn) {
+ done += 1;
+ buf.push(str::from_utf8(&[n]).unwrap_or("_"));
+ if done >= len {
+ return buf;
+ }
+ }
+ }
+}
+
+fn rand_bytes_impl(rand_fn: &dyn Fn() -> u32, len: usize) -> Vec<u8> {
+ let mut buf: Vec<Vec<u8>> = Vec::new();
+ let mut num = if len % 4 == 0 { len / 4 } else { len / 4 + 1 };
+ while num > 0 {
+ buf.push(xor_pseudo_rng_u8_bytes(rand_fn));
+ num -= 1;
+ }
+ buf.into_iter().flatten().take(len).collect()
+}
+
+/// Gets a pseudo random OsString of length `len` with characters in the
+/// range [a-zA-Z0-9].
+pub fn rand_alphanumerics(len: usize) -> OsString {
+ rand_alphanumerics_impl(&xor_pseudo_rng_u32, len)
+}
+
+/// Get a pseudo random vector of `len` bytes.
+pub fn rand_bytes(len: usize) -> Vec<u8> {
+ rand_bytes_impl(&xor_pseudo_rng_u32, len)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_timestamp_cycles() {
+ for _ in 0..1000 {
+ assert!(timestamp_cycles() < timestamp_cycles());
+ }
+ }
+
+ #[test]
+ fn test_xor_pseudo_rng_u32() {
+ for _ in 0..1000 {
+ assert_ne!(xor_pseudo_rng_u32(), xor_pseudo_rng_u32());
+ }
+ }
+
+ #[test]
+ fn test_xor_pseudo_rng_u8_alphas() {
+ let i = 3612982; // 55 (shifted 16 places), 33 (shifted 8 places), 54...
+ // The 33 will be discarded as it is not a valid letter
+ // (upper or lower) or number.
+ let s = xor_pseudo_rng_u8_alphanumerics(&|| i);
+ assert_eq!(vec![54, 55], s);
+ }
+
+ #[test]
+ fn test_rand_alphanumerics_impl() {
+ let s = rand_alphanumerics_impl(&|| 14134, 5);
+ assert_eq!("67676", s);
+ }
+
+ #[test]
+ fn test_rand_alphanumerics() {
+ let s = rand_alphanumerics(5);
+ assert_eq!(5, s.len());
+ }
+
+ #[test]
+ fn test_xor_pseudo_rng_u8_bytes() {
+ let i = 3612982; // 55 (shifted 16 places), 33 (shifted 8 places), 54...
+ // The 33 will be discarded as it is not a valid letter
+ // (upper or lower) or number.
+ let s = xor_pseudo_rng_u8_bytes(&|| i);
+ assert_eq!(vec![54, 33, 55, 0], s);
+ }
+
+ #[test]
+ fn test_rand_bytes_impl() {
+ let s = rand_bytes_impl(&|| 1234567, 4);
+ assert_eq!(vec![135, 214, 18, 0], s);
+ }
+
+ #[test]
+ fn test_rand_bytes() {
+ for i in 0..8 {
+ assert_eq!(i, rand_bytes(i).len());
+ }
+ }
+}
diff --git a/src/syscall.rs b/src/syscall.rs
new file mode 100644
index 0000000..6fb4d64
--- /dev/null
+++ b/src/syscall.rs
@@ -0,0 +1,50 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Wrapper for interpreting syscall exit codes.
+
+use std::os::raw::c_int;
+
+/// Wrapper to interpret syscall exit codes and provide a rustacean `io::Result`.
+pub struct SyscallReturnCode(pub c_int);
+
+impl SyscallReturnCode {
+ /// Returns the last OS error if value is -1 or Ok(value) otherwise.
+ pub fn into_result(self) -> std::io::Result<c_int> {
+ if self.0 == -1 {
+ Err(std::io::Error::last_os_error())
+ } else {
+ Ok(self.0)
+ }
+ }
+ /// Returns the last OS error if value is -1 or Ok(()) otherwise.
+ pub fn into_empty_result(self) -> std::io::Result<()> {
+ self.into_result().map(|_| ())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_syscall_ops() {
+ let mut syscall_code = SyscallReturnCode(1);
+ match syscall_code.into_result() {
+ Ok(_value) => (),
+ _ => unreachable!(),
+ }
+
+ syscall_code = SyscallReturnCode(-1);
+ assert!(syscall_code.into_result().is_err());
+
+ syscall_code = SyscallReturnCode(1);
+ match syscall_code.into_empty_result() {
+ Ok(()) => (),
+ _ => unreachable!(),
+ }
+
+ syscall_code = SyscallReturnCode(-1);
+ assert!(syscall_code.into_empty_result().is_err());
+ }
+}
diff --git a/src/tempfile.rs b/src/tempfile.rs
new file mode 100644
index 0000000..7d70f66
--- /dev/null
+++ b/src/tempfile.rs
@@ -0,0 +1,304 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Struct for handling temporary files as well as any cleanup required.
+//!
+//! The temporary files will be created with a name available as well as having
+//! an exposed `fs::File` for reading/writing.
+//!
+//! The file will be removed when the object goes out of scope.
+//!
+//! # Examples
+//!
+//! ```
+//! use std::env::temp_dir;
+//! use std::io::Write;
+//! use std::path::{Path, PathBuf};
+//! use vmm_sys_util::tempfile::TempFile;
+//!
+//! let mut prefix = temp_dir();
+//! prefix.push("tempfile");
+//! let t = TempFile::new_with_prefix(prefix).unwrap();
+//! let mut f = t.as_file();
+//! f.write_all(b"hello world").unwrap();
+//! f.sync_all().unwrap();
+
+use std::env::temp_dir;
+use std::ffi::OsStr;
+use std::fs;
+use std::fs::File;
+use std::path::{Path, PathBuf};
+
+use libc;
+
+use crate::errno::{errno_result, Error, Result};
+
+/// Wrapper for working with temporary files.
+///
+/// The file will be maintained for the lifetime of the `TempFile` object.
+pub struct TempFile {
+ path: PathBuf,
+ file: Option<File>,
+}
+
+impl TempFile {
+ /// Creates the TempFile using a prefix.
+ ///
+ /// # Arguments
+ ///
+ /// `prefix`: The path and filename where to create the temporary file. Six
+ /// random alphanumeric characters will be added to the end of this to form
+ /// the filename.
+ #[cfg(unix)]
+ pub fn new_with_prefix<P: AsRef<OsStr>>(prefix: P) -> Result<TempFile> {
+ use std::ffi::CString;
+ use std::os::unix::{ffi::OsStrExt, io::FromRawFd};
+
+ let mut os_fname = prefix.as_ref().to_os_string();
+ os_fname.push("XXXXXX");
+
+ let raw_fname = match CString::new(os_fname.as_bytes()) {
+ Ok(c_string) => c_string.into_raw(),
+ Err(_) => return Err(Error::new(libc::EINVAL)),
+ };
+
+ // SAFETY: Safe because `raw_fname` originates from CString::into_raw, meaning
+ // it is a pointer to a nul-terminated sequence of characters.
+ let fd = unsafe { libc::mkstemp(raw_fname) };
+ if fd == -1 {
+ return errno_result();
+ }
+
+ // SAFETY: raw_fname originates from a call to CString::into_raw. The length
+ // of the string has not changed, as mkstemp returns a valid file name, and
+ // '\0' cannot be part of a valid filename.
+ let c_tempname = unsafe { CString::from_raw(raw_fname) };
+ let os_tempname = OsStr::from_bytes(c_tempname.as_bytes());
+
+ // SAFETY: Safe because we checked `fd != -1` above and we uniquely own the file
+ // descriptor. This `fd` will be freed etc when `File` and thus
+ // `TempFile` goes out of scope.
+ let file = unsafe { File::from_raw_fd(fd) };
+
+ Ok(TempFile {
+ path: PathBuf::from(os_tempname),
+ file: Some(file),
+ })
+ }
+
+ /// Creates the TempFile using a prefix.
+ ///
+ /// # Arguments
+ ///
+ /// `prefix`: The path and filename where to create the temporary file. Six
+ /// random alphanumeric characters will be added to the end of this to form
+ /// the filename.
+ #[cfg(windows)]
+ pub fn new_with_prefix<P: AsRef<OsStr>>(prefix: P) -> Result<TempFile> {
+ use crate::rand::rand_alphanumerics;
+ use std::fs::OpenOptions;
+
+ let file_path_str = format!(
+ "{}{}",
+ prefix.as_ref().to_str().unwrap_or_default(),
+ rand_alphanumerics(6).to_str().unwrap_or_default()
+ );
+ let file_path_buf = PathBuf::from(&file_path_str);
+
+ let file = OpenOptions::new()
+ .read(true)
+ .write(true)
+ .create(true)
+ .truncate(true)
+ .open(file_path_buf.as_path())?;
+
+ Ok(TempFile {
+ path: file_path_buf,
+ file: Some(file),
+ })
+ }
+
+ /// Creates the TempFile inside a specific location.
+ ///
+ /// # Arguments
+ ///
+ /// `path`: The path where to create a temporary file with a filename formed from
+ /// six random alphanumeric characters.
+ pub fn new_in(path: &Path) -> Result<Self> {
+ let mut path_buf = path.canonicalize().unwrap();
+ // This `push` adds a trailing slash ("/whatever/path" -> "/whatever/path/").
+ // This is safe for paths with an already existing trailing slash.
+ path_buf.push("");
+ let temp_file = TempFile::new_with_prefix(path_buf.as_path())?;
+ Ok(temp_file)
+ }
+
+ /// Creates the TempFile.
+ ///
+ /// Creates a temporary file inside `$TMPDIR` if set, otherwise inside `/tmp`.
+ /// The filename will consist of six random alphanumeric characters.
+ pub fn new() -> Result<Self> {
+ let in_tmp_dir = temp_dir();
+ let temp_file = TempFile::new_in(in_tmp_dir.as_path())?;
+ Ok(temp_file)
+ }
+
+ /// Removes the temporary file.
+ ///
+ /// Calling this is optional as dropping a `TempFile` object will also
+ /// remove the file. Calling remove explicitly allows for better error
+ /// handling.
+ pub fn remove(&mut self) -> Result<()> {
+ fs::remove_file(&self.path).map_err(Error::from)
+ }
+
+ /// Returns the path to the file if the `TempFile` object that is wrapping the file
+ /// is still in scope.
+ ///
+ /// If we remove the file by explicitly calling [`remove`](#method.remove),
+ /// `as_path()` can still be used to return the path to that file (even though that
+ /// path does not point at an existing entity anymore).
+ /// Calling `as_path()` after `remove()` is useful, for example, when you need a
+ /// random path string, but don't want an actual resource at that path.
+ pub fn as_path(&self) -> &Path {
+ &self.path
+ }
+
+ /// Returns a reference to the File.
+ pub fn as_file(&self) -> &File {
+ // It's safe to unwrap because `file` can be `None` only after calling `into_file`
+ // which consumes this object.
+ self.file.as_ref().unwrap()
+ }
+
+ /// Consumes the TempFile, returning the wrapped file.
+ ///
+ /// This also removes the file from the system. The file descriptor remains opened and
+ /// it can be used until the returned file is dropped.
+ pub fn into_file(mut self) -> File {
+ self.file.take().unwrap()
+ }
+}
+
+impl Drop for TempFile {
+ fn drop(&mut self) {
+ let _ = self.remove();
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::io::{Read, Write};
+
+ #[test]
+ fn test_create_file_with_prefix() {
+ fn between(lower: u8, upper: u8, to_check: u8) -> bool {
+ (to_check >= lower) && (to_check <= upper)
+ }
+
+ let mut prefix = temp_dir();
+ prefix.push("asdf");
+ let t = TempFile::new_with_prefix(&prefix).unwrap();
+ let path = t.as_path().to_owned();
+
+ // Check filename exists
+ assert!(path.is_file());
+
+ // Check filename is in the correct location
+ assert!(path.starts_with(temp_dir()));
+
+ // Check filename has random added
+ assert_eq!(path.file_name().unwrap().to_string_lossy().len(), 10);
+
+ // Check filename has only ascii letters / numbers
+ for n in path.file_name().unwrap().to_string_lossy().bytes() {
+ assert!(between(b'0', b'9', n) || between(b'a', b'z', n) || between(b'A', b'Z', n));
+ }
+
+ // Check we can write to the file
+ let mut f = t.as_file();
+ f.write_all(b"hello world").unwrap();
+ f.sync_all().unwrap();
+ assert_eq!(f.metadata().unwrap().len(), 11);
+ }
+
+ #[test]
+ fn test_create_file_new() {
+ let t = TempFile::new().unwrap();
+ let path = t.as_path().to_owned();
+
+ // Check filename is in the correct location
+ assert!(path.starts_with(temp_dir().canonicalize().unwrap()));
+ }
+
+ #[test]
+ fn test_create_file_new_in() {
+ let t = TempFile::new_in(temp_dir().as_path()).unwrap();
+ let path = t.as_path().to_owned();
+
+ // Check filename exists
+ assert!(path.is_file());
+
+ // Check filename is in the correct location
+ assert!(path.starts_with(temp_dir().canonicalize().unwrap()));
+
+ let t = TempFile::new_in(temp_dir().as_path()).unwrap();
+ let path = t.as_path().to_owned();
+
+ // Check filename is in the correct location
+ assert!(path.starts_with(temp_dir().canonicalize().unwrap()));
+ }
+
+ #[test]
+ fn test_remove_file() {
+ let mut prefix = temp_dir();
+ prefix.push("asdf");
+
+ let mut t = TempFile::new_with_prefix(prefix).unwrap();
+ let path = t.as_path().to_owned();
+
+ // Check removal.
+ assert!(t.remove().is_ok());
+ assert!(!path.exists());
+
+ // Calling `as_path()` after the file was removed is allowed.
+ let path_2 = t.as_path().to_owned();
+ assert_eq!(path, path_2);
+
+ // Check trying to remove a second time returns an error.
+ assert!(t.remove().is_err());
+ }
+
+ #[test]
+ fn test_drop_file() {
+ let mut prefix = temp_dir();
+ prefix.push("asdf");
+
+ let t = TempFile::new_with_prefix(prefix).unwrap();
+ let path = t.as_path().to_owned();
+
+ assert!(path.starts_with(temp_dir()));
+ drop(t);
+ assert!(!path.exists());
+ }
+
+ #[test]
+ fn test_into_file() {
+ let mut prefix = temp_dir();
+ prefix.push("asdf");
+
+ let text = b"hello world";
+ let temp_file = TempFile::new_with_prefix(prefix).unwrap();
+ let path = temp_file.as_path().to_owned();
+ fs::write(path, text).unwrap();
+
+ let mut file = temp_file.into_file();
+ let mut buf: Vec<u8> = Vec::new();
+ file.read_to_end(&mut buf).unwrap();
+ assert_eq!(buf, text);
+ }
+}
diff --git a/src/unix/file_traits.rs b/src/unix/file_traits.rs
new file mode 100644
index 0000000..ce9fa29
--- /dev/null
+++ b/src/unix/file_traits.rs
@@ -0,0 +1,101 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Traits for handling file synchronization and length.
+
+use std::fs::File;
+use std::io::Result;
+
+/// A trait for flushing the contents of a file to disk.
+///
+/// This is equivalent to
+/// [`std::fd::File::sync_all`](https://doc.rust-lang.org/std/fs/struct.File.html#method.sync_all)
+/// method, but wrapped in a trait so that it can be implemented for other types.
+pub trait FileSync {
+ /// Flush buffers related to this file to disk.
+ fn fsync(&mut self) -> Result<()>;
+}
+
+impl FileSync for File {
+ fn fsync(&mut self) -> Result<()> {
+ self.sync_all()
+ }
+}
+
+/// A trait for setting the size of a file.
+///
+/// This is equivalent to
+/// [`std::fd::File::set_len`](https://doc.rust-lang.org/std/fs/struct.File.html#method.set_len)
+/// method, but wrapped in a trait so that it can be implemented for other types.
+pub trait FileSetLen {
+ /// Set the size of this file.
+ ///
+ /// This is the moral equivalent of
+ /// [`ftruncate`](http://man7.org/linux/man-pages/man3/ftruncate.3p.html).
+ ///
+ /// # Arguments
+ ///
+ /// * `len`: the size to set for file.
+ fn set_len(&self, len: u64) -> Result<()>;
+}
+
+impl FileSetLen for File {
+ fn set_len(&self, len: u64) -> Result<()> {
+ File::set_len(self, len)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::fs::OpenOptions;
+ use std::io::{Seek, SeekFrom, Write};
+ use std::path::PathBuf;
+
+ use crate::tempdir::TempDir;
+
+ #[test]
+ fn test_fsync() {
+ let tempdir = TempDir::new_with_prefix("/tmp/fsync_test").unwrap();
+ let mut path = PathBuf::from(tempdir.as_path());
+ path.push("file");
+ let mut f = OpenOptions::new()
+ .read(true)
+ .write(true)
+ .create(true)
+ .open(&path)
+ .unwrap();
+ f.write_all(b"Hello, world!").unwrap();
+ f.fsync().unwrap();
+ assert_eq!(f.metadata().unwrap().len(), 13);
+ }
+
+ #[test]
+ fn test_set_len() {
+ let tempdir = TempDir::new_with_prefix("/tmp/set_len_test").unwrap();
+ let mut path = PathBuf::from(tempdir.as_path());
+ path.push("file");
+ let mut f = OpenOptions::new()
+ .read(true)
+ .write(true)
+ .create(true)
+ .open(&path)
+ .unwrap();
+ f.set_len(10).unwrap();
+ assert_eq!(f.seek(SeekFrom::End(0)).unwrap(), 10);
+ }
+
+ #[test]
+ fn test_set_len_fails_when_file_not_opened_for_writing() {
+ let tempdir = TempDir::new_with_prefix("/tmp/set_len_test").unwrap();
+ let mut path = PathBuf::from(tempdir.as_path());
+ path.push("file");
+ File::create(path.clone()).unwrap();
+ let f = OpenOptions::new().read(true).open(&path).unwrap();
+ let result = f.set_len(10);
+ assert!(result.is_err());
+ }
+}
diff --git a/src/unix/mod.rs b/src/unix/mod.rs
new file mode 100644
index 0000000..5c26a9c
--- /dev/null
+++ b/src/unix/mod.rs
@@ -0,0 +1,5 @@
+// Copyright 2022 rust-vmm Authors or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+pub mod file_traits;
+pub mod tempdir;
+pub mod terminal;
diff --git a/src/unix/tempdir.rs b/src/unix/tempdir.rs
new file mode 100644
index 0000000..101d35a
--- /dev/null
+++ b/src/unix/tempdir.rs
@@ -0,0 +1,205 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Structure for handling temporary directories.
+use std::env::temp_dir;
+use std::ffi::{CString, OsStr, OsString};
+use std::fs;
+use std::os::unix::ffi::OsStringExt;
+use std::path::{Path, PathBuf};
+
+use crate::errno::{errno_result, Error, Result};
+
+/// Wrapper over a temporary directory.
+///
+/// The directory will be maintained for the lifetime of the `TempDir` object.
+pub struct TempDir {
+ path: PathBuf,
+}
+
+impl TempDir {
+ /// Creates a new temporary directory with `prefix`.
+ ///
+ /// The directory will be removed when the object goes out of scope.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use vmm_sys_util::tempdir::TempDir;
+ /// let t = TempDir::new_with_prefix("/tmp/testdir").unwrap();
+ /// ```
+ pub fn new_with_prefix<P: AsRef<OsStr>>(prefix: P) -> Result<TempDir> {
+ let mut dir_string = prefix.as_ref().to_os_string();
+ dir_string.push("XXXXXX");
+ // unwrap this result as the internal bytes can't have a null with a valid path.
+ let dir_name = CString::new(dir_string.into_vec()).unwrap();
+ let mut dir_bytes = dir_name.into_bytes_with_nul();
+ // SAFETY: Creating the directory isn't unsafe. The fact that it modifies the guts of the
+ // path is also OK because it only overwrites the last 6 Xs added above.
+ let ret = unsafe { libc::mkdtemp(dir_bytes.as_mut_ptr() as *mut libc::c_char) };
+ if ret.is_null() {
+ return errno_result();
+ }
+ dir_bytes.pop(); // Remove the null becasue from_vec can't handle it.
+ Ok(TempDir {
+ path: PathBuf::from(OsString::from_vec(dir_bytes)),
+ })
+ }
+
+ /// Creates a new temporary directory with inside `path`.
+ ///
+ /// The directory will be removed when the object goes out of scope.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use std::path::Path;
+ /// # use vmm_sys_util::tempdir::TempDir;
+ /// let t = TempDir::new_in(Path::new("/tmp/")).unwrap();
+ /// ```
+ pub fn new_in(path: &Path) -> Result<TempDir> {
+ let mut path_buf = path.canonicalize().unwrap();
+ // This `push` adds a trailing slash ("/whatever/path" -> "/whatever/path/").
+ // This is safe for paths with already trailing slash.
+ path_buf.push("");
+ let temp_dir = TempDir::new_with_prefix(path_buf)?;
+ Ok(temp_dir)
+ }
+
+ /// Creates a new temporary directory with inside `$TMPDIR` if set, otherwise in `/tmp`.
+ ///
+ /// The directory will be removed when the object goes out of scope.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use vmm_sys_util::tempdir::TempDir;
+ /// let t = TempDir::new().unwrap();
+ /// ```
+ pub fn new() -> Result<TempDir> {
+ let mut in_tmp_dir = temp_dir();
+ // This `push` adds a trailing slash ("/tmp" -> "/tmp/").
+ // This is safe for paths with already trailing slash.
+ in_tmp_dir.push("");
+ let temp_dir = TempDir::new_in(in_tmp_dir.as_path())?;
+ Ok(temp_dir)
+ }
+
+ /// Removes the temporary directory.
+ ///
+ /// Calling this is optional as when a `TempDir` object goes out of scope,
+ /// the directory will be removed.
+ /// Calling remove explicitly allows for better error handling.
+ ///
+ /// # Errors
+ ///
+ /// This function can only be called once per object. An error is returned
+ /// otherwise.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use std::path::Path;
+ /// # use std::path::PathBuf;
+ /// # use vmm_sys_util::tempdir::TempDir;
+ /// let temp_dir = TempDir::new_with_prefix("/tmp/testdir").unwrap();
+ /// temp_dir.remove().unwrap();
+ pub fn remove(&self) -> Result<()> {
+ fs::remove_dir_all(&self.path).map_err(Error::from)
+ }
+
+ /// Returns the path to the tempdir.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use std::path::Path;
+ /// # use std::path::PathBuf;
+ /// # use vmm_sys_util::tempdir::TempDir;
+ /// let temp_dir = TempDir::new_with_prefix("/tmp/testdir").unwrap();
+ /// assert!(temp_dir.as_path().exists());
+ pub fn as_path(&self) -> &Path {
+ self.path.as_ref()
+ }
+}
+
+impl Drop for TempDir {
+ fn drop(&mut self) {
+ let _ = self.remove();
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_create_dir() {
+ let t = TempDir::new().unwrap();
+ let path = t.as_path();
+ assert!(path.exists());
+ assert!(path.is_dir());
+ assert!(path.starts_with(temp_dir()));
+ }
+
+ #[test]
+ fn test_create_dir_with_prefix() {
+ let t = TempDir::new_with_prefix("/tmp/testdir").unwrap();
+ let path = t.as_path();
+ assert!(path.exists());
+ assert!(path.is_dir());
+ assert!(path.to_str().unwrap().contains("/tmp/testdir"));
+ }
+
+ #[test]
+ fn test_remove_dir() {
+ use crate::tempfile::TempFile;
+ let t = TempDir::new().unwrap();
+ let path = t.as_path().to_owned();
+ assert!(t.remove().is_ok());
+ // Calling remove twice returns error.
+ assert!(t.remove().is_err());
+ assert!(!path.exists());
+
+ let t = TempDir::new().unwrap();
+ let mut file = TempFile::new_in(t.as_path()).unwrap();
+ let t2 = TempDir::new_in(t.as_path()).unwrap();
+ let mut file2 = TempFile::new_in(t2.as_path()).unwrap();
+ let path2 = t2.as_path().to_owned();
+ assert!(t.remove().is_ok());
+ // Calling t2.remove returns error because parent dir has removed
+ assert!(t2.remove().is_err());
+ assert!(!path2.exists());
+ assert!(file.remove().is_err());
+ assert!(file2.remove().is_err());
+ }
+
+ #[test]
+ fn test_create_dir_in() {
+ let t = TempDir::new_in(Path::new("/tmp")).unwrap();
+ let path = t.as_path();
+ assert!(path.exists());
+ assert!(path.is_dir());
+ assert!(path.starts_with("/tmp/"));
+
+ let t = TempDir::new_in(Path::new("/tmp")).unwrap();
+ let path = t.as_path();
+ assert!(path.exists());
+ assert!(path.is_dir());
+ assert!(path.starts_with("/tmp"));
+ }
+
+ #[test]
+ fn test_drop() {
+ use std::mem::drop;
+ let t = TempDir::new_with_prefix("/tmp/asdf").unwrap();
+ let path = t.as_path().to_owned();
+ // Force tempdir object to go out of scope.
+ drop(t);
+
+ assert!(!(path.exists()));
+ }
+}
diff --git a/src/unix/terminal.rs b/src/unix/terminal.rs
new file mode 100644
index 0000000..093502a
--- /dev/null
+++ b/src/unix/terminal.rs
@@ -0,0 +1,190 @@
+// Copyright 2019 Intel Corporation. All Rights Reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! Trait for working with [`termios`](http://man7.org/linux/man-pages/man3/termios.3.html).
+
+use std::io::StdinLock;
+use std::mem::zeroed;
+use std::os::unix::io::RawFd;
+
+use libc::{
+ c_int, fcntl, isatty, read, tcgetattr, tcsetattr, termios, ECHO, F_GETFL, F_SETFL, ICANON,
+ ISIG, O_NONBLOCK, STDIN_FILENO, TCSANOW,
+};
+
+use crate::errno::{errno_result, Result};
+
+fn modify_mode<F: FnOnce(&mut termios)>(fd: RawFd, f: F) -> Result<()> {
+ // SAFETY: Safe because we check the return value of isatty.
+ if unsafe { isatty(fd) } != 1 {
+ return Ok(());
+ }
+
+ // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
+ // and we check the return result.
+ let mut termios: termios = unsafe { zeroed() };
+ // SAFETY: The parameter is valid and we check the result.
+ let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
+ if ret < 0 {
+ return errno_result();
+ }
+ let mut new_termios = termios;
+ f(&mut new_termios);
+ // SAFETY: Safe because the syscall will only read the extent of termios and we check the
+ // return result.
+ let ret = unsafe { tcsetattr(fd, TCSANOW, &new_termios as *const _) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ Ok(())
+}
+
+fn get_flags(fd: RawFd) -> Result<c_int> {
+ // SAFETY: Safe because no third parameter is expected and we check the return result.
+ let ret = unsafe { fcntl(fd, F_GETFL) };
+ if ret < 0 {
+ return errno_result();
+ }
+ Ok(ret)
+}
+
+fn set_flags(fd: RawFd, flags: c_int) -> Result<()> {
+ // SAFETY: Safe because we supply the third parameter and we check the return result.
+ let ret = unsafe { fcntl(fd, F_SETFL, flags) };
+ if ret < 0 {
+ return errno_result();
+ }
+ Ok(())
+}
+
+/// Trait for file descriptors that are TTYs, according to
+/// [`isatty`](http://man7.org/linux/man-pages/man3/isatty.3.html).
+///
+/// # Safety
+///
+/// This is marked unsafe because the implementation must ensure that the returned
+/// RawFd is a valid fd and that the lifetime of the returned fd is at least that
+/// of the trait object.
+pub unsafe trait Terminal {
+ /// Get the file descriptor of the TTY.
+ fn tty_fd(&self) -> RawFd;
+
+ /// Set this terminal to canonical mode (`ICANON | ECHO | ISIG`).
+ ///
+ /// Enable canonical mode with `ISIG` that generates signal when receiving
+ /// any of the characters INTR, QUIT, SUSP, or DSUSP, and with `ECHO` that echo
+ /// the input characters. Refer to
+ /// [`termios`](http://man7.org/linux/man-pages/man3/termios.3.html).
+ fn set_canon_mode(&self) -> Result<()> {
+ modify_mode(self.tty_fd(), |t| t.c_lflag |= ICANON | ECHO | ISIG)
+ }
+
+ /// Set this terminal to raw mode.
+ ///
+ /// Unset the canonical mode with (`!(ICANON | ECHO | ISIG)`) which means
+ /// input is available character by character, echoing is disabled and special
+ /// signal of receiving characters INTR, QUIT, SUSP, or DSUSP is disabled.
+ fn set_raw_mode(&self) -> Result<()> {
+ modify_mode(self.tty_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG))
+ }
+
+ /// Set this terminal to non-blocking mode.
+ ///
+ /// If `non_block` is `true`, then `read_raw` will not block.
+ /// If `non_block` is `false`, then `read_raw` may block if
+ /// there is nothing to read.
+ fn set_non_block(&self, non_block: bool) -> Result<()> {
+ let old_flags = get_flags(self.tty_fd())?;
+ let new_flags = if non_block {
+ old_flags | O_NONBLOCK
+ } else {
+ old_flags & !O_NONBLOCK
+ };
+ if new_flags != old_flags {
+ set_flags(self.tty_fd(), new_flags)?
+ }
+ Ok(())
+ }
+
+ /// Read from a [`Terminal`](trait.Terminal.html).
+ ///
+ /// Read up to `out.len()` bytes from this terminal without any buffering.
+ /// This may block, depending on if non-blocking was enabled with `set_non_block`
+ /// or if there are any bytes to read.
+ /// If there is at least one byte that is readable, this will not block.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// extern crate vmm_sys_util;
+ /// # use std::io;
+ /// # use std::os::unix::io::RawFd;
+ /// use vmm_sys_util::terminal::Terminal;
+ ///
+ /// let stdin_handle = io::stdin();
+ /// let stdin = stdin_handle.lock();
+ /// assert!(stdin.set_non_block(true).is_ok());
+ ///
+ /// let mut out = [0u8; 0];
+ /// assert_eq!(stdin.read_raw(&mut out[..]).unwrap(), 0);
+ /// ```
+ fn read_raw(&self, out: &mut [u8]) -> Result<usize> {
+ // SAFETY: Safe because read will only modify the pointer up to the length we give it and
+ // we check the return result.
+ let ret = unsafe { read(self.tty_fd(), out.as_mut_ptr() as *mut _, out.len()) };
+ if ret < 0 {
+ return errno_result();
+ }
+
+ Ok(ret as usize)
+ }
+}
+
+// SAFETY: Safe because we return a genuine terminal fd that never changes and shares our lifetime.
+unsafe impl<'a> Terminal for StdinLock<'a> {
+ fn tty_fd(&self) -> RawFd {
+ STDIN_FILENO
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::undocumented_unsafe_blocks)]
+ use super::*;
+ use std::fs::File;
+ use std::io;
+ use std::os::unix::io::AsRawFd;
+ use std::path::Path;
+
+ unsafe impl Terminal for File {
+ fn tty_fd(&self) -> RawFd {
+ self.as_raw_fd()
+ }
+ }
+
+ #[test]
+ fn test_a_tty() {
+ let stdin_handle = io::stdin();
+ let stdin = stdin_handle.lock();
+
+ assert!(stdin.set_canon_mode().is_ok());
+ assert!(stdin.set_raw_mode().is_ok());
+ assert!(stdin.set_raw_mode().is_ok());
+ assert!(stdin.set_canon_mode().is_ok());
+ assert!(stdin.set_non_block(true).is_ok());
+ let mut out = [0u8; 0];
+ assert!(stdin.read_raw(&mut out[..]).is_ok());
+ }
+
+ #[test]
+ fn test_a_non_tty() {
+ let file = File::open(Path::new("/dev/zero")).unwrap();
+ assert!(file.set_canon_mode().is_ok());
+ }
+}