summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Martin <evan.martin@gmail.com>2024-01-19 00:11:47 -0800
committerEvan Martin <evan.martin@gmail.com>2024-01-19 00:15:20 -0800
commit38899de2f32a5c8bb156f5180c6ba3156552124f (patch)
tree9f47e38f1388ad4c14f0acaf576ec89e46cd0c16
parent909ac6087a2d74903c1d6c3772c7e3823d4b259a (diff)
downloadn2-38899de2f32a5c8bb156f5180c6ba3156552124f.tar.gz
avoid clown shoes allocation of input files
Scanner expects its input to be nul terminated, which I implemented as buf = std::fs::read(file) buf.push(0) However, std::fs::read carefully sizes its buffer to be exactly the file's size, which means the push() must grow the buffer. This means for e.g. a 40mb input file, we'd read it into a 40mb buffer, then grow the buffer, copying it into an 80mb buffer, just to add one byte! Fix this by using a buffer of the appropriate size. I attempted to measure the perf impact here and it wasn't measureable. Possibly this buffer growth really is that fast? It seems it will at least have memory impact at least.
-rw-r--r--benches/parse.rs7
-rw-r--r--src/lib.rs2
-rw-r--r--src/load.rs4
-rw-r--r--src/scanner.rs20
-rw-r--r--src/task.rs5
5 files changed, 27 insertions, 11 deletions
diff --git a/benches/parse.rs b/benches/parse.rs
index 518e458..71abf60 100644
--- a/benches/parse.rs
+++ b/benches/parse.rs
@@ -52,8 +52,7 @@ fn bench_parse_synthetic(c: &mut Criterion) {
});
}
-fn bench_parse_file(c: &mut Criterion, mut input: Vec<u8>) {
- input.push(0);
+fn bench_parse_file(c: &mut Criterion, input: &[u8]) {
c.bench_function("parse benches/build.ninja", |b| {
b.iter(|| {
let mut parser = n2::parse::Parser::new(&input);
@@ -67,8 +66,8 @@ fn bench_parse_file(c: &mut Criterion, mut input: Vec<u8>) {
}
pub fn bench_parse(c: &mut Criterion) {
- match std::fs::read("benches/build.ninja") {
- Ok(input) => bench_parse_file(c, input),
+ match n2::scanner::read_file_with_nul("benches/build.ninja".as_ref()) {
+ Ok(input) => bench_parse_file(c, &input),
Err(err) => {
eprintln!("failed to read benches/build.ninja: {}", err);
eprintln!("using synthetic build.ninja");
diff --git a/src/lib.rs b/src/lib.rs
index fea575a..c1e8cb0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,7 +14,7 @@ mod process_posix;
mod process_win;
mod progress;
pub mod run;
-mod scanner;
+pub mod scanner;
mod signal;
mod smallmap;
mod task;
diff --git a/src/load.rs b/src/load.rs
index 677137a..8297b4b 100644
--- a/src/load.rs
+++ b/src/load.rs
@@ -5,6 +5,7 @@ use crate::{
eval::{EvalPart, EvalString},
graph::{FileId, RspFile},
parse::Statement,
+ scanner,
smallmap::SmallMap,
{db, eval, graph, parse, trace},
};
@@ -172,11 +173,10 @@ impl Loader {
fn read_file(&mut self, id: FileId) -> anyhow::Result<()> {
let path = self.graph.file(id).path().to_path_buf();
- let mut bytes = match trace::scope("fs::read", || std::fs::read(&path)) {
+ let bytes = match trace::scope("read file", || scanner::read_file_with_nul(&path)) {
Ok(b) => b,
Err(e) => bail!("read {}: {}", path.display(), e),
};
- bytes.push(0);
self.parse(path, &bytes)
}
diff --git a/src/scanner.rs b/src/scanner.rs
index af06d2d..091791d 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,6 +1,6 @@
//! Scans an input string (source file) character by character.
-use std::path::Path;
+use std::{io::Read, path::Path};
#[derive(Debug)]
pub struct ParseError {
@@ -132,3 +132,21 @@ impl<'a> Scanner<'a> {
panic!("invalid offset when formatting error")
}
}
+
+/// Scanner wants its input buffer to end in a trailing nul.
+/// This function is like std::fs::read() but appends a nul, efficiently.
+pub fn read_file_with_nul(path: &Path) -> std::io::Result<Vec<u8>> {
+ // Using std::fs::read() to read the file and then pushing a nul on the end
+ // causes us to allocate a buffer the size of the file, then grow it to push
+ // the nul, copying the entire file(!). So instead create a buffer of the
+ // right size up front.
+ let mut file = std::fs::File::open(path)?;
+ let size = file.metadata()?.len() as usize;
+ let mut bytes = Vec::with_capacity(size + 1);
+ unsafe {
+ bytes.set_len(size);
+ }
+ file.read_exact(&mut bytes[..size])?;
+ bytes.push(0);
+ Ok(bytes)
+}
diff --git a/src/task.rs b/src/task.rs
index ec4dc8d..693ab52 100644
--- a/src/task.rs
+++ b/src/task.rs
@@ -12,7 +12,7 @@ use crate::{
depfile,
graph::{Build, BuildId, RspFile},
process,
- scanner::Scanner,
+ scanner::{self, Scanner},
};
use anyhow::{anyhow, bail};
use std::path::{Path, PathBuf};
@@ -38,7 +38,7 @@ pub struct TaskResult {
/// Reads dependencies from a .d file path.
fn read_depfile(path: &Path) -> anyhow::Result<Vec<String>> {
- let mut bytes = match std::fs::read(path) {
+ let bytes = match scanner::read_file_with_nul(path) {
Ok(b) => b,
// See discussion of missing depfiles in #80.
// TODO(#99): warn or error in this circumstance?
@@ -46,7 +46,6 @@ fn read_depfile(path: &Path) -> anyhow::Result<Vec<String>> {
Err(e) => bail!("read {}: {}", path.display(), e),
};
- bytes.push(0);
let mut scanner = Scanner::new(&bytes);
let parsed_deps = depfile::parse(&mut scanner)
.map_err(|err| anyhow!(scanner.format_parse_error(path, err)))?;