Skip to content

Commit

Permalink
Improve performance of VCS diff listing
Browse files Browse the repository at this point in the history
Improves performance of listing yarn lock files and VCS changes by:

* changing exclude patterns
* using rayon

Test Plan: cargo test

Reviewers: pancaspe87, #atlaspack-core

Reviewed By: pancaspe87

Pull Request: #343
  • Loading branch information
yamadapc authored Feb 18, 2025
1 parent 1b1ef6e commit 27667e3
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 73 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/atlaspack_vcs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ anyhow = { workspace = true }
git2 = { workspace = true }
hex = { workspace = true }
nom = { workspace = true }
rayon = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_yaml = { workspace = true }
tracing = { workspace = true }
Expand Down
158 changes: 85 additions & 73 deletions crates/atlaspack_vcs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
use anyhow::anyhow;
use git2::{DiffOptions, Repository};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::{
hash::{Hash, Hasher},
Expand Down Expand Up @@ -148,11 +149,15 @@ impl VCSState {
exclude_patterns: &[String],
failure_mode: FailureMode,
) -> anyhow::Result<VCSState> {
tracing::info!("Reading VCS state");
let repo = Repository::open(path)?;
let head = repo.revparse_single("HEAD")?.peel_to_commit()?;
let git_hash = head.id().to_string();
tracing::info!("Found head commit");
let file_listing = vcs_list_dirty_files(path, exclude_patterns)?;
tracing::info!("Listed dirty files");
let yarn_states = list_yarn_states(path, failure_mode)?;
tracing::info!("Listed yarn states");

Ok(VCSState {
git_hash,
Expand All @@ -166,46 +171,49 @@ pub fn list_yarn_states(
repo: &Path,
failure_mode: FailureMode,
) -> anyhow::Result<Vec<YarnSnapshot>> {
let files = vcs_list_files(repo)?;
let yarn_lock_files = files
.iter()
.filter(|file| file.ends_with("yarn.lock"))
.cloned()
.collect::<Vec<_>>();

let mut yarn_states = Vec::new();

for file in yarn_lock_files {
let yarn_lock_path = repo.join(file);
let node_modules_relative_path = yarn_lock_path.parent().unwrap().join("node_modules");

let yarn_lock_blob = std::fs::read(&yarn_lock_path)?;
let yarn_lock: Result<YarnLock, _> = parse_yarn_lock(&String::from_utf8(yarn_lock_blob)?);
if failure_mode != FailureMode::FailOnMissingNodeModules && yarn_lock.is_err() {
continue;
};
let yarn_lock = yarn_lock?;

let node_modules_path = repo.join(&node_modules_relative_path);
let yarn_state = parse_yarn_state_file(&node_modules_path).map_err(|err| {
tracing::warn!(
"Failed to read .yarn-state.yml {node_modules_relative_path:?} {}",
err
);

anyhow!("Failed to read .yarn-state.yml at {node_modules_relative_path:?}: {err}")
});
if failure_mode != FailureMode::FailOnMissingNodeModules && yarn_state.is_err() {
continue;
};
let yarn_state = yarn_state?;
let yarn_snapshot = YarnSnapshot {
yarn_lock_path,
yarn_lock,
yarn_state,
};
yarn_states.push(yarn_snapshot);
}
let yarn_lock_files = vcs_list_yarn_lock_files(repo)?;
tracing::info!("Found yarn.lock files");

let yarn_states = yarn_lock_files
.par_iter()
.map(|file| -> anyhow::Result<_> {
let yarn_lock_path = repo.join(file);
let node_modules_relative_path = yarn_lock_path.parent().unwrap().join("node_modules");

let yarn_lock_blob = std::fs::read(&yarn_lock_path)?;
let yarn_lock: Result<YarnLock, _> = parse_yarn_lock(&String::from_utf8(yarn_lock_blob)?);
if failure_mode != FailureMode::FailOnMissingNodeModules && yarn_lock.is_err() {
return Ok(None);
};
let yarn_lock = yarn_lock?;

let node_modules_path = repo.join(&node_modules_relative_path);
let yarn_state = parse_yarn_state_file(&node_modules_path).map_err(|err| {
tracing::warn!(
"Failed to read .yarn-state.yml {node_modules_relative_path:?} {}",
err
);

anyhow!("Failed to read .yarn-state.yml at {node_modules_relative_path:?}: {err}")
});
if failure_mode != FailureMode::FailOnMissingNodeModules && yarn_state.is_err() {
return Ok(None);
};
let yarn_state = yarn_state?;
let yarn_snapshot = YarnSnapshot {
yarn_lock_path,
yarn_lock,
yarn_state,
};

Ok(Some(yarn_snapshot))
})
.filter_map(|result| match result {
Ok(Some(yarn_snapshot)) => Some(Ok(yarn_snapshot)),
Ok(None) => None,
Err(err) => Some(Err(err)),
})
.collect::<anyhow::Result<Vec<_>>>()?;

Ok(yarn_states)
}
Expand Down Expand Up @@ -324,15 +332,18 @@ pub fn get_changed_files(
Ok(changed_files)
}

pub fn vcs_list_files(dir: &Path) -> Result<Vec<String>, anyhow::Error> {
pub fn vcs_list_yarn_lock_files(dir: &Path) -> Result<Vec<String>, anyhow::Error> {
let mut command = Command::new("git");
command
.arg("ls-files")
.arg("--cached")
.arg("--exclude=yarn.lock")
.arg("--ignored")
.arg("-z") // We separate rows by \0 to avoid issues with newlines
.current_dir(dir);
let command = command.output()?;
if !command.status.success() {
tracing::error!("git lfs-files: {:?}", String::from_utf8(command.stderr));
tracing::error!("git ls-files: {:?}", String::from_utf8(command.stderr));
return Err(anyhow::anyhow!("Git ls-files failed"));
}
let output = String::from_utf8(command.stdout).unwrap();
Expand All @@ -354,50 +365,51 @@ pub fn vcs_list_dirty_files(
let mut command = Command::new("git");
command
.arg("ls-files")
// .arg("--format=%(objectname)%(path)")
.arg("--deleted")
.arg("--modified")
.arg("--others")
.arg("--exclude-standard")
.arg("-z") // We separate rows by \0 to avoid issues with newlines
.current_dir(dir);
for pattern in exclude_patterns {
command.arg(format!("--exclude={}", pattern));
}
let command = command.output()?;
if !command.status.success() {
tracing::error!("git lfs-files: {:?}", String::from_utf8(command.stderr));
tracing::error!("git ls-files: {:?}", String::from_utf8(command.stderr));
return Err(anyhow::anyhow!("Git ls-files failed"));
}
let output = String::from_utf8(command.stdout).unwrap();
let lines = output.split_terminator('\0');

let mut results = Vec::new();

for relative_path in lines {
tracing::debug!("Hashing file {}", relative_path);
let path = Path::new(relative_path);
let path = dir.join(path);

// We hash the contents of the file but if it's a symlink we hash the target
// path instead rather than following the link.
let metadata = std::fs::symlink_metadata(&path)?;
let contents = if metadata.is_symlink() {
std::fs::read_link(&path)?
.to_str()
.unwrap()
.as_bytes()
.to_vec()
} else {
std::fs::read(&path)?
};
let mut state = std::collections::hash_map::DefaultHasher::new();
contents.hash(&mut state);
let hash = state.finish();
results.push(VCSFile {
path: relative_path.into(),
hash,
});
}
let lines: Vec<_> = output.split_terminator('\0').map(String::from).collect();

let results = lines
.par_iter()
.map(|relative_path| {
tracing::info!("Hashing file {}", relative_path);
let path = Path::new(relative_path);
let path = dir.join(path);

// We hash the contents of the file but if it's a symlink we hash the target
// path instead rather than following the link.
let metadata = std::fs::symlink_metadata(&path)?;
let contents = if metadata.is_symlink() {
std::fs::read_link(&path)?
.to_str()
.unwrap()
.as_bytes()
.to_vec()
} else {
std::fs::read(&path)?
};
let mut state = std::collections::hash_map::DefaultHasher::new();
contents.hash(&mut state);
let hash = state.finish();
Ok(VCSFile {
path: relative_path.into(),
hash,
})
})
.collect::<anyhow::Result<Vec<_>>>()?;

Ok(results)
}

0 comments on commit 27667e3

Please sign in to comment.