Skip to content

Commit

Permalink
fix(package): check dirtiness of symlink source files
Browse files Browse the repository at this point in the history
This adds a special case for checking source files are symlinks
and have been modified when under a VCS control

This is required because those paths may link to a file outside the
current package root, but still under the git workdir, affecting the
final packaged `.crate` file.

This may have potential performance issue. If a package contains
thousands of symlinks, Cargo will fire `git status` for each of them.
  • Loading branch information
weihanglo committed Dec 29, 2024
1 parent 1098e59 commit 19acb93
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
35 changes: 34 additions & 1 deletion src/cargo/ops/cargo_package/vcs.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Helpers to gather the VCS information for `cargo package`.
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;

Expand Down Expand Up @@ -133,12 +134,14 @@ fn git(
// Find the intersection of dirty in git, and the src_files that would
// be packaged. This is a lazy n^2 check, but seems fine with
// thousands of files.
let mut dirty_files_outside_pkg_root = dirty_symlinks(pkg, repo, src_files)?;
dirty_files_outside_pkg_root.extend(dirty_metadata_paths(pkg, repo)?);
let cwd = gctx.cwd();
let mut dirty_src_files: Vec<_> = src_files
.iter()
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
.map(|p| p.as_ref())
.chain(dirty_metadata_paths(pkg, repo)?.iter())
.chain(dirty_files_outside_pkg_root.iter())
.map(|path| {
pathdiff::diff_paths(path, cwd)
.as_ref()
Expand Down Expand Up @@ -206,6 +209,36 @@ fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<V
Ok(dirty_files)
}

/// Checks whether source files are symlinks and have been modified.
///
/// This is required because those paths may link to a file outside the
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_symlinks(
pkg: &Package,
repo: &git2::Repository,
src_files: &[PathEntry],
) -> CargoResult<HashSet<PathBuf>> {
let workdir = repo.workdir().unwrap();
let mut dirty_symlinks = HashSet::new();
for rel_path in src_files
.iter()
.filter(|p| p.is_symlink_or_under_symlink())
.map(|p| p.as_ref().as_path())
// If inside package root. Don't bother checking git status.
.filter(|p| paths::strip_prefix_canonical(*p, pkg.root()).is_err())
// Handle files outside package root but under git workdir,
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
{
// TODO: Should we warn users if there are like thousands of symlinks,
// which may hurt the performance?
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_symlinks.insert(workdir.join(rel_path));
}
}
Ok(dirty_symlinks)
}

/// Helper to collect dirty statuses for a single repo.
fn collect_statuses(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
let mut status_opts = git2::StatusOptions::new();
Expand Down
4 changes: 3 additions & 1 deletion tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1378,10 +1378,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
p.cargo("package --workspace --no-verify")
.with_status(101)
.with_stderr_data(str![[r#"
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:
[ERROR] 4 files in the working directory contain changes that were not yet committed into git:
LICENSE
README.md
lib.rs
original-dir/file
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
Expand Down

0 comments on commit 19acb93

Please sign in to comment.