Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial attempt at adding support for pcre #1606

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ etcetera = "0.8"
normpath = "1.1.1"
crossbeam-channel = "0.5.13"
clap_complete = {version = "4.5.8", optional = true}
memchr = "2.7.4"
faccess = "0.2.4"
pcre2 = {version = "0.2.9", optional = true}

[dependencies.clap]
version = "4.5.13"
Expand Down Expand Up @@ -91,4 +93,5 @@ codegen-units = 1
use-jemalloc = ["jemallocator"]
completions = ["clap_complete"]
base = ["use-jemalloc"]
pcre = ["dep:pcre2"]
default = ["use-jemalloc", "completions"]
16 changes: 13 additions & 3 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,18 @@ pub struct Opts {
)]
pub regex: bool,

/// Use the PCRE regex engine
///
/// This allows you to use features like backreferences and lookarounds.
#[cfg(feature = "pcre")]
#[arg(
long,
overrides_with_all(["glob", "regex"]),
conflicts_with("fixed_strings"),
long_help
)]
pub pcre: bool,

/// Treat the pattern as a literal string instead of a regular expression. Note
/// that this also performs substring comparison. If you want to match on an
/// exact filename, consider using '--glob'.
Expand Down Expand Up @@ -605,13 +617,11 @@ pub struct Opts {
/// is considered a match. If your pattern starts with a dash (-), make sure to
/// pass '--' first, or it will be considered as a flag (fd -- '-foo').
#[arg(
default_value = "",
hide_default_value = true,
value_name = "pattern",
help = "the search pattern (a regular expression, unless '--glob' is used; optional)",
long_help
)]
pub pattern: String,
pub pattern: Option<String>,

/// Set the path separator to use when printing file paths. The default is
/// the OS-specific separator ('/' on Unix, '\' on Windows).
Expand Down
3 changes: 0 additions & 3 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ use crate::fmt::FormatTemplate;

/// Configuration options for *fd*.
pub struct Config {
/// Whether the search is case-sensitive or case-insensitive.
pub case_sensitive: bool,

/// Whether to search within the full file path or just the base name (filename or directory
/// name).
pub search_full_path: bool,
Expand Down
18 changes: 0 additions & 18 deletions src/filesystem.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use std::borrow::Cow;
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::io;
#[cfg(any(unix, target_os = "redox"))]
Expand Down Expand Up @@ -99,22 +97,6 @@ pub fn is_pipe(_: fs::FileType) -> bool {
false
}

/// Expose the raw bytes of an `OsStr` on Unix-like targets.
///
/// The returned `Cow` is always the `Borrowed` variant: the bytes are a view
/// into `input` and nothing is copied.
#[cfg(any(unix, target_os = "redox"))]
pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> {
    let raw: &[u8] = std::os::unix::ffi::OsStrExt::as_bytes(input);
    Cow::Borrowed(raw)
}

/// Produce the bytes of an `OsStr` on Windows by going through a lossy
/// string conversion (`OsStr::to_string_lossy`).
///
/// When the lossy conversion could borrow from `input`, the result borrows
/// too; when it had to allocate a new `String`, that allocation is reused as
/// the owned byte buffer.
#[cfg(windows)]
pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> {
    match input.to_string_lossy() {
        Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
        Cow::Owned(text) => Cow::Owned(text.into_bytes()),
    }
}

/// Remove the `./` prefix from a path.
pub fn strip_current_dir(path: &Path) -> &Path {
path.strip_prefix(".").unwrap_or(path)
Expand Down
136 changes: 65 additions & 71 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod filter;
mod fmt;
mod hyperlink;
mod output;
mod patterns;
mod regex_helper;
mod walk;

Expand All @@ -21,9 +22,9 @@ use std::time;

use anyhow::{anyhow, bail, Context, Result};
use clap::{CommandFactory, Parser};
use globset::GlobBuilder;
use lscolors::LsColors;
use regex::bytes::{Regex, RegexBuilder, RegexSetBuilder};
use patterns::PatternType;
use regex::bytes::RegexSetBuilder;

use crate::cli::{ColorWhen, HyperlinkWhen, Opts};
use crate::config::Config;
Expand All @@ -33,6 +34,7 @@ use crate::filetypes::FileTypes;
#[cfg(unix)]
use crate::filter::OwnerFilter;
use crate::filter::TimeFilter;
use crate::patterns::build_patterns;
use crate::regex_helper::{pattern_has_uppercase_char, pattern_matches_strings_with_leading_dot};

// We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481
Expand Down Expand Up @@ -70,7 +72,7 @@ fn main() {
}

fn run() -> Result<ExitCode> {
let opts = Opts::parse();
let mut opts = Opts::parse();

#[cfg(feature = "completions")]
if let Some(shell) = opts.gen_completions()? {
Expand All @@ -84,28 +86,24 @@ fn run() -> Result<ExitCode> {
}

ensure_search_pattern_is_not_a_path(&opts)?;
let pattern = &opts.pattern;
let exprs = &opts.exprs;
let empty = Vec::new();
let mut patterns = opts.exprs.take().unwrap_or(Vec::new());
if let Some(pattern) = opts.pattern.take() {
patterns.push(pattern);
}

let pattern_regexps = exprs
.as_ref()
.unwrap_or(&empty)
.iter()
.chain([pattern])
.map(|pat| build_pattern_regex(pat, &opts))
.collect::<Result<Vec<String>>>()?;
let pattern_type = determine_pattern_type(&opts);
// The search will be case-sensitive if the command line flag is set or
// if any of the patterns has an uppercase character (smart case).
let ignore_case = opts.ignore_case
|| !(opts.case_sensitive || patterns.iter().any(|pat| pattern_has_uppercase_char(pat)));

let config = construct_config(opts, &pattern_regexps)?;
let config = construct_config(opts)?;

ensure_use_hidden_option_for_leading_dot_pattern(&config, &pattern_regexps)?;
ensure_use_hidden_option_for_leading_dot_pattern(&patterns, &config, pattern_type)?;

let regexps = pattern_regexps
.into_iter()
.map(|pat| build_regex(pat, &config))
.collect::<Result<Vec<Regex>>>()?;
let matcher = build_patterns(patterns, pattern_type, ignore_case)?;

walk::scan(&search_paths, regexps, config)
walk::scan(&search_paths, matcher, config)
}

#[cfg(feature = "completions")]
Expand Down Expand Up @@ -145,35 +143,38 @@ fn set_working_dir(opts: &Opts) -> Result<()> {

/// Detect if the user accidentally supplied a path instead of a search pattern
fn ensure_search_pattern_is_not_a_path(opts: &Opts) -> Result<()> {
if !opts.full_path
&& opts.pattern.contains(std::path::MAIN_SEPARATOR)
&& Path::new(&opts.pattern).is_dir()
{
Err(anyhow!(
"The search pattern '{pattern}' contains a path-separation character ('{sep}') \
and will not lead to any search results.\n\n\
If you want to search for all files inside the '{pattern}' directory, use a match-all pattern:\n\n \
fd . '{pattern}'\n\n\
Instead, if you want your pattern to match the full file path, use:\n\n \
fd --full-path '{pattern}'",
pattern = &opts.pattern,
sep = std::path::MAIN_SEPARATOR,
))
} else {
Ok(())
if let Some(ref pattern) = opts.pattern {
if !opts.full_path
&& pattern.contains(std::path::MAIN_SEPARATOR)
&& Path::new(pattern).is_dir()
{
return Err(anyhow!(
"The search pattern '{pattern}' contains a path-separation character ('{sep}') \
and will not lead to any search results.\n\n\
If you want to search for all files inside the '{pattern}' directory, use a match-all pattern:\n\n \
fd . '{pattern}'\n\n\
Instead, if you want your pattern to match the full file path, use:\n\n \
fd --full-path '{pattern}'",
pattern = pattern,
sep = std::path::MAIN_SEPARATOR,
));
}
}
Ok(())
}

fn build_pattern_regex(pattern: &str, opts: &Opts) -> Result<String> {
Ok(if opts.glob && !pattern.is_empty() {
let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
glob.regex().to_owned()
fn determine_pattern_type(opts: &Opts) -> PatternType {
#[cfg(feature = "pcre")]
if opts.pcre {
return PatternType::Pcre;
}
if opts.glob {
PatternType::Glob
} else if opts.fixed_strings {
// Treat pattern as literal string if '--fixed-strings' is used
regex::escape(pattern)
PatternType::Fixed
} else {
String::from(pattern)
})
PatternType::Regex
}
}

fn check_path_separator_length(path_separator: Option<&str>) -> Result<()> {
Expand All @@ -190,15 +191,7 @@ fn check_path_separator_length(path_separator: Option<&str>) -> Result<()> {
}
}

fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result<Config> {
// The search will be case-sensitive if the command line flag is set or
// if any of the patterns has an uppercase character (smart case).
let case_sensitive = !opts.ignore_case
&& (opts.case_sensitive
|| pattern_regexps
.iter()
.any(|pat| pattern_has_uppercase_char(pat)));

fn construct_config(mut opts: Opts) -> Result<Config> {
let path_separator = opts
.path_separator
.take()
Expand Down Expand Up @@ -244,7 +237,6 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result<Config
let has_command = command.is_some();

Ok(Config {
case_sensitive,
search_full_path: opts.full_path,
ignore_hidden: !(opts.hidden || opts.rg_alias_ignore()),
read_fdignore: !(opts.no_ignore || opts.rg_alias_ignore()),
Expand Down Expand Up @@ -455,14 +447,13 @@ fn extract_time_constraints(opts: &Opts) -> Result<Vec<TimeFilter>> {
}

fn ensure_use_hidden_option_for_leading_dot_pattern(
patterns: &[String],
config: &Config,
pattern_regexps: &[String],
pattern_type: PatternType,
) -> Result<()> {
if cfg!(unix)
&& config.ignore_hidden
&& pattern_regexps
.iter()
.any(|pat| pattern_matches_strings_with_leading_dot(pat))
&& patterns_match_strings_with_leading_dots(patterns, pattern_type)
{
Err(anyhow!(
"The pattern(s) seems to only match files with a leading dot, but hidden files are \
Expand All @@ -474,17 +465,20 @@ fn ensure_use_hidden_option_for_leading_dot_pattern(
}
}

fn build_regex(pattern_regex: String, config: &Config) -> Result<regex::bytes::Regex> {
RegexBuilder::new(&pattern_regex)
.case_insensitive(!config.case_sensitive)
.dot_matches_new_line(true)
.build()
.map_err(|e| {
anyhow!(
"{}\n\nNote: You can use the '--fixed-strings' option to search for a \
literal string instead of a regular expression. Alternatively, you can \
also use the '--glob' option to match on a glob pattern.",
e.to_string()
)
})
/// Heuristically decide whether any of `patterns` is written so that it
/// targets names beginning with a literal dot.
///
/// The caller uses this to raise an error when such a pattern is combined
/// with hidden files being ignored. How a pattern is inspected depends on
/// `pattern_type`:
///
/// * `Regex` - delegated to `pattern_matches_strings_with_leading_dot`.
/// * `Pcre`  - plain textual check for a leading `^\.`.
/// * `Fixed` - never reported.
/// * `Glob`  - plain textual check for a leading `.`.
fn patterns_match_strings_with_leading_dots(
    patterns: &[String],
    pattern_type: PatternType,
) -> bool {
    match pattern_type {
        PatternType::Regex => patterns
            .iter()
            .any(|pat| pattern_matches_strings_with_leading_dot(pat)),
        // For PCRE just do a basic check if the pattern starts with `^\.`
        // (start anchor followed by an escaped, i.e. literal, dot) since we
        // can't parse it to an AST.
        #[cfg(feature = "pcre")]
        PatternType::Pcre => patterns.iter().any(|pat| pat.starts_with("^\\.")),
        // Fixed strings aren't anchored, so always false.
        PatternType::Fixed => false,
        // Globs: just check if the pattern text starts with a `.`.
        PatternType::Glob => patterns.iter().any(|pat| pat.starts_with('.')),
    }
}
Loading