From accfcf1a52b6d1afe3e66fc622dbfeca9e54bf7c Mon Sep 17 00:00:00 2001
From: Gabriel de Perthuis
Date: Thu, 18 Jan 2024 14:32:32 +0100
Subject: [PATCH] Add a flag to anchor patterns in the input

Closes #1476.
---
 src/cli.rs  | 37 +++++++++++++++++++++++++++++++++++--
 src/main.rs | 40 +++++++++++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/src/cli.rs b/src/cli.rs
index 6e6163664..b63e63c74 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -168,8 +168,8 @@ pub struct Opts {
     pub regex: bool,
 
     /// Treat the pattern as a literal string instead of a regular expression. Note
-    /// that this also performs substring comparison. If you want to match on an
-    /// exact filename, consider using '--glob'.
+    /// that the pattern would still match on a substring of the input. If you want
+    /// to match on an exact filename, consider adding '--anchor=input' as well.
     #[arg(
         long,
         short = 'F',
@@ -246,6 +246,20 @@ pub struct Opts {
     )]
     pub full_path: bool,
 
+    /// By default, the search pattern for --regex and --fixed-strings can match any part of the input.
+    /// (See the --full-path option for what constitutes input)
+    ///
+    /// This flag allows other ways to anchor the pattern.
+    ///
+    /// Conflicts with the --glob flag: globs always match the entire input
+    #[arg(
+        long,
+        help = "Where to anchor the pattern",
+        conflicts_with("glob"),
+        long_help
+    )]
+    pub anchor: Option<Anchor>,
+
     /// Separate search results by the null character (instead of newlines).
     /// Useful for piping results to 'xargs'.
     #[arg(
@@ -680,6 +694,17 @@ impl Opts {
         self.rg_alias_hidden_ignore > 0
     }
 
+    pub fn anchor(&self) -> Option<Anchor> {
+        if self.glob {
+            // globset has no way to use an anchor.
+            // Otherwise we'd guard like this:
+            // && !self.no_anchor && self.anchor.is_none()
+            Some(Anchor::Input)
+        } else {
+            self.anchor
+        }
+    }
+
     pub fn max_depth(&self) -> Option<usize> {
         self.max_depth.or(self.exact_depth)
     }
@@ -725,6 +750,14 @@ fn default_num_threads() -> NonZeroUsize {
         .min(limit)
 }
 
+#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
+pub enum Anchor {
+    InputStart,
+    InputEnd,
+    Input,
+    Word,
+}
+
 #[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
 pub enum FileType {
     #[value(alias = "f")]
diff --git a/src/main.rs b/src/main.rs
index bef4120ec..e0b3e6a9d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -162,15 +162,41 @@ fn ensure_search_pattern_is_not_a_path(opts: &Opts) -> Result<()> {
     }
 }
 
+/// Adds the assertions selected by `anchors` around the regex source `re`.
+///
+/// The pattern is wrapped in a non-capturing group first so that the
+/// assertions bind to the whole pattern: without the group, anchoring `a|b`
+/// to the input would produce `^a|b$`, which also matches `ax` and `xb`.
+fn apply_anchors(re: String, anchors: Option<cli::Anchor>) -> String {
+    use cli::Anchor;
+    match anchors {
+        None => re,
+        Some(Anchor::InputStart) => format!("^(?:{})", re),
+        Some(Anchor::InputEnd) => format!("(?:{})$", re),
+        Some(Anchor::Input) => format!("^(?:{})$", re),
+        // https://docs.rs/regex/latest/regex/#empty-matches
+        Some(Anchor::Word) => format!(r"\<(?:{})\>", re),
+    }
+}
+
 fn build_pattern_regex(pattern: &str, opts: &Opts) -> Result<String> {
-    Ok(if opts.glob && !pattern.is_empty() {
-        let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
-        glob.regex().to_owned()
-    } else if opts.fixed_strings {
-        // Treat pattern as literal string if '--fixed-strings' is used
-        regex::escape(pattern)
+    Ok(if opts.glob {
+        if !pattern.is_empty() {
+            let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
+            glob.regex().to_owned()
+        } else {
+            "".to_owned()
+        }
     } else {
-        String::from(pattern)
+        apply_anchors(
+            if opts.fixed_strings {
+                // Treat pattern as literal string if '--fixed-strings' is used
+                regex::escape(pattern)
+            } else {
+                String::from(pattern)
+            },
+            opts.anchor(),
+        )
     })
 }
 