Skip to content

Commit

Permalink
Allow subfield code ranges in subfield matcher (#733)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Dec 11, 2023
1 parent 2f9272d commit a53065a
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 6 deletions.
59 changes: 53 additions & 6 deletions crates/pica-matcher/src/subfield_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use regex::bytes::{Regex, RegexBuilder};
use strsim::normalized_levenshtein;
use winnow::ascii::digit1;
use winnow::combinator::{
alt, delimited, opt, preceded, repeat, separated, terminated,
alt, delimited, fold_repeat, opt, preceded, repeat, separated,
separated_pair, terminated,
};
use winnow::error::ParserError;
use winnow::{PResult, Parser};
Expand All @@ -35,12 +36,51 @@ pub struct ExistsMatcher {
const SUBFIELD_CODES: &str =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

#[inline]
fn parse_subfield_code_range(i: &mut &[u8]) -> PResult<Vec<char>> {
separated_pair(parse_subfield_code, '-', parse_subfield_code)
.verify(|(min, max)| min < max)
.map(|(min, max)| (min..=max).collect())
.parse_next(i)
}

/// Parse exactly one subfield code and wrap it in a one-element vector,
/// so that it has the same output type as the list and range parsers.
#[inline]
fn parse_subfield_code_single(i: &mut &[u8]) -> PResult<Vec<char>> {
    // `map` here is `Result::map`: the parser runs first and only a
    // successful result is wrapped.
    parse_subfield_code
        .parse_next(i)
        .map(|code| vec![code])
}

#[inline]
fn parse_subfield_code_list(i: &mut &[u8]) -> PResult<Vec<char>> {
delimited(
'[',
fold_repeat(
1..,
alt((
parse_subfield_code_range,
parse_subfield_code_single,
)),
Vec::new,
|mut acc: Vec<_>, item| {
acc.extend_from_slice(&item);
acc
},
),
']',
)
.parse_next(i)
}

#[inline]
fn parse_subfield_code_wildcard(i: &mut &[u8]) -> PResult<Vec<char>> {
'*'.value(SUBFIELD_CODES.chars().collect()).parse_next(i)
}

/// Parse a subfield code selector into the list of codes it denotes.
///
/// The following forms are accepted and tried in this order:
///
/// - a bracketed list such as `[12]` or `[1-3a-cx]`
///   (`parse_subfield_code_list`),
/// - a single code such as `a` (`parse_subfield_code_single`),
/// - the wildcard `*` (`parse_subfield_code_wildcard`).
fn parse_subfield_codes(i: &mut &[u8]) -> PResult<Vec<char>> {
    alt((
        parse_subfield_code_list,
        parse_subfield_code_single,
        parse_subfield_code_wildcard,
    ))
    .parse_next(i)
}
Expand Down Expand Up @@ -1043,12 +1083,18 @@ mod tests {
parse_success!(code.to_string().as_bytes(), vec![*code]);
}

parse_success!(b"[12]", vec!['1', '2']);
parse_success!(b"*", codes);
parse_success!(b"[12]", vec!['1', '2']);
parse_success!(b"[1-3]", vec!['1', '2', '3']);
parse_success!(
b"[1-3a-cx]",
vec!['1', '2', '3', 'a', 'b', 'c', 'x']
);

assert!(super::parse_subfield_codes.parse(b"!").is_err());
assert!(super::parse_subfield_codes.parse(b"12").is_err());
assert!(super::parse_subfield_codes.parse(b"[a1!]").is_err());
assert!(super::parse_subfield_codes.parse(b"[2-2]").is_err());
}

#[test]
Expand All @@ -1063,10 +1109,11 @@ mod tests {
}

parse_success!(b"*?", SUBFIELD_CODES.chars().collect());
parse_success!(b"[a-f]?", vec!['a', 'b', 'c', 'd', 'e', 'f']);
parse_success!(b"[a-cf]?", vec!['a', 'b', 'c', 'f']);
parse_success!(b"[ab]?", vec!['a', 'b']);
parse_success!(b"a?", vec!['a']);

assert!(super::parse_exists_matcher.parse(b"[a-f]?").is_err());
assert!(super::parse_exists_matcher.parse(b"a ?").is_err());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
bin.name = "pica"
args = "filter '041[A@].* =? \"Algebra\"'"
status = "success"
stderr = ""

0 comments on commit a53065a

Please sign in to comment.