Skip to content

Commit

Permalink
Move FilterList into pica-utils crate (#725)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Dec 8, 2023
1 parent 9271f20 commit 0eaee09
Show file tree
Hide file tree
Showing 12 changed files with 173 additions and 177 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@ pica-select = { version = "0.21", path = "./crates/pica-select" }
pica-utils = { version = "0.21", path = "./crates/pica-utils" }

anyhow = "1.0"
arrow2 = "0.18"
bstr = "1.8"
chrono = { version = "0.4", default-features = false }
clap = "4.4"
clap_complete = "4.4"
csv = "1.3"
flate2 = "1.0"
polars = { version = "0.35.4", features = ["ipc", "decompress", "performant"] }
quickcheck = "1.0"
rand = "0.8"
regex = "1.10"
serde = "1.0"
quickcheck = "1.0"
serde_json = "1.0"
sha2 = "0.10"
thiserror = "1.0"
Expand Down
1 change: 0 additions & 1 deletion crates/pica-toolkit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ keywords.workspace = true
edition.workspace = true

[dependencies]
arrow2 = { workspace = true, features = ["io_ipc", "io_ipc_compression"] }
bstr = { workspace = true }
clap = { workspace = true, features = ["cargo", "derive", "wrap_help"] }
clap_complete = { workspace = true }
Expand Down
38 changes: 12 additions & 26 deletions crates/pica-toolkit/src/commands/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ use pica_matcher::{
MatcherOptions, OccurrenceMatcher, ParseMatcherError,
RecordMatcher, TagMatcher,
};
use pica_path::PathExt;
use pica_record::io::{ReaderBuilder, RecordsIterator, WriterBuilder};
use pica_utils::NormalizationForm;
use pica_utils::{FilterList, NormalizationForm};
use serde::{Deserialize, Serialize};

use crate::common::FilterList;
use crate::progress::Progress;
use crate::util::{CliError, CliResult};
use crate::{gzip_flag, skip_invalid_flag, Config};
Expand Down Expand Up @@ -69,8 +69,8 @@ pub(crate) struct Filter {
/// If the file extension is `.feather`, `.arrow`, or `.ipc` the
/// file is automatically interpreted as Apache Arrow;
/// otherwise the file is read as CSV.
#[arg(long, short = 'A')]
allow_list: Vec<PathBuf>,
#[arg(long = "allow-lists", short = 'A')]
allow_lists: Vec<PathBuf>,

/// Ignore records which are explicitly listed in one of the
/// given deny-lists.
Expand All @@ -80,8 +80,8 @@ pub(crate) struct Filter {
/// If the file extension is `.feather`, `.arrow`, or `.ipc` the
/// file is automatically interpreted as Apache Arrow;
/// otherwise the file is read as CSV.
#[arg(long, short = 'D')]
deny_list: Vec<PathBuf>,
#[arg(long = "deny-lists", short = 'D')]
deny_lists: Vec<PathBuf>,

/// Limit the result to first <n> records
///
Expand Down Expand Up @@ -247,17 +247,11 @@ impl Filter {
}
}

let allow_list = if !self.allow_list.is_empty() {
FilterList::new(self.allow_list)?
} else {
FilterList::default()
};

let deny_list = if !self.deny_list.is_empty() {
FilterList::new(self.deny_list)?
} else {
FilterList::default()
};
// Build the combined allow/deny filter from the list files given on the
// command line.
// NOTE(review): the code this replaces propagated list-loading failures
// with `?`; the `unwrap()` calls below panic instead whenever `allow` or
// `deny` reports a failure. Presumably that happens for an unreadable or
// malformed list file (user input) — confirm, and consider converting the
// error into `CliError` so it can be returned rather than aborting.
let filter_list = FilterList::new()
.allow(self.allow_lists)
.unwrap()
.deny(self.deny_lists)
.unwrap();

let mut progress = Progress::new(self.progress);

Expand All @@ -283,15 +277,7 @@ impl Filter {
Ok(mut record) => {
progress.record();

if !allow_list.is_empty()
&& !allow_list.check(&record)
{
continue;
}

if !deny_list.is_empty()
&& deny_list.check(&record)
{
if !filter_list.check(record.idn()) {
continue;
}

Expand Down
38 changes: 12 additions & 26 deletions crates/pica-toolkit/src/commands/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ use std::str::FromStr;

use clap::Parser;
use pica_matcher::{MatcherOptions, RecordMatcher};
use pica_path::PathExt;
use pica_record::io::{ReaderBuilder, RecordsIterator};
use pica_select::{Query, QueryExt, QueryOptions};
use pica_utils::NormalizationForm;
use pica_utils::{FilterList, NormalizationForm};
use serde::{Deserialize, Serialize};

use crate::common::FilterList;
use crate::config::Config;
use crate::progress::Progress;
use crate::skip_invalid_flag;
Expand Down Expand Up @@ -119,8 +119,8 @@ pub(crate) struct Select {
/// If the file extension is `.feather`, `.arrow`, or `.ipc` the
/// file is automatically interpreted as Apache Arrow;
/// otherwise the file is read as CSV.
#[arg(long, short = 'A')]
allow_list: Vec<PathBuf>,
#[arg(long = "allow-lists", short = 'A')]
allow_lists: Vec<PathBuf>,

/// Ignore records which are explicitly listed in one of the
/// given deny-lists.
Expand All @@ -130,8 +130,8 @@ pub(crate) struct Select {
/// If the file extension is `.feather`, `.arrow`, or `.ipc` the
/// file is automatically interpreted as Apache Arrow;
/// otherwise the file is read as CSV.
#[arg(long, short = 'D')]
deny_list: Vec<PathBuf>,
#[arg(long = "deny-lists", short = 'D')]
deny_lists: Vec<PathBuf>,

/// Show progress bar (requires `-o`/`--output`).
#[arg(short, long, requires = "output")]
Expand Down Expand Up @@ -235,17 +235,11 @@ impl Select {
None
};

let allow_list = if !self.allow_list.is_empty() {
FilterList::new(self.allow_list)?
} else {
FilterList::default()
};

let deny_list = if !self.deny_list.is_empty() {
FilterList::new(self.deny_list)?
} else {
FilterList::default()
};
// Build the combined allow/deny filter from the list files given on the
// command line.
// NOTE(review): the code this replaces propagated list-loading failures
// with `?`; the `unwrap()` calls below panic instead whenever `allow` or
// `deny` reports a failure. Presumably that happens for an unreadable or
// malformed list file (user input) — confirm, and consider converting the
// error into the command's error type so it can be returned.
let filter_list = FilterList::new()
.allow(self.allow_lists)
.unwrap()
.deny(self.deny_lists)
.unwrap();

let query = NormalizationForm::translit_opt(&self.query, nf);
let query = Query::from_str(&query)?;
Expand Down Expand Up @@ -277,15 +271,7 @@ impl Select {
Ok(record) => {
progess.record();

if !allow_list.is_empty()
&& !allow_list.check(&record)
{
continue;
}

if !deny_list.is_empty()
&& deny_list.check(&record)
{
if !filter_list.check(record.idn()) {
continue;
}

Expand Down
120 changes: 0 additions & 120 deletions crates/pica-toolkit/src/common.rs

This file was deleted.

1 change: 0 additions & 1 deletion crates/pica-toolkit/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ extern crate serde;
extern crate termcolor;

mod commands;
mod common;
mod config;
mod macros;
mod progress;
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
idn
040011569
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
idn
118515551
1012852784
5 changes: 4 additions & 1 deletion crates/pica-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,8 @@ keywords.workspace = true
edition.workspace = true

[dependencies]
unicode-normalization = { version = "0.1.22" }
bstr = { workspace = true }
polars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
unicode-normalization = { version = "0.1.22" }
thiserror = { workspace = true }
Loading

0 comments on commit 0eaee09

Please sign in to comment.