Skip to content

Commit

Permalink
Add --unique flag to frequency command (#731)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Dec 9, 2023
1 parent 8dbcd84 commit 5e6cfc9
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 1 deletion.
16 changes: 15 additions & 1 deletion crates/pica-toolkit/src/commands/frequency.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::cmp::Ordering;
use std::collections::HashMap;
use std::collections::{BTreeSet, HashMap};
use std::ffi::OsString;
use std::fs::File;
use std::io::{self, Write};
Expand Down Expand Up @@ -51,6 +51,10 @@ pub(crate) struct Frequency {
default_value = "75")]
strsim_threshold: u8,

/// Skip duplicate rows (of a record).
#[arg(long, short)]
unique: bool,

/// Sort results in reverse order.
#[arg(long, short)]
reverse: bool,
Expand Down Expand Up @@ -137,6 +141,7 @@ impl Frequency {
.from_writer(writer);

let mut progress = Progress::new(self.progress);
let mut seen = BTreeSet::new();

for filename in self.filenames {
let mut reader =
Expand All @@ -154,10 +159,19 @@ impl Frequency {

let record = result.unwrap();
progress.record();
seen.clear();

let outcome = record.query(&query, &options);
for key in outcome.clone().into_iter() {
if key.iter().any(|e| !e.is_empty()) {
if self.unique {
if seen.contains(&key) {
continue;
}

seen.insert(key.clone());
}

*ftable.entry(key).or_insert(0) += 1;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CLI test case: runs `pica frequency -u '007[KN].0'` and expects a successful
# exit status, exactly one line on stdout (`4001156-2` with a count of 1), and
# nothing on stderr.
# NOTE(review): the input records are not named here — presumably supplied by
# a sibling fixture (e.g. stdin); confirm against the test harness.
bin.name = "pica"
args = "frequency -u '007[KN].0'"
status = "success"
stdout = "4001156-2,1\n"
stderr = ""
2 changes: 2 additions & 0 deletions docs/book/src/referenz/kommandos/frequency.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Ts1,1
ignoriert.
* `--strsim-threshold <value>` — Festlegen des Schwellenwerts beim
Ähnlichkeitsvergleich von Zeichenketten mittels `=*`.
* `--unique`, `-u` — Doppelte Werte eines Datensatzes werden
ignoriert.
* `--reverse` — Ergebnisse werden in aufsteigender Reihenfolge
ausgegeben.
* `-l`, `--limit` `<n>` — Eingrenzung der Ausgabe auf die häufigsten
Expand Down

0 comments on commit 5e6cfc9

Please sign in to comment.