Skip to content

Commit

Permalink
Reduce nesting level (read pica records) (#727)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Dec 8, 2023
1 parent 5316238 commit 4cfe5c2
Show file tree
Hide file tree
Showing 12 changed files with 325 additions and 372 deletions.
41 changes: 19 additions & 22 deletions crates/pica-toolkit/src/commands/cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,33 +157,30 @@ impl Cat {
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
if let Err(e) = result {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
Ok(record) => {
progress.record();
}

if self.unique {
let k = key(&record);
let record = result.unwrap();
progress.record();

if k.is_empty() || seen.contains(&k) {
continue;
}
if self.unique {
let k = key(&record);
if k.is_empty() || seen.contains(&k) {
continue;
}

seen.insert(k);
}
seen.insert(k);
}

writer.write_byte_record(&record)?;
if let Some(ref mut writer) = tee_writer {
writer.write_byte_record(&record)?;
}
}
writer.write_byte_record(&record)?;
if let Some(ref mut writer) = tee_writer {
writer.write_byte_record(&record)?;
}
}
}
Expand Down
34 changes: 16 additions & 18 deletions crates/pica-toolkit/src/commands/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,26 +100,24 @@ impl Count {
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
}
Ok(record) => {
progress.record();

records += 1;
fields += record.iter().len();
subfields += record
.iter()
.map(|field| field.subfields().len())
.sum::<usize>();
if let Err(e) = result {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
}

let record = result.unwrap();
progress.record();

records += 1;
fields += record.iter().len();
subfields += record
.iter()
.map(|field| field.subfields().len())
.sum::<usize>();
}
}

Expand Down
33 changes: 15 additions & 18 deletions crates/pica-toolkit/src/commands/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,31 +308,28 @@ impl Explode {
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
}
Ok(record) => {
progress.record();
process_record(
&record,
matcher.as_ref(),
&options,
&mut writer,
)?;
if let Err(e) = result {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
}

let record = result.unwrap();
progress.record();
process_record(
&record,
matcher.as_ref(),
&options,
&mut writer,
)?;
}
}

progress.record();
writer.finish()?;

Ok(())
}
}
111 changes: 53 additions & 58 deletions crates/pica-toolkit/src/commands/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ impl Filter {
let mut filenames = self.filenames;
let filter_str = if let Some(filename) = self.expr_file {
// This "hack" is necessary, because it's not possible to
// distinguish between filter and filenames. If an expression
// file is given, it makes no sense to provide
// distinguish between filter and filenames. If
// an expression file is given, it makes no sense to provide
// a filter expression as CLI argument.
if !self.filter.is_empty() {
if filenames != ["-"] {
Expand Down Expand Up @@ -265,81 +265,76 @@ impl Filter {
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
if let Err(e) = result {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
Ok(mut record) => {
progress.record();
}

if !filter_list.check(record.idn()) {
continue;
}
let mut record = result.unwrap();
progress.record();

let mut is_match =
filter.is_match(&record, &options);
if !filter_list.check(record.idn()) {
continue;
}

if self.invert_match {
is_match = !is_match;
}
let mut is_match = filter.is_match(&record, &options);
if self.invert_match {
is_match = !is_match;
}

if is_match {
if !keep_predicates.is_empty() {
record.retain(|field| {
for (t, o) in keep_predicates.iter()
{
if t.is_match(field.tag())
&& *o == field.occurrence()
{
return true;
}
}
false
});
}
if !is_match {
continue;
}

if !discard_predicates.is_empty() {
record.retain(|field| {
for (t, o) in
discard_predicates.iter()
{
if t.is_match(field.tag())
&& *o == field.occurrence()
{
return false;
}
}
true
});
if !keep_predicates.is_empty() {
record.retain(|field| {
for (t, o) in keep_predicates.iter() {
if t.is_match(field.tag())
&& *o == field.occurrence()
{
return true;
}
}
false
});
}

writer.write_byte_record(&record)?;
if let Some(ref mut writer) = tee_writer {
writer.write_byte_record(&record)?;
if !discard_predicates.is_empty() {
record.retain(|field| {
for (t, o) in discard_predicates.iter() {
if t.is_match(field.tag())
&& *o == field.occurrence()
{
return false;
}

count += 1;
}
true
});
}

if self.limit > 0 && count >= self.limit {
break 'outer;
}
}
writer.write_byte_record(&record)?;
if let Some(ref mut writer) = tee_writer {
writer.write_byte_record(&record)?;
}

count += 1;

if self.limit > 0 && count >= self.limit {
break 'outer;
}
}
}

progress.finish();
writer.finish()?;
if let Some(ref mut writer) = tee_writer {
writer.finish()?;
}

progress.finish();
writer.finish()?;
Ok(())
}
}
Expand Down
39 changes: 18 additions & 21 deletions crates/pica-toolkit/src/commands/frequency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,24 +143,22 @@ impl Frequency {
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
if let Err(e) = result {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
Ok(record) => {
progress.record();

let outcome = record.query(&query, &options);
for key in outcome.clone().into_iter() {
if key.iter().any(|e| !e.is_empty()) {
*ftable.entry(key).or_insert(0) += 1;
}
}
}

let record = result.unwrap();
progress.record();

let outcome = record.query(&query, &options);
for key in outcome.clone().into_iter() {
if key.iter().any(|e| !e.is_empty()) {
*ftable.entry(key).or_insert(0) += 1;
}
}
}
Expand All @@ -185,13 +183,12 @@ impl Frequency {
});
}

for (i, (values, frequency)) in ftable_sorted.iter().enumerate()
{
for (i, (values, freq)) in ftable_sorted.iter().enumerate() {
if self.limit > 0 && i >= self.limit {
break;
}

if **frequency < self.threshold {
if **freq < self.threshold {
break;
}

Expand All @@ -200,7 +197,7 @@ impl Frequency {
.map(|s| NormalizationForm::translit_opt(s, self.nf))
.collect::<Vec<_>>();

record.push(frequency.to_string());
record.push(freq.to_string());
writer.write_record(record)?;
}

Expand Down
Loading

0 comments on commit 4cfe5c2

Please sign in to comment.