From d27801948b5bf68188a372dbacc2bf3182902247 Mon Sep 17 00:00:00 2001 From: George Helman Date: Sun, 30 Apr 2023 14:01:35 -0400 Subject: [PATCH 1/2] Start parsing offense record count --- README.md | 2 +- ciprs_reader/parser/section/offense.py | 1 + tests/parsers/test_offense.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e0c4f99..36e3ea7 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ pip install -e . Read CIPRS PDF: ``` -python ciprs_-eader.py ./cypress-example.pdf +python ciprs_reader.py ./cypress-example.pdf ``` Run Jupyter: diff --git a/ciprs_reader/parser/section/offense.py b/ciprs_reader/parser/section/offense.py index 35f9823..faebd16 100644 --- a/ciprs_reader/parser/section/offense.py +++ b/ciprs_reader/parser/section/offense.py @@ -50,6 +50,7 @@ def clean(self, matches): def extract(self, matches, report): record = { + "Count": matches["num"], "Action": matches["action"], "Description": matches["desc"], "Severity": matches["severity"], diff --git a/tests/parsers/test_offense.py b/tests/parsers/test_offense.py index bff5b58..1fcfeb6 100644 --- a/tests/parsers/test_offense.py +++ b/tests/parsers/test_offense.py @@ -21,6 +21,7 @@ def test_offense_record_charged_with_number(report, state): """ matches = offense.OffenseRecordRowWithNumber(report, state).match(string) assert matches is not None, "Regex match failed" + assert matches["count"] == "54" assert matches["action"] == "CHARGED" assert matches["desc"] == "SPEEDING(80 mph in a 65 mph zone)" assert matches["severity"] == "INFRACTION" From 7d33c56ecdad229a35bd9697aa1a5037656514a1 Mon Sep 17 00:00:00 2001 From: George Helman Date: Sun, 30 Apr 2023 15:20:54 -0400 Subject: [PATCH 2/2] Fix UTs --- README.md | 12 +++++ tests/parsers/test_offense.py | 2 +- .../expected_output/test_redacted_1.json | 1 + .../expected_output/test_redacted_2.json | 54 ++++++++++++++++++- 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 36e3ea7..3a9e6f7 100644 --- a/README.md +++ b/README.md @@ -52,10 +52,22 @@ Example output: Pre-requisites: +Mac ``` brew cask install pdftotext ``` +Ubuntu +``` +sudo apt-get install -y poppler-utils +``` + +``` +wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz \ + && tar -xvf xpdf-tools-linux-4.04.tar.gz \ + && cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin/pdftotext-4 +``` + Setup: ```bash diff --git a/tests/parsers/test_offense.py b/tests/parsers/test_offense.py index 1fcfeb6..5f65266 100644 --- a/tests/parsers/test_offense.py +++ b/tests/parsers/test_offense.py @@ -21,7 +21,7 @@ def test_offense_record_charged_with_number(report, state): """ matches = offense.OffenseRecordRowWithNumber(report, state).match(string) assert matches is not None, "Regex match failed" - assert matches["count"] == "54" + assert matches["num"] == "54" assert matches["action"] == "CHARGED" assert matches["desc"] == "SPEEDING(80 mph in a 65 mph zone)" assert matches["severity"] == "INFRACTION" diff --git a/tests/test_records/expected_output/test_redacted_1.json b/tests/test_records/expected_output/test_redacted_1.json index 8557056..2a69cbd 100644 --- a/tests/test_records/expected_output/test_redacted_1.json +++ b/tests/test_records/expected_output/test_redacted_1.json @@ -19,6 +19,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "POSSESS MARIJUANA UP TO 1/2 OZ", "Severity": "MISDEMEANOR", diff --git a/tests/test_records/expected_output/test_redacted_2.json b/tests/test_records/expected_output/test_redacted_2.json index d8e4925..34c5d99 100644 --- a/tests/test_records/expected_output/test_redacted_2.json +++ b/tests/test_records/expected_output/test_redacted_2.json @@ -19,6 +19,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "PWISD CRACK COCAINE", "Severity": "FELONY", @@ -60,6 +61,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "ASSAULT ON A CHILD UNDER 12", "Severity": "MISDEMEANOR", @@ -93,6 +95,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "ASSAULT ON A CHILD UNDER 12", "Severity": "MISDEMEANOR", @@ -134,6 +137,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "COMMUNICATING THREATS", "Severity": "MISDEMEANOR", @@ -175,6 +179,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "PWIMSD SCH II CS", "Severity": "FELONY", @@ -189,6 +194,7 @@ { "Records": [ { + "Count": "51", "Action": "CHARGED", "Description": "PWIMSD SCH II CS", "Severity": "FELONY", @@ -229,6 +235,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "FELONY POSSESSION SCH II CS", "Severity": "FELONY", @@ -243,6 +250,7 @@ { "Records": [ { + "Count": "51", "Action": "CHARGED", "Description": "FELONY POSSESSION SCH II CS", "Severity": "FELONY", @@ -255,6 +263,7 @@ { "Records": [ { + "Count": "52", "Action": "CHARGED", "Description": "COMMUNICATING THREATS", "Severity": "MISDEMEANOR", @@ -287,6 +296,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "ASSAULT WITH A DEADLY WEAPON", "Severity": "MISDEMEANOR", @@ -320,6 +330,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "COMMUNICATING THREATS", "Severity": "MISDEMEANOR", @@ -353,6 +364,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE ASSAULT", "Severity": "MISDEMEANOR", @@ -386,6 +398,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "ASSAULT ON A CHILD UNDER 12", "Severity": "MISDEMEANOR", @@ -419,6 +432,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE WORTHLESS CHECK($198.35)", "Severity": "MISDEMEANOR", @@ -460,6 +474,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE WORTHLESS CHECK($93.47)", "Severity": "MISDEMEANOR", @@ -501,6 +516,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "RESISTING PUBLIC OFFICER", "Severity": "MISDEMEANOR", @@ -521,6 +537,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "FICTITIOUS INFO TO OFFICER", "Severity": "TRAFFIC", @@ -554,6 +571,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE POSSESS SCH VI CS (M)", "Severity": "MISDEMEANOR", @@ -587,6 +605,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "MAINTN VEH/DWELL/PLACE CS (F)", "Severity": "FELONY", @@ -628,6 +647,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "OBSTRUCTING JUSTICE", "Severity": "MISDEMEANOR", @@ -640,6 +660,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "FELONY CONSPIRACY", "Severity": "FELONY", @@ -652,6 +673,7 @@ { "Records": [ { + "Count": "03", "Action": "CHARGED", "Description": "INTIMIDATING WITNESS", "Severity": "FELONY", @@ -666,6 +688,7 @@ { "Records": [ { + "Count": "51", "Action": "CHARGED", "Description": "ATTEMPTED CONSPIRACY TO COMMIT MURDER", "Severity": "FELONY", @@ -678,6 +701,7 @@ { "Records": [ { + "Count": "52", "Action": "CHARGED", "Description": "SOLICITATION TO COMMIT AWDWISI", "Severity": "FELONY", @@ -698,6 +722,7 @@ { "Records": [ { + "Count": "53", "Action": "CHARGED", "Description": "INTIMIDATING WITNESS", "Severity": "FELONY", @@ -730,6 +755,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SPEEDING(52 mph in a 35 mph zone)", "Severity": "TRAFFIC", @@ -771,6 +797,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "POSSESSION OF FIREARM BY FELON", "Severity": "FELONY", @@ -804,6 +831,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "FAIL TO RETURN RENTAL PROPERTY", "Severity": "MISDEMEANOR", @@ -824,6 +852,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "MISDEMEANOR PROBATION VIOL", "Severity": "MISDEMEANOR", @@ -838,6 +867,7 @@ { "Records": [ { + "Count": "51", "Action": "CHARGED", "Description": "MISDEMEANOR PROBATION VIOL", "Severity": "MISDEMEANOR", @@ -850,6 +880,7 @@ { "Records": [ { + "Count": "52", "Action": "CHARGED", "Description": "MISDEMEANOR PROBATION VIOL", "Severity": "R", @@ -882,6 +913,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "HARASSING PHONE CALL", "Severity": "MISDEMEANOR", @@ -915,6 +947,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "DRIVE/ALLOW MV NO REGISTRATION", "Severity": "TRAFFIC", @@ -927,6 +960,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "DWLR", "Severity": "TRAFFIC", @@ -968,6 +1002,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "FICTITIOUS INFO TO OFFICER", "Severity": "TRAFFIC", @@ -980,6 +1015,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "RESISTING PUBLIC OFFICER", "Severity": "MISDEMEANOR", @@ -1021,6 +1057,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "DWLR", "Severity": "TRAFFIC", @@ -1054,6 +1091,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "RESISTING PUBLIC OFFICER", "Severity": "MISDEMEANOR", @@ -1087,6 +1125,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "DWLR", "Severity": "TRAFFIC", @@ -1099,6 +1138,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "FICT/ALT TITLE/REG CARD/TAG", "Severity": "TRAFFIC", @@ -1132,6 +1172,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "POSSESSION OF SCHEDULE II CONTROLLED SUBSTANC", "Severity": "FELONY", @@ -1145,6 +1186,7 @@ "_multiline": true, "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "POSSESSION OF SCHEDULE II CONTROLLED SUBSTANC", "Severity": "FELONY", @@ -1179,6 +1221,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "FAIL TO RETURN RENTAL PROPERTY", "Severity": "MISDEMEANOR", @@ -1213,6 +1256,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "OBTAIN PROPERTY FALSE PRETENSE", "Severity": "FELONY", @@ -1246,6 +1290,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "DWLR NOT IMPAIRED REV", "Severity": "TRAFFIC", @@ -1279,6 +1324,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "FAIL RETN PROP RENTD PUR OPT", "Severity": "MISDEMEANOR", @@ -1311,6 +1357,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE ASSAULT", "Severity": "MISDEMEANOR", @@ -1322,6 +1369,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "ASSAULT ON A CHILD UNDER 12", "Severity": "MISDEMEANOR", @@ -1355,6 +1403,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "RESISTING PUBLIC OFFICER", "Severity": "MISDEMEANOR", @@ -1397,6 +1446,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "ALLOW UNLICENSED TO DRIVE", "Severity": "TRAFFIC", @@ -1409,6 +1459,7 @@ { "Records": [ { + "Count": "02", "Action": "CHARGED", "Description": "MISDEMEANOR LARCENY", "Severity": "MISDEMEANOR", @@ -1443,6 +1494,7 @@ { "Records": [ { + "Count": "01", "Action": "CHARGED", "Description": "SIMPLE WORTHLESS CHECK($228.47)", "Severity": "MISDEMEANOR", @@ -1464,4 +1516,4 @@ "Superior Court Offense Information": [], "_meta": {} } -] +] \ No newline at end of file