# -*- coding: utf-8 -*-
"""
Created on Thu Sep 16 15:20:42 2021
@author: eneemann
Script to flag NG911 errors on Address Points
"""
import arcpy
import os
import time
from xxhash import xxh64
# Start timer and print start time in UTC
start_time = time.time()
readable_start = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
print("The script start time is {}".format(readable_start))
######################
# Set up variables #
######################
# Set up databases (paths must be changed based on the user's environment)
ng911_db = r"\\itwfpcap2\AGRC\agrc\data\ng911\SpatialStation_live_data\UtahNG911GIS.gdb"
#ng911_db = r"C:\Users\eneemann\Desktop\Neemann\NG911\Arizona Data\UT_AZ_Border_PSAPs.gdb"
error_db = r"C:\Users\eneemann\Desktop\Neemann\NG911\911 DataMaster\NG911_Data_Errors.gdb"
arcpy.env.workspace = ng911_db
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False
today = time.strftime("%Y%m%d")
addpts = os.path.join(ng911_db, r'AddressPoints')
#addpts = os.path.join(ng911_db, r'AZNG911_SSAP_in_Utah_PSAPs')
addpts_working = os.path.join(error_db, f'AddressPoints_errors_{today}')
addpts_final = os.path.join(error_db, f'AddressPoints_errors_only_{today}')
addpts_final_name = f'AddressPoints_errors_only_{today}'
## Make a copy of the data to work on
arcpy.management.CopyFeatures(addpts, addpts_working)
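# Add a text field to the working copy to hold the error flags written by this script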
arcpy.AddField_management(addpts_working, "Error_UGRC", "TEXT", "", "", 100)
print("Time elapsed: {:.2f}s".format(time.time() - start_time))
addr_count = int(arcpy.management.GetCount(addpts_working)[0])
#: Check for duplicate attributes
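#: Rows are hashed with xxh64 (skipping ID, geometry, and error fields); a repeated digest flags an attribute duplicate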
digests = set([])
description = arcpy.da.Describe(addpts_working)
print(f'Working on Duplicates for: {addpts_working}')
skip_fields = ['Error_UGRC', 'Long', 'Lat', description['shapeFieldName']]
if description['hasGlobalID']:
    skip_fields.append(description['globalIDFieldName'])
if description['hasOID']:
    skip_fields.append(description['OIDFieldName'])
fields = [field.name for field in description['fields'] if field.name not in skip_fields]
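#: Mandatory fields must be populated; values matching 'empties' (after strip and casefold) count as missing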
mandatory_fields = ['GlobalID', 'State', 'County', 'Add_Number', 'LSt_Name', 'MSAGComm']
empties = [None, 'none', 'null', '', ' ', ' ']
#: Add OID and Error_UGRC at the end, so we can ignore them in the hash
fields.append('OID@')
fields.append('Error_UGRC')
print(fields)
oid_index = fields.index('OID@')
notes_index = fields.index('Error_UGRC')
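#: Track flagged OIDs and counts for the summary printed after the loop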
oids_with_issues = []
duplicate_count = 0
required_count = 0
print("Looping through rows in FC ...")
with arcpy.da.UpdateCursor(addpts_working, fields) as update_cursor:
    mandatory_idx = [fields.index(item) for item in fields if item in mandatory_fields]
    print(mandatory_fields)
    for row in update_cursor:
        comment = None
        object_id = row[oid_index]
        if object_id % 100000 == 0:
            print(f'working on OBJECTID: {object_id}')
        #: Hash all fields except OID@ and Error_UGRC, which are the last two in the list
        hasher = xxh64(str(row[:-2]))
        digest = hasher.hexdigest()
        if digest in digests:
            oids_with_issues.append(object_id)
            comment = 'attribute duplicate'
            duplicate_count += 1
        digests.add(digest)
        #: Check mandatory fields
        row_mandatory = [row[i] for i in mandatory_idx]
        if any(val is None or str(val).strip().casefold() in empties for val in row_mandatory):
            oids_with_issues.append(object_id)
            required_count += 1
            if comment is None or comment in ('', ' '):
                comment = 'required value missing'
            else:
                comment += ', required value missing'
        row[notes_index] = comment
        update_cursor.updateRow(row)
print(f"Total count of attribute duplicates is: {duplicate_count} or {round((duplicate_count/addr_count)*100, 3)}%")
print(f"Total count of rows missing required value: {required_count} or {round((required_count/addr_count)*100, 3)}%")
#oid_set = set(oids_with_issues)
#print('\nSelect statement to view errors in ArcGIS:')
#sql = f'OBJECTID IN ({", ".join([str(oid) for oid in oid_set])})'
#print(sql)
# Create copy with only points containing errors
print('Exporting features with errors to a separate feature class ...')
where_clause = """Error_UGRC IS NOT NULL"""
arcpy.conversion.FeatureClassToFeatureClass(addpts_working, error_db, addpts_final_name, where_clause)
##########################
# Call Functions Below #
##########################
print("Script shutting down ...")
# Stop timer and print end time in UTC
readable_end = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
print("The script end time is {}".format(readable_end))
print("Time elapsed: {:.2f}s".format(time.time() - start_time))