Skip to content

Commit

Permalink
chore: re-org python deps for provider map
Browse files Browse the repository at this point in the history
  • Loading branch information
allejo committed Feb 23, 2024
1 parent d367e77 commit cf586f4
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 56 deletions.
45 changes: 45 additions & 0 deletions .github/resources/process_providers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import json
import pandas as pd

# Paths are relative to the repository root, which is where the workflow
# invokes this script from.
providers_file = 'src/metadata/providers/providers.csv'
geojson_file = 'src/metadata/providers/counties.geojson'


def _lookup_county(city, city_to_county):
    """Return the county for *city*, or ``None`` when it cannot be resolved.

    The lookup table is tried with the bare city name first and then with
    the ``City of <name>`` spelling. A missing key, a NaN value, or an empty
    value all count as "not found" so that the fallback spelling is actually
    attempted (indexing the dict directly would raise ``KeyError`` before
    the fallback could run).
    """
    if pd.isna(city):
        return None

    for key in (city, f'City of {city}'):
        county = city_to_county.get(key)
        if pd.notna(county) and county:
            return county

    return None


def fill_missing_counties(df, city_to_county):
    """Fill null ``counties_served`` values with the provider's HQ county.

    Rows are addressed by index label rather than re-selected by ``ntd_id``,
    so a duplicated or missing ``ntd_id`` can neither overwrite a row that
    already has ``counties_served`` nor crash the lookup.

    Args:
        df: Providers table with ``hq_city`` and ``counties_served`` columns.
            Modified in place; ``hq_county`` is created if absent.
        city_to_county: Mapping of city name to county name.

    Returns:
        The same ``df`` object, for convenience.
    """
    missing_rows = df.index[df['counties_served'].isna()]

    for row in missing_rows:
        city = df.loc[row, 'hq_city']
        county = _lookup_county(city, city_to_county)

        if county is None:
            print("No county found for city: ", city)
            continue

        df.loc[row, 'hq_county'] = county
        df.loc[row, 'counties_served'] = county

    return df


def count_providers_by_county(df):
    """Return a Series mapping county name to number of providers serving it.

    ``counties_served`` holds ``;``-separated county names; each name counts
    once per provider row. Rows with no value are ignored.
    """
    return df['counties_served'].str.split(';').explode().value_counts()


def apply_county_counts(geojson, county_counts):
    """Set ``num_providers`` on every GeoJSON feature from *county_counts*.

    Features whose ``county`` property is absent from the counts get ``0``.
    The ``geojson`` dict is modified in place and also returned.
    """
    for feature in geojson['features']:
        properties = feature['properties']
        county_name = properties['county']

        if county_name in county_counts:
            # Cast from the numpy integer so json.dump can serialize it.
            properties['num_providers'] = int(county_counts[county_name])
        else:
            properties['num_providers'] = 0

    return geojson


def main():
    """Backfill the providers CSV and refresh county counts in the GeoJSON."""
    df = pd.read_csv(providers_file)
    city_lookup = pd.read_csv('src/metadata/cities_to_county.csv')
    city_to_county = dict(zip(city_lookup['City'], city_lookup['County']))

    # Fill in the null values for counties served with the HQ county
    fill_missing_counties(df, city_to_county)

    # NOTE(review): this writes the DataFrame index as an unnamed first
    # column; pass index=False if downstream readers do not expect it.
    df.to_csv(providers_file)

    # Do a group by for the counties served
    county_counts = count_providers_by_county(df)

    with open(geojson_file, encoding='utf-8') as f:
        geojson = json.load(f)

    # Add the county counts to the geojson
    apply_county_counts(geojson, county_counts)

    # Write the geojson back to the file
    with open(geojson_file, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=2)


if __name__ == '__main__':
    main()
64 changes: 8 additions & 56 deletions .github/workflows/provider-map-jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,68 +35,20 @@ jobs:
"SELECT * FROM \`mart_transit_database.dim_mobility_mart_providers\`" \
> src/metadata/providers/providers.csv
- name: Fix our CSV file
run: |
# Workaround because of...
# https://github.com/google-github-actions/setup-gcloud/issues/666
sed -i -n -e '/agency_name/,$p' src/metadata/providers/providers.csv
# - name: Fix our CSV file
# run: |
# # Workaround because of...
# # https://github.com/google-github-actions/setup-gcloud/issues/666
# sed -i -n -e '/agency_name/,$p' src/metadata/providers/providers.csv

- uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Install pandas
run: |
pip install pandas
- name: Update Counties GeoJSON file
shell: python
- name: Install dependencies, process providers, and update GeoJSON
run: |
import json
import pandas as pd
providers_file = 'src/metadata/providers/providers.csv'
df = pd.read_csv(providers_file)
city_lookup = pd.read_csv('src/metadata/cities_to_county.csv')
city_to_county = dict(zip(city_lookup['City'], city_lookup['County']))
lookup_records = df[df['counties_served'].isna()]['ntd_id']
# Fill in the null values for counties served with the HQ county
for record in lookup_records:
city = df[df['ntd_id'] == record]['hq_city'].values[0]
try:
county = city_to_county[city] or city_to_county[f'City of {city}']
df.loc[df['ntd_id'] == record, 'hq_county'] = county
df.loc[df['ntd_id'] == record, 'counties_served'] = county
except KeyError:
print("No county found for city: ", city)
df.to_csv(providers_file)
# Do a group by for the counties served
county_counts = df['counties_served'].str.split(';') \
.explode('counties_served') \
.value_counts()
geojson_file = 'src/metadata/providers/counties.geojson'
geojson = json.load(open(geojson_file))
# Add the county counts to the geojson
for feature in geojson['features']:
county_name = feature['properties']['county']
if county_name in county_counts:
feature['properties']['num_providers'] = int(county_counts[county_name])
else:
feature['properties']['num_providers'] = 0
# Write the geojson back to the file
with open(geojson_file, 'w') as f:
json.dump(geojson, f, indent=2)
pip install -r requirements.txt
python .github/resources/process_providers.py
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ _site/
data/providers/*.json
node_modules/
vendor/
venv/
**/.DS_Store
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
distlib==0.3.8
filelock==3.13.1
numpy==1.26.4
pandas==2.2.0
platformdirs==4.2.0
python-dateutil==2.8.2
pytz==2024.1
six==1.16.0
tzdata==2024.1

0 comments on commit cf586f4

Please sign in to comment.