Skip to content

Uhm, don't publish GHA private key... #3

Uhm, don't publish GHA private key...

Uhm, don't publish GHA private key... #3

name: Provider Map Jobs
on:
# TODO: Remove this once behavior has been finalized
push:
branches:
- vlad/provider-map-gha
workflow_dispatch:
schedule:
- cron: "0 0 * * 0"
jobs:
fetch_warehouse:
name: Fetch Warehouse Updates
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Authenticate with Google
uses: google-github-actions/auth@v1
with:
credentials_json: '${{ secrets.GCP_SA_KEY }}'
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v1
- name: Download providers.csv from warehouse
run: |
bq query \
--quiet \
--headless \
--format=csv \
--use_legacy_sql=false \
"SELECT * FROM \`mart_transit_database.dim_mobility_mart_providers\`" \
> src/metadata/providers/providers.csv
- name: Fix our CSV file
run: |
# Workaround because of...
# https://github.com/google-github-actions/setup-gcloud/issues/666
sed -i -n -e '/agency_name/,$p' src/metadata/providers/providers.csv
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install pandas
run: |
pip install pandas
- name: Update Counties GeoJSON file
shell: python
run: |
import json
import pandas as pd
df = pd.read_csv('src/metadata/providers/providers.csv')
# Drop the null values for TAs that don't have counties served
df = df[df['counties_served'].notnull()]
# Do a group by for the counties served
county_counts = df['counties_served'].str.split(';') \
.explode('counties_served') \
.value_counts()
geojson_file = 'src/metadata/providers/counties.geojson'
geojson = json.load(open(geojson_file))
# Add the county counts to the geojson
for feature in geojson['features']:
county_name = feature['properties']['county']
if county_name in county_counts:
feature['properties']['num_providers'] = int(county_counts[county_name])
else:
feature['properties']['num_providers'] = 0
# Write the geojson back to the file
with open(geojson_file, 'w') as f:
json.dump(geojson, f, indent=2)
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
title: Provider Map Data Auto Update
body: |
It's that time again! The warehouse has delivered new data for us to use.
This is an automatic pull request created by the `provider-map-jobs.yml` workflow. It is triggered by a cron job that runs every Sunday at midnight UTC.
commit-message: Auto-update provider data from warehouse
add-paths: |
src/metadata/providers/providers.csv
src/metadata/providers/counties.geojson
base: main