-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNZ_workforce_staff_eth_gen.py
36 lines (33 loc) · 1.36 KB
/
NZ_workforce_staff_eth_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# encoding=utf-8
import os

from databaker.framework import *
import sa_walk
import feather
path = "data/raw/nz_raw_data/NZ 0321_Universities_Workforce_Data_gender_ethnicity 2000-2017.xlsx"
tab, = loadxlstabs(path, "Staff type x Ethnic x Gender", verbose = False)
anchor = tab.filter_one("Ethnic Group")
race = anchor.fill(DOWN)
gender = anchor.fill(RIGHT)
institution = tab.filter("Provider").fill(DOWN).is_not_blank()
profession = tab.filter("Staff Type/Group").fill(DOWN).is_not_blank()
# fte = tab.filter("Part-time").expand(RIGHT).is_not_blank()
metric = tab.filter("FTE") | tab.filter("Number of Staff")
year = tab.excel_ref("D5").fill(RIGHT).is_not_blank()
observations = race.fill(RIGHT).is_not_blank()
dimensions = [
HDimConst(DATAMARKER, 0),
HDimConst(TIME, 0),
HDim(institution, "Institution", CLOSEST, ABOVE),
HDim(profession, "Profession", CLOSEST, ABOVE),
HDimConst("IsAcademic", "Academic"),
HDim(race, "Race", DIRECTLY, LEFT),
HDim(gender, "Gender", DIRECTLY, ABOVE),
# HDim(fte, "fte", CLOSEST, LEFT),
HDim(metric, "Metric", CLOSEST, LEFT),
# HDim(stat, "Statistic", CLOSEST, ABOVE),
HDim(year, "Time", CLOSEST, LEFT)
]
c1 = ConversionSegment(observations, dimensions)
df = c1.topandas()
feather.write_dataframe(df, f"feather/NZ-staff-ethnic-gen.feather") # TODO won't get all!
df.to_csv(f"csv/NZ-staff-ethnic-gen.csv", header=True)
print(df)