-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathone_large_table.R
148 lines (122 loc) · 5.31 KB
/
one_large_table.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
pacman::p_load(tidyverse,
glue,
janitor,
fs)
# extract the source data which are tables in duckdb
tbl_list <- read_rds("portal_tbl_list.rds")
names(tbl_list) <- str_replace_all(names(tbl_list), pattern = "-", replacement = "_")
list2env(tbl_list, .GlobalEnv)
# formatting functions ----
format_nice <- function(str){
str_replace_all(str, pattern = "_", replacement = " ") %>%
str_to_sentence()
}
make_kv <- function(dotx){
if_else(is.na(dotx),
str_c(cur_column() %>% format_nice(),
": NA"),
str_c(cur_column() %>% format_nice(), ": ",
dotx))
}
# prep measures tbls for better formatting ----
area_measures_single_tbl <- area_measures_tbl %>%
mutate(across(-area_measure_id, ~make_kv(.x)
)) %>%
unite(col = "area_measures",
sep = "\n",
-area_measure_id)
priority_measures_single_tbl <- priority_measures_tbl %>%
mutate(across(-priority_measure_id, ~make_kv(.x)
)) %>%
unite(col = "priority_measures",
sep = "\n",
-priority_measure_id)
grants_single_tbl <- grants_tbl %>%
select(-id) %>%
relocate(url, .after = everything()) %>%
rename(link = url) %>%
mutate(across(-c(grant_id), ~make_kv(.x)
)) %>%
unite(col = "grants",
sep = "\n",
-grant_id)
grants_single_tbl
# make a table which relates areas, priorities, measures and grants
areas_priorities_grants_tbl <- areas_tbl %>%
left_join(area_funding_schemes_tbl,
by = join_by(area_id == area_id),
relationship = "many-to-many") %>%
left_join(priorities_areas_lookup_tbl,
by = join_by(area_id == area_id),
relationship = "many-to-many") %>%
left_join(priorities_areas_measures_lookup_tbl,
by = join_by(area_id == area_id,
priority_id == priority_id),
relationship = "many-to-many") %>%
left_join(area_measures_single_tbl, by = join_by(area_measure_id == area_measure_id)) %>%
left_join(priorities_tbl, by = join_by(priority_id == priority_id)) %>%
left_join(priorities_measures_lookup_tbl,
by = join_by(priority_id == priority_id),
relationship = "many-to-many") %>%
left_join(priority_measures_single_tbl,
by = join_by(priority_measure_id == priority_measure_id),
suffix = c("_area", "_priority")) %>%
left_join(priority_measures_grants_lookup_tbl,
by = join_by(priority_measure_id == priority_measure_id),
relationship = "many-to-many") %>%
left_join(areas_measures_grants_lookup_tbl,
by = join_by(area_measure_id == area_measure_id),
relationship = "many-to-many", suffix = c("_p_measures", "_a_measures")) %>%
pivot_longer(cols = c(grant_id_p_measures, grant_id_a_measures),
names_to = "grant_area_or_priority",
values_to = "grant_id") %>%
mutate(grant_area_or_priority = str_remove_all(grant_area_or_priority, "^grant_id_|_measures$")) %>%
left_join(grants_single_tbl, by = join_by(grant_id == grant_id)) %>%
select(-starts_with("id")) %>%
distinct()
areas_priorities_grants_tbl %>% glimpse()
# make a table which relates species, areas and priorities ----
# retain just the species_id and common_name from the species_tbl
species_area_priority_tbl <-
areas_priorities_grants_tbl %>%
select(area_id, priority_id) %>%
left_join(species_area_lookup_tbl,
by = join_by(area_id == area_id),
relationship = "many-to-many") %>%
left_join(species_priority_lookup_tbl,
by = join_by(priority_id == priority_id),
relationship = "many-to-many",
suffix = c("_area", "_priority")) %>%
pivot_longer(cols = c(species_id_area, species_id_priority),
names_to = "species_area_priority",
values_to = "species_id") %>%
distinct(area_id, priority_id, species_id) %>%
left_join(species_tbl %>%
select(species_id, common_name),
by = join_by(species_id == species_id))
# now join to add species
areas_priorities_grants_species_tbl <- areas_priorities_grants_tbl %>%
left_join(species_area_priority_tbl,
by = join_by(area_id == area_id,
priority_id == priority_id),
relationship = "many-to-many")
# consolidate ----
make_priority_area_tbl <- function(areas_priorities_grants_species_tbl, ...){
areas_priorities_grants_species_tbl %>%
group_by(...) %>%
# gather text and separate with new lines (character) and colons (numeric)
summarise(across(where(is.character), ~str_c(unique(.x[!is.na(.x)]),
collapse = "\n\n")),
across(where(is.numeric), ~str_c(unique(.x[!is.na(.x)]),
collapse = ": ")),
.groups = "drop")
}
areas_grouped_tbl <- make_priority_area_tbl(
areas_priorities_grants_species_tbl,
area_id, area_name, area_description, area_link)
priorities_grouped_tbl <- make_priority_area_tbl(
areas_priorities_grants_species_tbl,
priority_id, theme, biodiversity_priority, simplified_biodiversity_priority)
# write to csv
write_csv(areas_grouped_tbl, "data/areas_grouped_tbl.csv")
write_csv(priorities_grouped_tbl, "data/priorities_grouped_tbl.csv")