-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbwi_index_processor.R
72 lines (58 loc) · 2.12 KB
/
bwi_index_processor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
pacman::p_load(tidyverse,
janitor,
glue,
readxl
)
# Ingest the BWI sample data from Chris Clements and
# transform it into a dataset suitable for publishing on ODS.
# NOTE: it can't actually be published, as BTO restricts re-use.
# Workbook holding the BWI sample data (relative to the working directory).
path <- "data/BWI data 2024 - Copy.xlsx"

# Names of the worksheets found in that workbook.
sheets_vec <- path %>% readxl::excel_sheets()
#' Read worksheets from an Excel workbook into a named list of tibbles
#'
#' Every column is read as text so that no type guessing happens at import;
#' conversion to proper types is left to the caller.
#'
#' @param path Path to the Excel workbook.
#' @param sheets_vec Character vector of sheet names to read. Defaults to
#'   every sheet in `path`, so `make_list_from_sheets(path)` reads them all.
#' @return A list with one tibble per requested sheet. List names are the
#'   sheet names passed through `janitor::make_clean_names()`, and each
#'   tibble's column names are passed through `janitor::clean_names()`.
make_list_from_sheets <- function(
    path = "data/BWI data 2024 - Copy.xlsx",
    sheets_vec = readxl::excel_sheets(path)) {
  sheets_vec %>%
    # name the vector first so map() carries the cleaned names to the output
    set_names(make_clean_names(sheets_vec)) %>%
    # bring all columns in as text, then standardise the column names
    map(~ clean_names(read_xlsx(path, sheet = .x, col_types = "text")))
}
# Read every sheet of the workbook into a named list of tibbles.
sheets_list <- make_list_from_sheets(path, sheets_vec)

# Show the cleaned sheet names that are available.
names(sheets_list)

# Extract the sheet of interest. chuck() is used instead of pluck() so that a
# missing or renamed sheet fails here with a clear error, rather than yielding
# NULL and breaking later in the cleaning pipeline.
bwi_raw <- sheets_list %>% chuck("bristol_index_example")

# Quick look at the columns and their (text) contents.
bwi_raw %>% glimpse()
# Clean the raw sheet and reshape it to long format: one row per original
# record and year, keeping only rows that have a value.
bwi_clean <- bwi_raw %>%
  # Turn literal "NA" strings into real missing values *before* any type
  # conversion, so the coercions below do not emit "NAs introduced by
  # coercion" warnings for them and genuine parse failures stay visible.
  mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
  # Drop unwanted columns explicitly. any_of() keeps this a no-op when one of
  # the named columns is absent.
  select(!c(
    starts_with("username"),
    any_of(c("status", "notes", "reference")) # zero variance fields
  )) %>%
  mutate(
    # flag columns: text -> integer -> logical
    across(
      c(
        continuing_collection,
        specific_location,
        inside_city_limits,
        invasive,
        migratory,
        resident,
        data_transformed
      ),
      ~ as.logical(as.integer(.x))
    ),
    # harmonise the different spellings of the count units
    units = case_when(
      units %in% c("Count", "count", "Counts", "count per run") ~ "count",
      units %in% c("Transect count", "number per transect") ~ "count (transect)",
      .default = units
    ),
    # Year columns are named "x" followed by the 4-digit year. Matching on
    # that pattern (rather than any name starting with "x") stops unrelated
    # columns from being coerced and pivoted.
    across(matches("^x[0-9]{4}"), as.numeric),
    across(ends_with("tude"), as.numeric),
    source_year = as.integer(source_year),
    replicate = as.integer(replicate),
    date_created = as.Date(date_created, format = "%d/%m/%Y")
  ) %>%
  # wide -> long; rows without a value are dropped during the pivot
  pivot_longer(
    cols = matches("^x[0-9]{4}"),
    names_to = "year",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  # strip the leading "x" and keep the 4-digit year
  mutate(year = as.integer(str_sub(year, 2, 5)))
# Open the cleaned data in the viewer for a visual check.
view(bwi_clean)

# Save the cleaned data as CSV (missing values written as empty strings)
# and as RDS.
write_csv(bwi_clean, "data/bwi_clean.csv", na = "")
write_rds(bwi_clean, "data/bwi_clean.rds")