-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_assets.R
71 lines (56 loc) · 1.84 KB
/
process_assets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
pacman::p_load(tidyverse, glue, janitor, httr2, jsonlite)
# the R package PostcodesioR doesn't seem to work for bulk lookup
# Load the asset-to-postcode mapping. clean_names() normalises the column
# headers; the rest of the script relies on the resulting `postcode` column.
assets_raw <- read_csv("data/asset_mapping.csv") |>
  clean_names()

# Distinct postcodes to send to the lookup API.
# str_remove() is vectorised, so it is applied to the whole column at once.
# "\\s+$" strips the entire run of trailing whitespace (not just its last
# character), and trimming happens before unique() so that values differing
# only by trailing whitespace collapse into a single lookup.
postcodes <- assets_raw$postcode |>
  str_remove("\\s+$") |>
  unique()

# Check for trailing spaces / encoding problems.
# Despite its name, `postcode_not_utf8` is TRUE where a postcode IS valid
# UTF-8, so the negated subset printed below lists the invalid entries.
postcode_not_utf8 <- validUTF8(postcodes)
postcodes[!postcode_not_utf8]
# need to chunk the postcodes in batches of 100
# for the API
# Split a vector into consecutive pieces of at most `n` elements.
#
# x: vector to split (may be empty).
# n: maximum piece size; a single number >= 1.
#
# Returns an unnamed list of sub-vectors of `x`, in order; the last piece
# holds the remainder. An empty `x` gives an empty list.
chunk <- function(x, n) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 1)

  # seq.int(from = 1, to = 0, by = n) raises "wrong sign in 'by'", so empty
  # input has to be handled before the start positions are computed.
  if (length(x) == 0L) {
    return(list())
  }

  starts <- seq.int(from = 1L, to = length(x), by = n)
  # Clamp the final end position so the last piece stops at the end of `x`.
  ends <- pmin(starts + (n - 1L), length(x))

  Map(function(a, b) x[a:b], starts, ends)
}
# Wrap a chunk of postcodes in the request-body structure: a list with a
# single element named "postcodes".
#
# a_chunk: vector of postcodes for one request.
#
# The vector is marked with I() because req_body_json() serialises with
# auto_unbox = TRUE by default, which would turn a one-element chunk into a
# bare JSON string instead of a one-element array. jsonlite does not unbox
# AsIs objects, so the body is always {"postcodes": [...]}.
jsonise <- function(a_chunk) {
  list(postcodes = I(a_chunk))
}
# Turn one API response into a rectangular tibble.
#
# response: an httr2 response whose JSON body has a top-level "result"
#   element; each of its entries has its own nested "result".
#
# Returns a tibble with an index column named `1`, the fields of each nested
# result spread into columns, and the `codes` field spread into `codes_*`
# columns.
flatten_to_df <- function(response) {
  lookups <- pluck(resp_body_json(response), "result")

  # Keep only the nested "result" of every entry.
  matches <- map(lookups, \(entry) pluck(entry, "result"))

  # name = 1 makes enframe() call its index column `1`.
  matches |>
    enframe(name = 1) |>
    unnest_wider(value) |>
    unnest_wider(codes, names_sep = "_")
}
# Send one batch of postcodes to the lookup endpoint.
#
# postcode_json: R object to serialise as the JSON request body.
# base_url: endpoint to send the request to.
#
# Returns the httr2 response produced by req_perform().
call_api <- function(postcode_json,
                     base_url = "https://api.postcodes.io/postcodes/") {
  # Pause for a second before every request to go easy on the server.
  Sys.sleep(1)

  lookup_request <- request(base_url)
  lookup_request <- req_headers(
    lookup_request,
    "Content-Type" = "application/json"
  )
  lookup_request <- req_body_json(lookup_request, data = postcode_json)

  req_perform(lookup_request)
}
# The API accepts at most 100 items per JSON request, so the postcodes are
# split into batches of that size.
pc_chunks <- chunk(postcodes, 100)

# One request per batch, then each response is rectangled and all the
# resulting tables are stacked into a single one.
pcodes_response_tbl <- pc_chunks |>
  map(\(batch) call_api(jsonise(batch))) |>
  map(\(response) flatten_to_df(response)) |>
  bind_rows()
# Attach the lookup results to every asset row, matching on postcode.
#
# The lookup table is reduced to one row per non-missing postcode before the
# join. dplyr joins treat NA keys as equal to each other, so lookup rows with
# a missing postcode would otherwise attach to every asset whose postcode is
# missing, and repeated postcodes would duplicate asset rows. With this guard
# the output always has exactly one row per input asset.
geocoded_assets_out_tbl <- assets_raw |>
  left_join(
    pcodes_response_tbl |>
      filter(!is.na(postcode)) |>
      distinct(postcode, .keep_all = TRUE),
    by = join_by(postcode)
  ) |>
  # The long source name (spelling included) is the cleaned CSV header.
  rename(
    asset = asset_nationally_and_internationally_singificant_research_capability_and_or_institutions
  ) |>
  # Drop the index column `1` and the `codes*` columns from the lookup table.
  select(-`1`, -starts_with("codes"))

# Missing values are written as empty cells.
geocoded_assets_out_tbl |>
  write_csv("data/geocoded_assets.csv", na = "")