-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfood_hygiene_processor.R
93 lines (72 loc) · 2.71 KB
/
food_hygiene_processor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
pacman::p_load(tidyverse,
glue,
janitor,
xml2
)
# Extract food establishment ratings data from the FSA
# Clean and prepare for loading to ODS platform
# The same files are apparently also available as JSON: change the file extension from .xml to .json
# Source: https://ratings.food.gov.uk/open-data
# Root URL that the per-authority open-data files are appended to.
base_url <- "https://ratings.food.gov.uk/api/open-data-files/"

# Local authorities to fetch. la_names and xml_files are paired by position:
# the i-th name labels the i-th file.
la_names <- c(
  "Bristol",
  "Bath_and_North_East_Somerset",
  "North_Somerset",
  "South_Gloucestershire"
)
xml_files <- c(
  "FHRS855en-GB.xml",
  "FHRS857en-GB.xml",
  "FHRS858en-GB.xml",
  "FHRS856en-GB.xml"
)

# Named list of download URLs, keyed by local authority name.
food_data_list <- as.list(glue("{base_url}{xml_files}")) %>%
  set_names(la_names)

# The downloads below are written into data/, so make sure the folder exists
# before the first one starts (a fresh checkout may not have it).
dir.create("data", showWarnings = FALSE, recursive = TRUE)

# Download each file to data/<local authority name>.xml. The result keeps the
# local authority names and is what gets passed to read_xml() further down.
food_files <- imap(
  food_data_list,
  ~ download_xml(.x, file = glue("data/{.y}.xml"))
)
# Read one FHRS XML file and return its establishment records.
#
# la_item: anything read_xml() accepts (here: the path of a downloaded file).
# Returns: the "EstablishmentCollection" element nested under
#   "FHRSEstablishment", as a nested list; NULL when that path is absent.
extract_establishments <- function(la_item) {
  fhrs_doc <- read_xml(la_item)
  fhrs_nested <- as_list(fhrs_doc)
  pluck(fhrs_nested, "FHRSEstablishment", "EstablishmentCollection")
}
# Flatten one establishment record into a wide tibble, one column per field.
#
# fhr_detail: named nested list for a single establishment, in the shape
#   produced by xml2::as_list(). Each element is either a single value or a
#   group of named sub-fields.
# Returns: the tibble produced by pivot_wider(). Plain fields keep their own
#   name as the column name; grouped sub-fields are promoted to top-level
#   columns named after the sub-field.
make_df <- function(fhr_detail) {
  unnested_tbl <- fhr_detail %>%
    # map(), not imap(): imap() would pass each element's name to unlist() as
    # its `recursive` argument, which only worked by accident.
    map(unlist) %>%
    enframe() %>%
    unnest_longer(value)

  # unnest_longer() adds a `value_id` column only when some field carried
  # named sub-fields. Where an inner name is present it becomes the column
  # name; a missing or empty inner name keeps the outer field name.
  if ("value_id" %in% names(unnested_tbl)) {
    unnested_tbl <- unnested_tbl %>%
      mutate(
        name = if_else(is.na(value_id) | value_id == "", name, value_id),
        value_id = NULL
      )
  }

  pivot_wider(unnested_tbl, names_from = name, values_from = value)
}
# Parse every downloaded file into its list of establishment records.
la_establishment_list <- food_files %>%
  map(extract_establishments)

# Flatten each record with make_df(), stack the records of one local
# authority, then stack the local authorities into a single table.
food_hygiene_tbl <- bind_rows(
  map(
    la_establishment_list,
    function(establishments) {
      bind_rows(map(establishments, make_df))
    }
  )
)

# Save the raw table to disk, then load it back from that file.
write_rds(food_hygiene_tbl, "data/food_hygiene_raw_tbl.rds")
food_hygiene_tbl <- read_rds("data/food_hygiene_raw_tbl.rds")
# Reformat the food hygiene data for upload to ODS.
fh_clean_tbl <- food_hygiene_tbl %>%
  clean_names() %>%
  # Columns that are not carried into the cleaned table.
  select(
    -scheme_type,
    -rating_key,
    -local_authority_code,
    -local_authority_web_site,
    -local_authority_email_address,
    -business_type_id
  ) %>%
  mutate(
    # The raw values are text; convert scores and coordinates to numbers.
    across(
      .cols = c("hygiene", "structural", "confidence_in_management"),
      .fns = as.integer
    ),
    across(.cols = c("longitude", "latitude"), .fns = as.double),
    rating_date = as.Date(rating_date),
    geo_point_2d = glue("{latitude}, {longitude}"),
    # glue() renders a missing coordinate as the text "NA", producing the
    # literal string "NA, NA", which `na = ""` in write_delim() cannot blank
    # out. Replace those with a real NA so the exported field is empty.
    geo_point_2d = if_else(
      is.na(latitude) | is.na(longitude),
      NA_character_,
      as.character(geo_point_2d)
    )
  ) %>%
  # Put the most relevant columns first; everything else keeps its order.
  relocate(
    business_name,
    business_type,
    address_line1,
    address_line2,
    address_line3,
    address_line4,
    post_code,
    rating_date,
    hygiene,
    structural,
    confidence_in_management,
    rating_value,
    new_rating_pending,
    local_authority_name,
    everything()
  )

# Export as a semicolon-delimited file with missing values written as empty
# fields.
fh_clean_tbl %>%
  write_delim(file = "data/food_hygiene_woe.csv", delim = ";", na = "")