-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexport_epc_data_ods.R
105 lines (93 loc) · 2.63 KB
/
export_epc_data_ods.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
pacman::p_load(tidyverse,
duckdb,
janitor)
con <- DBI::dbConnect(duckdb(), dbdir = "../weca_cesap/data/ca_epc.duckdb")
# the EPC data is now within a view in the duckdb database
# calculations for construction epoch and the joins with other tables
# are much faster than doing it in R
# The cleaning process are done in Polars and are much more performant than R
dbListTables(con)
con |>
dbSendQuery("INSTALL SPATIAL;")
con |>
dbSendQuery("LOAD SPATIAL;")
lep_epc_domestic_point_ods_tbl <- tbl(con, "epc_domestic_lep_vw") %>%
clean_names() |>
select(lmk_key,
local_authority,
property_type,
transaction_type,
tenure,
walls_description,
roof_description,
walls_energy_eff,
mainheat_description,
mainheat_energy_eff,
mainheat_env_eff,
main_fuel,
solar_water_heating_flag,
construction_age_band,
current_energy_rating,
potential_energy_rating,
co2_emissions_current,
co2_emissions_potential,
co2_emiss_curr_per_floor_area,
number_habitable_rooms,
number_heated_rooms,
photo_supply,
total_floor_area,
building_reference_number,
built_form,
lsoa21,
msoa21,
lat,
long,
imd,
total = total_all_households,
owned,
social_rented,
private_rented,
date = lodgement_date,
year = lodgement_year,
month = lodgement_month,
day = lodgement_day,
n_imd_decile = imd_decile,
n_nominal_construction_date = nominal_construction_year,
construction_epoch,
ladnm
) |>
collect()
lep_epc_domestic_point_ods_tbl %>%
write_csv("data/lep_epc_domestic_point_ods_tbl.csv", na = "")
epc_non_domestic_ods_tbl <-
tbl(con, "epc_non_domestic_ods_vw") |>
clean_names() |>
select(uprn,
lmk_key,
building_reference_number,
asset_rating,
asset_rating_band,
property_type,
local_authority,
constituency,
transaction_type,
standard_emissions,
typical_emissions,
target_emissions,
building_emissions,
building_level,
renewable_sources,
date = lodgement_date,
year = lodgement_year,
month = lodgement_month,
ladcd,
ladnm,
cauthcd,
cauthnm,
lsoa21,
lat,
long) |>
head() |>
collect() |> glimpse()
#write_csv("data/epc_non_domestic_ods.csv", na = "")
con %>% dbDisconnect()