-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path.Rhistory
483 lines (483 loc) · 29.9 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
locations_sf = st_as_sf(merged_mr, coords = c("long","lat"))
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path = 'Figs_geohub/',
echo=T, include = T, warning = FALSE, message = FALSE)
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
#repos='http://cran.muenster.r-project.org'
stdata = c("sp", "sf", "raster")
Stat_methods = c("glmnet", "ranger", "gbm", "xgboost", "party", "caret", "gstat")
visual = c("RColorBrewer", "ggplot2", "corrplot", "tmap", "leaflet", "mapview","leafem", "pdp", "vip", "DT", "sparkline")
map = c("maptools")
tidy = c("devtools", "dplyr", "tidyr", "reshape2", "knitr")
other = c("countrycode", "htmlwidgets", "data.table", "Matrix", "GGally")
packages <- c(stdata, tidy, Stat_methods, visual, map, other)
ipak(packages)
install_github("mengluchu/APMtools")
library(APMtools)
ls("package:APMtools")
#gd = fread("~/Documents/GitHub/Global mapping/2020_06_world/stations_20200602.csv")
#avg = fread("~/Documents/GitHub/Global mapping/oaqEUAUCAUS.csv")
#gdata = merge(gd, avg, by.x = c("long", "lat"), by.y = c("LONGITUDE","LATITUDE" ))
#g1 = na_if(gdata, -9999.9)
#g2 = g1%>%dplyr::select(-id, -dir, -V1)%>%filter(value_mean >0)
data("global_annual")
vastring = "road|nightlight|population|temp|wind|trop|indu|elev"
global_annual %>% dplyr::select(value_mean ) %>% summary()
#datatable(g2, rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
merged_mr = merge_roads(global_annual, c(3, 4, 5), keep = F) # keep = T keeps the original roads.
names(merged_mr)
locations_sf = st_as_sf(merged_mr, coords = c("long","lat"))
osm_valuemean = tm_shape(locations_sf) +
tm_dots( "value_mean", col = "value_mean", size = 0.05,title = "NO2 value",
popup.vars = c("value_mean" )) + tm_view(basemaps = c('OpenStreetMap'))
#+tm_shape(lnd)+tm_lines()
tmap_save(osm_valuemean, "NO2mean.html")
merged_mr%>%filter(is.na(wind_speed_10m_10))
climatemissing= merged_mr%>%filter(is.na(wind_speed_10m_10))
locations_sf = st_as_sf(climatemissing, coords = c("long","lat"))
osm_valuemean = tm_shape(locations_sf) +
tm_dots( "value_mean", col = "value_mean", size = 0.05,title = "NO2 value",
popup.vars = c("value_mean" )) + tm_view(basemaps = c('OpenStreetMap'))
#+tm_shape(lnd)+tm_lines()
tmap_save(osm_valuemean, "climatemissing.html")
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path = 'Figs_geohub/',
echo=T, include = T, warning = FALSE, message = FALSE)
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
#repos='http://cran.muenster.r-project.org'
stdata = c("sp", "sf", "raster")
Stat_methods = c("glmnet", "ranger", "gbm", "xgboost", "party", "caret", "gstat")
visual = c("RColorBrewer", "ggplot2", "corrplot", "tmap", "leaflet", "mapview","leafem", "pdp", "vip", "DT", "sparkline")
map = c("maptools")
tidy = c("devtools", "dplyr", "tidyr", "reshape2", "knitr")
other = c("countrycode", "htmlwidgets", "data.table", "Matrix", "GGally")
packages <- c(stdata, tidy, Stat_methods, visual, map, other)
ipak(packages)
install_github("mengluchu/APMtools")
library(APMtools)
ls("package:APMtools")
#gd = fread("~/Documents/GitHub/Global mapping/2020_06_world/stations_20200602.csv")
#avg = fread("~/Documents/GitHub/Global mapping/oaqEUAUCAUS.csv")
#gdata = merge(gd, avg, by.x = c("long", "lat"), by.y = c("LONGITUDE","LATITUDE" ))
#g1 = na_if(gdata, -9999.9)
#g2 = g1%>%dplyr::select(-id, -dir, -V1)%>%filter(value_mean >0)
data("global_annual")
vastring = "road|nightlight|population|temp|wind|trop|indu|elev"
global_annual %>% dplyr::select(value_mean ) %>% summary()
#datatable(g2, rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
merged_mr = merge_roads(global_annual, c(3, 4, 5), keep = F) # keep = T keeps the original roads.
names(merged_mr)
locations_sf = st_as_sf(merged_mr, coords = c("long","lat"))
osm_valuemean = tm_shape(locations_sf) +
tm_dots( "value_mean", col = "value_mean", size = 0.05,title = "NO2 value",
popup.vars = c("value_mean" )) + tm_view(basemaps = c('OpenStreetMap'))
#+tm_shape(lnd)+tm_lines()
tmap_save(osm_valuemean, "NO2mean.html")
countryname = paste(merged_mr$country, countrycode(merged_mr$country, 'iso2c', 'country.name'), sep = ":")
#tag country with ppm
# use the median for colour
mergedmedian = merged_mr %>% group_by(country) %>% mutate(median = median(value_mean, na.rm = TRUE))
nrow(merged_mr)
merged_mr = merged_mr %>% group_by(country) %>% mutate(count1 = n())
merged_mr$count1
countryname_s_e=ifelse( merged_mr$country %in% countrywithppm[countrywithppm %in% merged_mr$country], paste(countryname, "*", sep = ""), countryname)
mergedmedian$countryfullname = paste0(countryname_s_e," (",merged_mr$count1 ,")")
bp2 <- ggplot(mergedmedian, aes(x=countryfullname, y=value_mean, group=country)) +
labs(x = "Country", y = expression(paste("NO"[2], " ", mu, "g/", "m"^3)), cex = 1.5) +
geom_boxplot(aes(fill = median)) +
theme(text = element_text(size = 13), axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_distiller(palette = "Spectral")
# scale_color_brewer(direction = 1)
print(bp2 + ylim(0, 100))
#ggsave("boxplot.png")
merged_mr %>% na.omit %>% filter(country == "DE") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
merged_mr %>% na.omit %>% filter(country == "US") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
merged_mr %>% na.omit %>% filter(country == "CA") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
merged_mr %>% na.omit %>% filter(country == "CN") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
merged_mr %>% na.omit %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
grd_sp <- as_Spatial(locations_sf)
dt.vgm = variogram(value_mean~1, grd_sp, cutoff= 1)
plot(dt.vgm)
#Moran I test
#install.packages("ape", dependencies = TRUE)
#library(ape)
#merged_mrf = merged_mr%>%filter(country == "US")
#no2.dists <- as.matrix(dist(cbind(merged_mrf$LONGITUDE, merged_mrf$LATITUDE)))
#no2.dists[1:5, 1:5]
#no2.inv <- 1/no2.dists
#diag(no2.inv) <- 0
#no2.inv[1:5, 1:5]
#Moran.I(merged_mrf$value_mean, na.rm = T, no2.inv)
countryvariogram = function(COUN, cutoff){
loca = locations_sf%>%filter(country == COUN)
grd_sp <- as_Spatial(loca)
dt.vgm = variogram(value_mean~1, grd_sp, cutoff = cutoff)
plot(dt.vgm)
}
countryvariogram("DE", 1)
countryvariogram("US", 1)
countryvariogram("CN", 1) # reason?
inde_var = merged_mr %>%na.omit() # 70 variables
names(inde_var)
inde_var %>% filter(country=="DE")%>%ungroup%>% dplyr::select(matches("road_class_M345|nightlight|temperature_2m_7|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% filter(country=="DE")%>%ungroup%>% dplyr::select(matches("road_class_2|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "gam")
inde_var %>%ungroup%>% dplyr::select(matches("road_class_M345|nightlight|temperature_2m_7|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% ungroup%>% dplyr::select(matches("road_class_2|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% ungroup%>% dplyr::select(matches("road_class_1|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "gam")
#inde_var %>% dplyr::select(matches("Tro|OMI|Rsp|night_value")) %>% scatterplot(y_name = "night_value", fittingmethod = "loess")
# why?
# can choose any variable to look at the scatterplot
#inde_var %>% dplyr::select(matches("ROAD_1|day_value")) %>% scatterplot(y_name = "day_value", fittingmethod = "gam")
inde_var = inde_var%>%ungroup()%>%dplyr::select(-country)
str(inde_var)
ranger(value_mean~., data = inde_var)
gbm1 = gbm(formula = value_mean~., data =inde_var, distribution = "gaussian", n.trees = 2000,
interaction.depth = 6, shrinkage = 0.01, bag.fraction = 0.5 )
gbm1
summary(gbm1)
plot(gbm1, i.var = 2:3)
#plot(gbm1, i.var = 1)
#rf_residual <- pre_rf - rdf_test$NO2
y_varname= "value_mean"
x = inde_var%>%dplyr::select(-value_mean)
df_x = data.table(inde_var, keep.rownames = F)
df_y = inde_var$value_mean
formu = as.formula(paste(y_varname, "~.", sep = ""))
dfmatrix_x = sparse.model.matrix(formu, data = df_x)[, -1]
train_b = xgb.DMatrix(data = dfmatrix_x, label = df_y)
max_depth = 4
eta = 0.01
nthread = 4
nrounds = 800
Gamma = 2
bst <- xgboost(data = train_b, max_depth = max_depth, eta = eta, nthread = nthread, nrounds = nrounds, Gamma = Gamma, verbose = 1, print_every_n = 200, early_stopping_rounds = 50, maximize = F )
pre_mat = subset_grep(inde_var , grepstring = paste0("value_mean|",vastring))
rf = ranger(value_mean~ ., data = na.omit( pre_mat), mtry = 25, num.trees = 1000, importance = "permutation")
rf
# ranger method
sort(importance(rf), decreasing = T)
pre_mat_s = inde_var%>%na.omit%>%ungroup() %>% dplyr::select(value_mean, road_class_2_50, nightlight_500, population_5000, road_class_M345_1000)
lm_s = lm(value_mean~., data = pre_mat_s)
rf_s = ranger(value_mean~ ., data = pre_mat_s, num.trees = 1000, importance = "permutation")
rf_s
pre_mat_predictor = pre_mat_s%>%dplyr::select(-value_mean)
ggpairs(pre_mat_predictor)
p_lm = partial(lm_s, "road_class_M345_1000",plot = TRUE, rug = TRUE)
plot(p_lm) # Linear regression
p2 = partial(rf_s, "road_class_M345_1000",plot = TRUE, rug = TRUE)
plot(p2) # random forest
library(lme4)
a2 = lm(value_mean ~ population+ road_class_M345_1000+road_class_2_50 +country , scaledtrain)
smp_size <- floor(0.9 * nrow(merged_mr))
smp_size <- floor(0.9 * nrow(inde_var))
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path = 'Figs_geohub/',
echo=T, include = T, warning = FALSE, message = FALSE)
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
#repos='http://cran.muenster.r-project.org'
stdata = c("sp", "sf", "raster")
Stat_methods = c("lme4", "glmnet", "ranger", "gbm", "xgboost", "party", "caret", "gstat")
visual = c("RColorBrewer", "ggplot2", "corrplot", "tmap", "leaflet", "mapview","leafem", "pdp", "vip", "DT", "sparkline")
map = c("maptools")
tidy = c("devtools", "dplyr", "tidyr", "reshape2", "knitr")
other = c("countrycode", "htmlwidgets", "data.table", "Matrix", "GGally")
packages <- c(stdata, tidy, Stat_methods, visual, map, other)
ipak(packages)
install_github("mengluchu/APMtools")
library(APMtools)
ls("package:APMtools")
#gd = fread("~/Documents/GitHub/Global mapping/2020_06_world/stations_20200602.csv")
#avg = fread("~/Documents/GitHub/Global mapping/oaqEUAUCAUS.csv")
#gdata = merge(gd, avg, by.x = c("long", "lat"), by.y = c("LONGITUDE","LATITUDE" ))
#g1 = na_if(gdata, -9999.9)
#g2 = g1%>%dplyr::select(-id, -dir, -V1)%>%filter(value_mean >0)
data("global_annual")
vastring = "road|nightlight|population|temp|wind|trop|indu|elev"
global_annual %>% dplyr::select(value_mean ) %>% summary()
#datatable(g2, rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
merged_mr = merge_roads(global_annual, c(3, 4, 5), keep = F) # keep = T keeps the original roads.
names(merged_mr)
locations_sf = st_as_sf(merged_mr, coords = c("long","lat"))
osm_valuemean = tm_shape(locations_sf) +
tm_dots( "value_mean", col = "value_mean", size = 0.05,title = "NO2 value",
popup.vars = c("value_mean" )) + tm_view(basemaps = c('OpenStreetMap'))
#+tm_shape(lnd)+tm_lines()
tmap_save(osm_valuemean, "NO2mean.html")
countryname = paste(merged_mr$country, countrycode(merged_mr$country, 'iso2c', 'country.name'), sep = ":")
#tag country with ppm
# use the median for colour
mergedmedian = merged_mr %>% group_by(country) %>% mutate(median = median(value_mean, na.rm = TRUE))
nrow(merged_mr)
merged_mr = merged_mr %>% group_by(country) %>% mutate(count1 = n())
countryname_s_e=ifelse( merged_mr$country %in% countrywithppm[countrywithppm %in% merged_mr$country], paste(countryname, "*", sep = ""), countryname)
mergedmedian$countryfullname = paste0(countryname_s_e," (",merged_mr$count1 ,")")
bp2 <- ggplot(mergedmedian, aes(x=countryfullname, y=value_mean, group=country)) +
labs(x = "Country", y = expression(paste("NO"[2], " ", mu, "g/", "m"^3)), cex = 1.5) +
geom_boxplot(aes(fill = median)) +
theme(text = element_text(size = 13), axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_distiller(palette = "Spectral")
# scale_color_brewer(direction = 1)
print(bp2 + ylim(0, 100))
#ggsave("boxplot.png")
merged_mr %>% na.omit %>% filter(country == "DE") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
#merged_mr %>% na.omit %>% filter(country == "US") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
#merged_mr %>% na.omit %>% filter(country == "CA") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
#merged_mr %>% na.omit %>% filter(country == "CN") %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
merged_mr %>% na.omit %>% ungroup() %>%dplyr::select(matches("value_mean|road_|night|trop")) %>% cor %>% corrplot(type = "upper", method = "pie", tl.cex = 0.7)
grd_sp <- as_Spatial(locations_sf)
dt.vgm = variogram(value_mean~1, grd_sp, cutoff= 1)
plot(dt.vgm)
#Moran I test
#install.packages("ape", dependencies = TRUE)
#library(ape)
#merged_mrf = merged_mr%>%filter(country == "US")
#no2.dists <- as.matrix(dist(cbind(merged_mrf$LONGITUDE, merged_mrf$LATITUDE)))
#no2.dists[1:5, 1:5]
#no2.inv <- 1/no2.dists
#diag(no2.inv) <- 0
#no2.inv[1:5, 1:5]
#Moran.I(merged_mrf$value_mean, na.rm = T, no2.inv)
countryvariogram = function(COUN, cutoff){
loca = locations_sf%>%filter(country == COUN)
grd_sp <- as_Spatial(loca)
dt.vgm = variogram(value_mean~1, grd_sp, cutoff = cutoff)
plot(dt.vgm)
}
countryvariogram("DE", 1)
countryvariogram("US", 1)
countryvariogram("CN", 1) # reason?
inde_var = merged_mr %>%na.omit() # 70 variables
inde_var %>% filter(country=="DE")%>%ungroup%>% dplyr::select(matches("road_class_M345|nightlight|temperature_2m_7|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% filter(country=="DE")%>%ungroup%>% dplyr::select(matches("road_class_2|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "gam")
inde_var %>%ungroup%>% dplyr::select(matches("road_class_M345|nightlight|temperature_2m_7|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% ungroup%>% dplyr::select(matches("road_class_2|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "loess")
inde_var %>% ungroup%>% dplyr::select(matches("road_class_1|value")) %>% scatterplot(y_name = "value_mean", fittingmethod = "gam")
#inde_var %>% dplyr::select(matches("Tro|OMI|Rsp|night_value")) %>% scatterplot(y_name = "night_value", fittingmethod = "loess")
# why?
# can choose any variable to look at the scatterplot
#inde_var %>% dplyr::select(matches("ROAD_1|day_value")) %>% scatterplot(y_name = "day_value", fittingmethod = "gam")
merged_mr = merged_mr%>%drop_na()
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.9 * nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged$value_mean
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged$country
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged$Tropomi_2018, seq(min(merged$Tropomi_2018), max(merged$Tropomi_2018), 100), label = c(1:(length(seq(min(merged$Tropomi_2018), max(merged$Tropomi_2018), 100))-1))))
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
caled1$nifac = as.factor(cut(merged$ni_light, sep, label = c(1:(length(sep)-1))))
caled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
caled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaledtrain = scaled1[training,]
scaledtest = scaled1[test,]
test = seq_len(nrow(merged_mr))[-training]
scaledtest = scaled1[test,]
a = lmer(value_mean ~ pop5k + ROAD_M345_1000 + ROAD_M345_50 + ROAD_2_100 + ROAD_2_500+ Tropomi_2018 + ni_light+(ROAD_2_50| country), scaledtrain)
a = lmer(value_mean ~ population5k +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, scaledtest)
error_matrix(scaledtest$value_mean,prediction = pre)
scaled1 = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country)
scaledtrain = scaled1_sel[training,]
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
rangers(value_mean~., data =scaled_sel)
rf=predict(prediction(ranger(value_mean~., data =scaled_sel),scaletest))
rf=predict(ranger(value_mean~., data =scaled_sel),scaletest)
rf=predict(ranger(value_mean~., data =scaled_sel),scaledtest)
error_matrix(scaledtest$value_mean,prediction = rf)
rf
rfpre=predict(ranger(value_mean~., data =scaled_sel),scaledtest)
rfpre
rfpre=prediction(predict(ranger(value_mean~., data =scaled_sel),scaledtest))
rfpre
prediction(rfpre)
predictions(rfpre)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()
error_matrix(scaledtest$value_mean,prediction = pre)
error_matrix(scaledtest$value_mean,prediction = rfpre)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, scaledtest)
pre= predict(a, newdata = scaledtest)
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, newdata = scaledtest)
scaledtest
scaledtest = scaled_sel[test,]%>%select(-value_mean)
pre= predict(a, newdata = scaledtest)
pre= predict(a, scaledtest)
scaledtest = scaled_sel[test,]%>%select(-value_mean)
pre= predict(a, scaledtest)
scaledtest
error_matrix(scaledtest$value_mean,prediction = pre)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()
error_matrix(scaledtest$value_mean,prediction = rfpre)
rfpre
scaledtest = scaled_sel[test,]
error_matrix(scaledtest$value_mean,prediction = rfpre)
set.seed(1)
smp_size <- floor(0.9 * nrow(merged_mr))
training<- sample(seq_len(nrow(merged)), size = smp_size)
smp_size <- floor(0.8 * nrow(merged_mr))
training<- sample(seq_len(nrow(merged)), size = smp_size)
nrow(merged_mr)
set.seed(1)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, scaledtest)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()
error_matrix(scaledtest$value_mean,prediction = pre)
error_matrix(scaledtest$value_mean,prediction = rfpre)
set.seed(2)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, scaledtest)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()
error_matrix(scaledtest$value_mean,prediction = pre)
error_matrix(scaledtest$value_mean,prediction = rfpre)
ranger(value_mean~., data =scaled_sel)
error_matrix(scaledtest$value_mean,prediction = rfpre)
set.seed(3)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
a
pre= predict(a, scaledtest)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()
error_matrix(scaledtest$value_mean,prediction = pre)
error_matrix(scaledtest$value_mean,prediction = rfpre)
ranger(value_mean~., data =scaled_sel)
a = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +road_class_2_50, scaledtrain)
a = lm(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +road_class_2_50, scaledtrain)
pre2= predict(a2, scaledtest)
a2 = lm(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +road_class_2_50, scaledtrain)
pre2= predict(a2, scaledtest)
error_matrix(scaledtest$value_mean,prediction = pre2)
error_matrix(scaledtest$value_mean,prediction = pre)
set.seed(3)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
me = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
fixe = lm(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +road_class_2_50, scaledtrain)
pre_me1= predict(me, scaledtest)
pre_fixe= predict(fixe, scaledtest)
error_matrix(scaledtest$value_mean,prediction = pre_me)
set.seed(3)
merged_mr = merged_mr%>%drop_na()
smp_size <- floor(0.8* nrow(merged_mr))
training<- sample(seq_len(nrow(merged_mr)), size = smp_size)
test = seq_len(nrow(merged_mr))[-training]
mpre =subset_grep(merged_mr, vastring)
scaled1 = data.frame(apply(mpre, 2, scale))
scaled1$value_mean = merged_mr$value_mean
scaled1$country = merged_mr$country
scaled1$tropofac = as.factor(cut(merged_mr$trop_mean_filt, seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100), label = c(1:(length(seq(min(merged_mr$trop_mean_filt), max(merged_mr$trop_mean_filt), 100))-1))))
sep = c( -0.001, 10, 30, 60, max(merged_mr$nightlight_5000))
scaled1$nifac = as.factor(cut(merged_mr$nightlight_5000, sep, label = c(1:(length(sep)-1))))
scaled_sel = scaled1%>%select(population_5000 ,road_class_M345_1000 , road_class_M345_50 , road_class_2_100 ,road_class_2_25 , trop_mean_filt , nightlight_500 ,road_class_2_50, country, value_mean)
scaledtrain = scaled_sel[training,]
scaledtest = scaled_sel[test,]
me = lmer(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +(road_class_2_50| country), scaledtrain)
fixe = lm(value_mean ~ population_5000 +road_class_M345_1000 + road_class_M345_50 + road_class_2_100 + road_class_2_25 + trop_mean_filt + nightlight_500 +road_class_2_50, scaledtrain)
pre_me1= predict(me, scaledtest)
pre_fixe= predict(fixe, scaledtest)
error_matrix(scaledtest$value_mean,prediction = pre_me1)
error_matrix(scaledtest$value_mean,prediction = pre_fixe)
error_matrix(scaledtest$value_mean,prediction = rfpre)
rfpre= predict(ranger(value_mean~., data =scaled_sel),scaledtest)%>%predictions()