-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01.prepdatafull.R
57 lines (49 loc) · 2.59 KB
/
01.prepdatafull.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
################################################################################
# Updated version of the R code for the analysis in:
#
# "Scortichini M, Schneider Dos Santos R, De' Donato F, De Sario M,
# Michelozzi P, Davoli M, Masselot P, Sera F, Gasparrini A.
# Excess mortality during the COVID-19 outbreak in Italy: a two-stage
# interrupted time-series analysis.
# International Journal of Epidemiology. 2020. DOI: 10.1093/ije/dyaa169."
# http://www.ag-myresearch.com/italyCOVIDdeath.html
#
# Update: 21 October 2020
# * an updated version of this code, compatible with future versions of the
# software, is available at:
# https://github.com/gasparrini/ItalyCOVIDdeath
################################################################################
################################################################################
# DOWNLOAD THE DATA, RESHAPE, RENAME AND TRANSFORM
################################################################################
# LOCATE THE ARCHIVE: REMOTE URL (source) AND LOCAL COPY IN data/ (file)
# NB: THE ZIP IS EXPECTED IN THE FOLDER ALREADY, UNCOMMENT THE curl_download
#   LINE BELOW TO DOWNLOAD IT AGAIN
source <- paste0("https://www.istat.it/it/files//2020/03/Dataset-decessi-",
  "comunali-giornalieri-e-tracciato-record-1.zip")
file <- paste0("data/",
  "Dataset-decessi-comunali-giornalieri-e-tracciato-record-1.zip")
# curl_download(url=source, destfile=file, quiet=FALSE, mode="wb")
# EXTRACT INTO THE data FOLDER, WITHOUT OVERWRITING FILES ALREADY THERE
# NB: TRUE/FALSE SPELLED OUT, AS T/F ARE ORDINARY VARIABLES THAT CAN BE MASKED
unzip(zipfile=file, exdir=file.path(getwd(), "data"), overwrite=FALSE)
# READ THE DATA ("n.d." TREATED AS MISSING), THEN ERASE UNZIPPED (LARGE FILE)
dataorig <- fread("data/comuni_giornaliero_15maggio.csv", na.strings="n.d.")
file.remove("data/comuni_giornaliero_15maggio.csv")
# RESHAPE TO LONG: EACH ORIGINAL ROW IS REPEATED 6 TIMES, ONE PER YEAR 2015-2020
# NB: ONLY THE FIRST 9 COLUMNS ARE CARRIED OVER; seq_len() IS USED SO THAT AN
#   EMPTY TABLE GIVES NO ROWS (seq(0) WOULD RETURN c(1, 0))
datafull <- dataorig[rep(seq_len(nrow(dataorig)), each=6), 1:9]
datafull$year <- rep(2015:2020, nrow(dataorig))
# COUNTS BY SEX AND TOTAL: c(t(.)) FLATTENS THE SELECTED COLUMNS ROW BY ROW,
#   MATCHING THE ROW REPETITION ABOVE
# NB: ASSUMES EXACTLY 6 COLUMNS MATCH EACH OF "M_", "F_", "T_", ORDERED FROM
#   2015 TO 2020 - TO BE CONFIRMED AGAINST THE RECORD LAYOUT
datafull$male <- c(t(dataorig[, grep("M_", names(dataorig), fixed=TRUE),
  with=FALSE]))
datafull$female <- c(t(dataorig[, grep("F_", names(dataorig), fixed=TRUE),
  with=FALSE]))
datafull$tot <- c(t(dataorig[, grep("T_", names(dataorig), fixed=TRUE),
  with=FALSE]))
# RENAME THE ORIGINAL VARIABLES (new=OLD), CODES AND NAMES PAIRED BY LEVEL
datafull <- rename(datafull,
  regcode=REG, regname=NOME_REGIONE,
  provcode=PROV, provname=NOME_PROVINCIA,
  municcode=COD_PROVCOM, municname=NOME_COMUNE,
  munictype=TIPO_COMUNE, agegrfull=CL_ETA)
# GENERATE DATE: GE IS SPLIT AS MONTH*100+DAY, THEN DROPPED ONCE date IS BUILT
datafull <- mutate(datafull,
  month=GE %/% 100,
  day=GE %% 100,
  date=make_date(year, month, day),
  GE=NULL)
# DAILY DATE SERIES FROM 1 JANUARY 2015 TO 15 MAY 2020
seqdate <- seq(dmy("01012015"), dmy("15052020"), by=1)
# KEEP ONLY ROWS WITH A DATE IN THE SERIES: THIS REMOVES THE LAST PERIOD AND
#   THE ERRONEOUS LEAP DAYS
datafull <- subset(datafull, subset=date %in% seqdate)
# ORDER BY REGION CODE, THEN PROVINCE CODE, THEN DATE
datafull <- datafull %>%
  arrange(regcode, provcode, date)