library(tidyverse,warn.conflicts=F, quietly=T)
library(rstan,warn.conflicts=F, quietly=T)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())#these libraries need to be loaded
library(utils)
#read the Dataset sheet into “R”.
#The dataset will be called "covidata".
covidata <- read.csv("https://opendata.ecdc.europa.eu/covid19/nationalcasedeath_eueea_daily_ei/csv", na.strings = "", fileEncoding = "UTF-8-BOM") #takes some time!
#read the Dataset sheet of countries' responses into “R”.
#The dataset will be called "resp".
resp <- read.csv("Code Build/response_graphs_data_2022-08-25.csv")covidata$dateRep=as.Date(covidata$dateRep,'%d/%m/%Y')
resp$date_start=as.Date(resp$date_start,"%Y/%m/%d")
resp$date_end=as.Date(resp$date_end,"%Y/%m/%d",optional=TRUE)
resp=na.omit(resp)respbydate<-resp %>%
rowwise() %>%
do(data.frame(DATE=seq(.$date_start, .$date_end, by='day'),
Response_measure= rep(.$Response_measure, .$date_start-.$date_start+1),
Country=rep(.$Country, .$date_start-.$date_start+1)))
respbydate$ID=paste(respbydate$Country,respbydate$DATE)
respbydate<-respbydate%>%spread(Response_measure, 1)Trouble-shooting: if the reviewer got error when running these lines, please check the reading process of the data. We found that using xlsx downloaded from Canvas or using “csv2” package may failed to run these lines
covidata$ID=paste(covidata$countriesAndTerritories,covidata$dateRep)
mgdata<-merge(covidata,respbydate,by.x="ID",by.y="ID",all=TRUE)
mgdata_<-apply(mgdata[,c(14:79)], 2, function(x){
x[!is.na(x)]=1;x
})
mgdata_<-apply(mgdata_,2, function(x){
x[is.na(x)]=0;x
})
mgdata=cbind (mgdata[,c(1:13)],mgdata_)
mgdata$Country <- ifelse(is.na(mgdata$Country), mgdata$countriesAndTerritories,mgdata$Country)for (x in seq(14,79,by=1)){
mgdata[,x]=as.numeric(mgdata[,x])
}
grep("Work|work",colnames(mgdata))
mgdata$wk <- rowSums(mgdata[,c(14, 76, 78)])
grep("Mask",colnames(mgdata))
mgdata$mask <- rowSums(mgdata[,c(40 ,41 ,42, 43)])
grep("Quarantine|Hotel",colnames(mgdata))
mgdata$trav <- rowSums(mgdata[,c(34, 35, 62 ,63)])
grep("StayHome",colnames(mgdata))
mgdata$home <- rowSums(mgdata[,c(64, 65, 70, 71, 72, 73, 74,75)])
mgdata$home<-ifelse(mgdata$home!=0,1,mgdata$home)
grep("Ban|Mass|Indoor|Outdoor",colnames(mgdata))
mgdata$pbg <- rowSums(mgdata[,c(16, 36, 38, 48, 50,54, 56)])
mgdata$pbg<-ifelse(mgdata$pbg!=0,1,mgdata$pbg)
grep("Private|SocialCircle",colnames(mgdata))
mgdata$prg <- rowSums(mgdata[,c(60, 61, 68 ,69)])
grep("^Clos",colnames(mgdata))
mgdata$sch <- rowSums(mgdata[,c(18, 20, 22, 24, 26)])
grep("PublicTransport",colnames(mgdata))
mgdata$pbtf <- rowSums(mgdata[,c(28,29)])
grep("Entertainment|Gyms|Shops|Worship|Cafes",colnames(mgdata))
mgdata$cps <- rowSums(mgdata[,c(30, 32,52, 58, 66)])
mgdata$pysum <- rowSums(mgdata[,c(80:88)])library(dplyr)
mgdata<-filter(mgdata, mgdata$cases>=0)
mgdata<-filter(mgdata, mgdata$deaths>=0)remove(mgdata_)
remove(covidata)
remove(resp)
remove(respbydate)
gc() #Free unused memory## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1498523 80.1 4193250 224.0 3036041 162.2
## Vcells 5422381 41.4 23420751 178.7 23408072 178.6
ggplot(data=mgdata,aes(x = dateRep, y = cases, color=Country))+geom_line()+labs(title = "Newly Reported COVID-19 cases in Europe by day",
x = "Date of Report",
y = "Country")+theme(text = element_text(size = 20))mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=pysum))+
scale_colour_gradient(low = "light blue", high = "red")+labs(title = "Number of Policies implemented in Europe by day",
x = "Date",
y = "Country")+theme(text = element_text(size = 20))
We also use these code to view the distribution of policies in countries
over time. We adjusted our deininations of policies after failing to see
enough differences of policies adopted in countries at the same time
period.
wktl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=wk))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
masktl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=mask))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
travtl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=trav))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
hometl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=home))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
pbgtl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=pbg))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
prgtl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=prg))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
schtl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=sch))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
pbtftl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=pbtf))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")
cpstl<-mgdata%>%ggplot()+geom_line(aes(x = dateRep, y = Country, color=cps))+
scale_colour_gradient(low = "light blue", high = "blue", guide = "none")options("scipen"=100, "digits"=4) #remove scientific notation
#Package required"glm"
mgdata$cases_lead=lead(mgdata$cases,7L)
mgdata$deaths_lead=lead(mgdata$deaths,7L)
fm1<-cases_lead~ wk+mask+trav+home+pbg+prg+sch+pbtf+cps + popData2020
fm2<-cases_lead~ wk+mask+trav+home+pbg+prg+sch+pbtf+cps + log(popData2020)
glm1 <- glm(fm1, data = na.omit(mgdata), family = poisson(link="log"))
glm2 <- glm(fm2, data = na.omit(mgdata), family = poisson(link="log"))
summary(glm1)##
## Call:
## glm(formula = fm1, family = poisson(link = "log"), data = na.omit(mgdata))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -410.4 -64.7 -42.1 -6.2 1698.8
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.68827375765606 0.00018857203887 40771 <0.0000000000000002 ***
## wk -0.03431230805708 0.00015086790492 -227 <0.0000000000000002 ***
## mask 0.43724186060443 0.00021319159810 2051 <0.0000000000000002 ***
## trav -0.24506386580498 0.00014537633111 -1686 <0.0000000000000002 ***
## home 0.07420345938909 0.00018831020599 394 <0.0000000000000002 ***
## pbg -0.58467124593211 0.00019397380998 -3014 <0.0000000000000002 ***
## prg 0.28982071006519 0.00018342127432 1580 <0.0000000000000002 ***
## sch -0.25717285466034 0.00013270283216 -1938 <0.0000000000000002 ***
## pbtf 0.15481072938831 0.00023305461706 664 <0.0000000000000002 ***
## cps -0.05859722754361 0.00010185468799 -575 <0.0000000000000002 ***
## popData2020 0.00000003737789 0.00000000000254 14690 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 594917927 on 28305 degrees of freedom
## Residual deviance: 318927391 on 28295 degrees of freedom
## AIC: 319154561
##
## Number of Fisher Scoring iterations: 6
summary(glm2) #with smaller AIC, adopted##
## Call:
## glm(formula = fm2, family = poisson(link = "log"), data = na.omit(mgdata))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -336.5 -60.1 -23.8 3.3 1581.3
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.5194546 0.0013252 -5674 <0.0000000000000002 ***
## wk -0.2106104 0.0001451 -1452 <0.0000000000000002 ***
## mask 0.3952246 0.0002184 1810 <0.0000000000000002 ***
## trav -0.1203145 0.0001528 -787 <0.0000000000000002 ***
## home -0.0928037 0.0001914 -485 <0.0000000000000002 ***
## pbg -0.6001896 0.0001994 -3010 <0.0000000000000002 ***
## prg 0.3095710 0.0001814 1707 <0.0000000000000002 ***
## sch -0.2718481 0.0001294 -2101 <0.0000000000000002 ***
## pbtf 0.3264349 0.0002292 1425 <0.0000000000000002 ***
## cps -0.0253386 0.0001052 -241 <0.0000000000000002 ***
## log(popData2020) 0.9864175 0.0000769 12826 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 594917927 on 28305 degrees of freedom
## Residual deviance: 293368429 on 28295 degrees of freedom
## AIC: 293595599
##
## Number of Fisher Scoring iterations: 6
anova(glm1,glm2,test="Chisq") #Cannot understand the resultfm3<-deaths_lead~ wk+mask+trav+home+pbg+prg+sch+pbtf+cps + log(popData2020)
glm3 <- glm(fm3, data = na.omit(mgdata), family = poisson(link="log"))
summary(glm3)##
## Call:
## glm(formula = fm3, family = poisson(link = "log"), data = na.omit(mgdata))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -28.1 -4.7 -1.9 0.8 388.6
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.209270 0.016772 -727.9 <0.0000000000000002 ***
## wk 0.025921 0.001323 19.6 <0.0000000000000002 ***
## mask 0.409350 0.002685 152.4 <0.0000000000000002 ***
## trav 0.032495 0.001502 21.6 <0.0000000000000002 ***
## home 0.265870 0.002438 109.1 <0.0000000000000002 ***
## pbg -0.077288 0.002380 -32.5 <0.0000000000000002 ***
## prg 0.183327 0.002082 88.1 <0.0000000000000002 ***
## sch -0.014353 0.000864 -16.6 <0.0000000000000002 ***
## pbtf 0.145629 0.002491 58.5 <0.0000000000000002 ***
## cps 0.274627 0.000862 318.4 <0.0000000000000002 ***
## log(popData2020) 0.914457 0.000973 940.0 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3334090 on 28305 degrees of freedom
## Residual deviance: 1453286 on 28295 degrees of freedom
## AIC: 1547197
##
## Number of Fisher Scoring iterations: 6