library(dplyr)
library(lubridate)
library(ggplot2)
library(GGally)
library(corrplot)
library(plotly)
library(car)
df <- read.csv("covid3.csv")
df$date <- mdy(df$date) ### change the date type
df$private_cases_rate <- round(df$private_cases/df$new_cases,3) ### idiotiki protovoulia case rate
private_median_case_rate <- median(df$private_cases_rate[204:229], na.rm=T) ### median case rate for October
### calculating the cases using the October's median
df[which(is.na(df$private_cases)),]$private_cases <- round(df[which(is.na(df$private_cases)),]$new_cases * private_median_case_rate)
df[which(is.na(df$private_cases_rate)),]$private_cases_rate[22:24]<- round(df$private_cases[217:219]/df$new_cases[217:219],3)
df[which(is.na(df$private_cases_rate)),]$private_cases_rate[1:21] <- as.numeric("0.000")
df$total_private_tests <- cumsum(df$private_test)
df$public_tests <- df$new_tests-df$private_test
df$public_cases <- df$new_cases - df$private_cases
df$pos_rate_total <- round(df$new_cases/df$new_tests , 3)
df$pos_rate_private <- round(df$private_cases/df$private_test,3)
df$pos_rate_public <- round(df$public_cases/df$public_tests,3)
df$private_perc <- round(df$private_test/df$new_tests,3)
ggplotly(ggplot(df, aes(x=date, y= total_cases,colour="Total"))+ geom_line(size=1) + ylab("Number of Cases") + xlab("Days") + ggtitle("Total cases") + geom_line(aes(x=date, y= cumsum(public_cases),colour="Public")) + theme_bw() + geom_line(aes(x=date, y= cumsum(private_cases),colour="Private")) + scale_color_discrete(name = "Test type", labels = c(Total="Red", Public="Blue", Private="Green")))
ggplot(df[18:229,], aes(x=date, y=cumsum(private_tests))) + geom_line(colour="Red", size=1) + geom_line(aes(x=date, y=cumsum(public_tests)), colour="Blue", size=1) + ylab("Number of tests") + ggtitle("Total number of tests") + theme_bw()
ggplotly(ggplot(df, aes(x=date,y=new_cases)) + geom_line(colour="Red", size=1) + ylab("Number of cases") + ggtitle("New total cases") + theme_bw())
ggplotly(ggplot(df, aes(x=date,y=private_cases,colour="Private")) + geom_line( size=1) + ylab("Number of cases") + ggtitle("New cases") + theme_bw() + geom_line(aes(x=date,y=public_cases, colour="Public"), size=1)+ scale_color_discrete(name = "Test type", labels = c(Private="Red", Public="Blue")))
ggplotly(ggplot(df, aes(x=date,y=private_tests)) + geom_line(aes(colour="Private"), size=1) + ylab("Number of cases") + ggtitle("Daily number of tests") + theme_bw() + geom_line(aes(x=date,y=new_tests, colour="Total"), size=1) + scale_color_discrete(name = "Test type", labels = c(Private="Red", Total="Blue")))
ggplotly(ggplot(df[50:229,]) + geom_line(aes(x=date, y=pos_rate_total,colour="Total"), size=1) + geom_line(aes(x=date,y=pos_rate_public,colour="Public"), size=1) + ggtitle("Positive rate per type") + geom_line(aes(x=date,y=pos_rate_private,colour="Private"), size=1) + theme_bw() + ylab("Positive rate")+ scale_color_discrete(name = "Test type", labels = c(Total="Red", Public="Blue", Private="Green")))
ggplotly(ggplot(df, aes(x=date,y=private_perc))+geom_line(size=0.7, colour="Orange")+theme_bw()+geom_smooth(span=0.1)+ylab("Percentage")+ ggtitle("Private test percentage"))
ylim.prim <- c(0,5000)
ylim.sec <- c(0,0.08)
b <- diff(ylim.prim)/diff(ylim.sec)
a <- b*(ylim.prim[1] - ylim.sec[1])
ggplot(df, aes(x=date, y=new_tests)) + geom_bar(stat="identity", colour="white", fill="Red") + geom_bar(aes(x=date,y=new_cases), stat="identity",colour="blue", fill="blue", size=1) + theme_minimal() + geom_line(aes(y= b*pos_rate_total), size=1, colour="black") + scale_y_continuous(name = "Number of cases",
sec.axis = sec_axis(~ (. - a)/b, name = "Positive tests"))
cor_cov <- cor(df[,2:16],use = "complete.obs")
corrplot(cor_cov,order = "hclust")
corrplot(cor_cov, method = "color",type = "upper", order = "hclust",addCoef.col = "red",diag = FALSE)
plot(df[50:229,]$pos_rate_private,df[50:229,]$pos_rate_total)
model <- lm( pos_rate_private~pos_rate_total, data=df[50:229,])
summary(model)
##
## Call:
## lm(formula = pos_rate_private ~ pos_rate_total, data = df[50:229,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.019978 -0.002391 -0.000195 0.001356 0.047609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0001952 0.0005611 0.348 0.728
## pos_rate_total 1.2989253 0.0603999 21.505 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006369 on 177 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7232, Adjusted R-squared: 0.7217
## F-statistic: 462.5 on 1 and 177 DF, p-value: < 2.2e-16
symptomatic <- read.csv("symptomatic.csv")
symptomatic$date <- ymd(symptomatic$date)
ggplotly(ggplot(symptomatic, aes(x=date, y=sym_cases))+ geom_bar(stat="identity", fill="blue", colour="white") + theme_minimal() + ylab("N. of cases") + ggtitle("Cases with symptoms. Date of first day of symptoms appeared"))
merged <- merge(df, symptomatic, all.x=T, by="date")
ggplotly(ggplot(merged, aes(x=date,y=private_cases, colour="Private"))+ geom_line(size=1)+ geom_line(aes(x=date,y=sym_cases,colour="Symptomatic"), size=1)+ theme_minimal() + scale_color_discrete(name = "Test type", labels = c(Private="Red", Symptomatic="Blue"))+ ylab("N. of cases"))
model2 <- lm(new_cases~new_tests+private_cases+private_tests+sym_cases + private_cases * sym_cases, data=merged)
summary(model2)
##
## Call:
## lm(formula = new_cases ~ new_tests + private_cases + private_tests +
## sym_cases + private_cases * sym_cases, data = merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.260 -5.165 -2.486 2.191 42.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.2911227 1.6501059 4.419 1.61e-05 ***
## new_tests -0.0005625 0.0009695 -0.580 0.56242
## private_cases 1.5596013 0.0832931 18.724 < 2e-16 ***
## private_tests -0.0046256 0.0029573 -1.564 0.11934
## sym_cases 0.5339078 0.0580170 9.203 < 2e-16 ***
## private_cases:sym_cases -0.0042702 0.0015114 -2.825 0.00519 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.006 on 204 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.9069, Adjusted R-squared: 0.9046
## F-statistic: 397.4 on 5 and 204 DF, p-value: < 2.2e-16
### VAF
predicted <- predict(model2, merged[,c(6:8,18)])
(1-var(merged$new_cases[20:227]-predicted[20:227])/var(merged$new_cases[20:227]))*100
## [1] 91.02076
### VIF
vif1 <- vif(model2)
vif1
## new_tests private_cases private_tests
## 2.237857 5.677579 3.886717
## sym_cases private_cases:sym_cases
## 1.955732 4.794124
barplot(vif1, main = "VIF Values", col = "steelblue",cex.names=0.7)
ggplot(merged, aes(x=sym_cases,y=new_cases)) + geom_point() + theme_minimal() + xlab("Symptomatic cases") + ylab("New cases")
## Warning: Removed 2 rows containing missing values (geom_point).
model_sn <- lm(new_cases~sym_cases, data=merged)
summary(model_sn)
##
## Call:
## lm(formula = new_cases ~ sym_cases, data = merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66.257 -6.189 -2.947 0.811 150.766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.94712 1.64017 3.016 0.00285 **
## sym_cases 1.24179 0.09819 12.647 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.55 on 225 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.4155, Adjusted R-squared: 0.4129
## F-statistic: 160 on 1 and 225 DF, p-value: < 2.2e-16
ggplot(merged, aes(x=sym_cases, y=private_cases)) + geom_point() + theme_minimal() + xlab("Symptomatic cases") + ylab("Private cases")
## Warning: Removed 2 rows containing missing values (geom_point).
model_sp <- lm(private_cases~sym_cases, data=merged)
summary(model_sp)
##
## Call:
## lm(formula = private_cases ~ sym_cases, data = merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.803 -3.360 -0.513 0.487 124.340
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.09326 1.12487 -0.083 0.934
## sym_cases 0.60591 0.06734 8.998 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.78 on 225 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2646, Adjusted R-squared: 0.2614
## F-statistic: 80.96 on 1 and 225 DF, p-value: < 2.2e-16
ggplot(merged, aes(x=private_cases, y=new_cases)) + geom_point() + theme_minimal() + xlab("Private cases") + ylab("New cases") + geom_smooth()
model_pn <- lm(new_cases~private_cases, data=merged)
summary(model_pn)
##
## Call:
## lm(formula = new_cases ~ private_cases, data = merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.800 -6.843 -3.825 2.480 50.175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.82498 0.74898 10.45 <2e-16 ***
## private_cases 1.53554 0.04178 36.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.86 on 226 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8567, Adjusted R-squared: 0.856
## F-statistic: 1351 on 1 and 226 DF, p-value: < 2.2e-16
ggplotly(ggplot(merged, aes(x=sym_cases, y=private_cases, colour="Private")) + geom_point( size=3, alpha=0.5) + theme_minimal() + xlab("Symptomatic cases") + ylab("Cases") + geom_point(aes(x=sym_cases, y=new_cases, colour="Daily"), size=3, alpha=0.5) + geom_smooth(aes(x=sym_cases, y=private_cases), fill="Blue", alpha=0.2) + geom_smooth(aes(x=sym_cases, y=new_cases), fill="Red", alpha=0.2) + scale_color_discrete(name = "Test type", labels = c(Private="Red", Daily="Blue")))
corr2 <- cor(merged[,c(7,9,18)],use = "complete.obs")
corrplot(corr2, method = "color",type = "upper", order = "hclust",addCoef.col = "red",diag = FALSE)
model3 <- lm(new_cases~new_tests + private_tests, data=merged, na.action = na.omit)
summary(model3)
##
## Call:
## lm(formula = new_cases ~ new_tests + private_tests, data = merged,
## na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.266 -12.212 -4.060 6.704 118.245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.411704 3.967254 -0.608 0.544
## new_tests -0.002607 0.002300 -1.134 0.258
## private_tests 0.052331 0.005320 9.837 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.06 on 208 degrees of freedom
## (18 observations deleted due to missingness)
## Multiple R-squared: 0.4529, Adjusted R-squared: 0.4476
## F-statistic: 86.09 on 2 and 208 DF, p-value: < 2.2e-16
vif(model3)
## new_tests private_tests
## 2.10257 2.10257
model4 <-lm(new_cases~private_tests + public_tests, data=merged, na.action = na.omit)
summary(model4)
##
## Call:
## lm(formula = new_cases ~ private_tests + public_tests, data = merged,
## na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.266 -12.212 -4.060 6.704 118.245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.411704 3.967254 -0.608 0.544
## private_tests 0.049724 0.003984 12.481 <2e-16 ***
## public_tests -0.002607 0.002300 -1.134 0.258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.06 on 208 degrees of freedom
## (18 observations deleted due to missingness)
## Multiple R-squared: 0.4529, Adjusted R-squared: 0.4476
## F-statistic: 86.09 on 2 and 208 DF, p-value: < 2.2e-16
vif(model4)
## private_tests public_tests
## 1.179121 1.179121
```