Loading packages for analysis

library(ggplot2)
library(gridExtra)
library(gridExtra)
library(ggrepel)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)

Number of cases, deaths

# Source of data: https://www.worldometers.info/coronavirus
dat = read.csv("~/Dropbox/Temp Files/Corona virus/Statistics of COVID-19 5-3-2020.csv")

dat$CFR = dat$Deaths/dat$Total.Cases*100
dat$Serious1 = dat$Serious/dat$Total.Cases*100

# Number of cases by country 

hh = transform(dat, Country=reorder(Country, Total.Cases))
ggplot(data=hh, aes(x=Country, y=log(Total.Cases+1), fill=Country)) + geom_bar(stat="identity") + coord_flip() + labs(y="Number of cases (log; till 6/3/2020)", x="") + theme(legend.position="none")

Relationship between cases and temperatue

ggplot(data=dat, aes(x=Temp, y=log(Total.Cases))) + geom_point(aes(col=Country)) + geom_text_repel(aes(label=Country, col=Country)) + geom_smooth(method="lm") + labs(x="Temperature (oC)", y="Number of cases (log scale; till 6/3/2020)") + theme(legend.position="none")
## Warning: Removed 43 rows containing non-finite values (stat_smooth).
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 43 rows containing missing values (geom_text_repel).

Cluster analysis

library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggfortify)
temp = na.omit(dat[, c("Country", "Temp", "Humid", "Total.Cases")])
temp$Total.Cases=log(temp$Total.Cases+1)

# row name
t1 = temp[-1]
row.names(t1) = temp$Country

res = eclust(t1, "kmeans", 5, nstart = 25)

autoplot(pam(t1, 5), label=T, frame=T, frame.type="norm")

Risk of deaths by country

# Source of data: https://www.worldometers.info/coronavirus
dat = read.csv("~/Dropbox/Temp Files/Corona virus/Statistics of COVID-19 5-3-2020.csv")

dat$CFR = dat$Deaths/dat$Total.Cases*100
dat$Serious1 = dat$Serious/dat$Total.Cases*100

death = subset(dat, Country %in% c("China", "S. Korea", "Iran", "Italy", "USA", "Japan"))
death1 = transform(death, Country=reorder(Country, CFR))

ggplot(data=death1, aes(x=Country, y=CFR, fill=Country)) + geom_bar(stat="identity") + coord_flip() + labs(y="Case Fatality Rate (%)", x="") + theme(legend.position="none")

Number of daily cases and deaths

# Source of data: https://www.worldometers.info/coronavirus
dat = read.csv("~/Dropbox/Temp Files/Corona virus/Cases and deaths from Jan 2020.csv")
dat$Date = factor(dat$Date, levels = dat$Date)

ggplot(data=dat, aes(x=Date, y=Daily.Cases, col=Date, label=Daily.Cases)) + geom_point(stat="identity", fill="black", size=5) + geom_text(color="black", size=3) + geom_segment(aes(x=Date, xend=Date, y=0, yend=Daily.Cases)) + labs(x="Date", y="Daily number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + theme(legend.position="none")

ggplot(data=dat, aes(x=Date, y=Daily.Deaths, col=Date, label=Daily.Deaths)) + geom_point(stat="identity", fill="black", size=5) + geom_text(color="black", size=3) + geom_segment(aes(x=Date, xend=Date, y=0, yend=Daily.Deaths)) + labs(x="Date", y="Daily number of deaths") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + theme(legend.position="none")
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).
## Warning: Removed 2 rows containing missing values (geom_segment).

Rate of mortality by age and gender

# Source: https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html
cov = read.csv("~/Dropbox/Temp Files/Corona virus/Phan tich Wuhan epidemic.csv")
cov$Dur = cov$Patient.Days/cov$Conf.Cases
cov$Patient.Months = cov$Patient.Days/30
cov$Rate1 = cov$Deaths/cov$Conf.Cases*100
cov$Rate2 = cov$Deaths/cov$Patient.Months*100
cov$Rate1 = round(cov$Rate1, 2)
cov$Rate2 = round(cov$Rate2 , 2)
cov$Average1 = sum(cov$Deaths)/sum(cov$Conf.Cases)*100
cov$Average2 = sum(cov$Deaths)/sum(cov$Patient.Months)*100

# Number of cases and deaths by age group 

p1 = ggplot(data=cov, aes(x=Age, y=Deaths)) + geom_bar(stat="identity", fill="red") + labs(x="Age group", y="Number of Deaths")

p2 = ggplot(data=cov, aes(x=Age, y=Conf.Cases)) + geom_bar(stat="identity", fill="blue") + labs(x="Age group", y="Number of Confirmed Cases") + theme(axis.title.x=element_blank(), axis.text.x=element_blank())

grid.arrange(p2, p1, nrow=2)

p1 = ggplot(data=cov, aes(x=Age, y=Rate1, col=Age, label=Rate1)) + geom_point(stat="identity", fill="black", size=5) + geom_text(color="black", size=3) + geom_segment(aes(x=Age, xend=Age, y=0, yend=Rate1)) + geom_hline(yintercept = 2.29, colour="#990000", linetype="dashed") + labs(x="Age group", y="Case Fatality Rate (%)") + theme(legend.position="none") 

p2 = ggplot(data=cov, aes(x=Age, y=Rate2, col=Age, label=Rate2)) + geom_point(stat="identity", fill="black", size=5) + geom_text(color="black", size=3) + geom_segment(aes(x=Age, xend=Age, y=0, yend=Rate2)) + geom_hline(yintercept = 4.46, colour="#990000", linetype="dashed") + labs(x="Age group", y="Rate of mortality per 100 person-months") + theme(legend.position="none")

grid.arrange(p2, p1, nrow=2)

Rate of mortality by comorbidity

# https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html
PDays = c(42603, 17940, 13533, 8083, 1690, 242948) 
Disease = c("Hypertension", "Diabetes", "CVD", "Chronic Resp Dis", "Cancer", "No Comorbidities")
Deaths = c(161, 80, 92, 32, 6, 133)
Patient.Months = PDays/30
Rate2 = round((Deaths/Patient.Months*100), 2) 
cov2 = data.frame(Disease, Deaths, PDays, Patient.Months, Rate2)
cov2$Disease = factor(cov2$Disease, levels=c("Hypertension", "Diabetes", "CVD", "Chronic Resp Dis", "Cancer", "No Comorbidities"))

ggplot(data=cov2, aes(x=Disease, y=Rate2, col=Disease, fill=Disease, label=Rate2)) + geom_bar(stat="identity") + geom_text(color="white", size=3, position=position_stack(vjust=0.95)) + geom_hline(yintercept = 4.46, colour="#990000", linetype="dashed") + labs(x="Disease", y="Rate of mortality per 100 person-months") + theme(legend.position="none")

Rate of mortality in SARS, MERS, and Covid-19

epidemic = c("COVID-19", "SARS", "MERS", "Swine Flu")
rate = c(3.4, 9.6, 34, 0.02)
dat = data.frame(epidemic, rate)

ggplot(data=dat, aes(x=epidemic, y=rate, col= epidemic, fill= epidemic, label=rate)) + geom_bar(stat="identity") + geom_text(color="black", size=3, position=position_stack(vjust=0.95)) + labs(x=" ", y="Case Fatality Rate") + theme(legend.position="none")  

Number of tested, and positives

# Source of data: https://www.worldometers.info/coronavirus
Country = c("UK", "Italy", "France", "Austria", "S. Korea", "USA") 
Tested = c(7132, 9462, 762, 321, 66652, 445)
Positives = c(13, 470, 17, 2, 1766, 14)
Rate.Positives = Positives/Tested*100 
dat = data.frame(Country, Tested, Positives, Rate.Positives)

p1 = ggplot(data=dat, aes(x=Country, y=Tested, fill=Country)) + geom_bar(stat="identity") + theme(legend.position="none") + labs(x="", y="Number of people tested")

p2 = ggplot(data=dat, aes(x=Country, y=Rate.Positives, fill=Country)) + geom_bar(stat="identity") + theme(legend.position="none") + labs(x="", y="Rate of Positives(%)")

grid.arrange(p1, p2, nrow=2)

Incubation period

epidemic = c("COVID-19", "SARS", "MERS", "Swine Flue", "Seasonal Flu")
lower = c(2, 2, 2, 1, 1)
med = c(5.3, 5, 5, 2.5, 2)
upper = c(14, 7, 14, 4, 4)
dat = data.frame(epidemic, lower, med, upper) 

g = ggplot(data=dat, aes(x=epidemic, y=med, ymin=lower, ymax=upper, color=epidemic))
g = g + geom_pointrange(lwd=0.8)  
g = g + geom_hline(yintercept = 0, lty=2) + coord_flip()
g + xlab("") + ylab("Incubation period (days)") + theme(legend.position="none")

R0: Transmission rate

epidemic = c("COVID-19", "SARS", "Seasonal Flu")
lower = c(2.0, 2.0, 1.1)
med = c(2.8, 3.0, 1.3)
upper = c(3.1, 4.0, 1.8) 
dat = data.frame(epidemic, lower, upper) 

g = ggplot(data=dat, aes(x=epidemic, y=med, ymin=lower, ymax=upper, color=epidemic))
g = g + geom_pointrange(lwd=2)  
g = g + geom_hline(yintercept = 1, lty=3) + coord_flip()
g + xlab("") + ylab("Rate of transmission (R0)") + theme(legend.position="none")