library(lubridate)
library(tidyverse)
library(RCurl)
The data were obtained from the Johns Hopkins GitHub repository. The raw CSV is in a wide format, which requires pivoting into a long data frame and converting the text column headers to dates. There are three data sets: confirmed cases, deaths and recoveries.
# `variable` is not referenced anywhere in the visible code; it is kept (as
# FALSE) in case a later section reads it.
variable <- FALSE

# Directory holding the three JHU CSSE time-series CSV files.
# NOTE(review): upstream appears to have renamed/archived the
# `time_series_19-covid-*.csv` files at some point -- confirm these URLs
# still resolve before relying on the download.
jhu_base_url <- paste0(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
  "csse_covid_19_data/csse_covid_19_time_series/"
)

# Download one JHU time series and reshape it from wide to long.
#
# series: the file-name suffix, one of "Confirmed", "Deaths", "Recovered".
#
# Returns a long table with columns Province, Country, Lat, Long, Date and
# NCases, where Date has been parsed from the "%m/%d/%y" column headers and
# NCases holds the value that was stored under that date column.
read_jhu_series <- function(series) {
  csv_text <- getURL(
    paste0(jhu_base_url, "time_series_19-covid-", series, ".csv")
  )
  # check.names = FALSE keeps the date headers (e.g. "1/22/20") intact so
  # they can be parsed below.
  wide <- read.csv(text = csv_text, check.names = FALSE)

  # The first four columns are identifiers; everything after is one column
  # per day. Without this guard `5:ncol(wide)` would count backwards on a
  # failed or unexpected download and fail with an obscure error.
  if (ncol(wide) < 5) {
    stop(
      "Download for series '", series, "' has ", ncol(wide),
      " column(s); expected 4 identifier columns plus date columns.",
      call. = FALSE
    )
  }

  long <- pivot_longer(
    wide,
    cols = 5:ncol(wide),
    names_to = "Date",
    values_to = "NCases"
  )
  names(long)[1:4] <- c("Province", "Country", "Lat", "Long")
  long$Date <- as.Date(long$Date, format = "%m/%d/%y")
  long
}

Confirmed <- read_jhu_series("Confirmed")
Deaths <- read_jhu_series("Deaths")
Recovered <- read_jhu_series("Recovered")
Subtracting the previous day’s cumulative total from the current day’s cumulative total gives the number of new cases reported on each day.
# Collapse the province-level rows into one cumulative total per country
# and day. summarise() drops only the last grouping level (Date), so the
# result is still grouped by Country -- the lag() below relies on that.
country_totals <- Confirmed %>%
  group_by(Country, Date) %>%
  summarise(NCases = sum(NCases))

# Put the rows in date order, then difference the cumulative totals within
# each country. The first day of each country has no predecessor, so its
# own total is used as the default, which makes its New_cases equal to 0.
dd <- country_totals %>%
  arrange(Date) %>%
  mutate(New_cases = NCases - lag(NCases, default = first(NCases)))
# Layers shared by every plot below: the raw daily points plus a loess
# trend line drawn without its confidence band. A fresh list is built on
# each call so no layer object is shared between plots.
trend_layers <- function() {
  list(
    geom_point(),
    geom_smooth(se = FALSE, method = "loess")
  )
}

# China on its own.
china <- filter(dd, Country == "China")
ggplot(china, aes(x = Date, y = New_cases)) +
  trend_layers()

# South Korea on its own.
south_korea <- filter(dd, Country == "Korea, South")
ggplot(south_korea, aes(x = Date, y = New_cases)) +
  trend_layers()

# Side-by-side comparison, one colour per country.
lst <- c("United Kingdom", "US", "Korea, South")
ggplot(
  filter(dd, Country %in% lst),
  aes(x = Date, y = New_cases, colour = Country)
) +
  trend_layers()

lst <- c("Italy", "Spain", "Iran")
ggplot(
  filter(dd, Country %in% lst),
  aes(x = Date, y = New_cases, colour = Country)
) +
  trend_layers()

# Everything except the four listed countries, summed into a single daily
# series. The same aggregate feeds both the linear and the log10 plot.
lst <- c("Korea, South", "China", "Japan", "Iran")
elsewhere <- dd %>%
  filter(!(Country %in% lst)) %>%
  group_by(Date) %>%
  summarise(New_cases = sum(New_cases))

ggplot(elsewhere, aes(x = Date, y = New_cases)) +
  trend_layers()

# Same series on a log10 scale.
ggplot(elsewhere, aes(x = Date, y = log10(New_cases))) +
  trend_layers()