# The global workspace is deliberately left untouched: clearing it with
# rm(list = ls()) would delete the caller's objects while leaving loaded
# packages and options as they were. Every object used below is assigned
# before it is read, so start a fresh R session if a clean state is wanted.
library(gganimate)
library(dplyr)
library(tidyverse)
library(ggthemes)
library(gifski)
library(reshape2)
# Load the global time series published by Johns Hopkins CSSE on GitHub.
# The `time_series_19-covid-*.csv` files were deprecated by JHU in March 2020;
# the `time_series_covid19_*_global.csv` files replace them and keep the same
# wide layout (Province/State, Country/Region, Lat, Long, one column per day).
jhu_base_url <- paste0(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
  "csse_covid_19_data/csse_covid_19_time_series/"
)
confirmedCases <- read_csv(
  paste0(jhu_base_url, "time_series_covid19_confirmed_global.csv")
)
deathCases <- read_csv(
  paste0(jhu_base_url, "time_series_covid19_deaths_global.csv")
)
recoveredCases <- read_csv(
  paste0(jhu_base_url, "time_series_covid19_recovered_global.csv")
)
# Use the portable "C" locale for date handling. The locale name "English" is
# only understood on Windows; on other platforms the request fails with a
# warning. "C" is accepted everywhere and formats dates with English
# month and weekday names.
Sys.setlocale("LC_TIME", "C")
# Reshape one wide JHU table (one column per day) into per-country totals.
#
# cases:      table with `Country/Region`, `Province/State`, `Lat`, `Long`
#             and one numeric column per reporting date.
# value_name: name to give the column holding the summed counts.
#
# Returns an ungrouped tibble with columns `Country`, `Date` (the original
# column header, still as text) and `value_name`.
summarise_by_country <- function(cases, value_name) {
  totals <- cases %>%
    select(-c(Lat, Long)) %>%
    pivot_longer(
      cols = -c(`Country/Region`, `Province/State`),
      names_to = "Date",
      values_to = "value"
    ) %>%
    group_by(Country = `Country/Region`, Date) %>%
    # na.rm keeps one missing province value from turning a whole total into NA
    summarise(value = sum(value, na.rm = TRUE)) %>%
    ungroup()
  names(totals)[names(totals) == "value"] <- value_name
  totals
}

# Apply the same reshaping to each of the three case tables.
confirmedCases <- summarise_by_country(confirmedCases, "Confirmed")
deathCases <- summarise_by_country(deathCases, "Death")
recoveredCases <- summarise_by_country(recoveredCases, "Recovered")
# Merge all tables together on their shared keys. `by` names the key columns
# explicitly for both sides; merge() keeps only rows present in both inputs.
mergedCases <- merge(confirmedCases, deathCases, by = c("Country", "Date"))
mergedCases <- merge(mergedCases, recoveredCases, by = c("Country", "Date"))
# Convert the date labels (month/day/two-digit year) to Date objects. They
# are turned into text first so a factor column is parsed by its labels.
mergedCases$Date <- as.Date(as.character(mergedCases$Date), format = "%m/%d/%y")
# Collapse the per-country rows into one total per date for each case type.
df1 <- mergedCases %>%
  group_by(Date) %>%
  summarise(
    Confirmed = sum(Confirmed),
    Recovered = sum(Recovered),
    Death = sum(Death)
  )
# Stack the three totals into one long table: the dates are repeated once per
# case type and `State` labels which series each value belongs to.
df2 <- data.frame(
  Date = rep(df1[["Date"]], times = 3),
  act_noact = c(df1[["Confirmed"]], df1[["Death"]], df1[["Recovered"]]),
  State = rep(c("Confirmed", "Deaths", "Recovered"), each = nrow(df1))
)
# Most recent date in the data, shown in the plot title.
lastDate <- max(df1[["Date"]])
# Define the animated plot: one line per case type, revealed along the date
# axis, with the current value printed at the right-hand edge.
p <- ggplot(df2, aes(x = Date, y = act_noact, group = State, color = State)) +
  geom_line() +
  # dashed guide from each point to the last date on the x axis
  geom_segment(
    aes(xend = max(Date), yend = act_noact),
    linetype = 2,
    colour = "blue"
  ) +
  geom_point(size = 3) +
  # value label anchored just past the last date
  geom_text(
    aes(x = max(Date) + .1, label = sprintf("%5.0f", act_noact)),
    hjust = -0.5
  ) +
  transition_reveal(Date) +
  view_follow(fixed_y = TRUE) +
  # clipping is off so the labels can extend into the right margin
  coord_cartesian(clip = "off") +
  xlab("Day") +
  ylab("Number of cases") +
  # paste() already inserts a space, so the label carries no trailing one
  ggtitle(paste("Evolution of cases over time as of", lastDate)) +
  enter_drift(x_mod = -1) +
  exit_drift(x_mod = 1) +
  theme_classic() +
  theme(
    legend.position = c(0.2, 0.8),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    # wide right margin (top, right, bottom, left) leaves room for the labels
    plot.margin = margin(5.5, 40, 5.5, 5.5)
  )
# Render the animation at 5 frames per second and write it out as a GIF file.
animate(
  p,
  renderer = gifski_renderer("virusevolution.gif"),
  fps = 5
)
