- To compare how the numbers of cases are evolving in 4 different countries
- To raise public awareness of how contagious this virus is
18/3/2020
The datasets were taken from The Humanitarian Data Exchange https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases.
Loading the libraries and the datasets
suppressMessages(library(dplyr) )
suppressMessages(library(plotly) )
# Download the confirmed, deaths and recovered time series into COVID19/.
# download.file() does not create missing directories, so make sure the
# destination folder exists before the first download.
dir.create("COVID19", showWarnings = FALSE, recursive = TRUE)
download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Confirmed.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Binfected%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Confirmed.csv", destfile = "COVID19/time_series-ncov-Confirmed.csv")
download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Deaths.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Bkilled%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Deaths.csv", destfile = "COVID19/time_series-ncov-Deaths.csv")
download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Recovered.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Brecovered%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Recovered.csv", destfile = "COVID19/time_series-ncov-Recovered.csv")
# Read each downloaded time series into its own data frame.
df.Confirmed.Original <- read.csv("COVID19/time_series-ncov-Confirmed.csv")
df.Deaths.Original <- read.csv("COVID19/time_series-ncov-Deaths.csv")
df.Recovered.Original <- read.csv("COVID19/time_series-ncov-Recovered.csv")
Filtering the confirmed cases for Peru, China, Italy and the US
# Keep only the confirmed-case rows for the four countries of interest, store
# the country, date and value columns as character, and label every row with
# the case type "Confirmed".
df.Confirmed <- df.Confirmed.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Confirmed"
  )
Filtering the deaths for Peru, China, Italy and the US
# Keep only the death rows for the four countries of interest, store the
# country, date and value columns as character, and label every row with the
# case type "Deaths".
df.Deaths <- df.Deaths.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Deaths"
  )
Filtering the recovered cases for Peru, China, Italy and the US
# Keep only the recovered rows for the four countries of interest, store the
# country, date and value columns as character, and label every row with the
# case type "Recovered".
df.Recovered <- df.Recovered.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Recovered"
  )
# Stack the three case types into one long table.
df.base <- bind_rows(df.Confirmed, df.Deaths, df.Recovered)

# Convert the counts to numbers and total them per country, case type and date
# (presumably because a country can span several Province.State rows).
df <- df.base %>%
  mutate(Value = as.numeric(Value)) %>%
  group_by(Country.Region, Cases, Date) %>%
  summarise(Value = sum(Value))

# Preview the first rows of the aggregated table.
head(df)
# A tibble: 6 x 4
# Groups:   Country.Region, Cases [1]
  Country.Region Cases     Date       Value
  <chr>          <chr>     <chr>      <dbl>
1 China          Confirmed 2020-01-22   548
2 China          Confirmed 2020-01-23   643
3 China          Confirmed 2020-01-24   920
4 China          Confirmed 2020-01-25  1406
5 China          Confirmed 2020-01-26  2075
6 China          Confirmed 2020-01-27  2877
Numbers of cases of COVID19 in China by Date
# Line-and-point chart of the China counts, one coloured series per case type.
# Date is a character column, so ggplot2 treats the x axis as discrete and by
# default groups rows by every discrete variable (Date x Cases); each group
# then holds a single point and geom_line() draws nothing. Setting
# group = Cases joins the points of each case type into one line.
g <- ggplot(
  df[df$Country.Region == "China", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in China by Date",
    y = "Count of numbers of Cases",
    x = ""
  )
Numbers of cases of COVID19 in Italy by Date
# Line-and-point chart of the Italy counts, one coloured series per case type.
# Date is a character column, so ggplot2 treats the x axis as discrete and by
# default groups rows by every discrete variable (Date x Cases); each group
# then holds a single point and geom_line() draws nothing. Setting
# group = Cases joins the points of each case type into one line.
g <- ggplot(
  df[df$Country.Region == "Italy", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in Italy by Date",
    y = "Count of numbers of Cases",
    x = ""
  )
Numbers of cases of COVID19 in US by Date
# Line-and-point chart of the US counts, one coloured series per case type.
# Date is a character column, so ggplot2 treats the x axis as discrete and by
# default groups rows by every discrete variable (Date x Cases); each group
# then holds a single point and geom_line() draws nothing. Setting
# group = Cases joins the points of each case type into one line.
g <- ggplot(
  df[df$Country.Region == "US", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in US by Date",
    y = "Count of numbers of Cases",
    x = ""
  )
Numbers of cases of COVID19 in Peru by Date
# Line-and-point chart of the Peru counts, one coloured series per case type.
# Date is a character column, so ggplot2 treats the x axis as discrete and by
# default groups rows by every discrete variable (Date x Cases); each group
# then holds a single point and geom_line() draws nothing. Setting
# group = Cases joins the points of each case type into one line.
g <- ggplot(
  df[df$Country.Region == "Peru", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    # Rotate the date labels so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in Peru by Date",
    y = "Count of numbers of Cases",
    x = ""
  )
Numbers of cases of COVID19 in the four Countries
# Bar chart of the latest counts: one dodged bar per case type and country.
# Date is stored as text, so max() returns the last date string in sort order.
max.date <- max(as.character(df$Date))

# Rows for that date only, ordered by case type.
bars <- df[df$Date == max.date, ] %>%
  arrange(Cases)

g <- ggplot(bars, aes(x = Country.Region, y = Value, fill = Cases)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in the four Countries",
    y = "Count of numbers of Cases",
    x = ""
  )
Confirmed cases in the four countries shown on a map
# Map of the confirmed cases on the most recent date, one red marker per row
# of df.base, sized by the natural log of the case count.
df.base <- df.base %>%
  filter(Date == max.date & Cases == "Confirmed") %>%
  transform(
    Value = as.numeric(Value),
    # Coordinates go through the same text-to-number conversion as Value, in
    # case they were also read as text -- TODO confirm against the CSV.
    Lat = as.numeric(as.character(Lat)),
    Long = as.numeric(as.character(Long))
  ) %>%
  # log(0) is -Inf, which is not a usable marker size, so rows without any
  # confirmed case are left off the map.
  filter(Value > 0)

g <- plot_ly(
  df.base,
  lat = df.base$Lat,
  lon = df.base$Long,
  marker = list(size = log(df.base$Value), color = "red"),
  type = "scattermapbox",
  mode = "markers",
  # Hover label: province and country on one line, case count on the next.
  hovertext = paste(
    df.base$Province.State, ",", df.base$Country.Region,
    "<br />", df.base$Value, " cases confirmed"
  )
) %>%
  layout(
    mapbox = list(
      style = "open-street-map",
      zoom = 1,
      center = list(lon = 11.7739345, lat = 28.6783798)
    )
  )