18/3/2020

Objectives

  • To compare how the numbers of cases are evolving in 4 different countries
  • To raise public awareness of how contagious this virus is

The Datasets

The datasets were taken from the Humanitarian Data Exchange (https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases).

Loading the libraries and the datasets

suppressMessages(library(dplyr) )
suppressMessages(library(plotly) )

# Download the Confirmed, Deaths and Recovered time series as CSV files.
# Each URL goes through the HXL Proxy (explode / rename / clean / sort filters
# in the query string), which wraps the corresponding CSSEGISandData/COVID-19
# time series file on GitHub (see the trailing `url=` parameter).
#
# download.file() does not create missing directories, so make sure the
# destination folder exists first; this is a no-op when it is already there.
dir.create("COVID19", showWarnings = FALSE, recursive = TRUE)

download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Confirmed.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Binfected%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Confirmed.csv", destfile = "COVID19/time_series-ncov-Confirmed.csv")
download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Deaths.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Bkilled%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Deaths.csv", destfile = "COVID19/time_series-ncov-Deaths.csv")
download.file("https://data.humdata.org/hxlproxy/data/download/time_series-ncov-Recovered.csv?dest=data_edit&filter01=explode&explode-header-att01=date&explode-value-att01=value&filter02=rename&rename-oldtag02=%23affected%2Bdate&rename-newtag02=%23date&rename-header02=Date&filter03=rename&rename-oldtag03=%23affected%2Bvalue&rename-newtag03=%23affected%2Brecovered%2Bvalue%2Bnum&rename-header03=Value&filter04=clean&clean-date-tags04=%23date&filter05=sort&sort-tags05=%23date&sort-reverse05=on&filter06=sort&sort-tags06=%23country%2Bname%2C%23adm1%2Bname&tagger-match-all=on&tagger-default-tag=%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag=%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_19-covid-Recovered.csv", destfile = "COVID19/time_series-ncov-Recovered.csv")

# Read the three downloaded CSV files into data frames, one per case type.
# These unmodified copies are the inputs for the per-dataset filtering below.
df.Confirmed.Original <- read.csv("COVID19/time_series-ncov-Confirmed.csv")
df.Deaths.Original <- read.csv("COVID19/time_series-ncov-Deaths.csv")
df.Recovered.Original <- read.csv("COVID19/time_series-ncov-Recovered.csv")

Confirmed dataset

Filtering for Peru, China, Italy and US

# Keep only the rows for the four countries of interest and label them as
# confirmed cases. Country, date and value are stored as plain character
# vectors here; Value is turned into a number when the datasets are combined.
df.Confirmed <- df.Confirmed.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Confirmed"
  )

Deaths dataset

Filtering for Peru, China, Italy and US

# Keep only the rows for the four countries of interest and label them as
# deaths. Country, date and value are stored as plain character vectors here;
# Value is turned into a number when the datasets are combined.
df.Deaths <- df.Deaths.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Deaths"
  )

Recovered dataset

Filtering for Peru, China, Italy and US

# Keep only the rows for the four countries of interest and label them as
# recovered cases. Country, date and value are stored as plain character
# vectors here; Value is turned into a number when the datasets are combined.
df.Recovered <- df.Recovered.Original %>%
  filter(Country.Region %in% c("Peru", "China", "Italy", "US")) %>%
  transform(
    Country.Region = as.character(Country.Region),
    Date = as.character(Date),
    Value = as.character(Value),
    Cases = "Recovered"
  )

Combining the datasets

# Stack the three filtered datasets into one long table. df.base keeps every
# original row and column, and is reused later for the map.
df.base <- bind_rows(df.Confirmed, df.Deaths, df.Recovered)

# Collapse to a single total per country, case type and date by summing the
# rows that share those three keys. Value is still character at this point,
# so it is converted to numeric before summing.
df <- df.base %>%
  mutate(Value = as.numeric(Value)) %>%
  group_by(Country.Region, Cases, Date) %>%
  summarise(Value = sum(Value))

Let’s take a look at the data

# Preview the first six rows of the aggregated table.
head(df, n = 6L)
# A tibble: 6 x 4
# Groups:   Country.Region, Cases [1]
  Country.Region Cases     Date       Value
  <chr>          <chr>     <chr>      <dbl>
1 China          Confirmed 2020-01-22   548
2 China          Confirmed 2020-01-23   643
3 China          Confirmed 2020-01-24   920
4 China          Confirmed 2020-01-25  1406
5 China          Confirmed 2020-01-26  2075
6 China          Confirmed 2020-01-27  2877

The plots

Numbers of cases of COVID19 in China by Date

# Daily counts for China, one coloured series per case type.
# Date is a character column, so the x axis is discrete and ggplot2's default
# grouping would put every (Date, Cases) pair in its own group, leaving
# geom_line() with nothing to connect. `group = Cases` makes each case type a
# single connected series.
g <- ggplot(
  df[df$Country.Region == "China", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in China by Date",
    y = "Count of numbers of Cases",
    x = ""
  )

Numbers of cases of COVID19 in Italy by Date

# Daily counts for Italy, one coloured series per case type.
# Date is a character column, so the x axis is discrete and ggplot2's default
# grouping would put every (Date, Cases) pair in its own group, leaving
# geom_line() with nothing to connect. `group = Cases` makes each case type a
# single connected series.
g <- ggplot(
  df[df$Country.Region == "Italy", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in Italy by Date",
    y = "Count of numbers of Cases",
    x = ""
  )

Numbers of cases of COVID19 in US by Date

# Daily counts for the US, one coloured series per case type.
# Date is a character column, so the x axis is discrete and ggplot2's default
# grouping would put every (Date, Cases) pair in its own group, leaving
# geom_line() with nothing to connect. `group = Cases` makes each case type a
# single connected series.
g <- ggplot(
  df[df$Country.Region == "US", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in US by Date",
    y = "Count of numbers of Cases",
    x = ""
  )

Numbers of cases of COVID19 in Peru by Date

# Daily counts for Peru, one coloured series per case type.
# Date is a character column, so the x axis is discrete and ggplot2's default
# grouping would put every (Date, Cases) pair in its own group, leaving
# geom_line() with nothing to connect. `group = Cases` makes each case type a
# single connected series.
g <- ggplot(
  df[df$Country.Region == "Peru", ],
  aes(x = Date, y = Value, color = Cases, group = Cases)
) +
  geom_point() +
  geom_line(size = 1) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in Peru by Date",
    y = "Count of numbers of Cases",
    x = ""
  )

Numbers of cases of COVID19 in the four Countries

# Most recent date in the data. Dates are compared as strings here; max.date
# is reused further down when building the map.
max.date <- max(as.character(df$Date))

# Totals on that latest date, ordered by case type.
bars <- df[df$Date == max.date, ] %>%
  arrange(Cases)

# Side-by-side bars: one cluster per country, one bar per case type.
g <- ggplot(bars, aes(x = Country.Region, y = Value, fill = Cases)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Numbers of cases of COVID19 in the four Countries",
    y = "Count of numbers of Cases",
    x = ""
  )

Confirmed cases in the four countries shown on a map

# Reduce df.base to the confirmed counts on the latest date and make every
# plotted column numeric. Only Value was converted in the original pipeline;
# Lat/Long may still be factor or character depending on how read.csv parsed
# them, so go through as.character() to get the coordinate itself rather than
# a factor level index.
df.base <- df.base %>%
  filter(Date == max.date & Cases == "Confirmed") %>%
  transform(
    Value = as.numeric(Value),
    Lat = as.numeric(as.character(Lat)),
    Long = as.numeric(as.character(Long))
  )

# One red marker per row, sized by the log of the confirmed count.
# log(0) is -Inf, which is not a valid marker size, so sizes are floored at 0:
# rows with zero cases simply get no visible marker, and all positive counts
# keep the size they had before.
g <- plot_ly(
  df.base,
  lat = df.base$Lat,
  lon = df.base$Long,
  marker = list(size = pmax(log(df.base$Value), 0), color = "red"),
  type = "scattermapbox",
  mode = "markers",
  hovertext = paste(
    df.base$Province.State, ",", df.base$Country.Region,
    "<br />", df.base$Value, " cases confirmed"
  )
) %>%
  layout(
    mapbox = list(
      style = "open-street-map",
      zoom = 1,
      center = list(lon = 11.7739345, lat = 28.6783798)
    )
  )

Final words

  • Compared to the other countries, Peru is in the early stages of the infection
  • Be safe! Please stay at home.