Introduction

This visualization aims to show the trend of worldwide Covid-19 cases as a time series, using the most up-to-date data.

Challenges

Proposed Design

Treemap

1. Import Dataset and R Packages

#install.packages("coronavirus")
#install.packages("plotly")
library(coronavirus)
library(plotly)
library(tidyr)
library(leaflet)
library(leafpop)
library(purrr)

# Refresh the data before plotting.
# NOTE(review): update_dataset() presumably comes from the coronavirus package
# loaded above and may need network access -- confirm before running offline.
update_dataset()

1. Analyze the distribution of Covid-19 cases worldwide over time. Create a stacked area chart with the R package plotly, setting x to the date and y to the cumulative number of cases (confirmed, death and recovered).

# Worldwide trend: stacked area chart of cumulative cases over time.
#
# Steps:
#   1. Sum the daily cases per (type, date) across all countries.
#   2. Reshape so each case type (confirmed / death / recovered) is a column.
#   3. Compute the running totals that are stacked in the chart.
coronavirus %>%
  group_by(type, date) %>%
  summarise(total_cases = sum(cases)) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # grouping so the cumulative sums below always run over the whole table.
  dplyr::ungroup() %>%
  pivot_wider(names_from = type, values_from = total_cases) %>%
  # cumsum() depends on row order, so sort chronologically first.
  arrange(date) %>%
  # `confirm` is the daily confirmed count minus deaths and recoveries; it is
  # the series shown under the "Confirmed" legend entry.
  mutate(confirm = confirmed - death - recovered) %>%
  mutate(
    confirm_total = cumsum(confirm),
    recovered_total = cumsum(recovered),
    death_total = cumsum(death)
  ) %>%
  # mode = "none" hides lines and markers so only the filled areas are drawn;
  # a shared stackgroup stacks the traces on top of each other.
  plot_ly(
    x = ~date,
    y = ~confirm_total,
    name = "Confirmed",
    fillcolor = "goldenrod",
    type = "scatter",
    mode = "none",
    stackgroup = "one"
  ) %>%
  add_trace(
    y = ~death_total,
    name = "Death",
    fillcolor = "red"
  ) %>%
  add_trace(
    y = ~recovered_total,
    name = "Recovered",
    fillcolor = "forestgreen"
  ) %>%
  layout(
    title = "Trend of Covid19 Cases Worldwide",
    yaxis = list(title = "Number of Cases"),
    xaxis = list(title = "Date"),
    # The legend position must be passed to layout(); it was previously a
    # stray top-level assignment after the call and had no effect on the plot.
    legend = list(x = 100, y = 0.5)
  )

2. Find the countries with the most critical Covid-19 spread by sorting the data by number of confirmed cases. The data is filtered to the latest available date.

# Per-country case counts on the most recent date in the dataset, one column
# per case type, ordered from most to fewest confirmed cases.
coronavirus %>%
  # Keep only the rows for the latest reporting date.
  filter(date == max(date)) %>%
  group_by(country, type) %>%
  # summarise() keeps only the grouping columns and the new total, so no
  # explicit column selection is needed beforehand.
  summarise(total_cases = sum(cases)) %>%
  pivot_wider(
    names_from = type,
    values_from = total_cases
  ) %>%
  arrange(-confirmed)
## # A tibble: 188 x 4
## # Groups:   country [188]
##    country      confirmed death recovered
##    <chr>            <int> <int>     <int>
##  1 US               58406  1133     23725
##  2 India            54735   853     51255
##  3 Brazil           45392  1088     29128
##  4 Philippines      12746    77     38269
##  5 Colombia         10673   225      6321
##  6 South Africa     10107   148     16290
##  7 Mexico            9556   784      7752
##  8 Russia            5429    95      8099
##  9 Argentina         5241    53      2719
## 10 Iran              2548   216      2311
## # … with 178 more rows

Plot the trend of daily confirmed Covid-19 cases for the top 5 countries by confirmed cases, using plotly.

# Daily confirmed cases in wide form: one row per date, one column per country.
# Built step by step inside local() so the intermediates do not leak into the
# global environment; only `df` is created.
df <- local({
  confirmed_rows <- dplyr::filter(coronavirus, type == "confirmed")
  by_date_country <- dplyr::group_by(confirmed_rows, date, country)
  daily_totals <- dplyr::ungroup(
    dplyr::summarise(by_date_country, total = sum(cases))
  )
  tidyr::pivot_wider(daily_totals, names_from = country, values_from = total)
})

# Line chart of daily confirmed cases for the five highlighted countries.
# `df` is the wide table built above: a `date` column plus one column per
# country, so each trace reads its y values from the matching country column.
df %>%
  plotly::plot_ly() %>%
  plotly::add_trace(
    x = ~date,
    y = ~US,
    type = "scatter",
    mode = "lines",
    name = "US"
  ) %>%
  plotly::add_trace(
    x = ~date,
    y = ~India,
    type = "scatter",
    mode = "lines",
    name = "India"
  ) %>%
  plotly::add_trace(
    x = ~date,
    y = ~Brazil,
    type = "scatter",
    mode = "lines",
    name = "Brazil"
  ) %>%
  plotly::add_trace(
    x = ~date,
    # This trace previously plotted the Netherlands column under the
    # "South Africa" label. Backticks are needed because the column name
    # contains a space.
    y = ~`South Africa`,
    type = "scatter",
    mode = "lines",
    name = "South Africa"
  ) %>%
  plotly::add_trace(
    x = ~date,
    y = ~Colombia,
    type = "scatter",
    mode = "lines",
    name = "Colombia"
  ) %>%
  plotly::layout(
    yaxis = list(title = "Confirmed Cases"),
    xaxis = list(title = "Date"),
    title = "Top 5 Countries with Covid19 Confirmed Cases",
    legend = list(x = 100, y = 0.5),
    # "x" is the documented hovermode that shows every trace's value at the
    # hovered date; "compare" is not a valid value and falls back to the
    # library default.
    hovermode = "x"
  )

3. Create Treemap with Plotly package.

# Total confirmed cases per country, largest first. Every row gets the same
# `parents` value so all country tiles sit under a single "Confirmed" node.
conf_df <- coronavirus %>%
  filter(type == "confirmed") %>%
  group_by(country) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(-total_cases) %>%
  mutate(parents = "Confirmed") %>%
  ungroup()

# Treemap: tile area is proportional to each country's confirmed total.
fig <- plot_ly(
  data = conf_df,
  type = "treemap",
  values = ~total_cases,
  labels = ~country,
  parents = ~parents,
  name = "Confirmed",
  domain = list(column = 0),
  marker = list(colorscale = "Reds"),
  textinfo = "label+value"
) %>%
  # The chart title is a layout property. Treemap traces have no `title`
  # attribute, so passing it to plot_ly() left the chart untitled.
  layout(title = "Worldwide Confirmed Cases")

fig

5. Create an interactive map of total cases by type with the leaflet package, filtering out records with 0 cases.

# Total cases per location (country / province / coordinates) and case type.
# Only strictly positive daily records are summed. `log_cases` is twice the
# natural log of the total and is used as the marker radius on the map.
df <- coronavirus %>%
  dplyr::filter(cases > 0) %>%
  dplyr::group_by(country, province, lat, long, type) %>%
  dplyr::summarise(cases = sum(cases)) %>%
  # Drop the grouping first; the log transform below is element-wise, so the
  # result is the same with or without groups.
  dplyr::ungroup() %>%
  dplyr::mutate(log_cases = log(cases) * 2)

# One data frame per case type, so each type becomes its own map layer.
df.split <- split(df, df$type)

# Fixed marker colour for each case type.
pal <- colorFactor(
  c("orange", "red", "forestgreen"),
  domain = c("confirmed", "death", "recovered")
)

# Build the map by adding one circle-marker layer per case type.
# reduce() threads the map through each step and returns the final map, so no
# `<<-` assignment into the global environment is needed.
map_object <- purrr::reduce(
  names(df.split),
  function(map, type_name) {
    type_df <- df.split[[type_name]]
    addCircleMarkers(
      map,
      data = type_df,
      lng = ~long, lat = ~lat,
      label = ~as.character(cases),
      color = ~pal(type),
      stroke = FALSE,
      fillOpacity = 0.5,
      radius = ~log_cases,
      popup = leafpop::popupTable(
        type_df,
        feature.id = FALSE,
        row.numbers = FALSE,
        zcol = c("type", "cases", "country", "province")
      ),
      # The group name ties the layer to its checkbox in the layers control.
      group = type_name,
      labelOptions = labelOptions(
        noHide = FALSE,
        direction = "auto"
      )
    )
  },
  # Start from an empty map that carries only the base tiles.
  .init = leaflet() %>% addProviderTiles(providers$Stamen.TonerHybrid)
)

# Render the map with a checkbox per case-type layer, shown expanded.
map_object %>%
  addLayersControl(
    overlayGroups = names(df.split),
    options = layersControlOptions(collapsed = FALSE)
  )

Conclusion

Based on the visualizations, it is clear that Covid-19 cases continue to rise worldwide. The top 5 countries with the most confirmed cases are the US, Brazil, India, South Africa, and Colombia. Looking at the growth of confirmed cases in each of these countries individually, the trend is still upward. Therefore, more effective control measures are required to limit the damage. Even though the number of confirmed cases continues to increase, the number of recoveries is also rising steadily, helped by more advanced treatments following clinical trials.

Reference List

Data Source: https://github.com/RamiKrispin/coronavirus/tree/master/csv