1. From epidemic to pandemic Load the readr, ggplot2, and dplyr packages
library(readr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read datasets/confirmed_cases_worldwide.csv into confirmed_cases_worldwide

# Import the worldwide case counts from CSV, then show the resulting tibble.
confirmed_cases_worldwide <- read_csv(
  file = "datasets/confirmed_cases_worldwide.csv"
)
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   cum_cases = col_double()
## )
confirmed_cases_worldwide
## # A tibble: 56 x 2
##    date       cum_cases
##    <date>         <dbl>
##  1 2020-01-22       555
##  2 2020-01-23       653
##  3 2020-01-24       941
##  4 2020-01-25      1434
##  5 2020-01-26      2118
##  6 2020-01-27      2927
##  7 2020-01-28      5578
##  8 2020-01-29      6166
##  9 2020-01-30      8234
## 10 2020-01-31      9927
## # … with 46 more rows
  1. Confirmed cases throughout the world Draw a line plot of cumulative cases vs. date Label the y-axis
# Line chart: date on the x-axis, cumulative worldwide cases on the y-axis.
ggplot(
  data = confirmed_cases_worldwide,
  mapping = aes(x = date, y = cum_cases)
) +
  geom_line() +
  labs(y = "Cumulative confirmed cases")

  1. China compared to the rest of the world Read in datasets/confirmed_cases_china_vs_world.csv
# Import the China / not-China split of the case counts and inspect its columns.
confirmed_cases_china_vs_world <- read_csv(
  file = "datasets/confirmed_cases_china_vs_world.csv"
)
## Parsed with column specification:
## cols(
##   is_china = col_character(),
##   date = col_date(format = ""),
##   cases = col_double(),
##   cum_cases = col_double()
## )
glimpse(confirmed_cases_china_vs_world)
## Observations: 112
## Variables: 4
## $ is_china  <chr> "China", "China", "China", "China", "China", "China", "Chin…
## $ date      <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26…
## $ cases     <dbl> 548, 95, 277, 486, 669, 802, 2632, 578, 2054, 1661, 2089, 4…
## $ cum_cases <dbl> 548, 643, 920, 1406, 2075, 2877, 5509, 6087, 8141, 9802, 11…

Draw a line plot of cumulative cases vs. date, grouped and colored by is_china Define aesthetics within the line geom

# One line per is_china value; the aesthetics are declared on the line layer
# itself rather than in ggplot(), so later layers can bring their own data.
plt_cum_confirmed_cases_china_vs_world <-
  ggplot(data = confirmed_cases_china_vs_world) +
  geom_line(
    mapping = aes(x = date, y = cum_cases, group = is_china, colour = is_china)
  ) +
  labs(y = "Cumulative confirmed cases")
plt_cum_confirmed_cases_china_vs_world

  1. Let’s annotate!
# Events used to annotate the plot. Dates are typed in as text inside
# tribble() and converted to Date objects afterwards.
who_events <- mutate(
  tribble(
    ~date, ~event,
    "2020-01-30", "Global health\nemergency declared",
    "2020-03-11", "Pandemic\ndeclared",
    "2020-02-13", "China reporting\nchange"
  ),
  date = as.Date(date)
)

Using who_events, add vertical dashed lines with xintercept set to date, and add text at each date, labeled by event and placed at 100000 on the y-axis

# Overlay the events on the China-vs-world plot: a dashed vertical rule at each
# event date plus its label, with every label placed at y = 100000.
plt_cum_confirmed_cases_china_vs_world +
  geom_vline(
    data = who_events,
    mapping = aes(xintercept = date),
    linetype = "dashed"
  ) +
  geom_text(
    data = who_events,
    mapping = aes(x = date, label = event),
    y = 100000
  )

  1. Adding a trend line to China Filter for China, from Feb 15
# Keep only the rows labelled "China" dated 15 February 2020 or later.
# The cutoff is built with as.Date() so the comparison is explicitly
# Date-to-Date instead of relying on the string being coerced implicitly.
china_after_feb15 <- confirmed_cases_china_vs_world %>%
  filter(is_china == "China", date >= as.Date("2020-02-15"))

Using china_after_feb15, draw a line plot cum_cases vs. date Add a smooth trend line using linear regression, no error bars

# Cumulative cases in China after the cutoff, with a straight-line (lm) fit
# drawn on top; se = FALSE leaves out the confidence band.
ggplot(
  data = china_after_feb15,
  mapping = aes(x = date, y = cum_cases)
) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")

  1. And the rest of the world? Filter confirmed_cases_china_vs_world for not China
# Subset of the China-vs-world table holding only the "Not China" rows.
not_china <- filter(
  confirmed_cases_china_vs_world,
  is_china == "Not China"
)

Using not_china, draw a line plot cum_cases vs. date Add a smooth trend line using linear regression, no error bars

# Cumulative cases for the "Not China" rows with a linear (lm) trend line and
# no confidence band. The plot is stored so it can be extended later.
plt_not_china_trend_lin <-
  ggplot(data = not_china, mapping = aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")
plt_not_china_trend_lin

  1. Adding a logarithmic scale Modify the plot to use a logarithmic scale on the y-axis
# Same plot as above, redrawn with a base-10 logarithmic y-axis.
plt_not_china_trend_lin + scale_y_log10()

  1. Which countries outside of China have been hit hardest? Run this to get the data for each country
# Import the per-country (and per-province) case counts and inspect the columns.
confirmed_cases_by_country <- read_csv(
  file = "datasets/confirmed_cases_by_country.csv"
)
## Parsed with column specification:
## cols(
##   country = col_character(),
##   province = col_character(),
##   date = col_date(format = ""),
##   cases = col_double(),
##   cum_cases = col_double()
## )
glimpse(confirmed_cases_by_country)
## Observations: 13,272
## Variables: 5
## $ country   <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Antigua an…
## $ province  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ date      <date> 2020-01-22, 2020-01-22, 2020-01-22, 2020-01-22, 2020-01-22…
## $ cases     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ cum_cases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

Group by country, summarize to calculate total cases, find the top 7

# For each country take the largest cum_cases value as its total, then keep
# the seven countries with the highest totals.
top_countries_by_total_cases <- top_n(
  summarize(
    group_by(confirmed_cases_by_country, country),
    total_cases = max(cum_cases)
  ),
  n = 7,
  wt = total_cases
)
top_countries_by_total_cases
## # A tibble: 7 x 2
##   country      total_cases
##   <chr>              <dbl>
## 1 France              7699
## 2 Germany             9257
## 3 Iran               16169
## 4 Italy              31506
## 5 Korea, South        8320
## 6 Spain              11748
## 7 US                  6421
  1. Plotting hardest hit countries as of Mid-March 2020 Run this to get the data for the top 7 countries
# Import the cumulative case series for the top 7 countries and inspect it.
confirmed_cases_top7_outside_china <- read_csv(
  file = "datasets/confirmed_cases_top7_outside_china.csv"
)
## Parsed with column specification:
## cols(
##   country = col_character(),
##   date = col_date(format = ""),
##   cum_cases = col_double()
## )
glimpse(confirmed_cases_top7_outside_china)
## Observations: 2,030
## Variables: 3
## $ country   <chr> "Germany", "Iran", "Italy", "Korea, South", "Spain", "US", …
## $ date      <date> 2020-02-18, 2020-02-18, 2020-02-18, 2020-02-18, 2020-02-18…
## $ cum_cases <dbl> 16, 0, 3, 31, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13…

Using confirmed_cases_top7_outside_china, draw a line plot of cum_cases vs. date, grouped and colored by country

# One coloured line per country showing cumulative cases over time.
ggplot(
  data = confirmed_cases_top7_outside_china,
  mapping = aes(x = date, y = cum_cases, group = country, colour = country)
) +
  geom_line() +
  labs(y = "Cumulative confirmed cases")