Data reading & preprocessing

I am using data provided by JHU CSSE on their github repo: https://github.com/CSSEGISandData/COVID-19

countries <- c( 'US',
                'Canada',
                'United Kingdom',
                'Italy',
                'France',
                'Germany',
                'Spain',
                'Brazil',
                'Mexico',
                'United Arab Emirates',
                'India',
                'Australia',
                'China',
                'Japan',
                'Korea, South',
                'Poland')


confirmed_cases_df <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv") %>%
    filter(`Country/Region` %in% countries) %>%
    select(-`Province/State`) %>%
    pivot_longer(cols = -(`Country/Region`:`Long`), values_to = "cases", names_to = "date_text") %>%
    mutate(date = mdy(date_text)) %>%
    mutate(week = week(date)) %>%
    arrange(`Country/Region`, date) %>%
    group_by(`Country/Region`, date) %>%
    summarise(cases = sum(cases)) %>%
    mutate(weekly_running_increase = cases - lag(cases, n = 6)) %>%
    mutate(weekly_running_increase_p = (cases - lag(cases, n = 6))/lag(cases, n = 6)) %>%
    mutate(weekly_running_increase_p = ifelse(weekly_running_increase_p == Inf, NA, weekly_running_increase_p)) %>%
    ungroup()
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
recovered_df <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv") %>%
    filter(`Country/Region` %in% countries) %>%
    select(-`Province/State`) %>%
    pivot_longer(cols = -(`Country/Region`:`Long`), values_to = "recovered", names_to = "date_text") %>%
    mutate(date = mdy(date_text)) %>%
    mutate(week = week(date)) %>%
    arrange(`Country/Region`, date) %>%
    group_by(`Country/Region`, date) %>%
    summarise(recovered = sum(recovered)) %>%
    ungroup()
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
merged_cases <- confirmed_cases_df %>%
    left_join(recovered_df) %>%
    mutate(recovered_p = recovered / cases)
## Joining, by = c("Country/Region", "date")

Situation 2 weeks ago

First we will look at absolute number of confirmed cases in all countries

since_100_cases_march <- merged_cases %>%
    filter(cases >= 100) %>%
    filter(date <= dmy("24/03/2020")) %>%
    group_by(`Country/Region`) %>%
    mutate(start_date = min(date)) %>%
    mutate(days_since_start = date - start_date) %>%
    filter(`Country/Region` %in% countries)


since_100_cases_april <- merged_cases %>%
    filter(cases >= 100) %>%
    group_by(`Country/Region`) %>%
    mutate(start_date = min(date)) %>%
    mutate(days_since_start = date - start_date) %>%
    filter(`Country/Region` %in% countries)


ggplot(since_100_cases_march, aes(x = date, y = cases)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Confirmed cases")

Let’s transform the x axis, so that it will begin with the day of the 100th confirmed case for every country.

ggplot(since_100_cases_march, aes(x = days_since_start, y = cases)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Cases since 100 diagonosed")
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.

Instead of looking at absolute number of cases, let’s look at the weekly running increase of cases, i.e. how many new cases appear compared to 1 week ago.

ggplot(since_100_cases_march, aes(x = days_since_start, y = weekly_running_increase)) + 
  geom_line(aes(color = `Country/Region`)) + 
    ggtitle("Weekly running increase since 100 diagnosed")

In order to improve readability, we will split the countries into 3 groups and plot them separately

since_100_cases_march %>%
    filter(`Country/Region` %in% c(
                'China',
                'Japan',
                'Korea, South')) %>%
ggplot(aes(x = days_since_start, y = weekly_running_increase)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Weekly increase since 100 diagnosed: Stabilizing") +
    xlab("Days since 100 diagnoses") +
    ylab("Weekly increase")

since_100_cases_march %>%
    filter(`Country/Region` %in% c(
                'US',
                'Italy',
                'Germany',
                'France',
                'Spain',
                'United Kingdom')) %>%
ggplot(aes(x = days_since_start, y = weekly_running_increase)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Weekly increase since 100 diagnosed: Advancing") +
    xlab("Days since 100 diagnoses") +
    ylab("Weekly increase")

since_100_cases_march %>%
    filter(`Country/Region` %in% c(
                'Australia',
                'India',             
                'Canada',
                'Brazil',
                'United Arab Emirates',
                'Mexico',
                'Poland')) %>%
ggplot(aes(x = days_since_start, y = weekly_running_increase)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Weekly increase since 100 diagnosed: Emerging") +
    xlab("Days since 100 diagnoses") +
    ylab("Weekly increase") +
    ylim(0,30000)

Change in situation

We can see signs of stabilization in several advancing countries. Italy, Spain and Germany significantly flatenned their curve. On the other hand, USA has weekly increases unseen before in any other country.

since_100_cases_april %>%
    filter(`Country/Region` %in% c(
                'US',
                'Italy',
                'Germany',
                'France',
                'Spain',
                'United Kingdom')) %>%
ggplot(aes(x = days_since_start, y = weekly_running_increase)) + 
  geom_line(aes(color = `Country/Region`), size = 1) + 
    ggtitle("Weekly increase since 100 diagnosed: Advancing") +
    xlab("Days since 100 diagnoses") +
    ylab("Weekly increase")