Disclaimer: I am data scientist but not an expert in the field of epidemics.

This is a analysis of the corona data, provided from: https://github.com/CSSEGISandData/COVID-19. It tries to answer the question if the actions which have been taken by the european goverment are effective.

Data

Loading of the data

library(lubridate)
source('load_data.R')
allData = 
  loadData("time_series_19-covid-Confirmed.csv", "CumConfirmed") %>%
  inner_join(
    loadData("time_series_19-covid-Deaths.csv", "CumDeaths")) %>%
  inner_join(
    loadData("time_series_19-covid-Recovered.csv","CumRecovered"))

max(allData$date)
## [1] "2020-03-18"

Current Data

The lastest number of cases are (we included the Hubei Province in China for Reference)

germany = allData %>% filter(`Country/Region` == c('Germany'))
italy = allData %>% filter(`Country/Region` == 'Italy')
france = allData %>% filter(`Country/Region` == 'France') %>% filter(`Province/State` == 'France')
china_hubei = allData %>% filter(`Country/Region` == 'China') %>% filter(`Province/State` == 'Hubei')
suisse = allData %>% filter(`Country/Region` == 'Switzerland')
austria = allData %>% filter(`Country/Region` == 'Austria')
spain = allData %>% filter(`Country/Region` == 'Spain')
europe = rbind(germany, italy, france, suisse, austria, spain)
europe_with_china = rbind(europe, china_hubei)
last_day = max(europe$date)

library(knitr) #For nicer plotting of the table
kable(filter(europe_with_china, date==last_day))
Province/State Country/Region date CumConfirmed CumDeaths CumRecovered
Germany 2020-03-18 12327 28 105
Italy 2020-03-18 35713 2978 4025
France France 2020-03-18 9043 148 12
Switzerland 2020-03-18 3028 28 15
Austria 2020-03-18 1646 4 9
Spain 2020-03-18 13910 623 1081
Hubei China 2020-03-18 67800 3122 56927

Europe over time

Here we investigate the number of cases over time. We include some important dates in the plots.

From China, we see that it takes about 14 days until the effects of a lockdown will become visible, this is shown by the corresponding second vertical line.

  italy_nationwide = as.Date('2020-03-09')
  italy_nationwide_effect = as.Date('2020-03-23')
  
  other_lockdown = as.Date('2020-03-16')
  other_lockdown_effect = as.Date('2020-03-30')
cols <- c("Austria" = "darkred", "Italy" = 'darkgreen', "Switzerland" = "red","Germany" = "black", "France"='darkblue', "China"='red', "Spain"='yellow3')
title = paste0('Corona Cases in Europe as of : ', last_day)

library(ggplot2)
library(ggpubr)
library(scales)
d_plot = ggplot(europe) + 
  geom_line(aes(x = date, y=CumConfirmed, color=`Country/Region`)) + 
  scale_y_continuous(trans = 'log10', name = 'Total Number of Confirmed Cases') + 
  scale_x_date(labels=date_format("%d/%m"),
               limits = as.Date(c('2020-02-20','2020-03-31'))) +
  ggtitle(title, subtitle = 'Focus on dates around the actions taken') +
  scale_color_manual(values=cols) +
  geom_vline(xintercept = italy_nationwide, col=cols['Italy'])  +
  geom_vline(xintercept = italy_nationwide_effect, col=cols['Italy'])  +
  geom_vline(xintercept = other_lockdown, col=cols['Germany'], linetype='dashed')  +
  geom_vline(xintercept = other_lockdown_effect, col=cols['Germany'],linetype='dashed')  
d_plot

d_plot + 
  geom_line(data = china_hubei, aes(x = date, y=CumConfirmed, color=`Country/Region`)) + 
  scale_x_date(labels=date_format("%d/%m")) + ggtitle(title, subtitle = 'Complete Time Span with China') 

What would happen isf no meassures have been taken

Let’s predict the numbers before govermental actions have been taken into the future. We assume a power law of the from \[ y = a \cdot 2^{t/t_0} \] That formulae states that the number of cases \(y\) double after a period \(t_0\) of days. We try to estimate \(a\) and \(t_0\). We do a trick and take the log, this yields:

\[ \lg(y) = \frac{1}{t_0} \cdot t + \lg(a) \] We fit that model to all states.

  fit = function(state, lock_down, plot=TRUE) {
    f = state %>% 
    filter(date < (lock_down + days(0))) %>% 
    filter(date > (lock_down - days(7)))
    t = as.numeric(f$date) - as.numeric(lock_down) #-6 -5 -4 ... 4  5  6
    y = log2(f$CumConfirmed)
    lm = lm(y ~ t)
    a = 2^(lm$coefficients[1])
    t0 = 1/lm$coefficients[2]
    
    name = state$`Country/Region`[1]
    if (plot) plot(t, y, main=paste0('Before gov. action ', name, ' t0=', round(t0,2)), cex.main=0.8, xlab='time (0 is lockdown)', ylab='log number of cases')
    lines(t, lm$coefficients[2]*t+lm$coefficients[1])
    
    
    t = seq(-10,30) #project 20 days after the lockdown
    return(data.frame(date = lock_down + days(t), pred = a*2^(t/t0), state=name, t0))
  }
  
  europ_fit = fit(italy, italy_nationwide)

  europ_fit = rbind(europ_fit,fit(germany, other_lockdown))

  europ_fit = rbind(europ_fit,fit(france, other_lockdown))

  europ_fit = rbind(europ_fit,fit(suisse, other_lockdown))

  europ_fit = rbind(europ_fit,fit(austria, other_lockdown))

  europ_fit = rbind(europ_fit,fit(spain, other_lockdown))

We see that the used model describes the data before the actions have been taken quite well. However, the slope of the individual countries is quite different. Before the lockdown \(t_0\), the number of days needed for doubling is:

  kable(europ_fit %>% select(c('state', 't0')) %>% group_by(state) %>% summarise_each(mean))
state t0
Italy 3.225488
Germany 2.401180
France 3.409355
Switzerland 2.364518
Austria 2.163248
Spain 2.098819

Forecasts (without action taken)

What would happen if no actions would have been taken?

  d_fit = d_plot + geom_line(data=europ_fit, aes(x=date, y=pred, col=state), linetype='dashed') +
    #scale_x_date(labels=date_format("%d/%m"), limits = as.Date(c('2020-03-02','2020-03-20')))
    ggtitle(title, subtitle = 'Data with fitted values (dashed), projections') 
  d_fit

If no meassures would have been taken, we would have by the end of the month:

europ_fit %>% filter(date == as.Date("2020-03-31")) %>% select(date, state, pred)
##         date       state      pred
## 1 2020-03-31       Italy 1022206.9
## 2 2020-03-31     Germany  599591.2
## 3 2020-03-31      France  127612.0
## 4 2020-03-31 Switzerland  214067.4
## 5 2020-03-31     Austria  148511.2
## 6 2020-03-31       Spain 1634716.3

The cases are in the millions. While there is much spread in the data, I believe this numbers are not totally off. Since the states have 10s of millions of inhabitants the spread of the virus would be still in an exponential phase (not all infected or recovered / deceased).

Are the actions effective?

  d_fit_2 = d_fit + 
    ggtitle(title, subtitle = 'Data with fitted values (dashed), focus on fitted regions') +
    scale_x_date(labels=date_format("%d/%m"), limits = as.Date(c('2020-03-08','2020-03-20'))) +
  coord_cartesian(ylim=c(500,50000))
  d_fit_2

  d_fit_2 + scale_y_continuous()

First conclusions

It seems that at least in Italy the actions taken start to be effective and the grows slows down. However, it’s still quite early to tell how strong the effect is.

Experimental Stuff below

Below we try to estimate \(t_0\) on a daily basis. However, it seems that this is not optimal and has strong fluctuations.

Evaluation of \(t_0\) over time

Starting with \[ \lg(y) = \frac{1}{t_0} \cdot t + \lg(a) \] we set substract the values at two time points, yielding

\[ t_0 = \frac{1}{\lg(y_i) - \lg(y_{i-1})} \]

italy$to = append(NA, 1/diff(log2(italy$CumConfirmed)))
germany$to = append(NA, 1/diff(log2(germany$CumConfirmed)))
france$to = append(NA, 1/diff(log2(france$CumConfirmed)))
suisse$to = append(NA, 1/diff(log2(suisse$CumConfirmed)))
spain$to = append(NA, 1/diff(log2(spain$CumConfirmed)))
austria$to = append(NA, 1/diff(log2(austria$CumConfirmed)))
china_hubei$to = append(NA, 1/diff(log2(china_hubei$CumConfirmed)))
europe = rbind(germany, italy, france, suisse, austria, spain)
europe_with_china = rbind(europe, china_hubei)
ggplot(europe) +
  geom_line(aes(x = date, y=to, color=`Country/Region`), alpha=0.2) + 
  geom_smooth(aes(x = date, y=to, color=`Country/Region`), se=FALSE) +
  scale_y_continuous(name = 'Time for doubeling t_0', limits=c(0,7)) + 
  scale_x_date(labels=date_format("%d/%m"),
               limits = as.Date(c('2020-02-20','2020-03-31'))) +
  ggtitle(title, subtitle = 'Focus on dates around the actions taken') +
  scale_color_manual(values=cols) +
  geom_vline(xintercept = italy_nationwide, col=cols['Italy'])  +
  geom_vline(xintercept = italy_nationwide_effect, col=cols['Italy'])  +
  geom_vline(xintercept = other_lockdown, col=cols['Germany'], linetype='dashed')  +
  geom_vline(xintercept = other_lockdown_effect, col=cols['Germany'],linetype='dashed')  

ggplot(china_hubei) +
  geom_line(aes(x = date, y=to, color=`Country/Region`), alpha = 0.4) + 
  geom_smooth(aes(x = date, y=to, color=`Country/Region`), se=FALSE) +
  scale_y_continuous(name = 'Time for doubeling t_0') +
scale_color_manual(values=cols)