Covid19_Model_calibration

#library (dplyr)
library(deSolve)
library(reshape2)
library(ggplot2)

Sys.time()

## [1] "2020-10-01 20:51:40 CEST"

CV19_backup<- read.csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")

#head(CV19_backup);tail(CV19_backup)

names(CV19_backup)

##  [1] "data"                         "stato"                       
##  [3] "ricoverati_con_sintomi"       "terapia_intensiva"           
##  [5] "totale_ospedalizzati"         "isolamento_domiciliare"      
##  [7] "totale_positivi"              "variazione_totale_positivi"  
##  [9] "nuovi_positivi"               "dimessi_guariti"             
## [11] "deceduti"                     "casi_da_sospetto_diagnostico"
## [13] "casi_da_screening"            "totale_casi"                 
## [15] "tamponi"                      "casi_testati"                
## [17] "note"

require(dplyr)

CV19_IT<- CV19_backup%>%
  mutate(time=seq(1,length(data),by=1),cases=totale_casi,infected= totale_casi-deceduti,recovered=dimessi_guariti, deceased=deceduti,CFR=round(deceduti/totale_casi*100,2))%>%
  select(time, cases, infected, deceased,recovered,CFR)%>%
  arrange(desc(cases))


head(CV19_IT)

##   time  cases infected deceased recovered   CFR
## 1  221 317409   281491    35918    228844 11.32
## 2  220 314861   278967    35894    227704 11.40
## 3  219 313011   277136    35875    226506 11.46
## 4  218 311364   275513    35851    225190 11.51
## 5  217 309870   274035    35835    224417 11.56
## 6  216 308104   272286    35818    223693 11.63

CV19_IT_long <- melt(as.data.frame(CV19_IT), id = "time")

ggplot(data = CV19_IT_long,                                              
       aes(x = time, y = value, colour = variable, group = variable)) + 
  geom_line() +                                                          
  xlab("Time (days)")+                                                  
  ylab("Number") +                                                      
  labs(colour = "Compartment",                                           
       title = "Novel Coronavirus Outbreak - Italy")+theme_bw()

Creating a new dataset with the number of new infected:

data<- data.frame(time= 1:length(CV19_backup$data), new_infected = CV19_backup$nuovi_positivi[1:length(CV19_backup$data)] )  #real distribution of new infected

head(data)

##   time new_infected
## 1    1          221
## 2    2           93
## 3    3           78
## 4    4          250
## 5    5          238
## 6    6          240

Time-length of the epidemic:

length(data$time)

## [1] 221

And then considering the shape of the distribution and the density distribution; the difference can be seen on the right tail as it is going down to zero.

par(mfrow=c(1,2))
plot(data$new_infected,type="l")
plot(density(data$new_infected))

This graph shows the distribution of the new infected and its moving average:

require(zoo)

ggplot(data, aes(time, new_infected)) + 
  geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
  geom_line(aes(y=rollmean(new_infected, 7, na.pad=TRUE))) +
  theme_bw()

The prevalence is intended as the number of infected cases at a given piont in time.

The peak prevalence of the duration of the pandemic in Italy:

print("Peak prevalence of the pandemic in Italy:")

## [1] "Peak prevalence of the pandemic in Italy:"

max(data$new_infected)

## [1] 6557

print("Timing of the peak (days):")

## [1] "Timing of the peak (days):"

data$time[data$new_infected==max(data$new_infected)]

## [1] 27

print("Duration of the pandemic (days):")

## [1] "Duration of the pandemic (days):"

max(data$time[data$new_infected>1])-min(data$time[data$new_infected>1])

## [1] 220

To build a workable SIR model it is important to consider the proportion of individuals who are at risk of infection, setting up a percentage of the population who will more likely be infected by the virus, also considering the latent phase of the infection when the susceptible are infected but still not infectious. To do this the variable “E” has been included in the model as well as “A” for the asymptomatics.

This first part of the model is to identify the second wave and to do thar a SEAIR model has been used:

# MODEL FUNCTION

SEAIR_model <- function(time, state, parameters) {  
  
  with(as.list(c(state, parameters)), {
    
    N <- S+E+A+I+R
    
    lambda <- beta * I/N    # force of infection
    
    # The differential equations
    dS <- -lambda * S - mu * S + b * N 
    
    dE <- lambda * S - r * E
    dA <- (1-p_a) * lambda * S - gamma * A
      
    dI <-  p_a * lambda * S + r * E - gamma * I - mu * I
    dR <- gamma * A + gamma * I - mu * R + b * N   
    
    #dT <- 
  
    # Output
    return(list(c(dS, dE, dA, dI, dR))) 
  })
}

# MODEL OUTPUT

output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = SEAIR_model,
                            parms = parameters))

# PLOT OF THE MODEL FIT

ggplot() +
  geom_line(data = output, aes(x = time, y = I)) +        # plot the model prediction of
                                                          # the number infected as a line
  geom_point(data = data, aes(x = time, y = new_infected), 
             colour = "red") +  # overlay the data as red dots
  xlab("Time (days)")+                                              
  ylab("Number of new infected people") +                                 
  labs(title = paste("Model fit to the second wave epidemic curve with beta =", parameters["beta"], 
                     "and gamma =", parameters["gamma"]),caption= "Data source: Civil Protection")+
  xlim(100,210)+theme_bw()

This second part of the modeling represent the basic of a SIR model applyed to Italian’s first wave of Covid19 outbreak prevalence of infection.

require(deSolve)


N<- max(CV19_IT$cases)

initial_state_values<- c(S=N-1,I=1,R=0)

parameters<- c(beta= 1/3,gamma=1/22)#beta (infection rate) is hypothesized to be 3 days^-1, 
                                    #gamma (recovery rate) is 22 days^-1

times<- seq(0,length(CV19_IT$time),by=1)

SIR_model<- function(time,state,parameters){
  with(as.list(c(state,parameters)),{
       
    N<- S+I+R
    lambda<- beta*I/N
       
       dS<- -lambda*S
       dI<- lambda*S-gamma*I
       dR<- gamma*I
       
       return(list(c(dS,dI,dR)))
       })
}

output_sir<- as.data.frame(ode(y=initial_state_values,
                           times=times,
                           func=SIR_model,
                           parms=parameters))

ggplot()+
  geom_line(data=output_sir,aes(x=time,y=I),col="red")+
  labs(x="Time(days)",y="Infected", title="IT Prevalence of infection")+
  theme_bw()

The peak prevalence is clearly at 50 days the start of the epidemic

output_sir_long<- melt(as.data.frame(output_sir), id="time")
ggplot(data=output_sir_long, aes(x=time, y=value,colour=variable,group=variable))+
  geom_line()+
 labs( x="Time(days)",y="proportions", title="IT Covid19 theoretical Prevalence proportion: basic SIR model",subtitle=paste("with beta =",round(parameters["beta"],2),"and gamma =", round(parameters["gamma"],2)))+
  theme_bw()

To calibrate the theoretical model on the observed data, a simple linear regression has been used:

#data is the obeserved new_infected dataframe 

#times <- seq(from = 0, to = 14, by = 0.1) 

SIR_SSQ <- function(parameters, dat){
  beta<- parameters[1]
  gamma<- parameters[2]
            
  output1 <- as.data.frame(ode(y = initial_state_values,
                                 times = times,
                                 func = SIR_model,
                                 parms = c(beta=beta,
                                           gamma=gamma))
                             ) 
    
    SSQ<- sum((output1$I[output1$time %in% dat$time] - dat$new_infected)^2)
                          
    return(SSQ)
}

optimised_IT<- optim( par = c(0.33,0.05),
                      fn = SIR_SSQ,
                      dat =data)

round(optimised_IT$par,2)# 0.45 0.08

## [1] 0.21 0.17

                      # 0.21 0.17

With these two new parameters

#♥#

initial_state_values <- c(S = 10000-1, I = 1, R = 0)

times <- seq(from = 0, to = 100, by = 1) 

parameters<- c(beta=0.5,gamma=0.047)#♠ 0.55 are 18 days for recovery

opt_mod_IT <- as.data.frame(ode(y = initial_state_values,
                             times = times,
                             func = SIR_model,
                             parms = parameters))

require(ggplot2)


library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

opt_plot <- ggplot()+
  
  geom_point(aes(x = time, y = new_infected),
             colour = "red",
             shape  = "x",
             data = data)+
  
  geom_line(aes(x = time, y = I),
            colour = "blue",
            data   = opt_mod_IT)+
  
  labs(x="Time(days)",y="Prevalence of new infections",title="First wave prevalence ", subtitle=paste("beta=",round(parameters[1],2),"and gamma=",round(parameters[2],2)),caption="Data Source: Civil Protection data")+
  xlim(0,100)+
  theme_bw()

opt_plot_2<- ggplot()+geom_point(aes(x = time, y = new_infected)
                                , colour = "red"
                                , shape  = "x" 
                                , data = data)+
  geom_line(data = output, aes(x = time, y = I),col="blue")+ 
  
  labs(x="Time(days)",title="Second wave",subtitle="beta = 0.457 and gamma = 1/4",caption="Data Source: Civil Protection data")+
  xlim(100,210)+
  theme_bw()
  
  grid.arrange(opt_plot,opt_plot_2,ncol=2)

Covid19_Model_calibration

FG