library (dplyr)
library(deSolve)
library(reshape2)
library(ggplot2)
Sys.time()
## [1] "2020-10-25 22:45:33 CET"
## [1] "2020-10-01 20:51:40 CEST"
CV19_backup<- read.csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")
#head(CV19_backup);tail(CV19_backup)
names(CV19_backup)
##  [1] "data"                         "stato"                       
##  [3] "ricoverati_con_sintomi"       "terapia_intensiva"           
##  [5] "totale_ospedalizzati"         "isolamento_domiciliare"      
##  [7] "totale_positivi"              "variazione_totale_positivi"  
##  [9] "nuovi_positivi"               "dimessi_guariti"             
## [11] "deceduti"                     "casi_da_sospetto_diagnostico"
## [13] "casi_da_screening"            "totale_casi"                 
## [15] "tamponi"                      "casi_testati"                
## [17] "note"
require(dplyr)
CV19_IT<- CV19_backup%>%
  mutate(time=seq(1,length(data),by=1),cases=totale_casi,infected= totale_casi-deceduti,recovered=dimessi_guariti, deceased=deceduti,CFR=round(deceduti/totale_casi*100,2))%>%
  select(time, cases, infected, deceased,recovered,CFR)%>%
  arrange(desc(cases))
head(CV19_IT)
##   time  cases infected deceased recovered  CFR
## 1  245 525782   488444    37338    266203 7.10
## 2  244 504509   467299    37210    264117 7.38
## 3  243 484869   447810    37059    261808 7.64
## 4  242 465726   428758    36968    259456 7.94
## 5  241 449648   412816    36832    257374 8.19
## 6  240 434449   397744    36705    255005 8.45
CV19_IT_long <- melt(as.data.frame(CV19_IT), id = "time")

ggplot(data = CV19_IT_long,                                              
       aes(x = time, y = value, colour = variable, group = variable)) + 
  geom_line() +                                                          
  xlab("Time (days)")+                                                  
  ylab("Number") +                                                      
  labs(colour = "Compartment",                                           
       title = "Novel Coronavirus Outbreak - Italy")+theme_bw()

Creating a new dataset with the number of new infected:

data<- data.frame(time= 1:length(CV19_backup$data), new_infected = CV19_backup$nuovi_positivi[1:length(CV19_backup$data)] )  #real distribution of new infected

head(data)
##   time new_infected
## 1    1          221
## 2    2           93
## 3    3           78
## 4    4          250
## 5    5          238
## 6    6          240

Time-length of the epidemic:

length(data$time)
## [1] 245

And then considering the shape of the distribution and the density distribution; the difference can be seen on the right tail as it is going down to zero.

par(mfrow=c(1,2))
plot(data$new_infected,type="l")
plot(density(data$new_infected))

This graph shows the distribution of the new infected and its moving average:

require(zoo)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
ggplot(data, aes(time, new_infected)) + 
  geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
  geom_line(aes(y=rollmean(new_infected, 7, na.pad=TRUE))) +
  theme_bw()

The prevalence is intended as the number of infected cases at a given piont in time.

The peak prevalence of the duration of the pandemic in Italy:

print("Peak prevalence of the pandemic in Italy:")
## [1] "Peak prevalence of the pandemic in Italy:"
## [1] "Peak prevalence of the pandemic in Italy:"
max(data$new_infected)
## [1] 21273
## [1] 6557
print("Timing of the peak (days):")
## [1] "Timing of the peak (days):"
## [1] "Timing of the peak (days):"
data$time[data$new_infected==max(data$new_infected)]
## [1] 245
## [1] 27
print("Duration of the pandemic (days):")
## [1] "Duration of the pandemic (days):"
## [1] "Duration of the pandemic (days):"
max(data$time[data$new_infected>1])-min(data$time[data$new_infected>1])
## [1] 244
## [1] 220

To build a workable SIR model it is important to consider the proportion of individuals who are at risk of infection, setting up a percentage of the population who will more likely be infected by the virus, also considering the latent phase of the infection when the susceptible are infected but still not infectious. To do this the variable “E” has been included in the model as well as “A” for the asymptomatics.

This first part of the model is to identify the second wave and to do thar a SEAIR model has been used:

MODEL FUNCTION

SEAIR_model <- function(time, state, parameters) {  
  
  with(as.list(c(state, parameters)), {
    
    N <- S+E+A+I+R
    
    lambda <- beta * I/N    # force of infection
    
    # The differential equations
    dS <- -lambda * S - mu * S + b * N 
    
    dE <- lambda * S - r * E
    dA <- (1-p_a) * lambda * S - gamma * A
      
    dI <-  p_a * lambda * S + r * E - gamma * I - mu * I
    dR <- gamma * A + gamma * I - mu * R + b * N   
    
    #dT <- 
  
    # Output
    return(list(c(dS, dE, dA, dI, dR))) 
  })
}


# MODEL OUTPUT

output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = SEAIR_model,
                            parms = parameters))

# PLOT OF THE MODEL FIT

ggplot() +
  geom_line(data = output, aes(x = time, y = I)) +        # plot the model prediction of
                                                          # the number infected as a line
  geom_point(data = data, aes(x = time, y = new_infected), 
             colour = "red") +  # overlay the data as red dots
  xlab("Time (days)")+                                              
  ylab("Number of new infected people") +                                 
  labs(title = paste("Model fit to the second wave epidemic curve with beta =", parameters["beta"], 
                     "and gamma =", parameters["gamma"]),caption= "Data source: Civil Protection")+
  xlim(100,length(data$time))+theme_bw()

This second part of the modeling represent the basic of a SIR model applyed to Italian’s first wave of Covid19 outbreak prevalence of infection.

require(deSolve)


N<- max(CV19_IT$cases)

initial_state_values<- c(S=N-1,I=1,R=0)

parameters<- c(beta= 1/3,gamma=1/22)#beta (infection rate) is hypothesized to be 3 days^-1, 
                                    #gamma (recovery rate) is 22 days^-1

times<- seq(0,length(CV19_IT$time),by=1)

SIR_model<- function(time,state,parameters){
  with(as.list(c(state,parameters)),{
       
    N<- S+I+R
    lambda<- beta*I/N
       
       dS<- -lambda*S
       dI<- lambda*S-gamma*I
       dR<- gamma*I
       
       return(list(c(dS,dI,dR)))
       })
}

output_sir<- as.data.frame(ode(y=initial_state_values,
                           times=times,
                           func=SIR_model,
                           parms=parameters))

ggplot()+
  geom_line(data=output_sir,aes(x=time,y=I),col="red")+
  labs(x="Time(days)",y="Infected", title="IT Prevalence of infection")+
  theme_bw()

The peak prevalence is clearly at 50 days the start of the epidemic

output_sir_long<- melt(as.data.frame(output_sir), id="time")
ggplot(data=output_sir_long, aes(x=time, y=value,colour=variable,group=variable))+
  geom_line()+
 labs( x="Time(days)",y="proportions", title="IT Covid19 theoretical Prevalence proportion: basic SIR model",subtitle=paste("with beta =",round(parameters["beta"],2),"and gamma =", round(parameters["gamma"],2)))+
  theme_bw()

To calibrate the theoretical model on the observed data, a simple linear regression has been used:

#data is the obeserved new_infected dataframe 

#times <- seq(from = 0, to = 14, by = 0.1) 

SIR_SSQ <- function(parameters, dat){
  beta<- parameters[1]
  gamma<- parameters[2]
            
  output1 <- as.data.frame(ode(y = initial_state_values,
                                 times = times,
                                 func = SIR_model,
                                 parms = c(beta=beta,
                                           gamma=gamma))
                             ) 
    
    SSQ<- sum((output1$I[output1$time %in% dat$time] - dat$new_infected)^2)
                          
    return(SSQ)
}

optimised_IT<- optim( par = c(0.33,0.05),
                      fn = SIR_SSQ,
                      dat =data)
round(optimised_IT$par,2)# 0.45 0.08
## [1]  0.00 -0.04
## [1] 0.21 0.17
# 0.21 0.17

With these two new parameters

initial_state_values <- c(S = 10000-1, I = 1, R = 0)

times <- seq(from = 0, to = 100, by = 1) 

parameters<- c(beta=0.5,gamma=0.047)#♠ 0.55 are 18 days for recovery

opt_mod_IT <- as.data.frame(ode(y = initial_state_values,
                             times = times,
                             func = SIR_model,
                             parms = parameters))
require(ggplot2)


library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
opt_plot <- ggplot()+
  
  geom_point(aes(x = time, y = new_infected),
             colour = "red",
             shape  = "x",
             data = data)+
  
  geom_line(aes(x = time, y = I),
            colour = "blue",
            data   = opt_mod_IT)+
  
  labs(x="Time(days)",y="Prevalence of new infections",title="First wave prevalence ", subtitle=paste("beta=",round(parameters[1],2),"and gamma=",round(parameters[2],2)),caption="Data Source: Civil Protection data")+
  xlim(0,100)+
  theme_bw()
opt_plot_2<- ggplot()+geom_point(aes(x = time, y = new_infected)
                                , colour = "red"
                                , shape  = "x" 
                                , data = data)+
  geom_line(data = output, aes(x = time, y = I),col="blue")+ 
  
  labs(x="Time(days)",title="Second wave",subtitle="beta = 0.457 and gamma = 1/4",caption="Data Source: Civil Protection data")+
  xlim(100,length(data$time))+
  theme_bw()
  
  grid.arrange(opt_plot,opt_plot_2,ncol=2)