#library (dplyr)
library(deSolve)
library(reshape2)
library(ggplot2)
Sys.time()
## [1] "2020-10-01 20:51:40 CEST"
CV19_backup<- read.csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")
#head(CV19_backup);tail(CV19_backup)
names(CV19_backup)
## [1] "data" "stato"
## [3] "ricoverati_con_sintomi" "terapia_intensiva"
## [5] "totale_ospedalizzati" "isolamento_domiciliare"
## [7] "totale_positivi" "variazione_totale_positivi"
## [9] "nuovi_positivi" "dimessi_guariti"
## [11] "deceduti" "casi_da_sospetto_diagnostico"
## [13] "casi_da_screening" "totale_casi"
## [15] "tamponi" "casi_testati"
## [17] "note"
require(dplyr)
CV19_IT<- CV19_backup%>%
mutate(time=seq(1,length(data),by=1),cases=totale_casi,infected= totale_casi-deceduti,recovered=dimessi_guariti, deceased=deceduti,CFR=round(deceduti/totale_casi*100,2))%>%
select(time, cases, infected, deceased,recovered,CFR)%>%
arrange(desc(cases))
head(CV19_IT)
## time cases infected deceased recovered CFR
## 1 221 317409 281491 35918 228844 11.32
## 2 220 314861 278967 35894 227704 11.40
## 3 219 313011 277136 35875 226506 11.46
## 4 218 311364 275513 35851 225190 11.51
## 5 217 309870 274035 35835 224417 11.56
## 6 216 308104 272286 35818 223693 11.63
CV19_IT_long <- melt(as.data.frame(CV19_IT), id = "time")
ggplot(data = CV19_IT_long,
aes(x = time, y = value, colour = variable, group = variable)) +
geom_line() +
xlab("Time (days)")+
ylab("Number") +
labs(colour = "Compartment",
title = "Novel Coronavirus Outbreak - Italy")+theme_bw()
Creating a new dataset with the number of new infected:
data<- data.frame(time= 1:length(CV19_backup$data), new_infected = CV19_backup$nuovi_positivi[1:length(CV19_backup$data)] ) #real distribution of new infected
head(data)
## time new_infected
## 1 1 221
## 2 2 93
## 3 3 78
## 4 4 250
## 5 5 238
## 6 6 240
Time-length of the epidemic:
length(data$time)
## [1] 221
And then considering the shape of the distribution and the density distribution; the difference can be seen on the right tail as it is going down to zero.
par(mfrow=c(1,2))
plot(data$new_infected,type="l")
plot(density(data$new_infected))
This graph shows the distribution of the new infected and its moving average:
require(zoo)
ggplot(data, aes(time, new_infected)) +
geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
geom_line(aes(y=rollmean(new_infected, 7, na.pad=TRUE))) +
theme_bw()
The prevalence is intended as the number of infected cases at a given piont in time.
The peak prevalence of the duration of the pandemic in Italy:
print("Peak prevalence of the pandemic in Italy:")
## [1] "Peak prevalence of the pandemic in Italy:"
max(data$new_infected)
## [1] 6557
print("Timing of the peak (days):")
## [1] "Timing of the peak (days):"
data$time[data$new_infected==max(data$new_infected)]
## [1] 27
print("Duration of the pandemic (days):")
## [1] "Duration of the pandemic (days):"
max(data$time[data$new_infected>1])-min(data$time[data$new_infected>1])
## [1] 220
To build a workable SIR model it is important to consider the proportion of individuals who are at risk of infection, setting up a percentage of the population who will more likely be infected by the virus, also considering the latent phase of the infection when the susceptible are infected but still not infectious. To do this the variable “E” has been included in the model as well as “A” for the asymptomatics.
This first part of the model is to identify the second wave and to do thar a SEAIR model has been used:
# MODEL FUNCTION
SEAIR_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+E+A+I+R
lambda <- beta * I/N # force of infection
# The differential equations
dS <- -lambda * S - mu * S + b * N
dE <- lambda * S - r * E
dA <- (1-p_a) * lambda * S - gamma * A
dI <- p_a * lambda * S + r * E - gamma * I - mu * I
dR <- gamma * A + gamma * I - mu * R + b * N
#dT <-
# Output
return(list(c(dS, dE, dA, dI, dR)))
})
}
# MODEL OUTPUT
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SEAIR_model,
parms = parameters))
# PLOT OF THE MODEL FIT
ggplot() +
geom_line(data = output, aes(x = time, y = I)) + # plot the model prediction of
# the number infected as a line
geom_point(data = data, aes(x = time, y = new_infected),
colour = "red") + # overlay the data as red dots
xlab("Time (days)")+
ylab("Number of new infected people") +
labs(title = paste("Model fit to the second wave epidemic curve with beta =", parameters["beta"],
"and gamma =", parameters["gamma"]),caption= "Data source: Civil Protection")+
xlim(100,210)+theme_bw()
This second part of the modeling represent the basic of a SIR model applyed to Italian’s first wave of Covid19 outbreak prevalence of infection.
require(deSolve)
N<- max(CV19_IT$cases)
initial_state_values<- c(S=N-1,I=1,R=0)
parameters<- c(beta= 1/3,gamma=1/22)#beta (infection rate) is hypothesized to be 3 days^-1,
#gamma (recovery rate) is 22 days^-1
times<- seq(0,length(CV19_IT$time),by=1)
SIR_model<- function(time,state,parameters){
with(as.list(c(state,parameters)),{
N<- S+I+R
lambda<- beta*I/N
dS<- -lambda*S
dI<- lambda*S-gamma*I
dR<- gamma*I
return(list(c(dS,dI,dR)))
})
}
output_sir<- as.data.frame(ode(y=initial_state_values,
times=times,
func=SIR_model,
parms=parameters))
ggplot()+
geom_line(data=output_sir,aes(x=time,y=I),col="red")+
labs(x="Time(days)",y="Infected", title="IT Prevalence of infection")+
theme_bw()
The peak prevalence is clearly at 50 days the start of the epidemic
output_sir_long<- melt(as.data.frame(output_sir), id="time")
ggplot(data=output_sir_long, aes(x=time, y=value,colour=variable,group=variable))+
geom_line()+
labs( x="Time(days)",y="proportions", title="IT Covid19 theoretical Prevalence proportion: basic SIR model",subtitle=paste("with beta =",round(parameters["beta"],2),"and gamma =", round(parameters["gamma"],2)))+
theme_bw()
To calibrate the theoretical model on the observed data, a simple linear regression has been used:
#data is the obeserved new_infected dataframe
#times <- seq(from = 0, to = 14, by = 0.1)
SIR_SSQ <- function(parameters, dat){
beta<- parameters[1]
gamma<- parameters[2]
output1 <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SIR_model,
parms = c(beta=beta,
gamma=gamma))
)
SSQ<- sum((output1$I[output1$time %in% dat$time] - dat$new_infected)^2)
return(SSQ)
}
optimised_IT<- optim( par = c(0.33,0.05),
fn = SIR_SSQ,
dat =data)
round(optimised_IT$par,2)# 0.45 0.08
## [1] 0.21 0.17
# 0.21 0.17
With these two new parameters
#♥#
initial_state_values <- c(S = 10000-1, I = 1, R = 0)
times <- seq(from = 0, to = 100, by = 1)
parameters<- c(beta=0.5,gamma=0.047)#♠ 0.55 are 18 days for recovery
opt_mod_IT <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SIR_model,
parms = parameters))
require(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
opt_plot <- ggplot()+
geom_point(aes(x = time, y = new_infected),
colour = "red",
shape = "x",
data = data)+
geom_line(aes(x = time, y = I),
colour = "blue",
data = opt_mod_IT)+
labs(x="Time(days)",y="Prevalence of new infections",title="First wave prevalence ", subtitle=paste("beta=",round(parameters[1],2),"and gamma=",round(parameters[2],2)),caption="Data Source: Civil Protection data")+
xlim(0,100)+
theme_bw()
opt_plot_2<- ggplot()+geom_point(aes(x = time, y = new_infected)
, colour = "red"
, shape = "x"
, data = data)+
geom_line(data = output, aes(x = time, y = I),col="blue")+
labs(x="Time(days)",title="Second wave",subtitle="beta = 0.457 and gamma = 1/4",caption="Data Source: Civil Protection data")+
xlim(100,210)+
theme_bw()
grid.arrange(opt_plot,opt_plot_2,ncol=2)