library (dplyr)
library(deSolve)
library(reshape2)
library(ggplot2)
Sys.time()
## [1] "2020-10-25 22:45:33 CET"
## [1] "2020-10-01 20:51:40 CEST"
CV19_backup<- read.csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")
#head(CV19_backup);tail(CV19_backup)
names(CV19_backup)
## [1] "data" "stato"
## [3] "ricoverati_con_sintomi" "terapia_intensiva"
## [5] "totale_ospedalizzati" "isolamento_domiciliare"
## [7] "totale_positivi" "variazione_totale_positivi"
## [9] "nuovi_positivi" "dimessi_guariti"
## [11] "deceduti" "casi_da_sospetto_diagnostico"
## [13] "casi_da_screening" "totale_casi"
## [15] "tamponi" "casi_testati"
## [17] "note"
require(dplyr)
CV19_IT<- CV19_backup%>%
mutate(time=seq(1,length(data),by=1),cases=totale_casi,infected= totale_casi-deceduti,recovered=dimessi_guariti, deceased=deceduti,CFR=round(deceduti/totale_casi*100,2))%>%
select(time, cases, infected, deceased,recovered,CFR)%>%
arrange(desc(cases))
head(CV19_IT)
## time cases infected deceased recovered CFR
## 1 245 525782 488444 37338 266203 7.10
## 2 244 504509 467299 37210 264117 7.38
## 3 243 484869 447810 37059 261808 7.64
## 4 242 465726 428758 36968 259456 7.94
## 5 241 449648 412816 36832 257374 8.19
## 6 240 434449 397744 36705 255005 8.45
CV19_IT_long <- melt(as.data.frame(CV19_IT), id = "time")
ggplot(data = CV19_IT_long,
aes(x = time, y = value, colour = variable, group = variable)) +
geom_line() +
xlab("Time (days)")+
ylab("Number") +
labs(colour = "Compartment",
title = "Novel Coronavirus Outbreak - Italy")+theme_bw()
Creating a new dataset with the number of new infected:
data<- data.frame(time= 1:length(CV19_backup$data), new_infected = CV19_backup$nuovi_positivi[1:length(CV19_backup$data)] ) #real distribution of new infected
head(data)
## time new_infected
## 1 1 221
## 2 2 93
## 3 3 78
## 4 4 250
## 5 5 238
## 6 6 240
Time-length of the epidemic:
length(data$time)
## [1] 245
And then considering the shape of the distribution and the density distribution; the difference can be seen on the right tail as it is going down to zero.
par(mfrow=c(1,2))
plot(data$new_infected,type="l")
plot(density(data$new_infected))
This graph shows the distribution of the new infected and its moving average:
require(zoo)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ggplot(data, aes(time, new_infected)) +
geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
geom_line(aes(y=rollmean(new_infected, 7, na.pad=TRUE))) +
theme_bw()
The prevalence is intended as the number of infected cases at a given piont in time.
The peak prevalence of the duration of the pandemic in Italy:
print("Peak prevalence of the pandemic in Italy:")
## [1] "Peak prevalence of the pandemic in Italy:"
## [1] "Peak prevalence of the pandemic in Italy:"
max(data$new_infected)
## [1] 21273
## [1] 6557
print("Timing of the peak (days):")
## [1] "Timing of the peak (days):"
## [1] "Timing of the peak (days):"
data$time[data$new_infected==max(data$new_infected)]
## [1] 245
## [1] 27
print("Duration of the pandemic (days):")
## [1] "Duration of the pandemic (days):"
## [1] "Duration of the pandemic (days):"
max(data$time[data$new_infected>1])-min(data$time[data$new_infected>1])
## [1] 244
## [1] 220
To build a workable SIR model it is important to consider the proportion of individuals who are at risk of infection, setting up a percentage of the population who will more likely be infected by the virus, also considering the latent phase of the infection when the susceptible are infected but still not infectious. To do this the variable “E” has been included in the model as well as “A” for the asymptomatics.
This first part of the model is to identify the second wave and to do thar a SEAIR model has been used:
SEAIR_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+E+A+I+R
lambda <- beta * I/N # force of infection
# The differential equations
dS <- -lambda * S - mu * S + b * N
dE <- lambda * S - r * E
dA <- (1-p_a) * lambda * S - gamma * A
dI <- p_a * lambda * S + r * E - gamma * I - mu * I
dR <- gamma * A + gamma * I - mu * R + b * N
#dT <-
# Output
return(list(c(dS, dE, dA, dI, dR)))
})
}
# MODEL OUTPUT
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SEAIR_model,
parms = parameters))
# PLOT OF THE MODEL FIT
ggplot() +
geom_line(data = output, aes(x = time, y = I)) + # plot the model prediction of
# the number infected as a line
geom_point(data = data, aes(x = time, y = new_infected),
colour = "red") + # overlay the data as red dots
xlab("Time (days)")+
ylab("Number of new infected people") +
labs(title = paste("Model fit to the second wave epidemic curve with beta =", parameters["beta"],
"and gamma =", parameters["gamma"]),caption= "Data source: Civil Protection")+
xlim(100,length(data$time))+theme_bw()
This second part of the modeling represent the basic of a SIR model applyed to Italian’s first wave of Covid19 outbreak prevalence of infection.
require(deSolve)
N<- max(CV19_IT$cases)
initial_state_values<- c(S=N-1,I=1,R=0)
parameters<- c(beta= 1/3,gamma=1/22)#beta (infection rate) is hypothesized to be 3 days^-1,
#gamma (recovery rate) is 22 days^-1
times<- seq(0,length(CV19_IT$time),by=1)
SIR_model<- function(time,state,parameters){
with(as.list(c(state,parameters)),{
N<- S+I+R
lambda<- beta*I/N
dS<- -lambda*S
dI<- lambda*S-gamma*I
dR<- gamma*I
return(list(c(dS,dI,dR)))
})
}
output_sir<- as.data.frame(ode(y=initial_state_values,
times=times,
func=SIR_model,
parms=parameters))
ggplot()+
geom_line(data=output_sir,aes(x=time,y=I),col="red")+
labs(x="Time(days)",y="Infected", title="IT Prevalence of infection")+
theme_bw()
The peak prevalence is clearly at 50 days the start of the epidemic
output_sir_long<- melt(as.data.frame(output_sir), id="time")
ggplot(data=output_sir_long, aes(x=time, y=value,colour=variable,group=variable))+
geom_line()+
labs( x="Time(days)",y="proportions", title="IT Covid19 theoretical Prevalence proportion: basic SIR model",subtitle=paste("with beta =",round(parameters["beta"],2),"and gamma =", round(parameters["gamma"],2)))+
theme_bw()
To calibrate the theoretical model on the observed data, a simple linear regression has been used:
#data is the obeserved new_infected dataframe
#times <- seq(from = 0, to = 14, by = 0.1)
SIR_SSQ <- function(parameters, dat){
beta<- parameters[1]
gamma<- parameters[2]
output1 <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SIR_model,
parms = c(beta=beta,
gamma=gamma))
)
SSQ<- sum((output1$I[output1$time %in% dat$time] - dat$new_infected)^2)
return(SSQ)
}
optimised_IT<- optim( par = c(0.33,0.05),
fn = SIR_SSQ,
dat =data)
round(optimised_IT$par,2)# 0.45 0.08
## [1] 0.00 -0.04
## [1] 0.21 0.17
# 0.21 0.17
With these two new parameters
initial_state_values <- c(S = 10000-1, I = 1, R = 0)
times <- seq(from = 0, to = 100, by = 1)
parameters<- c(beta=0.5,gamma=0.047)#♠ 0.55 are 18 days for recovery
opt_mod_IT <- as.data.frame(ode(y = initial_state_values,
times = times,
func = SIR_model,
parms = parameters))
require(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
opt_plot <- ggplot()+
geom_point(aes(x = time, y = new_infected),
colour = "red",
shape = "x",
data = data)+
geom_line(aes(x = time, y = I),
colour = "blue",
data = opt_mod_IT)+
labs(x="Time(days)",y="Prevalence of new infections",title="First wave prevalence ", subtitle=paste("beta=",round(parameters[1],2),"and gamma=",round(parameters[2],2)),caption="Data Source: Civil Protection data")+
xlim(0,100)+
theme_bw()
opt_plot_2<- ggplot()+geom_point(aes(x = time, y = new_infected)
, colour = "red"
, shape = "x"
, data = data)+
geom_line(data = output, aes(x = time, y = I),col="blue")+
labs(x="Time(days)",title="Second wave",subtitle="beta = 0.457 and gamma = 1/4",caption="Data Source: Civil Protection data")+
xlim(100,length(data$time))+
theme_bw()
grid.arrange(opt_plot,opt_plot_2,ncol=2)