# required libraries
library(deSolve)
library(shape) # for plotting arrows
library(progress) # for drawing the progress bar
####################################
##
## The basic model (which does not result in the exact solution)
## Adapted from the previous blogpost but with small adaptations
##
####################################
# the data in China
#https://en.wikipedia.org/wiki/Timeline_of_the_2019%E2%80%9320_Wuhan_coronavirus_outbreak#Cases_Chronology_in_Mainland_China
#https://en.wikipedia.org/wiki/Template:2019%E2%80%9320_coronavirus_pandemic_data/China_medical_cases
IpRpD <- c(45,62,121,198,291,440,571,830,1287,1975,2744,4515,5974,7711,9692,11791,14380,17205,20440, 24324,28018,31161,34546,37198,40171,42638,44653,59804,63851,66492,68500,70548,72436,74185,74576,75465,76288,76936,77150,77658,78064,78497,78824,79251,79824,80026,80151,80270,80409,80552,80651,80695,80735,80754,80778,80793,80813,80824,80844,80860,80881,80894,80928,80967,81008)
R <- c(15,19, 24, 25, 25, 25, 25, 34, 38, 49, 51, 60, 103, 124, 171, 243, 328, 475, 632, 892, 1153, 1540, 2050, 2649, 3281, 3996, 4749, 5911, 6723, 8096, 9419,10844,12552,14376,16155,18264,20659,22888,24734,27323,29745,32495,36117,39002,41625,44462,47204,49856,52045,53726,55404,57065,58600,59897,61475,62793,64111,65541,66911,67749,68679,69601,70420,71150,71740)
D <- c( 2, 2, 2, 3, 6, 9, 17, 25, 41, 56, 80, 106, 132, 170, 213, 259, 304, 361, 425, 490, 563, 637, 722, 811, 908, 1016, 1113, 1259, 1380, 1523, 1665, 1770, 1868, 2004, 2118, 2236, 2345, 2442, 2592, 2663, 2715, 2744, 2788, 2835, 2870, 2912, 2943, 2981, 3012, 3042, 3070, 3097, 3119, 3136, 3158, 3169, 3176, 3189, 3199, 3213, 3226, 3237, 3245, 3248, 3255)
# we take the first 19 to make the analysis relate to the previous blog post
Infected <- (IpRpD)[1:19]
Day <- 1:(length(Infected))
N <- 1400000000 # population of mainland china
# ODE equation used for fitting
#
# I have removed the R(t) in comparison
# to the function used in the odler blogpost
# because we are not gonna use that value
# also we have anyway: R(t) = N(0) - N(t) - I(t)
SIR <- function(time, state, parameters) {
par <- as.list(c(state, parameters))
with(par, {
dS <- -beta/N * I * S
dI <- beta/N * I * S - gamma * I
list(c(dS, dI))
})
}
#
# cost function to be optimized in the fitting
#
RSS <- function(parameters) {
names(parameters) <- c("beta", "gamma")
out <- ode(y = init, times = Day, func = SIR, parms = parameters)
fitInfected <- out[,3]
# fitInfected <- N-out[,2] # this would be a better comparison since the data is not the number of Infectious people
sum((Infected - fitInfected)^2)
}
# starting condition
init <- c(S = N-Infected[1], I = Infected[1])
# init <- c(S = N-Infected[1], I = Infected[1]-R[1]-D[1]) use this starting condition when applying the different line in the RSS function
# performing the fit
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", lower = c(0, 0), upper = c(1, 1)) # optimize with some sensible conditions
Opt$message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par
## beta gamma
## 0.6746088 0.3253912
## beta gamma
## 0.693018 0.306982
# plotting the result
t <- 1:70 # time in days
fit <- data.frame(ode(y = init, times = t, func = SIR, parms = Opt_par))
plot(Day,Infected, xlim = c(0,30), ylim = c(0,3*10^4))
lines(t,fit[,3])

###########################
##
## Alternative model which provides a better fit
##
############################
# We transform the equations and instead of
# parameters beta and gamma
# we use parameters
#
# K = beta-gamma
# R0 = beta/gamma
#
# or
#
# beta = K * R0/(R0-1)
# gamma = K * 1/(R0-1)
#
# then the equations become
#
# dS = I * K * (-S/N * R0)/(R0-1)
# dI = I * K * ( S/N * R0 - 1)/(R0-1)
# note in the beginning, S/N = 1
# then in the start you get this approximate exponential growth
# dI = I * K * (1)
SIR2 <- function(time, state, parameters) {
par <- as.list(c(state, parameters))
with(par, {
dS <- I * K * (-S/N * R0/(R0-1))
dI <- I * K * ( S/N * R0/(R0-1) - 1/(R0-1))
list(c(dS, dI))
})
}
RSS2 <- function(parameters) {
names(parameters) <- c("K", "R0")
out <- ode(y = init, times = Day, func = SIR2, parms = parameters)
fitInfected <- out[,3]
#fitInfected <- N-out[,2]
sum((Infected - fitInfected)^2)
}
### Two functions RSS to do the optimization in a nested way
###
### This nesting requires a lot more computational power
### However, it makes that we have to worry less about the different scale
### of the parameters
Infected_MC <- Infected
SIRMC2 <- function(R0,K) {
parameters <- c(K=K, R0=R0)
out <- ode(y = init, times = Day, func = SIR2, parms = parameters)
fitInfected <- out[,3]
#fitInfected <- N-out[,2]
RSS <- sum((Infected_MC - fitInfected)^2)
return(RSS)
}
SIRMC <- function(K) {
optimize(SIRMC2, lower=1,upper=10^5,K=K, tol = .Machine$double.eps)$objective
}
# wrapper to optimize and return estimated values
getOptim <- function() {
opt1 <- optimize(SIRMC,lower=0,upper=1, tol = .Machine$double.eps)
opt2 <- optimize(SIRMC2, lower=1,upper=10^5,K=opt1$minimum, tol = .Machine$double.eps)
return(list(RSS=opt2$objective,K=opt1$minimum,R0=opt2$minimum))
}
# starting condition
#init <- c(S = N-Infected[1], I = Infected[1]-R[1]-D[1])
init <- c(S = N-Infected[1], I = Infected[1])
# performing the fit
# starting K=0.3, R0 = 2
Opt2 <- optim(c(0.3, 3), RSS2, method = "L-BFGS-B",
hessian = TRUE, control = list(parscale = c(10^0,10^0), factr = 1))
Opt2
## $par
## [1] 0.3987135 1.0054626
##
## $value
## [1] 6510695
##
## $counts
## function gradient
## 129 129
##
## $convergence
## [1] 52
##
## $message
## [1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"
##
## $hessian
## [,1] [,2]
## [1,] 1.214301e+11 3.058955e+12
## [2,] 3.058955e+12 1.275578e+14
Opt3 <- getOptim()
Opt3
## $RSS
## [1] 5926880
##
## $K
## [1] 0.4020269
##
## $R0
## [1] 1.005332
Opt_par2 <- setNames(Opt2$par, c("K", "R0"))
Opt_par3 <- setNames(Opt3[2:3], c("K", "R0"))
# plotting the result
t <- seq(1,70,0.1) # time in days
fit1 <- data.frame(ode(y = init, times = t, func = SIR , parms = Opt_par))
fit2 <- data.frame(ode(y = init, times = t, func = SIR2, parms = Opt_par2))
fit3 <- data.frame(ode(y = init, times = t, func = SIR2, parms = Opt_par3))
plot(Day,Infected, xlim = c(0,30), ylim = c(0,3*10^4),
log = "", xaxt = "n",
main = "Infected in China (including Recovered and Death)", xlab = "", ylab = "number infected")
lines(t, fit3[,3], col = 1)
lines(t, fit2[,3], col = 4, lty = 2)
lines(t, fit1[,3], col = 2, lty = 3)
axis(1, at = 1:30, labels = rep("",30), tck = -0.01)
axis(1, at = c(1,8,15,22), labels = c("Jan 16", "Jan 23", "Jan 30", "Feb 6"))
text(t[183]+2,fit1[183,3]+1800,"old optim fit",pos=4, col=2)
text(t[183]+2,fit1[183,3],expression(R[0] == 2.07),pos=4, col=2)
text(t[183]+2,fit1[183,3]-1400,expression(RSS == 74.3 %*% 10^6),pos=4, col=2)
text(t[220]+3,fit2[220,3]+3200,"new optim fit",pos=3, col=4)
text(t[220]+3,fit2[220,3]+1400,expression(R[0] == 1.0054626),pos=3, col=4)
text(t[220]+3,fit2[220,3],expression(RSS == 6.5 %*% 10^6),pos=3, col=4)
text(t[240]-3,fit3[240,3],"nested algorithm",pos=1, col=1)
text(t[240]-3,fit3[240,3]+700-2500,expression(R[0] == 1.005332),pos=1, col=1)
text(t[240]-3,fit3[240,3]-700-2500,expression(RSS == 5.9 %*% 10^6),pos=1, col=1)
x1 <- t[240]-3; x2 <- t[225];
y1 <- fit3[240,3]; y2 <- fit3[225,3]
Arrows(x1,y1,x1+(x2-x1)*0.65,y1+(y2-y1)*0.65, col = 1)
x1 <- t[220]+2; x2 <- t[227];
y1 <- fit2[220,3]; y2 <- fit2[227,3]
Arrows(x1,y1,x1+(x2-x1)*0.6,y1+(y2-y1)*0.6, col = 4)
x1 <- t[183]+2; x2 <- t[183];
y1 <- fit1[183,3]; y2 <- fit1[183,3]
Arrows(x1,y1,x1+(x2-x1)*0.6,y1+(y2-y1)*0.6, col = 2)

#################
##
## Plotting the cost function and how
## the optimize approaches the minimum
##
################
# use this value to select between the different figures of the blogpost
figure = 2
# compute loss function on a grid
n=101
K <- seq(0.25,0.45,length.out=n)
R0 <- seq(1.001,1.03,length.out=n) #be sure not to use R0= 1 this gives a devision by 0
if (figure == 2) {
K <- seq(0.34920,0.34924,length.out=n)
R0 <- seq(1.8,2.01,length.out=n)
}
z <- matrix(rep(0,n^2),n)
for (i in 1:n) {
for(j in 1:n) {
z[i,j] <- log(RSS2(c(K[i],R0[j])))
}
}
# levels for plotting
levels <- log(10^seq(6,12,0.5))
key <- head(c(t(matrix(c(10^c(6:12),rep("",7)),7))),-1)
if (figure == 2) {
levels <- seq(18.12349,18.12354,0.000005)
key <- head(c(t(matrix(c(seq(18.12349,18.12354,0.00001),rep("",5)),5))),-1)
}
key
## [1] "18.12349" "" "18.1235" "" "18.12351" "" "18.12352"
## [8] "" "18.12353"
# colours for plotting
colours <- function(n) {hsv(c(seq(0.15,0.7,length.out=n),0),
c(seq(0.2,0.4,length.out=n),0),
c(seq(1,1,length.out=n),0.9))}
# empty plot
ylims = c(1,1.03)
if (figure == 2) {
ylims = c(1.8,2.01)
}
plot(-1000,-1000,
xlab="K",ylab=expression(R[0]),
xlim=range(K),
ylim = ylims
#ylim=range(R0)
)
title("RSS as function of parameters \n and path of optimization algorithm",cex.main=1)
# add contours
.filled.contour(K,(R0),z,
col=colours(length(levels)),
levels=levels)
contour(K,(R0),z,add=1, levels=levels, labels = key)
# compute path
# start value
new=c(0.3, 1.025)
if (figure == 2) {
new = c(0.34920,2) # with this starting condition convergence will be more difficult
}
# make lots of small steps
for (i in 1:20) {
### safe old value
old <- new
### compute 1 step
OptTemp <- optim(new, RSS2, method = "L-BFGS-B", lower = c(0,1.00001),
hessian = TRUE,
control = list(parscale = c(10^0,10^0),
factr = 1,
maxit=1))
OptTemp
new <- OptTemp$par
### plot path
dist <- (old[1]-new[1])^2+(old[2]-new[2])^2
lines(c(old[1],new[1]),(c(old[2],new[2])), col=2)
points(new[1],new[2], col=2, bg = 2, pch = 21, cex = 0.7)
if (dist>0.01^2) {
step <- new-old
Arrows(old[1], old[2], old[1]+step[1]*0.85,old[2]+step[2]*0.85,
col=2,
arr.type="curved",
arr.length=0.25,
lty=1)
}
}
OptTemp
## $par
## [1] 0.3492195 2.0000000
##
## $value
## [1] 74292937
##
## $counts
## function gradient
## 21 21
##
## $convergence
## [1] 52
##
## $message
## [1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"
##
## $hessian
## [,1] [,2]
## [1,] 688553069717 2440483.40
## [2,] 2440483 -18721.76
# make lots of small steps again (but different parscale)
for (i in 1:20) {
### safe old value
old <- new
### compute 1 step
OptTemp <- optim(new, RSS2, method = "L-BFGS-B", lower = c(0,1.00001),
hessian = TRUE, control = list(parscale = c(10^-4,10^-4),
factr = 1,
maxit=1))
# alternatively use: ndeps = c(10^-7,10^-7), parscale = c(1,1)
OptTemp
new <- OptTemp$par
### plot path
dist <- (old[1]-new[1])^2+(old[2]-new[2])^2
lines(c(old[1],new[1]),(c(old[2],new[2])), col=1)
points(new[1],new[2], col=1, bg = 1, pch = 21, cex = 0.7)
if (dist>0.01^2) {
step <- new-old
Arrows(old[1], old[2], old[1]+step[1]*0.85,old[2]+step[2]*0.85,
col=1,
arr.type="curved",
arr.length=0.25,
lty=1)
}
}
OptTemp
## $par
## [1] 0.3492171 1.7865381
##
## $value
## [1] 74290781
##
## $counts
## function gradient
## 4 4
##
## $convergence
## [1] 1
##
## $message
## [1] "NEW_X"
##
## $hessian
## [,1] [,2]
## [1,] 688175746612 4475843.16
## [2,] 4475843 -43287.87
if (figure == 1) {
legend(0.255,1.003, c("path with parscale 1","path with parscale 0.0001"),
col = c(2,1), pch = 21, lty=1, cex = 0.9, pt.bg = c(2,1) , yjust = 0)
}
if (figure == 2) {
legend(0.349201,1.81, c("path with parscale 1","path with parscale 0.0001"),
col = c(2,1), pch = 21, lty=1, cex = 0.9, pt.bg = c(2,1) , yjust = 0)
}

####################
##
## Graph with various values of R0
##
#######################
# starting condition
#init <- c(S = N-Infected[1], I = Infected[1]-R[1]-D[1])
init <- c(S = N-Infected[1], I = Infected[1])
Infected_MC <- Infected
SIRMC3 <- function(R0,K) {
parameters <- c(K=K, R0=R0)
out <- ode(y = init, times = Day, func = SIR2, parms = parameters)
fitInfected <- out[,3]
#fitInfected <- N-out[,2]
RSS <- sum((Infected_MC - fitInfected)^2)
return(RSS)
}
plot(Day,Infected, xlim = c(0,80), ylim = c(1,10^9),
log = "y", xaxt = "n",
main = "scenario's for different R0", xlab = "", ylab = "number infected")
axis(1, at = 1:30, labels = rep("",30), tck = -0.01)
axis(1, at = c(1,8,15,22), labels = c("Jan 16", "Jan 23", "Jan 30", "Feb 6"))
for (i in 1:10) {
R0 <- c(1.005,1.01,1.05,1.1,1.2,1.5,2,2.5,4,20)[i]
K <- optimize(SIRMC3, lower=0,upper=1,R0=R0, tol = .Machine$double.eps)$minimum
parameters <- c(K,R0)
xd <- seq(1,60,0.01)
if (i == 1) {
xd <- seq(1,40,0.01)
}
if (i == 2) {
xd <- seq(1,50,0.01)
}
out <- ode(y = init, times = xd, func = SIR2, parms = parameters)
lines(xd,out[,3])
text(tail(xd,1),tail(out[,3],1),bquote(R[0] == .(R0)), pos =4)
}

#####################
##
## computing variance with Hessian
##
###################
### The output of optim will store valures for RSS and the hessian
mod <- optim(c(0.3, 1.04), RSS2, method = "L-BFGS-B", hessian = TRUE,
control = list(parscale = c(10^-4,10^-4), factr = 1))
# unbiased estimate of standard deviation
# we divide by n-p
# where n is the number of data points
# and p is the number of estimated parameters
sigma_estimate <- sqrt(mod$value/(length(Infected)-2))
# compute the inverse of the hessian
# The hessian = the second order partial derivative of the objective function
# in our case this is the RSS
# we multiply by 1/(2 * sigma^2)
covpar <- solve(1/(2*sigma_estimate^2)*mod$hessian)
covpar
## [,1] [,2]
## [1,] 1.236666e-05 -2.349611e-07
## [2,] -2.349611e-07 9.175736e-09
# [,1] [,2]
#[1,] 1.236666e-05 -2.349611e-07
#[2,] -2.349611e-07 9.175736e-09
## the marginal variance of R0 is then approximately at least
##
covpar[2,2]^0.5
## [1] 9.579006e-05
#####################
##
## computing variance with Monte Carlo method
##
###################
set.seed(1)
# modeled data that we use to repeatedly generate data with noise
Infected_MC <- Infected
Day <- c(1:length(Infected))
modMC <- getOptim()
beginpar <- as.numeric(modMC[2:3]) #getOptim is the nested fitting model defined earlier
names(beginpar) <- c("K", "R0")
beginpar
## K R0
## 0.4020269 1.0053320
modInfected <- data.frame(ode(y = init, times = Day, func = SIR2, parms = beginpar))$I
# use RSS to make a relative noise level
errrate <- modMC$RSS/sum(Infected)
### generate random data with
### the mean of modInfected and
### Noise based on errrate
### Note: this assumes heterogeneous noise
### the Fisher Matrix assumed homogeneous noise (which is probabily less correct)
par <- c(0,0) # this will store the parameter outcome and grow using rbind
for (i in 1:100) {
Infected_MC <- rnorm(length(modInfected),modInfected,(modInfected*errrate)^0.5)
OptMC <- getOptim()
par <- rbind(par,c(OptMC$K,OptMC$R0))
}
par <- par[-1,] # strip the first c(0,0) entry
which(par[,2]>2) # some fits went wrong
## named integer(0)
par <- par[-c(36,55),]
layout(matrix(1:2,1))
plot(par, xlab = "K",ylab=expression(R[0]))
title("Monte Carlo simulation")
cov(par)
## [,1] [,2]
## [1,] 1.534513e-05 -3.27662e-07
## [2,] -3.276620e-07 2.37102e-08
# [,1] [,2]
#[1,] 4.461422e-04 -1.709287e-05
#[2,] -1.709287e-05 1.599932e-06
beta <- par[,1]*par[,2]/(par[,2]-1)
gamma <- par[,1]/(par[,2]-1)
plot(beta,gamma,
xlab = expression(beta), ylab = expression(gamma))

###########################
##
## The alternative model 2
##
############################
##
## 1 Now we make the starting number of infected I(0) and population N(0)
## fitting parameters
## (the population N(0) is fitted indirectly by scaling the outcome by a factor
## note that the equations allow scaling all variables by a fixed factor)
## dS = I * K * (-S/N * R0)/(R0-1)
## dI = I * K * ( S/N * R0 - 1)/(R0-1)
## d(f*S) = (f*I) * K * (-(f*S)/(f*N) * R0)/(R0-1)
## d(f*I) = (f*I) * K * ( (f*S)/(f*N) * R0 - 1)/(R0-1)
## 2 Also we consider the 'Infected' from the data cumulatively
## The data infected is not the infected from the model but
## the number of discovered people
## 3 We introduce a factor to scale the number of computed infected with the observed infected
## This scaling corrects for the idea that not all infectious/infected people are reported
## A 'problem' with this scaling is that it also relates to a different population size N(0)
## So this fitting will not allow to determine the effective population size and the over reporting independently
Infected <- IpRpD
Day <- 1:(length(Infected))
N <- 1400000000 # population of mainland china
Ir <- Infected[1]
Days <- 1:length(Infected)
#### Some exploratory stuff to estimate starting values:
#### This is based on the differential equation in
#### Exact analytical solutions of the Susceptible-Infected-Recovered (SIR) epidemic
# model and of the SIR model with equal death and birth rates
# from Harko, Lobo and Mak
#x0 = N exp(beta/gamma N3)
#ln(x0) = ln(N) + beta/gamma N3
#S/x0 = u
#ln(u) = ln(S/N) - beta/gamma N3
#dS/dt = x0/phi = S(beta (N3-N) -gamma ln(S/N) + beta S)
# function to estimate parameters based on linear least squares fit
# and the *linear* relation between dS and S, S ln(S) and S^2
RSSdiff <- function(N) {
corrInfected <- Infected * c(rep(1.22,27),rep(1,38))
dS <- -diff(corrInfected)
S <- (N-corrInfected[-1])
moddif <- lm(dS ~ 0+I(S)+I(S*log(S/N))+I(S*S))
sum(moddif$residuals^2) # RSS
#moddif$coefficients[3]/moddif$coefficients[2]*N # beta
}
RSSdiff <- Vectorize(RSSdiff)
N <- 10:200*10^4
plot(N,RSSdiff(N),log="")
corrInfected <- Infected * c(rep(1.22,27),rep(1,38))
N <- 320000
dS <- diff(corrInfected)
S <- (N-corrInfected[-1])
Sp <- S
Slog <- S*log(S/N)
S2 <- S*S
moddif <- lm(dS ~ 0+Sp+Slog+S2)
moddif
##
## Call:
## lm(formula = dS ~ 0 + Sp + Slog + S2)
##
## Coefficients:
## Sp Slog S2
## 1.632e+00 1.417e+00 -5.098e-06
sum(moddif$residuals)^2
## [1] 0.00231477
beta <- moddif$coefficients[3]
gamma <- -moddif$coefficients[2]
Rstart <- moddif$coefficients[1]/beta+N # this is an unrealistic Rstart = -130
beta <- beta*N #scaling due to different use of beta in Harko et al
beta/gamma
## S2
## 1.151174
-(beta - gamma)
## S2
## 0.2142512
Rstart
## Sp
## -130.3256
plot(diff(corrInfected))
lines(predict(moddif))

# matrix used for computing the scaling
# we use a different sacaling after the 27th day when reporting changed
X <- cbind(c(rep(1,27),rep(0,max(Days)-27)),
c(rep(0,27),rep(1,max(Days)-27)))
SIR4 <- function(time, state, parameters) {
par <- as.list(c(state, parameters))
with(par, {
dS <- I * K * (-S/Nm * R0/(R0-1))
dI <- I * K * ( S/Nm * R0/(R0-1) - 1/(R0-1))
list(c(dS, dI))
})
}
parameters <- c(0.4,4)
Nm <- 10^5 #using a smaller population than entire China
Istart = 30
# inner optimization loop
# here we optimize K and R0 for a given Nm and Istart
RSS4 <- function(parameters,Nm,Istart) {
parametersx <- c(parameters, Nm, Istart)
names(parametersx) <- c("K", "R0", "Nm", "Istart")
start <- c(parametersx[3]-Infected[1],parametersx[4])
names(start) <- c("S","I")
out <- ode(y = start, times = Day, func = SIR4, parms = parametersx[c(1,2,3)])
fitInfected <- parametersx[3]-out[,2]
## perform additional scaling (you could see this as yet another nested optimization)
Xm <- X*fitInfected
# the limit for a and b is because we should not scale such that the infected become more than obeserved
# we introduced I(0) because of uncertainty in R(0)
modscaled <- nls(Infected ~ a*Xm[,1]+ b* Xm[,2], start = list(a=0.5*Infected[1]/Istart,b=0.5*Infected[1]/Istart),
upper = c(1,2)*Infected[1]/Istart,algorithm= "port")
sum((Infected - predict(modscaled))^2)
}
# outer optimization loop
# here we optimize Istart
# for each Istart we optimize K and R0
# this is very slow but much more robust
RSS4b <- function(parameters) {
Istart <- parameters[1]
RSS4start <- c(0.3,2.5)
OptInner <- optim(RSS4start, RSS4, method = "L-BFGS-B",
Istart = Istart, Nm = Nm,
lower = c(0.1,1.001),
upper = c(1,20),
hessian = TRUE,
control = list(parscale = c(10^-6,10^-6),
factr = 1, maxit= 10^4))
OptInner$value
}
### Doing the outer optimization loop by Optimize
OptOuter <- optimize(RSS4b,interval = c(5,45) )
OptOuter
## $minimum
## [1] 10.83284
##
## $objective
## [1] 31754389
### Doing the outer optimization loop by manual grid search
n=30
Nm <- 10^5 #using a smaller population than entire China
rangeIstart <- 10.83284 + seq(-5,+30,length.out=n)
zRSS <- c(NA,length(rangeIstart)) # empty container for values to be computed
pb <- progress_bar$new(total = n)
for (i in 1:n) {
pb$tick()
zRSS[i] <- RSS4b(rangeIstart[i])
}
# plotting RSS as function of I(0)
plot(rangeIstart,zRSS, log="y", yaxt = "n",
xlab = "I(0)", ylab = "RSS")
title("optimal RSS as function of parameters I(0)",cex.main=1)
### plot a fit with the data for some selected parameters
# Doing model with Nm = 10^5 and Istart = 12
# But multiplied by a factor
# In this way we end up with a scaling in the end of 1
# This is a bit exchangable, correcting for underreporting and for population size is similar
Nm <- 10^5*4.155
Istart <- 10.83284*4.155
RSS4start <- c(0.24,2.5)
Opt4 <- optim(RSS4start, RSS4, method = "L-BFGS-B",
Nm = Nm, Istart = Istart,
lower = c(0.1,1.001),
upper = c(1,20),
hessian = TRUE,
control = list(parscale = c(10^-5,10^-4), ndeps = rep(10^-6,2),
factr = 1000, maxit= 10^4, trace = 2 ))
## N = 2, M = 5 machine precision = 2.22045e-16
##
## iterations 25
## function evaluations 53
## segments explored during Cauchy searches 26
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 0
## norm of the final projected gradient 0.879169
## final function value 2.94611e+07
##
## final value 29461131.019445
## converged
Opt4
## $par
## [1] 0.217721 1.087240
##
## $value
## [1] 29461131
##
## $counts
## function gradient
## 53 53
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## $hessian
## [,1] [,2]
## [1,] 3065820783377 -985437072814
## [2,] -985437072814 939285382628
# this may give an abnormal termination
# but that could be due to the small factr parameter
# Retrieve optimal parameters and compute curve
parameters <- c(Opt4$par,Nm,Istart)
names(parameters) <- c("K", "R0", "Nm","Istart")
start <- c(parameters[3]-Infected[1],parameters[4])
names(start) <- c("S","I")
result <- ode(y = start, times = Day, func = SIR4, parms = parameters[c(1,2,3)])
parameters
## K R0 Nm Istart
## 2.177210e-01 1.087240e+00 4.155000e+05 4.501045e+01
# scaling
fitInfected <- Nm-result[,2]
Xm <- X*fitInfected
modscaled <- lm(Infected ~ 0 + Xm)
modscaled
##
## Call:
## lm(formula = Infected ~ 0 + Xm)
##
## Coefficients:
## Xm1 Xm2
## 1.001 1.233
modscaled
##
## Call:
## lm(formula = Infected ~ 0 + Xm)
##
## Coefficients:
## Xm1 Xm2
## 1.001 1.233
plot(Infected/Nm*100, log = "", xlim = c(0,70), ylim = c(0,100),
xlab = "day", ylab = "Total people that got sick [%]", xaxt="n")
axis(1, at = c(1,8,15,22,29,36,43), labels = c("Jan 16", "Jan 23", "Jan 30", "Feb 6", "Feb 13", "Feb20", "Feb 27"), cex.axis=0.8)
axis(1, at = c(1:50), labels = rep("",50), cex.axis=0.8, tck=-0.01)
lines((predict(modscaled))/Nm*100)
title("modeling with Ntot = 4.155*10^5, I(0) = 45, K = 0.217721, R0 = 1.087", cex.main = 1)
