library(tidyverse) # useful for general R
library(EpiModel) # EpiModel package
library(parallel) # for parallel processing where applicable
library(plotly) # for interactive plots
library(foreach)
theme_set(theme_bw())
if("igraph" %in% (.packages())){ # igraph sometimes clashes with the statnet and the network package
detach("package:igraph", unload=TRUE)
}
calculate.max.edges = function(n){
return (n*(n-1)/2)
}
This model builds on previous knowledge acquired through weeks of work and therefore does not aim to be an instructive piece for beginers to EpiModel. For the old notebooks that were used to build knowledge please see Archive/DevNotebooks00.
At this time we have individual information on age, gender and aggregate data on ethnicity.
# age and gender data
ageData = read.csv("../Data/age_and_sex.csv") %>% rename(Age = V1, Gender = V2, ID = X)
# camp parameters: hospitalisation rate and fatality rate by Age group, from Nick in AI4GOOD team
campParams = read.csv("../Data/camp_params.csv") %>% rename(Pop_Proportion = Value) %>%
filter(Camp == "Camp_1", Variable == "Population_structure")
campParams$Age = gdata::drop.levels(campParams$Age)
# processing of the age data to be put into 10 year age bins
ageGroups = campParams %>%
select(Age) %>% as.matrix()
# left inclusive and right exclusive cuts
ageData$ageGroup = cut(ageData$Age, breaks = c(0,10,20,30,40,50,60,70,80, Inf))
plot(as.factor(campParams$Age), campParams$Hosp_given_symptomatic,
main = "Hospitalisation rate by age group",
ylab = 'Hospitalisation rate', xlab = 'Age group')
plot(as.factor(campParams$Age), campParams$Critical_given_hospitalised,
main = "Fatality rate by age group",
ylab = 'Fatality rate', xlab = 'Age group')
plot(density(ageData$Age), main = "Distribution of age in Moria camp",
xlab = 'Age', ylab = 'Density')
paramsFromData = list()
paramsFromData$age.dist = ageData$ageGroup
# the two below are in same order as age groups
paramsFromData$rates.byAge = data.frame(AgeGroup = levels(ageData$ageGroup),
hosp.rate = campParams$Hosp_given_symptomatic,
fat.rate = campParams$Critical_given_hospitalised)
Individuals in Moria live in either tents or isoboxes, with a capacity of 4 and 10 respectively. In this model we attribute each person to a housing unit which is represented by a unique name (e.g. tent1, tent2, isobox4…). From the Tucker model, we know that 8100 indviduals occupy isoboxes and 10600 are in tents (total of 18700). We can also use this numbers to make proportions by housing unit and build smaller networks (less individuals).
Note: For convenience, we will define here n, the number of individuals in the simulated network. In experiments, this number equals to number of individuals in Moria.
n = 50
prop.isobox = round(8100/(8100+10600),2) # prop. of people in isobox
prop.tent = 1 - prop.isobox # prop. of people in tent
cat(paste0("Proportion of people in isoboxes: ", prop.isobox, "\n",
"Proportion of people in tents:", prop.tent, "\n"))
## Proportion of people in isoboxes: 0.43
## Proportion of people in tents:0.57
The number of tents and the number of isoboxes are calculated as: \[N_{tents} = \frac{\text{Number of people in tents}}{\text{tent capacity}} = \frac{n\cdot Prop_{tents}}{tent_{capacity}}\\ N_{isobox} = \frac{\text{Number of people in isoboxes}}{\text{isobox capacity}}= \frac{n\cdot Prop_{isobox}}{isobox_{capacity}}\]
Note: this are rounded to get integer values.
tent.capacity = 4
iso.capacity = 10
num_of_iso = round(n*prop.isobox/iso.capacity) # number of isoboxes
num_of_tents = round(n*prop.tent/tent.capacity) # number of tents
if (n > num_of_iso*iso.capacity+num_of_tents*tent.capacity){
num_of_tents = num_of_tents+1
}
num_in_iso = num_of_iso*iso.capacity
num_in_tents = n - num_in_iso # num_of_tents*tent.capacity
cat(paste0(
"Number of tents: ", num_of_tents, "\n",
"Number of isoboxes: ", num_of_iso, "\n",
"Number of people in tents: ", num_in_tents, "\n",
"Number of people in isoboxes: ", num_in_iso, "\n\n",
"Number of housing units in total: ", num_of_iso+num_of_tents))
## Number of tents: 8
## Number of isoboxes: 2
## Number of people in tents: 30
## Number of people in isoboxes: 20
##
## Number of housing units in total: 10
iso_ids = 1:num_of_iso
tent_ids = 1:num_of_tents
housing_iso = apportion_lr(
vector.length = num_in_iso,
values = iso_ids,
proportions = rep(1/(num_of_iso), num_of_iso)
)
housing_iso = paste0("iso", housing_iso)
housing_tents = apportion_lr(
vector.length = num_in_tents,
values = tent_ids,
proportions = rep(1/(num_of_tents), num_of_tents)
)
housing_tents = paste0("tent", housing_tents)
housing = c(housing_iso,housing_tents)
# residence vector to keep track of those in tents and isos
residence = c(rep("iso", num_in_iso), rep("tent", num_in_tents))
plot(table(housing),
main = "Number of people per housing unit",
ylab = "Number of people",
xlab = "Housing ID",
)
grid()
nw = network.initialize(n = n, directed = FALSE)
# housing
nw = set.vertex.attribute(nw, "housing", housing)
# age
nw = set.vertex.attribute(nw, "age", sample(as.vector(paramsFromData$age.dist),n))
formation = ~edges +
offset(nodematch("housing", diff = F))
## max in-house ties
max.inhouse.edges = 0
for (num_in_house in table(housing)){
max.inhouse.edges = max.inhouse.edges + calculate.max.edges(num_in_house)
}
# default degrees in housing units as per current occupancy
## The degree of a node in a network is the number of connections it has to other nodes
# the -1 is tou account fr the lack of connection to one-self
iso.default.degree = mean(table(housing)[iso_ids]-1)
tent.default.degree = mean(table(housing)[tent_ids]-1)
# number of external contacts per person in average
external.contacts = 4
mean_degree.iso = iso.default.degree + external.contacts
mean_degree.tent = tent.default.degree + external.contacts
# calculate mean degree
mean_degree = (num_in_iso*mean_degree.iso+
num_in_tents*mean_degree.tent)/n
expected.edges = n*mean_degree/2
target.stats = c(expected.edges)
d.rate = 0
coef.diss = dissolution_coefs(dissolution = ~offset(edges)+
offset(nodematch("housing", diff = F)),
duration = c(2, 1e9),
d.rate = d.rate)
est <- netest(nw,
formation,
target.stats,
coef.diss,
coef.form = Inf,
set.control.ergm = control.ergm(MCMLE.maxit = 500)
)
summary(est)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: TARGET_STATS ~ edges + offset(nodematch("housing", diff = F))
## <environment: 0x7f9727be7f20>
##
## Iterations: 5 out of 500
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -2.04217 0.09425 0 -21.67 <1e-04 ***
## offset(nodematch.housing) Inf 0.00000 0 Inf <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-likelihood was not estimated for this fit.
## To get deviances, AIC, and/or BIC from fit `object$fit` run
## > object$fit<-logLik(object$fit, add=TRUE)
## to add it to the object or rerun this function with eval.loglik=TRUE.
##
## The following terms are fixed by offset and are not estimated:
## offset(nodematch.housing)
##
##
## Dissolution Coefficients
## =======================
## Dissolution Model: ~offset(edges) + offset(nodematch("housing", diff = F))
## Target Statistics: 2 1e+09
## Crude Coefficient: 0 20.72327
## Mortality/Exit Rate: 0
## Adjusted Coefficient: 0 20.72327
# mcmc.diagnostics(est1$fit)
Do our network simulation match our target statistics?
cores = parallel::detectCores()-1
dx <- netdx(est,
nsims = 1e3,
nsteps = 90,
ncores = cores,
dynamic = FALSE,
nwstats.formula = ~edges + nodematch("housing", diff = FALSE),
set.control.ergm = control.simulate.ergm(MCMC.burnin = 1e6),
keep.tnetwork = T)
##
## Network Diagnostics
## -----------------------
## - Simulating 1000 networks
## - Calculating formation statistics
print(dx) # diagnostics table
## EpiModel Network Diagnostics
## =======================
## Diagnostic Method: Static
## Simulations: 1000
##
## Formation Diagnostics
## -----------------------
## Target Sim Mean Pct Diff Sim SD
## edges 257.5 257.205 -0.115 10.745
## offset(nodematch.housing) NA 132.000 NA 0.000
## nodematch.housing NA 132.000 NA 0.000
plot(dx) # diagnostics plot
First we import our custom modules
source("../Scripts/SEIQHRFNetModules.R")
Next we set our epidemic parameters
param = param.net(act.rate.se = 10,
inf.prob.se = 0.02,
act.rate.si = 10,
inf.prob.si = 0.05,
act.rate.sq = 2.5,
inf.prob.sq = 0.02,
ei.rate = 1/10,
iq.rate = 1/30, #c(rep(1/30, 60), rep(15/30, 120)), # time varying works
ih.rate = 1/100,
qh.rate = 1/100,
hr.rate = 1/15,
qr.rate = 1/20,
hf.rate = 0,#1/50,
hf.rate.overcap = 0,# 1/25,
hosp.cap = 5,
hosp.tcoeff = 0.5,
a.rate = 0,
di.rate = d.rate,
ds.rate = d.rate,
dr.rate = d.rate,
ratesbyAge = paramsFromData$rates.byAge
)
init = init.net(i.num = 3,
r.num = 0,
e.num = 0,
s.num = n - 3,
f.num = 0,
h.num = 0,
q.num = 0
)
print(Sys.time()-t0) # roughly 30 minutes for n = 1000 and nsteps = 60
## Time difference of 1.588885 mins
res = as.data.frame(sim)
nw_sim <- get_network(sim)
df_sim <- as.data.frame(nw_sim)
color <- nw_sim %v% "housing"
summary(df_sim$duration[which(color[df_sim$head] == color[df_sim$tail])])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 33.00 90.00 66.36 90.00 90.00
summary(df_sim$duration[which(color[df_sim$head] != color[df_sim$tail])])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.978 2.000 13.000
ggplotly(
res %>% select(s.num, e.num, i.num, q.num, h.num, r.num, f.num, num, time) %>%
group_by(time) %>% summarise_all(~mean(.)) %>%
pivot_longer(-time) %>% ggplot(aes(x = time, y = value, color = name))+
geom_line(size = 1)+scale_color_brewer(palette = "Set1")
)
get_times <- function(simulation.object) {
sim <- simulation.object
for (s in 1:sim$control$nsims) {
if (s == 1) {
times <- sim$times[[paste0("sim", s)]]
times <- times %>% mutate(s = s)
} else {
times <- times %>% bind_rows(sim$times[[paste("sim",
s, sep = "")]] %>% mutate(s = s))
}
}
times <- times %>%
mutate(infTime = ifelse(infTime < 0, -5, infTime),
expTime = ifelse(expTime < 0, -5, expTime)) %>%
mutate(incubation_period = infTime - expTime,
illness_duration = recTime - expTime,
illness_duration_hosp = dischTime - expTime,
hosp_los = dischTime - hospTime,
quarantine_delay = quarTime - infTime,
survival_time = fatTime - infTime) %>%
select(s,
incubation_period,
quarantine_delay,
illness_duration,
illness_duration_hosp,
hosp_los,
survival_time) %>%
pivot_longer(-s, names_to = "period_type", values_to = "duration") %>%
mutate(period_type = factor(period_type,
levels = c("incubation_period",
"quarantine_delay",
"illness_duration",
"illness_duration_hosp",
"hosp_los",
"survival_time"),
labels = c("Incubation period",
"Delay entering isolation",
"Illness duration",
"Illness duration (hosp)",
"Hospital care required duration",
"Survival time of case fatalities"),
ordered = TRUE))
return(times)
}
times = get_times(sim)
times %>% filter(duration <= 30) %>% ggplot(aes(x = duration)) +
geom_density() + facet_wrap(period_type ~ ., scales = "free_y") +
labs(title = "Duration frequency distributions", subtitle = "Baseline simulation")
As a heatmap
nw_object = get_network(sim)
net_at_1 = network.collapse(nw_object, at = 2)
graph = intergraph::asIgraph(net_at_1)
adj = igraph::as_adjacency_matrix(graph, sparse = F)
colnames(adj) = igraph::V(graph)$housing
rownames(adj) = igraph::V(graph)$housing
adj = adj[order(rownames(adj)), order(colnames(adj))]
pheatmap::pheatmap(adj,
color = c("grey50","black"),
border_color = "white",
angle_col = 45,
angle_row = 45,
fontsize = 6,
legend_breaks = c(0,1),
legend = F,
cluster_rows = F,
cluster_cols = F,
show_rownames = ifelse(n<100, T, F),
show_colnames = ifelse(n<100, T, F))
library(animation)
nw_object = get_network(sim)
ani.record(reset = TRUE) # clear history before recording
for (at in c(1:30)){
net_at = network.collapse(nw_object, at = at)
graph = intergraph::asIgraph(net_at)
adj = igraph::as_adjacency_matrix(graph, sparse = F)
colnames(adj) = igraph::V(graph)$housing
rownames(adj) = igraph::V(graph)$housing
adj = adj[order(rownames(adj)), order(colnames(adj))]
pheatmap::pheatmap(adj,
color = c("grey50","black"),
border_color = "white",
angle_col = 45,
angle_row = 45,
fontsize = 6,
legend_breaks = c(0,1),
legend = F,
cluster_rows = F,
cluster_cols = F,
show_rownames = ifelse(n<100, T, F),
show_colnames = ifelse(n<100, T, F))
ani.record()
}
oopts = ani.options(interval = 0.5)
# ani.replay()
# saveHTML(ani.replay(), img.name = "record_plot")