Code
library(tidyverse)
library(knitr)
library(beepr)
library(doParallel)
library(foreach)
library(parallel)
SimEpidemic <- function(N, init.p, infectivity, daily.contacts, eval = FALSE) {
  # A function for simulating a flu epidemic given parameters
  #
  # Args:
  #   N: the number of students in the school
  #   init.p: the proportion of students who are infected with flu on the first
  #           day of the week
  #   infectivity: the probability that an infected student transmits flu to a
  #                student they have contact with
  #   daily.contacts: the number of students that each student has contact with
  #                   on a given day. For simplicity, you may assume that each
  #                   student's daily contacts are selected from the student
  #                   population at random, without replacement
  #
  # Returns:
  #   N.cases: a vector of length 7 that counts the total number of infected
  #            individuals in the school on each day
  #   N.infections: a vector of length 7 that counts the number of new flu
  #                 cases in the school on each day
  #   N.infected: the total number of individuals who were infected over one week
  if (init.p < 0 | init.p > 1) {
    stop("Parameter init.p must be between 0 and 1.")
  } else if (daily.contacts > N) {
    stop("Parameter daily.contacts must be less than or equal to N.")
  }
  # Establishing simulation parameters
  iN.infected <- round(N * init.p)
  # Initializing health status of subject population (1 = infected, 0 = healthy)
  individuals_dat <- data.frame(id = 1:N,
                                Day.1 = c(rep(0, times = N - iN.infected),
                                          rep(1, times = iN.infected)),
                                Day.2 = NA, Day.3 = NA, Day.4 = NA,
                                Day.5 = NA, Day.6 = NA, Day.7 = NA)
  # Initialize subject contact matrix (the diagonal marks each subject's "self-contact")
  contact_matrix <- diag(N)
  Individual_Proc <- function(day, sub_id, ...) {
    # Offset by one so that `day` indexes the current day's column of
    # individuals_dat (column 1 is the id column)
    day <- day + 1
    # If it is the beginning of a day (first subject), reset the contact matrix
    if (sub_id == 1) {
      contact_matrix <<- diag(N)
    }
    # Only executes if sub_id belongs to an infected individual who has not yet
    # reached their contact limit for the day
    if ((sub_id %in% individuals_dat[which(individuals_dat[, day] == 1), "id"]) &
        (sum(contact_matrix[sub_id, ]) <= daily.contacts)) {
      # Define students who are candidates for potential contact: excludes the
      # subject's self, others they have already had contact with, and others
      # who have already reached their contact limit for the day
      eligible <- individuals_dat[which(rowSums(contact_matrix) <= daily.contacts &
                                          contact_matrix[sub_id, ] == FALSE), ]
      # Number of contacts the individual has already had (excluding their self)
      num_contacts <- sum(contact_matrix[sub_id, ]) - 1
      # Draw the ids of today's new contacts, who can potentially be infected tomorrow
      contacts <- sample_n(eligible,
                           size = min(daily.contacts - num_contacts, nrow(eligible)),
                           replace = FALSE)
      # Update the contact matrix based on the new (symmetric) connections
      contact_matrix[contacts$id, sub_id] <<- 1
      contact_matrix[sub_id, contacts$id] <<- 1
      # Count the number of contacts who are currently infected
      num.inf.contacts <- contacts[which(contacts[, day] == 1), day] %>% length()
      # The current individual is infected the next day if any infected contact
      # transmits to them
      if (rbinom(n = 1, size = num.inf.contacts, prob = infectivity) > 0) {
        individuals_dat[which(individuals_dat$id == sub_id), day + 1] <<- 1
      }
      # Number of today's contacts who will be infected the next day (if any)
      infNext_num <- rbinom(n = 1, size = length(contacts$id), prob = infectivity)
      # Mark that many of the contacts as infected for the next day
      if (infNext_num > 0) {
        individuals_dat[contacts$id[1:infNext_num], day + 1] <<- 1
      }
    }
  }
  # Executing Individual_Proc for each student on each day
  days <- 1:6
  sub_ids <- 1:N
  ind.inputs <- expand.grid(sub_ids, days)
  invisible(mapply(FUN = Individual_Proc, sub_id = ind.inputs[, 1],
                   day = ind.inputs[, 2]))
  # Summarizing data to return: the column sums count the new infections recorded
  # on each day, and their running total gives the case count per day
  individuals_dat[is.na(individuals_dat)] <- 0
  N.infections <- individuals_dat %>% select(-id) %>% colSums()
  N.cases <- cumsum(N.infections)
  temp <- individuals_dat %>% select(-id) %>% rowSums()
  N.infected <- sum(temp > 0)
  return(list(N.cases = N.cases, N.infections = N.infections,
              N.infected = N.infected))
}
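# Illustrative usage (not part of the assigned tasks): a single quick run with
# small, arbitrarily chosen parameters, just to check that the function returns
# the three summaries documented above. The object name `quick.check` and the
# parameter values are placeholders for illustration only.
quick.check <- SimEpidemic(N = 50, init.p = 0.10, infectivity = 0.10, daily.contacts = 5)
quick.check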
#Task 1
Ns <- c(100, 2500)
init.ps <- c(0.10, 0.20, 0.50)
infectivitys <- 0.10
daily.contactss <- 10
inputs <- expand.grid(Ns, init.ps, infectivitys,
daily.contactss)
### Use foreach and doParallel to do this on Windows (and on UNIX and Mac)
ncores <- detectCores() - 1 # Set ncores to something less than or equal to available cores
cl <- makeCluster(ncores)   # creates a cluster with <ncores> cores
registerDoParallel(cl)      # register the cluster
# .packages makes sure the tidyverse is attached on each worker; each of the
# three iterations is an independent replicate of every parameter combination
results <- foreach(j = 1:3, .packages = "tidyverse") %dopar% {
  set.seed(j)
  mapply(FUN = SimEpidemic, N = inputs[, 1], init.p = inputs[, 2],
         infectivity = inputs[, 3], daily.contacts = inputs[, 4])
}
stopCluster(cl) # Stop executing in parallel
ResultsSummary <- function(i) {
  # Extract the i-th replicate (a 3 x nrow(inputs) matrix of list elements from
  # mapply) and pull out its N.infected entries, which sit in every third position
  Results <- results[[i]]
  return(data.frame(N = inputs[, 1], init.p = inputs[, 2],
                    N.infected = unlist(Results[seq(from = 3,
                                                    to = length(Results), by = 3)])))
}
Results <- map_df(.x = 1:3, .f = ResultsSummary) %>%
  group_by(N, init.p) %>%
  summarise(Avg.N.Infected = mean(N.infected),
            SD = sd(N.infected)) %>%
  kable()
Results
#Task 3
Ns <- 500
init.ps <- 0.20
infectivitys <- c(0.20, 0.50, 0.80)
daily.contactss <- c(10, 25)
inputs2 <- expand.grid(N = Ns, init.p = init.ps, infectivity = infectivitys,
                       daily.contacts = daily.contactss)
Plot_results <- mapply(FUN = SimEpidemic, N = inputs2[, 1], init.p = inputs2[, 2],
                       infectivity = inputs2[, 3], daily.contacts = inputs2[, 4])
# Bind the daily case counts (every third element of Plot_results, starting from
# the first) to their parameter settings, then reshape to long format for plotting
Plot_Dat <- cbind(inputs2,
                  do.call("rbind",
                          Plot_results[seq(from = 1, to = length(Plot_results) - 2,
                                           by = 3)])) %>%
  pivot_longer(cols = starts_with("Day"), names_to = "Day", names_prefix = "Day.",
               values_to = "Number.infected") %>%
  mutate(Day = as.integer(Day))
gplot <- ggplot(data = Plot_Dat) +
  geom_point(mapping = aes(x = Day, y = Number.infected,
                           color = daily.contacts, size = infectivity))
gplot
beepr::beep(8)
Reflection on Simulation
1)
For flu transmission, my simulation considered one day at a time (days 1 through 7) and, within each day, one subject at a time (subjects 1 through \(N\)). To reduce computation time, I wrote the per-subject step so that it only executes when the subject is already infected on that day. Despite these and other efforts to speed the code up, the simulation still runs quite slowly (\(\sim 30-40\) minutes), although it runs correctly. For that reason the knitted tables are not included, but the script is commented throughout to explain exactly what each part does. I am not sure why my code was so inefficient, and I would like to learn a more efficient way of tackling this problem; a rough sketch of one possible faster approach is included at the end of this reflection.
2)
The tables for Task #2 were produced with knitr's kable() function.
3)
The plot from Task #3 is displayed below.
[Figure: Plot for Task 3]
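As a follow-up to (1): below is a rough sketch of one possible faster approach, written after the fact and not used for any of the results above. It simulates a whole day at a time, drawing each infected student's contacts with sample() and all of that day's transmission events with a single rbinom() call, so the per-subject inner step disappears. The function name SimEpidemicSketch is made up for illustration, and the sketch deliberately simplifies the model above: contacts are drawn with replacement across infected students, the daily contact cap is not enforced symmetrically, and infection status is carried forward cumulatively.
SimEpidemicSketch <- function(N, init.p, infectivity, daily.contacts) {
  # TRUE marks students who have been infected at any point so far
  infected <- c(rep(FALSE, N - round(N * init.p)), rep(TRUE, round(N * init.p)))
  N.cases <- numeric(7)
  N.cases[1] <- sum(infected)
  for (day in 2:7) {
    # Each currently infected student contacts daily.contacts others at random
    contacts <- unlist(lapply(which(infected), function(i) {
      sample((1:N)[-i], size = daily.contacts)
    }))
    # Every contact event transmits independently with probability infectivity
    transmitted <- rbinom(length(contacts), size = 1, prob = infectivity) == 1
    newly <- unique(contacts[transmitted])
    if (length(newly) > 0) infected[newly] <- TRUE
    N.cases[day] <- sum(infected)  # cumulative number ever infected by this day
  }
  N.cases
}
For example, SimEpidemicSketch(N = 500, init.p = 0.20, infectivity = 0.50, daily.contacts = 10) should finish essentially instantly, since the only remaining loop is over the seven days.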