Code
library(tidyverse)
library(knitr)
library(beepr)
library(doParallel)
library(foreach)
library(parallel)
SimEpidemic <- function(N, init.p, infectivity, daily.contacts, eval = FALSE) {
  # A function for simulating a flu epidemic given parameters
  #
  # Args:
  #   N: the number of students in the school
  #   init.p: the proportion of students who are infected with flu on the first
  #           day of the week
  #   infectivity: the probability that an infected student transmits flu to a
  #                student they have contact with
  #   daily.contacts: the number of students that each student has contact with
  #                   on a given day. For simplicity, you may assume that each
  #                   student's daily contacts are selected from the student
  #                   population at random, without replacement
  #
  # Returns:
  #   N.cases: a vector of length 7 that counts the total number of infected
  #            individuals in the school on each day
  #   N.infections: a vector of length 7 that counts the number of new flu
  #                 cases in the school on each day
  #   N.infected: the total number of individuals who were infected over one week
  if (init.p < 0 | init.p > 1) {
    stop("Parameter init.p must be between 0 and 1.")
  } else if (daily.contacts > N) {
    stop("Parameter daily.contacts must be less than or equal to N.")
  }
  # Establishing simulation parameters
  iN.infected <- round(N * init.p)
  # Initializing health status of subject population (1 = infected, 0 = healthy)
  individuals_dat <- data.frame(id = 1:N,
                                Day.1 = c(rep(0, times = N - iN.infected),
                                          rep(1, times = iN.infected)),
                                Day.2 = NA, Day.3 = NA, Day.4 = NA,
                                Day.5 = NA, Day.6 = NA, Day.7 = NA)
  # Initialize subject contact matrix (the diagonal marks each subject's "self-contact")
  contact_matrix <- diag(N)
  Individual_Proc <- function(day, sub_id, ...) {
    # Offset by one so that `day` indexes the current day's column of
    # individuals_dat (column 1 is the id column)
    day <- day + 1
    # If it is the beginning of a day (first subject), reset the contact matrix
    if (sub_id == 1) {
      contact_matrix <<- diag(N)
    }
    # Only executes if sub_id belongs to an infected individual who has not yet
    # reached their contact limit for the day
    if ((sub_id %in% individuals_dat[which(individuals_dat[, day] == 1), "id"]) &
        (sum(contact_matrix[sub_id, ]) <= daily.contacts)) {
      # Define students who are candidates for potential contact: excludes the
      # subject's self, others they have already had contact with, and others
      # who have already reached their contact limit for the day
      eligible <- individuals_dat[which(rowSums(contact_matrix) <= daily.contacts &
                                          contact_matrix[sub_id, ] == FALSE), ]
      # Number of contacts the individual has already had (excluding their self)
      num_contacts <- sum(contact_matrix[sub_id, ]) - 1
      # Draw the ids of today's new contacts, who can potentially be infected tomorrow
      contacts <- sample_n(eligible,
                           size = min(daily.contacts - num_contacts, nrow(eligible)),
                           replace = FALSE)
      # Update the contact matrix based on the new (symmetric) connections
      contact_matrix[contacts$id, sub_id] <<- 1
      contact_matrix[sub_id, contacts$id] <<- 1
      # Count the number of contacts who are currently infected
      num.inf.contacts <- contacts[which(contacts[, day] == 1), day] %>% length()
      # The current individual is infected the next day if any infected contact
      # transmits to them
      if (rbinom(n = 1, size = num.inf.contacts, prob = infectivity) > 0) {
        individuals_dat[which(individuals_dat$id == sub_id), day + 1] <<- 1
      }
      # Number of today's contacts who will be infected the next day (if any)
      infNext_num <- rbinom(n = 1, size = length(contacts$id), prob = infectivity)
      # Mark that many of the contacts as infected for the next day
      if (infNext_num > 0) {
        individuals_dat[contacts$id[1:infNext_num], day + 1] <<- 1
      }
    }
  }
  # Executing Individual_Proc for each student on each day
  days <- 1:6
  sub_ids <- 1:N
  ind.inputs <- expand.grid(sub_ids, days)
  invisible(mapply(FUN = Individual_Proc, sub_id = ind.inputs[, 1],
                   day = ind.inputs[, 2]))
  # Summarizing data to return: the column sums count the new infections recorded
  # on each day, and their running total gives the case count per day
  individuals_dat[is.na(individuals_dat)] <- 0
  N.infections <- individuals_dat %>% select(-id) %>% colSums()
  N.cases <- cumsum(N.infections)
  temp <- individuals_dat %>% select(-id) %>% rowSums()
  N.infected <- sum(temp > 0)
  return(list(N.cases = N.cases, N.infections = N.infections,
              N.infected = N.infected))
}
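# Illustrative usage (not part of the assigned tasks): a single quick run with
# small, arbitrarily chosen parameters, just to check that the function returns
# the three summaries documented above. The object name `quick.check` and the
# parameter values are placeholders for illustration only.
quick.check <- SimEpidemic(N = 50, init.p = 0.10, infectivity = 0.10, daily.contacts = 5)
quick.check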
#Task 1
Ns <- c(100, 2500)
init.ps <- c(0.10, 0.20, 0.50)
infectivitys <- 0.10
daily.contactss <- 10
inputs <- expand.grid(Ns, init.ps, infectivitys,
daily.contactss)
### Use foreach and doParallel to do this on Windows (and on UNIX and Mac)
ncores <- detectCores() - 1 # Set ncores to something less than or equal to available cores
cl <- makeCluster(ncores)   # creates a cluster with <ncores> cores
registerDoParallel(cl)      # register the cluster
# .packages makes sure the tidyverse is attached on each worker; each of the
# three iterations is an independent replicate of every parameter combination
results <- foreach(j = 1:3, .packages = "tidyverse") %dopar% {
  set.seed(j)
  mapply(FUN = SimEpidemic, N = inputs[, 1], init.p = inputs[, 2],
         infectivity = inputs[, 3], daily.contacts = inputs[, 4])
}
stopCluster(cl) # Stop executing in parallel
ResultsSummary <- function(i) {
  # Extract the i-th replicate (a 3 x nrow(inputs) matrix of list elements from
  # mapply) and pull out its N.infected entries, which sit in every third position
  Results <- results[[i]]
  return(data.frame(N = inputs[, 1], init.p = inputs[, 2],
                    N.infected = unlist(Results[seq(from = 3,
                                                    to = length(Results), by = 3)])))
}
Results <- map_df(.x = 1:3, .f = ResultsSummary) %>%
  group_by(N, init.p) %>%
  summarise(Avg.N.Infected = mean(N.infected),
            SD = sd(N.infected)) %>%
  kable()
Results
#Task 3
Ns <- 500
init.ps <- 0.20
infectivitys <- c(0.20, 0.50, 0.80)
daily.contactss <- c(10, 25)
inputs2 <- expand.grid(N = Ns, init.p = init.ps, infectivity = infectivitys,
                       daily.contacts = daily.contactss)
Plot_results <- mapply(FUN = SimEpidemic, N = inputs2[, 1], init.p = inputs2[, 2],
                       infectivity = inputs2[, 3], daily.contacts = inputs2[, 4])
# Bind the daily case counts (every third element of Plot_results, starting from
# the first) to their parameter settings, then reshape to long format for plotting
Plot_Dat <- cbind(inputs2,
                  do.call("rbind",
                          Plot_results[seq(from = 1, to = length(Plot_results) - 2,
                                           by = 3)])) %>%
  pivot_longer(cols = starts_with("Day"), names_to = "Day", names_prefix = "Day.",
               values_to = "Number.infected") %>%
  mutate(Day = as.integer(Day))
gplot <- ggplot(data = Plot_Dat) +
  geom_point(mapping = aes(x = Day, y = Number.infected,
                           color = daily.contacts, size = infectivity))
gplot
beepr::beep(8)
Reflection on Simulation
1)
For flu transmission, my simulation considered one day at a time (days 1 through 7) and, within each day, one subject at a time (subjects 1 through \(N\)). To reduce computation time, I wrote the per-subject step so that it only executes when the subject is already infected on that day. Despite these and other efforts to speed the code up, the simulation still runs quite slowly (\(\sim 30-40\) minutes), although it runs correctly. For that reason the knitted tables are not included, but the script is commented throughout to explain exactly what each part does. I am not sure why my code was so inefficient, and I would like to learn a more efficient way of tackling this problem; a rough sketch of one possible faster approach is included at the end of this reflection.
2)
The tables for Task #2 were produced with knitr's kable() function.
3)
The plot from Task #3 is displayed below.
[Figure: Plot for Task 3]
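As a follow-up to (1): below is a rough sketch of one possible faster approach, written after the fact and not used for any of the results above. It simulates a whole day at a time, drawing each infected student's contacts with sample() and all of that day's transmission events with a single rbinom() call, so the per-subject inner step disappears. The function name SimEpidemicSketch is made up for illustration, and the sketch deliberately simplifies the model above: contacts are drawn with replacement across infected students, the daily contact cap is not enforced symmetrically, and infection status is carried forward cumulatively.
SimEpidemicSketch <- function(N, init.p, infectivity, daily.contacts) {
  # TRUE marks students who have been infected at any point so far
  infected <- c(rep(FALSE, N - round(N * init.p)), rep(TRUE, round(N * init.p)))
  N.cases <- numeric(7)
  N.cases[1] <- sum(infected)
  for (day in 2:7) {
    # Each currently infected student contacts daily.contacts others at random
    contacts <- unlist(lapply(which(infected), function(i) {
      sample((1:N)[-i], size = daily.contacts)
    }))
    # Every contact event transmits independently with probability infectivity
    transmitted <- rbinom(length(contacts), size = 1, prob = infectivity) == 1
    newly <- unique(contacts[transmitted])
    if (length(newly) > 0) infected[newly] <- TRUE
    N.cases[day] <- sum(infected)  # cumulative number ever infected by this day
  }
  N.cases
}
For example, SimEpidemicSketch(N = 500, init.p = 0.20, infectivity = 0.50, daily.contacts = 10) should finish essentially instantly, since the only remaining loop is over the seven days.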