postsimscript

library(data.table)
library(shape)
library(reshape2)
library(nlme)
library(phylobase)

## Loading required package: grid

#library(sna)
library(ggplot2)
library(stringr)
library(plyr)

#library(sqldf)

# We want to replace the default age distribution with one that is consistent with the non-HIV-related mortality hazard.
# mortality.normal.weibull.shape                               = 4
# mortality.normal.weibull.scale                               = 70
# mortality.normal.weibull.genderdiff                          = 0 (default is 5 and will need to be changed to 0 further down when the cfg object is created)

agedist.create <- function(shape = 5, scale = 65){
  agebins <- seq(0.5, 99.5, 1)
  probofstillalive <- 1 - pweibull(agebins, shape = shape, scale = scale)
  #plot(agebins,probofstillalive, type="l")
  #meanweib <- 70 * gamma(1+ (1/4))
  fractionsinagebins <- 100 * probofstillalive/sum(probofstillalive)
  simple.age.data.frame <- data.frame(Age = c(agebins, 100.5), Percent.Male = c(fractionsinagebins, 0), Percent.Female = c(fractionsinagebins, 0))
  
  return(simple.age.data.frame)
}



simpact.run.wrapper <- function(configParams, destDir, agedist = "${SIMPACT_DATA_DIR}sa_2003.csv", intervention = NULL, release = TRUE, slowalg = FALSE, parallel=FALSE, seed=-1, dryrun = FALSE, identifierFormat = "%T-%y-%m-%d-%H-%M-%S_%p_%r%r%r%r%r%r%r%r-" )
{
  cfg <- configParams
  if (is.null(cfg))
    cfg <- list()
  
  intv <- intervention
  if (is.null(intv))
    intv <- list()
  
  ret <- simpact.run(cfg, destDir, agedist, intv, release, slowalg, parallel, seed, dryrun, identifierFormat)
  
  inputParams <- list()
  inputParams["config"] <- list(cfg)
  inputParams["destination"] <- destDir
  inputParams["agedist"] <- agedist
  inputParams["intervention"] <- list(intv)
  inputParams["alg.release"] <- release
  inputParams["alg.slow"] <- slowalg
  inputParams["seed"] <- seed
  inputParams["dryrun"] <- dryrun
  inputParams["identifierFormat"] <- identifierFormat
  
  ret["input.params"] <- list(inputParams)
  return(ret)
}

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

readthedata <- function(modeloutput){
  path <- as.character(modeloutput["outputfile"])
  outputID <- as.character(modeloutput["id"])
  DestDir <- sub(pattern = paste0(outputID, "output.txt"), replacement = "", x = path, fixed=T)
  personlogfilename <- paste0(DestDir, outputID, "personlog.csv")
  relationlogfilename <- paste0(DestDir, outputID, "relationlog.csv")
  eventlogfilename <- paste0(DestDir, outputID, "eventlog.csv")
  treatmentlogfilename <- paste0(DestDir, outputID, "treatmentlog.csv")
  periodiclogfilename <- paste0(DestDir, outputID, "periodiclog.csv")
    
  ptable <- fread(personlogfilename, sep = ",", skip = 0)
  rtable <- fread(relationlogfilename, sep = ",", skip = 0)
  etable <- fread(eventlogfilename, sep = ",", skip = 0)
  setnames(etable, c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10"),
           c("eventtime", "eventname", "p1name", "p1ID", "p1gender", "p1age", "p2name", "p2ID", "p2gender", "p2age"))
  ttable <- fread(treatmentlogfilename, sep  = ",", skip = 0)

  if (file.exists(periodiclogfilename)){
    ltable <- fread(periodiclogfilename, sep = ",", skip = 0)
    outputtables <- list(ptable = ptable, rtable = rtable, etable = etable, ttable = ttable, ltable = ltable)
  } else {
    outputtables <- list(ptable = ptable, rtable = rtable, etable = etable, ttable = ttable)
  }
  return(outputtables)

}

####
# To create heatplot, we need to loop through time steps, and calculate age- and gender-specific prevalence and incidence rates.
prevalenceheatmapdata <- function(modeloutput){ #(DT = datalist$ptable, cfg = configfile){
  datalist <- readthedata(modeloutput)
  DT <- datalist$ptable
  # inp <- modeloutput[["input.params"]][["config"]]
  # cfg <- inp[["config"]] # Should be 'cfg' again
  cfg <- modeloutput[["input.params"]][["config"]]
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in timesteps){
    DTalive.infected <- alive.infected(DT = datalist$ptable, time = time_i, dec.places = decplaces) # First we only take the data of people who were alive at time_i
    Prevalencetimestepdata <- DTalive.infected[ ,Prevalence := sum(Infected) / sum(!is.na(Infected)),by = "Gender,Age"] # And we calculate HIV prevalence, by gender and age group
    #setnames(Prevalencetimestepdata, "V1", "Prevalence")
    Prevalencetimestepdata <- cbind(time_i, Prevalencetimestepdata)
    output <- rbind(output, Prevalencetimestepdata)
  }
  return(output)
}

incidenceheatmapdata <- function(modeloutput){ #DTP = datalist$ptable, DTE = datalist$etable, cfg = configfile){
  datalist <- readthedata(modeloutput)
  DTP <- datalist$ptable
  DTE <- datalist$etable
  cfg <- modeloutput[["input.params"]][["config"]]
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in head(timesteps, -1)){
    DTcases <- incidentcase(DT = DTE, interval = c(time_i, (time_i + timestep.width)))  # New infections that happened at time x with: time_i < x <= time_(i+1)
    DTnonHIVdeaths <- nonHIVdeath(DT = DTE, interval = c(time_i, (time_i + timestep.width))) # New non-HIV deaths that happened at time x with: time_i < x <= time_(i+1)
    DTcases$ID <- DTcases$p2ID
    DTcases$Gender <- DTcases$p2gender
    DTnonHIVdeaths$ID <- DTnonHIVdeaths$p1ID
    DTnonHIVdeaths$Gender <- DTnonHIVdeaths$p1gender
    
    # Personyears at risk during the interval
    DTalive.infected <- alive.infected(DT = DTP, time = time_i, dec.places = decplaces) # First we only take the data of people who were alive at time_i
    DTalive.uninfected <- DTalive.infected[Infected == "FALSE"] # We only keep those that were not infected at the start of the interval
    # A merge with etable will find those that died and/or got infected during the interval
    # To do this, we need to add a key to DTalive.uninfected
    DTPkeycols <- c("ID", "Gender")
    DTcaseskeycols <- c("ID", "Gender") # The ID and gender of the newly infected person  
    DTnonHIVdeathskeycols <- c("ID", "Gender") # The ID and gender of the newly died person (non-HIV death)
    setkeyv(DTalive.uninfected, DTPkeycols)
    setkeyv(DTcases, DTcaseskeycols)
    setkeyv(DTnonHIVdeaths, DTnonHIVdeathskeycols)
    DTforincidencecalc_a <- merge(DTalive.uninfected, DTcases, all.x=TRUE)
    DTforincidencecalc <- merge(DTforincidencecalc_a, DTnonHIVdeaths, all.x=TRUE, suffixes = c(".incident", ".dead"))
    DTforincidencecalc$eventtime.incident[is.na(DTforincidencecalc$eventtime.incident)] <- time_i + timestep.width
    DTforincidencecalc$eventtime.dead[is.na(DTforincidencecalc$eventtime.dead)] <- time_i + timestep.width
    
    DTforincidencecalc$PYend <- pmin(DTforincidencecalc$eventtime.incident, DTforincidencecalc$eventtime.dead)
    DTforincidencecalc$PY <- DTforincidencecalc$PYend - time_i
    
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_G_A := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY),by = "Gender,Age"] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_G_A := sum(PY),by = "Gender,Age"] # And we calculate PY, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_G := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY),by = "Gender"] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_G := sum(PY),by = "Gender"] # And we calculate PY, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_All := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY)] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_All := sum(PY)] # And we calculate PY, by gender and age group
    # (so we can be selective when plotting, and not plot if PY per age-gender group is too low)
    # setnames(Incidencetimestepdata, "V1", "Incidence") # Cases per personyear at risk
    Incidencetimestepdata <- cbind(time_i, Incidencetimestepdata)
    output <- rbind(output, Incidencetimestepdata)
  }
  incid.table <- unique(subset(output, select = c(time_i, Incidence_All, PY_All)))
  incid.table$cases <- incid.table$Incidence_All * incid.table$PY_All
  mean.incidence <- sum(incid.table$cases) / sum(incid.table$PY_All)
  cumul.incidence <- 1 - exp(-mean.incidence*(length(timesteps)-1))
  outputlist <- list(output=output,
                     incid.table=incid.table,
                     mean.incidence=mean.incidence,
                     cumul.incidence=cumul.incidence)
  return(outputlist)
}

incidenceheatmapdata.list <- function(datalist){ #DTP = datalist$ptable, DTE = datalist$etable, cfg = configfile){
  DTP <- datalist$ptable
  DTE <- datalist$etable
  DTL <- datalist$ltable
  #cfg <- modeloutput[["input.params"]][["config"]]
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- DTL$Time #seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in head(timesteps, -1)){
    DTcases <- incidentcase(DT = DTE, interval = c(time_i, (time_i + timestep.width)))  # New infections that happened at time x with: time_i < x <= time_(i+1)
    DTnonHIVdeaths <- nonHIVdeath(DT = DTE, interval = c(time_i, (time_i + timestep.width))) # New non-HIV deaths that happened at time x with: time_i < x <= time_(i+1)
    DTcases$ID <- DTcases$p2ID
    DTcases$Gender <- DTcases$p2gender
    DTnonHIVdeaths$ID <- DTnonHIVdeaths$p1ID
    DTnonHIVdeaths$Gender <- DTnonHIVdeaths$p1gender
    
    # Personyears at risk during the interval
    DTalive.infected <- alive.infected(DT = DTP, time = time_i, dec.places = decplaces) # First we only take the data of people who were alive at time_i
    DTalive.uninfected <- DTalive.infected[Infected == "FALSE"] # We only keep those that were not infected at the start of the interval
    # A merge with etable will find those that died and/or got infected during the interval
    # To do this, we need to add a key to DTalive.uninfected
    DTPkeycols <- c("ID", "Gender")
    DTcaseskeycols <- c("ID", "Gender") # The ID and gender of the newly infected person  
    DTnonHIVdeathskeycols <- c("ID", "Gender") # The ID and gender of the newly died person (non-HIV death)
    setkeyv(DTalive.uninfected, DTPkeycols)
    setkeyv(DTcases, DTcaseskeycols)
    setkeyv(DTnonHIVdeaths, DTnonHIVdeathskeycols)
    DTforincidencecalc_a <- merge(DTalive.uninfected, DTcases, all.x=TRUE)
    DTforincidencecalc <- merge(DTforincidencecalc_a, DTnonHIVdeaths, all.x=TRUE, suffixes = c(".incident", ".dead"))
    DTforincidencecalc$eventtime.incident[is.na(DTforincidencecalc$eventtime.incident)] <- time_i + timestep.width
    DTforincidencecalc$eventtime.dead[is.na(DTforincidencecalc$eventtime.dead)] <- time_i + timestep.width
    
    DTforincidencecalc$PYend <- pmin(DTforincidencecalc$eventtime.incident, DTforincidencecalc$eventtime.dead)
    DTforincidencecalc$PY <- DTforincidencecalc$PYend - time_i
    
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_G_A := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY),by = "Gender,Age"] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_G_A := sum(PY),by = "Gender,Age"] # And we calculate PY, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_G := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY),by = "Gender"] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_G := sum(PY),by = "Gender"] # And we calculate PY, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,Incidence_All := sum(eventname.incident=="transmission", na.rm = TRUE) / sum(PY)] # And we calculate HIV Incidence, by gender and age group
    Incidencetimestepdata <- DTforincidencecalc[ ,PY_All := sum(PY)] # And we calculate PY, by gender and age group
    # (so we can be selective when plotting, and not plot if PY per age-gender group is too low)
    # setnames(Incidencetimestepdata, "V1", "Incidence") # Cases per personyear at risk
    Incidencetimestepdata <- cbind(time_i, Incidencetimestepdata)
    output <- rbind(output, Incidencetimestepdata)
  }
  incid.table <- unique(subset(output, select = c(time_i, Incidence_All, PY_All)))
  incid.table$cases <- incid.table$Incidence_All * incid.table$PY_All
  mean.incidence <- sum(incid.table$cases) / sum(incid.table$PY_All)
  cumul.incidence <- 1 - exp(-mean.incidence*(length(timesteps)-1))
  outputlist <- list(output=output,
                     incid.table=incid.table,
                     mean.incidence=mean.incidence,
                     cumul.incidence=cumul.incidence)
  return(outputlist)
}

population <- function(modeloutput){
  datalist <- readthedata(modeloutput)
  DTP <- datalist$ptable
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in head(timesteps, -1)){
    DTalive.infected <- alive.infected(DT = DTP, time = time_i, dec.places = decplaces) # First we only take the data of people who were alive at time_i
    popsize <- nrow(DTalive.infected)
    pointprevalence <- sum(DTalive.infected$Infected) / popsize
    popdata <- data.table(cbind(time_i, popsize, pointprevalence))
    output <- rbind(output, popdata)
  }
  #outputlist <- list(output=output)
  #return(outputlist)
  return(output)
}

population.list <- function(datalist){ #DTP = datalist$ptable, DTE = datalist$etable, cfg = configfile){
  DTP <- datalist$ptable
  DTL <- datalist$ltable
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- DTL$Time #seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in head(timesteps, -1)){
    DTalive.infected <- alive.infected(DT = DTP, time = time_i, dec.places = decplaces) # First we only take the data of people who were alive at time_i
    popsize <- nrow(DTalive.infected)
    pointprevalence <- sum(DTalive.infected$Infected) / popsize
    popdata <- data.table(cbind(time_i, popsize, pointprevalence))
    output <- rbind(output, popdata)
  }
  #outputlist <- list(output=output)
  #return(outputlist)
  return(output)
}

allRtimestepsdata <- function(modeloutput){ #(DT = datalist$ptable, DTE = datalist$etable, cfg = configfile){
  datalist <- readthedata(modeloutput)
  DT <- datalist$ptable
  DTE <- datalist$etable
  cfg <- modeloutput[["input.params"]][["config"]]
  output <- data.table()
  timestep.width <- 1
  decplaces <- decimalplaces(timestep.width)
  timesteps <- seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by = timestep.width)
  for (time_i in head(timesteps, -1)){
    DTnewlyinfected <- DT[InfectTime > time_i & InfectTime <= (time_i + timestep.width) & TOD <= as.numeric(cfg["population.simtime"])] # People who got infected during the time slice and had a completed infectious period 
    # small hack: DT[ID == 151]$TOD <- as.numeric(cfg["population.simtime"]) - 1
    transmissions <- DTE[eventname == "transmission" & eventtime > time_i] # transmissions that could possibly be caused by the pople in DTnewlyinfected
    transmissions$ID <- transmissions$p1ID
    transmissions$Gender <- transmissions$p1gender

    DTnewlyinfectedkeycols <- c("ID", "Gender")
    transmissionskeycols <- c("ID", "Gender") # The ID and gender of the receptors, acquiring infection from people in DTnewlyinfected  
    setkeyv(DTnewlyinfected, DTnewlyinfectedkeycols)
    setkeyv(transmissions, transmissionskeycols)
    DTforRcalc <- merge(DTnewlyinfected, transmissions, all.x=TRUE)
    DTforRcalc$ID.Gender <- paste(DTforRcalc$ID, DTforRcalc$Gender, sep=".")
    DTwithR <- DTforRcalc[ ,infcaused :=  sum(!is.na(eventtime)), by = "ID.Gender"] # number of infections caused for each person in DTnewlyinfected 
    DTwithR$R <- sum(DTwithR$infcaused) / nrow(DTwithR)
    Rtimestepdata <- data.frame(time_i = time_i, suminfcaused = sum(DTwithR$infcaused), R = sum(DTwithR$infcaused) / nrow(DTwithR))
    output <- rbind(output, Rtimestepdata)
  }
  return(output)
}

transmissionmap <- function(modeloutput){ #(DT = datalist$ptable, DTE = datalist$etable, cfg = configfile){
  # requires the shape package
  # xlim <- c(0, 15)
  # ylim <- c(0, 108)
  # plot(0, type = "n", xlim = xlim, ylim = ylim)
  # First we insert diagonal "arrows" (age and time living with HIV)
  datalist <- readthedata(modeloutput)
  DT <- datalist$ptable
  DTE <- datalist$etable
  cfg <- modeloutput[["input.params"]][["config"]]
  Infecteds <- DT[InfectTime < Inf]
  timeatinf <- Infecteds$InfectTime
  ageatinf <- - Infecteds$TOB + Infecteds$InfectTime
  timeatdeath <- pmin(as.numeric(cfg["population.simtime"]), Infecteds$TOD)
  ageatdeath <- ageatinf + (timeatdeath - timeatinf)
  # Arrows(timeatinf, ageatinf, timeatdeath, ageatdeath, arr.length = 0.02, code = 2,
  #        arr.type = "T", arr.col = Infecteds$Gender + 2, col = Infecteds$Gender + 2)
  # Next we insert vertical dashed arrows (age and time at transmission)
  DTE <- datalist$etable
  DTincident <- DTE[eventname == "transmission"]
  timeattransm <- DTincident$eventtime
  ageoftransmitter <- DTincident$p1age
  genderoftransmitter <- DTincident$p1gender
  ageofreceptor <- DTincident$p2age + ((DTincident$p1age - DTincident$p2age) > 0) - ((DTincident$p1age - DTincident$p2age) < 0)
  # Arrows(timeattransm, ageoftransmitter, timeattransm, ageofreceptor, arr.length = 0.1, code = 2,
  #        arr.type = "triangle", col = DTincident$p1gender + 2)
  result <- list(livingwithHIVdata = data.frame(timeatinf = timeatinf,
                                                ageatinf = ageatinf,
                                                timeatdeath = timeatdeath,
                                                ageatdeath = ageatdeath),
                 transmissiondata = data.frame(timeattransm = timeattransm,
                                               ageoftransmitter = ageoftransmitter,
                                               genderoftransmitter = genderoftransmitter,
                                               ageofreceptor = ageofreceptor))
  return(result)
}


agemixing <- function(modeloutput){ #(DT = datalist$ptable, DTR = datalist$rtable){
  datalist <- readthedata(modeloutput)
  DT <- datalist$ptable
  DTR <- datalist$rtable
  #cfg <- modeloutput[["input.params"]][["config"]]
  DTRandDT_a <- merge(data.frame(DTR), data.frame(DT), by.x = "IDm", by.y = "ID")
  DTRandDT_b <- merge(DTRandDT_a, data.frame(DT), by.x = "IDw", by.y = "ID", suffixes = c(".m", ".w"))
  DTRandDT_b$relID <- factor(1:nrow(DTRandDT_b))
  setnames(DTRandDT_b, c("IDw", "IDm"), c("ID.w", "ID.m"))
  DTRlong <- reshape(data = DTRandDT_b,
                     idvar = "relID",
                     varying = names(DTRandDT_b)[c(1:2, 6:(ncol(DTRandDT_b)-1))], #, 6:ncol(DTRandDT_b))],
                     timevar = "g",
                     direction = "long")
  DTRlong$g.ID <- as.character(paste(DTRlong$g, DTRlong$ID, sep="."))
  
  DTRandDT_m <- merge(data.frame(DTR), data.frame(DT[Gender==0]), by.x = "IDm", by.y = "ID")
  DTRandDT_m$AgeMaleatForm <- - DTRandDT_m$TOB + DTRandDT_m$FormTime
  DTRandDT_m$AgeMaleatForm0 <- DTRandDT_m$AgeMaleatForm - min(DTRandDT_m$AgeMaleatForm)
  DTRandDT_m$AgeFemaleatForm <- DTRandDT_m$AgeMaleatForm - DTRandDT_m$AgeGap  
                
  # Analysis of age mixing pattern, using the DTR_andDT_m dataset, NOT the DTRlong dataset
  # Mean age gap, between-, and within-subject variance of age gaps
  
  meanagegap <- mean(DTRandDT_m$AgeGap)
  varagegap <- var(DTRandDT_m$AgeGap) #[DTRlong$g == "w"]) # We only want to count each relationship once
  # First we fit a linear mixed effects model
  MixingModel <- lme(AgeGap~1, random=~1|IDm, data = DTRandDT_m,
                     control=lmeControl(returnObject=TRUE, opt = "optim"))
  VarianceEstimates <- VarCorr(MixingModel)
  BetweenVar <- as.numeric(VarianceEstimates[1])
  WithinVar <- as.numeric(VarianceEstimates[2])
  # Then we fit a non-linear mixed effects model: within-subject variance does not increase linearly with age
  #Subset datasets for men as "respondents" reporting on all their relationships
  DTRmen <- DTRlong[DTRlong$g=="m",]
  DTRmen$AgeMaleatForm <- - DTRmen$TOB + DTRmen$FormTime
  DTRmen$AgeFemaleatForm <- DTRmen$AgeMaleatForm - DTRmen$AgeGap
  DTRmen$AgeMaleatForm0 <- DTRmen$AgeMaleatForm - min(DTRmen$AgeMaleatForm)
  
  men.lme <- lme(AgeFemaleatForm ~ AgeMaleatForm0,
                 data = DTRandDT_m,
                 control=lmeControl(returnObject=TRUE),
                 random = ~1 | IDm, 
                 method = "REML",
                 weight = varPower(value = 0.5, form = ~AgeMaleatForm0 + 1))
  #The formula to calculate the weights for variance is |v|^(2*t)
  #Age can't be at 0 in varPower formula because then it will evaluate to 0 variance for the first level (18 year olds)
  
  
  slope <- summary(men.lme)$tTable[2, 1]
  
  #add the predicted values to the dataset
  #Level 0 mean population level predictions
  DTRandDT_m$pred <- predict(men.lme, DTRandDT_m, level = 0)

  #Need to calculate a vector of within-subject variances because the variances should be different for each age
  #Create a vector of the variance weights. This is based upon the formula for varPower |v|^(2*t)
  #(2*t) is the power
  #|v| is the absolute value of each of the ages from the age vector
  powerm <- attributes(men.lme$apVar)$Pars["varStruct.power"]
  within.var.weights <- (1 + DTRandDT_m$AgeMaleatForm0)^powerm
  #Extracting the residual variance (within-subject variance) for youngest men (0— baseline)
  within.var.base <- men.lme$sigma^2

  #Now create a vector that contains the residual variances for all ages
  # DTRandDT_m$within.var <- within.var.base * within.var.weights

  #Extract the variance of the random intercept # This is the between-subject variance
  # DTRandDT_m$between.var <- getVarCov(men.lme)[1,1] # called "interceptvar" in LNS analysis

  #Caluclate the prediction stardard errors
  # DTRandDT_m$predict.sd <- sqrt(DTRandDT_m$between.var + DTRandDT_m$within.var) # called "se2" in LNS analysis
  
  result <- list(AAD = mean(DTRandDT_m$AgeGap),
                 VAD = var(DTRandDT_m$AgeGap),
                 men.lme = men.lme,
                 slope = slope,
                 powerm = attributes(men.lme$apVar)$Pars["varStruct.power"],
                 within.var.base = men.lme$sigma^2,
                 within.var.weights = within.var.weights,
                 WVAD = within.var.base * within.var.weights,
                 Pred = predict(men.lme, DTRandDT_m, level = 0),
                 BVAD = getVarCov(men.lme)[1,1],
                 agescatterdata = data.frame(DTRandDT_m))
  return(result)
}

agemixing.list <- function(datalist){ #(DT = datalist$ptable, DTR = datalist$rtable){
  DT <- datalist$ptable
  DTR <- datalist$rtable
  #cfg <- modeloutput[["input.params"]][["config"]]
  DTRandDT_a <- merge(data.frame(DTR), data.frame(DT), by.x = "IDm", by.y = "ID")
  DTRandDT_b <- merge(DTRandDT_a, data.frame(DT), by.x = "IDw", by.y = "ID", suffixes = c(".m", ".w"))
  DTRandDT_b$relID <- factor(1:nrow(DTRandDT_b))
  setnames(DTRandDT_b, c("IDw", "IDm"), c("ID.w", "ID.m"))
  DTRlong <- reshape(data = DTRandDT_b,
                     idvar = "relID",
                     varying = names(DTRandDT_b)[c(1:2, 6:(ncol(DTRandDT_b)-1))], #, 6:ncol(DTRandDT_b))],
                     timevar = "g",
                     direction = "long")
  DTRlong$g.ID <- as.character(paste(DTRlong$g, DTRlong$ID, sep="."))
  
  DTRandDT_m <- merge(data.frame(DTR), data.frame(DT[Gender==0]), by.x = "IDm", by.y = "ID")
  DTRandDT_m$AgeMaleatForm <- - DTRandDT_m$TOB + DTRandDT_m$FormTime
  DTRandDT_m$AgeMaleatForm0 <- DTRandDT_m$AgeMaleatForm - min(DTRandDT_m$AgeMaleatForm)
  DTRandDT_m$AgeFemaleatForm <- DTRandDT_m$AgeMaleatForm - DTRandDT_m$AgeGap  
  
  # Analysis of age mixing pattern, using the DTR_andDT_m dataset, NOT the DTRlong dataset
  # Mean age gap, between-, and within-subject variance of age gaps
  
  meanagegap <- mean(DTRandDT_m$AgeGap)
  varagegap <- var(DTRandDT_m$AgeGap) #[DTRlong$g == "w"]) # We only want to count each relationship once
  # First we fit a linear mixed effects model
  MixingModel <- lme(AgeGap~1, random=~1|IDm, data = DTRandDT_m,
                     control=lmeControl(returnObject=TRUE, opt = "optim"))
  VarianceEstimates <- VarCorr(MixingModel)
  BetweenVar <- as.numeric(VarianceEstimates[1])
  WithinVar <- as.numeric(VarianceEstimates[2])
  # Then we fit a non-linear mixed effects model: within-subject variance does not increase linearly with age
  #Subset datasets for men as "respondents" reporting on all their relationships
  DTRmen <- DTRlong[DTRlong$g=="m",]
  DTRmen$AgeMaleatForm <- - DTRmen$TOB + DTRmen$FormTime
  DTRmen$AgeFemaleatForm <- DTRmen$AgeMaleatForm - DTRmen$AgeGap
  DTRmen$AgeMaleatForm0 <- DTRmen$AgeMaleatForm - min(DTRmen$AgeMaleatForm)
  
  men.lme <- lme(AgeFemaleatForm ~ AgeMaleatForm0,
                 data = DTRandDT_m,
                 control=lmeControl(returnObject=TRUE),
                 random = ~1 | IDm, 
                 method = "REML",
                 weight = varPower(value = 0.5, form = ~AgeMaleatForm0 + 1))
  #The formula to calculate the weights for variance is |v|^(2*t)
  #Age can't be at 0 in varPower formula because then it will evaluate to 0 variance for the first level (18 year olds)
  
  #add the predicted values to the dataset
  #Level 0 mean population level predictions
  DTRandDT_m$pred <- predict(men.lme, DTRandDT_m, level = 0)
  
  #Need to calculate a vector of within-subject variances because the variances should be different for each age
  #Create a vector of the variance weights. This is based upon the formula for varPower |v|^(2*t)
  #(2*t) is the power
  #|v| is the absolute value of each of the ages from the age vector
  powerm <- attributes(men.lme$apVar)$Pars["varStruct.power"]
  within.var.weights <- (1 + DTRandDT_m$AgeMaleatForm0)^powerm
  #Extracting the residual variance (within-subject variance) for youngest men (0— baseline)
  within.var.base <- men.lme$sigma^2
  
  #Now create a vector that contains the residual variances for all ages
  # DTRandDT_m$within.var <- within.var.base * within.var.weights
  
  #Extract the variance of the random intercept # This is the between-subject variance
  # DTRandDT_m$between.var <- getVarCov(men.lme)[1,1] # called "interceptvar" in LNS analysis
  
  #Caluclate the prediction stardard errors
  # DTRandDT_m$predict.sd <- sqrt(DTRandDT_m$between.var + DTRandDT_m$within.var) # called "se2" in LNS analysis
  
  result <- list(AAD = mean(DTRandDT_m$AgeGap),
                 VAD = var(DTRandDT_m$AgeGap),
                 men.lme = men.lme,
                 powerm = attributes(men.lme$apVar)$Pars["varStruct.power"],
                 within.var.base = men.lme$sigma^2,
                 within.var.weights = within.var.weights,
                 WVAD = within.var.base * within.var.weights,
                 Pred = predict(men.lme, DTRandDT_m, level = 0),
                 BVAD = getVarCov(men.lme)[1,1],
                 agescatterdata = data.frame(DTRandDT_m))
  return(result)
}


# Calculate point prevalence of concurrency,
# Lifetime number of sex partners
# Cross-sectional degree distribution

degreecalc <- function(DT = datalist$ptable, DTR = datalist$rtable){
  DT$ID.Gender <- paste(DT$ID, DT$Gender, sep=".")
  setkey(DT, ID.Gender)
  
  men_degree <- data.table(data.frame(table(DTR$IDm)))
  men_degree$ID.Gender <- paste(men_degree$Var1, 0, sep=".")
  setnames(men_degree, c("IDm", "degree", "ID.Gender"))
  setkey(men_degree, ID.Gender)
  
  women_degree <- data.table(data.frame(table(DTR$IDw)))
  women_degree$ID.Gender <- paste(women_degree$Var1, 1, sep=".")
  setnames(women_degree, c("IDw", "degree", "ID.Gender"))
  setkey(women_degree, ID.Gender)
  
  
  DT_degree_MW <- merge(DT, men_degree, all.x=TRUE)
  DT_degree_W <- merge(DT, women_degree, all.x=TRUE)
  DT_degree_MW$degree[DT_degree_MW$Gender==1] <-  DT_degree_W$degree[DT_degree_W$Gender==1]
  DT_degree_MW$degree[is.na(DT_degree_MW$degree)] <- 0

  
  result <- DT_degree_MW
  return(result)
}
  
# allRtimestepsdata <- function(DT = datalist$ptable, DTE = datalist$etable, cfg = configfile){
#   DTE$ID <- DTE$
#   output <- data.table()
#   
#     transmissions <- DTE[eventname == "transmission" & eventtime > time_i] # transmissions that could possibly be caused by the pople in DTnewlyinfected
#     transmissions$ID <- transmissions$p1ID
#     transmissions$Gender <- transmissions$p1gender
#     
#     DTnewlyinfectedkeycols <- c("ID", "Gender")
#     transmissionskeycols <- c("ID", "Gender") # The ID and gender of the receptors, acquiring infection from people in DTnewlyinfected  
#     setkeyv(DTnewlyinfected, DTnewlyinfectedkeycols)
#     setkeyv(transmissions, transmissionskeycols)
#     DTforRcalc <- merge(DTnewlyinfected, transmissions, all.x=TRUE)
#     DTforRcalc$ID.Gender <- paste(DTforRcalc$ID, DTforRcalc$Gender, sep=".")
#     DTwithR <- DTforRcalc[ ,infcaused :=  sum(!is.na(eventtime)), by = "ID.Gender"] # number of infections caused for each person in DTnewlyinfected 
#     DTwithR$R <- sum(DTwithR$infcaused) / nrow(DTwithR)
#     Rtimestepdata <- data.frame(time_i = time_i, suminfcaused = sum(DTwithR$infcaused), R = sum(DTwithR$infcaused) / nrow(DTwithR))
#     output <- rbind(output, Rtimestepdata)
#   }
#   return(output)
# }

################
# Create cumulative network dataset
################
cumulnetwork <- function(DTR = datalist$rtable, cfg = configfile, windowwidth = 20){
  endwindow <- 30 # as.numeric(cfg["hivseed.time"]) + windowwidth
  startwindow <- 0 # endwindow - windowwidth
  Rels <- DTR[FormTime >=startwindow & FormTime < endwindow]
  rdata <- data.frame(
    tails = paste0(Rels$IDm, "m"),
    heads = paste0(Rels$IDw, "w"),
    form = Rels$FormTime,
    diss = Rels$DisTime,
    durat = pmin(endwindow, Rels$DisTime) - Rels$FormTime,
    stringsAsFactors=FALSE
  )
  cn <- network(rdata[ , 1:2], directed = FALSE, matrix.type = "edgelist") # cumulative network
  cnvertexnames <- network.vertex.names(cn)
  maleindices <- grep("m", cnvertexnames)
  Sex <- rep(0, network.size(cn))
  Sex[maleindices] <- 1
  cnseedindex <- grep(SeedID, cnvertexnames)
  Sex[cnseedindex] <- 2
  VertexSize <- rep(1, network.size(cn))
  VertexSize[cnseedindex] <- 2
  
  set.vertex.attribute(cn, attrname="Sex", value=Sex, v=seq_len(network.size(cn)))
  #cnplot<- plot(cn, label = "", vertex.col = Sex)
  # Saving the plot
  png(file="CN.png", height = 1800, width = 1800, res=300)
  cnplot <- plot(cn, label = "", vertex.col = Sex, vertex.cex = VertexSize)
  dev.off()
  
  
}



################
# Create cross-sectional network dataset
################
csnetwork <- function(DTR = datalist$rtable, cfg = configfile, time = as.numeric(cfg["hivseed.time"])){
  Rels <- DTR[FormTime <= time & DisTime > time]
  rdata <- data.frame(
    tails = paste0(Rels$IDm, "m"),
    heads = paste0(Rels$IDw, "w"),
    form = Rels$FormTime,
    diss = Rels$DisTime,
    durat = pmin(endwindow, Rels$DisTime) - Rels$FormTime,
    stringsAsFactors=FALSE
  )
  csn <- network(rdata[ , 1:2], directed = FALSE, matrix.type = "edgelist") # cumulative network
  csnvertexnames <- network.vertex.names(csn)
  maleindices <- grep("m", csnvertexnames)
  SexCSN <- rep(0, network.size(csn))
  SexCSN[maleindices] <- 1
  csnseedindex <- grep(SeedID, csnvertexnames)
  SexCSN[csnseedindex] <- 2
  VertexSize <- rep(1, network.size(csn))
  VertexSize[csnseedindex] <- 2
  set.vertex.attribute(csn, attrname="Sex", value=SexCSN, v=seq_len(network.size(csn)))
  #csnplot <- plot(csn, label = "", vertex.col = SexCSN)
  # Saving the plot
  png(file="CSN.png", height = 1800, width = 1800, res=300)
  plot(csn, label = "", vertex.col = SexCSN, vertex.cex = VertexSize)
  dev.off()
  
}

# Overlaying cn with csn
# matchingindices <- pmatch(csnvertexnames, cnvertexnames)
# csncoord <- cnplot[matchingindices, ]
# png(file="CSN_CS.png", height = 1800, width = 1800, res=300)
# plot(csn, label = "", vertex.col = SexCSN, coord = csncoord, vertex.cex = VertexSize)
# dev.off()


################
# Create potential transmission network (directed network) (=PTN)
################
# The potential transmission routes, starting from the seed infection
ptnetwork <- function(DT = datalist$ptable, DTR = datalist$rtable, cfg = configfile){
  
  DTR[, IDm := paste0(DTR[, IDm], "m")]
  DTR[, IDw := paste0(DTR[, IDw], "w")]
  Seed <- datalist$ptable[InfectTime == as.numeric(cfg["hivseed.time"])]
  gender_suffix <- c("m", "w")
  SeedID <- paste0(Seed$ID, gender_suffix[Seed$Gender + 1])
  
  PTN <- data.table()
  # Now we run through the partners of the seed and their partners and so on, while NEW relationships are being formed
  DTRfr <- data.frame(DTR)
  # Rels of seed
  SeedRels <- DTRfr[, (Seed$Gender + 1)] == SeedID
  AfterSeedTimeRels <- DTR$DisTime > as.numeric(cfg["hivseed.time"])
  RelsList <- DTR[SeedRels & AfterSeedTimeRels, ] # This is the list of ongoing Rels of the seed, after HIV was introduced
  
  iteration <- 0
  while (nrow(RelsList) > 0){
    iteration <- iteration + 1
    RelsListfr <- data.frame(RelsList)
    RelsListCopy <- data.table(RelsListfr)
    # Now we list the partners of the seed
    if (iteration == 1){
      PartnerColumnIndex <- abs(Seed$Gender - 1) + 1
      
      if (Seed$Gender == 1){
        setcolorder(RelsListCopy, c("IDw", "IDm", "FormTime", "DisTime", "AgeGap"))
      }
    } else {
      PartnerColumnIndex <- - PartnerColumnIndex + 3 # 1 becomes 2 and 2 becomes 1
      if (PartnerColumnIndex == 1){
        setcolorder(RelsListCopy, c("IDw", "IDm", "FormTime", "DisTime", "AgeGap"))
      }
    }
    RelsListCopyfr <- data.frame(RelsListCopy)
    if (iteration == 1){
    PTN <- as.matrix(RelsListCopyfr)#[, 1:2])
    } else {
      PTN <- rbind(as.matrix(PTN), as.matrix(RelsListCopyfr))#[, 1:2]))
    }
    
    PartnerIDs <- unique(RelsListfr[, PartnerColumnIndex])
    FirstRelIndex <- match(PartnerIDs, RelsListfr[, PartnerColumnIndex])
    StartPotentialTransm <- pmax(as.numeric(cfg["hivseed.time"]), RelsListfr[FirstRelIndex, "FormTime"])
    
    # Now list the relationships that these partners formed AFTER they formed the relationship with the seed that could have led to transmission
    RelsList <- data.table()
    for (i in (1:length(PartnerIDs))){
      PartnerRels <- DTRfr[, PartnerColumnIndex] == PartnerIDs[i]
      AfterRels <- DTRfr[, "FormTime"] > StartPotentialTransm[i]
      RelsListi <- DTR[PartnerRels & AfterRels, ]
      RelsList <- rbind(RelsList, RelsListi)
    }
  }
  rPTNdata <- data.frame(
    tails = PTN[, 1],
    heads = PTN[, 2],
    form = as.numeric(PTN[, 3]),
    diss = as.numeric(PTN[, 4]),
    durat = pmin(as.numeric(cfg["population.simtime"]), as.numeric(PTN[, 4])) - as.numeric(PTN[, 3]),
    stringsAsFactors=FALSE
  )
  ptn <- network(rPTNdata[ , 1:2], directed = TRUE, matrix.type = "edgelist") # potential tranmission network
  ptnvertexnames <- network.vertex.names(ptn)
  ptnmaleindices <- grep("m", ptnvertexnames)
  ptnseedindex <- grep(SeedID, ptnvertexnames)
  SexPTN <- rep(0, network.size(ptn))
  SexPTN[ptnmaleindices] <- 1
  SexPTN[ptnseedindex] <- 2
  set.vertex.attribute(ptn, attrname="Sex", value=list(SexPTN), v=seq_len(network.size(ptn)))
  VertexSize <- rep(1, network.size(ptn))
  VertexSize[ptnseedindex] <- 2
  # Plotting the network
  ptnplot <- plot(ptn,
                  label = "",
                  vertex.col = SexPTN,
                  vertex.cex = VertexSize,
                  jitter=T)
  # Saving the plot
  png(file="PTN.png", height = 1800, width = 1800, res=300)
  plot(ptn,
       label = "",
       vertex.col = SexPTN,
       vertex.cex = VertexSize,
       jitter=T)

  dev.off()
  
}


# Overlaying cn with ptn
# matchingindicesPTN <- pmatch(ptnvertexnames, cnvertexnames)
# ptncoord <- cnplot[matchingindicesPTN, ]
# png(file="PTN_CN.png", height = 1800, width = 1800, res=300)
# plot(ptn, label = "", vertex.col = SexPTN, coord = ptncoord, vertex.cex = VertexSize)
# dev.off()







################
# Create transmission tree dataset
################
transmissiontree <- function(datalist = datalist, cfg = configfile){
  TE = datalist$etable[eventname=="transmission"] # TE = transmission events
  
  Seed <- datalist$ptable[InfectTime == as.numeric(cfg["hivseed.time"])]
  gender_suffix <- c("m", "w")
  SeedID <- paste0(Seed$ID, gender_suffix[Seed$Gender + 1])
  
  edata <- data.frame(
    tails = paste0(TE$p1ID, gender_suffix[TE$p1gender + 1]), #TE$p1name,
    heads = paste0(TE$p2ID, gender_suffix[TE$p2gender + 1]),
    time = TE$eventtime,
    tailname = TE$p1name,
    tailgender = TE$p1gender,
    tailage = TE$p1age,
    headname = TE$p2name,
    headgender = TE$p2gender,
    headage = TE$p2age,
    stringsAsFactors=FALSE
  )
  
# Before we turn this edge list into a network object, we need to add the seeding transmission event
  edataplusroot <- rbind(c("root", edata$tails[1], as.numeric(cfg["hivseed.time"]),
                   "root", NA, NA, edata$tailname[1], edata$tailgender[1],
                   edata$tailage[1] - edata$time[1] + as.numeric(cfg["hivseed.time"])),
                 edata)
#   
#   ntips <- length(unique(c(TE$p1name, TE$p2name)))
#   tip.labels <- unique(c(TE$p1name, TE$p2name))
#                      
#   ninternalnodes <- nrow(edataplusroot)

tn <- network(edata[ , 1:2], directed = TRUE, matrix.type = "edgelist")
tnvertexnames <- network.vertex.names(tn)
tnmaleindices <- grep("m", tnvertexnames)
tnseedindex <- grep(SeedID, tnvertexnames) # Temporary solution for SACEMA Research days
SexTN <- rep(0, network.size(tn))
SexTN[tnmaleindices] <- 1
SexTN[tnseedindex] <- 2
set.vertex.attribute(tn, attrname="Sex", value=list(SexTN), v=seq_len(network.size(tn)))
VertexSizeTN <- rep(1, network.size(tn))
VertexSizeTN[tnseedindex] <- 2

tn.undir <- network(edata, directed = FALSE, matrix.type = "edgelist", ignore.eval = FALSE)
tndata <- list(tn = tn,
               tn.vertex.col = SexTN,
               tn.vertex.cex = VertexSizeTN,
               tn.undir = tn.undir)

return(tndata)
# Plotting the network
# tnplot <- plot(tn,
#                 label = "",
#                 vertex.col = SexTN,
#                 vertex.cex = VertexSizeTN,
#                 jitter=T)
# # Saving the plot
# png(file="TN.png", height = 1800, width = 1800, res=300)
# plot(tn,
#      label = "",
#      vertex.col = SexTN,
#      vertex.cex = VertexSizeTN,
#      jitter=T)
# 
# dev.off()

}

postABCplot <- function(ABCfit = ABC_LenormandResult,
                        targetSS = sum_stat_obs,
                        preprior = preprior,
                        param.prior = simpact_prior){
  stats <- ABCfit$stats
  param <- ABCfit$param
  prior.df <- ldply(simpact_prior)
  prior.df$V4 <- preprior
  
  statsdiff <- sweep(stats, 2, targetSS)
  statsreldiff <- sweep(statsdiff, 2, targetSS, FUN = "/")
  av.normalised.distance <- rowSums(statsreldiff)
  av.abs.normalised.distance <- rowSums(abs(statsreldiff))
  
  stats.df <- as.data.table(stats)
  stats.df$run <- 1: nrow(stats.df)
  stats.df.long <- melt(data = stats.df, measure.vars = 1:8)
  stats.df.long$target <- targetSS
  
  param.df <- as.data.table(param)
  param.df$run <- 1:nrow(param.df)
  param.df.long <- melt(data = param.df, measure.vars = 1:8)
  param.df.long$prior.low <- prior.df$V2
  param.df.long$prior.high <- prior.df$V3
  param.df.long$prior.est <- prior.df$V4
  
  plot.SS <- ggplot(data = stats.df.long, aes(x = value)) +
    geom_histogram() +
    facet_wrap(~ variable, nrow = 2, scales = "free") +
    ggtitle("Summary statistics")
  vline.data <- data.frame(target = targetSS, variable = levels(stats.df.long$variable))
  plot.SS <- plot.SS +
    geom_vline(aes(xintercept = target), colour = "green4", size = 2, vline.data)
  plot.SS
  
  plot.param <- ggplot(data = param.df.long, aes(x = value)) +
    geom_histogram() +
    facet_wrap(~ variable, nrow = 2, scales = "free") +
    ggtitle("Model parameters")
  vline.data <- data.frame(prior.low = as.numeric(prior.df$V2),
                           prior.high = as.numeric(prior.df$V3),
                           prior.est = prior.df$V4,
                           variable = levels(param.df.long$variable))
  plot.param <- plot.param +
    geom_vline(aes(xintercept = prior.low), colour = "blue2", vline.data) +
    geom_vline(aes(xintercept = prior.high), colour = "blue2", vline.data) +
    geom_vline(aes(xintercept = prior.est), colour = "blue2", size = 2, vline.data)
  plot.param
  multiplot(plot.SS, plot.param)
  
}

# Overlaying cn with tn
# matchingindicesTN <- pmatch(tnvertexnames, cnvertexnames)
# tncoord <- cnplot[matchingindicesTN, ]
# png(file="TN_CN.png", height = 1800, width = 1800, res=300)
# plot(tn, label = "", vertex.col = SexTN, coord = tncoord, vertex.cex = VertexSizeTN)
# dev.off()

######
# Last step: plot phylogeny of transmission tree
######

# pg <- phylo4(nj(geodist(tn)$gdist))
# apepg <- as(pg, "phylo")
# apepgrooted <- multi2di(apepg, random = FALSE)
# apepgrooted$edge.length <- c(30-10.18568, # 7-6
#                              15.05207-10.18568, # 7-8
#                              30-15.05207, # 8-5
#                              16.13587-15.05207, # 8-9
#                              22.99193-16.13587, # 9-10
#                              24.29339-22.99193, # 10-11
#                              30-24.29339, # 11-2
#                              30-24.29339, # 11-1
#                              30-22.99193, # 10-3
#                              30-16.13587  #  9-4
# )
# 
# apepgrooted$root.edge <- 10.18568-10
# apepgrooted$tip.label <- c("Woman 46",
#                            "Woman 37",
#                            "Man 23",
#                            "Man 4",
#                            "Man 2",
#                            "Woman 28")
# png(file="PG.png", height = 1800, width = 1800, res=300)
# plot(apepgrooted, root.edge = TRUE)
# dev.off()

# 
# 
#   g <- network(edata[ , 1:2], directed = TRUE, matrix.type = "edgelist")
# vertexnames <- network.vertex.names(g)
# plot(network(edata, directed=T, matrix.type = "edgelist"), label = vertexnames)
# 
# plot(g, label = vertexnames)
# 
#   g2 <- network(edataplusroot[ , 1:2], directed = TRUE, matrix.type = "edgelist")
# 
# 
# test <- phylo4(nj(geodist(g)$gdist))
# apetest <- as(test, "phylo")
# apetestrooted <- multi2di(apetest, random = FALSE)
# plot(apetestrooted)
# 
# phylo4(nj(geodist(g2)$gdist))


  # Now we add node attributes
  # Now we add edge attributes

  # Now we extract essential information from edata to create phylo tree.
  # Constraints to input for phylo4 tree constructor:
  
  #   if the tree has edge lengths defined, the number of edge lengths must match
  #   the number of edges; the number of tip labels must match the number of tips;
  #   in a tree with ntips tips and nnodes (total) nodes, nodes 1 to ntips must be
  #   tips if the tree is rooted, the root must be node number ntips+1 and the
  #   root node must be the first row of the edge matrix; tip labels, node labels,
  #   edge labels, edge lengths must have proper internal names (i.e. internal
  #   names that match the node numbers they document); tip and node labels must be
  #   unique.

  # First essential element is x, the matrix of edges.
#   nodes <- 1:(ntips+ninternalnodes)
#   rootnodenumber <- ntips+1
#   internalnodenumbers <- (rootnodenumber+1):max(nodes)
#   tipnumbers <- 1:ntips
#   
#   firstrow <- c(rootnodenumber, 1)
  


################
# Create phylogeny dataset
################


#   nrels[i] <- nrow(r)
#   cumulincid[i] <- sum(p$InfectTime>=60 & p$InfectTime<Inf) #cfg["hivseed.time"] & p$InfectTime<Inf)
#   # How many infections to place in the last 10 years of the simulation? (simulation years 50-60)
#   
#   allrpdata <- rbind(allrpdata, rp)
# } 
# 
# summarystats <- (data.frame(simID, meanagegap, varagegap, BetweenVar, WithinVar,
#                             nrels, cumulincid))
# summarystats
# }
  
  



# hivincdata <- data.frame(timesteps,incidence)
# ggplot(hivincdata,aes(x=timesteps, y=1000*incidence)) +
#   geom_line(colour = "darkred", size=2) +
#   guides(colour = FALSE) +
#   xlab("Simulation time") +
#   ylab("HIV incidence (per 100 PY)")



incidentcase <- function(DT, interval){ # arguments are the eventlog data.table and a vector of start and end times of the interval
  DTincident <- DT[eventname == "transmission" & eventtime > interval[1] & eventtime <= interval[2]]
}

nonHIVdeath <- function(DT, interval){ # arguments are the eventlog data.table and a vector of start and end times of the interval
  DTnonHIVdeath <- DT[eventname == "normalmortality" & eventtime > interval[1] & eventtime <= interval[2]]
}


alive.infected <- function(DT, time, dec.places){ # arguments are the personlog data.table and a point in time
  DTalive <- DT[TOB <= time & TOD > time]
  DTalive$Age <- time -round(ceiling(DTalive$TOB), dec.places) # Next we allocate them in discrete age bins with bin size as wide as timestep
  DTalive$Infected <- time >= DTalive$InfectTime # Now we allocate infection status to all people in our table of living people
  return(DTalive)
}

gg_color_hue <- function(n, l) {
  hues = seq(15, 375, length=n+1)
  hcl(h=hues, l=l, c=100)[1:n]
}


decimalplaces <- function(x) {
  if ((x %% 1) != 0) {
    nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed=TRUE)[[1]][[2]])
  } else {
    return(0)
  }
}

# ####
# # To create heatplot, we need to loop through time steps, and calculate age- and gender-specific prevalence and incidence rates.
# alltimestepsdata <- data.table()
# timesteps <- seq(as.numeric(cfg["hivseed.time"]), as.numeric(cfg["population.simtime"]), by=1)
# for (time_i in timesteps) {
#   alivetable <- alivetab(time_i)   # First we only take the data of people who were alive at time_i
#   alivetable$Age.1 <- time_i -round(alivetable$TOB, 0) # Next we allocate them in discrete age bins with bin size equal as width of timesteps
#   alivetable$Infected <- time_i >= alivetable$InfectTime # Now we allocate infection status to all people in our table of living people
#   alive_data.table <- data.table(alivetable) # We turn the dataset into a proper data.table
#   timestepdata <- alive_data.table[,sum(Infected) / nrow(alive_data.table),by="Gender,Age.1"] # And we calculate HIV prevalence, by gender and age group
#   setnames(timestepdata, "V1", "Prevalence")
#   timestepdata <- cbind(time_i, timestepdata)
#   alltimestepsdata <- rbind(alltimestepsdata, timestepdata)
# }
# alltimestepsdata <- cbind(i, alltimestepsdata) # i is the ID number of the simulation
# 
# 
# 
# # Test sqldf query
# # x is evaltime (e.g. at end of simulation)
# relsandconcurM <- function(x){
#   query <- paste0("SELECT IDm, rels,
#                   CASE WHEN rels > 1 THEN 1 ELSE 0 END concur
#                   FROM (
#                   SELECT IDm, COUNT(*) rels
#                   FROM r
#                   WHERE ", x, "> FormTime AND ", x, "< DisTime
#                   GROUP BY IDm
#                   )"
#                   )
#   return (sqldf(query))
# }
# relsandconcurW <- function(x){
#   query <- paste0("SELECT IDw, rels,
#                   CASE WHEN rels > 1 THEN 1 ELSE 0 END concur
#                   FROM (
#                   SELECT IDw, COUNT(*) rels
#                   FROM r
#                   WHERE ", x, "> FormTime AND ", x, "< DisTime
#                   GROUP BY IDw
#                   )"
#                   )
#   return (sqldf(query))
# }
# # selecting people who are alive at time x
# alive <- function(x){
#   query <- paste0("SELECT ID, TOB
#                   FROM p
#                   WHERE ", x, ">= TOB AND ", x, "< TOD
#                   "#                  GROUP BY ID"
#   )
#   return (sqldf(query))
# }
# # number of infections caused
# infcaused <- function(){
#   query <- paste0("SELECT InfectOrigID, COUNT(*) infcaused
#                   FROM p
#                   GROUP BY InfectOrigID"
#   )
#   return (sqldf(query))
# }
# # infected at evaltime x
# infected <- function(x){
#   query <- paste0("SELECT ID, log10SPVL
#                   FROM p
#                   WHERE ", x, ">= InfectTime"
#   )
#   return (sqldf(query))
# }
# 
# # chronically infected at evaltime x (past the acute phase already)
# chroninfected <- function(x){
#   query <- paste0("SELECT ID, log10SPVL
#                   FROM p
#                   WHERE ", x, ">= InfectTime + 0.25"
#   )
#   return (sqldf(query))
# }
# 
# # dead and infected at evaltime x
# deadandinfected <- function(x){
#   query <- paste0("SELECT *
#                   FROM p
#                   WHERE ", x, ">= InfectTime AND ", x, "> TOD"
#   )
#   return (sqldf(query))
# }
# 
# 
# # dead and infected with number of infections at evaltime x
# secondaryinfections <- function(completedinfperiod, infectionscaused){
#   query <- paste0("SELECT *
#                   FROM completedinfperiod
#                   LEFT OUTER JOIN infectionscaused
#                   ON completedinfperiod.ID = infectionscaused.InfectOrigID"
#   )
#   return (sqldf(query))
# }
# 
# # alive and infected at evaltime x
# aliveandinfected <- function(infected, alive){
#   query <- paste0("SELECT ID, log10SPVL
#                   FROM infected
#                   JOIN alive
#                   USING (ID)"
#   )
#   return (sqldf(query))
# }
# 
# # alive and chronically infected at evaltime x
# aliveandchroninfected <- function(chroninfected, alive){
#   query <- paste0("SELECT ID, log10SPVL
#                   FROM chroninfected
#                   JOIN alive
#                   USING (ID)"
#   )
#   return (sqldf(query))
# }
# 
# # number of relationships per person in pop
# relsandpop <- function(relsMandW, pop){
#   query <-paste0("SELECT *
#                  FROM pop
#                  LEFT OUTER JOIN relsMandW
#                  USING (ID)"
#   )
#   return (sqldf(query))
# }
# 
# # Adding TOB to r, so that we can plot age mixing pattern
# rwithIDmdata <- function(r, p){
#   query <-paste0("SELECT *
#                  FROM r
#                  JOIN p
#                  ON r.IDm = p.ID"
#   )
#   return (sqldf(query))
# }
# 
# # Adding TOB to r, so that we can plot age mixing pattern
# pGender <- function(p, vIDs){
#   query <-paste0("SELECT ID, Gender
#                  FROM p
#                  JOIN e
#                  ON p.ID = vIDs.ID"
#   )
#   return (sqldf(query))
# }
# 
# alive <- function(x){
#   query <- paste0("SELECT ID, TOB
#                   FROM p
#                   WHERE ", x, ">= TOB AND ", x, "< TOD
#                   "#                  GROUP BY ID"
#   )
#   return (sqldf(query))
# }
# 
# alivetab <- function(x){
#   query <- paste0("SELECT *
#                   FROM DT
#                   WHERE ", x, ">= TOB AND ", x, "< TOD
# "#                  GROUP BY ID"
#   )
#   return (sqldf(query))
# }

postsimscript

Wim Delva

11 Sep 2015