To Do: Rewrite this for an exponential equation for Giardia, using new Giardia dose-response relationship data.

For this risk estimate, we will use mcmodel and evalmcmod from the mc2d package.

# Load packages.
library(mc2d)
## Loading required package: mvtnorm
## 
## Attaching package: 'mc2d'
## The following objects are masked from 'package:base':
## 
##     pmax, pmin
library(data.table)
library(sfsmisc)
## 
## Attaching package: 'sfsmisc'
## The following object is masked from 'package:data.table':
## 
##     last
library(graphics)

Define variables

Set the number of simulations for the variability and uncertainty dimensions.

nsv <- 401
nsu <- 108
ndvar(nsv)  # Variability
## [1] 401
ndunc(nsu)   # Uncertainty
## [1] 108

The mc2d functions will set nsv to ndvar() and nsu to ndunc() by default. Alternatively, we could supply these values to the mc2d model functions when we call them.

Define the variables we will use to set the seed for random sampling and the number of digits for print() statements.

seed <- 1
digits <- 5

Introducing the bootstrapped dose-response data, fread command and sampling from Brian:

klist <- fread("output_rendtorff_giardia/klist.csv", sep = ',')
gdrk <- klist[sample(nrow(klist), nsu, replace = TRUE), list(x)] #sampling rows in the dose-response bootstrapped table equal to the number of simulations in the uncertainty dimension, aka NSU

Sampling from the Giardia Crabtree custom distribution estimates:

cw.vlg <- fread("~/u_drive/envh543/cwvl_estimates/giardia_estwol.csv", sep = ',')
cw.vl.samp <- sample(cw.vlg[,c(V1)], nsu, replace = TRUE) #sampling rows in the dose-response bootstrapped table equal to the number of simulations in the uncertainty dimension, aka NSU

We will use this variable to explicitly set the seed with the various mc2d functions. Another approach would be to only set it through the model evaluation functions, or not at all. Since we want to do our best to provide the most reproducible results, we set the seed explicitly.

Define exposure model

Within the mcmodel function, use mcstoc to define “mc nodes” for each component of the model.

For each stochastic variable, use the mcstoc() function to create a “mcnode”, supply a probability function, the node type as “V” for variability or “U” for uncertainty, and any additional parameters to be passed to the probability function.

For deterministic variables, create nodes with the mcdata() function using the “0” type.

We will model the deterministic factors as uniform probablity distributions.

The last statement makes an “mc” object using the mc function.

# Define an exposure model for evaluation by evalmcmod().
expo.mod1 <- mcmodel({
  # Values from Example 6.18 from Quantitative Microbial Risk Assessment, 
  # 2nd Edition by Charles N. Haas, Joan B. Rose, and Charles P. Gerba. 
  # (Wiley, 2014), pp. 215-216. Other fictitious values are noted below.
  
   # Cistern water viral loading (viruses/L):
  cw.vl <- mcdata(cw.vl.samp, type = "U")
  
  # Water consumption (L/day):
  dw.ing <- mcdata(1.466, type = "0")

  # Hosing per event ingestion rate (L/event): worst case estimate = 10mL or .01L per day
  hose.perevent <- mcdata(.01, type = "0")

  # Hosing frequency (hosing/day), fictitious discrete distribution (this basically says that there is a 10% chance someone never hoses, a 70% chance they use the hose every other day, and a 10% chance they use the hose every day) - will be IGNORED for the moment:
  hosing.frequency <- mcstoc(rempiricalD, type = "V", seed = seed, 
                             values = c(0, 0.5, 1), 
                             prob = c(0.1, 0.7, 0.2))
  
   # Showering per event ingestion rate (L/event): worst case estimate = 10mL or .01L per day
  shower.perevent <- mcdata(.01, type = "0")
  
  # Estimate the exposure based ONLY on drinking water ingestion using the 0, V and U nodes to create a VU node.
  exporaw.mc1 <- (cw.vl * (dw.ing + hose.perevent + shower.perevent)) 
  
  #Reduction by free chlorine at 5ppm - this is based on the "range" of values from 99% reduction to 99.8% reduction, but can be changed to just a single value:
  fc_reduc <- mcstoc(rempiricalD, type = "V", seed = seed, values = c(0.01, 0.007, 0.004, 0.002), prob = c(0.25, 0.25, 0.25, 0.25))
  exporaw.5ppm_fcl <- (exporaw.mc1 * fc_reduc)
  
  #Reduction by UV: 
  exporaw.uv <- (exporaw.mc1 * .0001)
  
    #Reduction by porous ceramic filtration - this is based on the "range" of values from 98.7% reduction to 99.8% reduction, but can be changed to just a single value:
  pfc_reduc <- mcstoc(rempiricalD, type = "V", seed = seed, values = c(0.013, 0.009, 0.006, 0.002), prob = c(0.25, 0.25, 0.25, 0.25))
  exporaw.pfc_reduc <- (exporaw.mc1 * pfc_reduc)
  
  #Reduction by porous ceramic filtration AND free chlorine 5ppm:
  exporaw.fcl_pfc <- (exporaw.mc1 * pfc_reduc * fc_reduc)
  
    #Final exposure - just make this whichever one you want to use at the moment:
  expo.mc1 <- exporaw.5ppm_fcl
  
  #Estimate the risk using a dose-response model pulling from a bootstrapped model of the dose-response
  dose <- mcstoc(rpois, type = "VU", lambda = expo.mc1)

  k <- mcdata(gdrk$x, type = "U")
  
  risk <- (1 - exp(-dose*k)) #this equation is my version an exponential DR equation from what came off QMRA Wiki <http://qmrawiki.canr.msu.edu/index.php?title=Dose_response_assessment> and someone should check it -A
  
  #yearlyrisk
  yearrisk <- 1 - ((1 - risk)^365)
  
  logyearrisk <- log10(yearrisk)
  
  # Build a mc model from all of the mcnode objects.
  mc(cw.vl, dw.ing, hose.perevent, shower.perevent, k, exporaw.mc1, exporaw.5ppm_fcl, fc_reduc, pfc_reduc, exporaw.pfc_reduc, exporaw.uv, expo.mc1, risk, yearrisk, logyearrisk) 
})

Evaluate the model

Evaluate the model with 5000 iterations in the variability dimension and 250 iterations in the uncertainty dimension, as set previously.

expo.ev1 <- evalmcmod(expo.mod1, seed = seed)
print(expo.ev1, digits = digits)
##                 node    mode nsv nsu nva variate      min       mean
## 1              cw.vl numeric   1 108   1       1 0.000000 5.5843e-01
## 2             dw.ing numeric   1   1   1       1 1.466000 1.4660e+00
## 3      hose.perevent numeric   1   1   1       1 0.010000 1.0000e-02
## 4    shower.perevent numeric   1   1   1       1 0.010000 1.0000e-02
## 5                  k numeric   1 108   1       1 0.010498 2.0230e-02
## 6        exporaw.mc1 numeric   1 108   1       1 0.000000 8.2982e-01
## 7   exporaw.5ppm_fcl numeric 401 108   1       1 0.000000 4.7265e-03
## 8           fc_reduc numeric 401   1   1       1 0.002000 5.6958e-03
## 9          pfc_reduc numeric 401   1   1       1 0.002000 7.4888e-03
## 10 exporaw.pfc_reduc numeric 401 108   1       1 0.000000 6.2143e-03
## 11        exporaw.uv numeric   1 108   1       1 0.000000 8.2982e-05
## 12          expo.mc1 numeric 401 108   1       1 0.000000 4.7265e-03
## 13              risk numeric 401 108   1       1 0.000000 1.0376e-04
## 14          yearrisk numeric 401 108   1       1 0.000000 4.7017e-03
## 15       logyearrisk numeric 401 108   1       1     -Inf       -Inf
##    median         max Nas type outm
## 1  0.0000  3.7900e+00   0    U each
## 2  1.4660  1.4660e+00   0    0 each
## 3  0.0100  1.0000e-02   0    0 each
## 4  0.0100  1.0000e-02   0    0 each
## 5  0.0173  3.7046e-02   0    U each
## 6  0.0000  5.6319e+00   0    U each
## 7  0.0000  5.6319e-02   0   VU each
## 8  0.0040  1.0000e-02   0    V each
## 9  0.0060  1.3000e-02   0    V each
## 10 0.0000  7.3215e-02   0   VU each
## 11 0.0000  5.6319e-04   0    U each
## 12 0.0000  5.6319e-02   0   VU each
## 13 0.0000  4.4446e-02   0   VU each
## 14 0.0000  1.0000e+00   0   VU each
## 15   -Inf -2.6973e-08   0   VU each

Summarize results

Print a summary and a plot of the evaluation results (expo.ev1).

# Summarize the results.
summary(expo.ev1)
## cw.vl :
##        NoVar
## median 0.000
## mean   0.558
## 2.5%   0.000
## 97.5%  3.160
## 
## dw.ing :
##       NoUnc
## NoVar  1.47
## 
## hose.perevent :
##       NoUnc
## NoVar  0.01
## 
## shower.perevent :
##       NoUnc
## NoVar  0.01
## 
## k :
##         NoVar
## median 0.0173
## mean   0.0202
## 2.5%   0.0105
## 97.5%  0.0329
## 
## exporaw.mc1 :
##        NoVar
## median  0.00
## mean    0.83
## 2.5%    0.00
## 97.5%   4.70
## 
## exporaw.5ppm_fcl :
##           mean      sd     Min    2.5%     25%     50%     75%  97.5%
## median 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## mean   0.00473 0.00245 0.00166 0.00166 0.00332 0.00332 0.00581 0.0083
## 2.5%   0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## 97.5%  0.02675 0.01386 0.00939 0.00939 0.01878 0.01878 0.03287 0.0470
##           Max nsv Na's
## median 0.0000 401    0
## mean   0.0083 401    0
## 2.5%   0.0000 401    0
## 97.5%  0.0470 401    0
## 
## fc_reduc :
##         mean      sd   Min  2.5%   25%   50%   75% 97.5%  Max nsv Na's
## NoUnc 0.0057 0.00295 0.002 0.002 0.004 0.004 0.007  0.01 0.01 401    0
## 
## pfc_reduc :
##          mean      sd   Min  2.5%   25%   50%   75% 97.5%   Max nsv Na's
## NoUnc 0.00749 0.00389 0.002 0.002 0.006 0.006 0.009 0.013 0.013 401    0
## 
## exporaw.pfc_reduc :
##           mean      sd     Min    2.5%     25%     50%     75%  97.5%
## median 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## mean   0.00621 0.00323 0.00166 0.00166 0.00498 0.00498 0.00747 0.0108
## 2.5%   0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## 97.5%  0.03517 0.01827 0.00939 0.00939 0.02817 0.02817 0.04226 0.0610
##           Max nsv Na's
## median 0.0000 401    0
## mean   0.0108 401    0
## 2.5%   0.0000 401    0
## 97.5%  0.0610 401    0
## 
## exporaw.uv :
##          NoVar
## median 0.0e+00
## mean   8.3e-05
## 2.5%   0.0e+00
## 97.5%  4.7e-04
## 
## expo.mc1 :
##           mean      sd     Min    2.5%     25%     50%     75%  97.5%
## median 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## mean   0.00473 0.00245 0.00166 0.00166 0.00332 0.00332 0.00581 0.0083
## 2.5%   0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000
## 97.5%  0.02675 0.01386 0.00939 0.00939 0.01878 0.01878 0.03287 0.0470
##           Max nsv Na's
## median 0.0000 401    0
## mean   0.0083 401    0
## 2.5%   0.0000 401    0
## 97.5%  0.0470 401    0
## 
## risk :
##            mean       sd Min 2.5% 25% 50% 75%    97.5%     Max nsv Na's
## median 0.000000 0.000000   0    0   0   0   0 0.000000 0.00000 401    0
## mean   0.000104 0.000789   0    0   0   0   0 0.000708 0.00692 401    0
## 2.5%   0.000000 0.000000   0    0   0   0   0 0.000000 0.00000 401    0
## 97.5%  0.000567 0.003934   0    0   0   0   0 0.017151 0.03236 401    0
## 
## yearrisk :
##          mean     sd Min 2.5% 25% 50% 75% 97.5%   Max nsv Na's
## median 0.0000 0.0000   0    0   0   0   0 0.000 0.000 401    0
## mean   0.0047 0.0354   0    0   0   0   0 0.037 0.296 401    0
## 2.5%   0.0000 0.0000   0    0   0   0   0 0.000 0.000 401    0
## 97.5%  0.0299 0.1703   0    0   0   0   0 0.998 1.000 401    0
## 
## logyearrisk :
##        mean  sd  Min 2.5%  25%  50%  75%     97.5%       Max nsv Na's
## median -Inf  NA -Inf -Inf -Inf -Inf -Inf      -Inf      -Inf 401    0
## mean   -Inf NaN -Inf -Inf -Inf -Inf -Inf      -Inf      -Inf 401    0
## 2.5%   -Inf  NA -Inf -Inf -Inf -Inf -Inf      -Inf      -Inf 401    0
## 97.5%  -Inf  NA -Inf -Inf -Inf -Inf -Inf -0.000787 -2.65e-06 401    0
# Plot the results.
plot(expo.ev1)

Report the mean and median of the DAILY risk probability means with a 95% confidence interval (CI95).

mean.risk <- sapply(1:ndunc(), function(j) mean(expo.ev1$risk[, j, ]))
mean(mean.risk)
## [1] 0.0001037639
quantile(mean.risk, probs = seq(0, 1, 0.025))[c("2.5%", "50%", "97.5%")]
##         2.5%          50%        97.5% 
## 0.0000000000 0.0000000000 0.0005670194

Report the mean and median of the YEARLY risk probability means with a 95% confidence interval (CI95).

mean.yearrisk <- sapply(1:ndunc(), function(j) mean(expo.ev1$yearrisk[, j, ]))
mean(mean.yearrisk)
## [1] 0.004701743
quantile(mean.yearrisk, probs = seq(0, 1, 0.025))[c("2.5%", "50%", "97.5%")]
##       2.5%        50%      97.5% 
## 0.00000000 0.00000000 0.02988183

Plot the empirical cumulative distribution function (ecdf) of the exposure model (expo.mc1) estimates.

# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$expo.mc1)

Plot the empirical cumulative distribution function (ecdf) of the risk model (risk) estimates.

# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$risk)

So if we did this right, could the plot above can be correctly labeled as follows?

# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$risk, main = "", ylab = "Proportion of Estimates", xlab = "Daily Probability of Infection")
title("Daily Infec. Risk with Giardia by RCRW
in the USVI", cex.main = 1, font.main= 1, line = 7)

# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$yearrisk, main = "", ylab = "Proportion of Estimates", xlab = "Yearly Probability of Infection")
title("Yearly Infec. Risk with Giardia by RCRW
in the USVI", cex.main = 1, font.main= 1, line = 7)

Manually remake the graph, this time using yearrisk, which should just be another mcnode object with all values between 0 and 1:

The first one plots yearrisk without a log x scale:

library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:data.table':
## 
##     melt
library(ggplot2)
Risk.mat.1 <- expo.ev1$yearrisk[,,]
probs1 <- c(0.025, 0.25, 0.50, 0.75, 0.975)
quant1 <- as.data.frame(t(apply(Risk.mat.1, 1, quantile, probs = probs1)))
ecdfs1 <- sapply(names(quant1), function(q) ecdf(quant1[[q]]))
plot(ecdfs1[['50%']], main = '')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
lines <- mapply(function(e, g) lines(e, col = g), ecdfs1, grays)

Plotting yearrisk ECDF plot with ggplot2:

quant.melt1 <- suppressMessages(melt(quant1))
names(quant.melt1) <- c('q', 'x')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
ggplot(quant.melt1, aes(x = x)) + theme_bw() + theme(legend.position = 'none') + geom_hline(yintercept = c(0, 1), linetype = "dashed", color = 'gray') + stat_ecdf(aes(group = q, color = q)) + xlab('x') + ylab('Fn(x)') + scale_colour_manual(values = grays)

Plot Version 2 with ggplot2, and a log-scale x axis:

quant.melt1 <- suppressMessages(melt(quant1))
names(quant.melt1) <- c('q', 'x')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
ggplot(quant.melt1, aes(x = x)) + theme_bw() + theme(legend.position = 'none') + geom_hline(yintercept = c(0, 1), linetype = "dashed", color = 'gray') + stat_ecdf(aes(group = q, color = q)) + xlab('x') + ylab('Fn(x)') + scale_colour_manual(values = grays) + scale_x_log10()
## Warning: Removed 1997 rows containing non-finite values (stat_ecdf).