For this risk estimate, we will use mcmodel and evalmcmod from the mc2d package.
# Load packages.
library(mc2d)
## Loading required package: mvtnorm
##
## Attaching package: 'mc2d'
## The following objects are masked from 'package:base':
##
## pmax, pmin
library(data.table)
Set the number of simulations for the variability and uncertainty dimensions.
nsv <- 401
nsu <- 108
ndvar(nsv) # Variability
## [1] 401
ndunc(nsu) # Uncertainty
## [1] 108
rdv <- (nsu/3)
cwv <- (nsu/4)
The mc2d functions will set nsv
to ndvar()
and nsu
to ndunc()
by default. Alternatively, we could supply these values to the mc2d model functions when we call them.
Define the variables we will use to set the seed
for random sampling and the number of digits
for print()
statements.
seed <- 1
digits <- 5
Introducing the bootstrapped dose-response data, fread command and sampling from Brian:
alpha <- fread("output_dupont_crypto/alphalist.csv", sep = ',')
n50 <- fread("output_dupont_crypto/N50list.csv", sep = ',')
cdr <- merge(alpha,n50, by = "V1")
scdr <- cdr[sample(nrow(cdr), nsu, replace = TRUE), list(alpha, N50)] #sampling rows in the dose-response bootstrapped table equal to the number of simulations in the uncertainty dimension, aka NSU
Leaving that ^ for convenience, but since what we really want is a k-value list made of samples from the three different dose-response estimates (Dupont, Tamu, and UCP):
k_dup <- fread("output_dupont_crypto/klist_dupont.csv", sep = ',')
gdrkD <- k_dup[sample(nrow(k_dup), rdv, replace = TRUE), list(k)] #sampling rows in the dose-response bootstrapped table equal to 1/3 the number of simulations in the uncertainty dimension, aka NSU
k_tamu <- fread("output_tamu_crypto/klist_tamu.csv", sep = ',')
gdrkT <- k_tamu[sample(nrow(k_tamu), rdv, replace = TRUE), list(k)]
k_ucp <- fread("output_UCP_crypto/klist_ucp.csv", sep = ',')
gdrkU <- k_ucp[sample(nrow(k_ucp), rdv, replace = TRUE), list(k)]
kval <- unlist(c(gdrkD,gdrkT,gdrkU))
Sampling from the Cryptosporidium custom distribution estimates:
cw.vlg <- fread("~/u_drive/envh543/cwvl_estimates/crypto_est.csv", sep = ',')
cw.vl.samp <- sample(cw.vlg[,c(V1)], nsu, replace = TRUE) #sampling rows in the dose-response bootstrapped table equal to the number of simulations in the uncertainty dimension, aka NSU
Within the mcmodel function, use mcstoc to define “mc nodes” for each component of the model.
For each stochastic variable, use the mcstoc()
function to create a “mcnode”, supply a probability function, the node type as “V” for variability or “U” for uncertainty, and any additional parameters to be passed to the probability function.
For deterministic variables, create nodes with the mcdata()
function using the “0” type.
We will model the deterministic factors as uniform probablity distributions.
The last statement makes an “mc” object using the mc function.
# Define an exposure model for evaluation by evalmcmod().
expo.mod1 <- mcmodel({
# Values from Example 6.18 from Quantitative Microbial Risk Assessment,
# 2nd Edition by Charles N. Haas, Joan B. Rose, and Charles P. Gerba.
# (Wiley, 2014), pp. 215-216. Other fictitious values are noted below.
# Cistern water viral loading (viruses/L):
cw.vl <- mcdata(cw.vl.samp, type = "U")
# Water consumption (L/day):
dw.ing <- mcdata(1.466, type = "0")
# Hosing per event ingestion rate (L/event): worst case estimate = 10mL or .01L per day
hose.perevent <- mcdata(.01, type = "0")
# Hosing frequency (hosing/day), fictitious discrete distribution that is NOT IN USE and is only still around as an example of the discrete empirical distribution syntax:
hosing.frequency <- mcstoc(rempiricalD, type = "V", seed = seed,
values = c(0, 0.5, 1),
prob = c(0.1, 0.7, 0.2))
# Showering per event ingestion rate (L/event): worst case estimate = 10mL or .01L per day
shower.perevent <- mcdata(.01, type = "0")
# Estimate the exposure based ONLY on drinking water ingestion using the 0, V and U nodes to create a VU node.
exporaw.mc1 <- (cw.vl * (dw.ing + hose.perevent + shower.perevent))
#Reduction by 2ppm free chlorine:
exporaw.2ppm_free_chl <- (exporaw.mc1 * .4904)
#Reduction by 15ppm free chlorine:
exporaw.15ppm_free_chl <- (exporaw.mc1 * .47)
#Reduction by UV filtration:
exporaw.uv <- (exporaw.mc1 * .0001)
#Reduction by UV filtration and 2ppmchlorine:
exporaw.uv_and_2ppm_chl <- (exporaw.mc1 * .0001 * .4904)
#Reduction by porous ceramic filtration - this is based on the "range" of values from 66.1% reduction to 99.6% reduction, but can be changed to just 99% reduction as well (that's the next version, see below):
pfc_reduc <- mcstoc(rempiricalD, type = "V", seed = seed, values = c(0.004, 0.135, 0.266, 0.39), prob = c(0.25, 0.25, 0.25, 0.25))
exporaw.porous_ceram <- (exporaw.mc1 * pfc_reduc)
#Reduction by porous ceramic filtration, assuming 99% reduction:
exporaw.por_ceram_singval <- (exporaw.mc1 * .01)
#Reduction by UV filtration, ceramic filtration (single value), and 2ppmchlorine:
exporaw.uv_2ppm_chl_cerfilt <- (exporaw.mc1 * .0001 * .4904 * .01)
#Reduction by ceramic filter (single value) and 2ppmchlorine:
exporaw.2ppm_chl_cerfilt <- (exporaw.mc1 * .4904 * .01)
#Final exposure - just make this whichever one you want to use at the moment:
expo.mc1 <- exporaw.2ppm_chl_cerfilt
#Estimate the risk using a dose-response model pulling from a bootstrapped model of the dose-response
dose <- mcstoc(rpois, type = "VU", lambda = expo.mc1)
# Create mcnodes from previously sampled dose-response bootstrap estimates. - Brian
a <- mcdata(scdr$alpha, type = "U")
n50 <- mcdata(scdr$N50, type = "U")
#this equation is my version of what came off QMRA Wiki <http://qmrawiki.canr.msu.edu/index.php?title=Dose_response_assessment> and someone should check it. But it's beta-poisson, and just left here for convenience.
riskbp <- 1 - (1 + dose*((2^(1/a)-1)/n50))^(-a)
#Exponential version of the equation (also from QMRA Wiki and please lmk if it looks wrong):
k <- mcdata(kval, type = "U")
risk <- (1 - exp(-dose*k))
#yearlyrisk
yearrisk <- 1- ((1 - risk)^365)
# Build a mc model from all of the mcnode objects.
mc(cw.vl, dw.ing, hose.perevent, shower.perevent, exporaw.2ppm_free_chl, exporaw.15ppm_free_chl, exporaw.uv, exporaw.uv_and_2ppm_chl, exporaw.uv_2ppm_chl_cerfilt, exporaw.2ppm_chl_cerfilt, exporaw.porous_ceram, pfc_reduc, a, n50, expo.mc1, riskbp, risk, yearrisk)
})
Evaluate the model with 5000 iterations in the variability dimension and 250 iterations in the uncertainty dimension, as set previously.
expo.ev1 <- evalmcmod(expo.mod1, seed = seed)
print(expo.ev1, digits = digits)
## node mode nsv nsu nva variate min
## 1 cw.vl numeric 1 108 1 1 0.00000
## 2 dw.ing numeric 1 1 1 1 1.46600
## 3 hose.perevent numeric 1 1 1 1 0.01000
## 4 shower.perevent numeric 1 1 1 1 0.01000
## 5 exporaw.2ppm_free_chl numeric 1 108 1 1 0.00000
## 6 exporaw.15ppm_free_chl numeric 1 108 1 1 0.00000
## 7 exporaw.uv numeric 1 108 1 1 0.00000
## 8 exporaw.uv_and_2ppm_chl numeric 1 108 1 1 0.00000
## 9 exporaw.uv_2ppm_chl_cerfilt numeric 1 108 1 1 0.00000
## 10 exporaw.2ppm_chl_cerfilt numeric 1 108 1 1 0.00000
## 11 exporaw.porous_ceram numeric 401 108 1 1 0.00000
## 12 pfc_reduc numeric 401 1 1 1 0.00400
## 13 a numeric 1 108 1 1 0.29676
## 14 n50 numeric 1 108 1 1 17.81775
## 15 expo.mc1 numeric 1 108 1 1 0.00000
## 16 riskbp numeric 401 108 1 1 0.00000
## 17 risk numeric 401 108 1 1 0.00000
## 18 yearrisk numeric 401 108 1 1 0.00000
## mean median max Nas type outm
## 1 5.0485e+00 0.000 7.0290e+01 0 U each
## 2 1.4660e+00 1.466 1.4660e+00 0 0 each
## 3 1.0000e-02 0.010 1.0000e-02 0 0 each
## 4 1.0000e-02 0.010 1.0000e-02 0 0 each
## 5 3.6790e+00 0.000 5.1223e+01 0 U each
## 6 3.5260e+00 0.000 4.9092e+01 0 U each
## 7 7.5021e-04 0.000 1.0445e-02 0 U each
## 8 3.6790e-04 0.000 5.1223e-03 0 U each
## 9 3.6790e-06 0.000 5.1223e-05 0 U each
## 10 3.6790e-02 0.000 5.1223e-01 0 U each
## 11 1.5011e+00 0.000 4.0736e+01 0 VU each
## 12 2.0009e-01 0.266 3.9000e-01 0 V each
## 13 4.3555e+03 198.998 2.7950e+04 0 U each
## 14 1.4539e+02 136.972 3.2547e+02 0 U each
## 15 3.6790e-02 0.000 5.1223e-01 0 U each
## 16 3.0633e-04 0.000 5.3532e-02 0 VU each
## 17 1.7487e-04 0.000 2.8436e-02 0 VU each
## 18 2.5572e-02 0.000 9.9997e-01 0 VU each
Print a summary and a plot of the evaluation results (expo.ev1
).
# Summarize the results.
summary(expo.ev1)
## cw.vl :
## NoVar
## median 0.00
## mean 5.05
## 2.5% 0.00
## 97.5% 70.29
##
## dw.ing :
## NoUnc
## NoVar 1.47
##
## hose.perevent :
## NoUnc
## NoVar 0.01
##
## shower.perevent :
## NoUnc
## NoVar 0.01
##
## exporaw.2ppm_free_chl :
## NoVar
## median 0.00
## mean 3.68
## 2.5% 0.00
## 97.5% 51.22
##
## exporaw.15ppm_free_chl :
## NoVar
## median 0.00
## mean 3.53
## 2.5% 0.00
## 97.5% 49.09
##
## exporaw.uv :
## NoVar
## median 0.00000
## mean 0.00075
## 2.5% 0.00000
## 97.5% 0.01045
##
## exporaw.uv_and_2ppm_chl :
## NoVar
## median 0.000000
## mean 0.000368
## 2.5% 0.000000
## 97.5% 0.005122
##
## exporaw.uv_2ppm_chl_cerfilt :
## NoVar
## median 0.00e+00
## mean 3.68e-06
## 2.5% 0.00e+00
## 97.5% 5.12e-05
##
## exporaw.2ppm_chl_cerfilt :
## NoVar
## median 0.0000
## mean 0.0368
## 2.5% 0.0000
## 97.5% 0.5122
##
## exporaw.porous_ceram :
## mean sd Min 2.5% 25% 50% 75% 97.5% Max nsv Na's
## median 0.0 0.00 0.000 0.000 0.00 0.0 0.0 0.00 0.00 401 0
## mean 1.5 1.05 0.030 0.030 1.01 2.0 2.0 2.93 2.93 401 0
## 2.5% 0.0 0.00 0.000 0.000 0.00 0.0 0.0 0.00 0.00 401 0
## 97.5% 20.9 14.58 0.418 0.418 14.10 27.8 27.8 40.74 40.74 401 0
##
## pfc_reduc :
## mean sd Min 2.5% 25% 50% 75% 97.5% Max nsv Na's
## NoUnc 0.2 0.14 0.004 0.004 0.135 0.266 0.266 0.39 0.39 401 0
##
## a :
## NoVar
## median 1.99e+02
## mean 4.36e+03
## 2.5% 3.83e-01
## 97.5% 2.40e+04
##
## n50 :
## NoVar
## median 137.0
## mean 145.4
## 2.5% 39.9
## 97.5% 302.7
##
## expo.mc1 :
## NoVar
## median 0.0000
## mean 0.0368
## 2.5% 0.0000
## 97.5% 0.5122
##
## riskbp :
## mean sd Min 2.5% 25% 50% 75% 97.5% Max nsv Na's
## median 0.000000 0.000000 0 0 0 0 0.000000 0.0000 0.00000 401 0
## mean 0.000306 0.000659 0 0 0 0 0.000481 0.0019 0.00412 401 0
## 2.5% 0.000000 0.000000 0 0 0 0 0.000000 0.0000 0.00000 401 0
## 97.5% 0.003792 0.005209 0 0 0 0 0.007218 0.0170 0.03108 401 0
##
## risk :
## mean sd Min 2.5% 25% 50% 75% 97.5% Max nsv
## median 0.000000 0.000000 0 0 0 0 0.000000 0.00000 0.00000 401
## mean 0.000175 0.000397 0 0 0 0 0.000263 0.00129 0.00254 401
## 2.5% 0.000000 0.000000 0 0 0 0 0.000000 0.00000 0.00000 401
## 97.5% 0.002432 0.003370 0 0 0 0 0.004564 0.01073 0.01966 401
## Na's
## median 0
## mean 0
## 2.5% 0
## 97.5% 0
##
## yearrisk :
## mean sd Min 2.5% 25% 50% 75% 97.5% Max nsv Na's
## median 0.0000 0.000 0 0 0 0 0.0000 0.000 0.000 401 0
## mean 0.0256 0.058 0 0 0 0 0.0448 0.172 0.277 401 0
## 2.5% 0.0000 0.000 0 0 0 0 0.0000 0.000 0.000 401 0
## 97.5% 0.3474 0.420 0 0 0 0 0.8115 0.976 0.999 401 0
# Plot the results.
plot(expo.ev1)
Report the mean and median of the exposure means with a 95% confidence interval (CI95).
mean.expo <- sapply(1:ndunc(), function(j) mean(expo.ev1$expo.mc1[, j, ]))
mean(mean.expo)
## [1] 0.03679029
quantile(mean.expo, probs = seq(0, 1, 0.025))[c("2.5%", "50%", "97.5%")]
## 2.5% 50% 97.5%
## 0.0000000 0.0000000 0.5122274
Report the mean and median of the DAILY risk probability means with a 95% confidence interval (CI95).
mean.risk <- sapply(1:ndunc(), function(j) mean(expo.ev1$risk[, j, ]))
mean(mean.risk)
## [1] 0.0001748697
quantile(mean.risk, probs = seq(0, 1, 0.025))[c("2.5%", "50%", "97.5%")]
## 2.5% 50% 97.5%
## 0.000000000 0.000000000 0.002432356
Report the mean and median of the YEARLY risk probability means with a 95% confidence interval (CI95).
mean.yearrisk <- sapply(1:ndunc(), function(j) mean(expo.ev1$yearrisk[, j, ]))
mean(mean.yearrisk)
## [1] 0.02557158
quantile(mean.yearrisk, probs = seq(0, 1, 0.025))[c("2.5%", "50%", "97.5%")]
## 2.5% 50% 97.5%
## 0.000000 0.000000 0.347416
Plot the empirical cumulative distribution function (ecdf) of the exposure model (expo.mc1
) estimates.
# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$expo.mc1)
Plot the empirical cumulative distribution function (ecdf) of the risk model (risk
) estimates.
# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$risk)
So if we did this right, could the plot above can be correctly labeled as follows?
# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$risk, main = "", ylab = "Proportion of Estimates", xlab = "Daily Probability of Infection")
title("Daily Infec. Risk with Cryptosporidium by RCRW
in the USVI", cex.main = 1, font.main= 1, line = 7)
# Generate an "ecdf" plot. This actually calls plot.mcnode().
plot(expo.ev1$yearrisk, main = "", ylab = "Proportion of Estimates", xlab = "Yearly Probability of Infection")
title("Yearly Infec. Risk with Cryptosporidium by RCRW
in the USVI", cex.main = 1, font.main= 1, line = 7)
Elena/Mary - this stuff is just me trying to figure out what’s going wrong with with the logarithmic graphmaking.
Manually remake the graph, this time using yearrisk, which should just be another mcnode object with all values between 0 and 1:
The first one plots yearrisk without a log x scale:
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:data.table':
##
## melt
library(ggplot2)
Risk.mat.1 <- expo.ev1$yearrisk[,,]
probs1 <- c(0.025, 0.25, 0.50, 0.75, 0.975)
quant1 <- as.data.frame(t(apply(Risk.mat.1, 1, quantile, probs = probs1)))
ecdfs1 <- sapply(names(quant1), function(q) ecdf(quant1[[q]]))
plot(ecdfs1[['50%']], main = '')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
lines <- mapply(function(e, g) lines(e, col = g), ecdfs1, grays)
Plotting yearrisk ECDF plot with ggplot2:
quant.melt1 <- suppressMessages(melt(quant1))
names(quant.melt1) <- c('q', 'x')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
ggplot(quant.melt1, aes(x = x)) + theme_bw() + theme(legend.position = 'none') + geom_hline(yintercept = c(0, 1), linetype = "dashed", color = 'gray') + stat_ecdf(aes(group = q, color = q)) + xlab('x') + ylab('Fn(x)') + scale_colour_manual(values = grays)
Plot Version 2 with ggplot2, and a log-scale x axis:
quant.melt1 <- suppressMessages(melt(quant1))
names(quant.melt1) <- c('q', 'x')
grays <- c('gray75', 'gray35', 'black', 'gray35', 'gray75')
ggplot(quant.melt1, aes(x = x)) + theme_bw() + theme(legend.position = 'none') + geom_hline(yintercept = c(0, 1), linetype = "dashed", color = 'gray') + stat_ecdf(aes(group = q, color = q)) + xlab('x') + ylab('Fn(x)') + scale_colour_manual(values = grays) + scale_x_log10()
## Warning: Removed 1730 rows containing non-finite values (stat_ecdf).