library(tidyverse)
library(rstan)
library(bayesplot)
library(corrplot)
library(sensitivity)
library(pksensi)
theme_set(theme_light())
Prepare model (perc.model.R) and input (perc_mcmc.in.R) files. This input file contains MCMC simulation descriptions based on Monster et al. (1989) paper. The study measured the TCE in blood (C_ven) and exhaled air (C_exh_ug) for a group of 6 subjects exposed to two different concentrations (72 and 144 ppm) of TCE in air.
Modeling
model <- "perc.model.R"
input <- "perc_mcmc.in.R"
set.seed(1234)
system.time(out_1 <- mcsim(model, input, dir = "modeling/perc"))
## * Create 'MCMC.check.out' from the last iteration.
## user system elapsed
## 41.047 0.109 41.158
head(out_1)
## iter sc_Vmax.1. Km.1. LnPrior LnData LnPosterior
## 1 0 0.299836 2.34383 -3.747032 -34446.920 -34450.660
## 2 1 0.299836 168.11200 -8.251050 -1781.688 -1789.939
## 3 2 0.295656 187.34100 -8.390727 -1655.223 -1663.613
## 4 3 0.295656 231.52900 -8.707851 -1526.996 -1535.704
## 5 4 0.295656 231.52900 -8.707851 -1526.996 -1535.704
## 6 5 0.295656 231.52900 -8.707851 -1526.996 -1535.704
Diagnosis
str <- ceiling(nrow(out_1)/2) + 1
end <- nrow(out_1)
j <- c(str:end) # discard burn-in
plot(out_1$iter[j], out_1$sc_Vmax.1.[j], type = "l")
plot(out_1$iter[j], out_1$Km.1.[j], type = "l")
out_1$sc_Vmax.1.[j] %>% density %>% plot()
out_1$Km.1.[j] %>% density() %>% plot()
par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_1$sc_Vmax.1.[j])
acf(out_1$Km.1.[j])
cor <- cor(out_1$sc_Vmax.1.[j], out_1$Km.1.[j])
plot(out_1$sc_Vmax.1.[j], out_1$Km.1.[j],
main = paste("r = ", round(cor, digits = 3)))
Checking model fit
chk_1 <- read.delim("MCMC.check.out")
tail(chk_1)
## Level Simulation Output_Var Time Data Prediction
## 175 1_12 12 C_ven 360 1.760 1.7123300
## 176 1_12 12 C_ven 1320 0.245 0.1316280
## 177 1_12 12 C_ven 2760 0.160 0.0902648
## 178 1_12 12 C_ven 4260 0.120 0.0751109
## 179 1_12 12 C_ven 5700 0.098 0.0629977
## 180 1_12 12 C_ven 10020 0.052 0.0371824
For multi level simulation, the “Level” column include “population-experiments”
R_square <- cor(chk_1$Data, chk_1$Prediction)^2
plot(chk_1$Data, chk_1$Prediction, log = "xy",
xlim = c(0.001, 1000), ylim = c(0.001, 1000),
xlab = "Observation", ylab = "Prediction",
main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)
chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
geom_point() +
geom_line(aes(y = Prediction))+
facet_wrap(~Simulation) +
scale_y_log10() +
labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")
chk_1 %>% filter(Output_Var == "C_exh_ug") %>%
ggplot(aes(x = Time, y = Data)) +
geom_point() +
geom_line(aes(y = Prediction))+
facet_wrap(~Simulation) +
scale_y_log10() +
labs(x = "Time (min)", y = "Concentration (ug/L)", title = "Concentration exhaled air")
Checking convergence
model <- "perc.model.R"
input <- "perc_mcmc.in.R"
set.seed(2345)
out_1_2 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(3344)
out_1_3 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(4455)
out_1_4 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
x <- mcmc_array(list(out_1, out_1_2, out_1_3, out_1_4))
parms <- c("sc_Vmax.1.", "Km.1.")
mcmc_dens_overlay(x[j,,], pars = parms)
mcmc_trace(x[j,,], pars = parms)
monitor(x, digits = 4)
## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
##
## mean se_mean sd 2.5% 25% 50%
## iter 6000.0000 364.5813 1155.0253 4100.0000 5000.0000 6000.0000
## sc_Vmax.1. 0.0033 0.0001 0.0005 0.0025 0.0029 0.0032
## Km.1. 1.3517 0.0392 0.2788 0.9142 1.1445 1.3082
## LnPrior 0.8625 0.0249 0.1797 0.5085 0.7403 0.8793
## LnData -1387.5392 0.0690 0.9857 -1390.2210 -1387.9720 -1387.2590
## LnPosterior -1386.6767 0.0781 1.0277 -1389.4249 -1387.1360 -1386.3790
## 75% 97.5% n_eff Rhat
## iter 7000.0000 7900.0000 10 2.1047
## sc_Vmax.1. 0.0036 0.0042 52 1.0704
## Km.1. 1.5252 1.9435 51 1.0711
## LnPrior 0.9945 1.1735 52 1.0711
## LnData -1386.8160 -1386.5010 204 1.0132
## LnPosterior -1385.9170 -1385.6050 173 1.0170
##
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Evaluation of model fit
X <- x[j,,] %>% matrix(nrow = 16000)
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")
## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R
vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile, c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_1$med <- df$median
chk_1$LCL <- df$LCL
chk_1$UCL <- df$UCL
chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
geom_point() +
geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
geom_line(aes(y = med)) +
facet_wrap(~Simulation) +
scale_y_log10() +
labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")
Note: The 95% confidence interval is too narrow for visual observing.
Modeling
model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"
set.seed(1234)
system.time(out_2 <- mcsim(model, input, dir = "modeling/perc"))
## * Create 'MCMC.check.out' from the last iteration.
## user system elapsed
## 46.896 0.190 47.104
head(out_2)
## iter sc_Vmax.1. Km.1. sc_Vmax.1.1. Km.1.1. sc_Vmax.1.2. Km.1.2.
## 1 0 0.0376081 310.371 0.0217434 685.401 0.00766265 1315.590
## 2 1 0.0376081 310.371 0.0297153 160.938 0.00702677 947.047
## 3 2 0.0376081 459.349 0.0288894 160.938 0.00614582 1022.530
## 4 3 0.0376081 265.157 0.0370984 160.938 0.00603613 970.740
## 5 4 0.0376081 265.157 0.0370984 112.163 0.00713647 726.030
## 6 5 0.0376081 265.157 0.0370984 112.163 0.00838208 629.015
## sc_Vmax.1.3. Km.1.3. sc_Vmax.1.4. Km.1.4. sc_Vmax.1.5. Km.1.5.
## 1 0.00351614 462.05500 0.0381820 791.739 0.129087 1054.850
## 2 0.00213336 462.05500 0.0464026 791.739 0.129087 944.273
## 3 0.00213336 511.48500 0.0510909 791.739 0.130996 132.865
## 4 0.00213336 764.86600 0.0510909 791.739 0.131582 132.865
## 5 0.00651876 805.08700 0.0510909 791.739 0.132688 132.865
## 6 0.01519300 2.12759 0.0511913 791.739 0.133172 132.865
## sc_Vmax.1.6. Km.1.6. LnPrior LnData LnPosterior
## 1 0.0321897 270.02 -41.20180 -2249.799 -2291.001
## 2 0.0321897 270.02 -39.81937 -2206.786 -2246.605
## 3 0.0306556 270.02 -38.77012 -2153.866 -2192.636
## 4 0.0241955 270.02 -38.66947 -2146.738 -2185.407
## 5 0.0488277 270.02 -37.59740 -2122.446 -2160.043
## 6 0.0433307 270.02 -40.32183 -1951.073 -1991.395
Diagnosis
str <- ceiling(nrow(out_2)/2) + 1
end <- nrow(out_2)
j <- c(str:end) # discard burn-in
plot(out_2$iter[j], out_2$sc_Vmax.1.[j], type = "l")
plot(out_2$iter[j], out_2$Km.1.[j], type = "l")
out_2$sc_Vmax.1.[j] %>% density %>% plot()
out_2$Km.1.[j] %>% density() %>% plot()
par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_2$sc_Vmax.1.[j])
acf(out_2$Km.1.[j])
cor_2 <- cor(out_2$Km.1.[j], out_2$sc_Vmax.1.[j])
plot(out_2$Km.1.[j], out_2$sc_Vmax.1.[j],
xlab = "Km", ylab = "Vmax",
main = paste("r =", round(cor_2, digits = 3)))
Evaluation of model fit
chk_2 <- read.delim("MCMC.check.out")
Compare the different between single and multi level
For multi level simulation, the “Level” column include “population-individuals-experiments”
tail(chk_1)
## Level Simulation Output_Var Time Data Prediction med
## 175 1_12 12 C_ven 360 1.760 1.7123300 1.71130000
## 176 1_12 12 C_ven 1320 0.245 0.1316280 0.13142300
## 177 1_12 12 C_ven 2760 0.160 0.0902648 0.09022060
## 178 1_12 12 C_ven 4260 0.120 0.0751109 0.07509865
## 179 1_12 12 C_ven 5700 0.098 0.0629977 0.06300090
## 180 1_12 12 C_ven 10020 0.052 0.0371824 0.03721525
## LCL UCL
## 175 1.69721975 1.72119000
## 176 0.12885090 0.13395900
## 177 0.08899328 0.09143501
## 178 0.07406419 0.07614902
## 179 0.06206130 0.06396182
## 180 0.03642380 0.03805150
tail(chk_2)
## Level Simulation Output_Var Time Data Prediction
## 175 1_6_2 12 C_ven 360 1.760 1.7550700
## 176 1_6_2 12 C_ven 1320 0.245 0.1569630
## 177 1_6_2 12 C_ven 2760 0.160 0.1090500
## 178 1_6_2 12 C_ven 4260 0.120 0.0925396
## 179 1_6_2 12 C_ven 5700 0.098 0.0789962
## 180 1_6_2 12 C_ven 10020 0.052 0.0486749
R_square <- cor(chk_2$Data, chk_2$Prediction)^2
plot(chk_2$Data, chk_2$Prediction, log = "xy",
xlim = c(0.001, 1000), ylim = c(0.001, 1000),
xlab = "Observation", ylab = "Prediction",
main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)
chk_2 %>%
separate(col = Level, c("Pop", "Subj", "Exp")) %>%
filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
geom_point() +
geom_line(aes(y = Prediction))+
facet_grid(Exp~Subj) +
scale_y_log10() +
labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration venous blood")
model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"
set.seed(2345)
out_2_2 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(3344)
out_2_3 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(4444)
out_2_4 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
x <- mcmc_array(list(out_2, out_2_2, out_2_3, out_2_4))
Diagnosis
parms <- c("sc_Vmax.1.", "Km.1.", "sc_Vmax.1.1.", "Km.1.1.", "sc_Vmax.1.2.",
"Km.1.2.", "sc_Vmax.1.3.", "Km.1.3.", "sc_Vmax.1.4.", "Km.1.4.", "sc_Vmax.1.5.",
"Km.1.5.", "sc_Vmax.1.6.", "Km.1.6.")
mcmc_dens_overlay(x[j,,], pars = parms)
mcmc_trace(x[j,,], pars = parms)
monitor(x, digits = 4)
## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
##
## mean se_mean sd 2.5% 25%
## iter 6000.0000 364.5813 1155.0253 4100.0000 5000.0000
## sc_Vmax.1. 0.0027 0.0000 0.0013 0.0010 0.0018
## Km.1. 0.5918 0.0152 0.3113 0.1952 0.3739
## sc_Vmax.1.1. 0.0012 0.0000 0.0004 0.0007 0.0010
## Km.1.1. 0.3511 0.0290 0.2453 0.0895 0.1977
## sc_Vmax.1.2. 0.0018 0.0000 0.0003 0.0013 0.0015
## Km.1.2. 0.3806 0.0085 0.1378 0.1813 0.2812
## sc_Vmax.1.3. 0.0022 0.0000 0.0003 0.0018 0.0020
## Km.1.3. 0.2653 0.0048 0.0746 0.1529 0.2119
## sc_Vmax.1.4. 0.0016 0.0000 0.0001 0.0013 0.0015
## Km.1.4. 0.0564 0.0020 0.0188 0.0263 0.0426
## sc_Vmax.1.5. 0.0448 0.0020 0.0189 0.0183 0.0314
## Km.1.5. 54.0461 2.5102 23.5467 21.4925 37.5163
## sc_Vmax.1.6. 0.0004 0.0000 0.0003 0.0003 0.0003
## Km.1.6. 0.2619 0.0616 0.5553 0.0205 0.0773
## LnPrior 11.9482 0.3873 3.9497 3.8243 9.3635
## LnData -1019.9617 0.4094 3.7188 -1028.3816 -1022.0412
## LnPosterior -1008.0135 0.3486 3.9949 -1017.4530 -1010.3030
## 50% 75% 97.5% n_eff Rhat
## iter 6000.0000 7000.0000 7900.0000 10 2.1047
## sc_Vmax.1. 0.0024 0.0033 0.0060 1226 1.0065
## Km.1. 0.5238 0.7324 1.3793 420 1.0031
## sc_Vmax.1.1. 0.0011 0.0014 0.0022 74 1.0560
## Km.1.1. 0.2938 0.4260 0.9978 72 1.0600
## sc_Vmax.1.2. 0.0017 0.0019 0.0025 258 1.0347
## Km.1.2. 0.3581 0.4519 0.7134 266 1.0338
## sc_Vmax.1.3. 0.0022 0.0024 0.0028 241 1.0141
## Km.1.3. 0.2539 0.3066 0.4379 246 1.0147
## sc_Vmax.1.4. 0.0016 0.0017 0.0018 101 1.0353
## Km.1.4. 0.0546 0.0678 0.0981 90 1.0378
## sc_Vmax.1.5. 0.0409 0.0540 0.0938 87 1.0352
## Km.1.5. 49.4873 65.7160 115.7525 88 1.0351
## sc_Vmax.1.6. 0.0004 0.0005 0.0010 83 1.0356
## Km.1.6. 0.1364 0.2552 1.4367 81 1.0403
## LnPrior 12.0756 14.7674 19.2462 104 1.0373
## LnData -1019.6140 -1017.3107 -1013.8393 82 1.0367
## LnPosterior -1007.4740 -1005.0850 -1001.7610 131 1.0216
##
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Evaluation of model fit
X <- x[j,,] %>% matrix(nrow = 16000)
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")
## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R
vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile, c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_2$med <- df$median
chk_2$LCL <- df$LCL
chk_2$UCL <- df$UCL
chk_2 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
geom_point() +
geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
geom_line(aes(y = med)) +
facet_wrap(~Simulation) +
scale_y_log10() +
labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")
How how many threads your CPU has?
parallel::detectCores()
Parallel computing
Prepare the job file first, the example job file is put into “examples/perc_job.R”.
Set importEnv = T to assign the random seed to the job. The exportEnv is used to assign the name of object to store the output data.
library(rstudioapi)
set.seed(1234)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_1")
set.seed(2345)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_2")
set.seed(3344)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_3")
set.seed(4444)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_4")
x <- mcmc_array(list(job_1$out_2, job_2$out_2, job_3$out_2, job_4$out_2))