library(tidyverse)
library(rstan)
library(bayesplot)
library(corrplot)
library(sensitivity)
library(pksensi)
theme_set(theme_light())

3 Tetrachloroethylene PBPK Model

Prepare model (perc.model.R) and input (perc_mcmc.in.R) files. This input file contains MCMC simulation descriptions based on Monster et al. (1989) paper. The study measured the TCE in blood (C_ven) and exhaled air (C_exh_ug) for a group of 6 subjects exposed to two different concentrations (72 and 144 ppm) of TCE in air.

3.1 Single level modeling

3.1.1 Single chain testing

Modeling

model <- "perc.model.R"
input <- "perc_mcmc.in.R"   
set.seed(1234)
system.time(out_1 <- mcsim(model, input, dir = "modeling/perc"))
## * Create 'MCMC.check.out' from the last iteration.
##    user  system elapsed 
##  41.047   0.109  41.158
head(out_1)
##   iter sc_Vmax.1.     Km.1.   LnPrior     LnData LnPosterior
## 1    0   0.299836   2.34383 -3.747032 -34446.920  -34450.660
## 2    1   0.299836 168.11200 -8.251050  -1781.688   -1789.939
## 3    2   0.295656 187.34100 -8.390727  -1655.223   -1663.613
## 4    3   0.295656 231.52900 -8.707851  -1526.996   -1535.704
## 5    4   0.295656 231.52900 -8.707851  -1526.996   -1535.704
## 6    5   0.295656 231.52900 -8.707851  -1526.996   -1535.704

Diagnosis

str <- ceiling(nrow(out_1)/2) + 1
end <- nrow(out_1)
j <- c(str:end) # discard burn-in
plot(out_1$iter[j], out_1$sc_Vmax.1.[j], type = "l")

plot(out_1$iter[j], out_1$Km.1.[j], type = "l")

out_1$sc_Vmax.1.[j] %>% density %>% plot()

out_1$Km.1.[j] %>% density() %>% plot()

par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_1$sc_Vmax.1.[j])
acf(out_1$Km.1.[j])

cor <- cor(out_1$sc_Vmax.1.[j], out_1$Km.1.[j])
plot(out_1$sc_Vmax.1.[j], out_1$Km.1.[j],
     main = paste("r = ", round(cor, digits = 3)))

Checking model fit

  1. Global evaluation
chk_1 <- read.delim("MCMC.check.out")
tail(chk_1)
##     Level Simulation Output_Var  Time  Data Prediction
## 175  1_12         12      C_ven   360 1.760  1.7123300
## 176  1_12         12      C_ven  1320 0.245  0.1316280
## 177  1_12         12      C_ven  2760 0.160  0.0902648
## 178  1_12         12      C_ven  4260 0.120  0.0751109
## 179  1_12         12      C_ven  5700 0.098  0.0629977
## 180  1_12         12      C_ven 10020 0.052  0.0371824

For multi level simulation, the “Level” column include “population-experiments”

R_square <- cor(chk_1$Data, chk_1$Prediction)^2
plot(chk_1$Data, chk_1$Prediction, log = "xy",
     xlim = c(0.001, 1000), ylim = c(0.001, 1000),
     xlab = "Observation", ylab = "Prediction",
     main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)

  1. Individual evaluation
  1. Concentration in venous blood
chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

  1. Concentration in exhaled air
chk_1 %>% filter(Output_Var == "C_exh_ug") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (ug/L)", title = "Concentration exhaled air")

3.1.2 Checking convergence and model fit with multi chains

Checking convergence

model <- "perc.model.R"
input <- "perc_mcmc.in.R"   
set.seed(2345)
out_1_2 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(3344)
out_1_3 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(4455)
out_1_4 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
x <- mcmc_array(list(out_1, out_1_2, out_1_3, out_1_4))
parms <- c("sc_Vmax.1.", "Km.1.")
mcmc_dens_overlay(x[j,,], pars = parms)

mcmc_trace(x[j,,], pars = parms)

monitor(x, digits = 4)
## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
## 
##                   mean  se_mean        sd       2.5%        25%        50%
## iter         6000.0000 364.5813 1155.0253  4100.0000  5000.0000  6000.0000
## sc_Vmax.1.      0.0033   0.0001    0.0005     0.0025     0.0029     0.0032
## Km.1.           1.3517   0.0392    0.2788     0.9142     1.1445     1.3082
## LnPrior         0.8625   0.0249    0.1797     0.5085     0.7403     0.8793
## LnData      -1387.5392   0.0690    0.9857 -1390.2210 -1387.9720 -1387.2590
## LnPosterior -1386.6767   0.0781    1.0277 -1389.4249 -1387.1360 -1386.3790
##                    75%      97.5% n_eff   Rhat
## iter         7000.0000  7900.0000    10 2.1047
## sc_Vmax.1.      0.0036     0.0042    52 1.0704
## Km.1.           1.5252     1.9435    51 1.0711
## LnPrior         0.9945     1.1735    52 1.0711
## LnData      -1386.8160 -1386.5010   204 1.0132
## LnPosterior -1385.9170 -1385.6050   173 1.0170
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Evaluation of model fit

  1. Setpoints analysis
X <- x[j,,] %>% matrix(nrow = 16000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")
## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R
  1. Tidy data with median and 95% confidence interval
vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_1$med <- df$median
chk_1$LCL <- df$LCL
chk_1$UCL <- df$UCL
  1. Plot result
chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
  geom_line(aes(y = med)) +
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

Note: The 95% confidence interval is too narrow for visual observing.

3.2 Hierarchical modeling

3.2.1 Single chain testing

Modeling

model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"   
set.seed(1234)
system.time(out_2 <- mcsim(model, input, dir = "modeling/perc"))
## * Create 'MCMC.check.out' from the last iteration.
##    user  system elapsed 
##  46.896   0.190  47.104
head(out_2)
##   iter sc_Vmax.1.   Km.1. sc_Vmax.1.1. Km.1.1. sc_Vmax.1.2.  Km.1.2.
## 1    0  0.0376081 310.371    0.0217434 685.401   0.00766265 1315.590
## 2    1  0.0376081 310.371    0.0297153 160.938   0.00702677  947.047
## 3    2  0.0376081 459.349    0.0288894 160.938   0.00614582 1022.530
## 4    3  0.0376081 265.157    0.0370984 160.938   0.00603613  970.740
## 5    4  0.0376081 265.157    0.0370984 112.163   0.00713647  726.030
## 6    5  0.0376081 265.157    0.0370984 112.163   0.00838208  629.015
##   sc_Vmax.1.3.   Km.1.3. sc_Vmax.1.4. Km.1.4. sc_Vmax.1.5.  Km.1.5.
## 1   0.00351614 462.05500    0.0381820 791.739     0.129087 1054.850
## 2   0.00213336 462.05500    0.0464026 791.739     0.129087  944.273
## 3   0.00213336 511.48500    0.0510909 791.739     0.130996  132.865
## 4   0.00213336 764.86600    0.0510909 791.739     0.131582  132.865
## 5   0.00651876 805.08700    0.0510909 791.739     0.132688  132.865
## 6   0.01519300   2.12759    0.0511913 791.739     0.133172  132.865
##   sc_Vmax.1.6. Km.1.6.   LnPrior    LnData LnPosterior
## 1    0.0321897  270.02 -41.20180 -2249.799   -2291.001
## 2    0.0321897  270.02 -39.81937 -2206.786   -2246.605
## 3    0.0306556  270.02 -38.77012 -2153.866   -2192.636
## 4    0.0241955  270.02 -38.66947 -2146.738   -2185.407
## 5    0.0488277  270.02 -37.59740 -2122.446   -2160.043
## 6    0.0433307  270.02 -40.32183 -1951.073   -1991.395

Diagnosis

  1. Trace plot
str <- ceiling(nrow(out_2)/2) + 1
end <- nrow(out_2)
j <- c(str:end) # discard burn-in
plot(out_2$iter[j], out_2$sc_Vmax.1.[j], type = "l")

plot(out_2$iter[j], out_2$Km.1.[j], type = "l")

  1. Kernel plot
out_2$sc_Vmax.1.[j] %>% density %>% plot()

out_2$Km.1.[j] %>% density() %>% plot()

  1. Auto-correlation plot
par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_2$sc_Vmax.1.[j])
acf(out_2$Km.1.[j])

  1. Pair plot
cor_2 <- cor(out_2$Km.1.[j], out_2$sc_Vmax.1.[j])
plot(out_2$Km.1.[j], out_2$sc_Vmax.1.[j], 
     xlab = "Km", ylab = "Vmax",
     main = paste("r =", round(cor_2, digits = 3)))

Evaluation of model fit

chk_2 <- read.delim("MCMC.check.out")

Compare the different between single and multi level

For multi level simulation, the “Level” column include “population-individuals-experiments”

tail(chk_1)
##     Level Simulation Output_Var  Time  Data Prediction        med
## 175  1_12         12      C_ven   360 1.760  1.7123300 1.71130000
## 176  1_12         12      C_ven  1320 0.245  0.1316280 0.13142300
## 177  1_12         12      C_ven  2760 0.160  0.0902648 0.09022060
## 178  1_12         12      C_ven  4260 0.120  0.0751109 0.07509865
## 179  1_12         12      C_ven  5700 0.098  0.0629977 0.06300090
## 180  1_12         12      C_ven 10020 0.052  0.0371824 0.03721525
##            LCL        UCL
## 175 1.69721975 1.72119000
## 176 0.12885090 0.13395900
## 177 0.08899328 0.09143501
## 178 0.07406419 0.07614902
## 179 0.06206130 0.06396182
## 180 0.03642380 0.03805150
tail(chk_2)
##     Level Simulation Output_Var  Time  Data Prediction
## 175 1_6_2         12      C_ven   360 1.760  1.7550700
## 176 1_6_2         12      C_ven  1320 0.245  0.1569630
## 177 1_6_2         12      C_ven  2760 0.160  0.1090500
## 178 1_6_2         12      C_ven  4260 0.120  0.0925396
## 179 1_6_2         12      C_ven  5700 0.098  0.0789962
## 180 1_6_2         12      C_ven 10020 0.052  0.0486749
  1. Global evaluation
R_square <- cor(chk_2$Data, chk_2$Prediction)^2
plot(chk_2$Data, chk_2$Prediction, log = "xy",
     xlim = c(0.001, 1000), ylim = c(0.001, 1000),
     xlab = "Observation", ylab = "Prediction",
     main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)

  1. Individual evaluation
chk_2 %>% 
  separate(col = Level, c("Pop", "Subj", "Exp")) %>% 
  filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_grid(Exp~Subj) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration venous blood")

3.2.2 Checking convergence and model fit with multi chains

model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"
set.seed(2345)
out_2_2 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(3344)
out_2_3 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
set.seed(4444)
out_2_4 <- mcsim(model, input, dir = "modeling/perc")
## * Create 'MCMC.check.out' from the last iteration.
x <- mcmc_array(list(out_2, out_2_2, out_2_3, out_2_4))

Diagnosis

parms <- c("sc_Vmax.1.", "Km.1.", "sc_Vmax.1.1.", "Km.1.1.", "sc_Vmax.1.2.",
           "Km.1.2.", "sc_Vmax.1.3.", "Km.1.3.", "sc_Vmax.1.4.", "Km.1.4.", "sc_Vmax.1.5.",
           "Km.1.5.", "sc_Vmax.1.6.", "Km.1.6.")
mcmc_dens_overlay(x[j,,], pars = parms)
mcmc_trace(x[j,,], pars = parms)
monitor(x, digits = 4)
## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
## 
##                    mean  se_mean        sd       2.5%        25%
## iter          6000.0000 364.5813 1155.0253  4100.0000  5000.0000
## sc_Vmax.1.       0.0027   0.0000    0.0013     0.0010     0.0018
## Km.1.            0.5918   0.0152    0.3113     0.1952     0.3739
## sc_Vmax.1.1.     0.0012   0.0000    0.0004     0.0007     0.0010
## Km.1.1.          0.3511   0.0290    0.2453     0.0895     0.1977
## sc_Vmax.1.2.     0.0018   0.0000    0.0003     0.0013     0.0015
## Km.1.2.          0.3806   0.0085    0.1378     0.1813     0.2812
## sc_Vmax.1.3.     0.0022   0.0000    0.0003     0.0018     0.0020
## Km.1.3.          0.2653   0.0048    0.0746     0.1529     0.2119
## sc_Vmax.1.4.     0.0016   0.0000    0.0001     0.0013     0.0015
## Km.1.4.          0.0564   0.0020    0.0188     0.0263     0.0426
## sc_Vmax.1.5.     0.0448   0.0020    0.0189     0.0183     0.0314
## Km.1.5.         54.0461   2.5102   23.5467    21.4925    37.5163
## sc_Vmax.1.6.     0.0004   0.0000    0.0003     0.0003     0.0003
## Km.1.6.          0.2619   0.0616    0.5553     0.0205     0.0773
## LnPrior         11.9482   0.3873    3.9497     3.8243     9.3635
## LnData       -1019.9617   0.4094    3.7188 -1028.3816 -1022.0412
## LnPosterior  -1008.0135   0.3486    3.9949 -1017.4530 -1010.3030
##                     50%        75%      97.5% n_eff   Rhat
## iter          6000.0000  7000.0000  7900.0000    10 2.1047
## sc_Vmax.1.       0.0024     0.0033     0.0060  1226 1.0065
## Km.1.            0.5238     0.7324     1.3793   420 1.0031
## sc_Vmax.1.1.     0.0011     0.0014     0.0022    74 1.0560
## Km.1.1.          0.2938     0.4260     0.9978    72 1.0600
## sc_Vmax.1.2.     0.0017     0.0019     0.0025   258 1.0347
## Km.1.2.          0.3581     0.4519     0.7134   266 1.0338
## sc_Vmax.1.3.     0.0022     0.0024     0.0028   241 1.0141
## Km.1.3.          0.2539     0.3066     0.4379   246 1.0147
## sc_Vmax.1.4.     0.0016     0.0017     0.0018   101 1.0353
## Km.1.4.          0.0546     0.0678     0.0981    90 1.0378
## sc_Vmax.1.5.     0.0409     0.0540     0.0938    87 1.0352
## Km.1.5.         49.4873    65.7160   115.7525    88 1.0351
## sc_Vmax.1.6.     0.0004     0.0005     0.0010    83 1.0356
## Km.1.6.          0.1364     0.2552     1.4367    81 1.0403
## LnPrior         12.0756    14.7674    19.2462   104 1.0373
## LnData       -1019.6140 -1017.3107 -1013.8393    82 1.0367
## LnPosterior  -1007.4740 -1005.0850 -1001.7610   131 1.0216
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Evaluation of model fit

  1. Setpoints analysis
X <- x[j,,] %>% matrix(nrow = 16000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")
## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R
  1. Tidy data with median and 95% confidence interval
vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_2$med <- df$median
chk_2$LCL <- df$LCL
chk_2$UCL <- df$UCL
  1. Plot result
chk_2 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
  geom_line(aes(y = med)) +
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

3.3 Parallel computing

How how many threads your CPU has?

parallel::detectCores()

Parallel computing

Prepare the job file first, the example job file is put into “examples/perc_job.R”.

Set importEnv = T to assign the random seed to the job. The exportEnv is used to assign the name of object to store the output data.

library(rstudioapi)
set.seed(1234)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_1")
set.seed(2345)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_2")
set.seed(3344)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_3")
set.seed(4444)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_4")
x <- mcmc_array(list(job_1$out_2, job_2$out_2, job_3$out_2, job_4$out_2))