3 Tetrachloroethylene PBPK Model

Prepare model (perc.model.R) and input (perc_mcmc.in.R) files. This input file contains MCMC simulation descriptions based on Monster et al. (1989) paper. The study measured the TCE in blood (C_ven) and exhaled air (C_exh_ug) for a group of 6 subjects exposed to two different concentrations (72 and 144 ppm) of TCE in air.

3.1 Single level modeling

3.1.1 Single chain testing

Modeling

model <- "perc.model.R"
input <- "perc_mcmc.in.R"   
set.seed(1234)
system.time(out_1 <- mcsim(model, input, dir = "modeling/perc"))

## * Create 'MCMC.check.out' from the last iteration.

##    user  system elapsed 
##  41.047   0.109  41.158

head(out_1)

##   iter sc_Vmax.1.     Km.1.   LnPrior     LnData LnPosterior
## 1    0   0.299836   2.34383 -3.747032 -34446.920  -34450.660
## 2    1   0.299836 168.11200 -8.251050  -1781.688   -1789.939
## 3    2   0.295656 187.34100 -8.390727  -1655.223   -1663.613
## 4    3   0.295656 231.52900 -8.707851  -1526.996   -1535.704
## 5    4   0.295656 231.52900 -8.707851  -1526.996   -1535.704
## 6    5   0.295656 231.52900 -8.707851  -1526.996   -1535.704

Diagnosis

str <- ceiling(nrow(out_1)/2) + 1
end <- nrow(out_1)
j <- c(str:end) # discard burn-in
plot(out_1$iter[j], out_1$sc_Vmax.1.[j], type = "l")

plot(out_1$iter[j], out_1$Km.1.[j], type = "l")

out_1$sc_Vmax.1.[j] %>% density %>% plot()

out_1$Km.1.[j] %>% density() %>% plot()

par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_1$sc_Vmax.1.[j])
acf(out_1$Km.1.[j])

cor <- cor(out_1$sc_Vmax.1.[j], out_1$Km.1.[j])
plot(out_1$sc_Vmax.1.[j], out_1$Km.1.[j],
     main = paste("r = ", round(cor, digits = 3)))

Checking model fit

Global evaluation

chk_1 <- read.delim("MCMC.check.out")
tail(chk_1)

##     Level Simulation Output_Var  Time  Data Prediction
## 175  1_12         12      C_ven   360 1.760  1.7123300
## 176  1_12         12      C_ven  1320 0.245  0.1316280
## 177  1_12         12      C_ven  2760 0.160  0.0902648
## 178  1_12         12      C_ven  4260 0.120  0.0751109
## 179  1_12         12      C_ven  5700 0.098  0.0629977
## 180  1_12         12      C_ven 10020 0.052  0.0371824

For multi level simulation, the “Level” column include “population-experiments”

R_square <- cor(chk_1$Data, chk_1$Prediction)^2
plot(chk_1$Data, chk_1$Prediction, log = "xy",
     xlim = c(0.001, 1000), ylim = c(0.001, 1000),
     xlab = "Observation", ylab = "Prediction",
     main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)

Individual evaluation

Concentration in venous blood

chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

Concentration in exhaled air

chk_1 %>% filter(Output_Var == "C_exh_ug") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (ug/L)", title = "Concentration exhaled air")

3.1.2 Checking convergence and model fit with multi chains

Checking convergence

model <- "perc.model.R"
input <- "perc_mcmc.in.R"   
set.seed(2345)
out_1_2 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3344)
out_1_3 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4455)
out_1_4 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

x <- mcmc_array(list(out_1, out_1_2, out_1_3, out_1_4))

parms <- c("sc_Vmax.1.", "Km.1.")
mcmc_dens_overlay(x[j,,], pars = parms)

mcmc_trace(x[j,,], pars = parms)

monitor(x, digits = 4)

## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
## 
##                   mean  se_mean        sd       2.5%        25%        50%
## iter         6000.0000 364.5813 1155.0253  4100.0000  5000.0000  6000.0000
## sc_Vmax.1.      0.0033   0.0001    0.0005     0.0025     0.0029     0.0032
## Km.1.           1.3517   0.0392    0.2788     0.9142     1.1445     1.3082
## LnPrior         0.8625   0.0249    0.1797     0.5085     0.7403     0.8793
## LnData      -1387.5392   0.0690    0.9857 -1390.2210 -1387.9720 -1387.2590
## LnPosterior -1386.6767   0.0781    1.0277 -1389.4249 -1387.1360 -1386.3790
##                    75%      97.5% n_eff   Rhat
## iter         7000.0000  7900.0000    10 2.1047
## sc_Vmax.1.      0.0036     0.0042    52 1.0704
## Km.1.           1.5252     1.9435    51 1.0711
## LnPrior         0.9945     1.1735    52 1.0711
## LnData      -1386.8160 -1386.5010   204 1.0132
## LnPosterior -1385.9170 -1385.6050   173 1.0170
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Evaluation of model fit

Setpoints analysis

X <- x[j,,] %>% matrix(nrow = 16000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")

## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R

Tidy data with median and 95% confidence interval

vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_1$med <- df$median
chk_1$LCL <- df$LCL
chk_1$UCL <- df$UCL

Plot result

chk_1 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
  geom_line(aes(y = med)) +
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

Note: The 95% confidence interval is too narrow for visual observing.

3.2 Hierarchical modeling

3.2.1 Single chain testing

Modeling

model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"   
set.seed(1234)
system.time(out_2 <- mcsim(model, input, dir = "modeling/perc"))

## * Create 'MCMC.check.out' from the last iteration.

##    user  system elapsed 
##  46.896   0.190  47.104

head(out_2)

##   iter sc_Vmax.1.   Km.1. sc_Vmax.1.1. Km.1.1. sc_Vmax.1.2.  Km.1.2.
## 1    0  0.0376081 310.371    0.0217434 685.401   0.00766265 1315.590
## 2    1  0.0376081 310.371    0.0297153 160.938   0.00702677  947.047
## 3    2  0.0376081 459.349    0.0288894 160.938   0.00614582 1022.530
## 4    3  0.0376081 265.157    0.0370984 160.938   0.00603613  970.740
## 5    4  0.0376081 265.157    0.0370984 112.163   0.00713647  726.030
## 6    5  0.0376081 265.157    0.0370984 112.163   0.00838208  629.015
##   sc_Vmax.1.3.   Km.1.3. sc_Vmax.1.4. Km.1.4. sc_Vmax.1.5.  Km.1.5.
## 1   0.00351614 462.05500    0.0381820 791.739     0.129087 1054.850
## 2   0.00213336 462.05500    0.0464026 791.739     0.129087  944.273
## 3   0.00213336 511.48500    0.0510909 791.739     0.130996  132.865
## 4   0.00213336 764.86600    0.0510909 791.739     0.131582  132.865
## 5   0.00651876 805.08700    0.0510909 791.739     0.132688  132.865
## 6   0.01519300   2.12759    0.0511913 791.739     0.133172  132.865
##   sc_Vmax.1.6. Km.1.6.   LnPrior    LnData LnPosterior
## 1    0.0321897  270.02 -41.20180 -2249.799   -2291.001
## 2    0.0321897  270.02 -39.81937 -2206.786   -2246.605
## 3    0.0306556  270.02 -38.77012 -2153.866   -2192.636
## 4    0.0241955  270.02 -38.66947 -2146.738   -2185.407
## 5    0.0488277  270.02 -37.59740 -2122.446   -2160.043
## 6    0.0433307  270.02 -40.32183 -1951.073   -1991.395

Diagnosis

Trace plot

str <- ceiling(nrow(out_2)/2) + 1
end <- nrow(out_2)
j <- c(str:end) # discard burn-in
plot(out_2$iter[j], out_2$sc_Vmax.1.[j], type = "l")

plot(out_2$iter[j], out_2$Km.1.[j], type = "l")

Kernel plot

out_2$sc_Vmax.1.[j] %>% density %>% plot()

out_2$Km.1.[j] %>% density() %>% plot()

Auto-correlation plot

par(mfrow = c(1,2), mar = c(4,4,3,1))
acf(out_2$sc_Vmax.1.[j])
acf(out_2$Km.1.[j])

Pair plot

cor_2 <- cor(out_2$Km.1.[j], out_2$sc_Vmax.1.[j])
plot(out_2$Km.1.[j], out_2$sc_Vmax.1.[j], 
     xlab = "Km", ylab = "Vmax",
     main = paste("r =", round(cor_2, digits = 3)))

Evaluation of model fit

chk_2 <- read.delim("MCMC.check.out")

Compare the different between single and multi level

For multi level simulation, the “Level” column include “population-individuals-experiments”

tail(chk_1)

##     Level Simulation Output_Var  Time  Data Prediction        med
## 175  1_12         12      C_ven   360 1.760  1.7123300 1.71130000
## 176  1_12         12      C_ven  1320 0.245  0.1316280 0.13142300
## 177  1_12         12      C_ven  2760 0.160  0.0902648 0.09022060
## 178  1_12         12      C_ven  4260 0.120  0.0751109 0.07509865
## 179  1_12         12      C_ven  5700 0.098  0.0629977 0.06300090
## 180  1_12         12      C_ven 10020 0.052  0.0371824 0.03721525
##            LCL        UCL
## 175 1.69721975 1.72119000
## 176 0.12885090 0.13395900
## 177 0.08899328 0.09143501
## 178 0.07406419 0.07614902
## 179 0.06206130 0.06396182
## 180 0.03642380 0.03805150

tail(chk_2)

##     Level Simulation Output_Var  Time  Data Prediction
## 175 1_6_2         12      C_ven   360 1.760  1.7550700
## 176 1_6_2         12      C_ven  1320 0.245  0.1569630
## 177 1_6_2         12      C_ven  2760 0.160  0.1090500
## 178 1_6_2         12      C_ven  4260 0.120  0.0925396
## 179 1_6_2         12      C_ven  5700 0.098  0.0789962
## 180 1_6_2         12      C_ven 10020 0.052  0.0486749

Global evaluation

R_square <- cor(chk_2$Data, chk_2$Prediction)^2
plot(chk_2$Data, chk_2$Prediction, log = "xy",
     xlim = c(0.001, 1000), ylim = c(0.001, 1000),
     xlab = "Observation", ylab = "Prediction",
     main = paste("R2 =", round(R_square, digits = 3)))
abline(0,1)

Individual evaluation

chk_2 %>% 
  separate(col = Level, c("Pop", "Subj", "Exp")) %>% 
  filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_line(aes(y = Prediction))+
  facet_grid(Exp~Subj) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration venous blood")

3.2.2 Checking convergence and model fit with multi chains

model <- "perc.model.R"
input <- "perc_mcmc-hier.in.R"
set.seed(2345)
out_2_2 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3344)
out_2_3 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4444)
out_2_4 <- mcsim(model, input, dir = "modeling/perc")

## * Create 'MCMC.check.out' from the last iteration.

x <- mcmc_array(list(out_2, out_2_2, out_2_3, out_2_4))

Diagnosis

parms <- c("sc_Vmax.1.", "Km.1.", "sc_Vmax.1.1.", "Km.1.1.", "sc_Vmax.1.2.",
           "Km.1.2.", "sc_Vmax.1.3.", "Km.1.3.", "sc_Vmax.1.4.", "Km.1.4.", "sc_Vmax.1.5.",
           "Km.1.5.", "sc_Vmax.1.6.", "Km.1.6.")
mcmc_dens_overlay(x[j,,], pars = parms)
mcmc_trace(x[j,,], pars = parms)

monitor(x, digits = 4)

## Inference for the input samples (4 chains: each with iter=8001; warmup=4000):
## 
##                    mean  se_mean        sd       2.5%        25%
## iter          6000.0000 364.5813 1155.0253  4100.0000  5000.0000
## sc_Vmax.1.       0.0027   0.0000    0.0013     0.0010     0.0018
## Km.1.            0.5918   0.0152    0.3113     0.1952     0.3739
## sc_Vmax.1.1.     0.0012   0.0000    0.0004     0.0007     0.0010
## Km.1.1.          0.3511   0.0290    0.2453     0.0895     0.1977
## sc_Vmax.1.2.     0.0018   0.0000    0.0003     0.0013     0.0015
## Km.1.2.          0.3806   0.0085    0.1378     0.1813     0.2812
## sc_Vmax.1.3.     0.0022   0.0000    0.0003     0.0018     0.0020
## Km.1.3.          0.2653   0.0048    0.0746     0.1529     0.2119
## sc_Vmax.1.4.     0.0016   0.0000    0.0001     0.0013     0.0015
## Km.1.4.          0.0564   0.0020    0.0188     0.0263     0.0426
## sc_Vmax.1.5.     0.0448   0.0020    0.0189     0.0183     0.0314
## Km.1.5.         54.0461   2.5102   23.5467    21.4925    37.5163
## sc_Vmax.1.6.     0.0004   0.0000    0.0003     0.0003     0.0003
## Km.1.6.          0.2619   0.0616    0.5553     0.0205     0.0773
## LnPrior         11.9482   0.3873    3.9497     3.8243     9.3635
## LnData       -1019.9617   0.4094    3.7188 -1028.3816 -1022.0412
## LnPosterior  -1008.0135   0.3486    3.9949 -1017.4530 -1010.3030
##                     50%        75%      97.5% n_eff   Rhat
## iter          6000.0000  7000.0000  7900.0000    10 2.1047
## sc_Vmax.1.       0.0024     0.0033     0.0060  1226 1.0065
## Km.1.            0.5238     0.7324     1.3793   420 1.0031
## sc_Vmax.1.1.     0.0011     0.0014     0.0022    74 1.0560
## Km.1.1.          0.2938     0.4260     0.9978    72 1.0600
## sc_Vmax.1.2.     0.0017     0.0019     0.0025   258 1.0347
## Km.1.2.          0.3581     0.4519     0.7134   266 1.0338
## sc_Vmax.1.3.     0.0022     0.0024     0.0028   241 1.0141
## Km.1.3.          0.2539     0.3066     0.4379   246 1.0147
## sc_Vmax.1.4.     0.0016     0.0017     0.0018   101 1.0353
## Km.1.4.          0.0546     0.0678     0.0981    90 1.0378
## sc_Vmax.1.5.     0.0409     0.0540     0.0938    87 1.0352
## Km.1.5.         49.4873    65.7160   115.7525    88 1.0351
## sc_Vmax.1.6.     0.0004     0.0005     0.0010    83 1.0356
## Km.1.6.          0.1364     0.2552     1.4367    81 1.0403
## LnPrior         12.0756    14.7674    19.2462   104 1.0373
## LnData       -1019.6140 -1017.3107 -1013.8393    82 1.0367
## LnPosterior  -1007.4740 -1005.0850 -1001.7610   131 1.0216
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Evaluation of model fit

Setpoints analysis

X <- x[j,,] %>% matrix(nrow = 16000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("perc.model.R", "perc_setpts.in.R", dir = "modeling/perc")

## Execute: ./mcsim.perc.model.R.exe modeling/perc/perc_setpts.in.R

Tidy data with median and 95% confidence interval

vars <- names(X_setpts)
index <- which(vars == "C_exh_ug_1.1" | vars == "C_ven_12.7")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
chk_2$med <- df$median
chk_2$LCL <- df$LCL
chk_2$UCL <- df$UCL

Plot result

chk_2 %>% filter(Output_Var == "C_ven") %>%
ggplot(aes(x = Time, y = Data)) +
  geom_point() + 
  geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) +
  geom_line(aes(y = med)) +
  facet_wrap(~Simulation) +
  scale_y_log10() + 
  labs(x = "Time (min)", y = "Concentration (mg/L)", title = "Concentration in venous blood")

3.3 Parallel computing

How how many threads your CPU has?

parallel::detectCores()

Parallel computing

Prepare the job file first, the example job file is put into “examples/perc_job.R”.

Set importEnv = T to assign the random seed to the job. The exportEnv is used to assign the name of object to store the output data.

library(rstudioapi)
set.seed(1234)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_1")
set.seed(2345)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_2")
set.seed(3344)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_3")
set.seed(4444)
jobRunScript(workingDir = getwd(), path = "examples/perc_job.R", importEnv = T, exportEnv = "job_4")

x <- mcmc_array(list(job_1$out_2, job_2$out_2, job_3$out_2, job_4$out_2))

MCSim tutorial 3 - Markov chain Monte Carlo Calibration (Supplementary)

Nan-Hung Hsieh

2019-05-22

3 Tetrachloroethylene PBPK Model

3.1 Single level modeling

3.1.1 Single chain testing

3.1.2 Checking convergence and model fit with multi chains

3.2 Hierarchical modeling

3.2.1 Single chain testing

3.2.2 Checking convergence and model fit with multi chains

3.3 Parallel computing