0 Prerequisites

Import the R packages

library(tidyverse)
library(rstan)
library(bayesplot)
library(corrplot)
library(sensitivity)
library(pksensi)
theme_set(theme_light())

1 Linear Model

1.1 Single chain testing

Use model (linear.model.R) and input (linear_mcmc.in.R) files to run MCMC,

linear.model.R

## linear.model.R ####
Outputs = {y}

# Model Parameters
A = 0; # 
B = 1;

# Statistical parameter
SD_true = 0;

CalcOutputs { y = A + B * t + NormalRandom(0,SD_true); }

End.

linear_mcmc.in.R

## linear_mcmc.in.R ####
MCMC ("MCMC.default.out","", # name of output and restart file
     "",           # name of data file
     4000,0,       # iterations, print predictions flag,
     1,4000,       # printing frequency, iters to print
     10101010);    # random seed (default )

Level {
  
  Distrib(A, Normal, 0, 2); # prior of intercept coefficient
  Distrib(B, Normal, 1, 2); # prior of slope coefficient
  
  Likelihood(y, Normal, Prediction(y), 0.05); #  # exact SD
  
  Simulation {
    PrintStep (y, 0, 10, 1); #seq(0, 10 1)
    Data  (y, 0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0);
  }
}

End.

Plotting the data

x <- seq(0, 10, 1)
y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
plot(x, y)

model <- "linear.model.R"
input <- "linear_mcmc.in.R"
set.seed(1111) 
out1 <- mcsim(model, input)

## * Create 'MCMC.check.out' from the last iteration.

After the simulation, we need to diagnosis the result,

plot(out1$A.1., type = "l", xlab = "Iteration", ylab = "")
lines(out1$B.1., col = 2)
legend("topright", legend = c("Intercept", "Slope"), col = c(1,2), lty = 1)

plot(out1$A.1., out1$B.1., type = "b", xlab = "Intercept", ylab = "Slope")

Basically, the “burn-in” period is not necessary to include in our analysis

str <- ceiling(nrow(out1)/2) + 1
end <- nrow(out1)
j <- c(str:end)
plot(out1$iter[j], out1$A.1.[j], type = "l", xlab = "Iteration", ylab = "Intercept")

plot(out1$iter[j], out1$B.1.[j], type = "l", xlab = "Iteration", ylab = "Slope")

plot(out1$A.1.[j], out1$B.1.[j], type = "b", xlab = "Intercept", ylab = "Slope")

cor(out1$A.1.[j], out1$B.1.[j])

## [1] -0.8736645

Check kernel density

out1$A.1.[j] %>% density() %>% plot()

out1$B.1.[j] %>% density() %>% plot()

The MCMC.check.out is generated by the last iteration of simulation. It is used to quick check the output.

# Visualizing the fitting result 
chk <- read.delim("MCMC.check.out")

plot(chk$Time, chk$Data, xlab = "x", ylab = "y")
lines(chk$Time, chk$Prediction)

1.2 Multi-chains simulation

Use set.seed() to create the different chains with different random seed and run simulation.

# Check convergence
model <- "linear.model.R"
input <- "linear_mcmc.in.R"
set.seed(2234) 
out2 <- mcsim(model, input) # Generate the 2nd chain

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3234) 
out3 <- mcsim(model, input) # Generate the 3rd chain

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4234) 
out4 <- mcsim(model, input) # Generate the 4th chain

## * Create 'MCMC.check.out' from the last iteration.

Use mcmc_array() to create the 3-D array (iterations * chains * parameters) to store all outputs.

sims <- mcmc_array(data = list(out1,out2,out3,out4))
dim(sims)

## [1] 2001    4    6

The bayesplot package can use to visualize output.

parms_name <- c("A.1.","B.1.")
color_scheme_set("mix-blue-red")
mcmc_trace(sims, pars = parms_name, facet_args = list(ncol = 1, strip.position = "left"))

Kernel density plots of MCMC draws

mcmc_dens_overlay(x = sims[j,,], pars = parms_name)

mcmc_dens_overlay(x = sims[j,,], pars = "LnData")

Pairs plots from MCMC draws

mcmc_pairs(sims[j,,], pars = parms_name, off_diag_fun = "hex")

Rank histogram plot (Vehtari et al. 2019)

mcmc_rank_hist(sims, pars = parms_name)

mcmc_rank_overlay(sims, pars = parms_name)

The rstan package can use to analyze and diagnosis output.

monitor(sims[,,parms_name], digit=4)

## Inference for the input samples (4 chains: each with iter=2001; warmup=1000):
## 
##        mean se_mean     sd    2.5%    25%    50%    75%  97.5% n_eff
## A.1. 0.4466  0.0300 0.3002 -0.1278 0.2276 0.4417 0.6563 0.9969   100
## B.1. 0.9980  0.0049 0.0512  0.8949 0.9623 0.9972 1.0332 1.0919   109
##        Rhat
## A.1. 1.0407
## B.1. 1.0352
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Finally, evaluate the confidence interval of prediction through setpoints method. Prepare the input file linear_setpts.in.R

## ./mcsim.linear.model.R.exe linear_setpts.in.R ####
SetPoints ("", "setpts.out", 0, A, B);

Simulation { 
  PrintStep (y, 0, 10, 1);
  Data  (y, -0.0289654, 1.15968, 2.32502, 3.33289, 4.61105, 5.6818, 
         6.89044, 8.13242, 9.27033, 10.4522, 11.6703);
} 

End.

X <- sims[j,,] %>% matrix(nrow = 1000*4) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("linear.model.R", "linear_setpts.in.R")

## Execute: ./mcsim.linear.model.R.exe modeling/linear_setpts.in.R

head(X_setpts)

##   Iter        A        B    y_1.1   y_1.2   y_1.3   y_1.4   y_1.5   y_1.6
## 1    0 0.704130 0.940713 0.704130 1.64484 2.58556 3.52627 4.46698 5.40770
## 2    1 0.542775 0.940713 0.542775 1.48349 2.42420 3.36491 4.30563 5.24634
## 3    2 0.797207 0.940713 0.797207 1.73792 2.67863 3.61935 4.56006 5.50077
## 4    3 0.797207 0.940713 0.797207 1.73792 2.67863 3.61935 4.56006 5.50077
## 5    4 0.797207 0.930386 0.797207 1.72759 2.65798 3.58837 4.51875 5.44914
## 6    5 0.797207 0.930386 0.797207 1.72759 2.65798 3.58837 4.51875 5.44914
##     y_1.7   y_1.8   y_1.9  y_1.10   y_1.11
## 1 6.34841 7.28912 8.22983 9.17055 10.11130
## 2 6.18705 7.12777 8.06848 9.00919  9.94991
## 3 6.44149 7.38220 8.32291 9.26362 10.20430
## 4 6.44149 7.38220 8.32291 9.26362 10.20430
## 5 6.37952 7.30991 8.24029 9.17068 10.10110
## 6 6.37952 7.30991 8.24029 9.17068 10.10110

vars <- names(X_setpts)
index <- which(vars == "y_1.1" | vars == "y_1.11")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
x <- seq(0, 10, 1)
df$x <- x
df

##           median       LCL       UCL  x
## y_1.1   0.441327 -0.127816  0.997311  0
## y_1.2   1.441715  0.967708  1.921580  1
## y_1.3   2.442100  2.029488  2.850090  2
## y_1.4   3.439265  3.081570  3.794925  3
## y_1.5   4.432740  4.128170  4.768060  4
## y_1.6   5.427430  5.146256  5.742600  5
## y_1.7   6.425620  6.136910  6.759760  6
## y_1.8   7.422680  7.098845  7.798120  7
## y_1.9   8.421310  8.022710  8.853480  8
## y_1.10  9.422700  8.935832  9.920214  9
## y_1.11 10.418350  9.841940 10.988438 10

y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
obs_data <- data.frame(x, y)
ggplot(df, aes(x = x, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = obs_data, x=x, y=y) +
    labs(x = "x", y = "y")

2 Ethylbenzene PBPK Model

Prepare model (perc.model.R) and input files in the following simulation. The input files include straightforward (EB.in.R), Monte Carlo (EB_mtc.in.R), setpints (EB_setpts.in.R), MCMC (EB_MCMC.in.R) simulation, and MCMC for model evaluation (EB_MCMC_setpts.in.R).

2.1 Model verification

Run EB-PBPK model (“EB.model.R”) and input files (“EB.in.R”), the exposure condition is continuous inhalation of 100 ppm Ethylbenzene (EB) for 4 hours and plot the time-course of blood concentration from 0 to 6 hour.

Modeling

out <- mcsim(model = "EB.model.R", input = "EB.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB.in.R

exp <- readsims(out, exp = 3) 
head(exp)

##   Time       Cvtot
## 1  0.0 0.00000e+00
## 2  0.1 3.43935e-06
## 3  0.2 4.09070e-06
## 4  0.3 4.46969e-06
## 5  0.4 4.71608e-06
## 6  0.5 4.89472e-06

Unit transfer and add study data

MW <- 106.16 # g/mol
exp$Cvtot_mg <- exp$Cvtot * MW * 1000 # mol/L -> mg/L 
data_t <- c(4, 4.5, 5, 5.5, 6) # Obsrvation time 
data_c <- c(1.93, 1.29, 0.87, 0.55, 0.38) # Obsrvation concentration
sd <- c(0.15, 0.15, 0.24, 0.11, 0.05)
sd.up <- data_c + sd
sd.dn <- data_c - sd

Plotting

Compare the simulation result with experiment data as:

plot(data_t, data_c, main = "Exposure: 100 ppm", xlab = "Time (hr)", ylab = "Concentration (mg/L)", xlim = c(0, 6), ylim = c(0, 2.5))
lines(exp$Time, exp$Cvtot_mg)
arrows(data_t, sd.dn, data_t, sd.up, code=3, length=0.02, angle=90)
legend("topright", legend = c("Data", "Model"), lty = c(NA, 1), pch = c(1, NA))

2.2 Uncertainty analysis

Setting the uncertainty for model parameter

Here we used 11 model parameters include body weight (uniform distribution from 35g to 55g), 7 partition coefficients (set 2 times difference from baseline) and 3 Michaelis-Menten reaction rate constant (set 4 times difference from baseline). The distribution setting in input file is as,

Distrib (BW, Uniform, 0.035, 0.055);

Distrib (Pb, Uniform, 21.35, 85.4);
Distrib (Pl, Uniform, 0.98, 3.92);
Distrib (Pf, Uniform, 18.2, 72.8);
Distrib (Pm, Uniform, 0.3045, 1.218);
Distrib (Pvrg, Uniform, 0.98, 3.92);
Distrib (Ppu, Uniform, 0.98, 3.92);
Distrib (Pbr, Uniform, 0.98, 3.92);

Distrib (VmaxC, Uniform, 1.5975, 25.56);
Distrib (VmaxClu, Uniform, 3.35, 53.6);
Distrib (VmaxCvr, Uniform, 4.35, 69.6);

Modeling

out <- mcsim(model = "EB.model.R", input = "EB_mtc.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_mtc.in.R

Data wrangling

vars <- names(out)
index <- which(vars == "Cvtot_1.2" | vars == "Cvtot_1.61")
time <- seq(0.1, 6, 0.1) # corresponding time-points
exp_data <- data.frame(data_t, data_c)

X <- apply(out[,index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t() * MW * 1000 # 95% CI
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
df$time <- time

Plotting

ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y = data_c, ymin=data_c-sd, ymax=data_c+sd), width=.1) +
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

2.3 Morris elementary effects screening

Argument setting

BW <- 0.043  

Pb <- 42.7
Pl <- 1.96
Pf <- 36.4
Pm <- 0.609
Pvrg <- 1.96
Ppu <- 1.96
Pbr <- 1.96

VmaxC <- 6.39
VmaxClu <- 13.4
VmaxCvr <- 17.4

baseline <- c(BW, Pb, Pl, Pf, Pm, Pvrg, Ppu, Pbr, VmaxC, VmaxClu, VmaxCvr)
limit <- c(1.2, rep(2, 7), rep(4, 3))
binf <- baseline/limit
bsup <- baseline*limit

parameters <- c("BW", "Pb", "Pl", "Pf", "Pm", "Pvrg", "Ppu", "Pbr", "VmaxC", "VmaxClu", "VmaxCvr")

Generate parameter matrix

set.seed(1234)
x <- morris(model = NULL, factors = parameters, r = 512,
            design = list(type = "oat", levels = 5, grid.jump = 3),
            binf = binf, bsup = bsup)
X <- cbind(1, x$X)
write.table(X, file = "setpts.out", row.names = F, sep = "\t")

Modeling

out <- mcsim(model = "EB.model.R",  input = "EB_setpts.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_setpts.in.R

Plotting result

str <- which(names(out) == "Cvtot_1.1")
end <- which(names(out) == "Cvtot_1.5")
par(mfrow = c(2,3), mar = c(2,2,1,1))
for (i in str:end){
tell(x, log(out[,i]))
plot(x, xlim=c(0,3), main = paste("Time = ",4 + (i - str)*0.5, "hr"))
abline(v = 0.5)
}

2.4 Uncertainty analysis again

Adjust the range of Pf (to 4 times different), VmaxC (to 8 times different) and VmaxCvr (to 8 times different), and check the possible output.

baseline <- c(BW, Pb, Pl, Pf, Pm, Pvrg, Ppu, Pbr, VmaxC, VmaxClu, VmaxCvr)
new_limit <- c(1.2, 2, 2, 4, rep(2, 4), 8, 4, 8)
binf <- baseline/new_limit
bsup <- baseline*new_limit
dist <- c(rep("Uniform", 8), "LogUniform", "Uniform", "LogUniform")
q.arg <- list(list(min = binf[1], max = bsup[1]),
              list(min = binf[2], max = bsup[2]),
              list(min = binf[3], max = bsup[3]),
              list(min = binf[4], max = bsup[4]),
              list(min = binf[5], max = bsup[5]),
              list(min = binf[6], max = bsup[6]),
              list(min = binf[7], max = bsup[7]),
              list(min = binf[8], max = bsup[8]),
              list(min = binf[9], max = bsup[9]),
              list(min = binf[10], max = bsup[10]),
              list(min = binf[11], max = bsup[11])) 
outputs <- c("Cvtot")
conditions <- c("Cppm = NDoses (2, 100, 0, 0, 4 )")

Run MC simulation

set.seed(2222)
y<-solve_mcsim(mName = "EB.model.R", params = parameters, vars = outputs,
               monte_carlo = 1000, dist = dist, q.arg = q.arg, time = time, 
               condition = conditions, rtol = 1e-9, atol = 1e-11)

## Starting time: 2019-05-23 17:47:43

## * Created input file "sim.in".

## Execute: ./mcsim.EB.model.R.exe sim.in

## Ending time: 2019-05-23 17:47:44

Check result

X <- y * MW * 1000
pksim(X)
points(data_t, data_c)
arrows(data_t, sd.dn, data_t, sd.up, code=3, length=0.02, angle=90)

2.5 MCMC calibration

2.5.1 Single chain testing

1. MCMC Modeling

Run MCMC for single chain the iteration is set to 2000

set.seed(1234)
system.time(out1 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB"))

## * Create 'MCMC.check.out' from the last iteration.

##    user  system elapsed 
##  12.886   0.292  13.528

tail(out1)

##        iter     BW.1.   Pb.1.   Pl.1.   Pf.1.    Pm.1. Pvrg.1.  Ppu.1.
## 9996   9995 0.0453466 22.8305 3.53578 60.1662 1.141440 3.10114 1.10761
## 9997   9996 0.0423804 22.8305 3.23631 77.5714 1.141440 3.57866 1.25789
## 9998   9997 0.0458509 22.8305 3.54474 83.3918 1.195420 3.54165 1.25789
## 9999   9998 0.0450851 22.8305 3.56708 58.3106 1.195420 3.63249 2.19522
## 10000  9999 0.0425578 22.8305 3.65135 77.3074 1.122660 3.72383 1.98653
## 10001 10000 0.0487795 26.5546 3.02228 76.1249 0.914863 3.49290 1.74972
##        Pbr.1. VmaxC.1. VmaxClu.1. VmaxCvr.1.   LnPrior   LnData
## 9996  3.53231 0.122525    22.6870    2.45984 -20.61048 53.94794
## 9997  3.33264 0.122525    22.6870    2.45984 -20.54410 52.27535
## 9998  3.33264 0.361567    24.1154    2.45984 -21.93018 52.07051
## 9999  3.87490 0.361567    23.8097    2.19453 -22.11632 54.95096
## 10000 3.89594 0.452277    24.4765    2.19453 -22.34119 52.59288
## 10001 3.84273 0.452277    24.4765    2.31050 -21.45515 50.71719
##       LnPosterior
## 9996     33.33746
## 9997     31.73125
## 9998     30.14033
## 9999     32.83464
## 10000    30.25169
## 10001    29.26204

Check simulation result with the parameter from the last iteration

check_df <- read.delim("MCMC.check.out")
check_df$Data_mgl <- check_df$Data * MW * 1000
check_df$Prediction_mgl <- check_df$Prediction * MW * 1000

plot(check_df$Time, check_df$Data_mgl, ylim = c(0, 4),
     xlab = "Time (hr)", ylab = "Concentration (mg/L)")
lines(check_df$Time, check_df$Prediction_mgl)

plot(check_df$Data_mgl, check_df$Prediction_mgl, log = "xy", 
     xlab = "Observation", ylab = "Prediction")
abline(0,1) # calibration curve

2. Setpoints analysis

Use output file to do setpoints analysis. The setpoints file (EB_MCMC_setpts.in.R) need to build in this analysis. The setpoints analysis aims to perform the simulation based on the posterior of model output. Through the setpoints analysis, we can check all possible outputs (e.g., 95% confidence interval) and detect the data points that are not covered in the simulation routes.

Run simulation

out <- mcsim(model = "EB.model.R",  input = "EB_MCMC_setpts.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_MCMC_setpts.in.R

tail(out)

##        Iter        BW      Pb      Pl      Pf       Pm    Pvrg     Ppu
## 9996   9995 0.0453466 22.8305 3.53578 60.1662 1.141440 3.10114 1.10761
## 9997   9996 0.0423804 22.8305 3.23631 77.5714 1.141440 3.57866 1.25789
## 9998   9997 0.0458509 22.8305 3.54474 83.3918 1.195420 3.54165 1.25789
## 9999   9998 0.0450851 22.8305 3.56708 58.3106 1.195420 3.63249 2.19522
## 10000  9999 0.0425578 22.8305 3.65135 77.3074 1.122660 3.72383 1.98653
## 10001 10000 0.0487795 26.5546 3.02228 76.1249 0.914863 3.49290 1.74972
##           Pbr    VmaxC VmaxClu VmaxCvr   Cvtot_1.1   Cvtot_1.2   Cvtot_1.3
## 9996  3.53231 0.122525 22.6870 2.45984 2.68564e-05 1.05748e-05 7.30639e-06
## 9997  3.33264 0.122525 22.6870 2.45984 2.57658e-05 9.89401e-06 7.07697e-06
## 9998  3.33264 0.361567 24.1154 2.45984 2.42521e-05 9.07392e-06 6.45183e-06
## 9999  3.87490 0.361567 23.8097 2.19453 2.66155e-05 1.05418e-05 7.17077e-06
## 10000 3.89594 0.452277 24.4765 2.19453 2.50488e-05 9.35651e-06 6.61361e-06
## 10001 3.84273 0.452277 24.4765 2.31050 2.59626e-05 9.33058e-06 6.73085e-06
##         Cvtot_1.4   Cvtot_1.5
## 9996  5.49242e-06 4.22452e-06
## 9997  5.58741e-06 4.52469e-06
## 9998  5.11406e-06 4.17913e-06
## 9999  5.30361e-06 4.02070e-06
## 10000 5.18304e-06 4.16844e-06
## 10001 5.35898e-06 4.34604e-06

Data processing

str <- which(names(out) == "Cvtot_1.1")
end <- which(names(out) == "Cvtot_1.5")
vars <- names(out)
index <- which(vars == "Cvtot_1.1" | vars == "Cvtot_1.5")
X <- apply(out[,index[1]:index[2]], 2, quantile,  c(0.5, 0.01, 0.99))
dat <- t(X) * MW * 1000
colnames(dat) <- c("median", "LCL", "UCL")
df <- as.data.frame(dat)
df$time <- data_t
head(df)

##              median       LCL       UCL time
## Cvtot_1.1 2.9732550 2.6581509 3.3388488  4.0
## Cvtot_1.2 1.0862291 0.9165302 1.2728478  4.5
## Cvtot_1.3 0.7386592 0.6520602 0.8290650  5.0
## Cvtot_1.4 0.5470191 0.4817010 0.6174287  5.5
## Cvtot_1.5 0.4134136 0.3399169 0.4908562  6.0

Plotting

ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y=data_c, ymin=data_c-sd, ymax=data_c+sd), width=.05) +
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

## Warning: Ignoring unknown aesthetics: y

3. Result diagnosis

Trace plot

str <- ceiling(nrow(out1)/2) + 1
end <- nrow(out1)
j <- c(str:end) # discard burn-in
par(mfrow = c(3,4), mar = c(2,2,2,1))
for (i in 2:12){
  plot(out1[j,1], out1[j,i], type = "l", main = names(out1)[i])
}

Auto correlation

par(mfrow = c(3,4), mar = c(2,2,4,1))
for (i in 2:12){
  acf(out1[j,i], main = names(out1)[i])
}

Density plot

par(mfrow = c(3,4), mar = c(2,2,2,1))
for (i in 2:12){
  out1[j,i] %>% density() %>% plot(type = "l", main = names(out1)[i])
}

Correlation matrix

out1[,2:12] %>% cor() %>% corrplot(method = "number")

2.5.2 Multi chains testing

1. MCMC Modeling

This is an example to run multiple chain without parallel

set.seed(2233)
out2 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3344)
out3 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4455)
out4 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

We can also use parallel computing to run multiple chains at the same time.

parallel::detectCores()

Through parallel package, we can detect the available core in the working computer.

library(rstudioapi)
set.seed(1234)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_1")
set.seed(2345)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_2")

set.seed(3344)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_3")
set.seed(4455)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_4")

Use mcmc_array() to store output as an object

x <- mcmc_array(data = list(out1, out2, out3, out4))

For parallel

x <- mcmc_array(data = list(job_1$out, job_2$out, job_3$out, job_4$out))

dim(x)

## [1] 10001     4    15

2. Result diagnosis

Kernel density

pars <- c("BW.1.", "Pb.1.", "Pl.1.", "Pf.1.", "Pm.1.", "Pvrg.1.", "Ppu.1.", "Pbr.1.", "VmaxC.1.", "VmaxClu.1.", "VmaxCvr.1.")
mcmc_dens_overlay(x[j,,], pars = pars)

mcmc_dens_overlay(x[j,,], pars = "LnData")

Trace plot

mcmc_trace(x[j,,], pars = pars)

Pair plot

mcmc_pairs(x[j,,], pars = c("Pb.1.", "VmaxC.1.", "VmaxClu.1.", "VmaxCvr.1."), off_diag_fun = "hex")

Create report

monitor(x, digit = 4)

## Inference for the input samples (4 chains: each with iter=10001; warmup=5000):
## 
##                  mean  se_mean        sd      2.5%       25%       50%
## iter        7500.0000 455.7220 1443.7004 5125.0000 6250.0000 7500.0000
## BW.1.          0.0438   0.0001    0.0035    0.0370    0.0414    0.0438
## Pb.1.         41.0310   0.7016   13.9192   22.6030   30.0346   38.2743
## Pl.1.          2.2326   0.0174    0.7383    1.0698    1.6364    2.1580
## Pf.1.         60.8607   0.2000   10.2507   44.3812   53.2557   59.3062
## Pm.1.          1.0615   0.0035    0.1357    0.7059    0.9932    1.0968
## Pvrg.1.        2.3286   0.0181    0.7771    1.0880    1.6953    2.2685
## Ppu.1.         2.1034   0.0170    0.7076    1.0642    1.5361    1.9940
## Pbr.1.         2.1038   0.0209    0.7337    1.0422    1.5144    1.9826
## VmaxC.1.       1.5156   0.2678    1.1934    0.0827    0.3658    1.2314
## VmaxClu.1.    18.1610   0.3389    8.2462    6.8804   12.1215   16.6213
## VmaxCvr.1.     2.0197   0.3305    1.5229    0.1931    0.5180    1.9381
## LnPrior      -18.3456   0.0488    1.5434  -21.7430  -19.2780  -18.1960
## LnData        50.9201   0.0571    1.7600   46.8074   49.9216   51.1704
## LnPosterior   32.5745   0.0842    1.9715   28.2881   31.3757   32.7621
##                   75%     97.5% n_eff   Rhat
## iter        8750.0000 9875.0000    10 2.1046
## BW.1.          0.0464    0.0502  2256 1.0006
## Pb.1.         49.5690   74.4535   394 1.0165
## Pl.1.          2.7560    3.7316  1808 1.0009
## Pf.1.         66.6999   84.4580  2628 1.0080
## Pm.1.          1.1656    1.2115  1509 1.0041
## Pvrg.1.        2.9255    3.7943  1838 1.0009
## Ppu.1.         2.5868    3.6458  1741 1.0004
## Pbr.1.         2.5947    3.7015  1229 1.0000
## VmaxC.1.       2.5695    3.7712    20 1.2559
## VmaxClu.1.    22.7737   39.1581   592 1.0052
## VmaxCvr.1.     3.2147    4.9587    21 1.1810
## LnPrior      -17.2433  -15.7910  1002 1.0015
## LnData        52.1837   53.6793   952 1.0160
## LnPosterior   33.9849   35.9173   548 1.0213
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

3. Evaluation of model fit

Setpoints analysis

X <- x[j,,] %>% matrix(nrow = 20000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("EB.model.R", "EB_setpts.in.R")

## Execute: ./mcsim.EB.model.R.exe modeling/EB_setpts.in.R

head(X_setpts)

##   Iter        BW      Pb      Pl      Pf      Pm    Pvrg     Ppu     Pbr
## 1    0 0.0447211 59.7808 3.62970 60.6963 1.07323 2.43232 2.37481 2.02889
## 2    1 0.0447211 64.8059 3.42841 60.6963 1.07153 2.50442 1.37378 1.57070
## 3    2 0.0453967 64.8059 3.67594 52.3103 1.07153 2.50442 1.61156 1.22918
## 4    3 0.0453967 64.8059 3.29223 51.4293 1.07153 1.94113 1.61800 1.26715
## 5    4 0.0483390 64.8059 3.11091 51.4293 1.07153 2.22615 1.61800 1.17951
## 6    5 0.0459130 56.9000 3.24675 51.4293 1.07153 2.38812 1.26203 1.16542
##      VmaxC VmaxClu VmaxCvr   Cvtot_1.1   Cvtot_1.2   Cvtot_1.3   Cvtot_1.4
## 1 0.179986 8.02029 5.78176 2.76733e-05 1.02020e-05 6.89437e-06 5.11517e-06
## 2 0.179986 8.02029 5.78176 2.80546e-05 1.03703e-05 7.01920e-06 5.21320e-06
## 3 0.158005 8.02029 5.78176 2.90096e-05 1.08920e-05 7.15581e-06 5.11582e-06
## 4 0.170050 8.02029 5.78176 2.90849e-05 1.07912e-05 7.07057e-06 5.03096e-06
## 5 0.170050 8.02029 5.62233 2.95331e-05 1.11565e-05 7.34950e-06 5.25754e-06
## 6 0.170050 8.02029 5.62233 2.89466e-05 1.07933e-05 7.07547e-06 5.03903e-06
##     Cvtot_1.5
## 1 3.88608e-06
## 2 3.96168e-06
## 3 3.74135e-06
## 4 3.65910e-06
## 5 3.84650e-06
## 6 3.66931e-06

Tidy data with median and 95% confidence interval

vars <- names(X_setpts)
index <- which(vars == "Cvtot_1.1" | vars == "Cvtot_1.5")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t() * MW * 1000
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
x <- seq(0, 10, 1)
df$time <- c(4.0, 4.5, 5.0, 5.5, 6.0)
df

##              median       LCL       UCL time
## Cvtot_1.1 2.9791097 2.7219474 3.2878206  4.0
## Cvtot_1.2 1.0870200 0.9499428 1.2368229  4.5
## Cvtot_1.3 0.7379983 0.6665069 0.8158527  5.0
## Cvtot_1.4 0.5464517 0.4927346 0.6034471  5.5
## Cvtot_1.5 0.4125287 0.3535352 0.4787538  6.0

Plot result

y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y=data_c, ymin=data_c-sd, ymax=data_c+sd), width=.05) + 
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

Software package

Check the software package version that will be used in this example.

devtools::session_info()

## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.0 (2019-04-26)
##  os       macOS Mojave 10.14.5        
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2019-05-23                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package     * version date       lib source                          
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.0)                  
##  backports     1.1.4   2019-04-10 [1] CRAN (R 3.6.0)                  
##  bayesplot   * 1.7.0   2019-05-23 [1] CRAN (R 3.6.0)                  
##  boot          1.3-22  2019-04-02 [1] CRAN (R 3.6.0)                  
##  broom         0.5.2   2019-04-07 [1] CRAN (R 3.6.0)                  
##  callr         3.2.0   2019-03-15 [1] CRAN (R 3.6.0)                  
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.6.0)                  
##  cli           1.1.0   2019-03-19 [1] CRAN (R 3.6.0)                  
##  colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.6.0)                  
##  corrplot    * 0.84    2017-10-16 [1] CRAN (R 3.6.0)                  
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.0)                  
##  data.table    1.12.2  2019-04-07 [1] CRAN (R 3.6.0)                  
##  desc          1.2.0   2018-05-01 [1] CRAN (R 3.6.0)                  
##  devtools      2.0.2   2019-04-08 [1] CRAN (R 3.6.0)                  
##  digest        0.6.18  2018-10-10 [1] CRAN (R 3.6.0)                  
##  dplyr       * 0.8.1   2019-05-14 [1] CRAN (R 3.6.0)                  
##  evaluate      0.13    2019-02-12 [1] CRAN (R 3.6.0)                  
##  forcats     * 0.4.0   2019-02-17 [1] CRAN (R 3.6.0)                  
##  fs            1.3.1   2019-05-06 [1] CRAN (R 3.6.0)                  
##  generics      0.0.2   2018-11-29 [1] CRAN (R 3.6.0)                  
##  getPass       0.2-2   2017-07-21 [1] CRAN (R 3.6.0)                  
##  ggplot2     * 3.1.1   2019-04-07 [1] CRAN (R 3.6.0)                  
##  ggridges      0.5.1   2018-09-27 [1] CRAN (R 3.6.0)                  
##  glue          1.3.1   2019-03-12 [1] CRAN (R 3.6.0)                  
##  gridExtra     2.3     2017-09-09 [1] CRAN (R 3.6.0)                  
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 3.6.0)                  
##  haven         2.1.0   2019-02-19 [1] CRAN (R 3.6.0)                  
##  hexbin      * 1.27.2  2018-01-15 [1] CRAN (R 3.6.0)                  
##  hms           0.4.2   2018-03-10 [1] CRAN (R 3.6.0)                  
##  htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.6.0)                  
##  httr          1.4.0   2018-12-11 [1] CRAN (R 3.6.0)                  
##  inline        0.3.15  2018-05-18 [1] CRAN (R 3.6.0)                  
##  jsonlite      1.6     2018-12-07 [1] CRAN (R 3.6.0)                  
##  knitr         1.22    2019-03-08 [1] CRAN (R 3.6.0)                  
##  labeling      0.3     2014-08-23 [1] CRAN (R 3.6.0)                  
##  lattice       0.20-38 2018-11-04 [1] CRAN (R 3.6.0)                  
##  lazyeval      0.2.2   2019-03-15 [1] CRAN (R 3.6.0)                  
##  loo           2.1.0   2019-03-13 [1] CRAN (R 3.6.0)                  
##  lubridate     1.7.4   2018-04-11 [1] CRAN (R 3.6.0)                  
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.0)                  
##  matrixStats   0.54.0  2018-07-23 [1] CRAN (R 3.6.0)                  
##  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.6.0)                  
##  modelr        0.1.4   2019-02-18 [1] CRAN (R 3.6.0)                  
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.6.0)                  
##  nlme          3.1-139 2019-04-09 [1] CRAN (R 3.6.0)                  
##  pillar        1.4.0   2019-05-11 [1] CRAN (R 3.6.0)                  
##  pkgbuild      1.0.3   2019-03-20 [1] CRAN (R 3.6.0)                  
##  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.6.0)                  
##  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.6.0)                  
##  pksensi     * 1.1.1   2019-05-16 [1] Github (nanhung/pksensi@8435e68)
##  plyr          1.8.4   2016-06-08 [1] CRAN (R 3.6.0)                  
##  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.6.0)                  
##  processx      3.3.1   2019-05-08 [1] CRAN (R 3.6.0)                  
##  ps            1.3.0   2018-12-21 [1] CRAN (R 3.6.0)                  
##  purrr       * 0.3.2   2019-03-15 [1] CRAN (R 3.6.0)                  
##  R6            2.4.0   2019-02-14 [1] CRAN (R 3.6.0)                  
##  Rcpp          1.0.1   2019-03-17 [1] CRAN (R 3.6.0)                  
##  readr       * 1.3.1   2018-12-21 [1] CRAN (R 3.6.0)                  
##  readxl        1.3.1   2019-03-13 [1] CRAN (R 3.6.0)                  
##  remotes       2.0.4   2019-04-10 [1] CRAN (R 3.6.0)                  
##  reshape       0.8.8   2018-10-23 [1] CRAN (R 3.6.0)                  
##  reshape2      1.4.3   2017-12-11 [1] CRAN (R 3.6.0)                  
##  rlang         0.3.4   2019-04-07 [1] CRAN (R 3.6.0)                  
##  rmarkdown     1.12    2019-03-14 [1] CRAN (R 3.6.0)                  
##  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.6.0)                  
##  rstan       * 2.18.2  2018-11-07 [1] CRAN (R 3.6.0)                  
##  rstudioapi    0.10    2019-03-19 [1] CRAN (R 3.6.0)                  
##  rvest         0.3.3   2019-04-11 [1] CRAN (R 3.6.0)                  
##  scales        1.0.0   2018-08-09 [1] CRAN (R 3.6.0)                  
##  sensitivity * 1.15.2  2018-09-02 [1] CRAN (R 3.6.0)                  
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.0)                  
##  StanHeaders * 2.18.1  2019-01-28 [1] CRAN (R 3.6.0)                  
##  stringi       1.4.3   2019-03-12 [1] CRAN (R 3.6.0)                  
##  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 3.6.0)                  
##  testthat      2.1.1   2019-04-23 [1] CRAN (R 3.6.0)                  
##  tibble      * 2.1.1   2019-03-16 [1] CRAN (R 3.6.0)                  
##  tidyr       * 0.8.3   2019-03-01 [1] CRAN (R 3.6.0)                  
##  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.6.0)                  
##  tidyverse   * 1.2.1   2017-11-14 [1] CRAN (R 3.6.0)                  
##  usethis       1.5.0   2019-04-07 [1] CRAN (R 3.6.0)                  
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.6.0)                  
##  xfun          0.6     2019-04-02 [1] CRAN (R 3.6.0)                  
##  xml2          1.2.0   2018-01-24 [1] CRAN (R 3.6.0)                  
##  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.6.0)                  
## 
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

mcsim_version()

## The current GNU MCSim version is 6.1.0

MCSim tutorial 3 - Markov chain Monte Carlo Calibration

Nan-Hung Hsieh

2019/05/23