0 Prerequisites

Import the R packages

library(tidyverse)
library(rstan)
library(bayesplot)
library(corrplot)
library(sensitivity)
library(pksensi)
theme_set(theme_light())

1 Linear Model

1.1 Single chain testing

Use model (linear.model.R) and input (linear_mcmc.in.R) files to run MCMC,

linear.model.R

## linear.model.R ####
Outputs = {y}

# Model Parameters
A = 0; # 
B = 1;

# Statistical parameter
SD_true = 0;

CalcOutputs { y = A + B * t + NormalRandom(0,SD_true); }

End.

linear_mcmc.in.R

## linear_mcmc.in.R ####
MCMC ("MCMC.default.out","", # name of output and restart file
     "",           # name of data file
     4000,0,       # iterations, print predictions flag,
     1,4000,       # printing frequency, iters to print
     10101010);    # random seed (default )

Level {
  
  Distrib(A, Normal, 0, 2); # prior of intercept coefficient
  Distrib(B, Normal, 1, 2); # prior of slope coefficient
  
  Likelihood(y, Normal, Prediction(y), 0.05); #  # exact SD
  
  Simulation {
    PrintStep (y, 0, 10, 1); #seq(0, 10 1)
    Data  (y, 0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0);
  }
}

End.

Plotting the data

x <- seq(0, 10, 1)
y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
plot(x, y)

model <- "linear.model.R"
input <- "linear_mcmc.in.R"
set.seed(1111) 
out1 <- mcsim(model, input)

## * Create 'MCMC.check.out' from the last iteration.

After the simulation, we need to diagnosis the result,

plot(out1$A.1., type = "l", xlab = "Iteration", ylab = "")
lines(out1$B.1., col = 2)
legend("topright", legend = c("Intercept", "Slope"), col = c(1,2), lty = 1)

plot(out1$A.1., out1$B.1., type = "b", xlab = "Intercept", ylab = "Slope")

Basically, the “burn-in” period is not necessary to include in our analysis

str <- ceiling(nrow(out1)/2) + 1
end <- nrow(out1)
j <- c(str:end)
plot(out1$iter[j], out1$A.1.[j], type = "l", xlab = "Iteration", ylab = "Intercept")

plot(out1$iter[j], out1$B.1.[j], type = "l", xlab = "Iteration", ylab = "Slope")

plot(out1$A.1.[j], out1$B.1.[j], type = "b", xlab = "Intercept", ylab = "Slope")

cor(out1$A.1.[j], out1$B.1.[j])

## [1] -0.8736645

Check kernel density

out1$A.1.[j] %>% density() %>% plot()

out1$B.1.[j] %>% density() %>% plot()

The MCMC.check.out is generated by the last iteration of simulation. It is used to quick check the output.

# Visualizing the fitting result 
chk <- read.delim("MCMC.check.out")

plot(chk$Time, chk$Data, xlab = "x", ylab = "y")
lines(chk$Time, chk$Prediction)

1.2 Multi-chains simulation

Use set.seed() to create the different chains with different random seed and run simulation.

# Check convergence
model <- "linear.model.R"
input <- "linear_mcmc.in.R"
set.seed(2234) 
out2 <- mcsim(model, input) # Generate the 2nd chain

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3234) 
out3 <- mcsim(model, input) # Generate the 3rd chain

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4234) 
out4 <- mcsim(model, input) # Generate the 4th chain

## * Create 'MCMC.check.out' from the last iteration.

Use mcmc_array() to create the 3-D array (iterations * chains * parameters) to store all outputs.

sims <- mcmc_array(data = list(out1,out2,out3,out4))
dim(sims)

## [1] 2001    4    6

The bayesplot package can use to visualize output.

parms_name <- c("A.1.","B.1.")
color_scheme_set("mix-blue-red")
mcmc_trace(sims, pars = parms_name, facet_args = list(ncol = 1, strip.position = "left"))

Kernel density plots of MCMC draws

mcmc_dens_overlay(x = sims[j,,], pars = parms_name)

mcmc_dens_overlay(x = sims[j,,], pars = "LnData")

Pairs plots from MCMC draws

mcmc_pairs(sims[j,,], pars = parms_name, off_diag_fun = "hex")

Rank histogram plot (Vehtari et al. 2019)

mcmc_rank_hist(sims, pars = parms_name)

mcmc_rank_overlay(sims, pars = parms_name)

The rstan package can use to analyze and diagnosis output.

monitor(sims[,,parms_name], digit=4)

## Inference for the input samples (4 chains: each with iter=2001; warmup=1000):
## 
##        mean se_mean     sd    2.5%    25%    50%    75%  97.5% n_eff
## A.1. 0.4466  0.0300 0.3002 -0.1278 0.2276 0.4417 0.6563 0.9969   100
## B.1. 0.9980  0.0049 0.0512  0.8949 0.9623 0.9972 1.0332 1.0919   109
##        Rhat
## A.1. 1.0407
## B.1. 1.0352
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Finally, evaluate the confidence interval of prediction through setpoints method. Prepare the input file linear_setpts.in.R

## ./mcsim.linear.model.R.exe linear_setpts.in.R ####
SetPoints ("", "setpts.out", 0, A, B);

Simulation { 
  PrintStep (y, 0, 10, 1);
  Data  (y, -0.0289654, 1.15968, 2.32502, 3.33289, 4.61105, 5.6818, 
         6.89044, 8.13242, 9.27033, 10.4522, 11.6703);
} 

End.

X <- sims[j,,] %>% matrix(nrow = 1000*4) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim("linear.model.R", "linear_setpts.in.R")

## Execute: ./mcsim.linear.model.R.exe modeling/linear_setpts.in.R

head(X_setpts)

##   Iter        A        B    y_1.1   y_1.2   y_1.3   y_1.4   y_1.5   y_1.6
## 1    0 0.704130 0.940713 0.704130 1.64484 2.58556 3.52627 4.46698 5.40770
## 2    1 0.542775 0.940713 0.542775 1.48349 2.42420 3.36491 4.30563 5.24634
## 3    2 0.797207 0.940713 0.797207 1.73792 2.67863 3.61935 4.56006 5.50077
## 4    3 0.797207 0.940713 0.797207 1.73792 2.67863 3.61935 4.56006 5.50077
## 5    4 0.797207 0.930386 0.797207 1.72759 2.65798 3.58837 4.51875 5.44914
## 6    5 0.797207 0.930386 0.797207 1.72759 2.65798 3.58837 4.51875 5.44914
##     y_1.7   y_1.8   y_1.9  y_1.10   y_1.11
## 1 6.34841 7.28912 8.22983 9.17055 10.11130
## 2 6.18705 7.12777 8.06848 9.00919  9.94991
## 3 6.44149 7.38220 8.32291 9.26362 10.20430
## 4 6.44149 7.38220 8.32291 9.26362 10.20430
## 5 6.37952 7.30991 8.24029 9.17068 10.10110
## 6 6.37952 7.30991 8.24029 9.17068 10.10110

vars <- names(X_setpts)
index <- which(vars == "y_1.1" | vars == "y_1.11")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t()
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
x <- seq(0, 10, 1)
df$x <- x
df

##           median       LCL       UCL  x
## y_1.1   0.441327 -0.127816  0.997311  0
## y_1.2   1.441715  0.967708  1.921580  1
## y_1.3   2.442100  2.029488  2.850090  2
## y_1.4   3.439265  3.081570  3.794925  3
## y_1.5   4.432740  4.128170  4.768060  4
## y_1.6   5.427430  5.146256  5.742600  5
## y_1.7   6.425620  6.136910  6.759760  6
## y_1.8   7.422680  7.098845  7.798120  7
## y_1.9   8.421310  8.022710  8.853480  8
## y_1.10  9.422700  8.935832  9.920214  9
## y_1.11 10.418350  9.841940 10.988438 10

y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
obs_data <- data.frame(x, y)
ggplot(df, aes(x = x, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = obs_data, x=x, y=y) +
    labs(x = "x", y = "y")

2 Ethylbenzene PBPK Model

Prepare model (perc.model.R) and input files in the following simulation. The input files include straightforward (EB.in.R), Monte Carlo (EB_mtc.in.R), setpints (EB_setpts.in.R), MCMC (EB_MCMC.in.R) simulation, and MCMC for model evaluation (EB_MCMC_setpts.in.R).

2.1 Model verification

Run EB-PBPK model (“EB.model.R”) and input files (“EB.in.R”), the exposure condition is continuous inhalation of 100 ppm Ethylbenzene (EB) for 4 hours and plot the time-course of blood concentration from 0 to 6 hour.

Modeling

out <- mcsim(model = "EB.model.R", input = "EB.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB.in.R

exp <- readsims(out, exp = 3) 
head(exp)

##   Time       Cvtot
## 1  0.0 0.00000e+00
## 2  0.1 3.43935e-06
## 3  0.2 4.09070e-06
## 4  0.3 4.46969e-06
## 5  0.4 4.71608e-06
## 6  0.5 4.89472e-06

Unit transfer and add study data

MW <- 106.16 # g/mol
exp$Cvtot_mg <- exp$Cvtot * MW * 1000 # mol/L -> mg/L 
data_t <- c(4, 4.5, 5, 5.5, 6) # Obsrvation time 
data_c <- c(1.93, 1.29, 0.87, 0.55, 0.38) # Obsrvation concentration
sd <- c(0.15, 0.15, 0.24, 0.11, 0.05)
sd.up <- data_c + sd
sd.dn <- data_c - sd

Plotting

Compare the simulation result with experiment data as:

plot(data_t, data_c, main = "Exposure: 100 ppm", xlab = "Time (hr)", ylab = "Concentration (mg/L)", xlim = c(0, 6), ylim = c(0, 2.5))
lines(exp$Time, exp$Cvtot_mg)
arrows(data_t, sd.dn, data_t, sd.up, code=3, length=0.02, angle=90)
legend("topright", legend = c("Data", "Model"), lty = c(NA, 1), pch = c(1, NA))

2.2 Uncertainty analysis

Setting the uncertainty for model parameter

Here we used 11 model parameters include body weight (uniform distribution from 35g to 55g), 7 partition coefficients (set 2 times difference from baseline) and 3 Michaelis-Menten reaction rate constant (set 4 times difference from baseline). The distribution setting in input file is as,

Distrib (BW, Uniform, 0.035, 0.055);

Distrib (Pb, Uniform, 21.35, 85.4);
Distrib (Pl, Uniform, 0.98, 3.92);
Distrib (Pf, Uniform, 18.2, 72.8);
Distrib (Pm, Uniform, 0.3045, 1.218);
Distrib (Pvrg, Uniform, 0.98, 3.92);
Distrib (Ppu, Uniform, 0.98, 3.92);
Distrib (Pbr, Uniform, 0.98, 3.92);

Distrib (VmaxC, Uniform, 1.5975, 25.56);
Distrib (VmaxClu, Uniform, 3.35, 53.6);
Distrib (VmaxCvr, Uniform, 4.35, 69.6);

Modeling

out <- mcsim(model = "EB.model.R", input = "EB_mtc.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_mtc.in.R

Data wrangling

vars <- names(out)
index <- which(vars == "Cvtot_1.2" | vars == "Cvtot_1.61")
time <- seq(0.1, 6, 0.1) # corresponding time-points
exp_data <- data.frame(data_t, data_c)

X <- apply(out[,index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t() * MW * 1000 # 95% CI
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
df$time <- time

Plotting

ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y = data_c, ymin=data_c-sd, ymax=data_c+sd), width=.1) +
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

2.3 Morris elementary effects screening

Argument setting

BW <- 0.043  

Pb <- 42.7
Pl <- 1.96
Pf <- 36.4
Pm <- 0.609
Pvrg <- 1.96
Ppu <- 1.96
Pbr <- 1.96

VmaxC <- 6.39
VmaxClu <- 13.4
VmaxCvr <- 17.4

baseline <- c(BW, Pb, Pl, Pf, Pm, Pvrg, Ppu, Pbr, VmaxC, VmaxClu, VmaxCvr)
limit <- c(1.2, rep(2, 7), rep(4, 3))
binf <- baseline/limit
bsup <- baseline*limit

parameters <- c("BW", "Pb", "Pl", "Pf", "Pm", "Pvrg", "Ppu", "Pbr", "VmaxC", "VmaxClu", "VmaxCvr")

Generate parameter matrix

set.seed(1234)
x <- morris(model = NULL, factors = parameters, r = 512,
            design = list(type = "oat", levels = 5, grid.jump = 3),
            binf = binf, bsup = bsup)
X <- cbind(1, x$X)
write.table(X, file = "setpts.out", row.names = F, sep = "\t")

Modeling

out <- mcsim(model = "EB.model.R",  input = "EB_setpts.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_setpts.in.R

Plotting result

str <- which(names(out) == "Cvtot_1.1")
end <- which(names(out) == "Cvtot_1.5")
par(mfrow = c(2,3), mar = c(2,2,1,1))
for (i in str:end){
tell(x, log(out[,i]))
plot(x, xlim=c(0,3), main = paste("Time = ",4 + (i - str)*0.5, "hr"))
abline(v = 0.5)
}

2.4 Uncertainty analysis again

Adjust the range of Pf (to 4 times different), VmaxC (to 8 times different) and VmaxCvr (to 8 times different), and check the possible output.

baseline <- c(BW, Pb, Pl, Pf, Pm, Pvrg, Ppu, Pbr, VmaxC, VmaxClu, VmaxCvr)
new_limit <- c(1.2, 2, 2, 4, rep(2, 4), 8, 4, 8)
binf <- baseline/new_limit
bsup <- baseline*new_limit
dist <- c(rep("Uniform", 8), "LogUniform", "Uniform", "LogUniform")
q.arg <- list(list(min = binf[1], max = bsup[1]),
              list(min = binf[2], max = bsup[2]),
              list(min = binf[3], max = bsup[3]),
              list(min = binf[4], max = bsup[4]),
              list(min = binf[5], max = bsup[5]),
              list(min = binf[6], max = bsup[6]),
              list(min = binf[7], max = bsup[7]),
              list(min = binf[8], max = bsup[8]),
              list(min = binf[9], max = bsup[9]),
              list(min = binf[10], max = bsup[10]),
              list(min = binf[11], max = bsup[11])) 
outputs <- c("Cvtot")
conditions <- c("Cppm = NDoses (2, 100, 0, 0, 4 )")

Run MC simulation

set.seed(2222)
y<-solve_mcsim(mName = "EB.model.R", params = parameters, vars = outputs,
               monte_carlo = 1000, dist = dist, q.arg = q.arg, time = time, 
               condition = conditions, rtol = 1e-9, atol = 1e-11)

## Starting time: 2019-05-29 12:34:22

## * Created input file "sim.in".

## Execute: ./mcsim.EB.model.R.exe sim.in

## Ending time: 2019-05-29 12:34:23

Check result

X <- y * MW * 1000
pksim(X)
points(data_t, data_c)
arrows(data_t, sd.dn, data_t, sd.up, code=3, length=0.02, angle=90)

2.5 MCMC calibration

2.5.1 Single chain testing

1. MCMC Modeling

Run MCMC for single chain the iteration is set to 2000

set.seed(1234)
system.time(out1 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB"))

## * Create 'MCMC.check.out' from the last iteration.

##    user  system elapsed 
##  26.931   0.156  27.113

tail(out1)

##        iter     BW.1.   Pb.1.   Pl.1.   Pf.1.   Pm.1. Pvrg.1.  Ppu.1.
## 9996   9995 0.0454827 23.2139 2.17862 66.7154 1.14244 1.61018 2.46083
## 9997   9996 0.0439305 23.2139 2.17862 69.9049 1.10571 1.62998 1.89805
## 9998   9997 0.0439305 23.2139 1.70261 63.6226 1.10619 2.11519 1.54647
## 9999   9998 0.0466850 23.2139 1.72388 63.6226 1.10619 2.11519 1.24726
## 10000  9999 0.0444217 23.2139 1.22300 63.6226 1.13802 2.56905 1.83958
## 10001 10000 0.0444217 21.7610 1.34718 63.6226 1.21597 2.30974 1.27817
##        Pbr.1. VmaxC.1. VmaxClu.1. VmaxCvr.1.   LnPrior   LnData
## 9996  2.38779  1.85825    11.3039   1.072930 -17.18803 51.37028
## 9997  2.01602  1.85825    11.3039   1.072930 -16.32287 50.48054
## 9998  1.38319  1.85825    11.3039   1.072930 -16.00573 50.93018
## 9999  1.25928  1.85825    11.3039   1.072930 -16.60234 51.15638
## 10000 1.25928  1.85825    11.3039   0.317634 -16.31526 49.00030
## 10001 1.34023  1.86669    17.7267   0.317634 -16.95433 53.25342
##       LnPosterior
## 9996     34.18225
## 9997     34.15767
## 9998     34.92445
## 9999     34.55404
## 10000    32.68505
## 10001    36.29909

Check simulation result with the parameter from the last iteration

check_df <- read.delim("MCMC.check.out")
check_df$Data_mgl <- check_df$Data * MW * 1000
check_df$Prediction_mgl <- check_df$Prediction * MW * 1000

plot(check_df$Time, check_df$Data_mgl, ylim = c(0, 4),
     xlab = "Time (hr)", ylab = "Concentration (mg/L)")
lines(check_df$Time, check_df$Prediction_mgl)

plot(check_df$Data_mgl, check_df$Prediction_mgl, log = "xy", 
     xlab = "Observation", ylab = "Prediction")
abline(0,1) # calibration curve

2. Setpoints analysis

Use output file to do setpoints analysis. The setpoints file (EB_MCMC_setpts.in.R) need to build in this analysis. The setpoints analysis aims to perform the simulation based on the posterior of model output. Through the setpoints analysis, we can check all possible outputs (e.g., 95% confidence interval) and detect the data points that are not covered in the simulation routes.

Run simulation

out <- mcsim(model = "EB.model.R",  input = "EB_MCMC_setpts.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_MCMC_setpts.in.R

tail(out)

##        Iter        BW      Pb      Pl      Pf      Pm    Pvrg     Ppu
## 9996   9995 0.0454827 23.2139 2.17862 66.7154 1.14244 1.61018 2.46083
## 9997   9996 0.0439305 23.2139 2.17862 69.9049 1.10571 1.62998 1.89805
## 9998   9997 0.0439305 23.2139 1.70261 63.6226 1.10619 2.11519 1.54647
## 9999   9998 0.0466850 23.2139 1.72388 63.6226 1.10619 2.11519 1.24726
## 10000  9999 0.0444217 23.2139 1.22300 63.6226 1.13802 2.56905 1.83958
## 10001 10000 0.0444217 21.7610 1.34718 63.6226 1.21597 2.30974 1.27817
##           Pbr   VmaxC VmaxClu  VmaxCvr   Cvtot_1.1   Cvtot_1.2   Cvtot_1.3
## 9996  2.38779 1.85825 11.3039 1.072930 2.63869e-05 9.45762e-06 6.49199e-06
## 9997  2.01602 1.85825 11.3039 1.072930 2.62245e-05 9.23371e-06 6.39859e-06
## 9998  1.38319 1.85825 11.3039 1.072930 2.67043e-05 9.49619e-06 6.46908e-06
## 9999  1.25928 1.85825 11.3039 1.072930 2.66124e-05 9.50143e-06 6.48485e-06
## 10000 1.25928 1.85825 11.3039 0.317634 2.91861e-05 1.12475e-05 7.79156e-06
## 10001 1.34023 1.86669 17.7267 0.317634 2.65054e-05 9.89048e-06 6.78110e-06
##         Cvtot_1.4   Cvtot_1.5
## 9996  4.90290e-06 3.79767e-06
## 9997  4.88669e-06 3.82228e-06
## 9998  4.83268e-06 3.69522e-06
## 9999  4.85824e-06 3.72835e-06
## 10000 5.88077e-06 4.54304e-06
## 10001 5.07774e-06 3.90104e-06

Data processing

str <- which(names(out) == "Cvtot_1.1")
end <- which(names(out) == "Cvtot_1.5")
vars <- names(out)
index <- which(vars == "Cvtot_1.1" | vars == "Cvtot_1.5")
X <- apply(out[,index[1]:index[2]], 2, quantile,  c(0.5, 0.01, 0.99))
dat <- t(X) * MW * 1000
colnames(dat) <- c("median", "LCL", "UCL")
df <- as.data.frame(dat)
df$time <- data_t
head(df)

##              median       LCL       UCL time
## Cvtot_1.1 2.9676603 2.6693614 3.3322669  4.0
## Cvtot_1.2 1.0854117 0.9230166 1.2683891  4.5
## Cvtot_1.3 0.7393545 0.6544435 0.8322466  5.0
## Cvtot_1.4 0.5477867 0.4808740 0.6185402  5.5
## Cvtot_1.5 0.4135378 0.3424891 0.4925962  6.0

Plotting

ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y=data_c, ymin=data_c-sd, ymax=data_c+sd), width=.05) +
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

## Warning: Ignoring unknown aesthetics: y

3. Result diagnosis

Trace plot

str <- ceiling(nrow(out1)/2) + 1
end <- nrow(out1)
j <- c(str:end) # discard burn-in
par(mfrow = c(3,4), mar = c(2,2,2,1))
for (i in 2:12){
  plot(out1[j,1], out1[j,i], type = "l", main = names(out1)[i])
}

Auto correlation

par(mfrow = c(3,4), mar = c(2,2,4,1))
for (i in 2:12){
  acf(out1[j,i], main = names(out1)[i])
}

Density plot

par(mfrow = c(3,4), mar = c(2,2,2,1))
for (i in 2:12){
  out1[j,i] %>% density() %>% plot(type = "l", main = names(out1)[i])
}

Correlation matrix

out1[,2:12] %>% cor() %>% corrplot(method = "number")

2.5.2 Multi chains testing

1. MCMC Modeling

This is an example to run multiple chain without parallel

set.seed(2233)
out2 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(3344)
out3 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

set.seed(4455)
out4 <- mcsim(model = "EB.model.R", input = "EB_mcmc.in.R", dir = "modeling/EB")

## * Create 'MCMC.check.out' from the last iteration.

We can also use parallel computing to run multiple chains at the same time.

parallel::detectCores()

Through parallel package, we can detect the available core in the working computer.

library(rstudioapi)
set.seed(1234)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_1")
set.seed(2345)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_2")

set.seed(3344)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_3")
set.seed(4455)
jobRunScript(workingDir = getwd(), path = "examples/EB_job.R", importEnv = T, exportEnv = "job_4")

Use mcmc_array() to store output as an object

x <- mcmc_array(data = list(out1, out2, out3, out4))

For parallel

x <- mcmc_array(data = list(job_1$out, job_2$out, job_3$out, job_4$out))

dim(x)

## [1] 10001     4    15

2. Result diagnosis

Kernel density

pars <- c("BW.1.", "Pb.1.", "Pl.1.", "Pf.1.", "Pm.1.", "Pvrg.1.", "Ppu.1.", "Pbr.1.", "VmaxC.1.", "VmaxClu.1.", "VmaxCvr.1.")
mcmc_dens_overlay(x[j,,], pars = pars)

mcmc_dens_overlay(x[j,,], pars = "LnData")

Trace plot

mcmc_trace(x[j,,], pars = pars)

Pair plot

mcmc_pairs(x[j,,], pars = c("Pb.1.", "VmaxC.1.", "VmaxClu.1.", "VmaxCvr.1."), off_diag_fun = "hex")

Create report

monitor(x, digit = 4)

## Inference for the input samples (4 chains: each with iter=10001; warmup=5000):
## 
##                  mean  se_mean        sd      2.5%       25%       50%
## iter        7500.0000 455.7220 1443.7004 5125.0000 6250.0000 7500.0000
## BW.1.          0.0437   0.0001    0.0035    0.0368    0.0413    0.0439
## Pb.1.         36.9642   1.0877   13.9473   21.8567   26.2019   32.8111
## Pl.1.          2.2352   0.0185    0.7519    1.0718    1.6201    2.1461
## Pf.1.         59.9893   0.2458    9.9257   43.6858   52.9142   58.7872
## Pm.1.          1.0518   0.0040    0.1447    0.6747    0.9837    1.0909
## Pvrg.1.        2.2446   0.0187    0.7478    1.0800    1.6366    2.1520
## Ppu.1.         2.1127   0.0181    0.7297    1.0542    1.5209    1.9974
## Pbr.1.         2.0731   0.0184    0.7153    1.0499    1.5039    1.9426
## VmaxC.1.       0.9348   0.1430    0.8523    0.0790    0.2609    0.6175
## VmaxClu.1.    18.8364   0.3954    8.1073    7.3497   12.9138   17.3389
## VmaxCvr.1.     2.4943   0.1837    1.3789    0.2576    1.3371    2.5625
## LnPrior      -18.1508   0.0447    1.5948  -21.7430  -19.1185  -17.9961
## LnData        51.1964   0.0545    1.7324   47.0531   50.2363   51.4772
## LnPosterior   33.0456   0.0743    1.9640   28.6907   31.8740   33.2509
##                   75%     97.5% n_eff   Rhat
## iter        8750.0000 9875.0000    10 2.1046
## BW.1.          0.0463    0.0500  2023 1.0009
## Pb.1.         43.4928   74.5595   164 1.0268
## Pl.1.          2.7930    3.7433  1652 1.0030
## Pf.1.         65.6459   83.2916  1631 1.0041
## Pm.1.          1.1621    1.2136  1303 1.0028
## Pvrg.1.        2.7968    3.7517  1591 1.0012
## Ppu.1.         2.6115    3.6927  1634 1.0018
## Pbr.1.         2.5459    3.6722  1511 1.0010
## VmaxC.1.       1.4512    3.1159    36 1.0752
## VmaxClu.1.    23.2227   39.3250   420 1.0084
## VmaxCvr.1.     3.5325    5.1390    56 1.0486
## LnPrior      -17.0218  -15.4684  1271 1.0024
## LnData        52.4411   53.7979  1009 1.0056
## LnPosterior   34.4519   36.2991   699 1.0070
## 
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

3. Evaluation of model fit

Setpoints analysis

X <- x[j,,] %>% matrix(nrow = 20000) 
write.table(X, file = "setpts.out", row.names = F, sep = "\t")
X_setpts <- mcsim(model = "EB.model.R", input = "EB_setpts.in.R", dir = "modeling/EB")

## Execute: ./mcsim.EB.model.R.exe modeling/EB/EB_setpts.in.R

head(X_setpts)

##   Iter        BW      Pb      Pl      Pf      Pm    Pvrg     Ppu     Pbr
## 1    0 0.0449635 25.0732 3.91886 65.6022 1.19026 2.07945 2.36071 1.78874
## 2    1 0.0439721 25.3721 3.83139 65.6022 1.07528 2.21990 2.24628 1.71869
## 3    2 0.0435804 23.6833 3.06635 69.9228 1.07528 2.16576 2.13499 2.02663
## 4    3 0.0445239 23.6833 2.78110 69.9228 1.07528 2.91158 1.91394 1.49011
## 5    4 0.0442719 23.6833 2.78110 69.9228 1.19615 3.09856 2.39087 1.37072
## 6    5 0.0468835 23.6833 2.56889 70.9039 1.20635 3.26622 2.53408 1.07631
##     VmaxC VmaxClu  VmaxCvr   Cvtot_1.1   Cvtot_1.2   Cvtot_1.3   Cvtot_1.4
## 1 2.35516 14.5839 0.350967 2.64230e-05 9.82371e-06 6.70529e-06 5.03758e-06
## 2 2.06901 14.8821 0.350967 2.76765e-05 1.03053e-05 7.10958e-06 5.38696e-06
## 3 2.06901 19.4811 0.350967 2.57663e-05 9.16520e-06 6.37974e-06 4.89496e-06
## 4 1.62405 19.4811 0.350967 2.73915e-05 1.03333e-05 7.26184e-06 5.60817e-06
## 5 1.62405 17.8723 1.282590 2.48476e-05 8.99242e-06 6.15667e-06 4.66588e-06
## 6 1.62405 17.8723 0.774760 2.61512e-05 9.92997e-06 6.89118e-06 5.27809e-06
##     Cvtot_1.5
## 1 3.89105e-06
## 2 4.17727e-06
## 3 3.84259e-06
## 4 4.43175e-06
## 5 3.63895e-06
## 6 4.16262e-06

Tidy data with median and 95% confidence interval

vars <- names(X_setpts)
index <- which(vars == "Cvtot_1.1" | vars == "Cvtot_1.5")
X <- apply(X_setpts[index[1]:index[2]], 2, quantile,  c(0.5, 0.025, 0.975)) %>% t() * MW * 1000
colnames(X) <- c("median", "LCL", "UCL")
df <- as.data.frame(X)
x <- seq(0, 10, 1)
df$time <- c(4.0, 4.5, 5.0, 5.5, 6.0)
df

##              median       LCL       UCL time
## Cvtot_1.1 2.9692474 2.7107367 3.2761018  4.0
## Cvtot_1.2 1.0896262 0.9535288 1.2388556  4.5
## Cvtot_1.3 0.7416141 0.6716165 0.8178252  5.0
## Cvtot_1.4 0.5482596 0.4958690 0.6060427  5.5
## Cvtot_1.5 0.4136487 0.3565374 0.4790795  6.0

Plot result

y <- c(0.0, 0.15, 2.32, 4.33, 4.61, 6.68, 7.89, 7.13, 7.27, 9.4, 10.0)
ggplot(df, aes(x = time, y = median)) +
    geom_ribbon(aes(ymin = LCL, ymax = UCL), fill = "grey70", alpha = 0.5) + 
    geom_line() +
    geom_point(data = exp_data, aes(x = data_t, y = data_c)) +
    geom_errorbar(data = exp_data, aes(x = data_t, y=data_c, ymin=data_c-sd, ymax=data_c+sd), width=.05) + 
    labs(x = "Time (hr)", y = "Concentration (mg/L)")

Software package

Check the software package version that will be used in this example.

devtools::session_info()

## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.0 (2019-04-26)
##  os       Ubuntu 18.04.2 LTS          
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language en_US                       
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2019-05-29                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package     * version date       lib source                          
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.0)                  
##  backports     1.1.4   2019-04-10 [1] CRAN (R 3.6.0)                  
##  bayesplot   * 1.7.0   2019-05-23 [1] CRAN (R 3.6.0)                  
##  boot          1.3-20  2017-07-30 [4] CRAN (R 3.5.0)                  
##  broom         0.5.2   2019-04-07 [1] CRAN (R 3.6.0)                  
##  callr         3.2.0   2019-03-15 [1] CRAN (R 3.6.0)                  
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.6.0)                  
##  cli           1.1.0   2019-03-19 [1] CRAN (R 3.6.0)                  
##  colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.6.0)                  
##  corrplot    * 0.84    2017-10-16 [1] CRAN (R 3.6.0)                  
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.0)                  
##  data.table    1.12.2  2019-04-07 [1] CRAN (R 3.6.0)                  
##  desc          1.2.0   2018-05-01 [1] CRAN (R 3.6.0)                  
##  devtools      2.0.2   2019-04-08 [1] CRAN (R 3.6.0)                  
##  digest        0.6.19  2019-05-20 [1] CRAN (R 3.6.0)                  
##  dplyr       * 0.8.1   2019-05-14 [1] CRAN (R 3.6.0)                  
##  evaluate      0.13    2019-02-12 [1] CRAN (R 3.6.0)                  
##  forcats     * 0.4.0   2019-02-17 [1] CRAN (R 3.6.0)                  
##  fs            1.2.7   2019-03-19 [1] CRAN (R 3.6.0)                  
##  generics      0.0.2   2018-11-29 [1] CRAN (R 3.6.0)                  
##  getPass       0.2-2   2017-07-21 [1] CRAN (R 3.6.0)                  
##  ggplot2     * 3.1.1   2019-04-07 [1] CRAN (R 3.6.0)                  
##  ggridges      0.5.1   2018-09-27 [1] CRAN (R 3.6.0)                  
##  glue          1.3.1   2019-03-12 [1] CRAN (R 3.6.0)                  
##  gridExtra     2.3     2017-09-09 [1] CRAN (R 3.6.0)                  
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 3.6.0)                  
##  haven         2.1.0   2019-02-19 [1] CRAN (R 3.6.0)                  
##  hexbin      * 1.27.3  2019-05-14 [1] CRAN (R 3.6.0)                  
##  hms           0.4.2   2018-03-10 [1] CRAN (R 3.6.0)                  
##  htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.6.0)                  
##  httr          1.4.0   2018-12-11 [1] CRAN (R 3.6.0)                  
##  inline        0.3.15  2018-05-18 [1] CRAN (R 3.6.0)                  
##  jsonlite      1.6     2018-12-07 [1] CRAN (R 3.6.0)                  
##  knitr         1.22    2019-03-08 [1] CRAN (R 3.6.0)                  
##  labeling      0.3     2014-08-23 [1] CRAN (R 3.6.0)                  
##  lattice       0.20-38 2018-11-04 [1] CRAN (R 3.6.0)                  
##  lazyeval      0.2.2   2019-03-15 [1] CRAN (R 3.6.0)                  
##  loo           2.1.0   2019-03-13 [1] CRAN (R 3.6.0)                  
##  lubridate     1.7.4   2018-04-11 [1] CRAN (R 3.6.0)                  
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.0)                  
##  matrixStats   0.54.0  2018-07-23 [1] CRAN (R 3.6.0)                  
##  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.6.0)                  
##  modelr        0.1.4   2019-02-18 [1] CRAN (R 3.6.0)                  
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.6.0)                  
##  nlme          3.1-140 2019-05-12 [4] CRAN (R 3.6.0)                  
##  pillar        1.4.0   2019-05-11 [1] CRAN (R 3.6.0)                  
##  pkgbuild      1.0.3   2019-03-20 [1] CRAN (R 3.6.0)                  
##  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.6.0)                  
##  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.6.0)                  
##  pksensi     * 1.1.1   2019-05-24 [1] Github (nanhung/pksensi@a728559)
##  plyr          1.8.4   2016-06-08 [1] CRAN (R 3.6.0)                  
##  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.6.0)                  
##  processx      3.3.0   2019-03-10 [1] CRAN (R 3.6.0)                  
##  ps            1.3.0   2018-12-21 [1] CRAN (R 3.6.0)                  
##  purrr       * 0.3.2   2019-03-15 [1] CRAN (R 3.6.0)                  
##  R6            2.4.0   2019-02-14 [1] CRAN (R 3.6.0)                  
##  Rcpp          1.0.1   2019-03-17 [1] CRAN (R 3.6.0)                  
##  readr       * 1.3.1   2018-12-21 [1] CRAN (R 3.6.0)                  
##  readxl        1.3.1   2019-03-13 [1] CRAN (R 3.6.0)                  
##  remotes       2.0.4   2019-04-10 [1] CRAN (R 3.6.0)                  
##  reshape       0.8.8   2018-10-23 [1] CRAN (R 3.6.0)                  
##  reshape2      1.4.3   2017-12-11 [1] CRAN (R 3.6.0)                  
##  rlang         0.3.4   2019-04-07 [1] CRAN (R 3.6.0)                  
##  rmarkdown     1.12    2019-03-14 [1] CRAN (R 3.6.0)                  
##  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.6.0)                  
##  rstan       * 2.18.2  2018-11-07 [1] CRAN (R 3.6.0)                  
##  rstudioapi    0.10    2019-03-19 [1] CRAN (R 3.6.0)                  
##  rvest         0.3.3   2019-04-11 [1] CRAN (R 3.6.0)                  
##  scales        1.0.0   2018-08-09 [1] CRAN (R 3.6.0)                  
##  sensitivity * 1.15.2  2018-09-02 [1] CRAN (R 3.6.0)                  
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.0)                  
##  StanHeaders * 2.18.1  2019-01-28 [1] CRAN (R 3.6.0)                  
##  stringi       1.4.3   2019-03-12 [1] CRAN (R 3.6.0)                  
##  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 3.6.0)                  
##  testthat      2.1.1   2019-04-23 [1] CRAN (R 3.6.0)                  
##  tibble      * 2.1.1   2019-03-16 [1] CRAN (R 3.6.0)                  
##  tidyr       * 0.8.3   2019-03-01 [1] CRAN (R 3.6.0)                  
##  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.6.0)                  
##  tidyverse   * 1.2.1   2017-11-14 [1] CRAN (R 3.6.0)                  
##  usethis       1.5.0   2019-04-07 [1] CRAN (R 3.6.0)                  
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.6.0)                  
##  xfun          0.6     2019-04-02 [1] CRAN (R 3.6.0)                  
##  xml2          1.2.0   2018-01-24 [1] CRAN (R 3.6.0)                  
##  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.6.0)                  
## 
## [1] /home/nanhung/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library

mcsim_version()

## The current GNU MCSim version is 6.1.0

Tutorial 3 - Markov chain Monte Carlo Calibration

Nan-Hung Hsieh

2019/05/23 (update: 2019-05-29)