# Sample 10,000 draws from the Beta(45,55) prior
prior_A <- rbeta(n = 10000, shape1 = 45, shape2 = 55)
# Store the results in a data frame
prior_sim <- data.frame(prior_A)
# Construct a density plot of the prior sample
ggplot(prior_sim, aes(x = prior_A)) + geom_density()
# Sample 10000 draws from the Beta(1,1) prior
prior_B <- rbeta(n = 10000, shape1 = 1, shape2 = 1)
# Sample 10000 draws from the Beta(100,100) prior
prior_C <- rbeta(n = 10000, shape1 = 100, shape2 = 100)
# Combine the results in a single data frame
prior_sim <- data.frame(samples = c(prior_A, prior_B, prior_C),
priors = rep(c("A","B","C"), each = 10000))
# Plot the 3 priors
ggplot(prior_sim, aes(x = samples, fill = priors)) +
geom_density(alpha = 0.5)
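The Beta(1,1) prior is flat, while Beta(45,55) and Beta(100,100) concentrate near 0.45 and 0.50; a quick numerical check of the simulated draws makes this concrete:

# Compare the centers & spreads of the three prior samples
prior_sim %>%
  group_by(priors) %>%
  summarize(mean = mean(samples), sd = sd(samples))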
\[ X \sim Bin(n, p) \]
# Define a vector of 1000 p values
p_grid <- seq(from = 0, to = 1, length.out = 1000)
# Simulate 1 poll result for each p in p_grid
# (i.e., the number of successes among 10 respondents for each value of p)
poll_result <- rbinom(1000, size = 10, prob = p_grid)
# Create likelihood_sim data frame
likelihood_sim <- data.frame(p_grid, poll_result)
# Density plots of p_grid grouped by poll_result
ggplot(likelihood_sim, aes(x = p_grid, y = poll_result, group = poll_result)) +
geom_density_ridges()
## Picking joint bandwidth of 0.0413
# Highlight the p_grid densities for the observed poll result, X = 6
ggplot(likelihood_sim, aes(x = p_grid, y = poll_result, group = poll_result,
                           fill = poll_result == 6)) + geom_density_ridges()
\[ \text{prior: } p \sim Beta(45, 55) \]
\[ \text{likelihood: } X \sim Bin(10, p) \]
vote_model <- "model{
# likelihood model for X
X ~ dbin(p, n)
# Prior model for p
p ~ dbeta(a,b)
}"
vote_jags <- jags.model(textConnection(vote_model),
data=list(a=45, b=55, X=6, n=10),
# the inits list fixes the RNG seed for reproducibility
inits = list(.RNG.name="base::Wichmann-Hill",
.RNG.seed=100))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1
## Unobserved stochastic nodes: 1
## Total graph size: 5
##
## Initializing model
vote_sim <- coda.samples(model=vote_jags,
variable.names=c("p"),
n.iter=10000)
# plot simulated posterior
plot(vote_sim, trace=FALSE)
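Since the Beta prior is conjugate to the Binomial likelihood, the exact posterior here is Beta(45 + 6, 55 + 10 - 6) = Beta(51, 59); a quick sketch of this analytic density serves as a check on the simulated posterior above:

# Analytic posterior density: Beta(a + X, b + n - X) = Beta(51, 59)
post_grid <- data.frame(p = seq(0, 1, length.out = 1000))
ggplot(post_grid, aes(x = p)) +
  stat_function(fun = dbeta, args = list(shape1 = 51, shape2 = 59))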
vote_jags <- jags.model(textConnection(vote_model),
data=list(a=1, b=1, X=6, n=10),
# the inits list fixes the RNG seed for reproducibility
inits = list(.RNG.name="base::Wichmann-Hill",
.RNG.seed=100))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1
## Unobserved stochastic nodes: 1
## Total graph size: 5
##
## Initializing model
vote_sim <- coda.samples(model=vote_jags,
variable.names=c("p"),
n.iter=10000)
# plot simulated posterior
plot(vote_sim, trace=FALSE, xlim=c(0,1), ylim=c(0,18))
vote_jags <- jags.model(textConnection(vote_model),
data=list(a=1, b=1, X=220, n=400),
# the inits list fixes the RNG seed for reproducibility
inits = list(.RNG.name="base::Wichmann-Hill",
.RNG.seed=100))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1
## Unobserved stochastic nodes: 1
## Total graph size: 5
##
## Initializing model
vote_sim <- coda.samples(model=vote_jags,
variable.names=c("p"),
n.iter=10000)
# plot simulated posterior
plot(vote_sim, trace=FALSE, xlim=c(0,1), ylim=c(0,18))
\[ \mu \sim N(50, 25^2) \]
\[ \sigma \sim Unif(0, 200) \]
# Take 10000 samples from the m prior
prior_m <- rnorm(10000, mean=50, sd=25)
# Take 10000 samples from the s prior
prior_s <- runif(10000, min=0, max=200)
# Store samples in a data frame
samples <- data.frame(prior_m, prior_s)
# Density plots of the prior_m & prior_s samples
ggplot(samples, aes(x = prior_m)) +
geom_density()
ggplot(samples, aes(x = prior_s)) +
geom_density()
# enter data
subject <- c(308, 309, 310, 330, 331, 332,333, 334, 335, 337, 349, 350,
351, 352, 369, 370, 371, 372)
day_0 <- c(249.5600, 222.7339, 199.0539, 321.5426, 287.6079, 234.8606,
283.8424, 265.4731, 241.6083, 312.3666, 236.1032, 256.2968,
250.5265, 221.6771, 271.9235, 225.2640, 269.8804, 269.4117)
day_3 <- c(321.4398, 204.7070, 232.8416, 285.1330, 320.1153, 309.768,
299.8097, 254.6723, 270.8021, 346.1222, 254.9220, 255.5271,
280.5891, 346.8555, 277.6566, 240.4730, 281.7895, 310.6316)
sleep_study <- data.frame(subject, day_0, day_3)
# Check out the first 6 rows of sleep_study
sleep_study %>% dplyr::select(day_0, day_3) %>% head(6)
## day_0 day_3
## 1 249.5600 321.4398
## 2 222.7339 204.7070
## 3 199.0539 232.8416
## 4 321.5426 285.1330
## 5 287.6079 320.1153
## 6 234.8606 309.7680
# Define diff_3
sleep_study <- sleep_study %>%
mutate(diff_3=day_3-day_0)
# Histogram of diff_3
ggplot(sleep_study, aes(x = diff_3)) +
geom_histogram(binwidth = 20, color = "white")
# Mean and standard deviation of diff_3
sleep_study %>%
summarize(mean(diff_3), sd(diff_3))
## mean(diff_3) sd(diff_3)
## 1 26.34016 37.20758
sleep_model <- "model{
# likelihood model for Y[i]
for(i in 1:length(Y)){
Y[i] ~ dnorm(m, s^(-2))
}
# prior models for m and s
m ~ dnorm(50, 25^(-2))
s ~ dunif(0, 200)
}"
sleep_jags <- jags.model(
textConnection(sleep_model),
data = list(Y = sleep_study$diff_3),
inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 1989)
)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 18
## Unobserved stochastic nodes: 2
## Total graph size: 28
##
## Initializing model
sleep_sim <- coda.samples(model=sleep_jags,
variable.names = c("m", "s"),
n.iter = 10000)
# PLOT the posterior
plot(sleep_sim, trace = FALSE)
# Check out the head of sleep_sim
head(sleep_sim)
## [[1]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## m s
## [1,] 17.25793 31.46256
## [2,] 34.58465 37.88655
## [3,] 36.45476 39.58056
## [4,] 25.00967 39.69494
## [5,] 29.95471 35.90001
## [6,] 28.43890 37.46466
## [7,] 38.32424 35.44081
##
## attr(,"class")
## [1] "mcmc.list"
# Store the chains in a data frame
sleep_chains <- data.frame(sleep_sim[[1]], iter = 1:10000)
# Check out the head of sleep_chains
head(sleep_chains)
## m s iter
## 1 17.25793 31.46256 1
## 2 34.58465 37.88655 2
## 3 36.45476 39.58056 3
## 4 25.00967 39.69494 4
## 5 29.95471 35.90001 5
## 6 28.43890 37.46466 6
# Use plot() to construct trace plots of the m and s chains
plot(sleep_sim, density=FALSE)
# Use ggplot() to construct a trace plot of the m chain
ggplot(sleep_chains, aes(x = iter, y = m)) +
geom_line()
# Trace plot of the first 100 iterations of the m chain
ggplot(sleep_chains[1:100,], aes(x = iter, y = m)) +
geom_line()
# Use plot() to construct density plots of the m and s chains
plot(sleep_sim, trace=FALSE)
# Use ggplot() to construct a density plot of the m chain
ggplot(sleep_chains, aes(x = m)) +
geom_density()
# COMPILE the model
sleep_jags_multi <- jags.model(textConnection(sleep_model),
data = list(Y = sleep_study$diff_3),
n.chains = 4 ) # setting 4 chains
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 18
## Unobserved stochastic nodes: 2
## Total graph size: 28
##
## Initializing model
# SIMULATE the posterior
sleep_sim_multi <- coda.samples(model = sleep_jags_multi,
variable.names = c("m", "s"), n.iter = 1000)
# Check out the head of sleep_sim_multi
head(sleep_sim_multi)
## [[1]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## m s
## [1,] 24.49748 30.29383
## [2,] 20.50823 35.55945
## [3,] 30.83271 34.76266
## [4,] 39.57329 38.54255
## [5,] 41.34857 38.21380
## [6,] 24.69495 39.48386
## [7,] 32.24674 33.13512
##
## [[2]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## m s
## [1,] 28.67653 37.31694
## [2,] 32.90644 39.04578
## [3,] 25.81167 39.68650
## [4,] 10.14015 39.94096
## [5,] 25.11850 47.07013
## [6,] 38.61209 35.23329
## [7,] 31.84079 34.34093
##
## [[3]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## m s
## [1,] 25.30581 45.58583
## [2,] 27.72148 44.33929
## [3,] 20.24952 59.45800
## [4,] 38.61860 64.96132
## [5,] 49.82786 62.94610
## [6,] 25.29672 58.27316
## [7,] 35.48307 33.01691
##
## [[4]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## m s
## [1,] 28.92389 39.34942
## [2,] 33.85626 43.53444
## [3,] 33.54002 43.77411
## [4,] 25.05875 40.07052
## [5,] 24.34086 33.54599
## [6,] 37.55552 41.75491
## [7,] 17.62291 49.34885
##
## attr(,"class")
## [1] "mcmc.list"
# Construct trace plots of the m and s chains
plot(sleep_sim_multi, density=FALSE)
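Beyond visual inspection of these parallel chains, the coda package (loaded with rjags) provides numerical diagnostics; a brief sketch:

# Gelman-Rubin statistic: values near 1 suggest the 4 chains have converged
gelman.diag(sleep_sim_multi)
# Effective sample size of the original single-chain simulation
effectiveSize(sleep_sim)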
# SIMULATE the posterior
sleep_sim_1 <- coda.samples(model = sleep_jags, variable.names = c("m", "s"), n.iter = 1000)
# Summarize the m and s chains of sleep_sim_1
summary(sleep_sim_1)
##
## Iterations = 11001:12000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## m 29.55 9.032 0.2856 0.2856
## s 40.81 7.654 0.2420 0.3414
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## m 12.00 23.62 29.31 35.58 47.48
## s 28.34 35.25 39.97 45.41 58.60
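Note that the Naive SE is just \(SD/\sqrt{\text{sample size}}\): for \(m\), \(9.032/\sqrt{1000} \approx 0.286\). Running the chain ten times longer (below) therefore shrinks it by a factor of roughly \(\sqrt{10}\).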
# RE-SIMULATE the posterior
sleep_sim_2 <- coda.samples(model = sleep_jags, variable.names = c("m", "s"), n.iter = 10000)
# Summarize the m and s chains of sleep_sim_2
summary(sleep_sim_2)
##
## Iterations = 12001:22000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## m 29.32 9.069 0.09069 0.09396
## s 40.14 7.508 0.07508 0.11514
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## m 11.83 23.24 29.27 35.22 47.55
## s 28.60 34.87 39.07 44.30 57.78
# COMPILE the model
sleep_jags_multi <- jags.model(textConnection(sleep_model),
data = list(Y = sleep_study$diff_3),
n.chains = 4,
inits = list(.RNG.name = "base::Wichmann-Hill",
.RNG.seed = 1989))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 18
## Unobserved stochastic nodes: 2
## Total graph size: 28
##
## Initializing model
sleep_sim_3 <- coda.samples(model = sleep_jags, variable.names = c("m", "s"),
n.iter = 10000)
# Summarize the m and s chains of sleep_sim_3
summary(sleep_sim_3)
##
## Iterations = 22001:32000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## m 29.25 8.929 0.08929 0.08939
## s 40.14 7.378 0.07378 0.11203
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## m 11.94 23.19 29.10 35.08 47.15
## s 28.68 34.80 39.17 44.40 57.28
\[ Y_i \sim N(m_i, s^2) \] \[ m_i = a + bX_i \]
\[ a \sim N(0, 200^2) \]
\[ b \sim N(1, 0.5^2) \]
\[ s \sim Unif(0, 20) \]
# Take 10000 samples from the a, b, & s priors
a <- rnorm(10000, mean=0, sd=200)
b <- rnorm(10000, mean=1, sd=.5)
s <- runif(10000, min=0, max=20)
# Store samples in a data frame
samples <- data.frame(set = 1:10000, a, b, s)
# Construct density plots of the prior samples
ggplot(samples, aes(x = a)) +
geom_density()
ggplot(samples, aes(x = b)) +
geom_density()
ggplot(samples, aes(x = s)) +
geom_density()
# Replicate the first 12 parameter sets 50 times each
prior_scenarios_rep <- bind_rows(replicate(n = 50, expr = samples[1:12, ],
simplify = FALSE))
# Simulate 50 height & weight data points for each parameter set
prior_simulation <- prior_scenarios_rep %>%
mutate(height = rnorm(n = 600, mean = 170, sd = 10)) %>%
mutate(weight = rnorm(n = 600, mean = a + b * height, sd = s))
# Plot the simulated data & regression model for each parameter set
ggplot(prior_simulation, aes(x = height, y = weight)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, size = 0.75) +
facet_wrap(~ set)
weight_model <- "model{
#likelihood model for Y[i]
for(i in 1:length(Y)) {
Y[i] ~ dnorm(m[i], s^(-2))
m[i] <- a + b*X[i]
}
# prior models for a, b, s
a ~ dnorm(0, 200^(-2))
b ~ dnorm(1, 0.5^(-2))
s ~ dunif(0, 20)
}
"
weight_jags <- jags.model(textConnection(weight_model),
data=list(X=bdims$hgt, Y=bdims$wgt),
inits = list(.RNG.name="base::Wichmann-Hill",
.RNG.seed=2018))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 507
## Unobserved stochastic nodes: 3
## Total graph size: 1321
##
## Initializing model
dim(bdims)
## [1] 507 25
head(bdims$hgt)
## [1] 174.0 175.3 193.5 186.5 187.2 181.5
head(bdims$wgt)
## [1] 65.6 71.8 80.7 72.6 78.8 74.8
weight_sim <- coda.samples(model=weight_jags,
variable.names = c("a", "b", "s"),
n.iter=10000)
plot(weight_sim)
# A longer run; note that in coda.samples thinning is set via `thin =`,
# burn-in via update() on the compiled model, and the number of chains
# via n.chains in jags.model
weight_sim <- coda.samples(model = weight_jags,
                           variable.names = c("a", "b", "s"),
                           n.iter = 100000)
plot(weight_sim)
summary(weight_sim)
##
## Iterations = 11001:111000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a -104.308 7.9352 0.0250935 0.673047
## b 1.014 0.0463 0.0001464 0.003925
## s 9.331 0.2950 0.0009328 0.001213
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a -119.6373 -109.728 -104.421 -99.077 -88.410
## b 0.9208 0.983 1.014 1.045 1.103
## s 8.7762 9.129 9.322 9.524 9.934
table(weight_sim[[1]][,2] > 1.1)
##
## FALSE TRUE
## 97150 2850
mean(weight_sim[[1]][,2] > 1.1)
## [1] 0.0285
# Calculate the estimated posterior mean of b
mean(weight_sim[[1]][,2])
## [1] 1.013526
# Plot the posterior mean regression model
ggplot(bdims, aes(x = hgt, y = wgt)) +
geom_point() +
geom_abline(intercept = mean(weight_sim[[1]][,1]),
slope = mean(weight_sim[[1]][,2]), color = "red")
# Visualize the range of 20 posterior regression models
ggplot(bdims, aes(x = hgt, y = wgt)) +
geom_point() +
geom_abline(intercept = weight_sim[[1]][,1][1:20],
slope = weight_sim[[1]][,2][1:20], color = "gray", size = 0.25)
# Calculate the 90% posterior credible interval for b
ci_90 <- quantile(weight_sim[[1]][,2], probs = c(0.05, .950))
ci_90
## 5% 95%
## 0.9363988 1.0861830
# Store the b chain in a data frame
b_chain <- data.frame(b = as.numeric(weight_sim[[1]][, 2]))
# Mark the 90% credible interval on a density plot of the b chain
ggplot(b_chain, aes(x = b)) +
  geom_density() +
  geom_vline(xintercept = ci_90, color = "red")
# Compute the 95% & 90% credible intervals for b
ci_95 <- quantile(b_chain$b, probs = c(0.025, 0.975))
ci_90 <- quantile(b_chain$b, probs = c(0.050, 0.950))
# Posterior probability that b exceeds 1.1
mean(b_chain$b > 1.1)
## [1] 0.0285
ggplot(b_chain, aes(x = b)) +
  geom_density() +
  geom_vline(xintercept = ci_95, color = "darkblue") +
  geom_vline(xintercept = ci_90, color = "darkgreen") +
  geom_vline(xintercept = 1.1, color = "red")
\[ Y_i = -105.959 + 1.017X_i \]
-105.959 + 1.017*180 # mean weight among persons of 180cm
## [1] 77.101
weight_sim_data <- data.frame(weight_sim[[1]][,1], weight_sim[[1]][,2],
weight_sim[[1]][,3])
colnames(weight_sim_data) <- c("a", "b", "s")
weight_sim_data <- weight_sim_data %>% mutate(m_180=a+b*180)
mean(weight_sim_data$m_180) # mean weight among persons of 180cm
## [1] 78.12649
sd(weight_sim_data$m_180)
## [1] 0.5840351
ggplot(weight_sim_data, aes(x=m_180)) +
geom_density() + geom_vline(xintercept = mean(weight_sim_data$m_180),
color = "red")
ci_95 <- quantile(weight_sim_data$m_180, c(.0250, .975))
ci_95
## 2.5% 97.5%
## 76.97520 79.27133
ggplot(weight_sim_data, aes(x=m_180)) +
geom_density() + geom_vline(xintercept = mean(weight_sim_data$m_180),
color = "red") +
geom_vline(xintercept = ci_95, color = "darkgreen")
\[ Y \sim N(m_{180}, s^2) \] \[ m_{180} = a + b \cdot 180 \]
head( weight_sim_data, 1)
## a b s m_180
## 1 -98.73789 0.9817596 8.990471 77.97883
set.seed(2000)
rnorm(n=1, mean=77.44675, sd = 9.652081)
## [1] 69.20539
# Simulate 1 prediction under the first parameter set
rnorm(n = 1, mean = weight_sim_data$m_180[1], sd = weight_sim_data$s[1])
## [1] 74.80619
# Simulate 1 prediction under the second parameter set
rnorm(n = 1, mean = weight_sim_data$m_180[2], sd = weight_sim_data$s[2])
## [1] 86.55272
# Simulate & store 1 prediction under each parameter set
weight_sim_data <- weight_sim_data %>%
mutate(Y_180 = rnorm(n = 100000, mean = m_180, sd = s))
# Print the first 6 parameter sets & predictions
head(weight_sim_data)
## a b s m_180 Y_180
## 1 -98.73789 0.9817596 8.990471 77.97883 93.63382
## 2 -99.12228 0.9837447 9.662376 77.95176 86.80325
## 3 -98.62796 0.9844134 9.544523 78.56646 67.33636
## 4 -99.63689 0.9886564 9.332551 78.32126 60.23069
## 5 -99.51294 0.9899063 9.131669 78.67019 97.79537
## 6 -100.02678 0.9857338 9.151524 77.40530 79.46874
# Construct a posterior credible interval for the prediction
ci_180 <- quantile(weight_sim_data$Y_180, probs = c(.0250, .975))
ci_180
## 2.5% 97.5%
## 59.70542 96.35238
# Construct a density plot of the posterior predictions
ggplot(weight_sim_data, aes(x = Y_180)) +
geom_density() +
geom_vline(xintercept = ci_180, color = "red")
# Visualize the credible interval on a scatterplot of the data
ggplot(bdims, aes(x = hgt, y = wgt)) +
geom_point() +
geom_abline(intercept = mean(weight_sim_data$a), slope = mean(weight_sim_data$b),
color = "red") +
geom_segment(x = 180, xend = 180, y = ci_180[1], yend = ci_180[2], color = "red")
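Note that the posterior predictive interval for \(Y_{180}\) is far wider than the credible interval for the trend \(m_{180}\), since it also reflects individual-level variability \(s\); comparing the widths makes this concrete:

# Width of the 95% interval for the trend vs. the posterior prediction
diff(ci_95)   # interval for m_180: roughly 2.3 kg wide
diff(ci_180)  # interval for Y_180: roughly 36.6 kg wide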
\(Y_i\) = trail volume (number of users) on day \(i\); \(X_i\) = 1 if day \(i\) is a weekday, 0 if it is a weekend
\[ Y_i \sim N(m_i, s^2) \]
RailTrail %>% dplyr::select(volume, weekday) %>% head()
## volume weekday
## 1 501 TRUE
## 2 419 TRUE
## 3 397 TRUE
## 4 385 FALSE
## 5 200 TRUE
## 6 375 TRUE
class(RailTrail$weekday)
## [1] "logical"
RailTrail <- RailTrail %>% mutate(weekday=as.factor(weekday))
class(RailTrail$weekday)
## [1] "factor"
# Confirm that weekday is a factor variable
class(RailTrail$weekday)
## [1] "factor"
# Construct a density plot of volume by weekday
ggplot(RailTrail, aes(x = volume, fill = weekday)) +
geom_density(alpha = 0.5)
rail_model_1 <- "model{
# likelihood model for Y[i]
for (i in 1:length(Y)) {
Y[i] ~ dnorm(m[i], s^(-2))
m[i] <- a + b[X[i]]
}
# Prior models for a, b, s
a ~ dnorm(400, 100^(-2))
s ~ dunif(0, 200)
# baseline (weekend) level of the categorical variable
b[1] <- 0
# weekday effect relative to the weekend baseline
b[2] ~ dnorm(0, 200^(-2))
}"
rail_jags_1 <- jags.model(
textConnection(rail_model_1),
data = list(Y = RailTrail$volume, X = RailTrail$weekday),
inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 10)
)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 90
## Unobserved stochastic nodes: 3
## Total graph size: 194
##
## Initializing model
rail_sim_1 <- coda.samples(model = rail_jags_1,
variable.names = c("a", "b", "s"), n.iter = 10000)
# Store the chains in a data frame
rail_chains_1 <- data.frame(rail_sim_1[[1]])
# PLOT the posterior
plot(rail_sim_1)
summary(rail_sim_1)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 427.9 23.331 0.23331 0.5445
## b[1] 0.0 0.000 0.00000 0.0000
## b[2] -77.0 28.120 0.28120 0.6553
## s 124.3 9.533 0.09533 0.1313
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a 381.7 412.78 428.00 443.34 474.50
## b[1] 0.0 0.00 0.00 0.00 0.00
## b[2] -132.9 -95.77 -76.77 -58.92 -20.61
## s 107.7 117.45 123.74 130.40 144.69
# Construct a chain of values for the typical weekday volume
rail_chains_1 <- rail_chains_1 %>%
mutate(weekday_mean = a + b.2.)
# Construct a density plot of the weekday chain
ggplot(rail_chains_1, aes(x = weekday_mean)) +
geom_density()
# 95% credible interval for typical weekday volume
ci_95 <- quantile(rail_chains_1$weekday_mean, c(.0250, .975))
ci_95
## 2.5% 97.5%
## 319.8023 381.3127
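Since b[2] is the weekday effect relative to the weekend baseline a, the posterior probability that typical weekday volume is lower than typical weekend volume is simply the share of the b[2] chain below 0 (well above 0.975, given the quantiles above):

# Posterior probability that weekday volume is lower than weekend volume
mean(rail_chains_1$b.2. < 0)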
ggplot(data=RailTrail, aes(x=hightemp, y=volume, color=weekday)) +
geom_point()
\[ Y_i \sim N(m_i, s^2) \]
\[ m_i = a + bX_i + cZ_i \]
ggplot(data=RailTrail, aes(x=hightemp, y=volume, color=weekday)) +
geom_point() + geom_smooth(method=lm, se=FALSE, fullrange=FALSE)
\[ a \sim N(0, 200^2) \]
\[ b \sim N(0, 200^2) \]
\[ c \sim N(0, 20^2) \]
\[ s \sim Unif(0, 200) \]
rail_model_2 <- "model{
# Likelihood model for Y[i]
for(i in 1:length(Y)) {
Y[i] ~ dnorm(m[i], s^(-2))
m[i] <- a + b[X[i]] + c * Z[i]
}
# Prior models for a, b, c, s
a ~ dnorm(0, 200^(-2))
b[1] <- 0
b[2] ~ dnorm(0, 200^(-2))
c ~ dnorm(0, 20^(-2))
s ~ dunif(0, 200)
}"
rail_jags_2 <- jags.model(textConnection(rail_model_2),
data = list(Y = RailTrail$volume, X = RailTrail$weekday,
Z = RailTrail$hightemp),
inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 10))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 90
## Unobserved stochastic nodes: 4
## Total graph size: 385
##
## Initializing model
rail_sim_2 <- coda.samples(model = rail_jags_2,
variable.names = c("a", "b", "c", "s"),
n.iter = 10000)
# Store the chains in a data frame
rail_chains_2 <- data.frame(rail_sim_2[[1]])
summary(rail_sim_2)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 36.592 60.6238 0.606238 4.19442
## b[1] 0.000 0.0000 0.000000 0.00000
## b[2] -49.610 23.4930 0.234930 0.55520
## c 5.417 0.8029 0.008029 0.05849
## s 103.434 7.9418 0.079418 0.11032
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a -83.443 -4.631 38.160 78.413 150.306
## b[1] 0.000 0.000 0.000 0.000 0.000
## b[2] -96.181 -65.468 -49.547 -33.633 -3.644
## c 3.865 4.865 5.402 5.965 7.007
## s 89.466 97.766 102.875 108.503 120.205
# plot the posterior
plot(rail_sim_2)
ggplot(RailTrail, aes(y = volume, x = hightemp, color = weekday)) +
geom_point() +
geom_abline(intercept = mean(rail_chains_2$a),
slope = mean(rail_chains_2$c), color = "red") +
geom_abline(intercept = mean(rail_chains_2$a) + mean(rail_chains_2$b.2.),
slope = mean(rail_chains_2$c), color = "turquoise3")
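As a companion to the Poisson predictions below, one can also read typical volumes off this Normal model, e.g. for an 80-degree weekend vs. weekday:

# Posterior mean of the typical volume on an 80-degree weekend & weekday
rail_chains_2 <- rail_chains_2 %>%
  mutate(weekend_80 = a + c * 80,
         weekday_80 = a + b.2. + c * 80)
mean(rail_chains_2$weekend_80)
mean(rail_chains_2$weekday_80)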
\[ Y \sim Pois(l) \]
\[ Y_i \sim Pois(l_i) \text{ where } l_i > 0 \]
\[ \log(l_i) = a + bX_i + cZ_i \]
We model the log of the trend in \(Y\) as a linear combination of \(X\) and \(Z\), which guarantees that \(l_i\) can never be negative: \[ l_i = e^{a + bX_i + cZ_i} \]
\[ a \sim N(0, 200^2) \]
\[ b \sim N(0, 2^2) \]
\[ c \sim N(0, 2^2) \]
poisson_model <- "model{
# Likelihood model for Y[i]
for(i in 1:length(Y)) {
Y[i] ~ dpois(l[i])
log(l[i]) <- a + b[X[i]] + c*Z[i]
}
# Prior models for a, b, c
a ~ dnorm(0, 200^(-2))
b[1] <- 0
b[2] ~ dnorm(0, 2^(-2))
c ~ dnorm(0, 2^(-2))
}"
poisson_jags <- jags.model(textConnection(poisson_model),
data = list(Y = RailTrail$volume, X = RailTrail$weekday,
Z = RailTrail$hightemp),
inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 10))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 90
## Unobserved stochastic nodes: 3
## Total graph size: 441
##
## Initializing model
poisson_sim <- coda.samples(model = poisson_jags,
variable.names = c("a", "b", "c"), n.iter = 10000)
# Store the chains in a data frame
poisson_chains <- data.frame(poisson_sim[[1]])
summary(poisson_sim)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 5.01352 0.0329264 3.293e-04 3.281e-03
## b[1] 0.00000 0.0000000 0.000e+00 0.000e+00
## b[2] -0.12800 0.0115978 1.160e-04 3.518e-04
## c 0.01426 0.0004225 4.225e-06 4.121e-05
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a 4.94808 4.9932 5.01332 5.03295 5.08310
## b[1] 0.00000 0.0000 0.00000 0.00000 0.00000
## b[2] -0.15104 -0.1359 -0.12807 -0.12016 -0.10524
## c 0.01338 0.0140 0.01426 0.01452 0.01509
# plot the posterior
plot(poisson_sim)
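Because the model is linear on the log scale, the coefficients act multiplicatively on the original scale: exp(c) is the factor by which typical volume changes per extra degree, and exp(b[2]) is the weekday-to-weekend factor:

# Multiplicative interpretation of the Poisson coefficients
exp(mean(poisson_chains$c))     # roughly 1.014 per 1-degree increase
exp(mean(poisson_chains$b.2.))  # weekdays at roughly 88% of weekend volume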
ggplot(RailTrail, aes(y = volume, x = hightemp, color = weekday)) +
geom_point() +
stat_function(fun = function(x){exp(mean(poisson_chains$a) +
mean(poisson_chains$c) * x)},
color = "red") +
stat_function(fun = function(x){exp(mean(poisson_chains$a) +
mean(poisson_chains$b.2.) +
mean(poisson_chains$c) * x)},
color = "turquoise3")
poisson_chains <- poisson_chains %>%
mutate(l_weekend = exp(a + c*80)) %>%
mutate(l_weekday = exp(a + b.2. + c*80))
mean(poisson_chains$l_weekend)
## [1] 470.7265
mean(poisson_chains$l_weekday)
## [1] 414.1674
# Construct a 95% CI for typical volume on 80 degree weekend
ci_95_weekend <- quantile(poisson_chains$l_weekend, c(.0250, .975))
ci_95_weekend
## 2.5% 97.5%
## 462.1431 479.3101
# Construct a 95% CI for typical volume on 80 degree weekday
ci_95_weekday <- quantile(poisson_chains$l_weekday, c(.0250, .975))
ci_95_weekday
## 2.5% 97.5%
## 407.4644 420.7102
poisson_chains <- poisson_chains %>%
mutate(Y_weekday = rpois(n = 10000, lambda = l_weekday))
# Construct a density plot of the posterior weekday predictions
ggplot(poisson_chains, aes(x = Y_weekday)) +
geom_density()
# Posterior probability that weekday volume is less than 400
mean(poisson_chains$Y_weekday < 400)
## [1] 0.247
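The same posterior predictive machinery applies to weekends; a short sketch simulating weekend counts for an 80-degree day and comparing them to the weekday predictions:

# Simulate & store 1 weekend prediction under each parameter set
poisson_chains <- poisson_chains %>%
  mutate(Y_weekend = rpois(n = 10000, lambda = l_weekend))
# Posterior predictive probability that weekend volume exceeds weekday volume
mean(poisson_chains$Y_weekend > poisson_chains$Y_weekday)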