# Parameters
theta <- 0.5
n <- 10
k <- 7
# Compute binomial probability
prob <- choose(n, k) * theta^k * (1 - theta)^(n - k)
prob
## [1] 0.1171875
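# Optional check: dbinom() applies the same binomial PMF, so it should return
# the value computed by hand above (0.1171875).
dbinom(k, size = n, prob = theta)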
# Plot full binomial distribution for comparison
k_vals <- 0:n
probs <- dbinom(k_vals, size = n, prob = theta)
library(ggplot2)
ggplot(data.frame(k = k_vals, prob = probs), aes(x = k, y = prob)) +
  geom_col(fill = "steelblue") +
  geom_col(data = data.frame(k = 7, prob = prob), fill = "purple") +
  labs(
    title = "Binomial Probability: P(k | n = 10, θ = 0.5)",
    subtitle = "Purple bar shows probability of 7 heads",
    x = "Number of Heads (k)",
    y = "Probability"
  ) +
  theme_minimal()

# Observed data
k <- 7
n <- 10
# Grid of theta values
theta_vals <- seq(0, 1, length.out = 100)
# Compute likelihood (up to proportional constant)
likelihood_vals <- dbinom(k, size = n, prob = theta_vals)
# Normalize for plotting (optional)
likelihood_vals <- likelihood_vals / max(likelihood_vals)
# Plot
library(ggplot2)
ggplot(data.frame(theta = theta_vals, likelihood = likelihood_vals),
       aes(x = theta, y = likelihood)) +
  geom_line(color = "purple", linewidth = 1.2) +
  geom_vline(xintercept = k/n, linetype = "dashed", color = "gray40") +
  labs(
    title = "Likelihood Function for θ Given 7 Heads in 10 Flips",
    subtitle = "Dashed line shows MLE at θ = 0.7",
    x = expression(theta),
    y = "Relative Likelihood"
  ) +
  theme_minimal()
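# Optional sketch: the likelihood is maximized at theta = k/n. A quick numerical
# confirmation with optimize(), searching over the unit interval:
mle <- optimize(function(p) dbinom(k, size = n, prob = p),
                interval = c(0, 1), maximum = TRUE)
mle$maximum  # approximately 0.7 = k/n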

library(tibble)
library(brms)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.5.2
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
##
## ar
# observed data: 7 heads in 10 flips
data <- tibble(y = 7, n = 10)
fit <- brm(
  y | trials(n) ~ 1,
  data = data,
  family = binomial(),
  prior = prior(normal(0, 2), class = "Intercept"),
  seed = 123,
  iter = 2000, warmup = 1000, chains = 2,
  refresh = 0
)
## Compiling Stan program...
## Start sampling
summary(fit)
## Family: binomial
## Links: mu = logit
## Formula: y | trials(n) ~ 1
## Data: data (Number of observations: 1)
## Draws: 2 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 2000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.79 0.70 -0.56 2.18 1.00 707 846
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
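# The Intercept is reported on the logit scale. A small sketch converting the
# point estimate and 95% CI to the probability scale with plogis(); transforming
# the interval endpoints is valid because plogis() is monotone:
plogis(fixef(fit)["Intercept", c("Estimate", "Q2.5", "Q97.5")])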
theta_vals <- seq(0, 1, length.out = 100)
likelihood <- dbinom(7, size = 10, prob = theta_vals)
plot(theta_vals, likelihood, type = "l", lwd = 2, col = "steelblue",
     xlab = expression(theta), ylab = "Likelihood",
     main = "Likelihood of θ given 7 Heads in 10 Flips")
abline(v = 0.7, col = "red", lty = 2)

library(tidyr)
theta <- seq(0, 1, length.out = 500)
# Likelihood: Binomial PMF
likelihood <- dbinom(7, size = 10, prob = theta)
# Prior: Beta(1, 1) = Uniform
prior <- dbeta(theta, 1, 1)
# Unnormalized Posterior
posterior <- likelihood * prior
df <- tibble(theta, likelihood, prior, posterior)
df %>%
  pivot_longer(-theta) %>%
  ggplot(aes(theta, value, color = name)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Likelihood × Prior → Posterior (Unnormalized)",
       x = expression(theta),
       y = "Density",
       color = "Component") +
  scale_color_manual(values = c("likelihood" = "darkorange",
                                "prior" = "steelblue",
                                "posterior" = "purple"))

library(ggplot2)
posterior_samples <- as_draws_df(fit)
posterior_theta <- plogis(posterior_samples$b_Intercept) # convert from log-odds to probability
ggplot(data.frame(theta = posterior_theta), aes(x = theta)) +
  geom_histogram(bins = 40, fill = "purple", alpha = 0.8) +
  labs(title = "Posterior Samples of θ from MCMC",
       x = expression(theta),
       y = "Frequency")
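# A compact numeric summary of the same draws: posterior mean and 95% credible
# interval for theta. The interval should agree with plogis() applied to the
# Intercept interval reported by summary(fit).
mean(posterior_theta)
quantile(posterior_theta, probs = c(0.025, 0.975))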

# ------------------ FULL, SELF-CONTAINED CODE ------------------
# Action box helper (HTML)
action_box <- function(title, items, border = "#FF4500", bg = "#000000") { # orange border, black background by default
html <- paste0(
'<div style="border-left:6px solid ', border,
';padding:12px 14px;background:', bg,
';border-radius:10px;margin:14px 0 6px 0">',
'<h4 style="margin:0 0 8px 0;color:', border, ';font-weight:bold;">',
title, '</h4>',
'<ul style="margin:0;list-style-type:disc;color:white;">',
paste0("<li>", items, "</li>", collapse = ""),
"</ul></div>"
)
cat(html)
}
action_box(
title = "Action Items A: Coin Flip Example",
items = c(
"<b>What is the role of the likelihood in shaping the posterior for θ?</b><br>
<span style='margin-left:20px; color:#CCCCCC; font-style:italic;'>The likelihood measures how well each value of θ fits the observed data (7 heads in 10 flips). It peaks near 0.7, and because the prior is weak, it largely determines the posterior.</span><br><br>",
"<b>How does the prior (Beta(1,1)) influence the posterior — or does it?</b><br>
<span style='margin-left:20px; color:#CCCCCC; font-style:italic;'>The Beta(1,1) prior produced an error in this brms specification, so it was replaced with a weakly informative Normal(0, 2) prior on the logit-scale Intercept. That prior is nearly flat over plausible values of θ, so it has very little influence on the posterior.</span><br><br>",
"<b>Based on the MCMC results, what range of θ values are most plausible?</b><br>
<span style='margin-left:20px; color:#CCCCCC; font-style:italic;'>Transforming the 95% credible interval for the Intercept (-0.56 to 2.18 on the logit scale) to the probability scale gives θ roughly between 0.36 and 0.90, with the posterior centered near 0.7.</span><br><br>",
"<b>How does this differ from simply reporting a single maximum likelihood estimate (θ̂ = 0.7)?</b><br>
<span style='margin-left:20px; color:#CCCCCC; font-style:italic;'>The MLE reports a single best-fitting value, while the posterior describes the full range of plausible values and how probable each one is.</span>"
),
border = "#FF4500",
bg = "#000000"
)
library(tibble)
# Create a small dataset
data <- tibble(
  hours = c(1, 2, 3, 4, 5),
  score = c(60, 65, 70, 72, 75)
)
library(brms)
# Set weakly informative priors
priors <- c(
  prior(normal(50, 20), class = "Intercept"),
  prior(normal(0, 10), class = "b"),      # prior for slope
  prior(exponential(1), class = "sigma")  # prior for error
)
# Fit model with MCMC
fit <- brm(
  score ~ hours,
  data = data,
  family = gaussian(),
  prior = priors,
  seed = 123,
  iter = 2000, chains = 4,
  refresh = 0
)
## Compiling Stan program...
## Start sampling
summary(fit)
## Family: gaussian
## Links: mu = identity
## Formula: score ~ hours
## Data: data (Number of observations: 5)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 57.28 1.61 53.97 60.58 1.00 1961 1709
## hours 3.70 0.48 2.71 4.72 1.00 2028 1618
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.38 0.59 0.68 2.94 1.00 1440 2111
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit) # trace plots and density plots per chain
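# The reflection questions below mention conditional_effects(); a minimal sketch
# of the posterior predictive views referred to there:
plot(conditional_effects(fit), points = TRUE)  # mean prediction with 95% credible band
pp_check(fit)                                  # posterior predictive check against the data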

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.1.0
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ stringr 1.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Extract posterior draws
draws <- as_draws_df(fit)
# Compute posterior draws for beta0 and beta1
beta0 <- draws$b_Intercept
beta1 <- draws$b_hours
# Create a grid of x values for prediction
x_grid <- seq(min(data$hours), max(data$hours), length.out = 100)
# Sample a subset of posterior draws for plotting (e.g., 100 lines)
set.seed(123)
sample_rows <- sample(1:length(beta0), 100)
# Build data frame of regression lines
regression_lines <- map_dfr(sample_rows, function(i) {
  tibble(
    hours = x_grid,
    score = beta0[i] + beta1[i] * x_grid,
    draw = i
  )
})
# Compute posterior mean line
mean_line <- tibble(
  hours = x_grid,
  score = mean(beta0) + mean(beta1) * x_grid
)
# Plot
ggplot() +
  geom_line(data = regression_lines, aes(x = hours, y = score, group = draw),
            alpha = 0.2, color = "gray50") +
  geom_line(data = mean_line, aes(x = hours, y = score),
            color = "purple", linewidth = 1.5) +
  geom_point(data = data, aes(x = hours, y = score), size = 2) +
  labs(title = "Posterior Regression Lines",
       subtitle = "Gray = posterior samples, Purple = mean line",
       x = "Hours Studied",
       y = "Exam Score") +
  theme_minimal()
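# Optional sketch: how certain is a positive slope? The share of posterior draws
# with beta1 > 0 quantifies what the upward-sloping lines suggest visually.
mean(beta1 > 0)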

# ------------------ FULL, SELF-CONTAINED CODE ------------------
# Action box helper (HTML)
action_box <- function(title, items, border = "#FF4500", bg = "#000000") { # orange border, black background by default
html <- paste0(
'<div style="border-left:6px solid ', border,
';padding:12px 14px;background:', bg,
';border-radius:10px;margin:14px 0 6px 0">',
'<h4 style="margin:0 0 8px 0;color:', border, ';font-weight:bold;">',
title, '</h4>',
'<ul style="margin:0;list-style-type:disc;color:white;">',
paste0("<li>", items, "</li>", collapse = ""),
"</ul></div>"
)
cat(html)
}
action_box(
title = "Action Items B: MCMC Regression Reflection",
items = c(
"<b><span style='color:#FFA500;'>1. Posterior Distributions</span></b><br><br>
<b>Look at the posterior distributions for β₀ (intercept) and β₁ (slope).</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>The intercept is centered around 57.3 and the slope is centered around 3.7, with posterior samples showing tight distributions.</span><br><br>
<b>What do the spread and shape of these distributions tell you?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>The narrow spread means the model is very confident about both parameters. The posterior for β₁ has almost all of its mass above 0, which means the slope is positive. The smooth bell-shaped curves indicate stable posteriors.</span><br><br>
<b>Would classical regression give you this kind of information?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Classical regression gives standard errors and confidence intervals, but it does not give a full probability distribution for each parameter. MCMC shows uncertainty, not just a point estimate plus a margin.</span><br><br>",
"<b><span style='color:#FFA500;'>2. Posterior Predictive Plot</span></b><br><br>
<b>Use conditional_effects(fit) to view the model’s predictions.</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>The conditional effects plot shows the mean prediction line plus a credible band around it.</span><br><br>
<b>How does the uncertainty vary across the predictor range?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Uncertainty is smallest near the center of the data and increases slightly toward the edges (hours = 1 and 5).</span><br><br>
<b>What does this tell you about how MCMC helps visualize prediction uncertainty?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>MCMC provides a distribution of predicted values at each x, so you can see where predictions are tight or uncertain. A frequentist fit, by contrast, gives only a single line and perhaps a confidence band, whereas the Bayesian version shows uncertainty as a natural outcome of sampling.</span><br><br>",
"<b><span style='color:#FFA500;'>3. Posterior Draws (Many Regression Lines)</span></b><br><br>
<b>Each line represents one draw from the posterior distribution of parameters (from MCMC).</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>Together, these lines trace out the range of plausible regression relationships supported by the data.</span><br><br>
<b>What do the spread and direction of these lines show?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>All lines trend upward, showing a consistently positive relationship. The vertical spread around the mean line reflects the uncertainty in β₀ and β₁.</span><br><br>
<b>Is there a lot of variation? Are all lines positive-sloped?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Variation is modest and the lines are tightly clustered. Yes, all lines have positive slopes, meaning the model is almost completely certain that more study hours lead to higher scores.</span><br><br>",
"<b><span style='color:#FFA500;'>4. Why MCMC?</span></b><br><br>
<b>Why did we need to use MCMC here?</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>MCMC is needed because Bayesian regression does not have a closed-form solution for posterior distributions when priors are included. MCMC lets us approximate the full joint posterior.</span><br><br>
<b>Could we solve this regression analytically?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Ordinary least squares solves this simple regression analytically, but once priors are combined with the likelihood (as brms does), the posterior generally has no closed form.</span><br><br>
<b>What would you lose without the sampling approach?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>We would lose the full posterior distribution, direct statements of parameter uncertainty, the posterior predictive distribution, and visualizations of model uncertainty such as the spread of regression lines.</span><br><br>",
"<b><span style='color:#FFA500;'>5. MCMC Diagnostics</span></b><br><br>
<b>Look at the model summary and diagnostics:</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>Rhat is 1.00 for all parameters and the ESS values are in the thousands.</span><br><br>
<b>Are the R² values close to 1.00? If not, what might that mean?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>R² is not reported by summary(); it can be computed with bayes_R2(fit), as sketched after this action box.</span><br><br>
<b>How many effective samples did you get?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Bulk ESS was about 1,900 - 2,000 for parameters; Tail ESS was about 1,600 - 2,100</span><br><br>
<b>What is the warm-up period, and why is it important?</b><br>
<span style='margin-left:40px; color:#DDDDDD; font-style:italic;'>Warm-up was 1,000 iterations per chain; during warm-up the sampler adapts its step size and moves toward the target distribution, so those draws are discarded rather than used for inference.</span><br><br>",
"<b><span style='color:#FFA500;'>6. Reflection</span></b><br><br>
<b>What are the advantages of using MCMC-based Bayesian regression over classical (frequentist) regression?</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>MCMC-based regression gives a full probability distribution for each parameter, quantifies uncertainty directly, and handles small sample sizes better.</span><br><br>
<b>What are some challenges or trade-offs?</b><br>
<span style='margin-left:20px; color:#DDDDDD; font-style:italic;'>It is slower to run, MCMC diagnostics (Rhat, ESS, trace plots) must be checked, and interpretation requires more care than classical regression.</span>"
),
border = "#FF4500",
bg = "#000000"
)
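# Bayesian R-squared is not part of summary(); as noted in item 5 above, it can
# be computed from the posterior draws with bayes_R2() (a brief sketch):
bayes_R2(fit)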