library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
##
## The following object is masked from 'package:stats':
##
## ar
library(tidybayes)
##
## Attaching package: 'tidybayes'
##
## The following objects are masked from 'package:brms':
##
## dstudent_t, pstudent_t, qstudent_t, rstudent_t
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
fig.width = 7, fig.height = 4.5)
# FAST settings
FAST <- TRUE
fast_iter <- if (FAST) 1000 else 2000
fast_warmup <- if (FAST) 500 else 1000
fast_chains <- if (FAST) 2 else 4
fast_draws <- if (FAST) 30 else 100
loo_do <- TRUE
use_cmdstan <- FALSE
bk <- if (use_cmdstan) "cmdstanr" else "rstan"
# Helper to print Action Item boxes
action_box <- function(title, items, border = "#e91e63", bg = "#fff5f7") {
html <- paste0(
'<div style="border-left:6px solid ', border,
';padding:12px 14px;background:', bg,
';border-radius:10px;margin:14px 0 6px 0">',
'<h4 style="margin:0 0 8px 0;color:', border, ';font-weight:bold;">', title, '</h4>',
'<ul style="margin:0;list-style-type:disc;color:black;">',
paste0("<li>", items, "</li>", collapse = ""),
"</ul></div>"
)
cat(html)
}
set.seed(101)
N_per <- 20 # was 40/60 in main FAST lab
sunlight <- runif(2*N_per, 0, 10)
treatment <- rep(c("A","B"), each = N_per)
alpha_A <- 20; beta_A <- 1.2
alpha_B <- 22; beta_B <- 0.8
sigma <- 2.5
priors_cat <- c(
set_prior("normal(0, 10)", class = "Intercept"),
set_prior("normal(0, 5)", class = "b"),
set_prior("student_t(4, 0, 5)", class = "sigma")
)
mu <- ifelse(treatment == "A",
alpha_A + beta_A*sunlight,
alpha_B + beta_B*sunlight)
height <- rnorm(2*N_per, mu, sigma)
dat_smallN <- tibble(sunlight, treatment = factor(treatment, levels = c("A","B")), height)
m_cat_smallN <- brm(
height ~ sunlight * treatment,
data = dat_smallN,
prior = priors_cat,
iter = fast_iter,
warmup = fast_warmup,
chains = fast_chains,
seed = 123,
backend = bk,
file = "m_cat_smallN_cached"
)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000181 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.81 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.098 seconds (Warm-up)
## Chain 1: 0.082 seconds (Sampling)
## Chain 1: 0.18 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.3 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.145 seconds (Warm-up)
## Chain 2: 0.091 seconds (Sampling)
## Chain 2: 0.236 seconds (Total)
## Chain 2:
summary(m_cat_smallN)
## Family: gaussian
## Links: mu = identity
## Formula: height ~ sunlight * treatment
## Data: dat_smallN (Number of observations: 40)
## Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup draws = 1000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 20.96 1.24 18.60 23.33 1.00 530 356
## sunlight 1.00 0.21 0.60 1.43 1.00 488 434
## treatmentB 0.46 1.58 -2.76 3.55 1.00 555 676
## sunlight:treatmentB -0.21 0.28 -0.76 0.33 1.00 526 680
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 2.52 0.30 2.02 3.20 1.00 589 570
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(m_cat_smallN, ndraws = fast_draws)

ndraws_smooth <- 200
newgrid <- expand.grid(
sunlight = seq(0, 10, length.out = 80),
treatment = factor(c("A","B"), levels = c("A","B"))
) |> arrange(treatment, sunlight)
summary_lines <- add_epred_draws(newgrid, m_cat_smallN, ndraws = ndraws_smooth) |>
group_by(treatment, sunlight) |>
summarise(
mean = mean(.epred),
low = quantile(.epred, 0.055),
high = quantile(.epred, 0.945),
.groups = "drop"
)
ggplot() +
geom_point(data = dat_smallN,
aes(sunlight, height, color = treatment), alpha = 0.6) +
geom_ribbon(data = summary_lines,
aes(sunlight, ymin = low, ymax = high, fill = treatment, group = treatment),
inherit.aes = FALSE, alpha = 0.2, color = NA) +
geom_line(data = summary_lines,
aes(sunlight, y = mean, color = treatment, group = treatment),
inherit.aes = FALSE, linewidth = 1) +
theme_bw() +
labs(title = "Smaller N: Posterior lines & 89% ribbons",
x = "Sunlight (h/day)", y = "Plant height (cm)")

action_box(
title = "Action Items — Sandbox A (Smaller N)",
items = c(
"Report the slope difference (B − A) posterior mean and 95% CI. The slope difference is about -0.20 with a wide 95% CI, showing no evidence of any difference compared to the main analysis.",
"Describe how the ribbons changed and why that happens when N is smaller. Since the sample size is smaller, the posterior ribbons are wider — greater uncertainty in both slope and intercept estimates."
),
border = "#e91e63", bg = "#fff5f7"
)
Action Items — Sandbox A (Smaller N)
-
Report the slope difference (B − A) posterior mean and 95% CI. The slope
difference is about -0.20 with a wide 95% CI, showing no evidence of any
difference compared to the main analysis.
-
Describe how the ribbons changed and why that happens when N is smaller.
Since the sample size is smaller, the posterior ribbons are wider —
greater uncertainty in both slope and intercept estimates.
set.seed(202)
N_per <- 60
sunlight <- runif(2*N_per, 0, 10)
treatment <- rep(c("A","B"), each = N_per)
alpha_A <- 20; beta_A <- 1.2
alpha_B <- 22; beta_B <- 0.8
sigma <- 6.0 # was 2.5
mu <- ifelse(treatment == "A",
alpha_A + beta_A*sunlight,
alpha_B + beta_B*sunlight)
height <- rnorm(2*N_per, mu, sigma)
dat_noisy <- tibble(sunlight, treatment = factor(treatment, levels = c("A","B")), height)
m_cat_noisy <- brm(
height ~ sunlight * treatment,
data = dat_noisy,
prior = priors_cat,
iter = fast_iter,
warmup = fast_warmup,
chains = fast_chains,
seed = 123,
backend = bk,
file = "m_cat_noisy_cached"
)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 4.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.48 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.04 seconds (Warm-up)
## Chain 1: 0.022 seconds (Sampling)
## Chain 1: 0.062 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.036 seconds (Warm-up)
## Chain 2: 0.021 seconds (Sampling)
## Chain 2: 0.057 seconds (Total)
## Chain 2:
summary(m_cat_noisy)
## Family: gaussian
## Links: mu = identity
## Formula: height ~ sunlight * treatment
## Data: dat_noisy (Number of observations: 120)
## Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup draws = 1000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 17.71 1.61 14.65 20.95 1.00 476 605
## sunlight 1.48 0.27 0.90 2.02 1.00 526 575
## treatmentB 4.97 2.21 0.50 9.02 1.00 443 581
## sunlight:treatmentB -0.64 0.38 -1.34 0.17 1.00 478 493
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 6.00 0.41 5.30 6.86 1.00 669 603
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(m_cat_noisy, ndraws = fast_draws)

ndraws_smooth <- 200
newgrid <- expand.grid(
sunlight = seq(0, 10, length.out = 80),
treatment = factor(c("A","B"), levels = c("A","B"))
) |> arrange(treatment, sunlight)
summary_lines <- add_epred_draws(newgrid, m_cat_noisy, ndraws = ndraws_smooth) |>
group_by(treatment, sunlight) |>
summarise(
mean = mean(.epred),
low = quantile(.epred, 0.055),
high = quantile(.epred, 0.945),
.groups = "drop"
)
ggplot() +
geom_point(data = dat_noisy,
aes(sunlight, height, color = treatment), alpha = 0.6) +
geom_ribbon(data = summary_lines,
aes(sunlight, ymin = low, ymax = high, fill = treatment, group = treatment),
inherit.aes = FALSE, alpha = 0.2, color = NA) +
geom_line(data = summary_lines,
aes(sunlight, y = mean, color = treatment, group = treatment),
inherit.aes = FALSE, linewidth = 1) +
theme_bw() +
labs(title = "Higher noise: Posterior lines & 89% ribbons",
x = "Sunlight (h/day)", y = "Plant height (cm)")

action_box(
title = "Action Items — Sandbox B (Higher Noise)",
items = c(
"Compare the contrast posteriors (intercept and slope differences) to the main analysis: wider or narrower? They are wider, showing less accuracy than in ther main analysis ",
"Does the crossover point remain obvious? Explain using the figure. No. The crossover is not clear because of higher noise - causing the ribbons to overlap heavly."
),
border = "#1565c0", bg = "#eef5ff"
)
Action Items — Sandbox B (Higher Noise)
-
Compare the contrast posteriors (intercept and slope differences) to the
main analysis: wider or narrower? They are wider, showing less accuracy
than in ther main analysis
-
Does the crossover point remain obvious? Explain using the figure.
No. The crossover is not clear because of higher noise - causing the
ribbons to overlap heavly.
set.seed(303)
N <- 120
flowers <- runif(N, 0, 100)
a <- 10; b <- 2.5; c <- -0.025
mu_true <- a + b*flowers + c*flowers^2
insects <- rnorm(N, mu_true, sd = 8)
dat_flow2 <- tibble(flowers, insects)
m_lin_only <- brm(
insects ~ flowers,
data = dat_flow2,
prior = priors_cat,
iter = fast_iter,
warmup = fast_warmup,
chains = fast_chains,
seed = 123,
backend = bk,
file = "m_lin_only_cached"
)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.3 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.027 seconds (Warm-up)
## Chain 1: 0.015 seconds (Sampling)
## Chain 1: 0.042 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.02 seconds (Warm-up)
## Chain 2: 0.008 seconds (Sampling)
## Chain 2: 0.028 seconds (Total)
## Chain 2:
summary(m_lin_only)
## Family: gaussian
## Links: mu = identity
## Formula: insects ~ flowers
## Data: dat_flow2 (Number of observations: 120)
## Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup draws = 1000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 56.13 3.59 49.10 63.00 1.00 987 717
## flowers -0.09 0.06 -0.21 0.02 1.00 1145 846
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 19.12 1.23 16.89 21.60 1.00 673 597
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(m_lin_only, ndraws = fast_draws)

ndraws_smooth <- 200
grid2 <- tibble(flowers = seq(0, 100, length.out = 100))
s_lin_only <- add_epred_draws(grid2, m_lin_only, ndraws = ndraws_smooth) |>
group_by(flowers) |>
summarise(
mean = mean(.epred),
low = quantile(.epred, 0.055),
high = quantile(.epred, 0.945),
.groups = "drop"
)
ggplot(dat_flow2, aes(flowers, insects)) +
geom_point(color = "gray40", alpha = 0.6) +
geom_ribbon(data = s_lin_only,
aes(flowers, ymin = low, ymax = high, group = 1),
inherit.aes = FALSE, alpha = 0.2) +
geom_line(data = s_lin_only,
aes(flowers, y = mean, group = 1),
inherit.aes = FALSE, linewidth = 1) +
theme_bw() +
labs(title = "Linear model on a hump-shaped relation",
x = "Flower cover (%)", y = "Insect visits")

action_box(
title = "Action Items — Sandbox C (Linear Fit Only)",
items = c(
"What story would a linear fit tell about insects vs. flowers? Why is that misleading here? A liner fit would suggest that insect visits decrease slightly as flower cover increases, but it is misleading because the relationship is shaped like a hump",
"In one sentence, explain why you’d prefer a quadratic or spline for this pattern.A quadratic can capture the curvature, which a non-liner patter would miss. "
),
border = "#2e7d32", bg = "#eefaf0"
)
Action Items — Sandbox C (Linear Fit Only)
-
What story would a linear fit tell about insects vs. flowers? Why is
that misleading here? A liner fit would suggest that insect visits
decrease slightly as flower cover increases, but it is misleading
because the relationship is shaped like a hump
-
In one sentence, explain why you’d prefer a quadratic or spline for this
pattern.A quadratic can capture the curvature, which a non-liner patter
would miss.