LAB_2_Coreas

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(brms)

## Loading required package: Rcpp
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## 
## The following object is masked from 'package:stats':
## 
##     ar

library(tidybayes)

## 
## Attaching package: 'tidybayes'
## 
## The following objects are masked from 'package:brms':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t

knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
                      fig.width = 7, fig.height = 4.5)


# FAST settings
FAST <- TRUE
fast_iter   <- if (FAST) 1000 else 2000
fast_warmup <- if (FAST)  500 else 1000
fast_chains <- if (FAST)    2 else   4
fast_draws  <- if (FAST)   30 else 100
loo_do      <- TRUE

use_cmdstan <- FALSE
bk <- if (use_cmdstan) "cmdstanr" else "rstan"

# Helper to print Action Item boxes
action_box <- function(title, items, border = "#e91e63", bg = "#fff5f7") {
  html <- paste0(
    '<div style="border-left:6px solid ', border,
    ';padding:12px 14px;background:', bg,
    ';border-radius:10px;margin:14px 0 6px 0">',
    '<h4 style="margin:0 0 8px 0;color:', border, ';font-weight:bold;">', title, '</h4>',
    '<ul style="margin:0;list-style-type:disc;color:black;">',
    paste0("<li>", items, "</li>", collapse = ""),
    "</ul></div>"
  )
  cat(html)
}



set.seed(101)
N_per <- 20   # was 40/60 in main FAST lab

sunlight <- runif(2*N_per, 0, 10)
treatment <- rep(c("A","B"), each = N_per)

alpha_A <- 20; beta_A <- 1.2
alpha_B <- 22; beta_B <- 0.8
sigma   <- 2.5

priors_cat <- c(
  set_prior("normal(0, 10)", class = "Intercept"),
  set_prior("normal(0, 5)",  class = "b"),
  set_prior("student_t(4, 0, 5)", class = "sigma")
)

mu <- ifelse(treatment == "A",
             alpha_A + beta_A*sunlight,
             alpha_B + beta_B*sunlight)

height <- rnorm(2*N_per, mu, sigma)
dat_smallN <- tibble(sunlight, treatment = factor(treatment, levels = c("A","B")), height)

m_cat_smallN <- brm(
  height ~ sunlight * treatment,
  data   = dat_smallN,
  prior  = priors_cat,
  iter   = fast_iter,
  warmup = fast_warmup,
  chains = fast_chains,
  seed   = 123,
  backend = bk,
  file   = "m_cat_smallN_cached"
)

## Compiling Stan program...
## Start sampling

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000181 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.81 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.098 seconds (Warm-up)
## Chain 1:                0.082 seconds (Sampling)
## Chain 1:                0.18 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.3 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.145 seconds (Warm-up)
## Chain 2:                0.091 seconds (Sampling)
## Chain 2:                0.236 seconds (Total)
## Chain 2:

summary(m_cat_smallN)

##  Family: gaussian 
##   Links: mu = identity 
## Formula: height ~ sunlight * treatment 
##    Data: dat_smallN (Number of observations: 40) 
##   Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
##          total post-warmup draws = 1000
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              20.96      1.24    18.60    23.33 1.00      530      356
## sunlight                1.00      0.21     0.60     1.43 1.00      488      434
## treatmentB              0.46      1.58    -2.76     3.55 1.00      555      676
## sunlight:treatmentB    -0.21      0.28    -0.76     0.33 1.00      526      680
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     2.52      0.30     2.02     3.20 1.00      589      570
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

pp_check(m_cat_smallN, ndraws = fast_draws)

ndraws_smooth <- 200
newgrid <- expand.grid(
  sunlight  = seq(0, 10, length.out = 80),
  treatment = factor(c("A","B"), levels = c("A","B"))
) |> arrange(treatment, sunlight)

summary_lines <- add_epred_draws(newgrid, m_cat_smallN, ndraws = ndraws_smooth) |>
  group_by(treatment, sunlight) |>
  summarise(
    mean = mean(.epred),
    low  = quantile(.epred, 0.055),
    high = quantile(.epred, 0.945),
    .groups = "drop"
  )

ggplot() +
  geom_point(data = dat_smallN,
             aes(sunlight, height, color = treatment), alpha = 0.6) +
  geom_ribbon(data = summary_lines,
              aes(sunlight, ymin = low, ymax = high, fill = treatment, group = treatment),
              inherit.aes = FALSE, alpha = 0.2, color = NA) +
  geom_line(data = summary_lines,
            aes(sunlight, y = mean, color = treatment, group = treatment),
            inherit.aes = FALSE, linewidth = 1) +
  theme_bw() +
  labs(title = "Smaller N: Posterior lines & 89% ribbons",
       x = "Sunlight (h/day)", y = "Plant height (cm)")

action_box(
  title  = "Action Items — Sandbox A (Smaller N)",
  items  = c(
    "Report the slope difference (B − A) posterior mean and 95% CI. The slope difference is about -0.20 with a wide 95% CI, showing no evidence of any difference compared to the main analysis.",
    "Describe how the ribbons changed and why that happens when N is smaller. Since the sample size is smaller, the posterior ribbons are wider — greater uncertainty in both slope and intercept estimates."
  ),
  border = "#e91e63", bg = "#fff5f7"
)

Action Items — Sandbox A (Smaller N)

Report the slope difference (B − A) posterior mean and 95% CI. The slope difference is about -0.20 with a wide 95% CI, showing no evidence of any difference compared to the main analysis.
Describe how the ribbons changed and why that happens when N is smaller. Since the sample size is smaller, the posterior ribbons are wider — greater uncertainty in both slope and intercept estimates.

set.seed(202)
N_per <- 60
sunlight <- runif(2*N_per, 0, 10)
treatment <- rep(c("A","B"), each = N_per)

alpha_A <- 20; beta_A <- 1.2
alpha_B <- 22; beta_B <- 0.8
sigma   <- 6.0   # was 2.5

mu <- ifelse(treatment == "A",
             alpha_A + beta_A*sunlight,
             alpha_B + beta_B*sunlight)

height <- rnorm(2*N_per, mu, sigma)
dat_noisy <- tibble(sunlight, treatment = factor(treatment, levels = c("A","B")), height)

m_cat_noisy <- brm(
  height ~ sunlight * treatment,
  data   = dat_noisy,
  prior  = priors_cat,
  iter   = fast_iter,
  warmup = fast_warmup,
  chains = fast_chains,
  seed   = 123,
  backend = bk,
  file   = "m_cat_noisy_cached"
)

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 4.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.48 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.04 seconds (Warm-up)
## Chain 1:                0.022 seconds (Sampling)
## Chain 1:                0.062 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.036 seconds (Warm-up)
## Chain 2:                0.021 seconds (Sampling)
## Chain 2:                0.057 seconds (Total)
## Chain 2:

summary(m_cat_noisy)

##  Family: gaussian 
##   Links: mu = identity 
## Formula: height ~ sunlight * treatment 
##    Data: dat_noisy (Number of observations: 120) 
##   Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
##          total post-warmup draws = 1000
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              17.71      1.61    14.65    20.95 1.00      476      605
## sunlight                1.48      0.27     0.90     2.02 1.00      526      575
## treatmentB              4.97      2.21     0.50     9.02 1.00      443      581
## sunlight:treatmentB    -0.64      0.38    -1.34     0.17 1.00      478      493
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     6.00      0.41     5.30     6.86 1.00      669      603
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

pp_check(m_cat_noisy, ndraws = fast_draws)

ndraws_smooth <- 200
newgrid <- expand.grid(
  sunlight  = seq(0, 10, length.out = 80),
  treatment = factor(c("A","B"), levels = c("A","B"))
) |> arrange(treatment, sunlight)

summary_lines <- add_epred_draws(newgrid, m_cat_noisy, ndraws = ndraws_smooth) |>
  group_by(treatment, sunlight) |>
  summarise(
    mean = mean(.epred),
    low  = quantile(.epred, 0.055),
    high = quantile(.epred, 0.945),
    .groups = "drop"
  )

ggplot() +
  geom_point(data = dat_noisy,
             aes(sunlight, height, color = treatment), alpha = 0.6) +
  geom_ribbon(data = summary_lines,
              aes(sunlight, ymin = low, ymax = high, fill = treatment, group = treatment),
              inherit.aes = FALSE, alpha = 0.2, color = NA) +
  geom_line(data = summary_lines,
            aes(sunlight, y = mean, color = treatment, group = treatment),
            inherit.aes = FALSE, linewidth = 1) +
  theme_bw() +
  labs(title = "Higher noise: Posterior lines & 89% ribbons",
       x = "Sunlight (h/day)", y = "Plant height (cm)")

action_box(
  title  = "Action Items — Sandbox B (Higher Noise)",
  items  = c(
    "Compare the contrast posteriors (intercept and slope differences) to the main analysis: wider or narrower? They are wider, showing less accuracy than in ther main analysis  ",
    "Does the crossover point remain obvious? Explain using the figure. No. The crossover is not clear because of higher noise - causing the ribbons to overlap heavly."
  ),
  border = "#1565c0", bg = "#eef5ff"
)

Action Items — Sandbox B (Higher Noise)

Compare the contrast posteriors (intercept and slope differences) to the main analysis: wider or narrower? They are wider, showing less accuracy than in ther main analysis
Does the crossover point remain obvious? Explain using the figure. No. The crossover is not clear because of higher noise - causing the ribbons to overlap heavly.

set.seed(303)
N <- 120
flowers <- runif(N, 0, 100)

a <- 10; b <- 2.5; c <- -0.025
mu_true <- a + b*flowers + c*flowers^2
insects <- rnorm(N, mu_true, sd = 8)

dat_flow2 <- tibble(flowers, insects)

m_lin_only <- brm(
  insects ~ flowers,
  data   = dat_flow2,
  prior  = priors_cat,
  iter   = fast_iter,
  warmup = fast_warmup,
  chains = fast_chains,
  seed   = 123,
  backend = bk,
  file   = "m_lin_only_cached"
)

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.3 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.027 seconds (Warm-up)
## Chain 1:                0.015 seconds (Sampling)
## Chain 1:                0.042 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 2: Iteration: 300 / 1000 [ 30%]  (Warmup)
## Chain 2: Iteration: 400 / 1000 [ 40%]  (Warmup)
## Chain 2: Iteration: 500 / 1000 [ 50%]  (Warmup)
## Chain 2: Iteration: 501 / 1000 [ 50%]  (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.02 seconds (Warm-up)
## Chain 2:                0.008 seconds (Sampling)
## Chain 2:                0.028 seconds (Total)
## Chain 2:

summary(m_lin_only)

##  Family: gaussian 
##   Links: mu = identity 
## Formula: insects ~ flowers 
##    Data: dat_flow2 (Number of observations: 120) 
##   Draws: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
##          total post-warmup draws = 1000
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    56.13      3.59    49.10    63.00 1.00      987      717
## flowers      -0.09      0.06    -0.21     0.02 1.00     1145      846
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    19.12      1.23    16.89    21.60 1.00      673      597
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

pp_check(m_lin_only, ndraws = fast_draws)

ndraws_smooth <- 200
grid2 <- tibble(flowers = seq(0, 100, length.out = 100))

s_lin_only <- add_epred_draws(grid2, m_lin_only, ndraws = ndraws_smooth) |>
  group_by(flowers) |>
  summarise(
    mean = mean(.epred),
    low  = quantile(.epred, 0.055),
    high = quantile(.epred, 0.945),
    .groups = "drop"
  )

ggplot(dat_flow2, aes(flowers, insects)) +
  geom_point(color = "gray40", alpha = 0.6) +
  geom_ribbon(data = s_lin_only,
              aes(flowers, ymin = low, ymax = high, group = 1),
              inherit.aes = FALSE, alpha = 0.2) +
  geom_line(data = s_lin_only,
            aes(flowers, y = mean, group = 1),
            inherit.aes = FALSE, linewidth = 1) +
  theme_bw() +
  labs(title = "Linear model on a hump-shaped relation",
       x = "Flower cover (%)", y = "Insect visits")

action_box(
  title  = "Action Items — Sandbox C (Linear Fit Only)",
  items  = c(
    "What story would a linear fit tell about insects vs. flowers? Why is that misleading here? A liner fit would suggest that insect visits decrease slightly as flower cover increases, but it is misleading because the relationship is shaped like a hump",
    "In one sentence, explain why you’d prefer a quadratic or spline for this pattern.A quadratic can capture the curvature, which a non-liner patter would miss. "
  ),
  border = "#2e7d32", bg = "#eefaf0"
)

Action Items — Sandbox C (Linear Fit Only)

What story would a linear fit tell about insects vs. flowers? Why is that misleading here? A liner fit would suggest that insect visits decrease slightly as flower cover increases, but it is misleading because the relationship is shaped like a hump
In one sentence, explain why you’d prefer a quadratic or spline for this pattern.A quadratic can capture the curvature, which a non-liner patter would miss.

LAB_2_Coreas_Guzman

2025-09-27

Action Items — Sandbox A (Smaller N)

Action Items — Sandbox B (Higher Noise)

Action Items — Sandbox C (Linear Fit Only)