PS 10.3

Consider Question 5. Plot the analytic rejection rates of ϕ_1 and ϕ_2 on the same panel faceted by sample size assuming α=0.05 over a grid of σ^2∈[1,10] for n∈{5,10,15,20}. Verify that the tests have the same size and that ϕ_1 is uniformly more powerful than ϕ_2.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

alpha <- 0.05
n_vals <- c(5, 10, 15, 20)

sigma2_grid <- seq(1, 10, length = 200)

power_df <- expand_grid(
  sigma2_true = sigma2_grid,
  n = n_vals
) %>%
  mutate(
    sigma = sqrt(sigma2_true),

    crit_phi1 = qchisq(1 - alpha, df = n),
    power_phi1 = 1 - pchisq(crit_phi1 / sigma2_true, df = n),

    crit_phi2 = qnorm(1 - alpha, mean = 0, sd = 1 / sqrt(n)),
    power_phi2 = 1 - pnorm(crit_phi2 * sqrt(n) / sigma)
  )

power_df %>%
  filter(abs(sigma2_true - 1) < 1e-6) %>%
  group_by(n) %>%
  summarise(
    size_phi1 = unique(power_phi1),
    size_phi2 = unique(power_phi2)
  )

# A tibble: 4 × 3
      n size_phi1 size_phi2
  <dbl>     <dbl>     <dbl>
1     5    0.0500    0.0500
2    10    0.0500    0.0500
3    15    0.0500    0.0500
4    20    0.0500    0.0500

ggplot(power_df) +
  geom_line(aes(x = sigma2_true, y = power_phi1, color = "ϕ1: sum of squares")) +
  geom_line(aes(x = sigma2_true, y = power_phi2, color = "ϕ2: mean")) +
  facet_wrap(~n, labeller = label_both) +
  geom_hline(yintercept = alpha, linetype = 2) +
  theme_classic() +
  labs(
    x = expression(sigma^2),
    y = "Power / P(reject H0)",
    color = ""
  )

Consider Question 7. Let ϕ_1 represent the uniformly most powerful size-α test, and ϕ_2 a second size-α test that rejects the null when Y ‾>k for some constant k. Note that neither of these test statistics have closed-form analytic sampling distributions! For each n∈{5,10,15,20}:

Find the simulated rejection regions for ϕ_1 and ϕ_2, using 10,000 replications per n, for α=0.05;

Simulate the rejection probabilities for ϕ_1 and ϕ_2 over a length-20 grid of θ∈[0.2,1];

Plot the rejection probabilities of ϕ_1 and ϕ_2 versus θ, faceted by n, to verify that both tests have the same size and that ϕ_1 is indeed uniformly more powerful than ϕ_2.

library(tidyverse)
library(dplyr)
library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

theta0 <- 0.2

null_stats <- parameters(~n, seq(5, 20, by = 5)) %>%
  add_trials(10000) %>%
  mutate(
    Ysample = map(n, \(n) rbeta(n, shape1 = theta0, shape2 = 1)),
    
    T1 = map_dbl(Ysample, \(y) sum(log(y))),
    T2 = map_dbl(Ysample, mean)
  )

exp_crit_vals <- null_stats %>%
  summarize(
    c1 = quantile(T1, 0.95),   
    c2 = quantile(T2, 0.95),
    .by = n
  )

theta_grid <- seq(0.2, 1, length = 20)

(exp_decisions <- parameters(~n, ~true_theta,
            seq(5, 20, by = 5),
            theta_grid
            ) %>%
  add_trials(1000) %>%
  
  inner_join(exp_crit_vals, by = "n") %>%
  
  mutate(
    Ysample = pmap(list(n, true_theta),
                   \(n, theta) rbeta(n, shape1 = theta, shape2 = 1)),
    
    T1 = map_dbl(Ysample, \(y) sum(log(y))),
    T2 = map_dbl(Ysample, mean),
    
    T1_decision = ifelse(T1 > c1, 1, 0),
    T2_decision = ifelse(T2 > c2, 1, 0)
  )
)

# A tibble: 80,000 × 10
       n true_theta .trial    c1    c2 Ysample      T1      T2 T1_decision
   <dbl>      <dbl>  <dbl> <dbl> <dbl> <list>    <dbl>   <dbl>       <dbl>
 1     5        0.2      1 -9.81 0.377 <dbl [5]> -29.6 0.0279            0
 2     5        0.2      2 -9.81 0.377 <dbl [5]> -15.3 0.0823            0
 3     5        0.2      3 -9.81 0.377 <dbl [5]> -41.2 0.216             0
 4     5        0.2      4 -9.81 0.377 <dbl [5]> -34.4 0.0265            0
 5     5        0.2      5 -9.81 0.377 <dbl [5]> -39.2 0.00969           0
 6     5        0.2      6 -9.81 0.377 <dbl [5]> -27.1 0.208             0
 7     5        0.2      7 -9.81 0.377 <dbl [5]> -37.1 0.120             0
 8     5        0.2      8 -9.81 0.377 <dbl [5]> -16.9 0.276             0
 9     5        0.2      9 -9.81 0.377 <dbl [5]> -19.4 0.379             0
10     5        0.2     10 -9.81 0.377 <dbl [5]> -27.1 0.183             0
# ℹ 79,990 more rows
# ℹ 1 more variable: T2_decision <dbl>

(simulated_prob_reject <- exp_decisions %>%
  summarize(
    power_T1 = mean(T1_decision),
    power_T2 = mean(T2_decision),
    .by = c(n, true_theta)
  ))

# A tibble: 80 × 4
       n true_theta power_T1 power_T2
   <dbl>      <dbl>    <dbl>    <dbl>
 1     5      0.2      0.053    0.042
 2     5      0.242    0.103    0.077
 3     5      0.284    0.174    0.137
 4     5      0.326    0.193    0.154
 5     5      0.368    0.291    0.203
 6     5      0.411    0.386    0.25 
 7     5      0.453    0.432    0.298
 8     5      0.495    0.521    0.347
 9     5      0.537    0.616    0.425
10     5      0.579    0.681    0.489
# ℹ 70 more rows

ggplot(simulated_prob_reject) + 
  geom_line(aes(x = true_theta, y = power_T1, col = "ϕ1 (UMP)")) + 
  geom_line(aes(x = true_theta, y = power_T2, col = "ϕ2 (mean)")) + 
  facet_wrap(~n, labeller = label_both) +
  geom_hline(yintercept = 0.05, linetype = 2) + 
  theme_classic(base_size = 14) +
  labs(
    x = expression(theta),
    y = "Rejection probability",
    color = ""
  )