## Q1.4
############################################################
# Power analysis using randomization inference (RI)
# for a block-randomized experiment (blocks = gender × state)
#
# Goal: For a chosen Minimum Detectable Effect (MDE),
# find how large N must be to get ≈80% power
# using the same RI test.
############################################################
## 1. Load data and define outcome + blocks -----------------------------
# NOTE: change file name/path if needed
diet <- read.csv("/Users/wenyuhan/Downloads/ROSS/Fall B/557 inference/data_diet-hw2.csv")
# Outcome: weight loss (positive = lost weight)
diet$loss <- diet$weight_pre - diet$weight_post
# Blocks: actual randomization blocks = gender × state
diet$block <- interaction(diet$female, diet$state, drop = TRUE)
# Store block information (used to scale N and mimic design)
blocks <- levels(diet$block) # block labels
block_sizes <- table(diet$block) # how many in each block now
block_shares <- block_sizes / sum(block_sizes) # share of total N in each block
# Proportion treated in each block in the original study
# (so simulated experiments have similar treat/control ratios)
prop_treat_block <- tapply(diet$treat, diet$block, mean)
block_sizes
##
## 0.CA 1.CA 0.MI 1.MI
## 1250 1250 1250 1250
block_shares
##
## 0.CA 1.CA 0.MI 1.MI
## 0.25 0.25 0.25 0.25
prop_treat_block
## 0.CA 1.CA 0.MI 1.MI
## 0.5 0.5 0.5 0.5
## 2. Function: randomization inference p-value with blocking ----------
# This runs the SAME test as in your earlier question:
# - Test statistic: difference in mean weight loss (treated - control)
# - Sharp null: pill has NO effect for ANYONE
# - Randomization: shuffle treatment only WITHIN each block
ri_block_pvalue <- function(df, B_perm = 500) {
# Observed treatment effect in this dataset
obs <- with(df, mean(loss[treat == 1]) - mean(loss[treat == 0]))
# Simulated effects under the sharp null
sim <- numeric(B_perm)
for (b in 1:B_perm) {
perm_treat <- df$treat
# Shuffle treatment labels WITHIN each block to mimic block randomization
for (bl in unique(df$block)) {
idx <- which(df$block == bl)
perm_treat[idx] <- sample(df$treat[idx])
}
sim[b] <- mean(df$loss[perm_treat == 1]) - mean(df$loss[perm_treat == 0])
}
# Two-sided p-value: how extreme is obs relative to null distribution
mean(abs(sim) >= abs(obs))
}
## 3. Function: simulate ONE block-randomized experiment ---------------
# This creates a "fake" dataset for a given total sample size N_total
# under a TRUE treatment effect of size 'delta' (the MDE).
#
# Steps:
# 1. For each block, decide how many people it gets (using block_shares)
# 2. Sample rows from that block (with replacement) to get realistic outcomes
# 3. Assign treatment inside each block using original treat proportion
# 4. Add 'delta' pounds of extra weight loss to treated units
#
# The result looks like a new experiment with the same structure,
# but where the pill really works with effect size = delta.
simulate_experiment <- function(N_total, delta) {
df_list <- list()
for (bl in blocks) {
# Number of observations in this block for total N_total
n_bl <- round(N_total * block_shares[bl])
n_bl <- max(n_bl, 4) # ensure at least a few per block
# Sample existing individuals from this block (with replacement)
rows_bl <- diet[diet$block == bl, ]
idx <- sample(seq_len(nrow(rows_bl)), size = n_bl, replace = TRUE)
temp <- rows_bl[idx, ]
# Assign treatment using original treat share in this block
p_treat <- prop_treat_block[bl]
temp$treat <- rbinom(n_bl, size = 1, prob = p_treat)
# Impose true treatment effect: treated lose an extra 'delta' pounds
temp$loss <- temp$loss + delta * temp$treat
df_list[[bl]] <- temp
}
do.call(rbind, df_list)
}
## 4. Function: estimate power at a given N and MDE ---------------------
# For a candidate sample size N_total:
# - Repeat many simulated experiments (B_outer times)
# - In each experiment, run the block RI test and get a p-value
# - Power = proportion of experiments where p-value < 0.05
estimate_power <- function(N_total,
delta = 5, # MDE: extra pounds lost due to pill
B_outer = 100, # number of simulated experiments
B_inner = 200 # permutations per RI test
) {
rejections <- 0
for (s in 1:B_outer) {
# 1) Simulate one experiment with TRUE effect = delta
df_sim <- simulate_experiment(N_total, delta = delta)
# 2) Run block randomization inference test on this simulated data
p_val <- ri_block_pvalue(df_sim, B_perm = B_inner)
# 3) Count rejection at alpha = 0.05
if (p_val < 0.05) {
rejections <- rejections + 1
}
}
# Estimated power = Pr(reject H0 | true effect = delta)
rejections / B_outer
}
## 5. Run power analysis over several N values --------------------------
set.seed(123) # for reproducibility
# Choose MDE (minimum detectable effect) in pounds
delta_chosen <- 5 # you can change this to e.g. 3, 4, etc.
# Candidate total sample sizes to try
candidate_N <- seq(60, 160, by = 20) # 60, 80, 100, 120, 140, 160
# Compute estimated power for each N
power_vals <- sapply(candidate_N,
estimate_power,
delta = delta_chosen,
B_outer = 100, # total simulated experiments per N
B_inner = 200) # permutations per randomization test
# Summarize results
power_table <- data.frame(N = candidate_N, power = power_vals)
print(power_table)
## N power
## 1 60 0.35
## 2 80 0.52
## 3 100 0.50
## 4 120 0.63
## 5 140 0.72
## 6 160 0.79
# Plot power curve
plot(power_table$N, power_table$power, type = "b",
xlab = "Total sample size (N)",
ylab = "Estimated power",
main = paste("Power curve for MDE =", delta_chosen, "lbs"))
abline(h = 0.8, lty = 2) # reference line at 80% power
