library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Simulate a two-group population and estimate the group difference in the
# trait with a simple linear regression on the full sample.
set.seed(123) # fixed seed so the draws below are reproducible

N <- 10000
mean_g0 <- 99
mean_g1 <- 87

# Group indicator: 1 with probability 0.25, otherwise 0.
group <- rbinom(N, size = 1, prob = 0.25)

# One full-length vector of candidate values per group (group 0 drawn first,
# then group 1); each row then takes the value matching its own group.
draws_g0 <- rnorm(N, mean = mean_g0, sd = 15)
draws_g1 <- rnorm(N, mean = mean_g1, sd = 20)
trait <- ifelse(group == 0, draws_g0, draws_g1)

df <- data.frame(group = group, trait = trait)

# Slope on `group` is the estimated mean difference (group 1 minus group 0).
reg_full <- lm(trait ~ group, data = df)
beta_full <- coef(reg_full)["group"]
# Subgroup filtered by trait ----
# Keep only the rows whose trait exceeds 110, then refit the same regression
# on that subsample. Note that the selection is made on the outcome variable
# itself, not on the predictor.
df_selected <- df %>% filter(trait > 110)
reg_selected <- lm(trait ~ group, data = df_selected)
beta_selected <- coef(reg_selected)["group"]
# Bootstrap ----
# Refit the subsample regression on 1000 resamples of df_selected, each drawn
# with replacement and of the same size as df_selected, collecting the slope
# on `group` from every refit.
n_selected <- nrow(df_selected) # loop-invariant, so computed once
boot_betas <- replicate(1000, {
  sample_df <- df_selected %>% slice_sample(n = n_selected, replace = TRUE)
  coef(lm(trait ~ group, data = sample_df))["group"]
})

# One-column data frame of the bootstrap slopes, in the shape ggplot expects.
boot_df <- data.frame(beta = boot_betas)
# Histogram of the bootstrap slopes. The dashed red line marks the mean of the
# bootstrap estimates; the dotted black line marks zero for reference.
ggplot(boot_df, aes(x = beta)) +
  geom_histogram(bins = 50, fill = "salmon", color = "black", alpha = 0.8) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_vline(
    xintercept = mean(boot_betas), color = "red", linetype = "dashed",
    linewidth = 1.1
  ) +
  geom_vline(xintercept = 0, color = "black", linetype = "dotted") +
  labs(
    title = "Bootstrap Distribution of Beta (Trait ~ Group)\nSubsample: Trait > 110",
    x = "Beta Coefficient",
    y = "Frequency"
  ) +
  theme_minimal()

# Report the headline numbers together: the slope in the full sample, the
# slope in the trait > 110 subsample, and the 2.5% / 97.5% quantiles of the
# bootstrap slopes.
ci_probs <- c(0.025, 0.975)
list(
  beta_full = beta_full,
  beta_selected = beta_selected,
  CI_95 = quantile(boot_betas, probs = ci_probs)
)
## $beta_full
## group
## -12.19174
##
## $beta_selected
## group
## 1.470894
##
## $CI_95
## 2.5% 97.5%
## 0.4637232 2.4700854