set.seed(1)n <-500# sample size we want to fit p <-0.5# baseline prob of people belong to category 1effect_size <-c(0.2, 0.2, 0.2) #this is log odds ratio difference # 2*2*2 within group settingcomb <-2*2*2# Generate factorsfactor1 <-factor(rep(rep(c(0, 1), each = n), times =4))factor2 <-factor(rep(rep(c(0, 1), each = n), each =2, times =2))factor3 <-factor(rep(c(0, 1), each = n, times =4))# Simulate the log odds of successlog_odds <- effect_size[1] *as.numeric(factor1) + effect_size[2] *as.numeric(factor2) + effect_size[3] *as.numeric(factor3)# Convert log odds to probabilityprob <-exp(log_odds) / (1+exp(log_odds))# Generate binary outcome based on the simulated probabilityoutcome <-rbinom(comb * n, 1, prob)# Fit logistic regression modelfit <-glm(outcome ~ factor1 + factor2 + factor3, family =binomial())# Output summary of the modelsummary(fit)
Call:
glm(formula = outcome ~ factor1 + factor2 + factor3, family = binomial())
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6557 -1.4650 0.7987 0.8784 0.9148
Coefficients: (1 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.65473 0.05851 11.189 < 2e-16 ***
factor11 0.32435 0.06955 4.664 3.1e-06 ***
factor21 0.09870 0.06941 1.422 0.155
factor31 NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 4868.1 on 3999 degrees of freedom
Residual deviance: 4844.3 on 3997 degrees of freedom
AIC: 4850.3
Number of Fisher Scoring iterations: 4
Now wrapping them up to a function, for interaction effect, you don’t have to change the effect_size default in the function here, you can define it yourself later when calling the function:
# Function to calculate powerpower_simulation <-function(n, alpha =0.05,effect_size =c(0.2, 0.2, 0.2),p =0.5) { comb =2*2*2# Generate factors factor1 <-factor(rep(rep(c(0, 1), each = n), times =4)) factor2 <-factor(rep(rep(c(0, 1), each = n), each =2, times =2)) factor3 <-factor(rep(c(0, 1), each = n, times =4))# Simulate the log odds of success log_odds <- effect_size[1] *as.numeric(factor1) + effect_size[2] *as.numeric(factor2) + effect_size[3] *as.numeric(factor3) # Convert log odds to probability prob <-exp(log_odds) / (1+exp(log_odds))# Generate binary outcome based on the simulated probability outcome <-rbinom(comb * n, 1, prob) fit <-glm(outcome ~ factor1 + factor2 + factor3, family =binomial())# Check if the p-value for the treatment effect is less than alpha p_value <-summary(fit)$coefficients[2, 4]return(p_value < alpha)}
Given power level, find sample size
estimate_sample_size <-function(desired_power, alpha =0.05,effect_size =c(0.2, 0.2, 0.2),p =0.5,step =10,max_n =1000) { n <- step estimated_power <-0while (estimated_power < desired_power && n <= max_n) {# Run power simulation n_simulations <-200# feel free to change results <-replicate(n_simulations, power_simulation(n, alpha, effect_size, p)) estimated_power <-mean(results)# Print progress (optional) feel free to turn it on if you need to trouble shooting below for printin#cat("Sample size:", n, "Estimated power:", estimated_power, "\n") n <- n + step }if (estimated_power >= desired_power) {return(n - step) } else {return(NA) }}
set.seed(124) # replicable thignsslice <-10# how many time you want to slice up the range of effect sizesample_size <-rep(0, slice) # store estimated sample size neededinc <-seq(0.1, 0.5, length.out = slice)for (i in1:slice) { effect_size <-c(0, 0, 0) + inc[i] # add more dimension if needed Dana! sample_size[i] <-estimate_sample_size(desired_power =0.8,# how much power you need, this is in decimal so less than 1alpha =0.05,# rejection region effect_size,# add more dimentions if you need to change the effect_size, to do this you also need to alter the function "power_simulation"p =0.5,# baseline success ratestep =10,# step size or precise level to do thismax_n =1000# max sample size for shuting down the loop )}
library(ggplot2)df =data.frame(increment = inc, sample_size = sample_size)ggplot(data = df, aes(x = inc, y = sample_size))+coord_cartesian(ylim=c(0,500))+geom_line()+labs(y ='Sample Size Needed', x ='Effect Size at Log Odd Ratio',title ='Sample Size needed for given Log Odd Ratio (Effect Size)')+theme_bw()