This script demonstrates how to estimate the required sample size for a logistic regression model where the predictor is an ordinal factor. The steps include:
x.ord <- sample(
x = c(1, 2, 3, 4), # four ordinal levels
size = 1e5, # large sample for stable estimates
prob = c(0.25, 0.25, 0.25, 0.25), # equal probability for each level
replace = TRUE
)
x.ord <- factor(x.ord, ordered = TRUE) # convert to ordered factor
contrasts(x.ord) <- contr.treatment(4, base = 4) # use level 4 as reference
x.dummy <- model.matrix( ~ x.ord)[,-1] # remove intercept column
x.data <- as.data.frame(x.dummy) # convert to data frame
head(x.data) # preview dummy-coded data
## x.ord1 x.ord2 x.ord3
## 1 0 1 0
## 2 1 0 0
## 3 0 0 0
## 4 1 0 0
## 5 1 0 0
## 6 0 1 0
x.fit <- lm(x.ord1 ~ x.ord2 + x.ord3, data = x.data)
r.squared.pred <- summary(x.fit)$adj.r.squared # extract adjusted R-squared
bern.prob <- mean(x.data$x.ord1) # or 0.25 in Step 1
pred.dist <- list(dist = "bernoulli", prob = bern.prob)
power.z.logistic(
base.prob = 0.15, # probability when X.ord1 = 0
prob = 0.20, # probability when X.ord1 = 1
alpha = 0.05, # significance level
power = 0.80, # desired power
r.squared.pred = r.squared.pred, # adjusted R-squared
dist = pred.dist # predictor distribution
)
## +--------------------------------------------------+
## | SAMPLE SIZE CALCULATION |
## +--------------------------------------------------+
##
## Logistic Regression Coefficient (Wald's Z-Test)
##
## Method : Demidenko (Variance Corrected)
## Predictor Dist. : Bernoulli
##
## ---------------------------------------------------
## Hypotheses
## ---------------------------------------------------
## H0 (Null Claim) : Odds Ratio = 1
## H1 (Alt. Claim) : Odds Ratio != 1
##
## ---------------------------------------------------
## Results
## ---------------------------------------------------
## Sample Size = 3536 <<
## Type 1 Error (alpha) = 0.050
## Type 2 Error (beta) = 0.200
## Statistical Power = 0.8