### power analysis for multiple logistic regression with continuous predictors
### according to "A simple method of sample size calculation for linear and logistic regression."
### see http://dx.doi.org/10.1002/(SICI)1097-0258(19980730)17:14<1623::AID-SIM871>3.0.CO;2-S
library(powerMediation)
## sample size for simple logistic regression
p1 <- 0.5 # the event rate at the mean of the predictor X
OR <- exp(0.405) # expected odds ratio. log(OR) is the change in log odds
# for an increase of one unit in X.
# beta*=log(OR) is the effect size to be tested
n1 <- SSizeLogisticCon(p1, OR, alpha = 0.05, power = 0.8)
## sample size for multiple logistic regression
## n_p = n_1 / (1-rho^2)
## rho^2 = R^2, for X_1 ~ X_2 + ... + X_p
r2 <- 0.3 # R^2
np <- n1 / (1-r2)
## variance inflation for the multiple case
ssize.multi <- function(p1, OR, r2, alpha=0.05, power=0.8) {
n1 <- SSizeLogisticCon(p1, OR, alpha, power)
np <- n1 / (1-r2)
return(np)
}
## plot sample size
library(rgl)
x <- seq(0.2, 0.8, by=0.01)
y <- seq(1.5, 5, by=0.01)
z1 <- outer(x ,y, function(x,y) ssize.multi(x, y, 0.1))
z2 <- outer(x ,y, function(x,y) ssize.multi(x, y, 0.3))
z3 <- outer(x ,y, function(x,y) ssize.multi(x, y, 0.5))
z4 <- outer(x ,y, function(x,y) ssize.multi(x, y, 0.5, power=0.5))
persp3d(x, y, z1, col="yellow")
persp3d(x, y, z2, col="green", add=TRUE)
persp3d(x, y, z3, col="blue", add=TRUE)
persp3d(x, y, z4, col="red", add=TRUE)
## formula (4) from above reference
## p1: as above
## p2: event rate at one SD above the mean of X
ssize.whittemore <- function (p1, p2, alpha = 0.05, power = 0.8) {
beta.star <- log(p2*(1-p1)/(p1*(1-p2)))
za <- qnorm(1 - alpha/2)
zb <- qnorm(power)
V0 <- 1
Vb <- exp(-beta.star^2 / 2)
delta <- (1+(1+beta.star^2)*exp(5*beta.star^2 / 4)) * (1+exp(-beta.star^2 / 4))^(-1)
n <- (V0^(1/2)*za + Vb^(1/2)*zb)^2 * (1+2*p1*delta) / (p1*beta.star^2)
n.int <- ceiling(n)
return(n.int)
}
## multiple case
ssize.whittemore.multi <- function(p1, p2, r2, alpha=0.05, power=0.8) {
n1 <- ssize.whittemore(p1, p2, alpha, power)
np <- n1 / (1-r2)
return(np)
}
## examples as in Table II, p. 1628
## balanced design
SSizeLogisticCon(p1=0.5, OR=exp(0.405), alpha = 0.05, power = 0.95) # formula (1)
## [1] 317
ssize.whittemore(p1=0.5, p2=0.6, alpha=0.05, power=0.95) # nQuery
## [1] 342
## unbalanced design, high event rates
SSizeLogisticCon(p1=0.4, OR=exp(0.405), alpha = 0.05, power = 0.95) # formula (1)
## [1] 331
ssize.whittemore(p1=0.4, p2=0.5, alpha=0.05, power=0.95) # nQuery
## [1] 380
## unbalanced design, low event rates
SSizeLogisticCon(p1=0.1, OR=exp(0.405), alpha = 0.05, power = 0.95) # formula (1)
## [1] 881
ssize.whittemore(p1=0.1, p2=0.143, alpha=0.05, power=0.95) # nQuery
## [1] 946
ssize.whittemore(p1=0.1, p2=0.14285, alpha=0.05, power=0.95) # slightly adjusted
## [1] 951
## example, p. 1628
## two-sample t-test
ssize.ttest <- function(p1, OR, df=100, alpha = 0.05, power = 0.8) {
beta.star <- log(OR)
ta <- qt(1 - alpha/2, df)
tb <- qt(power, df)
n <- (ta + tb)^2/(p1 * (1 - p1) * beta.star^2)
n.int <- ceiling(n)
return(n.int)
}
ssize.ttest(p1=0.2, OR=exp(0.3), df=1000, alpha=0.05, power=0.95)
## [1] 905
SSizeLogisticCon(p1=0.2, OR=exp(0.3), alpha=0.05, power=0.95)
## [1] 903
p1 <- 0.2
OR <- exp(0.3)
p2 <- OR*p1 / (1-p1+OR*p1)
ssize.multi(p1, OR, r2=0.1, alpha=0.05, power=0.95)
## [1] 1003.333
ssize.whittemore.multi(p1, p2, r2=0.1, alpha=0.05, power=0.95)
## [1] 1138.889