Sample size and power of a study


Created by Jari Haukka, email: jari.haukka@helsinki.fi
Updated Thu Apr 03 10:30:13 2014

Creative Commons License
This work is licensed under a Creative Commons Attribution-NonCommercial 3.0 Unported License.


library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

###Odds ratio and sample size (n)


smokers <- c(83, 90, 129, 70)
patients <- c(86, 93, 136, 82)
prop.test(smokers, patients)
## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  smokers out of patients
## X-squared = 12.6, df = 3, p-value = 0.005585
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4 
## 0.9651 0.9677 0.9485 0.8537

# Plot power vs. n for various odds ratios (base prob.=.1)
n <- seq(10, 1000, by = 10)
OR <- seq(0.2, 0.9, by = 0.1)

plot(0, 0, xlim = range(n), ylim = c(0, 1), xlab = "n", ylab = "Power", type = "n", 
    main = "Base probability 0.1")
for (or in OR) {
    lines(n, bpower(0.1, odds.ratio = or, n = n))
    text(350, bpower(0.1, odds.ratio = or, n = 350) - 0.02, format(or))
}

plot of chunk unnamed-chunk-2


plot(0, 0, xlim = range(n), ylim = c(0.01, 1), xlab = "n", ylab = "Power", type = "n", 
    main = "Base probability 0.05", log = "y")
## Warning: 1 y value <= 0 omitted from logarithmic plot
for (or in OR) {
    lines(n, bpower(0.05, odds.ratio = or, n = n))
    text(350, bpower(0.05, odds.ratio = or, n = 350) - 0.02, format(or))
}

plot of chunk unnamed-chunk-2


How to allocate optimal groups when comparing two groups?

Below is presented for two sample sizes (100 ja 200) how groups should be allocated optimally to obtain maximum power.

It is assumed that prob. for “success” is p1=0.1 and allocation is calculated for varius P2 values.

Brittain E, Schlesselman JJ (1982): Optimal allocation for the comparison of proportions. Biometrics 38:1003–9.

tmp.p1 <- 0.1
tmp.p2 <- seq(0.2, 0.9, by = 0.05)
tmp.max.power.100 <- sapply(tmp.p2, function(x) ballocation(p1 = tmp.p1, p2 = x, 
    n = 100)[4])
tmp.max.power.200 <- sapply(tmp.p2, function(x) ballocation(p1 = tmp.p1, p2 = x, 
    n = 200)[4])


tmp.power.100 <- rep(NA, length(tmp.max.power.100))
for (i in seq(tmp.power.100)) tmp.power.100[i] <- bpower(p1 = tmp.p1, p2 = tmp.p2[i], 
    n1 = 100 * tmp.max.power.100[i], n2 = 100 * (1 - tmp.max.power.100[i]), 
    alpha = 0.05)
tmp.power.200 <- rep(NA, length(tmp.max.power.200))
for (i in seq(tmp.power.200)) tmp.power.200[i] <- bpower(p1 = tmp.p1, p2 = tmp.p2[i], 
    n1 = 200 * tmp.max.power.200[i], n2 = 200 * (1 - tmp.max.power.200[i]), 
    alpha = 0.05)

tmp.max.power
## Error: object 'tmp.max.power' not found
plot(tmp.p2, tmp.max.power.200, type = "l", xlab = "p2", ylab = "Fraction for group 1 (p1=0.1)", 
    ylim = c(0.4, 0.65))
lines(tmp.p2, tmp.max.power.100, col = "green", lty = 2)
text(0.2, tmp.max.power.200[1], "n=200", adj = 0)
text(0.2, tmp.max.power.100[1], "n=100", adj = 0)
abline(h = 0.5, lty = 5)
title(main = "Allocation of groups to get maximum power")

plot of chunk unnamed-chunk-3


ballocation(p1 = tmp.p1, p2 = tmp.p2[1], n = 100)
##    fraction.group1.min.var.diff   fraction.group1.min.var.ratio 
##                          0.4286                          0.6000 
## fraction.group1.min.var.logodds       fraction.group1.max.power 
##                          0.5714                          0.6274
bpower(p1 = tmp.p1, p2 = tmp.p2[1], n1 = 60, n2 = 100 - 60, alpha = 0.05)
## Power 
## 0.301

cbind(tmp.p1, tmp.p2, tmp.max.power.100, tmp.max.power.100)
##                           tmp.p1 tmp.p2 tmp.max.power.100
## fraction.group1.max.power    0.1   0.20            0.6274
## fraction.group1.max.power    0.1   0.25            0.5874
## fraction.group1.max.power    0.1   0.30            0.5554
## fraction.group1.max.power    0.1   0.35            0.5305
## fraction.group1.max.power    0.1   0.40            0.5085
## fraction.group1.max.power    0.1   0.45            0.4895
## fraction.group1.max.power    0.1   0.50            0.4745
## fraction.group1.max.power    0.1   0.55            0.4625
## fraction.group1.max.power    0.1   0.60            0.4525
## fraction.group1.max.power    0.1   0.65            0.4466
## fraction.group1.max.power    0.1   0.70            0.4436
## fraction.group1.max.power    0.1   0.75            0.4446
## fraction.group1.max.power    0.1   0.80            0.4525
## fraction.group1.max.power    0.1   0.85            0.4995
## fraction.group1.max.power    0.1   0.90            0.4995
##                           tmp.max.power.100
## fraction.group1.max.power            0.6274
## fraction.group1.max.power            0.5874
## fraction.group1.max.power            0.5554
## fraction.group1.max.power            0.5305
## fraction.group1.max.power            0.5085
## fraction.group1.max.power            0.4895
## fraction.group1.max.power            0.4745
## fraction.group1.max.power            0.4625
## fraction.group1.max.power            0.4525
## fraction.group1.max.power            0.4466
## fraction.group1.max.power            0.4436
## fraction.group1.max.power            0.4446
## fraction.group1.max.power            0.4525
## fraction.group1.max.power            0.4995
## fraction.group1.max.power            0.4995

# In this example, 4 plots are drawn on one page, one plot for each
# combination of noncompliance percentage.  Within a plot, the 5-year
# mortality % in the control group is on the x-axis, and separate curves are
# drawn for several % reductions in mortality with the intervention.  The
# accrual period is 1.5y, with all patients followed at least 5y and some
# 6.5y.


# par(mfrow=c(2,2),oma=c(3,0,3,0)) par(mfrow=c(2,2),oma=c(1,0,1,0))
par(mfrow = c(1, 1))


morts <- seq(10, 25, length = 50)
red <- c(10, 15, 20, 25)

for (noncomp in c(0, 10, 15, -1)) {
    if (noncomp >= 0) 
        nc.i <- nc.c <- noncomp else {
        nc.i <- 25
        nc.c <- 15
    }
    z <- paste("Drop-in ", nc.c, "%, Non-adherence ", nc.i, "%", sep = "")
    plot(0, 0, xlim = range(morts), ylim = c(0, 1), xlab = "5-year Mortality in Control Patients (%)", 
        ylab = "Power", type = "n")
    title(z)
    cat(z, "\n")
    lty <- 0
    for (r in red) {
        lty <- lty + 1
        power <- morts
        i <- 0
        for (m in morts) {
            i <- i + 1
            power[i] <- cpower(5, 14000, m/100, r, 1.5, 5, nc.c, nc.i, pr = FALSE)
        }
        lines(morts, power, lty = lty)
    }
    if (noncomp == 0) 
        legend(18, 0.55, rev(paste(red, "% reduction", sep = "")), lty = 4:1, 
            bty = "n")
}

plot of chunk unnamed-chunk-4

## Drop-in 0%, Non-adherence 0%

plot of chunk unnamed-chunk-4

## Drop-in 10%, Non-adherence 10%

plot of chunk unnamed-chunk-4

## Drop-in 15%, Non-adherence 15%
## Drop-in 15%, Non-adherence 25%

mtitle("", ll = "alpha=.05, 2-tailed, Total N=14000", cex.l = 0.8)

plot of chunk unnamed-chunk-4


tmp.N <- seq(10, 200, by = 10)

tmp.1 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 1)$power)
tmp.2 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 2)$power)
tmp.4 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 4)$power)
tmp.1.2 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 1/2)$power)



plot(range(tmp.N), range(c(tmp.1.2, tmp.4)), type = "n", xlab = "N", ylab = "Power", 
    log = "")
abline(h = 0.8, lty = 5, col = "green", v = tmp.N)
lines(tmp.N, tmp.1)
lines(tmp.N, tmp.2)
lines(tmp.N, tmp.4)
lines(tmp.N, tmp.1.2)

text(rep(10, 5), c(tmp.1[1], tmp.2[1], tmp.4[1], tmp.1.2[1]), c("1", "2", "4", 
    ".5"), adj = 0)

plot of chunk unnamed-chunk-5

library(xtable)
## 
## Attaching package: 'xtable'
## 
## The following objects are masked from 'package:Hmisc':
## 
##     label, label<-
tmp.ulos <- cbind(tmp.1, tmp.2, tmp.4, tmp.1.2)
dimnames(tmp.ulos) <- list(tmp.N, c("1", "2", "4", "0.5"))
print(xtable(tmp.ulos, digits = c(0, 4, 4, 4, 4), caption = "Power. N and SD as proportion of delta (100%, 200%, 400%, 50%))"), 
    type = "html")
Power. N and SD as proportion of delta (100%, 200%, 400%, 50%))
1 2 4 0.5
10 0.5620 0.1838 0.0763 0.9882
20 0.8690 0.3377 0.1172 1.0000
30 0.9677 0.4778 0.1568 1.0000
40 0.9930 0.5981 0.1961 1.0000
50 0.9986 0.6969 0.2351 1.0000
60 0.9997 0.7753 0.2737 1.0000
70 1.0000 0.8358 0.3116 1.0000
80 1.0000 0.8816 0.3488 1.0000
90 1.0000 0.9156 0.3852 1.0000
100 1.0000 0.9404 0.4204 1.0000
110 1.0000 0.9583 0.4546 1.0000
120 1.0000 0.9711 0.4875 1.0000
130 1.0000 0.9801 0.5192 1.0000
140 1.0000 0.9864 0.5495 1.0000
150 1.0000 0.9908 0.5785 1.0000
160 1.0000 0.9938 0.6062 1.0000
170 1.0000 0.9958 0.6325 1.0000
180 1.0000 0.9972 0.6574 1.0000
190 1.0000 0.9981 0.6810 1.0000
200 1.0000 0.9988 0.7033 1.0000


cbind(tmp.N, tmp.1, tmp.2, tmp.4)
##       tmp.N  tmp.1  tmp.2   tmp.4
##  [1,]    10 0.5620 0.1838 0.07634
##  [2,]    20 0.8690 0.3377 0.11718
##  [3,]    30 0.9677 0.4778 0.15680
##  [4,]    40 0.9930 0.5981 0.19609
##  [5,]    50 0.9986 0.6969 0.23509
##  [6,]    60 0.9997 0.7753 0.27366
##  [7,]    70 1.0000 0.8358 0.31163
##  [8,]    80 1.0000 0.8816 0.34885
##  [9,]    90 1.0000 0.9156 0.38516
## [10,]   100 1.0000 0.9404 0.42044
## [11,]   110 1.0000 0.9583 0.45459
## [12,]   120 1.0000 0.9711 0.48752
## [13,]   130 1.0000 0.9801 0.51918
## [14,]   140 1.0000 0.9864 0.54952
## [15,]   150 1.0000 0.9908 0.57852
## [16,]   160 1.0000 0.9938 0.60617
## [17,]   170 1.0000 0.9958 0.63246
## [18,]   180 1.0000 0.9972 0.65740
## [19,]   190 1.0000 0.9981 0.68102
## [20,]   200 1.0000 0.9988 0.70333
power.t.test(power = 0.9, delta = 1)
## 
##      Two-sample t test power calculation 
## 
##               n = 22.02
##           delta = 1
##              sd = 1
##       sig.level = 0.05
##           power = 0.9
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
power.t.test(power = 0.9, delta = 1, alternative = "one.sided")
## 
##      Two-sample t test power calculation 
## 
##               n = 17.85
##           delta = 1
##              sd = 1
##       sig.level = 0.05
##           power = 0.9
##     alternative = one.sided
## 
## NOTE: n is number in *each* group