Sample size and power of a study

Created by Jari Haukka, email: jari.haukka@helsinki.fi
Updated Thu Apr 03 10:30:13 2014

This work is licensed under a Creative Commons Attribution-NonCommercial 3.0 Unported License.

library(Hmisc)

## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

###Odds ratio and sample size (n)


smokers <- c(83, 90, 129, 70)
patients <- c(86, 93, 136, 82)
prop.test(smokers, patients)

## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  smokers out of patients
## X-squared = 12.6, df = 3, p-value = 0.005585
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4 
## 0.9651 0.9677 0.9485 0.8537


# Plot power vs. n for various odds ratios (base prob.=.1)
n <- seq(10, 1000, by = 10)
OR <- seq(0.2, 0.9, by = 0.1)

plot(0, 0, xlim = range(n), ylim = c(0, 1), xlab = "n", ylab = "Power", type = "n", 
    main = "Base probability 0.1")
for (or in OR) {
    lines(n, bpower(0.1, odds.ratio = or, n = n))
    text(350, bpower(0.1, odds.ratio = or, n = 350) - 0.02, format(or))
}

plot of chunk unnamed-chunk-2


plot(0, 0, xlim = range(n), ylim = c(0.01, 1), xlab = "n", ylab = "Power", type = "n", 
    main = "Base probability 0.05", log = "y")

## Warning: 1 y value <= 0 omitted from logarithmic plot

for (or in OR) {
    lines(n, bpower(0.05, odds.ratio = or, n = n))
    text(350, bpower(0.05, odds.ratio = or, n = 350) - 0.02, format(or))
}

plot of chunk unnamed-chunk-2

How to allocate optimal groups when comparing two groups?

Below is presented for two sample sizes (100 ja 200) how groups should be allocated optimally to obtain maximum power.

It is assumed that prob. for “success” is p1=0.1 and allocation is calculated for varius P2 values.

Brittain E, Schlesselman JJ (1982): Optimal allocation for the comparison of proportions. Biometrics 38:10039.

tmp.p1 <- 0.1
tmp.p2 <- seq(0.2, 0.9, by = 0.05)
tmp.max.power.100 <- sapply(tmp.p2, function(x) ballocation(p1 = tmp.p1, p2 = x, 
    n = 100)[4])
tmp.max.power.200 <- sapply(tmp.p2, function(x) ballocation(p1 = tmp.p1, p2 = x, 
    n = 200)[4])


tmp.power.100 <- rep(NA, length(tmp.max.power.100))
for (i in seq(tmp.power.100)) tmp.power.100[i] <- bpower(p1 = tmp.p1, p2 = tmp.p2[i], 
    n1 = 100 * tmp.max.power.100[i], n2 = 100 * (1 - tmp.max.power.100[i]), 
    alpha = 0.05)
tmp.power.200 <- rep(NA, length(tmp.max.power.200))
for (i in seq(tmp.power.200)) tmp.power.200[i] <- bpower(p1 = tmp.p1, p2 = tmp.p2[i], 
    n1 = 200 * tmp.max.power.200[i], n2 = 200 * (1 - tmp.max.power.200[i]), 
    alpha = 0.05)

tmp.max.power

## Error: object 'tmp.max.power' not found

plot(tmp.p2, tmp.max.power.200, type = "l", xlab = "p2", ylab = "Fraction for group 1 (p1=0.1)", 
    ylim = c(0.4, 0.65))
lines(tmp.p2, tmp.max.power.100, col = "green", lty = 2)
text(0.2, tmp.max.power.200[1], "n=200", adj = 0)
text(0.2, tmp.max.power.100[1], "n=100", adj = 0)
abline(h = 0.5, lty = 5)
title(main = "Allocation of groups to get maximum power")

plot of chunk unnamed-chunk-3


ballocation(p1 = tmp.p1, p2 = tmp.p2[1], n = 100)

##    fraction.group1.min.var.diff   fraction.group1.min.var.ratio 
##                          0.4286                          0.6000 
## fraction.group1.min.var.logodds       fraction.group1.max.power 
##                          0.5714                          0.6274

bpower(p1 = tmp.p1, p2 = tmp.p2[1], n1 = 60, n2 = 100 - 60, alpha = 0.05)

## Power 
## 0.301


cbind(tmp.p1, tmp.p2, tmp.max.power.100, tmp.max.power.100)

##                           tmp.p1 tmp.p2 tmp.max.power.100
## fraction.group1.max.power    0.1   0.20            0.6274
## fraction.group1.max.power    0.1   0.25            0.5874
## fraction.group1.max.power    0.1   0.30            0.5554
## fraction.group1.max.power    0.1   0.35            0.5305
## fraction.group1.max.power    0.1   0.40            0.5085
## fraction.group1.max.power    0.1   0.45            0.4895
## fraction.group1.max.power    0.1   0.50            0.4745
## fraction.group1.max.power    0.1   0.55            0.4625
## fraction.group1.max.power    0.1   0.60            0.4525
## fraction.group1.max.power    0.1   0.65            0.4466
## fraction.group1.max.power    0.1   0.70            0.4436
## fraction.group1.max.power    0.1   0.75            0.4446
## fraction.group1.max.power    0.1   0.80            0.4525
## fraction.group1.max.power    0.1   0.85            0.4995
## fraction.group1.max.power    0.1   0.90            0.4995
##                           tmp.max.power.100
## fraction.group1.max.power            0.6274
## fraction.group1.max.power            0.5874
## fraction.group1.max.power            0.5554
## fraction.group1.max.power            0.5305
## fraction.group1.max.power            0.5085
## fraction.group1.max.power            0.4895
## fraction.group1.max.power            0.4745
## fraction.group1.max.power            0.4625
## fraction.group1.max.power            0.4525
## fraction.group1.max.power            0.4466
## fraction.group1.max.power            0.4436
## fraction.group1.max.power            0.4446
## fraction.group1.max.power            0.4525
## fraction.group1.max.power            0.4995
## fraction.group1.max.power            0.4995

# In this example, 4 plots are drawn on one page, one plot for each
# combination of noncompliance percentage.  Within a plot, the 5-year
# mortality % in the control group is on the x-axis, and separate curves are
# drawn for several % reductions in mortality with the intervention.  The
# accrual period is 1.5y, with all patients followed at least 5y and some
# 6.5y.


# par(mfrow=c(2,2),oma=c(3,0,3,0)) par(mfrow=c(2,2),oma=c(1,0,1,0))
par(mfrow = c(1, 1))


morts <- seq(10, 25, length = 50)
red <- c(10, 15, 20, 25)

for (noncomp in c(0, 10, 15, -1)) {
    if (noncomp >= 0) 
        nc.i <- nc.c <- noncomp else {
        nc.i <- 25
        nc.c <- 15
    }
    z <- paste("Drop-in ", nc.c, "%, Non-adherence ", nc.i, "%", sep = "")
    plot(0, 0, xlim = range(morts), ylim = c(0, 1), xlab = "5-year Mortality in Control Patients (%)", 
        ylab = "Power", type = "n")
    title(z)
    cat(z, "\n")
    lty <- 0
    for (r in red) {
        lty <- lty + 1
        power <- morts
        i <- 0
        for (m in morts) {
            i <- i + 1
            power[i] <- cpower(5, 14000, m/100, r, 1.5, 5, nc.c, nc.i, pr = FALSE)
        }
        lines(morts, power, lty = lty)
    }
    if (noncomp == 0) 
        legend(18, 0.55, rev(paste(red, "% reduction", sep = "")), lty = 4:1, 
            bty = "n")
}

plot of chunk unnamed-chunk-4

## Drop-in 0%, Non-adherence 0%

plot of chunk unnamed-chunk-4

## Drop-in 10%, Non-adherence 10%

plot of chunk unnamed-chunk-4

## Drop-in 15%, Non-adherence 15%

## Drop-in 15%, Non-adherence 25%


mtitle("", ll = "alpha=.05, 2-tailed, Total N=14000", cex.l = 0.8)

plot of chunk unnamed-chunk-4


tmp.N <- seq(10, 200, by = 10)

tmp.1 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 1)$power)
tmp.2 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 2)$power)
tmp.4 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 4)$power)
tmp.1.2 <- sapply(tmp.N, function(x) power.t.test(n = x, delta = 1, sd = 1/2)$power)



plot(range(tmp.N), range(c(tmp.1.2, tmp.4)), type = "n", xlab = "N", ylab = "Power", 
    log = "")
abline(h = 0.8, lty = 5, col = "green", v = tmp.N)
lines(tmp.N, tmp.1)
lines(tmp.N, tmp.2)
lines(tmp.N, tmp.4)
lines(tmp.N, tmp.1.2)

text(rep(10, 5), c(tmp.1[1], tmp.2[1], tmp.4[1], tmp.1.2[1]), c("1", "2", "4", 
    ".5"), adj = 0)

plot of chunk unnamed-chunk-5

library(xtable)

## 
## Attaching package: 'xtable'
## 
## The following objects are masked from 'package:Hmisc':
## 
##     label, label<-

tmp.ulos <- cbind(tmp.1, tmp.2, tmp.4, tmp.1.2)
dimnames(tmp.ulos) <- list(tmp.N, c("1", "2", "4", "0.5"))

print(xtable(tmp.ulos, digits = c(0, 4, 4, 4, 4), caption = "Power. N and SD as proportion of delta (100%, 200%, 400%, 50%))"), 
    type = "html")

Power. N and SD as proportion of delta (100%, 200%, 400%, 50%))
	1	2	4	0.5
10	0.5620	0.1838	0.0763	0.9882
20	0.8690	0.3377	0.1172	1.0000
30	0.9677	0.4778	0.1568	1.0000
40	0.9930	0.5981	0.1961	1.0000
50	0.9986	0.6969	0.2351	1.0000
60	0.9997	0.7753	0.2737	1.0000
70	1.0000	0.8358	0.3116	1.0000
80	1.0000	0.8816	0.3488	1.0000
90	1.0000	0.9156	0.3852	1.0000
100	1.0000	0.9404	0.4204	1.0000
110	1.0000	0.9583	0.4546	1.0000
120	1.0000	0.9711	0.4875	1.0000
130	1.0000	0.9801	0.5192	1.0000
140	1.0000	0.9864	0.5495	1.0000
150	1.0000	0.9908	0.5785	1.0000
160	1.0000	0.9938	0.6062	1.0000
170	1.0000	0.9958	0.6325	1.0000
180	1.0000	0.9972	0.6574	1.0000
190	1.0000	0.9981	0.6810	1.0000
200	1.0000	0.9988	0.7033	1.0000



cbind(tmp.N, tmp.1, tmp.2, tmp.4)

##       tmp.N  tmp.1  tmp.2   tmp.4
##  [1,]    10 0.5620 0.1838 0.07634
##  [2,]    20 0.8690 0.3377 0.11718
##  [3,]    30 0.9677 0.4778 0.15680
##  [4,]    40 0.9930 0.5981 0.19609
##  [5,]    50 0.9986 0.6969 0.23509
##  [6,]    60 0.9997 0.7753 0.27366
##  [7,]    70 1.0000 0.8358 0.31163
##  [8,]    80 1.0000 0.8816 0.34885
##  [9,]    90 1.0000 0.9156 0.38516
## [10,]   100 1.0000 0.9404 0.42044
## [11,]   110 1.0000 0.9583 0.45459
## [12,]   120 1.0000 0.9711 0.48752
## [13,]   130 1.0000 0.9801 0.51918
## [14,]   140 1.0000 0.9864 0.54952
## [15,]   150 1.0000 0.9908 0.57852
## [16,]   160 1.0000 0.9938 0.60617
## [17,]   170 1.0000 0.9958 0.63246
## [18,]   180 1.0000 0.9972 0.65740
## [19,]   190 1.0000 0.9981 0.68102
## [20,]   200 1.0000 0.9988 0.70333

power.t.test(power = 0.9, delta = 1)

## 
##      Two-sample t test power calculation 
## 
##               n = 22.02
##           delta = 1
##              sd = 1
##       sig.level = 0.05
##           power = 0.9
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

power.t.test(power = 0.9, delta = 1, alternative = "one.sided")

## 
##      Two-sample t test power calculation 
## 
##               n = 17.85
##           delta = 1
##              sd = 1
##       sig.level = 0.05
##           power = 0.9
##     alternative = one.sided
## 
## NOTE: n is number in *each* group