Two-Sample Two-Sided Permutation Testing

Data

This handout provides some simple two-sided permutation tests. First we load some data.

data(sleep)
extra <- sleep[, 1]
group <- sleep[, 2]
x <- extra[group == 1]
y <- extra[group == 2]
sleep
##    extra group ID
## 1    0.7     1  1
## 2   -1.6     1  2
## 3   -0.2     1  3
## 4   -1.2     1  4
## 5   -0.1     1  5
## 6    3.4     1  6
## 7    3.7     1  7
## 8    0.8     1  8
## 9    0.0     1  9
## 10   2.0     1 10
## 11   1.9     2  1
## 12   0.8     2  2
## 13   1.1     2  3
## 14   0.1     2  4
## 15  -0.1     2  5
## 16   4.4     2  6
## 17   5.5     2  7
## 18   1.6     2  8
## 19   4.6     2  9
## 20   3.4     2 10
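
As an aside, the same split of extra by group can also be obtained with split(); this is only an equivalent alternative and is not used below.

by.group <- split(sleep$extra, sleep$group)  # list with one component per group level
all.equal(by.group[["1"]], x)  # should be TRUE
all.equal(by.group[["2"]], y)  # should be TRUE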

The t-test

The first two functions implement the parametric and the non-parametric (permutation) versions of the t-test.

t.par <- function(x, y, p = 0.95) {
    n1 <- length(x)
    n2 <- length(y)
    df <- n1 + n2 - 2
    ta <- abs(t.stat(x, y))
    # pp is one minus the two-sided p-value; significant when pp exceeds the level p
    pp <- 2 * pt(ta, df) - 1
    return(list(sig = pp > p, pp = pp, ta = ta))
}
t.np <- function(x, y, p = 0.95, nperm = 100) {
    n1 <- length(x)
    n2 <- length(y)
    t0 <- abs(t.stat(x, y))
    nn <- 1:(n1 + n2)
    z <- c(x, y)
    tt <- NULL
    for (i in 1:nperm) {
        # randomly reassign the pooled observations to the two groups
        ind <- sample(nn, n1)
        xp <- z[ind]
        yp <- z[-ind]
        tt <- c(tt, abs(t.stat(xp, yp)))
    }
    # fraction of permuted statistics strictly below the observed one
    pp <- edf(t0, tt)
    return(list(sig = pp > p, pp = pp))
}

For both functions we need to compute the t-statistic for two-sided testing.

t.stat <- function(x, y) {
    n1 <- length(x)
    n2 <- length(y)
    mx <- mean(x)
    my <- mean(y)
    nn <- n1 + n2 - 2
    nm <- (1/n1) + (1/n2)
    # pooled standard error of the difference in means
    sp <- sqrt(((n1 - 1) * var(x) + (n2 - 1) * var(y)) * nm/nn)
    tt <- (mx - my)/sp
    return(tt)
}

Here are the results for the parametric t-test.

t.par(x, y)
## $sig
## [1] FALSE
## 
## $pp
## [1] 0.9208
## 
## $ta
## [1] 1.861
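
As a quick cross-check, the same quantities are available from R's built-in t.test with var.equal = TRUE: the absolute value of its statistic should equal ta, and pp should equal one minus its two-sided p-value.

tp <- t.test(x, y, var.equal = TRUE)  # pooled-variance two-sample t-test
abs(tp$statistic)  # should equal t.par(x, y)$ta
1 - tp$p.value  # should equal t.par(x, y)$pp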

For our non-parametric computations we need a simple auxiliary function that computes the empirical distribution function.

edf <- function(z, x) {
    # for each element of z, the fraction of x strictly below it
    sapply(z, function(y) length(which(x < y))/length(x))
}
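
A tiny deterministic example shows what edf returns: for each element of its first argument, the fraction of the second argument lying strictly below it.

edf(c(1, 2.5, 5), c(1, 2, 3, 4))  # 0, 2 and 4 of the 4 values lie strictly below: 0.0 0.5 1.0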

Now we can compute (estimate) the significance of the permutation t-test.

t.np(x, y, nperm = 100)
## $sig
## [1] FALSE
## 
## $pp
## [1] 0.9
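
Because the test is based on random permutations, pp changes slightly from run to run. Fixing the seed makes a run reproducible, and 1 - pp estimates the two-sided permutation p-value; a minimal sketch (the seed is an arbitrary choice):

set.seed(1)  # fix the random seed for reproducibility
res <- t.np(x, y, nperm = 1000)  # more permutations reduce the Monte Carlo error
1 - res$pp  # estimated two-sided permutation p-value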

Wilcoxon test

This test still uses the t-statistic, but first replaces all \( n_1+n_2 \) observations by their ranks.

w.np <- function(x, y, p = 0.95, nperm = 100) {
    n1 <- length(x)
    n2 <- length(y)
    # replace the pooled observations by their ranks
    z <- rank(c(x, y))
    x <- z[1:n1]
    y <- z[(n1 + 1):(n1 + n2)]
    t0 <- abs(t.stat(x, y))
    nn <- 1:(n1 + n2)
    tt <- NULL
    for (i in 1:nperm) {
        ind <- sample(nn, n1)
        xp <- z[ind]
        yp <- z[-ind]
        tt <- c(tt, abs(t.stat(xp, yp)))
    }
    pp <- edf(t0, tt)
    return(list(sig = pp > p, pp = pp))
}
w.np(x, y, nperm = 100)
## $sig
## [1] FALSE
## 
## $pp
## [1] 0.79
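
For a fixed pooled sample the t-statistic computed on ranks is a monotone function of the rank sum of the first group, so ranking permutations by the absolute t-statistic is equivalent to ranking them by the Wilcoxon rank-sum statistic. As a rough cross-check we can therefore look at R's built-in wilcox.test; because the sleep data contain ties it falls back on a normal approximation and issues a warning.

wt <- wilcox.test(x, y)  # rank-sum test; warns about ties in these data
wt$p.value  # should be broadly comparable to 1 - w.np(x, y, nperm = 1000)$pp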

Kolmogorov-Smirnov test

This test uses the maximum absolute difference between the empirical distribution functions of the two samples as the test statistic.

k.np <- function(x, y, p = 0.95, nperm = 100) {
    n1 <- length(x)
    n2 <- length(y)
    t0 <- k.stat(x, y)
    nn <- 1:(n1 + n2)
    z <- c(x, y)
    tt <- NULL
    for (i in 1:nperm) {
        ind <- sample(nn, n1)
        xp <- z[ind]
        yp <- z[-ind]
        tt <- c(tt, k.stat(xp, yp))
    }
    pp <- edf(t0, tt)
    return(list(sig = pp > p, pp = pp))
}
k.stat <- function(x, y) {
    # maximum absolute difference of the two empirical distribution functions,
    # evaluated at all points of the pooled sample
    z <- c(x, y)
    return(max(abs(edf(z, x) - edf(z, y))))
}
k.np(x, y, nperm = 100)
## $sig
## [1] FALSE
## 
## $pp
## [1] 0.67
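
As a cross-check, the observed statistic can be compared with the two-sample version of R's built-in ks.test: its reported statistic D should match k.stat(x, y), although ks.test will warn about the ties in these data.

kt <- ks.test(x, y)  # two-sample Kolmogorov-Smirnov test
kt$statistic  # should match k.stat(x, y)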

Power against shifted normal alternatives

Power estimation for the tests does not use the sleep data. Instead we sample from two normal distributions, one of which has its mean shifted by \( d \).

power <- function(d, n1, n2, test, nrep = 100, ...) {
    rp <- NULL
    for (i in 1:nrep) {
        # one standard normal sample and one sample with mean shifted by d
        xz <- rnorm(n1)
        yz <- rnorm(n2, d)
        args <- list(xz, yz, ...)
        rp <- c(rp, do.call(test, args)$sig)
    }
    # fraction of replications in which the test rejects
    return(length(which(rp))/nrep)
}
power(1, 10, 10, t.par)
## [1] 0.58
power(1, 10, 10, t.np, nperm = 1000)
## [1] 0.55
power(1, 10, 10, w.np, nperm = 1000)
## [1] 0.09
power(1, 10, 10, k.np, nperm = 1000)
## [1] 0.43
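
Setting the shift to zero makes both samples come from the same distribution, so the same function estimates the empirical size of each test, which should come out near the nominal level 1 - p = 0.05 up to Monte Carlo error. A minimal sketch (the seed and the number of permutations are arbitrary choices):

set.seed(2)  # reproducible Monte Carlo estimates
power(0, 10, 10, t.par)  # estimated size; should be near 0.05
power(0, 10, 10, t.np, nperm = 200)  # likewise for the permutation t-test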