We will show the basic idea of permutation test.
## Warning: package 'Hmisc' was built under R version 3.0.3
## Loading required package: grid
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.0.3
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
Set the parameters for two normally distributed populations and sample sizes:
n1 <- 10
mu1 <- 50
sd1 <- 10
n2 <- 10
mu2 <- 53
sd2 <- 10
Select two samples
X <- rnorm(n1, mu1, sd1)
Y <- rnorm(n2, mu2, sd2)
XY <- c(X, Y)
group <- c(rep(1, n1), rep(2, n2))
Plot the data
plot(range(XY), c(0, 3), xlim = c(range(XY)), type = "n", xlab = "", ylab = "")
segments(XY, group - 0.5, XY, group + 0.5, col = group, lwd = 2)
Results of Student t-test
t.test(X, Y)
##
## Welch Two Sample t-test
##
## data: X and Y
## t = -2.224, df = 17.58, p-value = 0.03951
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.6908 -0.5709
## sample estimates:
## mean of x mean of y
## 47.29 57.92
Another way to perform the t-test.
t <- t.test(XY ~ group)
t
##
## Welch Two Sample t-test
##
## data: XY by group
## t = -2.224, df = 17.58, p-value = 0.03951
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.6908 -0.5709
## sample estimates:
## mean in group 1 mean in group 2
## 47.29 57.92
d0 <- as.vector(diff(t$estimate)) # get rid of names
d0
## [1] 10.63
The difference of sample means is 10.63
This is a basic resampling function.
resample <- function(XY, group, FUN = mean, plt = TRUE, permut = TRUE) {
g <- group
if (permut)
g <- sample(g)
stat <- sapply(split(XY, g), FUN)
d <- diff(stat)
if (plt) {
plot(range(XY), c(0, 3), xlim = c(range(XY)), type = "n", xlab = "",
ylab = "", axes = FALSE)
axis(1)
segments(XY, group - 0.4, XY, group + 0.4, col = "grey", lwd = 2)
segments(XY, g - 0.4, XY, g + 0.4, col = group, lwd = 2)
points(stat, rep(0, length(stat)), pch = 16, col = unique(g), cex = 2)
}
return(d)
}
Repeat permutations and calculate test statistics - in this case difference of medians. In addition, some plots are shown.
N <- 200
d <- NULL
FUN <- median
d0 <- resample(XY, group, FUN = FUN, permut = FALSE, plt = FALSE)
par(mfrow = c(4, 1), mar = c(1, 1, 0, 1))
frame()
for (i in 1:N) {
d <- c(d, resample(XY, group, FUN = FUN, plt = (i < 5)))
}
Plot the empirical 'sampling' distribution. The observed test statistic based on original samples is shown by red line.
par(mfrow = c(1, 1))
hist(d, col = "lightblue")
abline(v = d0, col = "red", lwd = 5)
rug(d)
p <- sum(d > d0)/length(d)
cat("p=", p, "\n")
## p= 0.04
deparse(FUN)
## [1] "function (x, na.rm = FALSE) " "UseMethod(\"median\")"
t.test(XY ~ group)$p.value
## [1] 0.03951
Windows 7 x64 (build 7601) Service Pack 1 R version 3.0.2 (2013-09-25) Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=Slovenian_Slovenia.1250
[2] LC_CTYPE=Slovenian_Slovenia.1250
[3] LC_MONETARY=Slovenian_Slovenia.1250
[4] LC_NUMERIC=C
[5] LC_TIME=Slovenian_Slovenia.1250
attached base packages:
[1] splines grid stats graphics datasets utils
[7] grDevices methods base
other attached packages: [1] Hmisc_3.14-3 Formula_1.1-1 survival_2.37-7 lattice_0.20-27 [5] knitr_1.5
loaded via a namespace (and not attached):
[1] cluster_1.14.4 evaluate_0.5.3 formatR_0.10
[4] latticeExtra_0.6-26 RColorBrewer_1.0-5 stringr_0.6.2
[7] tools_3.0.2
© A. Blejec