Some examples of permutation tests using the difference of means and the Kolmogorov-Smirnov test statistic

Permutation tests based on the difference of means

##########################################################################
# Permutation test based on the difference of means
##########################################################################
rm(list = ls())

# Simulated samples (Same distribution)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 0, sd = 1)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) abs(mean(A) - mean(B))

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
 set.seed(i)
 Z.pi <- sample(Z, N, replace = FALSE)
 TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0.1402
##########################################################################
# Permutation test based on the difference of means
##########################################################################
rm(list = ls())

# Simulated samples (Different mean)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 0.5, sd = 1)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) abs(mean(A) - mean(B))

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0.0259
##########################################################################
# Permutation test based on the difference of means
##########################################################################
rm(list = ls())


# Simulated samples (Different SD)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 0, sd = 2)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) abs(mean(A) - mean(B))

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0.1608

Permutation tests based on the Kolmogorov-Smirnov test statistic

##########################################################################
#Test based on the maximum distance between empirical distributions
##########################################################################
rm(list = ls())
# Simulated samples (Same distribution)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 0, sd = 1)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) ks.test(A,B)$statistic

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0.2876
# Direct test using the Kolmogorov distribution
ks.test(X,Y)
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  X and Y
## D = 0.13, p-value = 0.3667
## alternative hypothesis: two-sided
##########################################################################
#Test based on the maximum distance between empirical distributions
##########################################################################
rm(list = ls())
# Simulated samples (Different mean)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 1, sd = 1)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) ks.test(A,B)$statistic

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0
# Direct test using the Kolmogorov distribution
ks.test(X,Y)
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  X and Y
## D = 0.37, p-value = 2.267e-06
## alternative hypothesis: two-sided
##########################################################################
#Test based on the maximum distance between empirical distributions
##########################################################################
rm(list = ls())
# Simulated samples (Different SD)
set.seed(123)
X <- rnorm(n=100, mean = 0, sd = 1)
Y <- rnorm(n=100, mean = 0, sd = 2)
Z <- c(X,Y)
n <- length(X)
m <- length(Y)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) ks.test(A,B)$statistic

# Test statistic for the observed sample
TO <- TS(X,Y)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 8e-04
# Direct test using the Kolmogorov distribution
ks.test(X,Y)
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  X and Y
## D = 0.28, p-value = 0.0007873
## alternative hypothesis: two-sided

Comparison of Medians of gene expression levels for two types of genes

#############################################################
# Permutation test for H0: Median Type I = Median Type II
#############################################################
rm(list=ls())
T1 <- c(230, -1350, -1580, -400, -760) # Type I 
T2 <- c(970, 110, -50, -190, -200)    # Type II 
Z <- c(T1,T2)
n <- length(T1)
m <- length(T2)
N <- length(Z)

# Number of permutations
K = 10000

# Test statistic
TS <- function(A,B) abs(median(A) - median(B))

# Test statistic for the observed sample
TO <- TS(T1,T2)

# Vector of test statistics for each permutation
TT <- vector()

# Permutation test
for(i in 1:K){
  set.seed(i)
  Z.pi <- sample(Z, N, replace = FALSE)
  TT[i] <- TS(Z.pi[1:n], Z.pi[(n+1):(n+m)])
}

# Visualising the permuted test statistics
hist(TT)
abline(v = TO, lwd = 2, col = "red")
box()

# approximate p-value
mean(TT>TO)
## [1] 0.0464