5.1 A Permutation for Correlation and Slope

Permutation Test

Ref: 5-6

#Table 5.1.1
X <- 1:4
Y <- c(5, 7, 9, 8)
#Use this package to find out all permutation
library(combinat)
#All permutation of Y
Yper <- permn(Y)

Approximate p-value pR

Ref: 5-7

slope <- rep(0, length(Yper))
correlation <- rep(0, length(Yper))
for(i in 1:length(Yper)){
    m <- lm(Yper[[i]] ~ X)
    slope[i] <- summary(m)$coefficients[2, 1]
    correlation[i] <- cor(X, Yper[[i]])
}
(p <- mean(slope >= slope[1]))
## [1] 0.125
(p <- mean(correlation >= correlation[1]))
## [1] 0.125

Ref: 5-7

#Table 5.1.2
Heter <- c(2276, 3724, 2723, 4020, 4011, 2035,
           1540, 1300, 2240, 2467, 3700, 1501,
           2907, 2898, 2783, 2870, 3263, 2780)
Lym <- c(2830, 5488, 2904, 5528, 4966, 3135,
         2079, 1755, 3080, 2363, 5087, 2821,
         5130, 4830, 4690, 3570, 3480, 3823)

Here we let user to set the pernum because the permutation memory size is too big(like the example in above, 18!).

Approximate p-value pR

Ref: 5-8

#Build the Slope permutation test
slope_pertestR <- function(Y, X, pernum=10000){
    slope <- rep(0, pernum)
    m1 <- lm(Y ~ X)
    #Take out slope
    slope[1] <- m1[[1]][2]
    for(i in 1:pernum){
        m <- lm(sample(Y) ~ X)
        slope[i] <- m[[1]][2]
    }
    
    (p <- mean(slope >= slope[1]))
}

slope_pertestR(Lym, Heter)
## [1] 0.0708

Linear model

Ref: 5-8

#Linear model
m <- lm(Heter ~ Lym)
summary(m)
## 
## Call:
## lm(formula = Heter ~ Lym)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -684.6 -316.4    6.3  265.5  696.6 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 555.4213   341.7571   1.625    0.124    
## Lym           0.5779     0.0868   6.657 5.51e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 438 on 16 degrees of freedom
## Multiple R-squared:  0.7347, Adjusted R-squared:  0.7182 
## F-statistic: 44.32 on 1 and 16 DF,  p-value: 5.507e-06

Asymptotic p-value pA

Ref: 5-8

#Asymptotic p-value pA
pnorm(sqrt(length(Heter) - 1)*cor(Heter, Lym), lower.tail=FALSE)
## [1] 0.000204486

5.2 Spearman Rank Correlation

Ref: 5-15

#Spearman Rank Correlation
cor(Heter, Lym, method="spearman")
## [1] 0.8968008

Asymptotic p-value pA

Ref: 5-15

#Asymptotic p-value pA(two tail)
2*pnorm(sqrt(length(Heter) - 1)*cor(Heter, Lym, method="spearman"), lower.tail=FALSE)
## [1] 0.0002176436

Approximate p-value pR

Ref: 5-15

#Approximate p-value pR(two tail)
Spcor_pertestR <- function(Y, X, pernum=10000, 
                           alternative=c("lower", "greater", "two.side")){
    correlation <- rep(0, pernum)
    correlation[1] <- cor(Y, X, method="spearman")
    #Take out slope
    for(i in 2:pernum){
        correlation[i] <- cor(sample(Y), X, method="spearman")
    }
    n1 <- sum(correlation >= correlation[1])
    n2 <- sum(correlation <= correlation[1])
    if(alternative=="two.side"){
        2*(pR <- min(n1, n2)/pernum)
    }else if(alternative=="greater"){
        (pR <- n1/pernum)
    }else{
        (pR <- n2/pernum)
    }
}

Spcor_pertestR(Lym, Heter, alternative="two.side")
## [1] 2e-04

Spearman Rank Correlation with Ties

#Example 5.2.2
X <- 1:4;Y <- c(1, 2, 2, 4)
rbind(rank(X), rank(Y))
##      [,1] [,2] [,3] [,4]
## [1,]    1  2.0  3.0    4
## [2,]    1  2.5  2.5    4
#Spearman Rank Correlation
cor(X, Y, method="spearman")
## [1] 0.9486833

Ref: 5-17

#Table 5.2.2 & Example 5.2.3
A <- c(8, 8, 7, 8, 5, 6, 6, 9, 8, 7)
B <- c(7, 8, 8, 5, 6, 4, 5, 8, 6, 9)
rbind(rank(A), rank(B))
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]  7.5  7.5  4.5  7.5  1.0  2.5  2.5   10  7.5   4.5
## [2,]  6.0  8.0  8.0  2.5  4.5  1.0  2.5    8  4.5  10.0

Approximate p-value pR

Ref: 5-18

##Approximate p-value pR(two tail)
Spcor_pertestR(A, B, alternative="two.side")
## [1] 0.2966

Approximate p-value pA

#Asymptotic p-value pA(two tail)
2*pnorm(sqrt(length(A) - 1)*cor(A, B, method="spearman"), lower.tail=FALSE)
## [1] 0.2605009

5.3 Kendall’s Tau

Ref: 5-21

#Table 5.3.1
Height <- c(68, 70, 71, 72)
Weight <- c(1.53, 1.55, 1.40, 1.80)
cor(Height, Weight, method="kendall")
## [1] 0.3333333
#U and Vi function
U <- function(x, y, i, j){
    ifelse((x[i] - x[j])*(y[i] - y[j]) > 0, 1, 0)
}
Vi <- function(x, y){
    n <- length(y) - 1
    V <- rep(0, n)
    for(i in 1:n){
        V[i] <- sum(U(x, y, i, (i + 1):(n + 1)))
    }
    V
}

Ref: 5-21

Vi(Weight, Height)
## [1] 2 1 1

Exact p-value p

Ref: 5-24

#Exact p-value p
kencor_pertest <- function(Y, X, alternative="lower"){
    Yper <- permn(Y)
    pernum <- length(Yper)
    correlation <- rep(0, pernum)
    #Take out slope
    for(i in 1:pernum){
        correlation[i] <- cor(Yper[[i]], X, method="kendall")
    }
    n1 <- sum(correlation >= correlation[1])
    n2 <- sum(correlation <= correlation[1])
    if(alternative=="two.side"){
        2*(pR <- min(n1, n2)/pernum)
    }else if(alternative=="greater"){
        (pR <- n1/pernum)
    }else{
        (pR <- n2/pernum)
    }
}

Ref: 5-24

#Tabl 5.3.2
kencor_pertest(c(2, 3, 1, 4), 1:4, alternative="greater")
## [1] 0.375

Ref: 5-25&5-26

#Compute Vi for Example 5.3.1
#Make the same table as lecture note
m <- cbind(Heter, Lym)
order(Heter)
##  [1]  8 12  7  6  9  1 10  3 18 15 16 14 13 17 11  2  5  4
#Reorder data
m1 <- m[order(Heter), ]
#Combine all
data.frame(Rabbits=order(Heter), Heter_X=m1[, 1], Lym_Y=m1[,2], 
           R_X=rank(m1[, 1]), R_Y=rank(m1[, 2]), Vi=c(Vi(m1[, 1], m1[, 2]), NA))
##    Rabbits Heter_X Lym_Y R_X R_Y Vi
## 1        8    1300  1755   1   1 17
## 2       12    1501  2821   2   4 14
## 3        7    1540  2079   3   2 15
## 4        6    2035  3135   4   8 10
## 5        9    2240  3080   5   7 10
## 6        1    2276  2830   6   5 11
## 7       10    2467  2363   7   3 11
## 8        3    2723  2904   8   6 10
## 9       18    2780  3823   9  11  7
## 10      15    2783  4690  10  12  6
## 11      16    2870  3570  11  10  6
## 12      14    2898  4830  12  13  5
## 13      13    2907  5130  13  16  2
## 14      17    3263  3480  14   9  4
## 15      11    3700  5087  15  15  2
## 16       2    3724  5488  16  17  1
## 17       5    4011  4966  17  14  1
## 18       4    4020  5528  18  18 NA

Asymptotic p-value pA

Ref: 5-26

#Example 5.3.1
#Asymptotic p-value pA
n <- length(Heter)
(taucor <- cor(Lym, Heter, method="kendall"))
## [1] 0.7254902
(vartau <- (4*n + 10)/(9*(n^2 - n)))
## [1] 0.02977487
(pA <- pnorm(taucor/sqrt(vartau), lower.tail=FALSE))
## [1] 1.308735e-05

Kentall’s tau Correlation with Ties

We got a problem when computing tau with ties.

Therefore, we write our function to compute it.

#Table 5.3.3
judA <- c(5, 6, 6, 7, 7, 8, 8, 8, 8, 9)
judB <- c(6, 4, 5, 8, 9, 5, 6, 7, 8, 8)
#We got a wrong answer => Tie problem
cor(judA, judB, method="kendall")
## [1] 0.2599376

Kendall’s tau with Ties

#We use the formula to compute tau but first we have to compute Vi
#Kendall's Tau for Tie
KenTauTie <- function(X, Y){
    #U and Vi function for tie
    #Here we use a small trick to achieve our goal
    #We do not use separate if else 
    #because here we have to do vector computation.
    U <- function(x, y, i, j){
        U1 <- ifelse((x[i] - x[j])*(y[i] - y[j]) == 0, 0.5, 0)
        U2 <- ifelse((x[i] - x[j])*(y[i] - y[j]) > 0, 1, 0)
        U1 + U2
    }
    Vi <- function(x, y){
        n <- length(y) - 1
        V <- rep(0, n)
        for(i in 1:n){
            V[i] <- sum(U(x, y, i, (i + 1):(n + 1)))
        }
        V
    }
    (tau <- 2*(sum(Vi(X, Y)))/choose(length(X), 2) - 1)
}

(tau <- KenTauTie(judA, judB))
## [1] 0.2222222

Variance of Kendall’s tau with Ties

#Compute Variance of tau
Vartau <-function(x, y){
    #First take out ties and number
    tab <- sort(table(rank(x)), decreasing=T)
    s <- tab[tab != 1]
    tab <- sort(table(rank(y)), decreasing=T)
    t <- tab[tab != 1]
    
    #Compute A, B, C
    n <- length(x)
    A <- (sum(s*(s - 1)*(2*s + 5)) + sum(t*(t - 1)*(2*t + 5)))/18
    B <- (sum(s*(s - 1)*(s -2))*sum(t*(t - 1)*(t - 2)))/(9*n*(n - 1)*(n -2))
    C <- (sum(s*(s - 1))*sum(t*(t - 1)))/(2*n*(n - 1))
    
    tau <- (4*n + 10)/(9*(n^2 - n))
    (Tietau <- tau - (4/((n^2)*((n-1)^2)))*(A - B - C))
}

(vartau <- Vartau(judA, judB))
## [1] 0.05411248

Asymptotic p-value pA

#Asymptotic p-value pA
pnorm(tau/sqrt(vartau),lower.tail=FALSE)
## [1] 0.1697136

5.4 Permutation Test for Contingency Tables

#Table 5.4.2
Physician <- c(2, 2, 0)
SelfA <- c(0, 1, 2)

Asymptotic p-value pA

#Asymptotic p-value
#It gives us warning message
#because the number in cell is too small(eij < 5)
mydata <- rbind(Physician, SelfA)
chisq.test(mydata)
## Warning in chisq.test(mydata): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata
## X-squared = 4.2778, df = 2, p-value = 0.1178