4.1 Paired-Comparison Permutation Test

Paired T test

Ref: 4-1~4-4

#Table 4.1.1
Hour <- c(1530, 2130, 2940, 1960, 2270)
Survey <- c(1290, 2250, 2430, 1900, 2120)

#Paired t test
t.test(Hour, Survey, paired=TRUE, alternative="greater")

## 
##  Paired t-test
## 
## data:  Hour and Survey
## t = 1.6126, df = 4, p-value = 0.09107
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -54.1012      Inf
## sample estimates:
## mean of the differences 
##                     168

Paired Permutation Test

Exact p-value p

Ref: 4-4

#Paired-Comparison Permutation Test
library(exactRankTests)

#Note that this test use sum as statistic, not mean.
perm.test(Hour, Survey, paired=TRUE, alternative="greater")

## 
##  1-sample Permutation Test
## 
## data:  Hour and Survey
## T = 960, p-value = 0.09375
## alternative hypothesis: true mu is greater than 0

Paired T test

Ref: 4-9

#Table 4.1.3
drug1 <- c(74, 55, 61, 41, 53, 74, 52, 31, 50, 58, 54, 53, 69, 60, 61, 54, 57)
drug2 <- c(63, 58, 49, 47, 50, 69, 67, 40, 44, 38, 56, 38, 47, 41, 46, 47, 44)
#Paird t test
t.test(drug1, drug2, paired=TRUE, alternative="two.sided")

## 
##  Paired t-test
## 
## data:  drug1 and drug2
## t = 2.5475, df = 16, p-value = 0.02151
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   1.115612 12.178505
## sample estimates:
## mean of the differences 
##                6.647059

Paired Permutation Test

Exact p-value p

Ref: 4-9

#Paired-Comparison Permutation Test
#Exact p-value p
perm.test(drug1, drug2, paired=TRUE, alternative="two.sided")

## 
##  1-sample Permutation Test
## 
## data:  drug1 and drug2
## T = 148, p-value = 0.02502
## alternative hypothesis: true mu is not equal to 0

Asymptotic p-value pA

Ref: 4-13

#Asymptotic p-value pA
d <- drug1 - drug2
Sobs <- sum(d[which(d > 0)])
absDi <- sum(abs(d))
(pA <- 2*pnorm(abs((Sobs - absDi/2)/(sqrt(sum(d^2)/4))), lower.tail=FALSE))

## [1] 0.0267714

4.2 Signed-Rank Test

Exact p-value p

Ref: 4-18

#Table 4.1.1
d <- Hour - Survey
wilcox.test(d, alternative="greater")

## 
##  Wilcoxon signed rank test
## 
## data:  d
## V = 13, p-value = 0.09375
## alternative hypothesis: true location is greater than 0

Asymptotic p-value pA without ccorrection

Ref: 4-19~4-20

#Asymptotic p-value pA
signedRankA <- function(d, correct=TRUE){
    if(correct){
        n <- length(d)
        SR <- sum(rank(abs(d))*(d > 0))
        ESR <- n*(n + 1)/4
        varSR <- n*(n + 1)*(2*n + 1)/24
        cat("This is for alternative=greater", "\n")
        (pA <- pnorm((SR - 0.5 - ESR)/sqrt(varSR), lower.tail=F))
    }else{
        n <- length(d)
        SR <- sum(rank(abs(d))*(d > 0))
        ESR <- n*(n + 1)/4
        varSR <- n*(n + 1)*(2*n + 1)/24
        cat("This is for alternative=greater", "\n")
        (pA <- pnorm((SR - ESR)/sqrt(varSR), lower.tail=F))
    }
}
#Without correction
signedRankA(d, correct=FALSE)

## This is for alternative=greater

## [1] 0.06900537

Asymptotic p-value pA with ccorrection

#With correction
signedRankA(d)

## This is for alternative=greater

## [1] 0.08876493

Ties adjustment

Ref: 4-21

#Table 4.2.3
d <- c(-5, -3, -3, 0, 0, 2, 4, 4, 4, 5)
#Adjusted ties for zero
Zerotiead<- function(d){
    #Use abs to get absolute value
    #Then multiple the sign
    rank(abs(d))*sign(d)
}
Zerotiead(d)

##  [1] -9.5 -4.5 -4.5  0.0  0.0  3.0  7.0  7.0  7.0  9.5

#Remove the zero 
Zerotierm <- function(d){
    rank(abs(d)[-which(abs(d) == 0)])
}
Zerotierm(d)

## [1] 7.5 2.5 2.5 1.0 5.0 5.0 5.0 7.5

Signed-Rank Test Adjusted for Ties

Exact p-value p

Ref: 4-24~4-25

#Table 4.2.5
d <- drug1 - drug2
#Because of tie problem, we use wilcox.exact like before in chapter3.
wilcox.exact(d, alternative="greater")

## 
##  Exact Wilcoxon signed rank test
## 
## data:  d
## V = 123, p-value = 0.01282
## alternative hypothesis: true mu is greater than 0

Asymptotic p-value pA without ccorrection

Ref: 4-22&4-25

#Asymptotic p-value pA without correction
adsignedRankA <- function(d, correct=TRUE){
    if(correct){
        n <- length(d)
        SR <- sum(rank(abs(d))*(d > 0))
        ESR <- n*(n + 1)/4
        #Before compute variance we have to know how many groups
        #Count each number and sort them from big to small
        t <- sort(table(rank(abs(d))), decreasing=T)
        #Take out ties and also number
        g <- t[t != 1]
        varSR <- n*(n + 1)*(2*n + 1)/24 - sum(g^3 -g)/48
        cat("This is for alternative = greater", "\n")
        (pA <- pnorm((SR - 0.5 - ESR)/sqrt(varSR), lower.tail=F))
    }else{
        n <- length(d)
        SR <- sum(rank(abs(d))*(d > 0))
        ESR <- n*(n + 1)/4
        #As previous said
        t <- sort(table(rank(abs(d))), decreasing=T)
        g <- t[t != 1]
        varSR <- n*(n + 1)*(2*n + 1)/24 - sum(g^3 -g)/48
        cat("This is for alternative = greater", "\n")
        (pA <- pnorm((SR - ESR)/sqrt(varSR), lower.tail=F))
    }
}
#Asymptotic p-value pA without correction
adsignedRankA(d, correct=FALSE)

## This is for alternative = greater

## [1] 0.01379476

Asymptotic p-value pA with ccorrection

#Asymptotic p-value pA with correction
adsignedRankA(d)

## This is for alternative = greater

## [1] 0.01465154

4.3 Other Paried-Comparison Tests

Sign test

Exact p-value p

Ref: 4-30

#Table 4.1.3
data.frame(numofdifference=length(d), numofsuccess=sum(d > 0))

##   numofdifference numofsuccess
## 1              17           12

binom.test(12, 17, alternative="greater")

## 
##  Exact binomial test
## 
## data:  12 and 17
## number of successes = 12, number of trials = 17, p-value = 0.07173
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
##  0.4780823 1.0000000
## sample estimates:
## probability of success 
##              0.7058824

Asymptotic p-value pA without ccorrection

Ref: 4-31

#Asymptotic p-value pA
signA <- function(d, correct=TRUE){
    if(correct){
        n <- length(d)
        SN <- sum(d > 0)
        ESN <- 0.5*n
        varSN <- 0.25*n
        cat("This is for alternative = greater", "\n")
        (pA <- pnorm((SN - 0.5 - ESN)/sqrt(varSN), lower.tail=F))
    }else{
        n <- length(d)
        SN <- sum(d > 0)
        ESN <- 0.5*n
        varSN <- 0.25*n
        cat("This is for alternative = greater", "\n")
        (pA <- pnorm((SN - ESN)/sqrt(varSN), lower.tail=F))
    }
}
#Without correction
signA(d, correct=FALSE)

## This is for alternative = greater

## [1] 0.04477754

Asymptotic p-value pA with ccorrection

#With correction
signA(d)

## This is for alternative = greater

## [1] 0.07280505

4.4 A Permutation Test for a Randomized Complete Block Design

Ref: 4-44

#table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
m <- rbind(trt1, trt2, trt3, trt4)

#Use this package to find out permutation
library(combinat)

Here we also encounter the same problem, memory size. Hence, we won’t run the code. Just show it.

Exact p-value

#Permutation Test for Randomized Complete Block Design(RCBD)
RCBDper <- function(m){
    nt <- dim(m)[1]
    nb <- dim(m)[2]
    #Save each permutation of block in blki
    blk1 <- permn(m[, 1]);blk2 <- permn(m[, 2])
    blk3 <- permn(m[, 3]);blk4 <- permn(m[, 4])
    blk5 <- permn(m[, 5]);blk6 <- permn(m[, 6])
    #Create an array to put each simulation matrix
    w=array(0,dim=c(nt, nb, factorial(nt)^nb))
    for(i in 1: factorial(nt)){
        for(j in 1: factorial(nt)){
            for(k in 1: factorial(nt)){
                for(l in 1: factorial(nt)){
                    for(m in 1: factorial(nt)){
                        for(n in 1: factorial(nt)){
    #Because of the width limit, the indent is not so clear.
    w[, , ((i - 1)*(factorial(nt)^(nb - 1)) +
           (j - 1)*(factorial(nt)^(nb - 2)) +
           (k - 1)*(factorial(nt)^(nb - 3)) + 
           (l - 1)*(factorial(nt)^(nb - 4)) +
           (m - 1)*(factorial(nt)*(nb - 5)) + (n - 1) + 1)] <- 
    as.array(cbind(blk1[[i]], blk2[[j]], blk3[[k]], 
                   blk4[[l]], blk5[[m]], blk6[[n]]))
                        }
                    }
                }
            }
        }
    }
    #Now we have the array w which its 
    #third dimension represent each simulation
    
    #Here I compute the F statistics
    #However, as you know, you can use SST, SSX
    SST <- rep(0, factorial(nt)^nb)
    for(i in 1:factorial(nt)^nb){
        rowmean <- apply(w[, , i], 1, mean)
        SST[i] <- nb*sum(rowmean - rep(mean(rowmean), nt))^2
    }
    
    SSBL <- rep(0, factorial(nt)^nb)
    for(i in 1:factorial(nt)^nb){
        colmean <- apply(w[, , i], 2, mean)
        SSBL[i] <- nb*sum(colmean - rep(mean(colmean), nb))^2
    }
    
    #Each SSTO will be the same
    SSTO <- rep(var(as.numeric(w[, , 1]))*(nt*nb - 1), factorial(nt)^nb)
    
    SSE <- SSTO - SST - SSBL
    
    #Compute F statistic
    F <- (SST/(nt - 1))/(SSE/((nt - 1)*(nb - 1)))
    
    (p <- mean(F >= F[1]))
}
RCBDper(m)

Approximate pR

#Approximate pR
RCBDperR <- function(m, pernum=5000){
    nt <- dim(m)[1]
    nb <- dim(m)[2]
    #Save each permutation of block in blki
    blk1 <- permn(m[, 1]);blk2 <- permn(m[, 2])
    blk3 <- permn(m[, 3]);blk4 <- permn(m[, 4])
    blk5 <- permn(m[, 5]);blk6 <- permn(m[, 6])
    #Create an array to put each simulation matrix
    w=array(0, dim=c(nt, nb, pernum))
    w[, , 1] <- m
    for(i in 2:pernum){
        #Store simulate number
        simN <- c()
        for(j in 1:6){
            simN[j] <- sample(1:factorial(nt), 1)
        }
        w[, , i] <- as.array(cbind(blk1[[simN[1]]], blk2[[simN[2]]], 
                                   blk3[[simN[3]]], blk4[[simN[4]]], 
                                   blk5[[simN[5]]], blk6[[simN[6]]]))
    }
    #Now we have the array w which its 
    #third dimension represent each simulation
    
    #Here I compute the F statistics
    #However, as you know, you can use SST, SSX
    SST <- rep(0, pernum)
    for(i in 1:pernum){
        rowmean <- apply(w[, , i], 1, mean)
        SST[i] <- nb*(sum((rowmean - rep(mean(rowmean), nt))^2))
    }
    
    colmean <- apply(w[, , 1], 2, mean)
    SSBL <- rep(nt*(sum((colmean - rep(mean(colmean), nb))^2)), pernum)
    
    #Each SSTO will be the same
    SSTO <- rep(var(as.numeric(w[, , 1]))*(nt*nb - 1), pernum)
    
    SSE <- SSTO - SST - SSBL
    
    #Compute F statistic
    F <- (SST/(nt - 1))/(SSE/((nt - 1)*(nb - 1)))
    
    (pR <- mean(F >= F[1]))
}
RCBDperR(m)

## [1] 0.0596

#Simulate 10000 times
RCBDperR(m, pernum=10000)

## [1] 0.0568

Randomized Complete Block Design

** Ref: 4-44**

#RCBD
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
all <- c(trt1, trt2, trt3, trt4)
g <- rep(c(1, 2, 3, 4), each=6)
b <- rep(c(1:6), 4)

mydata <- data.frame(Y=all, group=as.factor(g), block=as.factor(b))
summary(aov(Y ~ group + block, data=mydata))

##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        3   5408  1802.8   3.121 0.0575 .
## block        5   2817   563.4   0.975 0.4640  
## Residuals   15   8664   577.6                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.5 Friedman’s Test for a Randomized Complete Block Design

Ref: 4-47~4-48

#Table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)

#Rank
m <- rbind(trt1, trt2, trt3, trt4)
(r <- apply(m, 2, rank))

##      [,1] [,2] [,3] [,4] [,5] [,6]
## trt1    1    4    4    4    4    4
## trt2    4    3    3    2    1    2
## trt3    2    2    2    3    3    1
## trt4    3    1    1    1    2    3

Here we won’t compute exact p-value for the same reason, memory size. We compute pR here using the previous RCBDperR, but we put the rank matrix in it

Approximate p-value pR

RCBDperR(r)

## [1] 0.1148

#Simulate 10000 times
RCBDperR(r, pernum=10000)

## [1] 0.1177

Asypmtotic p-value pA

#Asypmtotic p-value pA
all <- c(trt1, trt2, trt3, trt4)
g <- rep(c(1, 2, 3, 4), each=6)
b <- rep(c(1:6), 4)
mydata <- data.frame(Y=all, group=as.factor(g), block=as.factor(b))

friedman.test(Y ~ g|b, data=mydata)

## 
##  Friedman rank sum test
## 
## data:  Y and g and b
## Friedman chi-squared = 5.6, df = 3, p-value = 0.1328

Friedman’s Test Adjusted for Ties

#Table 4.5.3
trt1 <- c(100, 80, 50)
trt2 <- c(100, 80, 60)
trt3 <- c(150, 80, 80)
trt4 <- c(150, 90, 90)
m <- rbind(trt1, trt2, trt3, trt4)

Exact p-value

#Friedman Test adjusted for Ties
FriedmanTieper <- function(m){
    nt <- dim(m)[1]
    nb <- dim(m)[2]
    #Save each permutation of block in blki
    blk1 <- permn(rank(m[, 1]))
    blk2 <- permn(rank(m[, 2]))
    blk3 <- permn(rank(m[, 3]))
    #Create an array to put each simulation matrix
    w=array(0,dim=c(nt, nb, factorial(nt)^nb))
    for(i in 1: factorial(nt)){
        for(j in 1: factorial(nt)){
            for(k in 1: factorial(nt)){
                w[, ,  (i - 1)*(factorial(nt)^(nb - 1)) +
                       (j - 1)*(factorial(nt)^(nb - 2)) +
                       (k - 1) + 1] <- 
                as.array(cbind(blk1[[i]], blk2[[j]], blk3[[k]]))
            }
        }
    }
    #Now we have the array w which its 
    #third dimension represent each simulation
    #Here I use sum(Ribar^2)
    Ribarsq <- c()
    for(i in 1:factorial(nt)^nb){
        Ribarsq[i] <- sum(apply(w[, , i], 1, mean)^2)
    }
    (p <- mean(Ribarsq >= Ribarsq[1]))
}
FriedmanTieper(m)

## [1] 0.04166667

#Rank
m <- rbind(trt1, trt2, trt3, trt4)
(r <- apply(m, 2, rank))

##      [,1] [,2] [,3]
## trt1  1.5    2    1
## trt2  1.5    2    2
## trt3  3.5    2    3
## trt4  3.5    4    4

Since the previous function friedman.test will not address the ties problem. Here we use similar method to create the function FriedmanTieper to help us compute the asymptotic p-value pA.

Asypmtotic p-value pA

#Asypmtotic p-value pA
k <- dim(m)[1]
b <- dim(m)[2]
FM <- ((b^2)/sum(apply(r, 2, var)))*(sum((apply(r, 1, mean) - (k + 1)/2)^2))
(pA <- pchisq(FM, k - 1, lower.tail=F))

## [1] 0.05755845

4.6 Ordered Alternative for a Randomized Complete Block Design

Page’s Test

#Table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
m <- rbind(trt1, trt2, trt3, trt4)

Here we won’t compute exact p-value for the same reason, memory size.

Approximate p-value pR

myPagetestR <- function(m, pernum=5000){
    nt <- dim(m)[1]
    nb <- dim(m)[2]
    r <- apply(m, 2, rank)
    #Save each permutation of block in blki
    blk1 <- permn(r[, 1]);blk2 <- permn(r[, 2])
    blk3 <- permn(r[, 3]);blk4 <- permn(r[, 4])
    blk5 <- permn(r[, 5]);blk6 <- permn(r[, 6])
    #Create an array to put each simulation matrix
    w=array(0, dim=c(nt, nb, pernum))
    w[, , 1] <- r
    for(i in 2:pernum){
        #Store simulate number
        simN <- c()
        for(j in 1:6){
            simN[j] <- sample(1:factorial(nt), 1)
        }
        w[, , i] <- as.array(cbind(blk1[[simN[1]]], blk2[[simN[2]]], 
                                   blk3[[simN[3]]], blk4[[simN[4]]], 
                                   blk5[[simN[5]]], blk6[[simN[6]]]))
    }
    
    #Compute sum(i*Ri)
    sumRi <- rep(0, pernum)
    for(i in 1:pernum){
        Rankrowmean <- apply(w[, , i], 1, sum)
        sumRi[i] <- sum((1:nt)*Rankrowmean)
    }
    (pR <- mean(sumRi <= sumRi[1]))
}

myPagetestR(m)

## [1] 0.0138

Asymptotic p-value pA

#Asypmtotic p-value pA
PagetestA <- function(m){
    k <- dim(m)[1]
    b <- dim(m)[2]
    r <- apply(m, 2, rank)
    PGobs <- sum((1:k)*apply(r, 1, sum))
    EPG <- (b*k*(k + 1)^2)/4
    varPG <- (b*(k - 1)*(k^2)*(k + 1)^2)/144
    (pA <- pnorm((PGobs - EPG)/sqrt(varPG), lower.tail=T))
}
PagetestA(m)

## [1] 0.01182581

Nonparametric Statisitc Chapter4 R code

Professor: Chiang, Jeng-Tung Student: Wang, Chih-Hui

Sunday, January 18, 2015

4.1 Paired-Comparison Permutation Test

Paired T test

Paired Permutation Test

Exact p-value p

Paired T test

Paired Permutation Test

Exact p-value p

Asymptotic p-value pA

4.2 Signed-Rank Test

Exact p-value p

Asymptotic p-value pA without ccorrection

Asymptotic p-value pA with ccorrection

Ties adjustment

Signed-Rank Test Adjusted for Ties

Exact p-value p

Asymptotic p-value pA without ccorrection

Asymptotic p-value pA with ccorrection

4.3 Other Paried-Comparison Tests

Sign test

Exact p-value p

Asymptotic p-value pA without ccorrection

Asymptotic p-value pA with ccorrection

4.4 A Permutation Test for a Randomized Complete Block Design

Exact p-value

Approximate pR

Randomized Complete Block Design

4.5 Friedman’s Test for a Randomized Complete Block Design

Approximate p-value pR

Asypmtotic p-value pA

Friedman’s Test Adjusted for Ties

Exact p-value

Asypmtotic p-value pA

4.6 Ordered Alternative for a Randomized Complete Block Design

Page’s Test

Approximate p-value pR

Asymptotic p-value pA