3.1 K-Sample Permutation Test

Permuation F-test

Ref 3-5

#Table 3.1.2 Permutation F.test
trt1 <- c( 6.08, 22.29,  7.51, 34.36, 23.68)
trt2 <- c(30.45, 22.71, 44.52, 31.47, 36.81)
trt3 <- c(32.04, 28.03, 32.74, 23.84, 29.64)
all <- c(trt1, trt2, trt3)

# # of Permutation
factorial(15)/(factorial(5)^3)

## [1] 756756

#We use this package again to do permutation
library(combinat)

Exact p-vale p

Here we only use the following code to illustrate how to do the exact F permutation test, 
because if we create our the vector, it will be too big that computer memory cannot endure 
such a huge memory size.

#This code will not run, I show you what I think that can do the exact test.
#We permute all the treatment 
#and use the unique function to transform the results into combination
permutation <- unique(permn(all))
F <- rep(0, 756756)
for(i in 1:756756){
    allNew <- permutation[[i]]
    SSTO <- var(allNew)*14
    SSE <- var(allNew[1:5])*4 + var(allNew[6:10])*4 + var(allNew[11:15])*4
    SST <- SSTO - SSE
    F[i] <- (SST/(2))/(SSE/12)
}
(pvalue <- sum(F >= F[1]))

#Just for your reference
permn(c(1, 1))

## [[1]]
## [1] 1 1
## 
## [[2]]
## [1] 1 1

unique(permn(c(1, 1)))

## [[1]]
## [1] 1 1

#Permutation
permn(c(1, 1, 2))

## [[1]]
## [1] 1 1 2
## 
## [[2]]
## [1] 1 2 1
## 
## [[3]]
## [1] 2 1 1
## 
## [[4]]
## [1] 2 1 1
## 
## [[5]]
## [1] 1 2 1
## 
## [[6]]
## [1] 1 1 2

unique(permn(c(1, 1, 2)))

## [[1]]
## [1] 1 1 2
## 
## [[2]]
## [1] 1 2 1
## 
## [[3]]
## [1] 2 1 1

Approximated p-value pR

FpermutationR <- function(t1, t2, t3, pernum=1000){
    all <- c(t1, t2, t3)
    n1 <- length(t1)
    n2 <- length(t2)
    n3 <- length(t3)
    #Create number that later we sample and transform into factor
    g <- c(rep(1, n1), rep(2, n2), rep(3, n3))
    
    F <- c()
    for(i in 1:pernum){
        if(i == 1){

            SSTO <- var(all)*14
            SSE <- var(trt1)*4 + var(trt2)*4 +var(trt3)*4
            SST <- SSTO - SSE
            F[i] <- (SST/2)/(SSE/(length(all) - 3))
        }else{
            #Start the simulation and random permute treatment
            allp <- sample(all, 15, replace=FALSE)
            #Compute SST, SSTO, SSE
            SSTO <- var(allp)*14
            SSE <- var(allp[1:n1])*(n1 - 1) +
                   var(allp[(n1 + 1):(n1 + n2)])*(n2 - 1) +
                   var(allp[(n1 + n2 + 1):(n1 + n2 + n3)])*(n3 -1) 
            SST <- SSTO - SSE
            F[i] <- (SST/2)/(SSE/(length(all) - 3))
        }
    }
    
    pR <- sum(F >= F[1])/pernum
    data.frame(Fvalue_obs=F[1], pR=pR)
}
#Use default pernum(1000)
FpermutationR(trt1, trt2, trt3)

##   Fvalue_obs    pR
## 1   3.781445 0.054

#Note that You can adjust the pernum
FpermutationR(trt1, trt2, trt3, pernum=5000)

##   Fvalue_obs     pR
## 1   3.781445 0.0544

Asymptotic p-vale pA

SSTO <- var(all)*14
SSE <- var(trt1)*4 + var(trt2)*4 + var(trt3)*4
SST <- SSTO - SSE
N <- length(all)
#Chisq-Approximation
(pA <- pchisq((N - 1)*SST/SSTO, 2, lower.tail=FALSE))

## [1] 0.06679308

Analysis of Variance

#Table 3.1.2 Analysis of Variance
#gl function can generate factor. You can use ?gl to see details.
mydata <- data.frame(Y=all, Treatment=gl(3, 5))
#If you have experience in R,
#you probably used anova before,
#but the anova function is used to compare model
#Here, we use aov to do ANOVA.
#It is like how we run the regression
#As you know, ANOVA model is a type of regression model.
m <- aov(Y ~ Treatment, data=mydata)
summary(m)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Treatment    2  554.6  277.31   3.781 0.0533 .
## Residuals   12  880.0   73.33                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.2 Kruskal-Wallis Test

Ref: 3-13

#Table3.2.2
Control <- c(4.302, 4.017, 4.049, 4.176)
Pre1 <- c(2.201, 3.190, 3.250, 3.276, 3.292, 3.267)
Pre2 <- c(3.397, 3.552, 3.630, 3.578, 3.612)
Pre3 <- c(2.699, 2.929, 2.785, 2.176, 2.845, 2.913)

all <- c(Control, Pre1, Pre2, Pre3)
pre <- c(rep("1", 4), rep("2", 6), rep("3", 5), rep("4", 6))
mydata <- data.frame(Y=all, X=as.factor(pre))
kruskal.test(Y ~ X, data=mydata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Y by X
## Kruskal-Wallis chi-squared = 17.3593, df = 3, p-value = 0.0005961

Kruskal-Wallis Adjust for ties

Ref: 3-15

#Table3.2.3
P1 <- c(4, 5, 3, 4, 5, 5, 2)
P2 <- c(3, 4, 5, 2, 3, 1, 1, 2)
P3 <- c(2, 1, 1, 2, 1, 3)

all <- c(P1, P2, P3)
p <- c(rep("1", 7), rep("2", 8), rep("3", 6))
mydata <- data.frame(Y=all, X=as.factor(p))
kruskal.test(Y ~ X, data=mydata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Y by X
## Kruskal-Wallis chi-squared = 8.2466, df = 2, p-value = 0.01619

3.4 Ordered Alternatives

Jonckheere-Terpstra Test (JT test)

Ref: 3-26

#Table3.4.1
trt1 <- c(13.0, 24.1, 11.7, 16.3, 15.5, 24.5)
trt2 <- c(42.0, 18.0, 14.0, 36.0, 11.6, 19.0)
trt3 <- c(15.6, 23.8, 24.4, 24.0, 21.0, 21.1)
trt4 <- c(35.3, 22.5, 16.9, 25.0, 23.1, 26.0)
n1 <- length(trt1);n2 <- length(trt2);n3 <- length(trt3);n4 <- length(trt4)

library(clinfun)

#JT test
all <- c(trt1, trt2, trt3, trt4)
N <- length(all)
g <- rep(c(1, 2, 3, 4), each=6)
jonckheere.test(all, g, alternative="increasing")

## 
##  Jonckheere-Terpstra test
## 
## data:  
## JT = 145, p-value = 0.02991
## alternative hypothesis: increasing

#Large sample approximation
mu <- (N^2 - (n1^2 + n2^2 + n3^2 + n4^2))/4
#n1=n2=n3=n4=6
variance <- ((N^2)*(2*N + 3) - 4*(6^2)*(2*6 + 3))/72
(pA <- pnorm(145, 108, sqrt(378), lower.tail=FALSE))

## [1] 0.0285154

Nonparametric Statisitc Chapter3 R code

Professor: Chiang, Jeng-Tung Student: Wang, Chih-Hui

Saturday, January 17, 2015

3.1 K-Sample Permutation Test

Permuation F-test

Exact p-vale p

Approximated p-value pR

Asymptotic p-vale pA

Analysis of Variance

3.2 Kruskal-Wallis Test

Kruskal-Wallis Adjust for ties

3.4 Ordered Alternatives

Jonckheere-Terpstra Test (JT test)