2.1 A Two-Sample t-test

Two-Sample t-test

Ref: 2-1

#Table 2.1.1
New <- c(37, 49, 55, 57)
Tradition <- c(23, 31, 46)
#Note that the t-test is equal variance.
t <- t.test(New, Tradition, alternative="greater", var.equal=TRUE, conf.level=0.95)
t

## 
##  Two Sample t-test
## 
## data:  New and Tradition
## t = 2.0843, df = 5, p-value = 0.04578
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.5372273       Inf
## sample estimates:
## mean of x mean of y 
##  49.50000  33.33333

Two-Sample Permutation test

Ref: 2-7~2-10

#A package that have function to help us find the all possible permutation outcomes
library(combinat)

#Combine all and find out the index for new method
all <- c(New, Tradition)
index <- seq(along=all)
indexInNew <- combn(index, 4)
#Use mean difference
meanD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
    meanD[i] <- mean(all[indexInNew[, i]]) - mean(all[-indexInNew[, i]])
}
# !!! 4.2 should be 4.3  There is a problem(in our world, not in computer world) 
#that round(0.45, 1)=0.4 and round(0.35, 1)=0.4. 
#When the digit preceding the round digit is even, 
#the round rule would change(only the cutpoint 0.5).
#You can use ?round to see more details.
sort(round(meanD, 1))

##  [1] -19.4 -17.7 -14.2 -13.0 -12.4  -8.9  -8.9  -7.8  -7.2  -6.0  -5.4
## [12]  -4.2  -4.2  -3.7  -2.5  -2.5  -0.8   0.4   1.0   1.0   1.6   2.2
## [23]   2.8   5.7   6.2   6.2   7.4   8.0   9.8  10.9  10.9  12.7  14.4
## [34]  16.2  21.4

#Exact p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))

## [1] 0.05714286

Ref: 2-11~2-12

#Use New method sum 
T1 <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
    T1[i] <- sum(all[indexInNew[, i]])
}
sort(T1, decreasing=T)

##  [1] 207 198 195 192 189 189 187 184 183 181 181 180 175 174 173 172 172
## [18] 171 169 166 166 164 163 163 161 160 158 157 155 155 149 148 146 140
## [35] 137

#Exact p-value
(p_value <- sum(T1 >= T1[1])/length(T1))

## [1] 0.05714286

2.2 Permutation Tests Based on the Median and Trimmed Means

Ref: 2-26~2-28

#Median difference
medianD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
    medianD[i] <- median(all[indexInNew[, i]]) - median(all[-indexInNew[, i]])
}
#Exact p-value
(p_value <- sum(medianD >= medianD[1])/length(medianD))

## [1] 0.05714286

2.3 Random Sampling the Permutations

Ref: 2-29~2-35

#table 2.3.1
#Random sampling
trt1 <- c(59.1, 60.3, 58.1, 61.3, 65.1, 55.0, 63.4, 67.8)
trt2 <- c(60.1, 62.1, 59.3, 55.0, 54.6, 64.4, 58.7, 62.5)
# #of permutation
choose(16, 8)

## [1] 12870

all <- c(trt1, trt2)
index <- seq(along=all)
indexInNew <- combn(index, 8)

#Exact test (Mean Difference)
all <- c(trt1, trt2)
index <- seq(along=all)
indexInNew <- combn(index, 8)

meanD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
    meanD[i] <- mean(all[indexInNew[, i]]) - mean(all[-indexInNew[, i]])
}
#Exact p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))

## [1] 0.1966589

#Random Sampling (R=1000)
#Set seed here to make the result reproducible
#I set it 316 just because my birthday is March, 16
set.seed(316)
meanCritical <- mean(trt1) - mean(trt2)
R <- 1000
meanD <- rep(NA, R)
for(i in 1:1000){
    indexInNew <- sample(index, 8, replace=FALSE)
    meanD[i] <- mean(all[indexInNew]) - mean(all[-indexInNew])    
}
#Add the original mean difference in meanD
meanD <- c(meanCritical, meanD)
#Approximate p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))

## [1] 0.1938062

2.4 Wilcoxon Rank-Sum Test

Ref: 2-39

#Wilcoxon rank sum test
#Note that the alternative is specified by first data. In the following example, 
#we all use W1 to compute the statistics and compute the p-value. See 2-40 for reference.
w <- wilcox.test(New, Tradition, alternative="greater")
#Note that the statistic is substracted by n1*(n1 + 1)/2
w

## 
##  Wilcoxon rank sum test
## 
## data:  New and Tradition
## W = 11, p-value = 0.05714
## alternative hypothesis: true location shift is greater than 0

Ref: 2-42~2-44

#Table 2.4.3 and here we compute W2 statistics.
untreated <- c(0.55, 0.63, 0.67, 0.68, 0.79, 0.81, 0.85)
treated <- c(0.44, 0.47, 0.51, 0.52, 0.58, 0.59, 0.60, 0.65, 0.66)
w <- wilcox.test(untreated, treated, alternative="greater")
w

## 
##  Wilcoxon rank sum test
## 
## data:  untreated and treated
## W = 56, p-value = 0.003934
## alternative hypothesis: true location shift is greater than 0

2.5 Wilcoxon Rank-Sum Test Adjusted for Ties

Ref: 2-46

#The package contains lot of method in Nonparametric.
#We use it to deal with the tie problem in Wilcoxon Rank-Sum test for Ties.
library(exactRankTests)

#Table 2.5.2
trt1 <- c(11.7, 12.0)
trt2 <- c(11.8, 12.0, 12.5)
#If you use wilcox.test in stat package, 
#you will encounter the problem that 
#the original wilcox.test cannot compute the exact p-value when there are ties in data)
w <- wilcox.exact(trt1, trt2, alternative="less")
w

## 
##  Exact Wilcoxon rank sum test
## 
## data:  trt1 and trt2
## W = 1.5, p-value = 0.3
## alternative hypothesis: true mu is less than 0

Ref: 2-47~2-49

#Table 2.5.2
trt1 <- c(55.0, 58.1, 59.1, 60.3, 61.3, 63.4, 65.1, 67.8)
trt2 <- c(54.6, 55.0, 58.7, 59.3, 60.1, 62.1, 62.5, 64.4)
#It will occur a problem, but the statistics is still correct.
w <- wilcox.exact(trt1, trt2, alternative="greater")
w

## 
##  Exact Wilcoxon rank sum test
## 
## data:  trt1 and trt2
## W = 39.5, p-value = 0.2296
## alternative hypothesis: true mu is greater than 0

2.6 Mann-Whitney Test and a Confidence Interval

Ref: 2-53

#Table 2.6.1
Granite <- c(33.63, 39.86, 69.32, 42.13, 58.36, 74.11)
Basalt <- c(26.15, 18.56, 17.55, 9.84, 28.29, 34.15)
#Function to compute pair difference
Pairdiff <- function(x, y){
    n1 <- length(x);n2 <- length(y)
    paird <- c()
    for(i in 1:n1){
        paird <- c(paird, rep(x[i], n2) - y)   
    }
    paird
}
d <- Pairdiff(Granite, Basalt)
sort(d)

##  [1] -0.52  5.34  5.71  7.48  7.98 11.57 13.71 13.84 15.07 15.98 16.08
## [12] 21.30 22.31 23.57 23.79 24.21 24.58 30.02 30.07 32.21 32.29 35.17
## [23] 39.80 39.96 40.81 41.03 43.17 45.82 47.96 48.52 50.76 51.77 55.55
## [34] 56.56 59.48 64.27

#Hodes-Lehmann estimator of delta
median(d)

## [1] 30.045

#CI for delta
deltaCI <- function(x, y, q=0.05){
    #Above function
    Pairdiff <- function(x, y){
        n1 <- length(x);n2 <- length(y)
        paird <- c()
        for(i in 1:n1){
            paird <- c(paird, rep(x[i], n2) - y)   
        }
        paird
    }
    #Save paired difference in d
    d <- sort(Pairdiff(x, y))
    nlower <- qwilcox(q, length(x), length(y))
    nupper <- qwilcox(q, length(x), length(y), lower.tail=FALSE)
    
    cat("Confidence Interval:", "\n")
    cat(d[nlower], "<", "delta", "<=", d[nupper + 1])
}

deltaCI(Granite, Basalt)

## Confidence Interval: 
## 13.84 < delta <= 47.96

2.7 Scoring Systems

normalScore <- function(x){
    qnorm(rank(x)/(length(x) + 1), mu=0, sd=1)
}
exponentialScore <- function(x){
    score <- c()
    N <- length(x)
    score[1] <- 1/N
    for(i in 2:length(x)){
        score[i] <- score[i - 1] + 1/(N - i + 1)
    }
    score
}
SavageScore <- function(x){
    score <- c()
    N <- length(x) 
    score[1] <- 1/N - 1
    for(i in 2:length(x)){
        score[i] <- score[i - 1] + 1/(N - i + 1)
    }
    score
}

2.8 Tests for Equality of Scale Parameters and an Omnibus Test

Ref: 2-79 & 2-89

#Table 2.8.1
#Siegel-Tukey
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
#A package that can perform Siege-Tukey Test.

library(DescTools)

test <- SiegelTukeyTest(trt1, trt2, alternative="greater")
test

## 
##  Siegel-Tukey-test for equal variability
## 
## data:  trt1 and trt2
## ST = 24, p-value = 0.2738
## alternative hypothesis: true ratio of scales is greater than 1

Ref: 2-80 & 2-90

#Ansari-Bradley Test
test <- ansari.test(trt1, trt2, alternative="greater")
test

## 
##  Ansari-Bradley test
## 
## data:  trt1 and trt2
## AB = 13, p-value = 0.2698
## alternative hypothesis: true ratio of scales is greater than 1

Ref: 2-81~2-83 & 2-86~2-87

#Table 2.8.1
#Test on Deviances (RMD)
#Exact test for RMD
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
dev1 <- trt1 - median(trt1)
dev2 <- trt2 - median(trt2)
all <- c(dev1, dev2)
index <- seq(along=all)
indexIntrt1 <- combn(index, 5)
RMD <- rep(NA, dim(indexIntrt1)[2])
for(i in 1:dim(indexIntrt1)[2]){
    RMD[i] <- mean(abs(all[indexIntrt1[, i]]))/mean(abs(all[-indexIntrt1[, i]]))
}
(p_value <- sum(RMD >= RMD[1])/length(RMD))

## [1] 0.06349206

hist(RMD, main="Distribution of RMD", xlab="RMD", ylab="Frequency")

Ref: 2-84~2-85 & 2-91~2-93

#Table 2.8.2
#Kolmogorov-Smirnov Test
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
ks <- ks.test(trt1, trt2, alternative="two.sided")
ks

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  trt1 and trt2
## D = 0.4, p-value = 0.873
## alternative hypothesis: two-sided

Nonparametric Statisitc Chapter2 R code

Professor: Chiang, Jeng-Tung Student: Wang, Chih-Hui

Friday, December 19, 2014