Ref: 2-1
#Table 2.1.1
New <- c(37, 49, 55, 57)
Tradition <- c(23, 31, 46)
#Note that this is the equal-variance (pooled) two-sample t-test.
t <- t.test(New, Tradition, alternative="greater", var.equal=TRUE, conf.level=0.95)
t
##
## Two Sample t-test
##
## data: New and Tradition
## t = 2.0843, df = 5, p-value = 0.04578
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.5372273 Inf
## sample estimates:
## mean of x mean of y
## 49.50000 33.33333
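#As a quick sanity check (my own sketch, not part of the original output), the pooled
#two-sample t statistic can be reproduced by hand:
n1 <- length(New); n2 <- length(Tradition)
sp2 <- ((n1 - 1)*var(New) + (n2 - 1)*var(Tradition))/(n1 + n2 - 2)   #pooled variance
(t_hand <- (mean(New) - mean(Tradition))/sqrt(sp2*(1/n1 + 1/n2)))    #should match t = 2.0843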
Ref: 2-7~2-10
#A package that provides functions for enumerating all possible group assignments
#(base R's utils::combn would also do).
library(combinat)
#Combine the two samples and enumerate every set of indices that could form the New group
all <- c(New, Tradition)
index <- seq(along=all)
indexInNew <- combn(index, 4)
#Use mean difference
meanD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
meanD[i] <- mean(all[indexInNew[, i]]) - mean(all[-indexInNew[, i]])
}
# !!! The two values printed as -4.2 below appear as -4.3 in the textbook.
#Their exact values are -4.25, and R's round() follows the IEC 60559
#"round half to even" rule, so -4.25 is rounded to -4.2 rather than away from
#zero to -4.3. See ?round for more details.
sort(round(meanD, 1))
## [1] -19.4 -17.7 -14.2 -13.0 -12.4 -8.9 -8.9 -7.8 -7.2 -6.0 -5.4
## [12] -4.2 -4.2 -3.7 -2.5 -2.5 -0.8 0.4 1.0 1.0 1.6 2.2
## [23] 2.8 5.7 6.2 6.2 7.4 8.0 9.8 10.9 10.9 12.7 14.4
## [34] 16.2 21.4
#Exact p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))
## [1] 0.05714286
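#The same permutation distribution can be computed without the explicit for loop,
#e.g. with apply() over the columns of indexInNew (a minimal alternative sketch):
meanD2 <- apply(indexInNew, 2, function(id) mean(all[id]) - mean(all[-id]))
all.equal(meanD, meanD2)   #should be TRUE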
Ref: 2-11~2-12
#Use the sum of the New-group observations as the test statistic
T1 <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
T1[i] <- sum(all[indexInNew[, i]])
}
sort(T1, decreasing=T)
## [1] 207 198 195 192 189 189 187 184 183 181 181 180 175 174 173 172 172
## [18] 171 169 166 166 164 163 163 161 160 158 157 155 155 149 148 146 140
## [35] 137
#Exact p-value
(p_value <- sum(T1 >= T1[1])/length(T1))
## [1] 0.05714286
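#Because sum(all) is fixed under permutation, the mean difference is an increasing
#linear function of the New-group sum, so the two statistics give exactly the same
#p-value. A quick check (my own sketch; meanD is still the vector from Ref: 2-7~2-10):
all.equal(meanD, T1/4 - (sum(all) - T1)/3)   #should be TRUE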
Ref: 2-26~2-28
#Median difference
medianD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
medianD[i] <- median(all[indexInNew[, i]]) - median(all[-indexInNew[, i]])
}
#Exact p-value
(p_value <- sum(medianD >= medianD[1])/length(medianD))
## [1] 0.05714286
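#The three exact tests above differ only in the choice of statistic, so a small helper
#of my own (hypothetical name perm.pvalue, not from the notes) covers all of them for
#"greater" alternatives:
perm.pvalue <- function(x, y, stat = function(a, b) mean(a) - mean(b)){
  all <- c(x, y)
  idx <- combn(seq(along=all), length(x))
  s <- apply(idx, 2, function(id) stat(all[id], all[-id]))
  sum(s >= s[1])/length(s)   #the first column is the observed assignment
}
perm.pvalue(New, Tradition)                                            #mean difference
perm.pvalue(New, Tradition, stat = function(a, b) median(a) - median(b))   #median difference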
Ref: 2-29~2-35
#Table 2.3.1
#Random sampling
trt1 <- c(59.1, 60.3, 58.1, 61.3, 65.1, 55.0, 63.4, 67.8)
trt2 <- c(60.1, 62.1, 59.3, 55.0, 54.6, 64.4, 58.7, 62.5)
#Number of possible group assignments (combinations of 8 out of 16)
choose(16, 8)
## [1] 12870
#Exact test (Mean Difference)
all <- c(trt1, trt2)
index <- seq(along=all)
indexInNew <- combn(index, 8)
meanD <- rep(NA, dim(indexInNew)[2])
for(i in 1:dim(indexInNew)[2]){
meanD[i] <- mean(all[indexInNew[, i]]) - mean(all[-indexInNew[, i]])
}
#Exact p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))
## [1] 0.1966589
#Random sampling (R=1000)
#Set the seed here to make the result reproducible.
#I set it to 316 just because my birthday is March 16.
set.seed(316)
meanCritical <- mean(trt1) - mean(trt2)
R <- 1000
meanD <- rep(NA, R)
for(i in 1:R){
indexInNew <- sample(index, 8, replace=FALSE)
meanD[i] <- mean(all[indexInNew]) - mean(all[-indexInNew])
}
#Add the observed mean difference to meanD
meanD <- c(meanCritical, meanD)
#Approximate p-value
(p_value <- sum(meanD >= meanD[1])/length(meanD))
## [1] 0.1938062
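#The random-sampling p-value carries Monte Carlo error. A rough binomial-type
#standard error (my own sketch, not part of the original notes):
(se <- sqrt(p_value*(1 - p_value)/length(meanD)))
#so 0.194 is well within sampling error of the exact value 0.197 obtained above.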
Ref: 2-39
#Wilcoxon rank sum test
#Note that the alternative hypothesis is stated with respect to the first sample.
#In the examples below, the statistic and p-value are based on W1, the rank sum of
#the first sample. See 2-40 for reference.
w <- wilcox.test(New, Tradition, alternative="greater")
#Note that the reported W is the rank sum of the first sample minus n1*(n1 + 1)/2.
w
##
## Wilcoxon rank sum test
##
## data: New and Tradition
## W = 11, p-value = 0.05714
## alternative hypothesis: true location shift is greater than 0
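#A hand check (my own sketch) of the relation noted above:
r <- rank(c(New, Tradition))
sum(r[1:4])                    #W1 = 21, the rank sum of the New group
sum(r[1:4]) - 4*(4 + 1)/2      #= 11, matching the W printed above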
Ref: 2-42~2-44
#Table 2.4.3; here we compute the W2 statistic.
untreated <- c(0.55, 0.63, 0.67, 0.68, 0.79, 0.81, 0.85)
treated <- c(0.44, 0.47, 0.51, 0.52, 0.58, 0.59, 0.60, 0.65, 0.66)
w <- wilcox.test(untreated, treated, alternative="greater")
w
##
## Wilcoxon rank sum test
##
## data: untreated and treated
## W = 56, p-value = 0.003934
## alternative hypothesis: true location shift is greater than 0
Ref: 2-46
#The exactRankTests package contains many nonparametric methods.
#We use it to handle ties in the Wilcoxon rank-sum test.
library(exactRankTests)
#Table 2.5.2
trt1 <- c(11.7, 12.0)
trt2 <- c(11.8, 12.0, 12.5)
#If you use wilcox.test from the stats package, you will run into the problem that
#it cannot compute an exact p-value when there are ties in the data.
w <- wilcox.exact(trt1, trt2, alternative="less")
w
##
## Exact Wilcoxon rank sum test
##
## data: trt1 and trt2
## W = 1.5, p-value = 0.3
## alternative hypothesis: true mu is less than 0
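#For comparison (sketch, output omitted): the base-R version falls back to the normal
#approximation and warns that it cannot compute an exact p-value with ties.
wilcox.test(trt1, trt2, alternative="less")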
Ref: 2-47~2-49
#Table 2.5.2 (the Table 2.3.1 data, sorted)
trt1 <- c(55.0, 58.1, 59.1, 60.3, 61.3, 63.4, 65.1, 67.8)
trt2 <- c(54.6, 55.0, 58.7, 59.3, 60.1, 62.1, 62.5, 64.4)
#R may complain here, but the reported statistic is still correct.
w <- wilcox.exact(trt1, trt2, alternative="greater")
w
##
## Exact Wilcoxon rank sum test
##
## data: trt1 and trt2
## W = 39.5, p-value = 0.2296
## alternative hypothesis: true mu is greater than 0
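#An alternative sketch (assuming the coin package is installed): coin::wilcox_test
#also provides an exact Wilcoxon test in the presence of ties via a formula interface.
library(coin)
dat <- data.frame(resp = c(trt1, trt2),
                  grp = factor(rep(c("trt1", "trt2"), c(length(trt1), length(trt2)))))
wilcox_test(resp ~ grp, data = dat, distribution = "exact", alternative = "greater")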
Ref: 2-53
#Table 2.6.1
Granite <- c(33.63, 39.86, 69.32, 42.13, 58.36, 74.11)
Basalt <- c(26.15, 18.56, 17.55, 9.84, 28.29, 34.15)
#Function to compute all pairwise differences x_i - y_j
Pairdiff <- function(x, y){
  n1 <- length(x); n2 <- length(y)
  paird <- c()
  for(i in 1:n1){
    paird <- c(paird, rep(x[i], n2) - y)
  }
  paird
}
d <- Pairdiff(Granite, Basalt)
sort(d)
## [1] -0.52 5.34 5.71 7.48 7.98 11.57 13.71 13.84 15.07 15.98 16.08
## [12] 21.30 22.31 23.57 23.79 24.21 24.58 30.02 30.07 32.21 32.29 35.17
## [23] 39.80 39.96 40.81 41.03 43.17 45.82 47.96 48.52 50.76 51.77 55.55
## [34] 56.56 59.48 64.27
#Hodges-Lehmann estimator of delta (the median of the pairwise differences)
median(d)
## [1] 30.045
#CI for delta
deltaCI <- function(x, y, q=0.05){
  #Same pairwise-difference function as above
  Pairdiff <- function(x, y){
    n1 <- length(x); n2 <- length(y)
    paird <- c()
    for(i in 1:n1){
      paird <- c(paird, rep(x[i], n2) - y)
    }
    paird
  }
  #Save the sorted pairwise differences in d
  d <- sort(Pairdiff(x, y))
  nlower <- qwilcox(q, length(x), length(y))
  nupper <- qwilcox(q, length(x), length(y), lower.tail=FALSE)
  cat("Confidence Interval:", "\n")
  cat(d[nlower], "<", "delta", "<=", d[nupper + 1])
}
deltaCI(Granite, Basalt)
## Confidence Interval:
## 13.84 < delta <= 47.96
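#Cross-check (my own sketch): wilcox.test with conf.int=TRUE reports the
#Hodges-Lehmann estimate and a confidence interval for delta; the endpoints may
#differ slightly from the hand construction above because of interval conventions.
wilcox.test(Granite, Basalt, conf.int=TRUE, conf.level=0.90)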
#Score functions for rank-based tests.
#normalScore: van der Waerden (normal) scores, matched to the observations in x.
normalScore <- function(x){
  qnorm(rank(x)/(length(x) + 1), mean=0, sd=1)   #qnorm's arguments are 'mean' and 'sd'
}
#exponentialScore: expected values of the Exp(1) order statistics,
#returned for ranks 1, ..., N (index the result by rank(x) to score a sample).
exponentialScore <- function(x){
  score <- c()
  N <- length(x)
  score[1] <- 1/N
  for(i in 2:N){
    score[i] <- score[i - 1] + 1/(N - i + 1)
  }
  score
}
#SavageScore: Savage scores, i.e. the exponential scores minus 1 (they sum to zero).
SavageScore <- function(x){
  score <- c()
  N <- length(x)
  score[1] <- 1/N - 1
  for(i in 2:N){
    score[i] <- score[i - 1] + 1/(N - i + 1)
  }
  score
}
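#Usage sketch (my own example, not from the notes): a normal-scores permutation test
#on the Table 2.1.1 data, enumerating the assignments as before.
xy <- c(New, Tradition)
sc <- normalScore(xy)
idx <- combn(seq(along=xy), length(New))
Tns <- apply(idx, 2, function(id) sum(sc[id]))
(p_value_ns <- sum(Tns >= Tns[1])/length(Tns))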
Ref: 2-79 & 2-89
#Table 2.8.1
#Siegel-Tukey
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
#A package that provides the Siegel-Tukey test.
library(DescTools)
test <- SiegelTukeyTest(trt1, trt2, alternative="greater")
test
##
## Siegel-Tukey-test for equal variability
##
## data: trt1 and trt2
## ST = 24, p-value = 0.2738
## alternative hypothesis: true ratio of scales is greater than 1
Ref: 2-80 & 2-90
#Ansari-Bradley Test
test <- ansari.test(trt1, trt2, alternative="greater")
test
##
## Ansari-Bradley test
##
## data: trt1 and trt2
## AB = 13, p-value = 0.2698
## alternative hypothesis: true ratio of scales is greater than 1
Ref: 2-81~2-83 & 2-86~2-87
#Table 2.8.1
#Test on deviances (RMD, the ratio of mean deviances)
#Exact test for RMD
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
dev1 <- trt1 - median(trt1)
dev2 <- trt2 - median(trt2)
all <- c(dev1, dev2)
index <- seq(along=all)
indexIntrt1 <- combn(index, 5)
RMD <- rep(NA, dim(indexIntrt1)[2])
for(i in 1:dim(indexIntrt1)[2]){
RMD[i] <- mean(abs(all[indexIntrt1[, i]]))/mean(abs(all[-indexIntrt1[, i]]))
}
(p_value <- sum(RMD >= RMD[1])/length(RMD))
## [1] 0.06349206
hist(RMD, main="Distribution of RMD", xlab="RMD", ylab="Frequency")
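#My own addition: mark the observed RMD (the first combination is the actual
#assignment) on the permutation distribution drawn above.
abline(v=RMD[1], col="red", lty=2)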
Ref: 2-84~2-85 & 2-91~2-93
#Table 2.8.2
#Kolmogorov-Smirnov Test
trt1 <- c(16.55, 15.36, 15.94, 16.43, 16.01)
trt2 <- c(16.05, 15.98, 16.10, 15.88, 15.91)
ks <- ks.test(trt1, trt2, alternative="two.sided")
ks
##
## Two-sample Kolmogorov-Smirnov test
##
## data: trt1 and trt2
## D = 0.4, p-value = 0.873
## alternative hypothesis: two-sided
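#Check of the statistic (my own sketch): D is the largest vertical distance between
#the two empirical CDFs, evaluated at the pooled sample points.
v <- sort(c(trt1, trt2))
max(abs(ecdf(trt1)(v) - ecdf(trt2)(v)))   #should reproduce D = 0.4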