Ref: 4-1~4-4
#Table 4.1.1
Hour <- c(1530, 2130, 2940, 1960, 2270)
Survey <- c(1290, 2250, 2430, 1900, 2120)
#Paired t test
t.test(Hour, Survey, paired=TRUE, alternative="greater")
##
## Paired t-test
##
## data: Hour and Survey
## t = 1.6126, df = 4, p-value = 0.09107
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -54.1012 Inf
## sample estimates:
## mean of the differences
## 168
Ref: 4-4
#Paired-Comparison Permutation Test
library(exactRankTests)
#Note that this test use sum as statistic, not mean.
perm.test(Hour, Survey, paired=TRUE, alternative="greater")
##
## 1-sample Permutation Test
##
## data: Hour and Survey
## T = 960, p-value = 0.09375
## alternative hypothesis: true mu is greater than 0
Ref: 4-9
#Table 4.1.3
drug1 <- c(74, 55, 61, 41, 53, 74, 52, 31, 50, 58, 54, 53, 69, 60, 61, 54, 57)
drug2 <- c(63, 58, 49, 47, 50, 69, 67, 40, 44, 38, 56, 38, 47, 41, 46, 47, 44)
#Paird t test
t.test(drug1, drug2, paired=TRUE, alternative="two.sided")
##
## Paired t-test
##
## data: drug1 and drug2
## t = 2.5475, df = 16, p-value = 0.02151
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.115612 12.178505
## sample estimates:
## mean of the differences
## 6.647059
Ref: 4-9
#Paired-Comparison Permutation Test
#Exact p-value p
perm.test(drug1, drug2, paired=TRUE, alternative="two.sided")
##
## 1-sample Permutation Test
##
## data: drug1 and drug2
## T = 148, p-value = 0.02502
## alternative hypothesis: true mu is not equal to 0
Ref: 4-13
#Asymptotic p-value pA
d <- drug1 - drug2
Sobs <- sum(d[which(d > 0)])
absDi <- sum(abs(d))
(pA <- 2*pnorm(abs((Sobs - absDi/2)/(sqrt(sum(d^2)/4))), lower.tail=FALSE))
## [1] 0.0267714
Ref: 4-18
#Table 4.1.1
d <- Hour - Survey
wilcox.test(d, alternative="greater")
##
## Wilcoxon signed rank test
##
## data: d
## V = 13, p-value = 0.09375
## alternative hypothesis: true location is greater than 0
Ref: 4-19~4-20
#Asymptotic p-value pA
signedRankA <- function(d, correct=TRUE){
if(correct){
n <- length(d)
SR <- sum(rank(abs(d))*(d > 0))
ESR <- n*(n + 1)/4
varSR <- n*(n + 1)*(2*n + 1)/24
cat("This is for alternative=greater", "\n")
(pA <- pnorm((SR - 0.5 - ESR)/sqrt(varSR), lower.tail=F))
}else{
n <- length(d)
SR <- sum(rank(abs(d))*(d > 0))
ESR <- n*(n + 1)/4
varSR <- n*(n + 1)*(2*n + 1)/24
cat("This is for alternative=greater", "\n")
(pA <- pnorm((SR - ESR)/sqrt(varSR), lower.tail=F))
}
}
#Without correction
signedRankA(d, correct=FALSE)
## This is for alternative=greater
## [1] 0.06900537
#With correction
signedRankA(d)
## This is for alternative=greater
## [1] 0.08876493
Ref: 4-21
#Table 4.2.3
d <- c(-5, -3, -3, 0, 0, 2, 4, 4, 4, 5)
#Adjusted ties for zero
Zerotiead<- function(d){
#Use abs to get absolute value
#Then multiple the sign
rank(abs(d))*sign(d)
}
Zerotiead(d)
## [1] -9.5 -4.5 -4.5 0.0 0.0 3.0 7.0 7.0 7.0 9.5
#Remove the zero
Zerotierm <- function(d){
rank(abs(d)[-which(abs(d) == 0)])
}
Zerotierm(d)
## [1] 7.5 2.5 2.5 1.0 5.0 5.0 5.0 7.5
Ref: 4-24~4-25
#Table 4.2.5
d <- drug1 - drug2
#Because of tie problem, we use wilcox.exact like before in chapter3.
wilcox.exact(d, alternative="greater")
##
## Exact Wilcoxon signed rank test
##
## data: d
## V = 123, p-value = 0.01282
## alternative hypothesis: true mu is greater than 0
Ref: 4-22&4-25
#Asymptotic p-value pA without correction
adsignedRankA <- function(d, correct=TRUE){
if(correct){
n <- length(d)
SR <- sum(rank(abs(d))*(d > 0))
ESR <- n*(n + 1)/4
#Before compute variance we have to know how many groups
#Count each number and sort them from big to small
t <- sort(table(rank(abs(d))), decreasing=T)
#Take out ties and also number
g <- t[t != 1]
varSR <- n*(n + 1)*(2*n + 1)/24 - sum(g^3 -g)/48
cat("This is for alternative = greater", "\n")
(pA <- pnorm((SR - 0.5 - ESR)/sqrt(varSR), lower.tail=F))
}else{
n <- length(d)
SR <- sum(rank(abs(d))*(d > 0))
ESR <- n*(n + 1)/4
#As previous said
t <- sort(table(rank(abs(d))), decreasing=T)
g <- t[t != 1]
varSR <- n*(n + 1)*(2*n + 1)/24 - sum(g^3 -g)/48
cat("This is for alternative = greater", "\n")
(pA <- pnorm((SR - ESR)/sqrt(varSR), lower.tail=F))
}
}
#Asymptotic p-value pA without correction
adsignedRankA(d, correct=FALSE)
## This is for alternative = greater
## [1] 0.01379476
#Asymptotic p-value pA with correction
adsignedRankA(d)
## This is for alternative = greater
## [1] 0.01465154
Ref: 4-30
#Table 4.1.3
data.frame(numofdifference=length(d), numofsuccess=sum(d > 0))
## numofdifference numofsuccess
## 1 17 12
binom.test(12, 17, alternative="greater")
##
## Exact binomial test
##
## data: 12 and 17
## number of successes = 12, number of trials = 17, p-value = 0.07173
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.4780823 1.0000000
## sample estimates:
## probability of success
## 0.7058824
Ref: 4-31
#Asymptotic p-value pA
signA <- function(d, correct=TRUE){
if(correct){
n <- length(d)
SN <- sum(d > 0)
ESN <- 0.5*n
varSN <- 0.25*n
cat("This is for alternative = greater", "\n")
(pA <- pnorm((SN - 0.5 - ESN)/sqrt(varSN), lower.tail=F))
}else{
n <- length(d)
SN <- sum(d > 0)
ESN <- 0.5*n
varSN <- 0.25*n
cat("This is for alternative = greater", "\n")
(pA <- pnorm((SN - ESN)/sqrt(varSN), lower.tail=F))
}
}
#Without correction
signA(d, correct=FALSE)
## This is for alternative = greater
## [1] 0.04477754
#With correction
signA(d)
## This is for alternative = greater
## [1] 0.07280505
Ref: 4-44
#table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
m <- rbind(trt1, trt2, trt3, trt4)
#Use this package to find out permutation
library(combinat)
Here we also encounter the same problem, memory size. Hence, we won’t run the code. Just show it.
#Permutation Test for Randomized Complete Block Design(RCBD)
RCBDper <- function(m){
nt <- dim(m)[1]
nb <- dim(m)[2]
#Save each permutation of block in blki
blk1 <- permn(m[, 1]);blk2 <- permn(m[, 2])
blk3 <- permn(m[, 3]);blk4 <- permn(m[, 4])
blk5 <- permn(m[, 5]);blk6 <- permn(m[, 6])
#Create an array to put each simulation matrix
w=array(0,dim=c(nt, nb, factorial(nt)^nb))
for(i in 1: factorial(nt)){
for(j in 1: factorial(nt)){
for(k in 1: factorial(nt)){
for(l in 1: factorial(nt)){
for(m in 1: factorial(nt)){
for(n in 1: factorial(nt)){
#Because of the width limit, the indent is not so clear.
w[, , ((i - 1)*(factorial(nt)^(nb - 1)) +
(j - 1)*(factorial(nt)^(nb - 2)) +
(k - 1)*(factorial(nt)^(nb - 3)) +
(l - 1)*(factorial(nt)^(nb - 4)) +
(m - 1)*(factorial(nt)*(nb - 5)) + (n - 1) + 1)] <-
as.array(cbind(blk1[[i]], blk2[[j]], blk3[[k]],
blk4[[l]], blk5[[m]], blk6[[n]]))
}
}
}
}
}
}
#Now we have the array w which its
#third dimension represent each simulation
#Here I compute the F statistics
#However, as you know, you can use SST, SSX
SST <- rep(0, factorial(nt)^nb)
for(i in 1:factorial(nt)^nb){
rowmean <- apply(w[, , i], 1, mean)
SST[i] <- nb*sum(rowmean - rep(mean(rowmean), nt))^2
}
SSBL <- rep(0, factorial(nt)^nb)
for(i in 1:factorial(nt)^nb){
colmean <- apply(w[, , i], 2, mean)
SSBL[i] <- nb*sum(colmean - rep(mean(colmean), nb))^2
}
#Each SSTO will be the same
SSTO <- rep(var(as.numeric(w[, , 1]))*(nt*nb - 1), factorial(nt)^nb)
SSE <- SSTO - SST - SSBL
#Compute F statistic
F <- (SST/(nt - 1))/(SSE/((nt - 1)*(nb - 1)))
(p <- mean(F >= F[1]))
}
RCBDper(m)
#Approximate pR
RCBDperR <- function(m, pernum=5000){
nt <- dim(m)[1]
nb <- dim(m)[2]
#Save each permutation of block in blki
blk1 <- permn(m[, 1]);blk2 <- permn(m[, 2])
blk3 <- permn(m[, 3]);blk4 <- permn(m[, 4])
blk5 <- permn(m[, 5]);blk6 <- permn(m[, 6])
#Create an array to put each simulation matrix
w=array(0, dim=c(nt, nb, pernum))
w[, , 1] <- m
for(i in 2:pernum){
#Store simulate number
simN <- c()
for(j in 1:6){
simN[j] <- sample(1:factorial(nt), 1)
}
w[, , i] <- as.array(cbind(blk1[[simN[1]]], blk2[[simN[2]]],
blk3[[simN[3]]], blk4[[simN[4]]],
blk5[[simN[5]]], blk6[[simN[6]]]))
}
#Now we have the array w which its
#third dimension represent each simulation
#Here I compute the F statistics
#However, as you know, you can use SST, SSX
SST <- rep(0, pernum)
for(i in 1:pernum){
rowmean <- apply(w[, , i], 1, mean)
SST[i] <- nb*(sum((rowmean - rep(mean(rowmean), nt))^2))
}
colmean <- apply(w[, , 1], 2, mean)
SSBL <- rep(nt*(sum((colmean - rep(mean(colmean), nb))^2)), pernum)
#Each SSTO will be the same
SSTO <- rep(var(as.numeric(w[, , 1]))*(nt*nb - 1), pernum)
SSE <- SSTO - SST - SSBL
#Compute F statistic
F <- (SST/(nt - 1))/(SSE/((nt - 1)*(nb - 1)))
(pR <- mean(F >= F[1]))
}
RCBDperR(m)
## [1] 0.0596
#Simulate 10000 times
RCBDperR(m, pernum=10000)
## [1] 0.0568
** Ref: 4-44**
#RCBD
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
all <- c(trt1, trt2, trt3, trt4)
g <- rep(c(1, 2, 3, 4), each=6)
b <- rep(c(1:6), 4)
mydata <- data.frame(Y=all, group=as.factor(g), block=as.factor(b))
summary(aov(Y ~ group + block, data=mydata))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 3 5408 1802.8 3.121 0.0575 .
## block 5 2817 563.4 0.975 0.4640
## Residuals 15 8664 577.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Ref: 4-47~4-48
#Table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
#Rank
m <- rbind(trt1, trt2, trt3, trt4)
(r <- apply(m, 2, rank))
## [,1] [,2] [,3] [,4] [,5] [,6]
## trt1 1 4 4 4 4 4
## trt2 4 3 3 2 1 2
## trt3 2 2 2 3 3 1
## trt4 3 1 1 1 2 3
Here we won’t compute exact p-value for the same reason, memory size. We compute pR here using the previous RCBDperR, but we put the rank matrix in it
RCBDperR(r)
## [1] 0.1148
#Simulate 10000 times
RCBDperR(r, pernum=10000)
## [1] 0.1177
#Asypmtotic p-value pA
all <- c(trt1, trt2, trt3, trt4)
g <- rep(c(1, 2, 3, 4), each=6)
b <- rep(c(1:6), 4)
mydata <- data.frame(Y=all, group=as.factor(g), block=as.factor(b))
friedman.test(Y ~ g|b, data=mydata)
##
## Friedman rank sum test
##
## data: Y and g and b
## Friedman chi-squared = 5.6, df = 3, p-value = 0.1328
#Table 4.5.3
trt1 <- c(100, 80, 50)
trt2 <- c(100, 80, 60)
trt3 <- c(150, 80, 80)
trt4 <- c(150, 90, 90)
m <- rbind(trt1, trt2, trt3, trt4)
#Friedman Test adjusted for Ties
FriedmanTieper <- function(m){
nt <- dim(m)[1]
nb <- dim(m)[2]
#Save each permutation of block in blki
blk1 <- permn(rank(m[, 1]))
blk2 <- permn(rank(m[, 2]))
blk3 <- permn(rank(m[, 3]))
#Create an array to put each simulation matrix
w=array(0,dim=c(nt, nb, factorial(nt)^nb))
for(i in 1: factorial(nt)){
for(j in 1: factorial(nt)){
for(k in 1: factorial(nt)){
w[, , (i - 1)*(factorial(nt)^(nb - 1)) +
(j - 1)*(factorial(nt)^(nb - 2)) +
(k - 1) + 1] <-
as.array(cbind(blk1[[i]], blk2[[j]], blk3[[k]]))
}
}
}
#Now we have the array w which its
#third dimension represent each simulation
#Here I use sum(Ribar^2)
Ribarsq <- c()
for(i in 1:factorial(nt)^nb){
Ribarsq[i] <- sum(apply(w[, , i], 1, mean)^2)
}
(p <- mean(Ribarsq >= Ribarsq[1]))
}
FriedmanTieper(m)
## [1] 0.04166667
#Rank
m <- rbind(trt1, trt2, trt3, trt4)
(r <- apply(m, 2, rank))
## [,1] [,2] [,3]
## trt1 1.5 2 1
## trt2 1.5 2 2
## trt3 3.5 2 3
## trt4 3.5 4 4
Since the previous function friedman.test will not address the ties problem. Here we use similar method to create the function FriedmanTieper to help us compute the asymptotic p-value pA.
#Asypmtotic p-value pA
k <- dim(m)[1]
b <- dim(m)[2]
FM <- ((b^2)/sum(apply(r, 2, var)))*(sum((apply(r, 1, mean) - (k + 1)/2)^2))
(pA <- pchisq(FM, k - 1, lower.tail=F))
## [1] 0.05755845
#Table 4.4.3
trt1 <- c(120, 208, 199, 194, 177, 195)
trt2 <- c(207, 188, 181, 164, 155, 175)
trt3 <- c(122, 137, 177, 177, 160, 138)
trt4 <- c(128, 128, 160, 142, 157, 179)
m <- rbind(trt1, trt2, trt3, trt4)
Here we won’t compute exact p-value for the same reason, memory size.
myPagetestR <- function(m, pernum=5000){
nt <- dim(m)[1]
nb <- dim(m)[2]
r <- apply(m, 2, rank)
#Save each permutation of block in blki
blk1 <- permn(r[, 1]);blk2 <- permn(r[, 2])
blk3 <- permn(r[, 3]);blk4 <- permn(r[, 4])
blk5 <- permn(r[, 5]);blk6 <- permn(r[, 6])
#Create an array to put each simulation matrix
w=array(0, dim=c(nt, nb, pernum))
w[, , 1] <- r
for(i in 2:pernum){
#Store simulate number
simN <- c()
for(j in 1:6){
simN[j] <- sample(1:factorial(nt), 1)
}
w[, , i] <- as.array(cbind(blk1[[simN[1]]], blk2[[simN[2]]],
blk3[[simN[3]]], blk4[[simN[4]]],
blk5[[simN[5]]], blk6[[simN[6]]]))
}
#Compute sum(i*Ri)
sumRi <- rep(0, pernum)
for(i in 1:pernum){
Rankrowmean <- apply(w[, , i], 1, sum)
sumRi[i] <- sum((1:nt)*Rankrowmean)
}
(pR <- mean(sumRi <= sumRi[1]))
}
myPagetestR(m)
## [1] 0.0138
#Asypmtotic p-value pA
PagetestA <- function(m){
k <- dim(m)[1]
b <- dim(m)[2]
r <- apply(m, 2, rank)
PGobs <- sum((1:k)*apply(r, 1, sum))
EPG <- (b*k*(k + 1)^2)/4
varPG <- (b*(k - 1)*(k^2)*(k + 1)^2)/144
(pA <- pnorm((PGobs - EPG)/sqrt(varPG), lower.tail=T))
}
PagetestA(m)
## [1] 0.01182581