Ref: 5-6
#Table 5.1.1
X <- 1:4
Y <- c(5, 7, 9, 8)
#Use this package to find out all permutation
library(combinat)
#All permutation of Y
Yper <- permn(Y)
Ref: 5-7
slope <- rep(0, length(Yper))
correlation <- rep(0, length(Yper))
for(i in 1:length(Yper)){
m <- lm(Yper[[i]] ~ X)
slope[i] <- summary(m)$coefficients[2, 1]
correlation[i] <- cor(X, Yper[[i]])
}
(p <- mean(slope >= slope[1]))
## [1] 0.125
(p <- mean(correlation >= correlation[1]))
## [1] 0.125
Ref: 5-7
#Table 5.1.2
Heter <- c(2276, 3724, 2723, 4020, 4011, 2035,
1540, 1300, 2240, 2467, 3700, 1501,
2907, 2898, 2783, 2870, 3263, 2780)
Lym <- c(2830, 5488, 2904, 5528, 4966, 3135,
2079, 1755, 3080, 2363, 5087, 2821,
5130, 4830, 4690, 3570, 3480, 3823)
Here we let user to set the pernum because the permutation memory size is too big(like the example in above, 18!).
Ref: 5-8
#Build the Slope permutation test
slope_pertestR <- function(Y, X, pernum=10000){
slope <- rep(0, pernum)
m1 <- lm(Y ~ X)
#Take out slope
slope[1] <- m1[[1]][2]
for(i in 1:pernum){
m <- lm(sample(Y) ~ X)
slope[i] <- m[[1]][2]
}
(p <- mean(slope >= slope[1]))
}
slope_pertestR(Lym, Heter)
## [1] 0.0708
Ref: 5-8
#Linear model
m <- lm(Heter ~ Lym)
summary(m)
##
## Call:
## lm(formula = Heter ~ Lym)
##
## Residuals:
## Min 1Q Median 3Q Max
## -684.6 -316.4 6.3 265.5 696.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 555.4213 341.7571 1.625 0.124
## Lym 0.5779 0.0868 6.657 5.51e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 438 on 16 degrees of freedom
## Multiple R-squared: 0.7347, Adjusted R-squared: 0.7182
## F-statistic: 44.32 on 1 and 16 DF, p-value: 5.507e-06
Ref: 5-8
#Asymptotic p-value pA
pnorm(sqrt(length(Heter) - 1)*cor(Heter, Lym), lower.tail=FALSE)
## [1] 0.000204486
Ref: 5-15
#Spearman Rank Correlation
cor(Heter, Lym, method="spearman")
## [1] 0.8968008
Ref: 5-15
#Asymptotic p-value pA(two tail)
2*pnorm(sqrt(length(Heter) - 1)*cor(Heter, Lym, method="spearman"), lower.tail=FALSE)
## [1] 0.0002176436
Ref: 5-15
#Approximate p-value pR(two tail)
Spcor_pertestR <- function(Y, X, pernum=10000,
alternative=c("lower", "greater", "two.side")){
correlation <- rep(0, pernum)
correlation[1] <- cor(Y, X, method="spearman")
#Take out slope
for(i in 2:pernum){
correlation[i] <- cor(sample(Y), X, method="spearman")
}
n1 <- sum(correlation >= correlation[1])
n2 <- sum(correlation <= correlation[1])
if(alternative=="two.side"){
2*(pR <- min(n1, n2)/pernum)
}else if(alternative=="greater"){
(pR <- n1/pernum)
}else{
(pR <- n2/pernum)
}
}
Spcor_pertestR(Lym, Heter, alternative="two.side")
## [1] 2e-04
#Example 5.2.2
X <- 1:4;Y <- c(1, 2, 2, 4)
rbind(rank(X), rank(Y))
## [,1] [,2] [,3] [,4]
## [1,] 1 2.0 3.0 4
## [2,] 1 2.5 2.5 4
#Spearman Rank Correlation
cor(X, Y, method="spearman")
## [1] 0.9486833
Ref: 5-17
#Table 5.2.2 & Example 5.2.3
A <- c(8, 8, 7, 8, 5, 6, 6, 9, 8, 7)
B <- c(7, 8, 8, 5, 6, 4, 5, 8, 6, 9)
rbind(rank(A), rank(B))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 7.5 7.5 4.5 7.5 1.0 2.5 2.5 10 7.5 4.5
## [2,] 6.0 8.0 8.0 2.5 4.5 1.0 2.5 8 4.5 10.0
Ref: 5-18
##Approximate p-value pR(two tail)
Spcor_pertestR(A, B, alternative="two.side")
## [1] 0.2966
#Asymptotic p-value pA(two tail)
2*pnorm(sqrt(length(A) - 1)*cor(A, B, method="spearman"), lower.tail=FALSE)
## [1] 0.2605009
Ref: 5-21
#Table 5.3.1
Height <- c(68, 70, 71, 72)
Weight <- c(1.53, 1.55, 1.40, 1.80)
cor(Height, Weight, method="kendall")
## [1] 0.3333333
#U and Vi function
U <- function(x, y, i, j){
ifelse((x[i] - x[j])*(y[i] - y[j]) > 0, 1, 0)
}
Vi <- function(x, y){
n <- length(y) - 1
V <- rep(0, n)
for(i in 1:n){
V[i] <- sum(U(x, y, i, (i + 1):(n + 1)))
}
V
}
Ref: 5-21
Vi(Weight, Height)
## [1] 2 1 1
Ref: 5-24
#Exact p-value p
kencor_pertest <- function(Y, X, alternative="lower"){
Yper <- permn(Y)
pernum <- length(Yper)
correlation <- rep(0, pernum)
#Take out slope
for(i in 1:pernum){
correlation[i] <- cor(Yper[[i]], X, method="kendall")
}
n1 <- sum(correlation >= correlation[1])
n2 <- sum(correlation <= correlation[1])
if(alternative=="two.side"){
2*(pR <- min(n1, n2)/pernum)
}else if(alternative=="greater"){
(pR <- n1/pernum)
}else{
(pR <- n2/pernum)
}
}
Ref: 5-24
#Tabl 5.3.2
kencor_pertest(c(2, 3, 1, 4), 1:4, alternative="greater")
## [1] 0.375
Ref: 5-25&5-26
#Compute Vi for Example 5.3.1
#Make the same table as lecture note
m <- cbind(Heter, Lym)
order(Heter)
## [1] 8 12 7 6 9 1 10 3 18 15 16 14 13 17 11 2 5 4
#Reorder data
m1 <- m[order(Heter), ]
#Combine all
data.frame(Rabbits=order(Heter), Heter_X=m1[, 1], Lym_Y=m1[,2],
R_X=rank(m1[, 1]), R_Y=rank(m1[, 2]), Vi=c(Vi(m1[, 1], m1[, 2]), NA))
## Rabbits Heter_X Lym_Y R_X R_Y Vi
## 1 8 1300 1755 1 1 17
## 2 12 1501 2821 2 4 14
## 3 7 1540 2079 3 2 15
## 4 6 2035 3135 4 8 10
## 5 9 2240 3080 5 7 10
## 6 1 2276 2830 6 5 11
## 7 10 2467 2363 7 3 11
## 8 3 2723 2904 8 6 10
## 9 18 2780 3823 9 11 7
## 10 15 2783 4690 10 12 6
## 11 16 2870 3570 11 10 6
## 12 14 2898 4830 12 13 5
## 13 13 2907 5130 13 16 2
## 14 17 3263 3480 14 9 4
## 15 11 3700 5087 15 15 2
## 16 2 3724 5488 16 17 1
## 17 5 4011 4966 17 14 1
## 18 4 4020 5528 18 18 NA
Ref: 5-26
#Example 5.3.1
#Asymptotic p-value pA
n <- length(Heter)
(taucor <- cor(Lym, Heter, method="kendall"))
## [1] 0.7254902
(vartau <- (4*n + 10)/(9*(n^2 - n)))
## [1] 0.02977487
(pA <- pnorm(taucor/sqrt(vartau), lower.tail=FALSE))
## [1] 1.308735e-05
#Table 5.3.3
judA <- c(5, 6, 6, 7, 7, 8, 8, 8, 8, 9)
judB <- c(6, 4, 5, 8, 9, 5, 6, 7, 8, 8)
#We got a wrong answer => Tie problem
cor(judA, judB, method="kendall")
## [1] 0.2599376
#We use the formula to compute tau but first we have to compute Vi
#Kendall's Tau for Tie
KenTauTie <- function(X, Y){
#U and Vi function for tie
#Here we use a small trick to achieve our goal
#We do not use separate if else
#because here we have to do vector computation.
U <- function(x, y, i, j){
U1 <- ifelse((x[i] - x[j])*(y[i] - y[j]) == 0, 0.5, 0)
U2 <- ifelse((x[i] - x[j])*(y[i] - y[j]) > 0, 1, 0)
U1 + U2
}
Vi <- function(x, y){
n <- length(y) - 1
V <- rep(0, n)
for(i in 1:n){
V[i] <- sum(U(x, y, i, (i + 1):(n + 1)))
}
V
}
(tau <- 2*(sum(Vi(X, Y)))/choose(length(X), 2) - 1)
}
(tau <- KenTauTie(judA, judB))
## [1] 0.2222222
#Compute Variance of tau
Vartau <-function(x, y){
#First take out ties and number
tab <- sort(table(rank(x)), decreasing=T)
s <- tab[tab != 1]
tab <- sort(table(rank(y)), decreasing=T)
t <- tab[tab != 1]
#Compute A, B, C
n <- length(x)
A <- (sum(s*(s - 1)*(2*s + 5)) + sum(t*(t - 1)*(2*t + 5)))/18
B <- (sum(s*(s - 1)*(s -2))*sum(t*(t - 1)*(t - 2)))/(9*n*(n - 1)*(n -2))
C <- (sum(s*(s - 1))*sum(t*(t - 1)))/(2*n*(n - 1))
tau <- (4*n + 10)/(9*(n^2 - n))
(Tietau <- tau - (4/((n^2)*((n-1)^2)))*(A - B - C))
}
(vartau <- Vartau(judA, judB))
## [1] 0.05411248
#Asymptotic p-value pA
pnorm(tau/sqrt(vartau),lower.tail=FALSE)
## [1] 0.1697136
#Table 5.4.2
Physician <- c(2, 2, 0)
SelfA <- c(0, 1, 2)
#Asymptotic p-value
#It gives us warning message
#because the number in cell is too small(eij < 5)
mydata <- rbind(Physician, SelfA)
chisq.test(mydata)
## Warning in chisq.test(mydata): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mydata
## X-squared = 4.2778, df = 2, p-value = 0.1178