### QUESTION 4
# (Grad students only) Suppose k=4, n1=n2=n3=1 and n4=2. Obtain the form of the exact null (H0) distribution of H for the case of no tied data.
## Formulas needed for exact null distribution of H:
## R (overall mean rank) = sum of ranks/n
## Ri (individual mean rank)
## V = 2∑(Ri-R)^2
## H Statistic = 12V/n(n+1)
n=5
r <- c(1,2,3,4,5)
r_mu <- sum(r)/length(r)
h.stat <- 12/(n*(n+1))
# 10 unique sequences:
# D1, D2 ranks = (1, 2) (1,3) (1,4) (1,5) (2,3) (2,4) (2,5) (3,4) (3,5) (4,5)
# sum of D ranks = (3) (4) (5) (6) (5) (6) (7) (7) (8) (9)
# A-C ranks = (3,4,5) (2,4,5) (2,3,5) (2,3,4) (1,4,5) (1,3,5) (1,3,4) (1,2,5) (1,2,4) (1,2,3)
# Switching A-C rank placements will have no impact on the H statistsic because V will not change (mean ranks of A-C will just be the value of that rank)
dat4<-data.frame(A=c(3, 2, 2, 2, 1, 1, 1, 1, 1, 1),
B=c(4, 4, 3, 3, 4, 3, 3, 2, 2, 2),
C=c(5, 5, 5, 4, 5, 5, 4, 5, 4, 3),
D1.D2=c(3, 4, 5, 6, 5, 6, 7, 7, 8, 9))
# Mean rank for D1 and D2
dat4$D <- (dat4$D1.D2/2)
# Calculate (Ri-R)^2 for A-D
dat4$Va <- ((dat4$A - r_mu)^2)
dat4$Vb <- ((dat4$B - r_mu)^2)
dat4$Vc <- ((dat4$C - r_mu)^2)
dat4$Vd <- ((dat4$D - r_mu)^2)
# Calculate V
dat4$V.stat <- 2*(dat4$Va + dat4$Vb + dat4$Vc +dat4$Vd)
# Calculate H
dat4$h.stat <- dat4$V.stat*h.stat
h.stat.list <- dat4[c(1:10), c(11)]
h.stat.list
## [1] 5.8 5.6 4.2 1.6 7.4 6.4 4.2 7.4 5.6 5.8
# Probability function
dat4.function <- data.frame(h=c(1.6, 4.2, 5.6, 5.8, 6.4, 7.4), p.of.h=c(1/10, 2/10, 2/10, 2/10, 1/10, 2/10))
## Exact Null Distribution
## P(H=1.6) = P(H=6.4) = 1/10
## P(H=4.2) = P(H=5.6) = P(H=5.8) = P(H=7.4)=2/10
## P(H)=0 for all other x
### QUESTION 5
# (Grad students only) The Bonferroni correction is known to be conservative. This means that we are guaranteed to control family-wise error rate at less than or equal to α, but often the true family-wise error rate is much less than this. Perform a simulation study to compute the true family-wise error rate when performing multiple comparisons under the null hypothesis that the means of all groups are the same. Perform the simulation study using the following parameters:
# α = 0.2
# k=6
# nj=20 for j=1,2,3,4,5,6,
# Data in each group is generated from a normal distribution with variance 1
# Use the Wilcoxon rank-sum test for multiple comparisons. table(detected)
set.seed(1234)
dat5<-data.frame(score=c(rnorm(20,0,1),rnorm(20,0,1),rnorm(20,0,1),rnorm(20,0,1),rnorm(20,0,1),rnorm(20,0,1))
,group=factor(c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20),rep(6,20))))
# Testing comparisons with/without adjustment
k <- 5
alpha <- 0.2
alpha/(k*(k-1)/2)
## [1] 0.02
# α'=0.02
pairwise.wilcox.test(dat5$score,dat5$group)
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: dat5$score and dat5$group
##
## 1 2 3 4 5
## 2 1.00 - - - -
## 3 1.00 1.00 - - -
## 4 0.82 0.10 0.16 - -
## 5 0.86 0.10 0.29 1.00 -
## 6 1.00 0.58 0.58 1.00 1.00
##
## P value adjustment method: holm
pairwise.wilcox.test(dat5$score,dat5$group, p.adj="bonf")
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: dat5$score and dat5$group
##
## 1 2 3 4 5
## 2 1.00 - - - -
## 3 1.00 1.00 - - -
## 4 1.00 0.10 0.18 - -
## 5 1.00 0.11 0.37 1.00 -
## 6 1.00 0.79 0.79 1.00 1.00
##
## P value adjustment method: bonferroni
# Error Rates:
test <- function(n, k, mu) {
stats <- rnorm(n, mean = 0)
stats[1:k] <- stats[1:k] + mu
rejections <- which(stats > qnorm(.8))
# family-wise error rate
fwer <- any(rejections > k)
# Type 1 error
typ.1 <- sum(rejections > k)/max(1,length(rejections))
return(c(fwer, typ.1))
}
dat.sim <- replicate(1000, test(20, 6, 0))
row.names(dat.sim) <- c("Family-wise Error Rate", "Type 1 Error Proportion")
rowMeans(dat.sim)
## Family-wise Error Rate Type 1 Error Proportion
## 0.9600000 0.6907991
# 0.9600000