group1 <- c(2.9736,0.9448,1.6394,0.0389,1.2958)
group2 <- c(0.7681,0.8027,0.2156,0.0740,1.5076)
group3 <- c(4.8249,2.2516,1.5609,2.0452,1.0959)
kajdat <- data.frame(score=c(group1,group2,group3), group=factor(c(rep(1,5),rep(2,5),rep(3,5))))
tapply(kajdat$score,kajdat$group,mean)
## 1 2 3
## 1.3785 0.6736 2.3557
ANOVA F-test
lmkajdat <- lm(score~group, data=kajdat)
anovaOut <- anova(lmkajdat)
anovaOut
## Analysis of Variance Table
##
## Response: score
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 7.1354 3.5677 2.9907 0.08834 .
## Residuals 12 14.3153 1.1929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fobs<-anovaOut[1,4]
Permutation Test
set.seed(1234)
n <- length(kajdat$score)
nsim<-1000
FstatVecNull<-rep(NA,nsim)
for (i in 1:nsim){print(i)
datPermute <- kajdat
datPermute$score <- kajdat$score[sample(1:n,n)]
lmOut <- lm(score~group,data=datPermute)
anovaOut <- anova(lmOut)
FstatVecNull[i]<-anovaOut[1,4]
}
pvalPerm<-sum(Fobs<FstatVecNull)/nsim
pvalPerm
## [1] 0.072
Kruskal-Wallis Test
kruskal.test(list(group1,group2,group3))
##
## Kruskal-Wallis rank sum test
##
## data: list(group1, group2, group3)
## Kruskal-Wallis chi-squared = 5.78, df = 2, p-value = 0.05558
For all the tests, \(H_0\) : \(\tau_1\) = \(\tau_2\) = \(\dots\) = \(\tau_k\) \(H_1\) : At least one \(\tau_1\), \(\tau_2\), \(\dots\) \(\tau_k\) \(\neq\) 0
ANOVA p-value = 0.088 Permutation test p-value = 0.077 Kruskal Wallis p-value = 0.0558
With an \(\alpha = 0.05\), we would fail to reject the \(H_0\) for all of the tests as the p-value is > \(\alpha\). Though, the p-value for the Kruskal Wallis test is fairly close.
I don’t believe any assumptions were violated and the p-values are fairly close. I would argue that the Kruskal-Wallis test result is what I would rely on in this situation based on the above analysis.
boxplot(group1,group2, group3)
hist(FstatVecNull)
abline(v=Fobs, col="red")
\(H_0\) The samples/groups are from identical populations \(H_1\) At least one of the samples/groups comes from a different population than the others.
From the Kruskal Wallis test, we obtain a p-value of 3.116 e-05. So, we reject the \(H_0\).
We need to do a multiple comparisons test to find out which of the groups are different.
From our pairwise Wilcox test using the bonferroni adjustment, we learn that the following groups are different: -Groups 1 & 3 -Groups 1 & 4 -Groups 1 & 5
library(readr)
NP_HW2 <- read_csv("NP_HW2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## score = col_double(),
## group = col_double()
## )
g1 <- subset(NP_HW2, group==1)
g2 <- subset(NP_HW2, group==2)
g3 <- subset(NP_HW2, group==3)
g4 <- subset(NP_HW2, group==4)
g5 <- subset(NP_HW2, group==5)
kruskal.test(list(g1$score,g2$score,g3$score,g4$score,g5$score))
##
## Kruskal-Wallis rank sum test
##
## data: list(g1$score, g2$score, g3$score, g4$score, g5$score)
## Kruskal-Wallis chi-squared = 26.033, df = 4, p-value = 3.116e-05
pairwise.wilcox.test(NP_HW2$score,NP_HW2$group, p.adj = "bonf")
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: NP_HW2$score and NP_HW2$group
##
## 1 2 3 4
## 2 1.0000 - - -
## 3 0.0021 0.0209 - -
## 4 0.0389 0.7526 0.0049 -
## 5 0.0209 0.1469 1.0000 0.0049
##
## P value adjustment method: bonferroni
\(H_0\) The samples/groups are from identical populations \(H_1\) At least one of the samples/groups comes from a different population than the others.
From the Kruskal Wallis test, we obtain a p-value of 4.335e-05 < \(\alpha = 0.05\). So, we reject the \(H_0\) and conlcude that the at least one of the samples comes from a different population than the others.
site1<-c(46,28,46,37,32,41,42,45,38,44)
site2<-c(42,60,32,42,45,58,27,51,42,52)
site3<-c(38,33,26,25,28,28,26,27,27,27)
site4<-c(31,30,27,29,30,25,25,24,27,30)
kruskal.test(list(site1,site2,site3,site4))
##
## Kruskal-Wallis rank sum test
##
## data: list(site1, site2, site3, site4)
## Kruskal-Wallis chi-squared = 22.852, df = 3, p-value = 4.335e-05
sitedf <- data.frame(score=c(site1,site2,site3,site4), group=factor(c(rep(1,10),rep(2,10),rep(3,10),rep(4,10))))
pairwise.wilcox.test(sitedf$score,sitedf$group, p.adj = "none")
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: sitedf$score and sitedf$group
##
## 1 2 3
## 2 0.25450 - -
## 3 0.00110 0.00140 -
## 4 0.00074 0.00095 1.00000
##
## P value adjustment method: none
library(gtools)
perm <- combinations(5,2)
k <- 4
n <- c(1,1,1,2)
summation1 <- c()
summation2 <- c()
for (i in 1:10)
{
summation1[i] <- (2*((sum(perm[i,])/2 - (sum(n)+1)/2))^2)
summation2[i] <- sum(((setdiff(c(1:5), perm[i,]) - (sum(n)+1)/2))^2)
}
summation3 <- summation1+summation2
H <- 12/(sum(n)*sum(n)+1)*summation3
H
## [1] 4.3846154 3.6923077 2.5384615 0.9230769 4.3846154 3.6923077 2.5384615
## [8] 4.3846154 3.6923077 4.3846154
hist(H, xlim=c(0,4.5))
axis(side=1, at=seq(0,4.5,0.5), labels=seq(0,4.5,0.5))
set.seed(1234)
test <-c(rnorm(20,sd=1), rnorm(20,sd=1), rnorm(20,sd=1),rnorm(20,sd=1),rnorm(20,sd=1), rnorm(20, sd=1))
dat <- data.frame(test,groups=c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20), rep(6,20)))
pairwilcox <- pairwise.wilcox.test(dat$test, dat$groups, p.adj = "none")
pairwilcox
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: dat$test and dat$groups
##
## 1 2 3 4 5
## 2 0.4945 - - - -
## 3 0.2423 0.8620 - - -
## 4 0.0911 0.0067 0.0122 - -
## 5 0.1081 0.0073 0.0245 0.9467 -
## 6 0.4135 0.0524 0.0524 0.4135 0.2766
##
## P value adjustment method: none