Problem 1

group1 <- c(2.9736,0.9448,1.6394,0.0389,1.2958)
group2 <- c(0.7681,0.8027,0.2156,0.0740,1.5076)
group3 <- c(4.8249,2.2516,1.5609,2.0452,1.0959)

kajdat <- data.frame(score=c(group1,group2,group3), group=factor(c(rep(1,5),rep(2,5),rep(3,5))))
tapply(kajdat$score,kajdat$group,mean)

##      1      2      3 
## 1.3785 0.6736 2.3557

Part A.

ANOVA F-test

lmkajdat <- lm(score~group, data=kajdat)
anovaOut <- anova(lmkajdat)
anovaOut

## Analysis of Variance Table
## 
## Response: score
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## group      2  7.1354  3.5677  2.9907 0.08834 .
## Residuals 12 14.3153  1.1929                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fobs<-anovaOut[1,4]

Part B.

Permutation Test

set.seed(1234)
n <- length(kajdat$score)
nsim<-1000
FstatVecNull<-rep(NA,nsim)
for (i in 1:nsim){print(i)
datPermute <- kajdat
datPermute$score <- kajdat$score[sample(1:n,n)]
lmOut <- lm(score~group,data=datPermute)
anovaOut <- anova(lmOut)
FstatVecNull[i]<-anovaOut[1,4]
}
pvalPerm<-sum(Fobs<FstatVecNull)/nsim

pvalPerm

## [1] 0.072

Part C.

Kruskal-Wallis Test

kruskal.test(list(group1,group2,group3))

## 
##  Kruskal-Wallis rank sum test
## 
## data:  list(group1, group2, group3)
## Kruskal-Wallis chi-squared = 5.78, df = 2, p-value = 0.05558

Part D.

For all the tests, \(H_0\) : \(\tau_1\) = \(\tau_2\) = \(\dots\) = \(\tau_k\) \(H_1\) : At least one \(\tau_1\), \(\tau_2\), \(\dots\) \(\tau_k\) \(\neq\) 0

ANOVA p-value = 0.088 Permutation test p-value = 0.077 Kruskal Wallis p-value = 0.0558

With an \(\alpha = 0.05\), we would fail to reject the \(H_0\) for all of the tests as the p-value is > \(\alpha\). Though, the p-value for the Kruskal Wallis test is fairly close.

I don’t believe any assumptions were violated and the p-values are fairly close. I would argue that the Kruskal-Wallis test result is what I would rely on in this situation based on the above analysis.

boxplot(group1,group2, group3)

hist(FstatVecNull)
abline(v=Fobs, col="red")

Problem 2

\(H_0\) The samples/groups are from identical populations \(H_1\) At least one of the samples/groups comes from a different population than the others.

From the Kruskal Wallis test, we obtain a p-value of 3.116 e-05. So, we reject the \(H_0\).

We need to do a multiple comparisons test to find out which of the groups are different.

From our pairwise Wilcox test using the bonferroni adjustment, we learn that the following groups are different: -Groups 1 & 3 -Groups 1 & 4 -Groups 1 & 5

library(readr)
NP_HW2 <- read_csv("NP_HW2.csv")

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   score = col_double(),
##   group = col_double()
## )

g1 <- subset(NP_HW2, group==1)
g2 <- subset(NP_HW2, group==2)
g3 <- subset(NP_HW2, group==3)
g4 <- subset(NP_HW2, group==4)
g5 <- subset(NP_HW2, group==5)

kruskal.test(list(g1$score,g2$score,g3$score,g4$score,g5$score))

## 
##  Kruskal-Wallis rank sum test
## 
## data:  list(g1$score, g2$score, g3$score, g4$score, g5$score)
## Kruskal-Wallis chi-squared = 26.033, df = 4, p-value = 3.116e-05

pairwise.wilcox.test(NP_HW2$score,NP_HW2$group, p.adj = "bonf")

## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  NP_HW2$score and NP_HW2$group 
## 
##   1      2      3      4     
## 2 1.0000 -      -      -     
## 3 0.0021 0.0209 -      -     
## 4 0.0389 0.7526 0.0049 -     
## 5 0.0209 0.1469 1.0000 0.0049
## 
## P value adjustment method: bonferroni

Problem 3

\(H_0\) The samples/groups are from identical populations \(H_1\) At least one of the samples/groups comes from a different population than the others.

From the Kruskal Wallis test, we obtain a p-value of 4.335e-05 < \(\alpha = 0.05\). So, we reject the \(H_0\) and conlcude that the at least one of the samples comes from a different population than the others.

site1<-c(46,28,46,37,32,41,42,45,38,44)
site2<-c(42,60,32,42,45,58,27,51,42,52)
site3<-c(38,33,26,25,28,28,26,27,27,27)
site4<-c(31,30,27,29,30,25,25,24,27,30)

kruskal.test(list(site1,site2,site3,site4))

## 
##  Kruskal-Wallis rank sum test
## 
## data:  list(site1, site2, site3, site4)
## Kruskal-Wallis chi-squared = 22.852, df = 3, p-value = 4.335e-05

sitedf <- data.frame(score=c(site1,site2,site3,site4), group=factor(c(rep(1,10),rep(2,10),rep(3,10),rep(4,10))))


pairwise.wilcox.test(sitedf$score,sitedf$group, p.adj = "none")

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties

## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  sitedf$score and sitedf$group 
## 
##   1       2       3      
## 2 0.25450 -       -      
## 3 0.00110 0.00140 -      
## 4 0.00074 0.00095 1.00000
## 
## P value adjustment method: none

Problem 4

library(gtools)
perm <- combinations(5,2)

k <- 4
n <- c(1,1,1,2)

summation1 <- c()
summation2 <- c()
for (i in 1:10)
{
  summation1[i] <- (2*((sum(perm[i,])/2 - (sum(n)+1)/2))^2)
  summation2[i] <- sum(((setdiff(c(1:5), perm[i,]) - (sum(n)+1)/2))^2)
}

summation3 <- summation1+summation2

H <- 12/(sum(n)*sum(n)+1)*summation3

H

##  [1] 4.3846154 3.6923077 2.5384615 0.9230769 4.3846154 3.6923077 2.5384615
##  [8] 4.3846154 3.6923077 4.3846154

hist(H, xlim=c(0,4.5))
axis(side=1, at=seq(0,4.5,0.5), labels=seq(0,4.5,0.5))

Problem 5

set.seed(1234)

test <-c(rnorm(20,sd=1), rnorm(20,sd=1), rnorm(20,sd=1),rnorm(20,sd=1),rnorm(20,sd=1), rnorm(20, sd=1))
         
dat <- data.frame(test,groups=c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20), rep(6,20)))
                  
pairwilcox <- pairwise.wilcox.test(dat$test, dat$groups, p.adj = "none")
pairwilcox

## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  dat$test and dat$groups 
## 
##   1      2      3      4      5     
## 2 0.4945 -      -      -      -     
## 3 0.2423 0.8620 -      -      -     
## 4 0.0911 0.0067 0.0122 -      -     
## 5 0.1081 0.0073 0.0245 0.9467 -     
## 6 0.4135 0.0524 0.0524 0.4135 0.2766
## 
## P value adjustment method: none

STAT Multi HW2

Kajal Chokshi

2/1/2019