testScores <- c(79,74,88,80,80,66,65,86,84,80,78,72,71,74,86,96,77,81,76,80,
76,75,78,87,87,74,85,84,76,77,76,74,85,74,76,77,76,74,81,76)
summary(testScores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 65.00 74.75 77.00 78.53 81.75 96.00
Since the median (77) and first quartile (74.75) are both above 70, there is some evidence that the alternative hypothesis is true.
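As a quick visual check, a histogram with the benchmark value marked shows most scores lying above 70:
hist(testScores, main="Test Scores", xlab="score")
abline(v=70, lty=2) # the benchmark value 70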
\[ H_0: \theta=70 \] \[ H_a: \theta >70 \] where \(\theta\) is the median score.
I choose \(\alpha=0.05\).
dat=testScores-70 # center the scores at the hypothesized median 70
table(sign(dat))
##
## -1 1
## 2 38
# 38 of the 40 scores are above 70
pbinom(2,40,.5) # P(X <= 2) for X ~ Binomial(40, 0.5); by symmetry this equals P(X >= 38), the one-sided p-value
## [1] 7.466952e-10
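As a sanity check on the symmetry argument, computing the upper tail directly gives the same value:
pbinom(37,40,.5, lower.tail = FALSE) # P(X >= 38), which matches P(X <= 2) above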
binom.test(38, 40, p = .5, alternative = "greater")
##
## Exact binomial test
##
## data: 38 and 40
## number of successes = 38, number of trials = 40, p-value =
## 7.467e-10
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.850848 1.000000
## sample estimates:
## probability of success
## 0.95
Since the p-value is \(<\alpha\), we reject the null hypothesis and conclude that the median test score is greater than 70.
In treatment 1, all four observations are larger than all four observations in treatment 2, so we can label the 8 observations L1, L2, L3, L4 (larger) and S1, S2, S3, S4 (smaller).
How many ways can we divide these into two groups of four? For example: {L1,L2,L3,S1} vs {L4,S2,S3,S4}, {L1,L2,L3,S2} vs {L4,S1,S3,S4}, {L1,L2,L3,S3} vs {L4,S1,S2,S4}, {L1,L2,L3,S4} vs {L4,S1,S2,S3}, and so on.
This would take forever…
library(gtools)
perms<-combinations(8,4) # every way to choose which 4 of the 8 observations land in treatment 1
nrow(perms)
## [1] 70
Since only 1 of these 70 equally likely assignments puts all four larger observations in treatment 1, the one-sided p-value is 1/70 ≈ 0.014.
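The same count (and hence the p-value) is available without gtools; a quick check:
1/choose(8,4) # = 1/70, about 0.0143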
siblings<-data.frame(hometown=c(rep("rural",24),rep("urban",17)),
siblings=c(3,2,1,1,2,1,3,2,2,2,2,5,1,4,1,1,1,1,6,2,2,
2,1,1,1,0,1,1,0,0,1,1,1,8,1,1,1,0,1,1,2))
plot(siblings) # quick look at the data: sibling counts by hometown
I choose \(\alpha=0.05\).
W=wilcox.test(siblings$siblings~siblings$hometown);W
## Warning in wilcox.test.default(x = c(3, 2, 1, 1, 2, 1, 3, 2, 2, 2, 2, 5, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: siblings$siblings by siblings$hometown
## W = 314.5, p-value = 0.001598
## alternative hypothesis: true location shift is not equal to 0
Since 0.0015976 < \(\alpha = .05\), we reject the null hypothesis and conclude that the number of siblings differs between rural and urban hometowns.
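To see what the reported W measures, it can be reconstructed from the pooled ranks (a sketch; wilcox.test uses the first factor level, here rural, as the first group):
r = rank(siblings$siblings) # midranks in the pooled sample (ties get averaged ranks)
sum(r[siblings$hometown=="rural"]) - 24*25/2 # rural rank sum minus its minimum possible value; should reproduce W = 314.5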
set.seed(1)
nsim=10000
permDiffs=rep(NA,nsim)
for (i in 1:nsim){
  # shuffle the sibling counts across hometown labels, then record the difference in group means
  new.dat=data.frame(sib=sample(siblings$siblings, replace = FALSE), home=siblings$hometown)
  permDiffs[i]=diff(tapply(new.dat$sib, new.dat$home, mean)) # urban mean - rural mean for this shuffle
}
boxplot(new.dat$sib~new.dat$home) # the last simulated permutation: the randomized groups look roughly the same
only.rural = subset(siblings, hometown=="rural")
mean.rural = mean(only.rural$siblings)
only.urban = subset(siblings, hometown=="urban")
mean.urban = mean(only.urban$siblings)
actual.diff1 = mean.rural-mean.urban # observed rural - urban difference
actual.diff2 = mean.urban-mean.rural # observed urban - rural difference
lower = sum(permDiffs<=actual.diff2) # permutations at least as extreme in the lower tail
upper = sum(permDiffs>=actual.diff1) # permutations at least as extreme in the upper tail
pv.b = (lower+upper)/nsim # two-sided permutation p-value
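As a visual check, plotting the permutation distribution with the observed differences marked shows how the two tails are counted:
hist(permDiffs, main="Permutation distribution", xlab="urban mean - rural mean")
abline(v=c(actual.diff1, actual.diff2), lty=2) # observed differences; the p-value counts shuffles at or beyond these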
Since 0.113 > \(\alpha=.05\), we fail to reject the null and cannot conclude there is a difference in mean number of siblings given hometown. Unlike the rank-based Wilcoxon test above, this mean-based statistic is sensitive to the 8-sibling outlier in the urban group, which helps explain why the two tests disagree.
To see how extreme values affect a mean-based test versus a rank-based one, build a fake data set in which group 2 contains several very large values.
fake = data.frame(a=c(1,3,5,7,9,11,13,15,16,3,3,3,3,2,4,6,8,100000,2000000,3000000,400000,50000,60000,70000),
                  b=c(rep(1,13),rep(2,11)))
tapply(fake$a,fake$b,mean)
## 1 2
## 7.076923e+00 5.163655e+05
t.test(fake$a~fake$b)
##
## Welch Two Sample t-test
##
## data: fake$a by fake$b
## t = -1.6917, df = 10, p-value = 0.1216
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1196448.1 163731.3
## sample estimates:
## mean in group 1 mean in group 2
## 7.076923e+00 5.163655e+05
wilcox.test(fake$a~fake$b)
## Warning in wilcox.test.default(x = c(1, 3, 5, 7, 9, 11, 13, 15, 16, 3, 3, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: fake$a by fake$b
## W = 30, p-value = 0.01704
## alternative hypothesis: true location shift is not equal to 0
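The extreme values in group 2 inflate its sample variance, so the t-test, which compares means, finds no significant difference, while the rank-based Wilcoxon test does. As an illustrative check that is robust to those extremes, compare the group medians:
tapply(fake$a, fake$b, median) # medians depend only on the ordering, not on how extreme the large values are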
# Treatment 1 data
trt1 <- c(21.9,20.2,19.4,20.3,19.6,20.4,18.4,20.1,22.0,18.9)
# Treatment 2 data
trt2 <- c(20.2,13.8,21.8,19.2,19.6,25.5,17.0,17.6,19.5,22.2)
a = ansari.test(trt1,trt2);a
## Warning in ansari.test.default(trt1, trt2): cannot compute exact p-value
## with ties
##
## Ansari-Bradley test
##
## data: trt1 and trt2
## AB = 64, p-value = 0.1707
## alternative hypothesis: true ratio of scales is not equal to 1
Based on these notes, http://users.stat.umn.edu/~helwig/notes/npde-Notes.pdf, I believe the Ansari-Bradley test is testing for equality of scale, here the variances. \[ H_0: Var(trt1)=Var(trt2) \] \[ H_a: Var(trt1) \neq Var(trt2) \] where \(Var(trt1)\) and \(Var(trt2)\) are the variances of each treatment.
Since 0.170695 > .05, we fail to reject the null and cannot conclude that the variances of the two treatments differ.
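One caveat: the Ansari-Bradley test assumes the two samples share a common location, so a common precaution (a sketch, not part of the analysis above) is to center each sample at its median before testing:
ansari.test(trt1 - median(trt1), trt2 - median(trt2)) # compares scales after removing any location difference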