Do new “directed reading activities” improve the reading ability of elementary school students, as measured by their Degree of Reading Power” (DRP) scores? A study assigns students at random to either the new method (treatment group, 21 students) or traditional teaching methods (control group, 23 students). To illustrate the process, let’s perform a permutation test by hand for a small random subset of the DRP data. Here is the subset:
Treatment Group: (57, 61)
Control Group: (42, 62, 41, 28)
total<-c(57, 61, 42, 62, 41, 28)
treatment<-c(57, 61)
control<-c(42, 62, 41, 28)
xbar_treatment<-mean(treatment)
xbar_control<-mean(control)
obs_stat3<-xbar_treatment-xbar_control
obs_stat3
## [1] 15.75
set.seed(4)
n<-length(total)
sample(1:6, n, replace = FALSE)
## [1] 3 6 5 4 2 1
new_treatment<-c(42, 28)
new_control<-c(41, 62, 61, 57)
xbar_new_trmt<-mean(new_treatment)
xbar_new_cont<-mean(new_control)
new_obs<-xbar_new_trmt-xbar_new_cont
new_obs
## [1] -20.25
set.seed(4)
bootstrap<-function(treatment, control, nsim){
n1<-length(treatment)
n2<-length(control)
alldat<-c(treatment, control)
permnull<-c()
for(i in 1:nsim){
permsamp<-sample(1:(n1+n2), n1, replace = FALSE)
thisxbar<-mean(alldat[permsamp])-mean(alldat[-permsamp])
permnull<-c(permnull, thisxbar)
}
return(permnull)
}
DRPbootci<-bootstrap(treatment, control, nsim = 20)
hist(DRPbootci)
library(tidyverse)
## -- Attaching packages ----------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
trees<-read.csv("nspines.csv",
header = TRUE)
str(trees)
## 'data.frame': 60 obs. of 2 variables:
## $ ns : Factor w/ 2 levels "n","s": 1 1 1 1 1 1 1 1 1 1 ...
## $ dbh: num 27.8 14.5 39.1 3.2 58.8 55.5 25 5.4 19 30.6 ...
head(trees)
## ns dbh
## 1 n 27.8
## 2 n 14.5
## 3 n 39.1
## 4 n 3.2
## 5 n 58.8
## 6 n 55.5
ggplot(trees, aes(x=ns, y=dbh))+
geom_boxplot()
north<-c(27.8, 14.5, 39.1, 3.2, 58.8, 55.5, 25.0, 5.4, 19.0, 30.6, 15.1, 3.6, 28.4, 15.0, 2.2, 14.2, 44.2, 25.7, 11.2, 46.8, 36.9, 54.1, 10.2, 2.5, 13.8, 43.5, 13.8, 39.7, 6.4, 4.8)
south<-c(44.4, 26.1, 50.4, 23.3, 39.5, 51.0, 48.1, 47.2, 40.3, 37.4, 36.8, 21.7, 35.7, 32.0, 40.4, 12.8, 5.6, 44.3, 52.9, 38.0, 2.6, 44.6, 45.5, 29.1, 18.7, 7.0, 43.8, 28.3, 36.9, 51.6)
xbar_north<-mean(north)
xbar_south<-mean(south)
obs_stat<-xbar_north-xbar_south
obs_stat
## [1] -10.83333
set.seed(5007)
bootstrapCI<-function(trees, nsim){
n<-length(north)
n2<-length(south)
bootCI<-c()
for(i in 1:1000){
bootSamp<-sample(1:n, n, replace = TRUE)
bootSamp2<-sample(1:n2, n2, replace = TRUE)
thisxbar<-mean(north[bootSamp])-mean(south[bootSamp2])
bootCI<-c(bootCI, thisxbar)
}
return(bootCI)
}
treesBootCI<-bootstrapCI(trees$dbh, nsim = 1000)
hist(treesBootCI)
# Quantile Method
quantile(treesBootCI, c(0.025, 0.975))
## 2.5% 97.5%
## -18.945167 -3.303333
# Hybrid Method
bootSE<-sd(treesBootCI)
conf_hybrid<-(mean(north)-mean(south))+c(-1, 1)*qt(0.975, df=59)*bootSE
conf_hybrid
## [1] -18.971358 -2.695309
# Two Sample T Test
t.test(north, south)
##
## Welch Two Sample t-test
##
## data: north and south
## t = -2.6286, df = 55.725, p-value = 0.01106
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -19.090199 -2.576468
## sample estimates:
## mean of x mean of y
## 23.70000 34.53333