# The six observed values that get shuffled below.
SRS <- c(57, 61, 42, 62, 41, 28)

# One random reordering of all six values (drawn without replacement).
sample(SRS, size = 6, replace = FALSE)
## [1] 61 41 28 62 42 57

# Twenty independent reorderings; replicate() binds them into a matrix with
# one shuffle per column (6 rows by 20 columns).
replicate(n = 20, expr = sample(SRS, size = 6, replace = FALSE))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 28 62 57 57 61 61 57 28 28 41 57 61 28
## [2,] 41 42 62 28 42 41 41 61 62 62 61 57 61
## [3,] 42 41 61 61 62 42 62 42 42 57 28 28 42
## [4,] 62 57 42 42 41 57 42 62 41 42 41 62 62
## [5,] 57 28 28 41 57 28 28 41 57 61 42 42 57
## [6,] 61 61 41 62 28 62 61 57 61 28 62 41 41
## [,14] [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 42 61 28 57 62 61 41
## [2,] 28 62 57 62 61 42 62
## [3,] 61 41 61 41 42 57 42
## [4,] 57 42 41 61 57 28 61
## [5,] 41 28 62 42 41 41 28
## [6,] 62 57 42 28 28 62 57
# Twenty differences, each written as one group mean minus another.
# Stored once so the histogram, the tail count, and the proportion below all
# work from the same vector instead of three hand-typed copies.
x <- c(
  49 - 48.25, 51.5 - 47, 35 - 55.25, 44.5 - 50.5, 51.5 - 47,
  35 - 55.25, 42.5 - 51.5, 49 - 48.25, 35 - 55.25, 49 - 48.25,
  59 - 43.25, 52 - 46.75, 45 - 50.25, 59 - 43.25, 34.5 - 55.5,
  49.5 - 47.75, 51 - 47.25, 51.5 - 47, 52 - 46.75, 35 - 55.25
)
hist(x)

# Differences that are at least 15.75.
# NOTE(review): 15.75 is presumably the observed difference in means - confirm.
x[x >= 15.75]
## [1] 15.75 15.75

# Proportion of the differences in x that reach 15.75, computed from x itself.
# The earlier hard-coded 2/15 used a denominator that does not match the 20
# values in x.
prop_extreme <- mean(x >= 15.75)
prop_extreme
## [1] 0.1
library(readr)
# Read the call data; the parse message below reports a single double column
# named `length`.
calls80 <- read_csv(file = "calls80.csv")
## Parsed with column specification:
## cols(
## length = col_double()
## )

# Extract the `length` column as a plain vector and plot its distribution.
calls <- calls80[["length"]]
hist(calls)
#### The distribution is skewed to the right.
#' Bootstrap the sample mean.
#'
#' Draws `nsim` resamples from `data`, each the same size as `data` and taken
#' with replacement, and records the mean of every resample.
#'
#' @param data Vector of observations to resample.
#' @param nsim Number of bootstrap resamples to draw.
#' @return Numeric vector of length `nsim` with one resample mean per draw
#'   (length 0 when `nsim` is 0).
bootstrapCalls <- function(data, nsim) {
  n <- length(data)
  # Preallocate the result rather than growing it with c() on each iteration.
  bootCI <- numeric(nsim)
  # seq_len() runs zero iterations for nsim = 0; 1:nsim would count 1, 0.
  for (i in seq_len(nsim)) {
    # Indices 1..n drawn with replacement pick out one bootstrap resample.
    bootSamp <- sample.int(n, n, replace = TRUE)
    bootCI[i] <- mean(data[bootSamp])
  }
  bootCI
}
# Draw one set of 1000 bootstrap means and use it for both plots, so the
# histogram and the normal Q-Q plot show the same bootstrap distribution.
# Calling bootstrapCalls() once per plot would show two different random draws.
bootMeansCalls <- bootstrapCalls(calls, 1000)
hist(bootMeansCalls)
qqnorm(bootMeansCalls)
#### Judging from the Q-Q plot, the tails contain values larger than would be expected under a normal distribution.
# Ten call lengths used as the smaller sample for the bootstrap below.
callsSRS <- c(
  104, 102, 35, 211, 56,
  325, 67, 9, 179, 59
)
#' Bootstrap the sample mean.
#'
#' Draws `nsim` resamples from `data`, each the same size as `data` and taken
#' with replacement, and records the mean of every resample.
#'
#' @param data Vector of observations to resample.
#' @param nsim Number of bootstrap resamples to draw.
#' @return Numeric vector of length `nsim` with one resample mean per draw
#'   (length 0 when `nsim` is 0).
bootstrapCalls <- function(data, nsim) {
  n <- length(data)
  # Preallocate the result rather than growing it with c() on each iteration.
  bootCI <- numeric(nsim)
  # seq_len() runs zero iterations for nsim = 0; 1:nsim would count 1, 0.
  for (i in seq_len(nsim)) {
    # Indices 1..n drawn with replacement pick out one bootstrap resample.
    bootSamp <- sample.int(n, n, replace = TRUE)
    bootCI[i] <- mean(data[bootSamp])
  }
  bootCI
}
# Draw one set of 1000 bootstrap means from the 10-value sample and use it for
# both plots, so the histogram and the normal Q-Q plot show the same bootstrap
# distribution rather than two separate random draws.
bootMeansSRS <- bootstrapCalls(callsSRS, 1000)
hist(bootMeansSRS)
qqnorm(bootMeansSRS)
#### This distribution appears to be farther away from normal when assessing the qqPlot.
# Standard deviation of 10000 bootstrap means drawn from `calls`.
sd(x = bootstrapCalls(data = calls, nsim = 10000))
## [1] 37.615

# The same quantity for the 10-value `callsSRS` sample.
sd(x = bootstrapCalls(data = callsSRS, nsim = 10000))
## [1] 29.16442
library(readr)
# Read the pine data; the parse message below reports a character column `ns`
# and a double column `dbh`.
nspines <- read_csv(file = "/Volumes/raenlow/MATH239/nspines.csv")
## Parsed with column specification:
## cols(
## ns = col_character(),
## dbh = col_double()
## )

# Split by row position: rows 1-30 and rows 31-60.
# NOTE(review): presumably the file is ordered so these two halves line up
# with the two `ns` groups - confirm against the data.
Npine <- nspines[1:30, ]
Spine <- nspines[31:60, ]
#names(nspines)
library(tidyverse)
# Histogram of dbh with 7 bins, drawn in a separate panel for each value of ns.
ggplot(data = nspines, mapping = aes(x = dbh)) +
  geom_histogram(bins = 7) +
  facet_wrap(~ns)

# Boxplots of dbh for each value of ns, filled by ns.
ggplot(data = nspines, mapping = aes(x = ns, y = dbh, fill = ns)) +
  geom_boxplot()
#### Neither distribution appears to be approximately normal and the sample sizes of n=30 are barely of sufficient size if going by the rule of n=30. From these observations it may not be reasonable to use standard t procedures.
# Mean dbh of each subset, then the observed difference (Npine minus Spine).
meanspine <- mean(Spine[["dbh"]])
meannpine <- mean(Npine[["dbh"]])
meannpine - meanspine
## [1] -10.83333
#' Bootstrap the difference between two sample means.
#'
#' For each of `nsim` draws, resamples `data1` and `data2` independently (each
#' at its own size, with replacement) and records
#' mean(resample of data1) - mean(resample of data2).
#'
#' @param data1 Vector of observations for the first group.
#' @param data2 Vector of observations for the second group.
#' @param nsim Number of bootstrap draws.
#' @return Numeric vector of length `nsim` with one difference in means per
#'   draw (length 0 when `nsim` is 0).
bootStrapCI2 <- function(data1, data2, nsim) {
  n1 <- length(data1)
  n2 <- length(data2)
  # Preallocate the result rather than growing it with c() on each iteration.
  bootCI2 <- numeric(nsim)
  # seq_len() runs zero iterations for nsim = 0; 1:nsim would count 1, 0.
  for (i in seq_len(nsim)) {
    # Group 1 is resampled before group 2 on every draw.
    bootSamp1 <- sample.int(n1, n1, replace = TRUE)
    bootSamp2 <- sample.int(n2, n2, replace = TRUE)
    bootCI2[i] <- mean(data1[bootSamp1]) - mean(data2[bootSamp2])
  }
  bootCI2
}
# 1000 bootstrap differences in mean dbh (Npine minus Spine).
pinebootCI <- bootStrapCI2(data1 = Npine$dbh, data2 = Spine$dbh, nsim = 1000)
hist(pinebootCI)

# Percentile interval: the 2.5% and 97.5% quantiles of the bootstrap
# differences.
quantile(pinebootCI, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -18.57533 -2.77775
# Bootstrap standard error: standard deviation of the bootstrap differences.
bootSE <- sd(pinebootCI)

# t-based interval: observed difference in means plus or minus the 0.975
# t quantile on 58 degrees of freedom times the bootstrap standard error.
(mean(Npine$dbh) - mean(Spine$dbh)) + c(-1, 1) * qt(p = 0.975, df = 58) * bootSE
## [1] -18.749693 -2.916974

# Mean of the bootstrap differences minus the observed difference.
mean(pinebootCI) - (meannpine - meanspine)
## [1] -0.1196967
# Welch two-sample t test comparing mean dbh of Npine against Spine.
# The call previously passed Npine$dbh as both samples, which compares the
# group with itself and always gives t = 0 and p-value = 1.
# NOTE: the output recorded below this call came from that self-comparison;
# re-run to refresh it.
t.test(Npine$dbh, Spine$dbh)
##
## Welch Two Sample t-test
##
## data: Npine$dbh and Npine$dbh
## t = 0, df = 58, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.044798 9.044798
## sample estimates:
## mean of x mean of y
## 23.7 23.7