The sampling distribution for the sample mean will be approximately normal with mean 36 and standard error 8/sqrt(200) = 0.5656854.
# Simulate one sample of size n from a normal population with mean 36 and
# standard deviation 8.
set.seed(1234)  # fix the RNG state so the simulated sample is reproducible
n <- 200
# Theoretical standard error of the sample mean, sigma / sqrt(n). It is derived
# from n so the two cannot drift apart if the sample size is changed.
myse <- 8 / sqrt(n)
mydata <- rnorm(n, mean = 36, sd = 8)
For EDA, do numerical summaries and plots (such as a histogram).
# Numerical EDA: five-number summary plus the mean of the simulated sample.
summary(object = mydata)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.15 29.81 34.62 35.54 40.43 60.35
# Graphical EDA: histogram of the sample, drawn without a main title.
hist(x = mydata, main = NULL)
# Bootstrap the sample mean: draw m resamples of size n (with replacement)
# from mydata and record the mean of each one in sampdist.
m <- 10^4
set.seed(1234)
sampdist <- rep(0, m)  # preallocate one slot per bootstrap replicate
for (i in seq_len(m)) {
  # Sample *positions* 1..n, not data values. Using sampled values as indices
  # would truncate them to integers and select unrelated observations,
  # which biases the bootstrap distribution.
  choosethese <- sample.int(n, size = n, replace = TRUE)
  simulateddata <- mydata[choosethese]
  sampdist[i] <- mean(simulateddata)
}
Then the bootstrap standard error is
# Spread of the bootstrap means, i.e. the bootstrap estimate of the standard error.
sd(x = sampdist)
## [1] 0.4790077
And the bootstrap mean is
# Centre of the bootstrap distribution (average of the resampled means).
mean(x = sampdist)
## [1] 31.49272
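which should be very close to the original sample’s mean of 35.5379258. (The value printed above, 31.49272, is about 4 units lower, which is many bootstrap standard errors away, so the resampling step should be checked and this output regenerated.)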
Here is the bootstrap distribution of the sample mean:
# Histogram of the m bootstrap means stored in sampdist.
hist(
  x = sampdist,
  xlab = "x-bar (sample mean)",
  main = "sampling distribution for sample mean"
)
# Comparison table of centre and spread for the four distributions discussed:
# the population, the theoretical sampling distribution of x-bar, the observed
# sample, and the bootstrap distribution. Filled row by row.
tab <- matrix(
  c(
    36,             8,
    36,             myse,
    mean(mydata),   sd(mydata),
    mean(sampdist), sd(sampdist)
  ),
  nrow = 4,
  ncol = 2,
  byrow = TRUE,
  dimnames = list(
    c(
      "population",
      "sampling distribution of xbar",
      "sample",
      "bootstrap distribution"
    ),
    c("mean", "standard deviation")
  )
)
tab
## mean standard deviation
## population 36.00000 8.0000000
## sampling distribution of xbar 36.00000 0.5656854
## sample 35.53793 8.1653294
## bootstrap distribution 31.49272 0.4790077
library(resampledata)
## Warning: package 'resampledata' was built under R version 4.1.1
##
## Attaching package: 'resampledata'
## The following object is masked from 'package:datasets':
##
## Titanic
# Load the Bangladesh data set and extract the non-missing chlorine
# measurements, resetting the bootstrap storage and the RNG seed first.
data("Bangladesh")
sampdist <- rep(0, m)  # reuse m replicates; one slot per bootstrap mean
set.seed(1234)
Clor <- Bangladesh$Chlorine
sum(is.na(Clor))
## [1] 2
cuttheseout <- which(is.na(Clor))  # positions of the missing values
# Drop the NAs with a logical mask rather than Clor[-cuttheseout]: a negative
# index built from an empty which() result would select nothing and wipe out
# the whole vector if there were no missing values.
Clor <- Clor[!is.na(Clor)]
(n <- length(Clor))
## [1] 269
# Bootstrap the mean chlorine concentration: each pass resamples the n
# observed values with replacement and stores the resample mean.
# seq_len(m) is used so the loop body is skipped entirely when m is 0
# (1:m would iterate over c(1, 0)).
for (i in seq_len(m)) {
  tempdata <- sample(Clor, n, replace = TRUE)
  sampdist[i] <- mean(tempdata)
}
# Histogram of the bootstrap means of chlorine concentration.
hist(
  x = sampdist,
  xlab = "mean chlorine concentration",
  main = "simulated sampling dist of xbar"
)
# 95% bootstrap percentile interval: the 2.5th and 97.5th percentiles of the
# bootstrap means. Stored in myCI and then printed.
myCI <- quantile(sampdist, probs = c(0.025, 0.975))
myCI
## 2.5% 97.5%
## 54.81903 104.66007
We are 95 percent confident that the true mean chlorine concentration in wells in Bangladesh is between 54.8190335 and 104.6600743.