5.8

5.8 A

The sampling distribution for the sample mean will be approximately normal with mean 36 and standard error 8/sqrt(200) = 0.5656854.

5.8 B

# Simulation setup for 5.8: one sample of size n from a Normal(36, 8)
# population, plus the theoretical standard error of the sample mean.
n <- 200
myse <- 8 / sqrt(n)

# Seed immediately before the draw so the sample is reproducible.
set.seed(1234)
mydata <- rnorm(n, mean = 36, sd = 8)

For EDA, do numerical summaries and plots (such as a histogram).

# Five-number summary (plus the mean) of the simulated sample.
summary(object = mydata)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.15   29.81   34.62   35.54   40.43   60.35
# Histogram of the simulated sample; main = NULL is passed for the title.
hist(x = mydata, main = NULL)

5.8 C

# Bootstrap distribution of the sample mean: draw m resamples of size n from
# mydata (with replacement) and store the mean of each resample in sampdist.
m <- 10^4
set.seed(1234)
sampdist <- rep(0, m)

for (i in seq_len(m)) {
  # Draw index positions, not data values. The previous version called
  # sample(mydata, ...) and then used the returned *values* as subscripts,
  # which R truncates to integers, so only a narrow range of positions in
  # mydata was ever selected and the bootstrap mean was badly biased.
  choosethese <- sample.int(length(mydata), size = n, replace = TRUE)
  simulateddata <- mydata[choosethese]
  sampdist[i] <- mean(simulateddata)
}
# NOTE: any output recorded in this document that was computed from sampdist
# before this fix (bootstrap mean / standard error) must be regenerated.

Then the standard error is

# Bootstrap standard error: the standard deviation of the m resample means
# stored in sampdist.
sd(sampdist)
## [1] 0.4790077

And the bootstrap mean is

# Centre of the bootstrap distribution: the average of the m resample means
# stored in sampdist.
mean(sampdist)
## [1] 31.49272

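For a correctly drawn bootstrap this should be very close to the original sample’s mean of 35.5379258, because a bootstrap distribution is centered at the sample mean. The value printed above (31.49) is about 4 units away, which is several standard errors, so it is not close; this points to a problem in how the resamples were drawn.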

Here is the simulated sampling distribution for the sample mean:

# Histogram of the resample means held in sampdist.
hist(
  x = sampdist,
  xlab = "x-bar (sample mean)",
  main = "sampling distribution for sample mean"
)

5.8 D

# Comparison table: centre and spread of the population, the theoretical
# sampling distribution of the mean, the observed sample, and the bootstrap
# distribution. Values are listed row by row (mean, then spread).
tab <- matrix(
  c(
    36,             8,
    36,             myse,
    mean(mydata),   sd(mydata),
    mean(sampdist), sd(sampdist)
  ),
  nrow = 4,
  ncol = 2,
  byrow = TRUE,
  dimnames = list(
    c("population", "sampling distribution of xbar", "sample", "bootstrap distribution"),
    c("mean", "standard deviation")
  )
)
tab
##                                   mean standard deviation
## population                    36.00000          8.0000000
## sampling distribution of xbar 36.00000          0.5656854
## sample                        35.53793          8.1653294
## bootstrap distribution        31.49272          0.4790077

5.11

# Attach the resampledata package and load the Bangladesh data set
# (presumably shipped with that package); its Chlorine column is the
# variable analysed in 5.11 B.
library(resampledata)
## Warning: package 'resampledata' was built under R version 4.1.1
## 
## Attaching package: 'resampledata'
## The following object is masked from 'package:datasets':
## 
##     Titanic
data("Bangladesh")

5.11 B

# Bootstrap distribution of the mean chlorine concentration.
# Reuses m (the number of resamples) from the 5.8 C chunk and overwrites
# sampdist and n with the values for this data set.
sampdist <- rep(0, m)
set.seed(1234)
Clor <- Bangladesh$Chlorine
sum(is.na(Clor))
## [1] 2
cuttheseout <- which(is.na(Clor))
# Drop missing values with a logical mask rather than Clor[-cuttheseout]:
# a negative subscript built from an empty which() result selects nothing,
# so the old form would wipe out the whole vector if there were no NAs.
Clor <- Clor[!is.na(Clor)]
(n <- length(Clor))
## [1] 269
for (i in seq_len(m)) {
  # One resample of the observed values, same size as the data.
  tempdata <- sample(Clor, n, replace = TRUE)
  sampdist[i] <- mean(tempdata)
}
hist(sampdist,
     main = "simulated sampling dist of xbar",
     xlab = "mean chlorine concentration")

5.11 C

# 95% bootstrap percentile interval: the 2.5th and 97.5th percentiles of the
# resample means in sampdist.
myCI <- quantile(sampdist, probs = c(0.025, 0.975))
myCI
##      2.5%     97.5% 
##  54.81903 104.66007

We are 95 percent confident that the true mean chlorine concentration in wells in Bangladesh is between 54.8190335 and 104.6600743.