# Estimate the probability that an 8-year-old boy is shorter than 52 inches,
# assuming heights are Normal with mean 51 and sd 3.

# Fix the RNG seed so the printed results are the same on every run.
set.seed(1234)

# Simulated population: 10000 boys' heights, mean 51, sd 3.
boy.height <- rnorm(10000, mean = 51, sd = 3)

iter <- 10000 # number of repeated samples to draw
n <- 100 # number of boys in each sample

# One sample mean per iteration (preallocated).
means <- rep(NA_real_, iter)
for (i in seq_len(iter)) {
  boy.random <- sample(boy.height, n)
  means[i] <- mean(boy.random)
}

# Proportion of boys shorter than 52 inches, taken over the whole simulated
# population. Using only `boy.random` here would base the answer on the single
# last sample of 100 boys, which is a much noisier estimate.
prop.below.52 <- mean(boy.height < 52)
prop.below.52
# For comparison, the theoretical value pnorm(52, mean = 51, sd = 3) is
# about 0.63, i.e. roughly 63% of boys are shorter than 52 inches.
# Estimate the probability that a randomly chosen 8-year-old boy is taller
# than a randomly chosen 8-year-old girl.

# Simulated populations: 1000 boys (mean 51, sd 3) and 1000 girls
# (mean 53, sd 2.5).
boy.height <- rnorm(1000, mean = 51, sd = 3)
girl.height <- rnorm(1000, mean = 53, sd = 2.5)

iter <- 10000 # number of repeated samples to draw
n <- 100 # number of children in each sample

# Separate result vectors for boys and girls, so that the girls' sample means
# do not overwrite the boys' sample means.
boy.means <- rep(NA_real_, iter)
girl.means <- rep(NA_real_, iter)
# Per-iteration proportion of random boy/girl pairs in which the boy is taller.
boy.taller <- rep(NA_real_, iter)

for (i in seq_len(iter)) {
  boy.random <- sample(boy.height, n)
  girl.random <- sample(girl.height, n)
  boy.means[i] <- mean(boy.random)
  girl.means[i] <- mean(girl.random)
  # Both samples are in random order, so pairing them element by element
  # gives n random boy/girl pairs.
  boy.taller[i] <- mean(boy.random > girl.random)
}

# Means of the two sampling distributions.
mean(girl.means)
mean(boy.means)

# Probability that a boy is taller than a girl, averaged over every iteration
# rather than read off the last pair of samples only.
prob.boy.taller <- mean(boy.taller)
prob.boy.taller
# For comparison, the theoretical value for independent normal heights is
# pnorm(0, mean = 51 - 53, sd = sqrt(3^2 + 2.5^2), lower.tail = FALSE),
# which is about 0.30.
# Read the HRS data file; the analysis below only needs the BMI column (r1bmi).
hrs <- read.csv(file = "/Users/yunis/Desktop/HRS_w1sub.csv")
# Pull the BMI variable out once and reuse it for the quick checks.
hrs.bmi <- hrs$r1bmi
# First five BMI values.
head(hrs.bmi, n = 5)
## [1] 30.7 18.5 25.8 32.4 23.7
# Last five BMI values.
tail(hrs.bmi, n = 5)
## [1] 31.0 26.6 26.5 20.4 28.1
# Mean BMI over all rows.
mean(hrs.bmi)
## [1] 27.09804
## 1 - Histogram of the variable r1bmi ##
hist(
  x = hrs.bmi,
  col = "pink",
  border = "black",
  xlab = "Population BMI",
  main = " Population Mean"
)
# Dotted vertical line at the mean BMI.
abline(lty = 3, v = mean(hrs.bmi))

## 2 - Q-Q plot to examine the normality of the variable r1bmi ##
qqnorm(y = hrs.bmi, pch = 1)
# Reference line for the Q-Q plot.
qqline(y = hrs.bmi, lwd = 3, col = "#50b5b0")

## 3 - Mean of r1bmi ##
mean(x = hrs.bmi)
## [1] 27.09804
# The population mean of BMI is 27.09804.
## 4 - Draw 10,000 random samples of size n = 2 ##
iter <- 10000 # number of samples to draw (iterations), not the sample size
n <- 2 # size of each random sample
bmi.means <- rep(NA_real_, iter) # one sample mean per iteration

# Build the sampling distribution: draw a sample of size n and store its mean.
for (i in seq_len(iter)) {
  sample.bmi <- sample(hrs.bmi, n)
  bmi.means[i] <- mean(sample.bmi)
}

## 4a - Create a histogram of the sampling distribution ##
hist(bmi.means,
  main = "Sampling distribution n=2",
  xlab = "Sample mean BMI",
  border = "black",
  col = "pink"
)
# Dotted vertical line at the population mean, for reference.
abline(v = mean(hrs.bmi), lty = 3)

## 4b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n = 2")
qqline(bmi.means, col = "pink", lwd = 6)

## 4C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
# The mean of the sampling distribution is the mean of all `iter` sample means.
# Using mean(sample.bmi) here would report only the last sample of size n,
# which can be far from the population mean when n is small.
sample.mean <- mean(bmi.means)
sample.mean
# Difference between the population mean and the sampling-distribution mean.
population.mean - sample.mean
## 5 - Draw 10,000 random samples of size n = 10 ##
iter <- 10000 # number of samples to draw (iterations), not the sample size
n <- 10 # size of each random sample
bmi.means <- rep(NA_real_, iter) # one sample mean per iteration

# Build the sampling distribution: draw a sample of size n and store its mean.
for (i in seq_len(iter)) {
  sample.bmi <- sample(hrs.bmi, n)
  bmi.means[i] <- mean(sample.bmi)
}

## 5a - Create a histogram of the sampling distribution ##
hist(bmi.means,
  main = "Sampling distribution n=10",
  xlab = "Sample mean BMI",
  border = "black",
  col = "pink"
)
# Dotted vertical line at the population mean, for reference.
abline(v = mean(hrs.bmi), lty = 3)

## 5b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n=10")
qqline(bmi.means, col = "pink", lwd = 6)

## 5C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
# The mean of the sampling distribution is the mean of all `iter` sample means.
# Using mean(sample.bmi) here would report only the last sample of size n.
sample.mean <- mean(bmi.means)
sample.mean
# Difference between the population mean and the sampling-distribution mean.
population.mean - sample.mean
## 6 - Draw 10,000 random samples of size n = 1000 ##
iter <- 10000 # number of samples to draw (iterations), not the sample size
n <- 1000 # size of each random sample
bmi.means <- rep(NA_real_, iter) # one sample mean per iteration

# Build the sampling distribution: draw a sample of size n and store its mean.
for (i in seq_len(iter)) {
  sample.bmi <- sample(hrs.bmi, n)
  bmi.means[i] <- mean(sample.bmi)
}

## 6a - Create a histogram of the sampling distribution ##
hist(bmi.means,
  main = "Sampling distribution n=1000",
  xlab = "Sample mean BMI",
  border = "black",
  col = "pink"
)
# Dotted vertical line at the population mean, for reference.
abline(v = mean(hrs.bmi), lty = 3)

## 6b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n=1000")
qqline(bmi.means, col = "pink", lwd = 6)

## 6C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
# The mean of the sampling distribution is the mean of all `iter` sample means.
# Using mean(sample.bmi) here would report only the last sample of size n.
sample.mean <- mean(bmi.means)
sample.mean
# Difference between the population mean and the sampling-distribution mean.
population.mean - sample.mean
# Confirm (or reject) the two properties of the CLT by comparing the histogram outputs #
# Comparing all four graphical outputs, I reject the CLT properties for the sampling distribution with n = 2 and for the population distribution #