# YunisNi-Data306-Midterm

# Simulate a population of 10,000 8-year-old boys' heights (inches),
# normally distributed with mean 51 and sd 3. No RNG seed is set, so the
# simulated numbers vary slightly from run to run.
boy.height <- rnorm(10000, mean = 51, sd = 3)

iter <- 10000 # number of repeated samples to draw
n <- 100      # boys drawn (without replacement) in each sample

# Sampling distribution of the mean: the average height of each 100-boy
# sample, one entry per iteration.
means <- vapply(
  seq_len(iter),
  function(i) mean(sample(boy.height, n)),
  numeric(1)
)

# Proportion of boys shorter than 52 inches, taken over the whole simulated
# population. The earlier version evaluated this on only the last 100-boy
# sample left over from the loop, which made the printed value very noisy
# and ignored every other iteration.
prob.shorter <- mean(boy.height < 52)
prob.shorter

# Exact probability under the N(51, 3) model, for comparison (about 0.63).
pnorm(52, mean = 51, sd = 3)

# About 0.63, i.e. roughly 63% of boys are shorter than 52 inches.

# Simulate 1,000 8-year-old boys' heights (inches): mean 51, sd 3.
boy.height <- rnorm(1000, mean = 51, sd = 3)

# Simulate 1,000 8-year-old girls' heights (inches): mean 53, sd 2.5.
girl.height <- rnorm(1000, mean = 53, sd = 2.5)

iter <- 10000 # number of repeated samples to draw
n <- 100      # children drawn (without replacement) in each sample

# Sampling distributions of the mean height, kept in separate vectors so the
# girls' results no longer overwrite the boys' results.
boy.means <- vapply(
  seq_len(iter),
  function(i) mean(sample(boy.height, n)),
  numeric(1)
)
girl.means <- vapply(
  seq_len(iter),
  function(i) mean(sample(girl.height, n)),
  numeric(1)
)

# Centres of the two sampling distributions (close to 53 and 51).
mean(girl.means)
mean(boy.means)

# Probability that a randomly chosen boy is taller than a randomly chosen
# girl. The two vectors are independent draws of equal length, so comparing
# them element by element gives 1,000 independent boy/girl pairs. The earlier
# version used only the last 100-child samples left over from the loops.
prob.boy.taller <- mean(boy.height > girl.height)
prob.boy.taller

# Exact value under the model: boy - girl ~ N(51 - 53, sqrt(3^2 + 2.5^2)),
# so P(boy > girl) = P(difference > 0), which is about 0.30.
pnorm(0, mean = 51 - 53, sd = sqrt(3^2 + 2.5^2), lower.tail = FALSE)

# About 0.30, i.e. roughly 30% of the time a boy is taller than a girl.

# Read the HRS_w1sub.csv extract and pull out the BMI column (r1bmi), which
# is the variable analysed in the rest of the script.
# NOTE(review): the path is absolute and machine-specific.
hrs <- read.csv("/Users/yunis/Desktop/HRS_w1sub.csv")
hrs.bmi <- hrs$r1bmi

# First five BMI values.
head(hrs.bmi, n = 5)

## [1] 30.7 18.5 25.8 32.4 23.7

# Last five BMI values.
tail(hrs.bmi, n = 5)

## [1] 31.0 26.6 26.5 20.4 28.1

# Mean BMI across all rows.
mean(hrs.bmi)

## [1] 27.09804
# bmi variable #

## 1 - Histogram of r1bmi, with its mean marked by a dotted vertical line ##
hist(hrs.bmi, main = " Population Mean", xlab = "Population BMI",
     border = "black", col = "pink")
abline(v = mean(hrs.bmi), lty = "dotted")

## 2 - Q-Q plot of r1bmi against the normal distribution ##
# Points that follow the reference line indicate approximate normality.
qqnorm(hrs.bmi, pch = 1)
qqline(hrs.bmi, lwd = 3, col = "#50b5b0")

## 3 - Mean of r1bmi ##
mean(hrs.bmi)

## [1] 27.09804

# The population mean BMI is 27.09804.

## 4 - Draw 10,000 random samples of size n = 2 ##
iter <- 10000 # number of samples to draw
n <- 2        # observations in each sample (drawn without replacement)

# Sampling distribution of the mean: one sample mean per iteration.
bmi.means <- vapply(
  seq_len(iter),
  function(i) mean(sample(hrs.bmi, n)),
  numeric(1)
)

## 4a - Create a histogram of the sampling distribution ##
hist(bmi.means,
     main = "Sampling distribution n=2",
     xlab = "Sample mean BMI",
     border = "black",
     col = "pink"
)
abline(v = mean(hrs.bmi), lty = 3) # dotted line at the population mean

## 4b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n = 2")
qqline(bmi.means, col = "pink", lwd = 6)

## 4C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
population.mean

# Mean of the sampling distribution, i.e. the mean of all 10,000 sample means.
# The earlier version took the mean of only the last sample drawn, which is a
# single noisy sample mean and not the centre of the sampling distribution.
sample.mean <- mean(bmi.means)
sample.mean

# Difference between the two; it should be close to 0, and the exact value
# changes from run to run because no RNG seed is set.
population.mean - sample.mean

## 5 - Draw 10,000 random samples of size n = 10 ##
iter <- 10000 # number of samples to draw
n <- 10       # observations in each sample (drawn without replacement)

# Sampling distribution of the mean: one sample mean per iteration.
bmi.means <- vapply(
  seq_len(iter),
  function(i) mean(sample(hrs.bmi, n)),
  numeric(1)
)

## 5a - Create a histogram of the sampling distribution ##
hist(bmi.means,
     main = "Sampling distribution n=10",
     xlab = "Sample mean BMI",
     border = "black",
     col = "pink"
)
abline(v = mean(hrs.bmi), lty = 3) # dotted line at the population mean

## 5b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n=10")
qqline(bmi.means, col = "pink", lwd = 6)

## 5C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
population.mean

# Mean of the sampling distribution, i.e. the mean of all 10,000 sample means.
# The earlier version took the mean of only the last sample drawn, which is a
# single noisy sample mean and not the centre of the sampling distribution.
sample.mean <- mean(bmi.means)
sample.mean

# Difference between the two; it should be close to 0, and the exact value
# changes from run to run because no RNG seed is set.
population.mean - sample.mean

## 6 - Draw 10,000 random samples of size n = 1000 ##
iter <- 10000 # number of samples to draw
n <- 1000     # observations in each sample (drawn without replacement)

# Sampling distribution of the mean: one sample mean per iteration.
# sample() draws without replacement, so hrs.bmi must hold at least n values.
bmi.means <- vapply(
  seq_len(iter),
  function(i) mean(sample(hrs.bmi, n)),
  numeric(1)
)

## 6a - Create a histogram of the sampling distribution ##
hist(bmi.means,
     main = "Sampling distribution n=1000",
     xlab = "Sample mean BMI",
     border = "black",
     col = "pink"
)
abline(v = mean(hrs.bmi), lty = 3) # dotted line at the population mean

## 6b - Create a Q-Q plot of the sampling distribution ##
qqnorm(bmi.means, main = "Q-Q plot n=1000")
qqline(bmi.means, col = "pink", lwd = 6)

## 6C - Compute the mean of the sampling distribution and population mean ##
population.mean <- mean(hrs.bmi)
population.mean

# Mean of the sampling distribution, i.e. the mean of all 10,000 sample means.
# The earlier version took the mean of only the last sample drawn, which is a
# single sample mean and not the centre of the sampling distribution.
sample.mean <- mean(bmi.means)
sample.mean

# Difference between the two; it should be close to 0, and the exact value
# changes from run to run because no RNG seed is set.
population.mean - sample.mean

# Confirm (or reject) the two properties of the CLT by comparing the histogram outputs #
# By comparing all four graphical outputs, I reject the CLT properties for the n = 2 sample means and for the population distribution #