1 Introduction

Some background information about the data set……

Make a clear problem statement….

2 EDA and Feature Engineering

This working dataset is about ……

# Read the hand span / height data (header row present). The code below
# relies on the columns Sex, Height and HandSpan.
handheight <- read.table("https://online.stat.psu.edu/stat501/sites/stat501/files/data/handheight.txt", header = TRUE)

# Integer row positions of the male records.
MID <- which(handheight$Sex == "Male")
# Logical mask used for the actual split: negative indexing with -MID would
# select *nothing* if MID were empty, whereas !is.male is always the
# correct complement.
is.male <- handheight$Sex %in% "Male"
MaleData <- handheight[is.male, ]
FealeData <- handheight[!is.male, ]  # non-male rows (original name kept)

# Empty plotting frame; the two groups are added below in separate colours.
# Height is on the x-axis and hand span on the y-axis, so the axis labels
# must be in that order.
plot(handheight$Height, handheight$HandSpan,
     type = "n",
     xlab = "Height",
     ylab = "Hand Span",
     main = "Hand Span vs Height",
     col.main = "navy",
     cex.main = 0.8,
     bty = "n")
# adjustcolor() (grDevices, attached by default) gives the 50% transparency
# without needing an add-on package for alpha().
points(handheight$Height[is.male], handheight$HandSpan[is.male],
       pch = 16, col = adjustcolor("darkred", alpha.f = 0.5))
points(handheight$Height[!is.male], handheight$HandSpan[!is.male],
       pch = 19, col = adjustcolor("blue", alpha.f = 0.5))
scatter plot

scatter plot

3 Parametric Confidence Intervals of Population Means

opening paragraph …….

For a normal population, we use the t-confidence interval, which has the form

\[ \bar{X} \pm t_{n-1, 1-\alpha/2} \times \frac{s}{\sqrt{n}} \]

where \(s\) is the sample standard deviation, …..

# 95% t-based confidence interval for the mean hand span.
span <- handheight$HandSpan        # target variable
n    <- length(span)               # sample size
xbar <- mean(span)                 # sample mean
xsd  <- sd(span)                   # sample standard deviation
cv   <- qt(0.975, n - 1)           # t critical value, n - 1 degrees of freedom

# Half-width of the interval: critical value times the standard error.
half.width <- cv * xsd / sqrt(n)
LCL <- xbar - half.width           # lower confidence limit
UCL <- xbar + half.width           # upper confidence limit

# Show both limits side by side as a one-row matrix.
cbind(LCL.95 = LCL, UCL.95 = UCL)
       LCL.95   UCL.95
[1,] 20.56789 21.15666

4 Bootstrap Confidence Intervals

# Bootstrap percentile confidence interval for the mean hand span.
# Note: results vary from run to run unless set.seed() is called beforehand.
span <- handheight$HandSpan     # the original sample
n <- length(span)               # resample size = original sample size
                                # (defined here so this chunk does not
                                # depend on an earlier section having run)
B <- 1000                       # number of bootstrap resamples
mean.vec <- numeric(B)          # preallocated storage for bootstrap means

for (i in seq_len(B)) {
  # Resample with replacement and record the resample mean.
  bt.sample <- sample(x = span, size = n, replace = TRUE)
  mean.vec[i] <- mean(bt.sample)
}
# The 2.5% and 97.5% quantiles of the bootstrap means form the 95% interval.
quantile(mean.vec, c(0.025, 0.975))
    2.5%    97.5% 
20.58076 21.17066 

5 Discussions and Conclusions

