# Fetch the body-dimensions workspace and load it into the session.
# The download is skipped when a local copy already exists, so re-running the
# script neither repeats the transfer nor requires network access.
if (!file.exists("bdims.RData")) {
  download.file(
    "http://www.openintro.org/stat/data/bdims.RData",
    destfile = "bdims.RData",
    mode = "wb" # .RData is a binary format; request a binary transfer
  )
}

# Restore the objects saved in the workspace file; the statements below use
# an object named `bdims` from it.
load("bdims.RData")

# Preview the first rows of `bdims`.
head(bdims)
##   bia.di bii.di bit.di che.de che.di elb.di wri.di kne.di ank.di sho.gi
## 1   42.9   26.0   31.5   17.7   28.0   13.1   10.4   18.8   14.1  106.2
## 2   43.7   28.5   33.5   16.9   30.8   14.0   11.8   20.6   15.1  110.5
## 3   40.1   28.2   33.3   20.9   31.7   13.9   10.9   19.7   14.1  115.1
## 4   44.3   29.9   34.0   18.4   28.2   13.9   11.2   20.9   15.0  104.5
## 5   42.5   29.9   34.0   21.5   29.4   15.2   11.6   20.7   14.9  107.5
## 6   43.3   27.0   31.5   19.6   31.3   14.0   11.5   18.8   13.9  119.8
##   che.gi wai.gi nav.gi hip.gi thi.gi bic.gi for.gi kne.gi cal.gi ank.gi
## 1   89.5   71.5   74.5   93.5   51.5   32.5   26.0   34.5   36.5   23.5
## 2   97.0   79.0   86.5   94.8   51.5   34.4   28.0   36.5   37.5   24.5
## 3   97.5   83.2   82.9   95.0   57.3   33.4   28.8   37.0   37.3   21.9
## 4   97.0   77.8   78.8   94.0   53.0   31.0   26.2   37.0   34.8   23.0
## 5   97.5   80.0   82.5   98.5   55.4   32.0   28.4   37.7   38.6   24.4
## 6   99.9   82.5   80.1   95.3   57.5   33.0   28.0   36.6   36.1   23.5
##   wri.gi age  wgt   hgt sex
## 1   16.5  21 65.6 174.0   1
## 2   17.0  23 71.8 175.3   1
## 3   16.9  28 80.7 193.5   1
## 4   16.6  23 72.6 186.5   1
## 5   18.0  22 78.8 187.2   1
## 6   16.9  21 74.8 181.5   1
# Split `bdims` on the `sex` indicator: rows coded 1 go to `mdims`, rows
# coded 0 go to `fdims` (presumably male / female, going by the names).
# which() keeps only positions where the comparison is TRUE, so rows with a
# missing `sex` are left out of both groups.
mdims <- bdims[which(bdims$sex == 1), , drop = FALSE]
fdims <- bdims[which(bdims$sex == 0), , drop = FALSE]

Exercise 1

The men’s height distribution sits farther to the right (taller) and the women’s farther to the left, but both look approximately normal.

# Frequency histograms of height for each group.
hist(mdims$hgt)

hist(fdims$hgt)

# Mean and standard deviation of the fdims heights; these parameterise the
# normal curve drawn below and are reused by later statements.
fhgtmean <- mean(fdims$hgt)

fhgtsd   <- sd(fdims$hgt)

# Normal density evaluated on an integer grid of heights.
x <- 140:190

y <- dnorm(x = x, mean = fhgtmean, sd = fhgtsd)

# Density-scale histogram with the fitted normal curve on top. By default
# hist() picks the y-axis range from the bars alone, which can clip the peak
# of the curve, so the upper limit is taken from the taller of the two.
fhgt_bins <- hist(fdims$hgt, plot = FALSE)

hist(fdims$hgt, probability = TRUE, ylim = c(0, max(fhgt_bins$density, y)))

lines(x = x, y = y, col = "blue")

Exercise 2) Yes, the data appear to be approximately normal.

# Normal Q-Q plot of the fdims heights, with its reference line.
qqnorm(fdims$hgt)

qqline(fdims$hgt)

# Fix the random-number seed so the simulated sample, and every plot built
# from it, is the same on each run of the script.
set.seed(1234)

# Simulated normal sample with the same size, mean and standard deviation as
# the observed heights.
sim_norm <- rnorm(n = length(fdims$hgt), mean = fhgtmean, sd = fhgtsd)

Exercise 3) Not all of the points fall on the line, but the majority do. The real data are not as close to the line as the simulated data, but the two plots still look very similar.

# Normal Q-Q plot and reference line for the simulated sample, to compare
# against the plot of the observed heights.
qqnorm(y = sim_norm)
qqline(y = sim_norm)

# qqnormsim() is not defined in this file -- presumably it is restored from
# the loaded workspace (TODO confirm). Called here on the observed heights.
qqnormsim(fdims$hgt)

Exercise 4) The plot of the actual data looks very similar to the simulated plots, so our data appear to be nearly normal.

Exercise 5) Weight seems to be nearly normally distributed as well.

# Same qqnormsim() check as for height, applied to the fdims weights.
qqnormsim(fdims$wgt)

# P(height > 182) under the fitted normal model. lower.tail = FALSE requests
# the upper tail directly; computing 1 - pnorm() instead loses precision
# when the tail probability is small.
pnorm(q = 182, mean = fhgtmean, sd = fhgtsd, lower.tail = FALSE)
## [1] 0.004434387
# Observed counterpart: the mean of a logical vector is the proportion of
# TRUE values.
mean(fdims$hgt > 182)
## [1] 0.003846154

Exercise 6) Both variables show a similar discrepancy; weight possibly has a slightly smaller discrepancy between the empirical and normal-model probabilities.

# P(height > 160): normal-model upper tail (requested directly via
# lower.tail = FALSE rather than 1 - pnorm()), then the observed proportion.
pnorm(q = 160, mean = fhgtmean, sd = fhgtsd, lower.tail = FALSE)
## [1] 0.7717061
mean(fdims$hgt > 160)
## [1] 0.7307692
# Mean and standard deviation of the fdims weights for the normal model.
fwgtmean <- mean(fdims$wgt)

fwgtsd   <- sd(fdims$wgt)

# P(weight > 53): normal-model upper tail, then the observed proportion
# (mean of a logical vector = share of TRUE values).
pnorm(q = 53, mean = fwgtmean, sd = fwgtsd, lower.tail = FALSE)
## [1] 0.785357
mean(fdims$wgt > 53)
## [1] 0.8269231

On my own

    1. Plot B

    2. Plot C

    3. Plot D

    4. Plot A

  1. I believe it is due to the probability distribution (histogram) dropping or increasing sharply

  2. Right skewed

# Normal Q-Q plot, with reference line, for the `kne.di` column of fdims.
qqnorm(y = fdims$kne.di)
qqline(y = fdims$kne.di)

# Histogram of the same column, to check the shape seen in the Q-Q plot.
hist(x = fdims$kne.di)