# Fetch the OpenIntro body-dimensions workspace, restore the objects it
# contains into the current environment, and preview the first rows of bdims.
bdims_url <- "http://www.openintro.org/stat/data/bdims.RData"
bdims_file <- "bdims.RData"
download.file(url = bdims_url, destfile = bdims_file)
load(file = bdims_file)
head(bdims)
## bia.di bii.di bit.di che.de che.di elb.di wri.di kne.di ank.di sho.gi che.gi
## 1 42.9 26.0 31.5 17.7 28.0 13.1 10.4 18.8 14.1 106.2 89.5
## 2 43.7 28.5 33.5 16.9 30.8 14.0 11.8 20.6 15.1 110.5 97.0
## 3 40.1 28.2 33.3 20.9 31.7 13.9 10.9 19.7 14.1 115.1 97.5
## 4 44.3 29.9 34.0 18.4 28.2 13.9 11.2 20.9 15.0 104.5 97.0
## 5 42.5 29.9 34.0 21.5 29.4 15.2 11.6 20.7 14.9 107.5 97.5
## 6 43.3 27.0 31.5 19.6 31.3 14.0 11.5 18.8 13.9 119.8 99.9
## wai.gi nav.gi hip.gi thi.gi bic.gi for.gi kne.gi cal.gi ank.gi wri.gi age
## 1 71.5 74.5 93.5 51.5 32.5 26.0 34.5 36.5 23.5 16.5 21
## 2 79.0 86.5 94.8 51.5 34.4 28.0 36.5 37.5 24.5 17.0 23
## 3 83.2 82.9 95.0 57.3 33.4 28.8 37.0 37.3 21.9 16.9 28
## 4 77.8 78.8 94.0 53.0 31.0 26.2 37.0 34.8 23.0 16.6 23
## 5 80.0 82.5 98.5 55.4 32.0 28.4 37.7 38.6 24.4 18.0 22
## 6 82.5 80.1 95.3 57.5 33.0 28.0 36.6 36.1 23.5 16.9 21
## wgt hgt sex
## 1 65.6 174.0 1
## 2 71.8 175.3 1
## 3 80.7 193.5 1
## 4 72.6 186.5 1
## 5 78.8 187.2 1
## 6 74.8 181.5 1
# Split the measurements on the sex code: rows with sex == 1 go to mdims and
# rows with sex == 0 go to fdims. which() skips rows whose comparison is NA,
# which is the same row selection subset() performs.
mdims <- bdims[which(bdims$sex == 1), , drop = FALSE]
fdims <- bdims[which(bdims$sex == 0), , drop = FALSE]
# Histogram of the heights in the sex == 1 group
hist(x = mdims$hgt)
## Histogram of Women's Heights
hist(x = fdims$hgt)
### Normal Distribution ## normal curve plotted on top
# The sample mean and standard deviation of the female heights are the
# parameters of the normal curve drawn over the histogram.
fhgtsd <- sd(fdims$hgt)
fhgtmean <- mean(fdims$hgt)
# freq = FALSE puts the histogram on the density scale so that the bars and
# the curve share the same y axis.
hist(fdims$hgt, freq = FALSE)
# Evaluate the normal density at every whole number from 140 to 190.
x <- seq.int(from = 140L, to = 190L)
y <- dnorm(x, fhgtmean, fhgtsd)
lines(x, y, col = "blue")
## Is it normally distributed? ## I would say it appears to follow the normal curve pretty well.
# Normal Q-Q plot of the observed female heights, with its reference line.
qqnorm(y = fdims$hgt)
qqline(y = fdims$hgt)
# Draw one simulated normal sample with as many values as there are female
# rows, using the mean and standard deviation of the observed heights, and
# plot it the same way for comparison.
sim_norm <- rnorm(nrow(fdims), mean = fhgtmean, sd = fhgtsd)
qqnorm(y = sim_norm)
qqline(y = sim_norm)
# NOTE(review): qqnormsim() is not defined in this script; presumably it is
# restored by load("bdims.RData") -- confirm before running standalone.
qqnormsim(fdims$hgt)
##Does the normal probability plot for fdims$hgt look similar to the plots created for the simulated data? That is, do plots provide evidence that the female heights are nearly normal? ## All pieces of data would concur that the female heights are nearly normal. All the plots look different, but that is because they are designed to analyze normality in different ways.
## Histogram of female weights with a fitted normal curve
# The sample mean and standard deviation of the female weights are the
# parameters of the normal curve drawn over the histogram.
fwgtmean <- mean(fdims$wgt)
fwgtsd <- sd(fdims$wgt)
hist(fdims$wgt, probability = TRUE)
# Evaluate the density across the observed weights. The grid 140:190 used for
# the height plot does not fit here: this script records a normal-model
# probability of 0.9999791 for weights below 100, so a curve evaluated at
# 140:190 sits far in the upper tail and is not drawn over the bars.
x <- seq(from = min(fdims$wgt), to = max(fdims$wgt), length.out = 200)
y <- dnorm(x = x, mean = fwgtmean, sd = fwgtsd)
lines(x = x, y = y, col = "blue")
## Normal Q-Q plot of female weights
# The plot must use the weight column; it previously re-plotted fdims$hgt.
qqnorm(fdims$wgt)
qqline(fdims$wgt)
## weight would appear to be normal as well
# NOTE(review): the conclusion above was written when the Q-Q plot showed
# height rather than weight -- re-check it against the corrected plots.
# Theoretical probability, under the fitted normal model, that a woman's
# height exceeds 182.
1 - pnorm(182, mean = fhgtmean, sd = fhgtsd)
## [1] 0.004434387
# Empirical counterpart: the share of observed female heights above 182.
with(fdims, sum(hgt > 182) / length(hgt))
## [1] 0.003846154
# What is the probability of randomly selecting a woman who weighs more than
# 100? "More than" is the upper tail, so pnorm()'s default lower tail has to
# be flipped with lower.tail = FALSE.
# NOTE(review): the question was posed in pounds, but the OpenIntro
# description of bdims lists wgt in kilograms -- confirm the unit and convert
# the cutoff if pounds were really intended.
pnorm(q = 100, mean = fwgtmean, sd = fwgtsd, lower.tail = FALSE)
## The output recorded before this fix, 0.9999791, was the lower tail
## P(weight < 100); the upper tail is its complement, about 2.09e-05.
## Re-run to refresh the exact value.
# What is the probability of randomly selecting a woman who is shorter than
# 100cm? "Shorter than" is the lower tail, which pnorm() returns directly.
pnorm(q = 100, mean = fhgtmean, sd = fhgtsd)
## The output recorded before this fix, 1, was the upper tail
## P(height > 100); the lower tail is its complement, effectively 0.
## Re-run to refresh the exact value.
##1. match # a. c # b. a # c. d # d. b ##2. Why C and D slant upwards. #Most likely this indicates the graph is skewed right (or in the positive direction). ##3. Normal curve # error