# Load the Pokemon milestone data set from the working directory.
Pokemon <- read.csv("Pokemon.csv")

# Fetch the body-dimensions workspace only when no local copy exists, so that
# re-running the script does not require network access every time.
# mode = "wb" makes the binary transfer explicit on every platform.
bdims_file <- "bdims.RData"
if (!file.exists(bdims_file)) {
  download.file(
    "http://www.openintro.org/stat/data/bdims.RData",
    destfile = bdims_file,
    mode = "wb"
  )
}

# load() restores the objects saved in the workspace file; the bdims data
# frame used throughout the rest of the script comes from here.
load(bdims_file)

# Preview the first rows of bdims.
head(bdims)
##   bia.di bii.di bit.di che.de che.di elb.di wri.di kne.di ank.di sho.gi che.gi
## 1   42.9   26.0   31.5   17.7   28.0   13.1   10.4   18.8   14.1  106.2   89.5
## 2   43.7   28.5   33.5   16.9   30.8   14.0   11.8   20.6   15.1  110.5   97.0
## 3   40.1   28.2   33.3   20.9   31.7   13.9   10.9   19.7   14.1  115.1   97.5
## 4   44.3   29.9   34.0   18.4   28.2   13.9   11.2   20.9   15.0  104.5   97.0
## 5   42.5   29.9   34.0   21.5   29.4   15.2   11.6   20.7   14.9  107.5   97.5
## 6   43.3   27.0   31.5   19.6   31.3   14.0   11.5   18.8   13.9  119.8   99.9
##   wai.gi nav.gi hip.gi thi.gi bic.gi for.gi kne.gi cal.gi ank.gi wri.gi age
## 1   71.5   74.5   93.5   51.5   32.5   26.0   34.5   36.5   23.5   16.5  21
## 2   79.0   86.5   94.8   51.5   34.4   28.0   36.5   37.5   24.5   17.0  23
## 3   83.2   82.9   95.0   57.3   33.4   28.8   37.0   37.3   21.9   16.9  28
## 4   77.8   78.8   94.0   53.0   31.0   26.2   37.0   34.8   23.0   16.6  23
## 5   80.0   82.5   98.5   55.4   32.0   28.4   37.7   38.6   24.4   18.0  22
## 6   82.5   80.1   95.3   57.5   33.0   28.0   36.6   36.1   23.5   16.9  21
##    wgt   hgt sex
## 1 65.6 174.0   1
## 2 71.8 175.3   1
## 3 80.7 193.5   1
## 4 72.6 186.5   1
## 5 78.8 187.2   1
## 6 74.8 181.5   1
# Split bdims by the sex code: rows coded 1 go to mdims, rows coded 0 go to
# fdims. which() discards rows where the comparison is NA, as subset() does.
mdims <- bdims[which(bdims$sex == 1), , drop = FALSE]
fdims <- bdims[which(bdims$sex == 0), , drop = FALSE]

# Mean and standard deviation of height (hgt) within fdims.
fhgtmean <- mean(fdims[["hgt"]])
fhgtsd <- sd(fdims[["hgt"]])

# Density-scaled histogram of the fdims heights, overlaid with the normal
# density that has the same mean and sd, evaluated on the grid 140, ..., 190.
hist(fdims$hgt, freq = FALSE)
x <- seq.int(140L, 190L)
y <- dnorm(x, fhgtmean, fhgtsd)
lines(x, y, col = "blue")

# Normal Q-Q plot of the fdims heights with a reference line.
qqnorm(fdims$hgt)
qqline(fdims$hgt)

# Draw a simulated normal sample with the same size, mean and sd as the
# observed heights, and plot its Q-Q plot as well. The simulated sample is
# normal by construction, so its plot shows how much wobble around the line
# to expect from sampling variation alone.
sim_norm <- rnorm(n = length(fdims$hgt), mean = fhgtmean, sd = fhgtsd)
qqnorm(sim_norm)
qqline(sim_norm)

# NOTE(review): qqnormsim() is not defined in this file; presumably it is
# supplied by the loaded lab workspace. Confirm it is available.
qqnormsim(fdims$hgt)

# Theoretical P(height > 182) under the fitted normal model.
# lower.tail = FALSE requests the upper tail directly rather than computing
# it as 1 minus the lower tail.
pnorm(q = 182, mean = fhgtmean, sd = fhgtsd, lower.tail = FALSE)
## [1] 0.004434387
# Empirical counterpart: the mean of a logical vector is the share of TRUEs,
# i.e. the proportion of observed heights above 182.
mean(fdims$hgt > 182)
## [1] 0.003846154
# 1.a) The histogram for female biiliac (pelvic) diameter (bii.di) belongs to
#      normal probability plot letter B.
with(fdims, {
  qqnorm(bii.di)
  qqline(bii.di)
})

# 1.b) The histogram for female elbow diameter (elb.di) belongs to normal
#      probability plot letter C.
with(fdims, {
  qqnorm(elb.di)
  qqline(elb.di)
})

# 1.c) The histogram for general age (age) belongs to normal probability plot
#      letter D. This one uses the full bdims data, not only fdims.
with(bdims, {
  qqnorm(age)
  qqline(age)
})

# 1.d) The histogram for female chest depth (che.de) belongs to normal
#      probability plot letter A.
with(fdims, {
  qqnorm(che.de)
  qqline(che.de)
})

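#2.) Note that normal probability plots C and D have a slight stepwise pattern. Why do you think this is the case?
#Because these variables are recorded at a limited number of distinct values, many observations share the same sample quantile; that value repeats across a run of theoretical quantiles until the plot steps up to the next sample quantile.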

#3.) As you can see, normal probability plots can be used both to assess normality and visualize skewness. Make a normal probability plot for female knee diameter (kne.di). Based on this normal probability plot, is this variable left skewed, symmetric, or right skewed? Use a histogram to confirm your findings.
# Normal Q-Q plot of knee diameter (kne.di) in fdims, with its reference line.
with(fdims, {
  qqnorm(kne.di)
  qqline(kne.di)
})

# Histogram of the same variable, asking for roughly 40 bins, to check the
# shape suggested by the Q-Q plot.
hist(fdims$kne.di, breaks = 40)

## MILESTONE DATA

#1. Restate your probability question from Lab 3, On Your Own - Milestone Data, Question 1.

# What is the probability that a Pokemon has a generation over 5?


#2.Identify the numeric variable you used to answer the probability question.


# Print every value of the Generation column of the Pokemon data.
Pokemon[["Generation"]]
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [297] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [334] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [371] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [408] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4
## [445] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [482] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [519] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5
## [556] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [593] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [630] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [667] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [704] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [741] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [778] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
#3. Produce a histogram and a Normal Q-Q plot of the numeric variable.

# Keep only the Pokemon whose Generation is greater than 5.
five_gen_up <- subset(Pokemon, Generation > 5)

# Normal Q-Q plot of Attack for that subset. The reference line makes
# departures from normality easier to judge.
qqnorm(five_gen_up$Attack)
qqline(five_gen_up$Attack)

# Print the Attack values being plotted.
five_gen_up$Attack
##  [1]  61  78 107  45  59  69  56  63  95  36  56  50  73  81  35  22  52  50  68
## [20]  38  45  65  65 100  82 124  80  48  48  48  80 110 150  50  52  72  48  80
## [39]  54  92  52 105  60  75  53  73  38  55  89 121  59  77  65  92  58  50  50
## [58]  75 100  80  70 110  66  66  66  66  90  85  95 100  69 117  30  70 131 131
## [77] 100 100 160 110 160 110
# Histogram of Attack for the subset; the title names the variable that is
# actually plotted (Attack), not Generation.
hist(
  five_gen_up$Attack,
  breaks = 10,
  main = "Attack of Pokemon with Generation > 5",
  xlab = "Attack Points"
)

#4. Assess the normality of the numeric variable. Are you surprised by what you see? Why or why not?

#Not at all; the empirical data are pretty much what I expected. It is accurate because we had our data ahead of time, so we basically knew what to expect from the data and the probability.


#5. Use the theoretical normal distribution to answer your probability question.


# Mean and standard deviation of Attack in the five_gen_up subset.
POKE_mean <- mean(five_gen_up$Attack)
POKE_sd <- sd(five_gen_up$Attack)

# P(Attack > 60) under a normal model with those parameters, requested as the
# upper tail directly.
pnorm(60, mean = POKE_mean, sd = POKE_sd, lower.tail = FALSE)
## [1] 0.7059679
#6.Compare the answer computed empirically (i.e., the answer from Lab 3, On Your Own - Milestone Data, Question 1) to the answer using the theoretical normal distribution. Which answer do you trust more? Why?


# The probability from the empirical method is 68.95%, compared to 72.40% from the normal distribution method. This is a difference of about 3.45 percentage points. I trust the empirical answer more because the normal distribution only approximates the data and therefore shows more error.


## Documentation: I worked with all my teammates.