library(DATA606)
## Loading required package: shiny
## Warning: package 'shiny' was built under R version 3.5.2
## Loading required package: openintro
## Warning: package 'openintro' was built under R version 3.5.2
## Please visit openintro.org for free statistics materials
## 
## Attaching package: 'openintro'
## The following objects are masked from 'package:datasets':
## 
##     cars, trees
## Loading required package: OIdata
## Warning: package 'OIdata' was built under R version 3.5.2
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.5.2
## Loading required package: bitops
## Loading required package: maps
## Warning: package 'maps' was built under R version 3.5.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:openintro':
## 
##     diamonds
## Loading required package: markdown
## Warning: package 'markdown' was built under R version 3.5.2
## 
## Welcome to CUNY DATA606 Statistics and Probability for Data Analytics 
## This package is designed to support this course. The text book used 
## is OpenIntro Statistics, 3rd Edition. You can read this by typing 
## vignette('os3') or visit www.OpenIntro.org. 
##  
## The getLabs() function will return a list of the labs available. 
##  
## The demo(package='DATA606') will list the demos that are available.
## 
## Attaching package: 'DATA606'
## The following object is masked from 'package:utils':
## 
##     demo
# 3.2 Areas under the standard normal curve ----
# Upper-tail areas are requested with lower.tail = FALSE instead of
# 1 - pnorm(): once pnorm() is within machine precision of 1 the subtraction
# loses almost every significant digit (1 - pnorm(8) printed 6.661338e-16,
# whereas the actual tail area is 6.220961e-16).
# NOTE(review): normalPlot() comes from the packages attached by DATA606;
# presumably it shades the region between `bounds` - calls left unchanged.

## a) P(Z > -1.13)
pnorm(-1.13, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.8707619
## Ans: 87.08%
normalPlot(mean = 0, sd = 1, bounds = c(-1.13, 4), tails = FALSE)

## b) P(Z < 0.18)
pnorm(0.18, mean = 0, sd = 1)
## [1] 0.5714237
normalPlot(mean = 0, sd = 1, bounds = c(-4, 0.18), tails = FALSE)

## Ans: 57.14%
## c) P(Z > 8)
pnorm(8, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 6.220961e-16
normalPlot(mean = 0, sd = 1, bounds = c(8, 8.5), tails = FALSE)

## Ans: The probability is effectively 0%, because z = 8 lies at the extreme
## end of the upper tail.
## d) P(|Z| < 0.5) = P(Z < 0.5) - P(Z < -0.5)
pnorm(0.5, mean = 0, sd = 1) - pnorm(-0.5, mean = 0, sd = 1)
## [1] 0.3829249
normalPlot(mean = 0, sd = 1, bounds = c(-0.5, 0.5), tails = FALSE)

## Ans: 38.29%

# 3.4 Comparing two finishing times ----
# The values are finishing times, so a LOWER time (and a lower z-score) is the
# better result.
## a) Men, ages 30-34:   N(mu = 4313, sd = 583)
##    Women, ages 25-29: N(mu = 5261, sd = 807)
## b) Z-scores: number of group standard deviations above the group mean.
## Leo's z-score
leo_z <- (4948 - 4313) / 583
leo_z
## [1] 1.089194
## Mary's z-score
mary_z <- (5513 - 5261) / 807
mary_z
## [1] 0.3122677
## Both times are above (slower than) their group mean: Leo by 1.09 SD,
## Mary by 0.31 SD.
## c) Mary ranked better within her group: her z-score is lower, i.e. her time
##    is closer to her group's mean than Leo's time is to his.
## d) Share of his group that Leo finished faster than = share with a HIGHER
##    time, i.e. the upper tail (pnorm() alone gives the share who beat him).
leo_faster_than <- pnorm(4948, mean = 4313, sd = 583, lower.tail = FALSE)
leo_faster_than
## [1] 0.1380342
## Ans: Leo finished faster than about 13.8% of his group.
## e) Same upper-tail calculation for Mary.
mary_faster_than <- pnorm(5513, mean = 5261, sd = 807, lower.tail = FALSE)
mary_faster_than
## [1] 0.3774186
## Ans: Mary finished faster than about 37.74% of her group.
## f) If the distributions were not nearly normal, the z-scores in (b) could
##    still be computed, but the percentages in (d) and (e) rely on the normal
##    model and would no longer be valid.

# 3.18 a) Do the heights follow the 68-95-99.7% rule? ----
height <- c(
  54, 55, 56, 56, 57, 58, 58, 59, 60, 60, 60, 61, 61, 62, 62, 63, 63, 63,
  64, 65, 65, 67, 67, 69, 73
)
mean(height)
## [1] 61.52
sd(height)
## [1] 4.583667
# Standardise every observation so "within k SD" becomes |z| <= k.
zscore <- (height - mean(height)) / sd(height)
table(zscore)
## zscore
##   -1.6406079601232  -1.42244200797916  -1.20427605583512 
##                  1                  1                  2 
## -0.986110103691073 -0.767944151547031 -0.549778199402988 
##                  1                  2                  1 
## -0.331612247258946 -0.113446295114903   0.10471965702914 
##                  3                  2                  2 
##  0.322885609173182  0.541051561317225  0.759217513461268 
##                  3                  1                  2 
##   1.19554941774935   1.63188132203744   2.50454513061361 
##                  2                  1                  1
# The rule has to be checked against the OBSERVED share of heights in each
# band. Evaluating pnorm() at mean +/- k*sd with the same mean and sd returns
# roughly 0.68 / 0.95 / 0.997 for any data set, so it cannot test the data.
## Limits of the 1 SD band
mean(height) + sd(height)
## [1] 66.10367
mean(height) - sd(height)
## [1] 56.93633
## Observed share of heights within 1 SD of the mean (17 of 25)
within_1sd <- mean(abs(zscore) <= 1)
within_1sd
## [1] 0.68
## Limits of the 2 SD band
mean(height) + 2 * sd(height)
## [1] 70.68733
mean(height) - 2 * sd(height)
## [1] 52.35267
## Observed share of heights within 2 SD of the mean (24 of 25)
within_2sd <- mean(abs(zscore) <= 2)
within_2sd
## [1] 0.96
## Limits of the 3 SD band
mean(height) + 3 * sd(height)
## [1] 75.271
mean(height) - 3 * sd(height)
## [1] 47.769
## Observed share of heights within 3 SD of the mean (25 of 25)
within_3sd <- mean(abs(zscore) <= 3)
within_3sd
## [1] 1
## 68%, 96% and 100% are close to 68%, 95% and 99.7%, so the heights
## approximately follow the 68-95-99.7% rule.
## b) Graphical check: density histogram with a fitted normal curve next to a
##    normal Q-Q plot, followed by qqnormsim() on the same data.
par(mfcol = c(1, 2))
# freq = FALSE puts the histogram on the density scale so the curve overlays.
hist(height, freq = FALSE, xlab = "Heights", ylim = c(0, 0.10))
x <- seq(50, 75)
y <- dnorm(x, mean(height), sd(height))
lines(x, y, col = "blue")
qqnorm(height)
qqline(height)

qqnormsim(height)

## The histogram is roughly symmetric and unimodal.
## The Q-Q plot has most points close to the line, with a few outliers in the
## tails.
## The graphs show that the data appear to follow a normal distribution.

# 3.22 Defective transistors (geometric distribution) ----
## a) Defective rate is 2%: nine good transistors, then the first defect.
p_defect <- .02
(1 - p_defect)^9 * p_defect
## [1] 0.01667496
# Same value from the built-in geometric density (9 failures before success).
dgeom(9, prob = p_defect)
## [1] 0.01667496
## The probability is 1.67%
## b) Probability of a non-defective transistor is 0.98; 100 in a row:
0.98^100
## [1] 0.1326196
## c) Expected number of transistors until the first defect: 1/p = 50
1 / p_defect
## [1] 50
## Standard deviation is sqrt((1 - p) / p^2) = 49.50 transistors
sqrt((1 - p_defect) / p_defect^2)
## [1] 49.49747
## d) Now the defective rate is 5%: expected number is 1/.05 = 20
p_defect_d <- .05
1 / p_defect_d
## [1] 20
## Standard deviation is 19.49 transistors
sqrt((1 - p_defect_d) / p_defect_d^2)
## [1] 19.49359
## e) When the probability of a defect increases, both the mean and the
##    standard deviation of the waiting time get smaller.

# 3.38 Two boys among three children, P(boy) = 0.51 ----
## a) Binomial probability of exactly 2 boys in 3 births.
dbinom(x = 2, size = 3, prob = 0.51)
## [1] 0.382347
## b) There are 3 possible orderings of 3 children with exactly 2 boys:
## BBG, GBB, BGB
choose(n = 3, k = 2)
## [1] 3
## Add the probabilities of the three orderings listed above.
p_boy <- .51
p_girl <- .49
(p_boy * p_boy * p_girl) + (p_girl * p_boy * p_boy) + (p_boy * p_girl * p_boy)
## [1] 0.382347
## The answers match.
## c) The approach from part b would require writing down all 56 possible
##    orderings and then adding up their probabilities.
choose(n = 8, k = 3)
## [1] 56
# 3.42 Third successful serve on the 10th attempt (negative binomial) ----
## a) p = .15, n = 10, k = 3
p_success <- .15
n_serves <- 10
k_success <- 3
# Exactly k - 1 successes somewhere in the first n - 1 serves, then a success.
third_on_tenth <- choose(n_serves - 1, k_success - 1) *
  (1 - p_success)^(n_serves - k_success) * p_success^k_success
third_on_tenth
## [1] 0.03895012
# Cross-check with the built-in density: 7 failures before the 3rd success.
dnbinom(n_serves - k_success, size = k_success, prob = p_success)
## [1] 0.03895012
## Ans: 3.90% (0.03895 as a proportion, not 38.95%)
## b) The probability that the 10th serve is successful is 15%, because the
##    serves are independent.
## c) Part a is the joint probability of getting exactly 2 successes in the
##    first 9 serves AND a success on the 10th serve. In part b the first 9
##    serves are already known, so only the 10th serve's own success
##    probability is involved. Hence the difference in probabilities.