Question 1

# Read challenger.csv into the data frame `data` used by the Question 1 chunks.
data <- read.csv(file = "/Users/bellajean/Downloads/challenger.csv")
1A
# Attach psych for describe(), then print summary statistics for every column.
library("psych")
describe(x = data)
##              vars  n  mean   sd median trimmed  mad  min  max range  skew
## launch          1 23 12.00 6.78   12.0   12.00 8.90  1.0 23.0    22  0.00
## temp            2 23 69.02 6.97   69.8   69.33 5.34 53.6 80.6    27 -0.40
## incident*       3 23  1.30 0.47    1.0    1.26 0.00  1.0  2.0     1  0.80
## o_ring_probs    4 23  0.43 0.79    0.0    0.26 0.00  0.0  3.0     3  1.81
##              kurtosis   se
## launch          -1.36 1.41
## temp            -0.44 1.45
## incident*       -1.42 0.10
## o_ring_probs     2.69 0.16
1B

Launch is nominal because, although it is numerical, the numeric values have no meaning in the data, nor does the order of the numbers matter.

Temp is an interval because the values have order, and there is no true zero-point.

Incident is a nominal binary because it only has 2 states (yes or no) and it is qualitative in nature.

o_ring_probs is an interval because it is a count of outcomes and has no true zero-point.

1C

In most launches the O-ring does not experience any partial failures, but there are 5 occurrences of 1 O-ring failure per launch, and fewer than 2 occurrences each of 2 or 3 partial O-ring failures per launch.

1D

1E
# Row positions that would sort the temp column in ascending order.
order(data[["temp"]])
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23

Launch 5

1F

Three incidents.

Question 2

2A
# Bayes' rule for the probability of being an actual liar given a "liar" test:
#   P(B'|A2) = P(A2|B') * P(B') / (P(A2|B') * P(B') + P(A2|B) * P(B))
# In this chunk B' = the person lies (prior 0.2), B = the person is truthful
# (prior 0.8), and A2 = the test flags the person as a liar.

a2Givennotb <- 0.59  # P(A2 | B'): a liar is flagged as a liar
notb        <- 0.2   # P(B')
# NOTE(review): this was 0.41 (= 1 - 0.59), which is P(not flagged | liar),
# not P(flagged | truthful). 0.10 is the rate implied by the figures used in
# 2B: (0.198 - 0.118) / 0.8. Confirm against the problem statement.
a2Givenb    <- 0.10  # P(A2 | B): a truthful person is flagged as a liar
b           <- 0.8   # P(B)

# Each likelihood must be weighted by its prior; dividing the raw likelihoods
# alone just returns P(A2 | B') again.
actualliar  <- (a2Givennotb * notb) / ((a2Givennotb * notb) + (a2Givenb * b))

actualliar
## [1] 0.5959596
2B
# Addition rule: P(A2 ∪ B') = P(A2) + P(B') - P(A2 ∩ B')

a2        <- 0.2    # P(A2)
notb      <- 0.198  # P(B')
a2ANDnotb <- 0.118  # P(A2 ∩ B')

# The overlap is counted in both marginals, so subtract it once.
actualliarORtestedliar <- a2 + notb - a2ANDnotb

print(actualliarORtestedliar)
## [1] 0.28

Question 3

3A
# Poisson summary values followed by the upper-tail probability P(X > 8).
# NOTE(review): the mean below is lambda * t (8), while the variance and the
# ppois() call both use lambda (10). A Poisson variable has equal mean and
# variance, so confirm which rate the question intends.
# `range` and `t` shadow base R functions of the same name.
range         <- 0L:10L
lambda        <- 10
t             <- 0.8
expectedvalue <- t * lambda
variance      <- lambda
stddev        <- sqrt(variance)

print(expectedvalue)
## [1] 8
print(stddev)
## [1] 3.162278
ppois(8, lambda = 10, lower.tail = FALSE)
## [1] 0.6671803

3B
# Binomial with 10 trials and success probability 0.9.
# Upper tail P(X > 8), i.e. 9 or 10 successes.
pbinom(8, size = 10, prob = 1 - 0.1, lower.tail = FALSE)
## [1] 0.7360989
# Binomial mean: n * p.
expectedvalue <- 0.9 * 10
print(expectedvalue)
## [1] 9
# Binomial standard deviation: sqrt(n * p * (1 - p)), reusing the mean as n * p.
stddev <- sqrt((1 - 0.9) * expectedvalue)
print(stddev)
## [1] 0.9486833

Question 4

4A
# Probability that the first correct answer arrives on the third attempt:
# two wrong answers followed by one correct answer.
probcorrect <- 0.25
probwrong   <- 1 - probcorrect
third       <- probcorrect * probwrong^2

print(third)
## [1] 0.140625
4B
# P(X = 3 or X = 4) for X ~ Binomial(n = 5, p = 0.25):
# evaluate the pmf at both counts and add the two terms.
exactly3or4 <- sum(dbinom(c(4, 3), size = 5, prob = 0.25))
print(exactly3or4)
## [1] 0.1025391
4C
# P(X > 2.5 | N = 5): a majority of 5 trials means at least 3 successes.

# Upper tail of the binomial CDF: P(X > 2) = P(X >= 3).
majority <- pbinom(2, size = 5, prob = 0.25, lower.tail = FALSE)
majority
## [1] 0.1035156
# Cross-check from the pmf: P(X = 3) + P(X = 4) + P(X = 5).
# Summing X = 2 and X = 3 instead is not the "majority" event and does not
# reproduce `majority`.
majority2 <- sum(dbinom(3:5, size = 5, prob = 0.25))
majority2
## [1] 0.1035156

Question 5

5A1
# P(X < 80) for X ~ Normal(µ = 72.6, σ = 4.78): lower tail of the normal CDF.
pnorm(80, mean = 72.6, sd = 4.78)
## [1] 0.939203
5A2
# P(68 < X < 78): evaluate the CDF at both bounds and take upper minus lower.
diff(pnorm(c(68, 78), mean = 72.6, sd = 4.78))
## [1] 0.7027615

This makes sense because it means that the majority of people drive between 68 and 78 mph. The average speed lies between the two values, so there are enough values on either side of it for the majority to fall within this range.

5A3
# P(X > 70): upper tail of Normal(mean = 72.6, sd = 4.78).
pnorm(70, mean = 72.6, sd = 4.78, lower.tail = FALSE)
## [1] 0.7067562
5B1
# Value below which 5% of Normal(mean = 4313, sd = 583) falls.
qnorm(0.05, mean = 4313, sd = 583)
## [1] 3354.05
5B2
# Value below which 90% of Normal(mean = 5261, sd = 807) falls.
qnorm(0.9, mean = 5261, sd = 807)
## [1] 6295.212