Midterm

Question 1

# Read the Challenger CSV from the local Downloads folder into `data`.
data <- read.csv(file = "/Users/bellajean/Downloads/challenger.csv")

1A

# Attach psych so describe() is available, then print its summary of `data`.
library("psych")
describe(x = data)

##              vars  n  mean   sd median trimmed  mad  min  max range  skew
## launch          1 23 12.00 6.78   12.0   12.00 8.90  1.0 23.0    22  0.00
## temp            2 23 69.02 6.97   69.8   69.33 5.34 53.6 80.6    27 -0.40
## incident*       3 23  1.30 0.47    1.0    1.26 0.00  1.0  2.0     1  0.80
## o_ring_probs    4 23  0.43 0.79    0.0    0.26 0.00  0.0  3.0     3  1.81
##              kurtosis   se
## launch          -1.36 1.41
## temp            -0.44 1.45
## incident*       -1.42 0.10
## o_ring_probs     2.69 0.16

1B

Launch is nominal because, although it is numeric, the values carry no quantitative meaning in the data, and the order of the numbers does not matter.

Temp is an interval because the values have order, and there is no true zero-point.

Incident is a nominal binary because it only has 2 states (yes or no) and it is qualitative in nature.

o_ring_probs is ratio because it is a count of outcomes, and a count has a true zero-point (zero means no O-ring problems occurred).

1C

In most launches the O-rings do not experience partial failures, but there are 5 occurrences of 1 O-ring failure per launch, and fewer than 2 occurrences each of 2 or 3 partial O-ring failures per launch.

1D

1E

# Row indices that would sort the launches by temperature (ascending).
order(data[["temp"]])

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23

Launch 5

1F

Three incidents.

Question 2

2A

# Bayes' theorem for the probability of B' given A2:
# P(B'|A2) = (P(A2|B') * P(B')) / ((P(A2|B') * P(B')) + (P(A2|B) * P(B)))

a2Givennotb <- 0.59  # P(A2|B')
notb        <- 0.2   # P(B')
a2Givenb    <- 0.41  # P(A2|B)
b           <- 0.8   # P(B)

# NOTE(review): the 2B inputs (0.2, 0.198, 0.118) are only consistent with
# these priors if P(A2|B) = (0.198 - 0.118) / 0.8 = 0.10, not 0.41 -- confirm
# the value of a2Givenb against the problem statement.

# Each likelihood must be weighted by its prior; dividing the raw likelihoods
# alone ignores that B is four times as common as B'.
actualliar  <- (a2Givennotb * notb) / ((a2Givennotb * notb) + (a2Givenb * b))

actualliar

## [1] 0.264574

2B

# Addition rule: P(A2 or B') = P(A2) + P(B') - P(A2 and B')

a2 <- 0.2
notb <- 0.198
a2ANDnotb <- 0.118

# Subtract the overlap once so it is not counted twice.
actualliarORtestedliar <- (a2 + notb) - a2ANDnotb

print(actualliarORtestedliar)

## [1] 0.28

Question 3

3A

# Poisson inputs: a 0..10 grid, the rate, and the fraction of the interval.
# NOTE(review): `range` is not used by any visible statement -- confirm
# whether it is still needed.
range <- 0:10
lambda <- 10
t <- 8 / 10

# NOTE(review): the mean below uses lambda * t (= 8) while the variance and
# the ppois() call use lambda (= 10). A Poisson count has equal mean and
# variance, so confirm which rate the question intends.
expectedvalue <- lambda * t
variance <- lambda
stddev <- sqrt(variance)

print(expectedvalue)

## [1] 8

print(stddev)

## [1] 3.162278

# Upper tail: P(X > 8) for a Poisson count with rate 10.
ppois(8, lambda = 10, lower.tail = FALSE)

## [1] 0.6671803

3B

# Upper tail: P(X > 8) for X ~ Binomial(size = 10, prob = 1 - 0.1).
pbinom(8, size = 10, prob = 1 - 0.1, lower.tail = FALSE)

## [1] 0.7360989

# Binomial mean: size * prob.
expectedvalue <- 10 * 0.9
print(expectedvalue)

## [1] 9

# Binomial standard deviation: sqrt(size * prob * (1 - prob)).
stddev <- sqrt(expectedvalue * (1 - 0.9))
print(stddev)

## [1] 0.9486833

Question 4

4A

# Two wrong outcomes followed by one correct outcome.
probcorrect <- 1 / 4
probwrong <- 1 - probcorrect
third <- probwrong * probwrong * probcorrect

print(third)

## [1] 0.140625

4B

# P(X = 3 or X = 4 | N = 5), with X binomial and success probability 0.25.
exactly3or4 <- dbinom(x = 4, size = 5, prob = 0.25) +
  dbinom(x = 3, size = 5, prob = 0.25)
print(exactly3or4)

## [1] 0.1025391

4C

# P(X >= 3 | N = 5): more than half of 5 trials means at least 3 successes.

majority <- pbinom(2, size = 5, prob = 0.25, lower.tail = FALSE)
majority

## [1] 0.1035156

# Cross-check by summing the point probabilities over the same upper tail.
# The tail is X = 3, 4, 5; X = 2 is not a majority and must not be included.
majority2 <- sum(dbinom(3:5, size = 5, prob = 0.25))
majority2

## [1] 0.1035156

Question 5

5A1

# P(X < 80) for X ~ Normal(mean = 72.6, sd = 4.78).
pnorm(80, mean = 72.6, sd = 4.78)

## [1] 0.939203

5A2

# P(68 < X < 78): normal CDF at the upper bound minus the CDF at the lower.
diff(pnorm(c(68, 78), mean = 72.6, sd = 4.78))

## [1] 0.7027615

This makes sense because it means the majority of people drive between 68 and 78 mph. The mean speed lies between the two values, so there are enough values on either side of it within the range, which is why the majority lies in this range.

5A3

# Upper tail: P(X > 70) for X ~ Normal(mean = 72.6, sd = 4.78).
pnorm(70, mean = 72.6, sd = 4.78, lower.tail = FALSE)

## [1] 0.7067562

5B1

# 5th percentile of Normal(mean = 4313, sd = 583).
qnorm(0.05, mean = 4313, sd = 583)

## [1] 3354.05

5B2

# 90th percentile of Normal(mean = 5261, sd = 807).
qnorm(0.9, mean = 5261, sd = 807)

## [1] 6295.212