# Attach psych first so describe() is available for the summary below.
library(psych)

# Read the Challenger launch records into a data frame.
# NOTE(review): the path is absolute and user-specific, so the script only
# runs on the original author's machine as written.
data <- read.csv(file = "/Users/bellajean/Downloads/challenger.csv")

# Print per-column descriptive statistics for the data frame.
describe(data)
## vars n mean sd median trimmed mad min max range skew
## launch 1 23 12.00 6.78 12.0 12.00 8.90 1.0 23.0 22 0.00
## temp 2 23 69.02 6.97 69.8 69.33 5.34 53.6 80.6 27 -0.40
## incident* 3 23 1.30 0.47 1.0 1.26 0.00 1.0 2.0 1 0.80
## o_ring_probs 4 23 0.43 0.79 0.0 0.26 0.00 0.0 3.0 3 1.81
## kurtosis se
## launch -1.36 1.41
## temp -0.44 1.45
## incident* -1.42 0.10
## o_ring_probs 2.69 0.16
Launch is nominal because, although it is numeric, the values carry no quantitative meaning in the data and the order of the numbers does not matter.
Temp is interval-scaled because the values are ordered, differences between values are meaningful, and there is no true zero point.
Incident is a nominal binary because it only has 2 states (yes or no) and it is qualitative in nature.
o_ring_probs is ratio-scaled because it is a count of outcomes: the values are ordered, differences between them are meaningful, and zero is a true zero point (no O-ring problems at all).
For most launches, the O-rings do not experience partial failures, but there are 5 occurrences of 1 O-ring failure per launch, and fewer than 2 occurrences each of 2 or 3 partial O-ring failures per launch.
# Row indices that would arrange the rows by ascending `temp`.
order(data[["temp"]])
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
Launch 5
Three incidents.
# Bayes' rule for the probability of B' given A2:
#   P(B'|A2) = P(A2|B') * P(B') / (P(A2|B') * P(B') + P(A2|B) * P(B))
# The priors P(B') and P(B) must weight the two likelihoods; dividing the
# likelihoods alone only gives the right answer when P(B') == P(B).
a2Givennotb <- 0.59 # P(A2|B')
notb <- 0.2         # P(B')
# P(A2|B). The previous value 0.41 is 1 - 0.59, i.e. the complement of
# P(A2|B'), which is P(not A2|B') rather than P(A2|B). The union calculation
# later in this file uses a joint probability of 0.118 (= 0.59 * 0.2) and a
# total of 0.198, which implies P(A2|B) * 0.8 = 0.08, i.e. P(A2|B) = 0.10.
# NOTE(review): 0.10 is inferred from those figures; confirm it against the
# problem statement.
a2Givenb <- 0.10
b <- 0.8            # P(B)
actualliar <- (a2Givennotb * notb) /
  ((a2Givennotb * notb) + (a2Givenb * b))
actualliar
## [1] 0.5959596
# Inclusion-exclusion for the union ("or") of two events:
#   P(A2 ∪ B') = P(A2) + P(B') - P(A2 ∩ B')
a2ANDnotb <- 0.118 # P(A2 ∩ B'), the overlap that would otherwise count twice
a2 <- 0.2          # P(A2)
notb <- 0.198      # P(B')
actualliarORtestedliar <- (a2 + notb) - a2ANDnotb
print(actualliarORtestedliar)
## [1] 0.28
# Poisson set-up: a rate `lambda` and an interval length `t`.
# Note that `range` and `t` reuse the names of base::range() and base::t();
# `range` is not read anywhere in this block.
lambda <- 10
t <- 8 / 10
range <- 0:10

# Expected count: rate times interval length.
expectedvalue <- t * lambda

# NOTE(review): the variance is taken from `lambda` alone (10) while the mean
# above uses `lambda * t` (8). A Poisson count has equal mean and variance, so
# one of the two is presumably not what was intended; confirm against the
# problem statement before relying on either figure.
variance <- lambda
stddev <- sqrt(variance)

print(expectedvalue)
## [1] 8
print(stddev)
## [1] 3.162278

# Upper tail P(X > 8) for a Poisson count with mean `lambda` (10).
ppois(8, lambda = lambda, lower.tail = FALSE)
## [1] 0.6671803
# Upper tail P(X > 8), i.e. P(X >= 9), for X ~ Binomial(size = 10, prob = 0.9).
pbinom(8, size = 10, prob = 1 - 0.1, lower.tail = FALSE)
## [1] 0.7360989

# Binomial mean: n * p.
expectedvalue <- 0.9 * 10
print(expectedvalue)
## [1] 9

# Binomial standard deviation: sqrt(n * p * (1 - p)), reusing the mean as n * p.
stddev <- sqrt((1 - 0.9) * expectedvalue)
print(stddev)
## [1] 0.9486833
# Single-trial success probability and its complement.
probcorrect <- 0.25
probwrong <- 1 - probcorrect

# First success on the third trial: two failures followed by one success.
third <- probcorrect * probwrong * probwrong
print(third)
## [1] 0.140625
# P(X = 3 or X = 4 | N = 5) for a binomial count with success probability
# 0.25: add the two point probabilities.
exactly3or4 <- dbinom(x = 3, size = 5, prob = 0.25) +
  dbinom(x = 4, size = 5, prob = 0.25)
print(exactly3or4)
## [1] 0.1025391
# P(X > 2.5 | N = 5), i.e. P(X >= 3): complement of the binomial CDF at 2.
majority <- 1 - pbinom(q = 2, size = 5, prob = 0.25)
print(majority)
## [1] 0.1035156

# Sum of the point probabilities at X = 2 and X = 3.
# NOTE(review): this is P(X = 2 or X = 3), which includes X = 2 and omits
# X = 4 and X = 5, so it is not a "more than half of 5" probability despite
# the name; confirm which quantity the question asks for.
majority2 <- dbinom(x = 2, size = 5, prob = 0.25) +
  dbinom(x = 3, size = 5, prob = 0.25)
print(majority2)
## [1] 0.3515625
# P(X < 80 | µ = 72.6, σ = 4.78): normal CDF evaluated at 80.
pnorm(80, mean = 72.6, sd = 4.78)
## [1] 0.939203

# P(68 < X < 78): CDF at the upper endpoint minus CDF at the lower endpoint.
diff(pnorm(c(68, 78), mean = 72.6, sd = 4.78))
## [1] 0.7027615
This makes sense: the mean speed (72.6 mph) lies between 68 and 78 mph, so the interval covers the center of the distribution on both sides of the mean, and the majority of drivers (about 70%) fall within this range.
# Upper tail P(X > 70) for X ~ Normal(mean = 72.6, sd = 4.78).
pnorm(70, mean = 72.6, sd = 4.78, lower.tail = FALSE)
## [1] 0.7067562
# 5th percentile of Normal(mean = 4313, sd = 583).
qnorm(0.05, mean = 4313, sd = 583)
## [1] 3354.05

# 90th percentile of Normal(mean = 5261, sd = 807).
qnorm(0.9, mean = 5261, sd = 807)
## [1] 6295.212