library("psych")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- read.csv("/Users/pin.lyu/Desktop/challenger.csv")
describe(df)
## vars n mean sd median trimmed mad min max range skew
## launch 1 23 12.00 6.78 12.0 12.00 8.90 1.0 23.0 22 0.00
## temp 2 23 69.02 6.97 69.8 69.33 5.34 53.6 80.6 27 -0.40
## incident* 3 23 1.30 0.47 1.0 1.26 0.00 1.0 2.0 1 0.80
## o_ring_probs 4 23 0.43 0.79 0.0 0.26 0.00 0.0 3.0 3 1.81
## kurtosis se
## launch -1.36 1.41
## temp -0.44 1.45
## incident* -1.42 0.10
## o_ring_probs 2.69 0.16Launch: Ordinal
Temp: Interval
Incident: Nominal
o_ring_probs: Ratio
hist(df$o_ring_probs, xlab = "Launch", ylab = "Counts of failures", main = "Total counts of of O-ring partial failures experienced per flight")
boxplot(df$temp~df$incident, xlab = "Incident (Yes/No)", ylab = "Temperature")
As we can see from this graph, we have more failures at lower temperatures. Thus, perhaps colder temperature contributes to the likelihood of o-ring partial failures in flight. And given that on the day of the Challenger launch was 36 degrees Fahrenheit which is way below 70F where we see most no-incident launch occur, launching the shuttle at this temperature can increase the probability of o-ring failure.
df |>
arrange(df$o_ring_probs)
## launch temp incident o_ring_probs
## 1 5 66.2 No 0
## 2 6 66.2 No 0
## 3 7 66.2 No 0
## 4 8 66.2 No 0
## 5 9 66.2 No 0
## 6 10 68.0 No 0
## 7 12 69.8 No 0
## 8 14 69.8 No 0
## 9 15 69.8 No 0
## 10 16 71.6 No 0
## 11 17 73.4 No 0
## 12 19 75.2 No 0
## 13 20 75.2 No 0
## 14 21 78.8 No 0
## 15 22 78.8 No 0
## 16 23 80.6 No 0
## 17 2 57.2 Yes 1
## 18 3 57.2 Yes 1
## 19 4 62.6 Yes 1
## 20 11 69.8 Yes 1
## 21 13 69.8 Yes 1
## 22 18 75.2 Yes 2
## 23 1 53.6 Yes 3
As we can see from above, given the temperature and the status of incident, launch number 5 was the first successful launch occurred.
df_new <- df[df$temp > 65, ]
output <- nrow(df_new[df_new$incident == "Yes", ])
output
## [1] 3P(+|L) = 0.59, liars and tested positive
P(-|NL) = 0.9, non-liars tasted as non-liars
p(L) = 0.2, possibility of liars
\[ P(L|+)= \frac{P(+|L)P(L)}{P(+)} \]
pos <- 0.59
negative <- 0.90
p <- 0.2 # expected population of liars
p_prime <- 1 - p # expected population of non-liars
result <- (pos*p)/((pos*p)+(p_prime*(1-negative)))
round(result,4)
## [1] 0.596
P(L) = 0.2
P(+) = 0.198
\[ P(L ∩ +) = P(L) + P(+) - P(L+) \]
pL <- 0.2
p_plus <- 0.198
result <- pL+p_plus-(pL*p_plus)
result
## [1] 0.3584
lambda <- 1 * 8/10
x <- 1
ppois( q = x,
lambda = lambda,
lower.tail = F
)
## [1] 0.1912079
# Expected value
lambda
## [1] 0.8
# Standard deviation
sd <- sqrt(lambda)
sd
## [1] 0.8944272
# Parameters
n <- 10
x <- 8
p <- 1/10
pbinom( q = x,
size = n,
prob = p,
lower.tail = FALSE
)
## [1] 9.1e-09
# Expected value
ex <- n*p
ex
## [1] 1
# Standard deviation
sd <- n*p*(1-p)
sd
## [1] 0.9
# parameters
wrong_prob <- 3/4. # 3 ways to get wrong answers
right_prob <- 1/4 # 1 way to get the right answer
result <- (wrong_prob^2)*(right_prob)
Model chosen: Binomial
#probability statements
n <- 5
x1 <- 3
x2 <- 4
p <- 1/4
# Getting exactly 3 right
lower <- dbinom( x = x1,
size = n,
prob = p
)
# Getting exactly 4 right
higher <- dbinom( x = x2,
size = n,
prob = p
)
# Probability of getting, in total, 3 or 4 questions correct
result <- lower + higher #(A or B) = P(A)+P(B)
result
## [1] 0.1025391
majority threshold is at getting 3 correct, so we are trying to find getting >= 3 questions correct. due to our model is binomial, therefore, the x must equal to 2.
# Probability statement
n <- 5
x <- 2
p <- 1/4
# method 1 "cdf with "lower.tail = FALSE"
pbinom( q = x,
size = n,
prob = p,
lower.tail = F
)
## [1] 0.1035156
# method 2 "pdf approach"
a <- dbinom(x = 3,
size = n,
prob = p
)
b <- dbinom(x = 4,
size = n,
prob = p
)
c <- dbinom(x = 5,
size = n,
prob = p
)
a+b+c
## [1] 0.1035156
# Probability statement
std <- 4.78
mu <- 72.6
# (x < 80), therefore, lower.tail = TRUE
pnorm(q = 80,
mean = mu,
sd = std,
lower.tail = T
)
## [1] 0.939203
# (68 < x < 78)
lower <- pnorm(q = 68,
mean = mu,
sd = std,
lower.tail = T
)
upper <- pnorm(q = 78,
mean = mu,
sd = std,
lower.tail = T
)
result <- upper - lower
result
## [1] 0.7027615
# This answer does make sense, becasue our mean = 72.6 and 68 miles/hr to 78 miles/hr is about a little over 1 standard deviation away from the mean. Within one standard deviation away from the mean, 68% of the data should be included. And here we have 70% which justifies that we are a little more one standard deviation away from the mean. Therefore, the answer makes sense.
a3)
# (x > 70) -> lower.tail = FALSE
pnorm(q = 70,
mean = mu,
sd = std,
lower.tail = F
)
## [1] 0.7067562b1)
# Parameter setup (men)
mu_m <- 4313
std_m <- 583
# top 5% means x>95%, lower.tail = FALSE
qnorm(p = 0.95,
mean = mu_m,
sd = std_m,
lower.tail = F
)
## [1] 3354.05b2)
# Parameter setup (women)
mu_w <- 5261
std_w <- 807
# bottom 10%
qnorm(p = 0.10,
mean = mu_m,
sd = std_m,
lower.tail = T
)
## [1] 3565.855mymatrix = matrix(data = c(7,5,12,0,2,2,7,7,14), nrow=3,byrow=F)
colnames(mymatrix) = c("Survived","Died","Total")
rownames(mymatrix) = c("Drug1","Drug2","Total")
mymatrix
## Survived Died Total
## Drug1 7 0 7
## Drug2 5 2 7
## Total 12 2 14
prop.table(x = mymatrix, margin = 2)
## Survived Died Total
## Drug1 0.2916667 0.0 0.25
## Drug2 0.2083333 0.5 0.25
## Total 0.5000000 0.5 0.50
Mutually exclusive events cannot be independent unless the probability of one of the events is zero. the two possible outcomes of the Independent events can still happen, however, mutually exclusive means that when one happens the other outcome cannot happen at all.d)
# Set the mean and standard deviation
mu <- 75
sigma <- 3
# Generate a range of values around the mean
x <- seq(from = mu - 3*sigma,
to = mu + 3*sigma,
length.out = 1000
)
# Calculate the probability density function
pdf <- dnorm(x = x,
mean = mu,
sd = sigma
)
# Plot the normal distribution
plot(x = x,
y = pdf,
type = 'l',
col = 'red',
lwd = 2,
xlab = 'Grades',
ylab = 'Density',
main = 'Normal Distribution of Test score with Mean 75 and SD 3'
)