ADEC7310 Assignment 3 again

1. A researcher wishes to conduct a study of the color preferences of new car buyers. Suppose that 50% of this population prefers the color red. If 20 buyers are randomly selected, what is the probability that between 9 and 12 (both inclusive) buyers would prefer red?

Thoughts: Binomial Distribution. The plotting below was the first time I have ever done this type of thing. I have never overlaid multiple colors onto a plot.

# Given/Parameters
n <- 20        # Number of buyers selected
p <- 0.50      # Probability of preferring red
x <- 0:n       # All possible outcomes (0 to 20)

# Calculate probabilities of all outcomes
probabilities <- dbinom(x, size = n, prob = p)

# Preparing to attempt plot of two colors
highlight_region <- x >= 9 & x <= 12

# Set up the empty plot area
plot(x, probabilities, 
     type = "n",  # Doesn't do anything yet
     main = "Car Color Preference\n(Binomial Distribution: n=20, p=0.50)",
     xlab = "Buyers Preferring Red",
     ylab = "Probability",
     las = 1)

# Add vertical lines for ALL bars (blue)
segments(x, 0, x, probabilities, lwd = 3, col = "blue")

# Add points on top of ALL bars (blue)
points(x, probabilities, pch = 19, col = "blue", cex = 1.2)

# Overlay the highlighted region (9 to 12) in red
highlight_x <- c(9, 10, 11, 12)
highlight_prob <- dbinom(9:12, size = 20, prob = 0.50)
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red", cex = 1.5)

#CDF (pbinom)
prob_answer_cdf <- pbinom(12, size = n, prob = p) - pbinom(8, size = n, prob = p)

# Return the answer
answer <- round(prob_answer_cdf, 4)
cat("ANSWER:", answer, "\n")

## ANSWER: 0.6167

2. A quality control inspector has drawn a sample of 13 light bulbs from a recent production lot. Suppose 20% of the bulbs in the lot are defective. What is the probability that less than 6 but more than 3 bulbs from the sample are defective?

# Given/Parameters
n <- 13
p <- 0.20
x <- 0:n

# Calculate probabilities of all outcomes
probabilities <- dbinom(x, size = n, prob = p)

# More than 3; less than 6
highlight_region <- x > 3 & x < 6

#Extracting desired values (9-12)
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]


# Plot
# Empty plot first
plot(x, probabilities, 
     type = "n",
     main = "Defective Light Bulbs\n(Binomial Distribution: n=13, p=0.20)",
     xlab = "Defective Bulbs",
     ylab = "Probability",
     las = 1)

# Vertical lines and points
segments(x, 0, x, probabilities, lwd = 3, col = "blue")
points(x, probabilities, pch = 19, col = "blue")


# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")

# Let's do some math
prob_cdf <- pbinom(5, n, p) - pbinom(3, n, p)

# Answer
answer <- round(prob_cdf, 4)
cat("ANSWER:", answer, "\n")

## ANSWER: 0.2226

3. The auto parts department of an automotive dealership sends out a mean of 4.2 special orders daily. What is the probability that, for any day, the number of special orders sent out will be no more than 3?

** Thoughts:** Definitely a Poisson Distribution because we are counting orders within a certain timeframe and that each event is independent.

# lambda
lambda <- 4.2 # The average rate of special orders

# What should the size of the vector be? 10, 15, 30? 
x <- 0:15

# Probabilities
probabilities <- dpois(x, lambda)


# "No more than 3" (ie x <= 3)
highlight_region <- x <= 3

# Highlight values
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]

# Start with an empty plot
plot(x, probabilities, 
     type = "n", 
     main = "Special Orders Per Day\n(Poisson Distribution: λ = 4.2)",
     xlab = "Special Orders",
     ylab = "Probability",
     las = 1) 

# Vertical lines and points
segments(x, 0, x, probabilities, lwd = 3, col = "blue")
points(x, probabilities, pch = 19, col = "blue")

# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")

# Math
cdf <- ppois(3, lambda)

# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)

## ANSWER: 0.3954

4. A pharmacist receives a shipment of 17 bottles of a drug and has 3 of the bottles tested. If 6 of the 17 bottles are contaminated, what is the probability that less than 2 of the tested bottles are contaminated?

Thoughts: This is a hypergeometric Distribution because we are sampling without replacement. There are only 2 types (contaminated and non-contaminated)

#What I know
N <- 17          # Population
C <- 6           # Contaminated bottles
n <- 3           # Sample size

good <- N - C    # Non-contaminated bottles 

# Vectors
x <- 0:3

# Probabilities
probabilities <- dhyper(x, C, good, n)

# "Less than 2" (ie x<2)
highlight_region <- x < 2

# Highlight values for plotting
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]

#Empty plot to start
plot(x, probabilities, 
     type = "n",
     main = "Contaminated Drug Bottles\n(Hypergeometric Distribution: N=17, C=6, n=3)",
     xlab = "Contaminated Bottles",
     ylab = "Probability",
     xlim = c(-0.5, 3.5),
     las = 1) 

# Vertical Lines and points
segments(x, 0, x, probabilities, lwd = 6, col = "blue")
points(x, probabilities, pch = 19, col = "blue")

# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 5, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")

text(x, probabilities, 
     labels = round(probabilities, 4),
     pos = 2, 
     cex = 1.1,
     font = 2)

# Math
cdf <- phyper(1, C, good, n)

# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)

## ANSWER: 0.7279

5. A town recently dismissed 6 employees in order to meet their new budget reductions. The town had 6 employees over 50 years of age and 19 under 50. If the dismissed employees were selected at random, what is the probability that more than 1 employee was over 50?

Thoughts: Hypergeometric Distribution because we are sampling without replacement. 2 results (over 50 and under 50)

# What I know
N <- 25
K <- 6
n <- 6

under50 <- N - K

# Possible employees over 50
x <- 0:6

# Hypergeometric calc
probabilities <- dhyper(x, K, under50, n)

# Highlight values for plotting
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]

# Setup empty plot area
plot(x, probabilities, 
     type = "n",
     main = "Dismissals by Age\n(Hypergeometric Distribution: N=25, K=6 over 50, n=6 dismissed)",
     xlab = "Employees Over 50 in Dismissed Group",
     ylab = "Probability",
     las = 1)
# Vertical Lines and points
segments(x, 0, x, probabilities, lwd = 6, col = "blue")
points(x, probabilities, pch = 19, col = "blue")

# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 5, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")

# Add labels 
text(x, probabilities, 
     labels = round(probabilities, 4),
     pos = 2, 
     font = 2)

# Math
cdf <- 1 - phyper(1, K, under50, n)

# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)

## ANSWER: 0.4529

6. The weights of steers in a herd are distributed normally. The variance is 90,000 and the mean steer weight is 800 lbs. Find the probability that the weight of a randomly selected steer is between 1040 and 1460 lbs.

mu <- 800
variance <- 90000
sigma <- sqrt(variance)

lower <- 1040
upper <- 1460

# 4 sigmas good enough?
x_val <-  seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y_val <-  dnorm(x_val, mu, sigma)

# PLOT????!!!
plot(x_val, y_val, type = "l",
     main = "Normal Distribution of Steer Weights",
     xlab = "Weight (lbs)",
     ylab = "Probability (Density"
     )
abline(v = mu, b = 1, col = "blue")

# Math 
prob <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)

cat("The probability of a steer weighing between 1040 and 1460 lbs is:",
    round(prob, 4))

## The probability of a steer weighing between 1040 and 1460 lbs is: 0.198

7. The diameters of ball bearings are distributed normally. The mean diameter is 106 millimeters and the standard deviation is 4 millimeters. Find the probability that the diameter of a selected bearing is between 103 and 111 millimeters.

mu <-  106
sigma <- 4
lower <- 103
upper <- 111

x_val <-  seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y_val <- dnorm(x_val, mu, sigma)

plot(x_val, y_val, type = "l",
     main = "Ball Bearing Diameters\n(Normal Distribution: μ=106 mm, σ=4 mm) ",
     xlab = "Diameter (mm)",
     ylab = "Probability (Density)"
     )
abline(v = mu, b = 1, col = "blue")

prob <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)

cat("The probability of a steer weighing between 1040 and 1460 lbs is:", round(prob, 4))

## The probability of a steer weighing between 1040 and 1460 lbs is: 0.6677

8. The lengths of nails produced in a factory are normally distributed with a mean of 3.34 centimeters and a standard deviation of 0.07 centimeters. Find the two lengths that separate the top 3% and the bottom 3%. These lengths could serve as limits used to identify which nails should be rejected.

mu <- 3.34
sigma <- 0.07

lower <- 0.03
upper <- 0.97

# qnorm finds the value at a specific probability
lower_limit <- qnorm(0.03, mu, sigma)
upper_limit <- qnorm(0.97, mu, sigma)

#Let's think about the plot
x <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y <- dnorm(x, mu, sigma)

plot(x, y, type = "l", col = "blue",
     main = "Nail Length Limits",
     xlab = "Nail Length (cm)",
     ylab = "Probability Density"
)
abline(v = lower_limit, col="red", lty=2)
abline(v = upper_limit, col="red", lty=2)

cat("ANSWER: Nails shorter than:", round(lower_limit, 4), "cm and nails longer than", round(upper_limit, 4), "cm, will be rejected")

## ANSWER: Nails shorter than: 3.2083 cm and nails longer than 3.4717 cm, will be rejected

9. A psychology professor assigns letter grades on a test according to the following scheme. A: Top 9% of scores B: Scores below the top 9% and above the bottom 63% C: Scores below the top 37% and above the bottom 17% D: Scores below the top 83% and above the bottom 8% F: Bottom 8% of scores Scores on the test are normally distributed with a mean of 75.8 and a standard deviation of 8.1. Find the minimum score required for an A grade. Round your answer to the nearest whole number, if necessary.

mu <- 75.8
sigma <- 8.1

# For an A
pA <- 0.91

# min score for an A
minA <- qnorm(pA, mu, sigma)

#Distribution
x <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y <- dnorm(x, mu, sigma)

#Plot
plot(x, y, type = "l", col = "green",
     main = "Test Score Distribution",
     xlab = "Test Score",
     ylab = "Probability Density")
abline(v = minA, col="red", lty=2)
text(minA, 0.01, round(minA), col="red", font=2)

cat("The minimum score for an A is:", round(minA))

## The minimum score for an A is: 87

10. Consider the probability that exactly 96 out of 155 computers will not crash in a day. Assume the probability that a given computer will not crash in a day is 61%. Approximate the (binomial) probability using the normal distribution.

# Binomial parameters
n <-  155
p <- 0.61
target <- 96

# Normal approximation
mu <-  n * p
sigma <-  sqrt(n * p * (1 - p))
mu

## [1] 94.55

sigma

## [1] 6.072438

# Continuity Correction
lower <- target - 0.5
upper <- target + 0.5
lower

## [1] 95.5

upper

## [1] 96.5

# Calculate using normal approximation
normal_approx <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)
normal_approx

## [1] 0.06378274

# Calculate z-scores
#z_lower <- (lower - mu) / sigma
#z_upper <- (upper - mu) / sigma

# Begin plotting normal approximation
x_min <- mu - (4 * sigma)
x_max <- mu + (4 * sigma)
x_val <- seq(x_min, x_max, length.out = 500)
prob_densities <- dnorm(x_val, mu, sigma)

# Set up the plot
plot(x_val, prob_densities,
     type = "l",
     lwd = 2,
     col = "black",
     main = "Normal Approximation to Binomial Distribution\nComputer Crashes (n=155, p=0.61)",
     xlab = "Computers Not Crashing",
     ylab = "Probability Density",
     las = 1)

answer  <-  round(normal_approx, 4)
cat("Answer using Normal Approximation P(X = 96): is APPROXIMATELY:", answer)

## Answer using Normal Approximation P(X = 96): is APPROXIMATELY: 0.0638

```