Thoughts: Binomial Distribution. The plotting below was the first time I have ever done this type of thing. I have never overlaid multiple colors onto a plot.
# Given/Parameters
n <- 20 # Number of buyers selected
p <- 0.50 # Probability of preferring red
x <- 0:n # All possible outcomes (0 to 20)
# Calculate probabilities of all outcomes
probabilities <- dbinom(x, size = n, prob = p)
# Preparing to attempt plot of two colors
highlight_region <- x >= 9 & x <= 12
# Set up the empty plot area
plot(x, probabilities,
type = "n", # Doesn't do anything yet
main = "Car Color Preference\n(Binomial Distribution: n=20, p=0.50)",
xlab = "Buyers Preferring Red",
ylab = "Probability",
las = 1)
# Add vertical lines for ALL bars (blue)
segments(x, 0, x, probabilities, lwd = 3, col = "blue")
# Add points on top of ALL bars (blue)
points(x, probabilities, pch = 19, col = "blue", cex = 1.2)
# Overlay the highlighted region (9 to 12) in red
highlight_x <- c(9, 10, 11, 12)
highlight_prob <- dbinom(9:12, size = 20, prob = 0.50)
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red", cex = 1.5)
#CDF (pbinom)
prob_answer_cdf <- pbinom(12, size = n, prob = p) - pbinom(8, size = n, prob = p)
# Return the answer
answer <- round(prob_answer_cdf, 4)
cat("ANSWER:", answer, "\n")
## ANSWER: 0.6167
# Given/Parameters
n <- 13
p <- 0.20
x <- 0:n
# Calculate probabilities of all outcomes
probabilities <- dbinom(x, size = n, prob = p)
# More than 3; less than 6
highlight_region <- x > 3 & x < 6
#Extracting desired values (9-12)
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]
# Plot
# Empty plot first
plot(x, probabilities,
type = "n",
main = "Defective Light Bulbs\n(Binomial Distribution: n=13, p=0.20)",
xlab = "Defective Bulbs",
ylab = "Probability",
las = 1)
# Vertical lines and points
segments(x, 0, x, probabilities, lwd = 3, col = "blue")
points(x, probabilities, pch = 19, col = "blue")
# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")
# Let's do some math
prob_cdf <- pbinom(5, n, p) - pbinom(3, n, p)
# Answer
answer <- round(prob_cdf, 4)
cat("ANSWER:", answer, "\n")
## ANSWER: 0.2226
** Thoughts:** Definitely a Poisson Distribution because we are counting orders within a certain timeframe and that each event is independent.
# lambda
lambda <- 4.2 # The average rate of special orders
# What should the size of the vector be? 10, 15, 30?
x <- 0:15
# Probabilities
probabilities <- dpois(x, lambda)
# "No more than 3" (ie x <= 3)
highlight_region <- x <= 3
# Highlight values
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]
# Start with an empty plot
plot(x, probabilities,
type = "n",
main = "Special Orders Per Day\n(Poisson Distribution: λ = 4.2)",
xlab = "Special Orders",
ylab = "Probability",
las = 1)
# Vertical lines and points
segments(x, 0, x, probabilities, lwd = 3, col = "blue")
points(x, probabilities, pch = 19, col = "blue")
# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 3, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")
# Math
cdf <- ppois(3, lambda)
# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)
## ANSWER: 0.3954
Thoughts: This is a hypergeometric Distribution because we are sampling without replacement. There are only 2 types (contaminated and non-contaminated)
#What I know
N <- 17 # Population
C <- 6 # Contaminated bottles
n <- 3 # Sample size
good <- N - C # Non-contaminated bottles
# Vectors
x <- 0:3
# Probabilities
probabilities <- dhyper(x, C, good, n)
# "Less than 2" (ie x<2)
highlight_region <- x < 2
# Highlight values for plotting
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]
#Empty plot to start
plot(x, probabilities,
type = "n",
main = "Contaminated Drug Bottles\n(Hypergeometric Distribution: N=17, C=6, n=3)",
xlab = "Contaminated Bottles",
ylab = "Probability",
xlim = c(-0.5, 3.5),
las = 1)
# Vertical Lines and points
segments(x, 0, x, probabilities, lwd = 6, col = "blue")
points(x, probabilities, pch = 19, col = "blue")
# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 5, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")
text(x, probabilities,
labels = round(probabilities, 4),
pos = 2,
cex = 1.1,
font = 2)
# Math
cdf <- phyper(1, C, good, n)
# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)
## ANSWER: 0.7279
Thoughts: Hypergeometric Distribution because we are sampling without replacement. 2 results (over 50 and under 50)
# What I know
N <- 25
K <- 6
n <- 6
under50 <- N - K
# Possible employees over 50
x <- 0:6
# Hypergeometric calc
probabilities <- dhyper(x, K, under50, n)
# Highlight values for plotting
highlight_x <- x[highlight_region]
highlight_prob <- probabilities[highlight_region]
# Setup empty plot area
plot(x, probabilities,
type = "n",
main = "Dismissals by Age\n(Hypergeometric Distribution: N=25, K=6 over 50, n=6 dismissed)",
xlab = "Employees Over 50 in Dismissed Group",
ylab = "Probability",
las = 1)
# Vertical Lines and points
segments(x, 0, x, probabilities, lwd = 6, col = "blue")
points(x, probabilities, pch = 19, col = "blue")
# Overlay
segments(highlight_x, 0, highlight_x, highlight_prob, lwd = 5, col = "red")
points(highlight_x, highlight_prob, pch = 19, col = "red")
# Add labels
text(x, probabilities,
labels = round(probabilities, 4),
pos = 2,
font = 2)
# Math
cdf <- 1 - phyper(1, K, under50, n)
# Answer
answer <- round(cdf, 4)
cat("ANSWER:", answer)
## ANSWER: 0.4529
mu <- 800
variance <- 90000
sigma <- sqrt(variance)
lower <- 1040
upper <- 1460
# 4 sigmas good enough?
x_val <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y_val <- dnorm(x_val, mu, sigma)
# PLOT????!!!
plot(x_val, y_val, type = "l",
main = "Normal Distribution of Steer Weights",
xlab = "Weight (lbs)",
ylab = "Probability (Density"
)
abline(v = mu, b = 1, col = "blue")
# Math
prob <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)
cat("The probability of a steer weighing between 1040 and 1460 lbs is:",
round(prob, 4))
## The probability of a steer weighing between 1040 and 1460 lbs is: 0.198
mu <- 106
sigma <- 4
lower <- 103
upper <- 111
x_val <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y_val <- dnorm(x_val, mu, sigma)
plot(x_val, y_val, type = "l",
main = "Ball Bearing Diameters\n(Normal Distribution: μ=106 mm, σ=4 mm) ",
xlab = "Diameter (mm)",
ylab = "Probability (Density)"
)
abline(v = mu, b = 1, col = "blue")
prob <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)
cat("The probability of a steer weighing between 1040 and 1460 lbs is:", round(prob, 4))
## The probability of a steer weighing between 1040 and 1460 lbs is: 0.6677
mu <- 3.34
sigma <- 0.07
lower <- 0.03
upper <- 0.97
# qnorm finds the value at a specific probability
lower_limit <- qnorm(0.03, mu, sigma)
upper_limit <- qnorm(0.97, mu, sigma)
#Let's think about the plot
x <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y <- dnorm(x, mu, sigma)
plot(x, y, type = "l", col = "blue",
main = "Nail Length Limits",
xlab = "Nail Length (cm)",
ylab = "Probability Density"
)
abline(v = lower_limit, col="red", lty=2)
abline(v = upper_limit, col="red", lty=2)
cat("ANSWER: Nails shorter than:", round(lower_limit, 4), "cm and nails longer than", round(upper_limit, 4), "cm, will be rejected")
## ANSWER: Nails shorter than: 3.2083 cm and nails longer than 3.4717 cm, will be rejected
mu <- 75.8
sigma <- 8.1
# For an A
pA <- 0.91
# min score for an A
minA <- qnorm(pA, mu, sigma)
#Distribution
x <- seq(mu - (4 * sigma), mu + (4 * sigma), length = 200)
y <- dnorm(x, mu, sigma)
#Plot
plot(x, y, type = "l", col = "green",
main = "Test Score Distribution",
xlab = "Test Score",
ylab = "Probability Density")
abline(v = minA, col="red", lty=2)
text(minA, 0.01, round(minA), col="red", font=2)
cat("The minimum score for an A is:", round(minA))
## The minimum score for an A is: 87
# Binomial parameters
n <- 155
p <- 0.61
target <- 96
# Normal approximation
mu <- n * p
sigma <- sqrt(n * p * (1 - p))
mu
## [1] 94.55
sigma
## [1] 6.072438
# Continuity Correction
lower <- target - 0.5
upper <- target + 0.5
lower
## [1] 95.5
upper
## [1] 96.5
# Calculate using normal approximation
normal_approx <- pnorm(upper, mu, sigma) - pnorm(lower, mu, sigma)
normal_approx
## [1] 0.06378274
# Calculate z-scores
#z_lower <- (lower - mu) / sigma
#z_upper <- (upper - mu) / sigma
# Begin plotting normal approximation
x_min <- mu - (4 * sigma)
x_max <- mu + (4 * sigma)
x_val <- seq(x_min, x_max, length.out = 500)
prob_densities <- dnorm(x_val, mu, sigma)
# Set up the plot
plot(x_val, prob_densities,
type = "l",
lwd = 2,
col = "black",
main = "Normal Approximation to Binomial Distribution\nComputer Crashes (n=155, p=0.61)",
xlab = "Computers Not Crashing",
ylab = "Probability Density",
las = 1)
answer <- round(normal_approx, 4)
cat("Answer using Normal Approximation P(X = 96): is APPROXIMATELY:", answer)
## Answer using Normal Approximation P(X = 96): is APPROXIMATELY: 0.0638
```