Is it binomial? Yes 1) Two possible outcomes - red or not red 2) Outcomes are exclusive? (Probably - it’s the favorite** color) 3) Counting buyers 4) Each is independent
# Define variables
n <- 20 # Number of trials
pi <- 0.5 # Probability of success
s <- 9:12 # Range of successes
# Plot the distribution
plot(x = 0:20,
y = dbinom(x = 0:20,
size = 20,
prob = .5
),
type = 'h',
main = 'Binomial Distribution (n=20, p=0.5)',
ylab = 'Probability',
xlab = '# Successes',
lwd = 3
)
# P(9 <= X <= 12|n = 20, pi = .5)
red <- round(sum(dbinom(x=s, size=n, prob= pi)),4)
cat("The probability that 9 to 12 selected buyers prefer red is",red)
## The probability that 9 to 12 selected buyers prefer red is 0.6167
Is it binomial? Yes 1) Two possible outcomes - defective or not 2) Outcomes are exclusive? - Yes 3) Counting bulbs 4) Each is independent
# Define variables
n <- 13 # Number of trials
pi <- 0.2 # Probability of success
x <- 4:5 # Range of successes
# Plot the Distribution
plot(x = 0:13, # x variable
y = dbinom(x = 0:13, # y variable
size = 13,
prob = .2
),
main = 'Binomial Distribution (n=13, p=0.2)', # title
ylab = 'Probability', # y axis label
xlab = '# Successes', # x axis label
type = 'h', # "h" for histogram-like vertical lines. Can try "l" if continuous distribution, or ignore
lwd = 6 # line widths, can be ignored
)
# P(3 < X < 6|n = 13, pi = .2)
bulb <- round(sum(dbinom(x=x, size=n, prob= pi)),4)
cat("The probability that 4 or 5 bulbs are broken is",bulb)
## The probability that 4 or 5 bulbs are broken is 0.2226
Not Binomial - so Poisson (# events during certain period)
# Define variables
m <- 4.2
o = 0:3 # 4 or less orders means we want the lower tail
# Plot the Distribution
range <- 0:10 # choose a large number arbitrarily for range of successes
plot(x = range,
y = dpois(x = range,
lambda = m
),
main = 'Poisson Distribution (lambda = 4.2)',
ylab = 'Probability',
xlab = '# Successes',
type = 'h',
lwd = 3
)
order <- round(sum(dpois(x = o,lambda = m)),4)
cat("The probability is",order)
## The probability is 0.3954
This is a hypergeometric distribution
# Define the variables
N <- 17 # Total Pop
n <- 3 # Sample Pop
S <- 6 # Total Success
F <- 11 # Total Failure
s <- 0:1 # Sample Success
# Plot the Distribution
# Apply dhyper function
x_dhyper <- 0:17
y_dhyper <- dhyper(x = x_dhyper, # success in sample / event of interest // the number of white balls drawn without replacement from an urn which contains both black and white balls.
m = S, # Success in Population // the number of white balls in the urn.
n = F, # Failure in Population // the number of black balls in the urn.
k = n # sample size // the number of balls drawn from the urn, hence must be in 0,1,...m+n
)
# Plot dhyper values
plot(x = x_dhyper,
y = y_dhyper,
type = 'h',
main = 'Hypergeometric Distribution (S = 6, F = 11, n = 3)',
ylab = 'Probability',
xlab = '# Successes',
lwd = 3
)
prob <- round(sum(dhyper(x=s,m=S,n=F,k=n)),4)
cat("The probability is",prob)
## The probability is 0.7279
This is a hypergeometric distribution
# Define the variables
N <- 25 # Total Pop
n <- 6 # Sample Pop
S <- 6 # Total Success
F <- N-S # Total Failure
s <- 2:6 # Sample Success
# Plot the Distribution
x_dhyper <- seq(from = 0,
to = 25,
by = 1
)
# Apply dhyper function
y_dhyper <- dhyper(x = x_dhyper, # success in sample / event of interest
m = S, # Success in Population
n = F, # Failure in Population
k = n # sample size
)
# Plot dhyper values
plot(x = x_dhyper,
y = y_dhyper,
type = 'h',
main = 'Hypergeometric Distribution (S = 6, F = 19, n = 6)',
ylab = 'Probability',
xlab = '# Successes',
lwd = 3
)
prob <- round(sum(dhyper(x=s,m=S,n=F,k=n)),4)
cat("The probability is",prob)
## The probability is 0.4529
Normal distribution (because it says so, but also animal characteristics are typically normal distributions)
# Define the variables
pop_m <- 800 # Mean
pop_var <- 90000 # Variance
pop_sd <- sqrt(pop_var) # Standard Dev
success <- 1040:1460 # Range of success
# Did this approach at first, but because it's continuous it's not the best
prob <- round(sum(dnorm(x=success,mean=pop_m,sd=pop_sd)),4)
#cat("The probability is",prob)
# CDF - use pnorm
upper <- 1460
lower <- 1040
prob <- pnorm(q=upper,mean=pop_m,sd=pop_sd) - pnorm(q=lower,mean=pop_m,sd=pop_sd)
prob <- round(prob,4)
cat("The better probability is",prob)
## The better probability is 0.198
Normal Distribution
# Define the variables
pop_m <- 106 # Mean
pop_sd <- 4 # Standard Dev
success <- 103:111 # Range of success
upper <- 111
lower <- 103
#### FOR GRAPHING
#define upper and lower bound
population_mean <- pop_m
population_sd <- pop_sd
lower_bound <- population_mean - population_sd
upper_bound <- population_mean + population_sd
#Create a sequence of 1000 x values based on population mean and standard deviation
x <- seq(from = -4,
to = 4,
length = 1000) * population_sd + population_mean
#create a vector of values that shows the height of the probability distribution
#for each value in x
y <- dnorm(x = x,
mean = population_mean,
sd = population_sd)
#plot normal distribution with customized x-axis labels
plot(x = x,
y = y,
type = "l",
lwd = 2,
axes = FALSE,
xlab = "",
ylab = ""
)
sd_axis_bounds <- 5
axis_bounds <- seq(from = -sd_axis_bounds * population_sd + population_mean,
to = sd_axis_bounds * population_sd + population_mean,
by = population_sd)
axis(side = 1, at = axis_bounds, pos = 0)
# CDF
prob <- pnorm(q=upper,mean=pop_m,sd=pop_sd) - pnorm(q=lower,mean=pop_m,sd=pop_sd)
prob <- round(prob,4)
cat("The better probability is",prob)
## The better probability is 0.6677
Normal Distribution
# Define the variables
pop_m <- 3.34 # Mean
pop_sd <- .07 # Standard Dev
u_p <- .97
l_p <- .03
#### FOR GRAPHING
#define upper and lower bound
population_mean <- pop_m
population_sd <- pop_sd
lower_bound <- population_mean - population_sd
upper_bound <- population_mean + population_sd
#Create a sequence of 1000 x values based on population mean and standard deviation
x <- seq(from = -4,
to = 4,
length = 1000) * population_sd + population_mean
#create a vector of values that shows the height of the probability distribution
#for each value in x
y <- dnorm(x = x,
mean = population_mean,
sd = population_sd)
#plot normal distribution with customized x-axis labels
plot(x = x,
y = y,
type = "l",
lwd = 2,
axes = FALSE,
xlab = "",
ylab = ""
)
sd_axis_bounds <- 5
axis_bounds <- seq(from = -sd_axis_bounds * population_sd + population_mean,
to = sd_axis_bounds * population_sd + population_mean,
by = population_sd)
axis(side = 1, at = axis_bounds, pos = 0)
# Quantile
cat("The lengths that separate the bottom & top 3% are ")
## The lengths that separate the bottom & top 3% are
round(qnorm(p=l_p,mean=pop_m,sd=pop_sd),2)
## [1] 3.21
round(qnorm(p=u_p,mean=pop_m,sd=pop_sd),2)
## [1] 3.47
A: Top 9% of scores B: Scores below the top 9% and above the bottom 63% C: Scores below the top 37% and above the bottom 17% D: Scores below the top 83% and above the bottom 8% F: Bottom 8% of scores
Scores on the test are normally distributed with a mean of 75.8 and a standard deviation of 8.1. Find the minimum score required for an A grade. Round your answer to the nearest whole number, if necessary. Normal Distribution
# Define the variables
pop_m <- 75.8 # Mean
pop_sd <- 8.1 # Standard Dev
percentile <- .91 # Minimum for an A
grade <- round(qnorm(p=percentile,mean=pop_m,sd=pop_sd),0)
cat("The minimum grade for an A is:",grade)
## The minimum grade for an A is: 87
Binomial But using Normal (need mean and sd)
# Define variables
n <- 155 # Number of trials
pi <- 0.61 # Probability of success
S <- 96 # Successes
pop_m <- n*pi
pop_sd <- sqrt(pi*(1-pi)*n)
good <- round(dnorm(x=S,mean=pop_m,sd=pop_sd),4)
cat("The probability is:",good)
## The probability is: 0.0639