Markdown Author: Jessie Bell, 2023
Libraries Used: none
Answers: Turquoise
plot(dbinom(0:10, 10, 0.2), col="#6192ab", pch=15)+
points(dbinom(0:10, 10, 0.3), col="pink", pch=10)+
points(dbinom(0:10, 10, 0.4), col="#d7add9", pch=8)
## integer(0)
#The most likely number of chicks is 0.7*80
chickies <- c(0.7*80, "chickies will hatch each month")
chickies
## [1] "56" "chickies will hatch each month"
plot(dbinom(0:80, 80, 0.7)) #distribution of all possible probabilities
plot(dpois(0:10, 4)) #0:10 possible events shown
# with 4 successful events.
plot(dpois(0:12, 10))
dnorm(4, 5, 2)
## [1] 0.1760327
plot(dnorm(0:10, 5, 2))
curve(dnorm(x,66,3),40,90)
curve(dnorm(x,66,3),40,90)|>
abline(v=64, col="#d7add9")|>
abline(v=70, col="#d7add9")
pnorm(64, 66, 3)
## [1] 0.2524925
pnorm(70, 66, 3, lower.tail = F)
## [1] 0.09121122
a <- pnorm(64, mean = 66, sd = 3, lower.tail = TRUE)
b <- pnorm(70, mean = 66, sd = 3, lower.tail = F)
plot(dbinom(0:6, 6, 0.5), col="#007c7c", pch=15, ylim=c(0,1))
plot(pbinom(0:6, 6, 0.5), col="#a328a5", pch=15, ylim=c(0,1))
plot(dbinom(0:6, 6, 0.2), col="#57daf6", pch=8, ylim=c(0,1))
n <- 0:8
plot(pbinom(n, 8, 0.5),
ylab = "Probability",
xlab = "Number of heads",
col = "#d4f054",
pch=16)+
text(6, 0.8, "CDF", col="#d4f054")+
points(n, dbinom(n, 8, 0.5), col="#2f7db4", pch=16)+
text(6, 0.3, "PDF", col="#2f7db4")+
text(6, 0.3, "PDF", col="#2f7db4")
## integer(0)
sum(dbinom(40:50, 50, 0.8))
## [1] 0.5835594
dbinom(25, 50, 0.8)
## [1] 1.602445e-06
plot(dbinom(0:50, 50, 0.8))|>
abline(v=23, col="#ba7c97")
Notes: * p = probability * q = quantiles * d = density * r = random
plot(pbinom(1, 4, 0.5)) #y axis is between 0 and 1
#probability distribution (CDF)
plot(qbinom(1, 4, 0.5)) #y axis is between 2.5 and 5
#quantile (inverse of CDF)
plot(dbinom(1, 4, 0.5)) #y axis is between 0 and 1
# density (PDF)
plot(rbinom(1, 4, 0.5)) #generates random y axis, not between 0 and 1
#random variable having a specified distribution (I think similar to normalizing your data)
dbinom(x, size, prob, log=F)
Binomial Distribution: a discrete distribution that is used to calculate the probability of a 0 or 1 event (i.e. success or failure, yes or no, etc)
dbinom(0, 4, 0.5) #vector of quantile 0, n=4, prob = 50%
## [1] 0.0625
dbinom(1, 4, 0.5) #vector of quantile 1, n=4, prob = 50%
## [1] 0.25
dbinom(0:4, 4, 0.5) #vector of quantile 0, n=4, prob = 50%
## [1] 0.0625 0.2500 0.3750 0.2500 0.0625
dbinom(4, 50, 0.4)# successful, n, prob
## [1] 3.676981e-07
#Tells you what Pr(X=x) (probability of observing value equal to x)
# The probability that only 4 students will come to class with a water bottle out of 50 total students is [1] 3.676981e-07
dbinom(1,10,0.5)
## [1] 0.009765625
# The probability of only getting one heads while flipping a coin 10 times is [1] 0.009765625.
plot(dbinom(0:50, 50, 0.4))
#make a figure that visualizes all possible probabilities the following scenario:
#A total of 30 events, and the probability of the desired event is .3.
# Grid of X-axis values
x <- -10:30
# size = 80, prob = 0.2
plot(dbinom(x, size = 10, prob = 0.2), type = "h", lwd = 2,
main = "Binomial probability function",
ylab = "P(X = x)", xlab = "Number of successes")
# size = 80, prob = 0.3
lines(dbinom(x, size = 10, prob = 0.3), type = "h",
lwd = 2, col = rgb(1,0,0, 0.7))
# size = 80, prob = 0.4
lines(dbinom(x, size = 80, prob = 0.4), type = "h",
lwd = 2, col = rgb(0, 1, 0, 0.7))
# Add a legend
legend("topright", legend = c("80 0.2", "80 0.3", "80 0.4"),
title = "size prob", title.adj = 0.95,
lty = 1, col = 1:3, lwd = 2, box.lty = 0)
my.x <- 1:50 # a vector of x values from 1 - 50
my.y <- 0.4*my.x + rnorm(50) # a vector of y values with some random error added.
plot(my.y~my.x)
# add labels
plot(my.y ~ my.x,
xlab = 'My X is the Best',
ylab = "Nicely Cooperative Variable")
rm(list=ls()) # remove all objects from the global environment
#Using pnorm
#first lets use pnorm to calculate the proportion of area IN THE TAILS under the normal curve (lower is neg, upper is pos)
pnorm(-2, lower.tail = T) #this gives us the area of the curve that is to the left of -2
## [1] 0.02275013
pnorm(-2, lower.tail = F) #now we have the upper tail airea
## [1] 0.9772499
### recall that the sum of the prob of all outcomes is equal to 1
value1 <- pnorm(-2, lower.tail = T)
value2 <- pnorm(-2, lower.tail = F)
value1+value2
## [1] 1
#incredible!
pnorm(-2, lower.tail = T) +pnorm(-2, lower.tail = F)
## [1] 1
#still 1! cool.
cool <- 1 - pnorm(-1, lower.tail = T) - pnorm(1, lower.tail = F)
# What about the proportion between -2 and 1?
pnorm(-1) - pnorm(-2)
## [1] 0.1359051
If you are in the 25th percentile, it means that 25% of the population is lower than you. If we want to find the value of 25% - we will use qnorm()
qnorm(0.25, mean=4, sd=1.5, lower.tail=T)
## [1] 2.988265
rnorm(5) #gives 5 points that are normally distributed over x values
## [1] 0.4158873 -0.3724822 1.1962670 -1.3250621 -2.2539667
#run it again and again to see how it changes!
hist(rnorm(5))
hist(rnorm(10))
HOLD ON A SECOND! Those do not look normal. Hmm….
What is the minimum sample size needed for getting that perfect bell shape?
hist(rnorm(30))
hist(rnorm(50))
hist(rnorm(100))
hist(rnorm(1000))
1000 looks pretty good!
Here lets make plots of normal probability density distributions.
my.mean <- 4
my.sd <- 1.5
# Create a vector of 100 values that range from (-4*sd + mean) to (4*sd + mean)
x = seq(-4,4,length=100)*my.sd + my.mean
# The first part of this code (seq()) creates a vector of 100 numbers between -4 and 4
# The last part multiplies those by the given sd and mean.
y <- dnorm(x, my.mean, my.sd)
head(x)
## [1] -2.000000 -1.878788 -1.757576 -1.636364 -1.515152 -1.393939
head(y)
## [1] 8.922015e-05 1.228635e-04 1.680920e-04 2.284732e-04 3.085231e-04
## [6] 4.139082e-04
The table above shows the random numbers that have been generated using the seq() function. We can use these to make a curve now.
plot(x, y)
plot(x, y, type="l", #l for LINE (LOWER CASE L)
ylab = 'Probability Density',
xlab = 'X = Values of my normal curve',
main = 'This curve represents the probability density for X ~ N(4,1.5)')
plot(x, y, type="l", #l for LINE (LOWER CASE L)
ylab = 'Probability Density',
xlab = 'X = Values of my normal curve',
main = 'This curve represents the probability density for X ~ N(4,1.5)') |> #syntax needed between the two
abline(v = 2, col="red") #this adds line to plot above
Create mean, sd, and x and y sequences
my.mean = 4
my.sd = 1.5
x = seq(-4,4,length=100)*my.sd + my.mean
y = dnorm(x, my.mean, my.sd)
Next, create a vector of x + the reverse of x
xrev <- c(x, rev(x))
Next, create a vector of y + an equal number of 0s
yzers <- c(y, rep(0, length(y)))
Now you plot the curve and then call the polygon function to shade in a portion of the graph specified by the coordinates in xx and yy
plot(x, y, type="l",
ylab = 'Probability Density',
xlab = 'X = Some random variable',
main = 'This curve represents the probability density for X ~ N(4,1.5)')+
polygon(xrev,yzers, col="gray", border=NA)
## integer(0)
If want to shade the area under the curve that is greater than a value, you create new coordinates that reflect that portion.
grtr = 6
keep <- which(x > grtr)
xx2 <- c(xrev[keep],rev(xrev[keep]))
yy2 <- c(yzers[keep],rep(0,length(keep)))
plot(x, y, type="l",
ylab = 'Probability Density',
xlab = 'X = Some random variable',
main = 'Pr(X > 6), when X ~ N(4,1.5)')+
polygon(xx2,yy2,col="red",border = NA)
## integer(0)
Now shade the other side:
less = 2
keep <- which(x < less)
xx2 <- c(xrev[keep],rev(xrev[keep]))
yy2 <- c(yzers[keep],rep(0,length(keep)))
plot(x, y, type="l",
ylab = 'Probability Density',
xlab = 'X = Some random variable',
main = 'Pr(X < 2), when X ~ N(4,1.5)')+
polygon(xx2,yy2,col="pink",border = NA)
## integer(0)