https://www.jarad.me/courses/stat587Eng/labs/lab03/lab03.html
https://seankross.com/notes/dpqr/#:~:text=Although%20x%20represents%20the%20independent,the%20normal%20distribution%20with%20dnorm%20.
d: density function p: cumulative density function q: quantile function r: random sampling
library(tidyverse)
library(openintro)
library(palmerpenguins)
library(janitor)
library(interpretCI)
library(BSDA)
library(pwr)
# Consistent Randomization
set.seed(1)
# Remove scientific notation
options(scipen = 999)
# Geom
#dgeom(x, prob, log = FALSE)
#pgeom(q, prob, lower.tail = TRUE, log.p = FALSE)
#qgeom(p, prob, lower.tail = TRUE, log.p = FALSE)
#rgeom(n, prob)
# Expo
#dexp(x, rate = 1, log = FALSE)
#pexp(q, rate = 1, lower.tail = TRUE, log.p = FALSE)
#qexp(p, rate = 1, lower.tail = TRUE, log.p = FALSE)
#rexp(n, rate = 1)
# Binom
#dbinom(x, size, prob, log = FALSE)
#pbinom(q, size, prob, lower.tail = TRUE, log.p = FALSE)
#qbinom(p, size, prob, lower.tail = TRUE, log.p = FALSE)
#rbinom(n, size, prob)
# Poisson
#dpois(x, lambda, log = FALSE)
#ppois(q, lambda, lower.tail = TRUE, log.p = FALSE)
#qpois(p, lambda, lower.tail = TRUE, log.p = FALSE)
#rpois(n, lambda)
# Uniform
#dunif(x, min = 0, max = 1, log = FALSE)
#punif(q, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE)
#qunif(p, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE)
#runif(n, min = 0, max = 1)
# Normal
#dnorm(x, mean = 0, sd = 1, log = FALSE)
#pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
#qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
#rnorm(n, mean = 0, sd = 1)
# t
#dt(x, df, ncp, log = FALSE)
#pt(q, df, ncp, lower.tail = TRUE, log.p = FALSE)
#qt(p, df, ncp, lower.tail = TRUE, log.p = FALSE)
#rt(n, df, ncp)
Example:
# Simulate data
coin_toss_1 <- rbinom(100, 1, 0.5)
janitor::tabyl(coin_toss_1) %>%
as.data.frame() %>% rename(`heads 1/0` = 1,
count = 2)
## heads 1/0 count percent
## 1 0 52 0.52
## 2 1 48 0.48
# Compute P(45 < X < 55) for X Binomial(100, 0.5)
sum(dbinom(46:54, 100, 0.5))
## [1] 0.6317984
# Exactly 3 heads from 10 coin tosses
dbinom(3, 10, 0.2)
## [1] 0.2013266
# Compute P(1 < X < 50) for X Binomial(100, 0.5)
sum(dbinom(x = 1:50, size = 100, p = 0.5))
## [1] 0.5397946
# Probability of tossing 50 heads or less out of 100
pbinom(50, size = 100, prob = 0.5)
## [1] 0.5397946
# Prob of tossing more than 50 heads out of 100
pbinom(50, size = 100, prob = 0.5, lower.tail = FALSE)
## [1] 0.4602054
success <- 0:100
plot(success, dbinom(success,
size = 100,
prob = 0.5),
type='h')
title("PMF for a binomial distribution with size = 100 and prob = 0.5")
plot(0:100, pbinom(0:100, size = 100, prob = 0.5),
type = "l",
main = "CDF for binomial distribution with size = 100 and prob = 0.5")
The Geometric Distribution models the number of trials needed to achieve the first success in a sequence of independent Bernoulli trials
The outcome from these is modelled by a binomial distribution, each with the same probability of success.
The PMF gives the probability of achieving the first success on the kth trial.
100 tosses of a fair coin.
Head = success / 1
Tails = fail / 0
# Note that X in each function represents the number of failures before a success
# Example if success / heads occurs on the 3rd toss then x should be 2
dgeom(x = 2, p = 0.5)
## [1] 0.125
# Compute P(3 < X < 10) for X Geometric(0.5) tossing a head for the first time between the 3rd and 10th tosses
sum(dgeom(3:10, 0.5))
## [1] 0.1245117
# Compute P(1 < X < 50) for X Geometric(0.5) tossing a head for the first time between the 1st and 50th tosses
sum(dgeom(x = 1:50, p = 0.5))
## [1] 0.5
# Probability of tossing a head by the 4th toss
dgeom(3, prob = 0.2)
## [1] 0.1024
# Prob of it taking more than 3 tosses to toss a head
pgeom(2, prob = 0.5, lower.tail = FALSE)
## [1] 0.125
success <- 0:20
plot(0:10, dgeom(0:10,
prob = 0.5),
type='h')
title("PMF for a geometric distribution with prob = 0.5")
plot(0:20, pgeom(0:20, prob = 0.5),
type = "l",
main = "CDF for a geometric distribution with prob = 0.5")
Example:
liverpool_goals <- rpois(100, 1.5)
hist(liverpool_goals,
breaks = 5,
main = "Goals scored by Liverpool per game n = 100, Lambda = 1.5",
xlab = "Goals per game")
# Compute P(3 < X < 10) for X Poisson(1.5) probability of scoring between 3 and 10 goals in a game
sum(dpois(3:10, lambda = 1.5))
## [1] 0.1911526
# Compute P(1 < X < 3) for X Poisson(1.5) probability of scoring between 1 and 3 goals in a game
sum(dpois(x = 1:3, lambda = 1.5))
## [1] 0.7112274
# Probability of scoring exactly 3 goals in a game
dpois(3, lambda = 1.5)
## [1] 0.1255107
# Prob of it taking more than 3 games to score
ppois(3, lambda = 1.5, lower.tail = FALSE)
## [1] 0.06564245
# PMF plot
plot(0:7, dpois(0:7,
lambda = 1.5),
type='h')
title("PMF for a Poisson distribution with lambda = 1.5")
# CDF Plot
plot(0:7, ppois(0:7, lambda = 1.5),
type = "l",
main = "CDF for a Poisson distribution with lambda = 1.5")
# Compute P(4 < X < 5) for X Exponential X ~ M(1.5) probability of the first goal happening between 4 and 5 games
1 - pexp(4, rate = 1.5) + 1 - pexp(5, rate = 1.5)
## [1] 0.003031837
# Probability of the first goal happening by the 3rd game
1 - pexp(2, rate = 1.5)
## [1] 0.04978707
# Probability of the first goal happening in the 1st game
1 - pexp(1, rate = 1.5)
## [1] 0.2231302
# Prob of it taking more than 3 games to score
pexp(3, rate = 1.5, lower.tail = FALSE)
## [1] 0.011109
# Prob of it taking more than 3 games to score
pexp(3, rate = 1.5)
## [1] 0.988891
# PMF plot
plot(0:7, dexp(0:7,
rate = 1.5),
type='h')
title("PMF for a Exponential distribution with rate = 1.5")
# CDF Plot
plot(0:7, pexp(0:7, rate = 1.5),
type = "l",
main = "CDF for a Exponential distribution with rate = 1.5")
# Simulated normal distribution
normal_dist <- rnorm(100, mean = 5, sd = 2)
# Simulated standard normal distribution
standard_normal_dist <- rnorm(100, mean = 0, sd = 1)
hist(normal_dist, main = "Normal Distribution, mu = 5, sd = 2")
hist(standard_normal_dist, main = "Normal Distribution, mu = 0, sd = 1")
PDF and CDF of a normal distribution
# PDF of normal dist with mean 5 and sd 2
curve(dnorm(x, mean = 5, sd = 2),
from = -1, to = 11,
main = "PDF for Normal Distrubution with mu = 5 and sd = 2")
curve(pnorm(x, mean = 5, sd = 2),
from = -1, to = 11,
main = "CDF for Normal Distrubution with mu = 5 and sd = 2",
ylab = "F(x)")
##### Probabilities of X
dnorm is the density function for a normal distribution of X with mean and sd
E.g for a
mean <- 5
st_dev <- 2
prob_list <- list()
x <- 1:9
for(i in seq_along(x)) {
pnorm <- pnorm(i, mean = mean, sd = st_dev)
dnorm <- dnorm(i, mean = mean, sd = st_dev)
prob_list[[i]] <-
data.frame(x = x[i],
p_norm = round(pnorm, 4),
d_norm = round(dnorm, 3)
)
}
x_normal_probs <- bind_rows(prob_list)
x_normal_probs
## x p_norm d_norm
## 1 1 0.0228 0.027
## 2 2 0.0668 0.065
## 3 3 0.1587 0.121
## 4 4 0.3085 0.176
## 5 5 0.5000 0.199
## 6 6 0.6915 0.176
## 7 7 0.8413 0.121
## 8 8 0.9332 0.065
## 9 9 0.9772 0.027
Probability calculations for X on a normal distribution
mean <- 5
st_dev <- 2
# P(X < 3)
# The area to the left of 3
pnorm(3, mean = mean, sd = st_dev)
## [1] 0.1586553
# P(X > 3)
# The area to the right of 3
pnorm(3, mean = mean, sd = st_dev, lower.tail = FALSE)
## [1] 0.8413447
# P(3 < X < 5)
# The area between 3 and 5
dnorm_range <- function(lower_range,
upper_range,
mean,
st_dev) {
1 - pnorm(lower_range, mean, st_dev) - pnorm(upper_range, mean, st_dev, lower.tail = FALSE)
}
dnorm_range(3, 5, mean, st_dev)
## [1] 0.3413447