https://www.jarad.me/courses/stat587Eng/labs/lab03/lab03.html

https://seankross.com/notes/dpqr/#:~:text=Although%20x%20represents%20the%20independent,the%20normal%20distribution%20with%20dnorm%20.

d: density function p: cumulative density function q: quantile function r: random sampling

library(tidyverse)
library(openintro)
library(palmerpenguins)
library(janitor)
library(interpretCI)
library(BSDA)
library(pwr)

# Consistent Randomization
set.seed(1)

# Remove scientific notation
options(scipen = 999) 
# Geom
#dgeom(x, prob, log = FALSE)
#pgeom(q, prob, lower.tail = TRUE, log.p = FALSE)
#qgeom(p, prob, lower.tail = TRUE, log.p = FALSE)
#rgeom(n, prob)

# Expo
#dexp(x, rate = 1, log = FALSE)
#pexp(q, rate = 1, lower.tail = TRUE, log.p = FALSE)
#qexp(p, rate = 1, lower.tail = TRUE, log.p = FALSE)
#rexp(n, rate = 1)

#  Binom
#dbinom(x, size, prob, log = FALSE)
#pbinom(q, size, prob, lower.tail = TRUE, log.p = FALSE)
#qbinom(p, size, prob, lower.tail = TRUE, log.p = FALSE)
#rbinom(n, size, prob)

# Poisson
#dpois(x, lambda, log = FALSE)
#ppois(q, lambda, lower.tail = TRUE, log.p = FALSE)
#qpois(p, lambda, lower.tail = TRUE, log.p = FALSE)
#rpois(n, lambda)

# Uniform
#dunif(x, min = 0, max = 1, log = FALSE)
#punif(q, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE)
#qunif(p, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE)
#runif(n, min = 0, max = 1)

# Normal
#dnorm(x, mean = 0, sd = 1, log = FALSE)
#pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
#qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
#rnorm(n, mean = 0, sd = 1)

# t
#dt(x, df, ncp, log = FALSE)
#pt(q, df, ncp, lower.tail = TRUE, log.p = FALSE)
#qt(p, df, ncp, lower.tail = TRUE, log.p = FALSE)
#rt(n, df, ncp)
Binomial

Example:

# Simulate data
coin_toss_1 <- rbinom(100, 1, 0.5)

janitor::tabyl(coin_toss_1) %>% 
  as.data.frame() %>% rename(`heads 1/0` = 1,
                             count = 2)
##   heads 1/0 count percent
## 1         0    52    0.52
## 2         1    48    0.48
# Compute P(45 < X < 55) for X Binomial(100, 0.5)
sum(dbinom(46:54, 100, 0.5))
## [1] 0.6317984
# Exactly 3 heads from 10 coin tosses
dbinom(3, 10, 0.2)
## [1] 0.2013266
# Compute P(1 < X < 50) for X Binomial(100, 0.5)
sum(dbinom(x = 1:50, size = 100, p = 0.5))
## [1] 0.5397946
# Probability of tossing 50 heads or less out of 100
pbinom(50, size = 100, prob = 0.5)
## [1] 0.5397946
# Prob of tossing more than 50 heads out of 100
pbinom(50, size = 100, prob = 0.5, lower.tail = FALSE)
## [1] 0.4602054
success <- 0:100

plot(success, dbinom(success, 
                     size = 100, 
                     prob = 0.5),
     type='h')
title("PMF for a binomial distribution with size = 100 and prob = 0.5")

plot(0:100, pbinom(0:100, size = 100, prob = 0.5), 
     type = "l", 
     main = "CDF for binomial distribution with size = 100 and prob = 0.5")

Geometric (the number of failures in a binomial distribution before first success)
# Note that X in each function represents the number of failures before a success

# Example if success / heads occurs on the 3rd toss then x should be 2
dgeom(x = 2, p = 0.5)
## [1] 0.125
# Compute P(3 < X < 10) for X Geometric(0.5) tossing a head for the first time between the 3rd and 10th tosses
sum(dgeom(3:10, 0.5))
## [1] 0.1245117
# Compute P(1 < X < 50) for X Geometric(0.5) tossing a head for the first time between the 1st and 50th tosses
sum(dgeom(x = 1:50, p = 0.5))
## [1] 0.5
# Probability of tossing a head by the 4th toss
dgeom(3, prob = 0.2)
## [1] 0.1024
# Prob of it taking more than 3 tosses to toss a head
pgeom(2, prob = 0.5, lower.tail = FALSE)
## [1] 0.125
success <- 0:20

plot(0:10, dgeom(0:10, 
                     prob = 0.5),
     type='h')
title("PMF for a geometric distribution with prob = 0.5")

plot(0:20, pgeom(0:20, prob = 0.5), 
     type = "l", 
     main = "CDF for a geometric distribution with prob = 0.5")

Poisson

Example:

liverpool_goals <- rpois(100, 1.5)

hist(liverpool_goals, 
     breaks = 5,
     main = "Goals scored by Liverpool per game n = 100, Lambda = 1.5",
     xlab = "Goals per game")

# Compute P(3 < X < 10) for X Poisson(1.5) probability of scoring between 3 and 10 goals in a game
sum(dpois(3:10, lambda = 1.5))
## [1] 0.1911526
# Compute P(1 < X < 3) for X Poisson(1.5) probability of scoring between 1 and 3 goals in a game
sum(dpois(x = 1:3, lambda = 1.5))
## [1] 0.7112274
# Probability of scoring exactly 3 goals in a game
dpois(3, lambda = 1.5)
## [1] 0.1255107
# Prob of it taking more than 3 games to score
ppois(3, lambda = 1.5, lower.tail = FALSE)
## [1] 0.06564245
# PMF plot
plot(0:7, dpois(0:7, 
                     lambda = 1.5),
     type='h')
title("PMF for a Poisson distribution with lambda = 1.5")

# CDF Plot
plot(0:7, ppois(0:7, lambda = 1.5), 
     type = "l", 
     main = "CDF for a Poisson distribution with lambda = 1.5")

Exponential (time between events in Poisson process)
# Compute P(4 < X < 5) for X Exponential X ~ M(1.5) probability of the first goal happening between 4 and 5 games
1 - pexp(4, rate = 1.5) + 1 - pexp(5, rate = 1.5)
## [1] 0.003031837
# Probability of the first goal happening by the 3rd game
1 - pexp(2, rate = 1.5)
## [1] 0.04978707
# Probability of the first goal happening in the 1st game
1 - pexp(1, rate = 1.5)
## [1] 0.2231302
# Prob of it taking more than 3 games to score
pexp(3, rate = 1.5, lower.tail = FALSE)
## [1] 0.011109
# Prob of it taking more than 3 games to score
pexp(3, rate = 1.5)
## [1] 0.988891
# PMF plot
plot(0:7, dexp(0:7, 
                     rate = 1.5),
     type='h')
title("PMF for a Exponential distribution with rate = 1.5")

# CDF Plot
plot(0:7, pexp(0:7, rate = 1.5), 
     type = "l", 
     main = "CDF for a Exponential distribution with rate = 1.5")

Normal
# Simulated normal distribution
normal_dist <- rnorm(100, mean = 5, sd = 2)

# Simulated standard normal distribution
standard_normal_dist <- rnorm(100, mean = 0, sd = 1)

hist(normal_dist, main = "Normal Distribution, mu = 5, sd = 2")

hist(standard_normal_dist, main = "Normal Distribution, mu = 0, sd = 1")

PDF and CDF of a normal distribution

# PDF of normal dist with mean 5 and sd 2
curve(dnorm(x, mean = 5, sd = 2),
      from = -1, to = 11,
      main = "PDF for Normal Distrubution with mu = 5 and sd = 2")

curve(pnorm(x, mean = 5, sd = 2), 
      from = -1, to = 11,
      main = "CDF for Normal Distrubution with mu = 5 and sd = 2",
      ylab = "F(x)")

##### Probabilities of X

dnorm is the density function for a normal distribution of X with mean and sd

E.g for a

mean <- 5
st_dev <- 2


prob_list <- list()

x <- 1:9

for(i in seq_along(x)) {
  
  pnorm <- pnorm(i, mean = mean, sd = st_dev)
  dnorm <- dnorm(i, mean = mean, sd = st_dev)
  
  prob_list[[i]] <-
  
  data.frame(x = x[i],
   p_norm = round(pnorm, 4),
   d_norm = round(dnorm, 3)
   
  )

}

x_normal_probs <- bind_rows(prob_list) 

x_normal_probs
##   x p_norm d_norm
## 1 1 0.0228  0.027
## 2 2 0.0668  0.065
## 3 3 0.1587  0.121
## 4 4 0.3085  0.176
## 5 5 0.5000  0.199
## 6 6 0.6915  0.176
## 7 7 0.8413  0.121
## 8 8 0.9332  0.065
## 9 9 0.9772  0.027

Probability calculations for X on a normal distribution

mean <- 5
st_dev <- 2

# P(X < 3)
# The area to the left of 3
pnorm(3, mean = mean, sd = st_dev)
## [1] 0.1586553
# P(X > 3)
# The area to the right of 3
pnorm(3, mean = mean, sd = st_dev, lower.tail = FALSE)
## [1] 0.8413447
# P(3 < X < 5)
# The area between 3 and 5
dnorm_range <- function(lower_range, 
                         upper_range,
                         mean,
                         st_dev) {

1 - pnorm(lower_range, mean, st_dev) - pnorm(upper_range, mean, st_dev, lower.tail = FALSE)

}

dnorm_range(3, 5, mean, st_dev)
## [1] 0.3413447