# Load the packages
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(gridExtra)))

Laws of probability

  • Probability of event A and event B
    • \(\text{Pr}(A \text{ and } B) = \text{Pr}(A) â‹… \text{Pr}(B)\) (Independent)
  • Probability of event A or event B
    • \(\text{Pr}(A \text{ or } B) = \text{Pr}(A) + \text{Pr}(B) - \text{Pr}(A \text{ and } B)\)
    • \(\text{Pr}(A \text{ or } B \text{ or } C) = \text{Pr}(A) + \text{Pr}(B) + \text{Pr}(C) - \text{Pr}(A \text{ and } B) - \text{Pr}(B \text{ and } C) - \text{Pr}(A \text{ and } C) + \text{Pr}(A \text{ and } B \text{ and } C)\)
  • Multiplying a random variable
    • \(E[k \cdot X] = k \cdot E[X]\)
    • \(\text{Var}(k \cdot X) = k^2 \cdot \text{Var}(X)\)
  • Adding two random variables
    • \(E[X + Y] = E[X] + E[Y]\) (Does not have to be independent)
    • \(\text{Var}[X + Y] = \text{Var}[X] + \text{Var}[Y]\) (Independent)

A&B A|B

# Laws of probability
A <- rbinom(100000, 1, .4)
B <- rbinom(100000, 1, .2)

Pr_A <- 0.4
Pr_B <- 0.2

# Probability of event A and event B
mean(A & B)
## [1] 0.08028
Pr_A * Pr_B
## [1] 0.08
# Probability of event A or event B
mean(A | B)
## [1] 0.52357
Pr_A + Pr_B - Pr_A * Pr_B
## [1] 0.52
X <- rbinom(100000, 10, .6)
Y <- rbinom(100000, 10, .7)

mean(X <= 4 | Y <= 4)
## [1] 0.20564
prob_X_less <- pbinom(4, 10, .6)
prob_Y_less <- pbinom(4, 10, .7)
prob_X_less + prob_Y_less - prob_X_less * prob_Y_less
## [1] 0.2057164

Multiplying a random variable

# Multiplying a random variable
X <- rbinom(100000, 20, .1)

mean(X)
## [1] 1.99767
mean(5 * X)
## [1] 9.98835
20 * .1 * 5
## [1] 10
var(X)
## [1] 1.801343
var(5 * X)
## [1] 45.03356
20 * .1 * (1 - .1) * 25
## [1] 45

Adding two random variables

# Adding two random variables
X <- rbinom(100000, 10, .5)
Y <- rbinom(100000, 100, .2)
Z <- X + Y

mean(X)
## [1] 4.99617
mean(Y)
## [1] 20.00215
mean(Z)
## [1] 24.99832
var(X)
## [1] 2.487
var(Y)
## [1] 15.92296
var(Z)
## [1] 18.43654

Bayesian statistics

  • Bayesian statistics - The process of updating our beliefs after seeing evidence
  • Posterior probability - The revised probability of an event occurring after taking into consideration new information
  • Prior probability - The probability of an event before new data is collected
  • Conditional probability - \(P(B|A)\)
    • \(P(B|A) = P(A \cap B) / P(A)\)
  • Bayes’ theorem
    • \(Pr(A|B) = \frac{Pr(B|A)Pr(A)}{Pr(B|A)Pr(A) + Pr(B|not A)Pr(notA)}\)
      • \(A\) = Biased
      • \(B\) = 14 Heads
# Posterior probability
# Suppose you have a coin that is equally likely to be fair (50% heads) or biased (75% heads). You then flip the coin 20 times and see 14 heads
# Simulate 50000 cases of flipping 20 coins from fair and from biased
fair <- rbinom(50000, 20, .5)
biased <- rbinom(50000, 20, .75)

# How many fair cases, and how many biased, led to exactly 14 heads?
fair_14 <- sum(fair == 14)
biased_14 <- sum(biased == 14)

# Find the fraction of biased coins that are 14 out of all coins that were 14
# Pr(Biased|14 Heads)
biased_14 / (fair_14 + biased_14)
## [1] 0.8214111
# Prior probability
# We see 14 out of 20 flips are heads, and start with a 80% chance the coin is fair and a 20% chance it is biased to 75%
# Simulate 8000 cases of flipping a fair coin, and 2000 of a biased coin
fair_flips <- rbinom(8000, 20, .5)
biased_flips <- rbinom(2000, 20, .75)

# Find the number of cases from each coin that resulted in 14/20
fair_14 <- sum(fair_flips == 14)
biased_14 <- sum(biased_flips == 14)

# Use these to estimate the posterior probability
fair_14 / (fair_14 + biased_14)
## [1] 0.4691152
# Bayes' theorem
# Use probability densities instead of simulation
# Use dbinom to calculate the probability of 14/20 heads with fair or biased coin
# Pr(14 Heads∣Fair) ⋅ Pr(Fair)
probability_fair <- dbinom(14, 20, .5) * .8
# Pr(14 Heads∣Biased) ⋅ Pr(Biased)
probability_biased <- dbinom(14, 20, .75) * .2

# Calculate the posterior probability that the coin is fair
probability_fair / (probability_fair + probability_biased)
## [1] 0.4672136

Probability distributions

Binomial distribution

  • \(X \sim B(\text{size}, p)\)
  • dbinom(x, size, prob, log = FALSE) - Probability density
  • pbinom(q, size, prob, lower.tail = TRUE, log.p = FALSE) - Cumulative density
  • qbinom(p, size, prob, lower.tail = TRUE, log.p = FALSE)
  • rbinom(n, size, prob)
  • Expected value (mean): \(E[X] = \text{size} \times p\)
  • Variance: \(Var(X) = \text{size} \times p (1 − p)\)
rbinom(1, 1, .5)
## [1] 1
rbinom(10, 1, .5)
##  [1] 0 1 1 1 1 0 1 0 1 1
rbinom(1, 10, .5)
## [1] 4
rbinom(10, 10, .5)
##  [1] 5 6 6 6 3 8 5 5 6 8
rbinom(10, 10, .8)
##  [1]  8  7  7  7 10  8  7  8  5  9
flips <- data.frame(n = rbinom(100000, 10, .5))

ggplot(flips, aes(n)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Pr(X = 5)
mean(rbinom(100000, 10, .5) == 5)
## [1] 0.246
dbinom(5, 10, .5)
## [1] 0.2460938
# Pr(X <= 4)
mean(rbinom(100000, 10, .5) <= 4)
## [1] 0.37956
pbinom(4, 10, .5)
## [1] 0.3769531
# P (heads >= 5)
pbinom(4, 10, .3, lower.tail = FALSE)
## [1] 0.1502683
mean(rbinom(10000, 10, .3) >= 5)
## [1] 0.1505
# Expected value
25 * .3
## [1] 7.5
mean(rbinom(10000, 25, .3))
## [1] 7.4666
# Variance
25 * .3 * (1 - .3)
## [1] 5.25
var(rbinom(10000, 25, .3))
## [1] 5.371337

Normal distribution

  • \(X \sim N(\mu, \sigma)\)
  • Gaussian distribution
  • Bell curve
  • dnorm(x, mean = 0, sd = 1, log = FALSE)
  • pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • rnorm(n, mean = 0, sd = 1)
  • Expected value (mean): \(E[X] = \mu\)
  • Variance: \(Var(X) = \sigma^2\)
  • Binomial with a very large size - the result approximates a normal distribution
    • \(\mu = \text{size} \times p\)
    • \(\sigma = \sqrt{\text{size} \times p (1 − p)}\)
# Normal distribution
# Normal approximation to the binomial
binomial <- rbinom(100000, 1000, .2)
expected_value <- 1000 * .2
variance <- 1000 * .2 * (1 - .2)
stdev <- sqrt(variance)
normal <- rnorm(100000, expected_value, stdev)

compare_histograms <- function(variable1, variable2) {
    x <- data.frame(value = variable1, variable = "Variable 1")
    y <- data.frame(value = variable2, variable = "Variable 2")

    options(repr.plot.width = 10, repr.plot.height = 4)
    ggplot(rbind(x, y), aes(value)) +
      geom_histogram() +
      facet_wrap(~variable, nrow = 1)
}

compare_histograms(binomial, normal)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Comparing the cumulative density of the binomial
# Simulations
binom_sample <- rbinom(100000, 1000, .2)
normal_sample <- rnorm(100000, expected_value, stdev)

mean(binom_sample <= 190)
## [1] 0.22736
mean(normal_sample <= 190)
## [1] 0.21345
# Cumulative density
pbinom(190, 1000, .2)
## [1] 0.2273564
pnorm(190, expected_value, stdev)
## [1] 0.2145977

Poisson distribution

  • \(X \sim P(\lambda)\)
  • dpois(x, lambda, log = FALSE)
  • ppois(q, lambda, lower.tail = TRUE, log.p = FALSE)
  • qpois(p, lambda, lower.tail = TRUE, log.p = FALSE)
  • rpois(n, lambda)
  • Expected value (mean): \(E[X] = \lambda\)
  • Variance: \(Var(X) = \lambda\)
  • Used when modeling rare events as counts, and don’t care about the total
  • Binomial with large n and small p - the result approximates a Poisson distribution
    • \(\lambda = \text{size} \times p\)
  • Adding multiple Poisson distributions together, the result is also a Poisson distribution
# Poisson distribution
# Possion approximation to the binomial
binom_sample <- rbinom(100000, 1000, .002)
expected_value <- 1000 * .002
poisson_sample <- rpois(100000, expected_value)

compare_histograms(binom_sample, poisson_sample)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Comparing the density of the binomial
# Simulations
binom_sample <- rbinom(100000, 1000, .002)
poisson_sample <- rpois(100000, expected_value)

mean(binom_sample == 0)
## [1] 0.13565
mean(poisson_sample == 0)
## [1] 0.13289
# Density
dbinom(0, 1000, .002)
## [1] 0.1350645
dpois(0, expected_value)
## [1] 0.1353353
# Sum of two Poisson variables
# Use compare_histograms to compare Z to the Poisson(3)
compare_histograms(rpois(100000, 1) + rpois(100000, 2), rpois(100000, 3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Geometric distribution

  • \(X \sim G(p)\)
  • dgeom(x, prob, log = FALSE)
  • pgeom(q, prob, lower.tail = TRUE, log.p = FALSE)
  • qgeom(p, prob, lower.tail = TRUE, log.p = FALSE)
  • rgeom(n, prob)
  • A discrete probability that the first occurrence of success requires k+1 independent trials, each with success probability p
  • Expected value (mean): \(E[X] = 1/p - 1\) (Before success)
  • Variance: \(Var(X) = \frac{1 - p}{p^2}\)
# Geometric distribution
# Simulating waiting for heads
flips <- rbinom(100, 1, .2)
which(flips == 1)[1]
## [1] 3
# Replicating simulations
replications <- replicate(100000, which(rbinom(100, 1, .2) == 1)[1] - 1)

# Simulating from the geometric distribution
geom_sample <- rgeom(100000, .2)
compare_histograms(replications, geom_sample)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mean(replications)
## [1] 3.99653
mean(geom_sample)
## [1] 4.00234
# Probability that the machine is still working across the first 30 days
still_working <- pgeom(0:29, .1, lower.tail = FALSE)
head(still_working)
## [1] 0.900000 0.810000 0.729000 0.656100 0.590490 0.531441
qplot(1:30, still_working)
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.