library(tidyverse)
library(openintro)
library(palmerpenguins)
library(janitor)
library(interpretCI)
library(BSDA)
library(pwr)
# Print numbers in fixed notation instead of scientific notation
options(scipen = 999)
# Fix the random seed so the simulated results are reproducible
set.seed(1)
In the simulations below, a "success" (coded as 1) means the coin lands on heads.
Reference: https://www.programmingr.com/examples/neat-tricks/sample-r-function/r-rbinom/
# Simulate 100 fair coin tosses (1 = heads, 0 = tails)
coin_toss_1 <- rbinom(100, size = 1, prob = 0.5)
# Simulate a second, independent set of 150 fair coin tosses
coin_toss_2 <- rbinom(150, size = 1, prob = 0.5)
# Probability mass function
# Probability of getting exactly 5 heads in 10 tosses of a fair coin
# (size is the number of tosses, so it must be 10 here, not 100)
dbinom(5, size = 10, prob = 0.5)
## [1] 0.2460938
# Cumulative distribution function
# If we flip a fair coin 10 times, what is the probability of getting 5 or
# fewer heads?
pbinom(5, size = 10, prob = 0.5)
## [1] 0.6230469
# Quantile function: useful for setting a cutoff score. Returns the smallest
# number of successes whose cumulative probability is at least 0.25
qbinom(0.25, size = 10, prob = 0.5)
## [1] 4
So, tabulating the outcomes of our two sets of simulated coin tosses:
# Frequency table (count and proportion) of tails (0) and heads (1) in the
# first set of tosses. The pipe form is kept as-is: the first column of the
# printed table is labelled "." because of it.
coin_toss_1 %>% tabyl()
## . n percent
## 0 52 0.52
## 1 48 0.48
# The same frequency table for the second set of tosses
coin_toss_2 %>% tabyl()
## . n percent
## 0 74 0.4933333
## 1 76 0.5066667
Confidence interval for the true proportion of heads when 30 heads are observed in 100 tosses, together with a test of whether that proportion differs from 0.5
# One-sample test of proportions: 30 heads in 100 tosses against a null
# proportion of 0.5, reporting a two-sided 95% confidence interval
prop.test(
  x = 30,
  n = 100,
  p = 0.5,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## 1-sample proportions test with continuity correction
##
## data: 30 out of 100, null probability 0.5
## X-squared = 15.21, df = 1, p-value = 0.00009619
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.2145426 0.4010604
## sample estimates:
## p
## 0.3
The difference in proportions for our coin tosses
# Share of heads (1s) in each set of tosses
prop_coin_toss_1 <- sum(coin_toss_1 == 1) / length(coin_toss_1)
prop_coin_toss_2 <- sum(coin_toss_2 == 1) / length(coin_toss_2)
# Difference between the two sample proportions (first minus second)
prop_diff <- prop_coin_toss_1 - prop_coin_toss_2
data.frame(prop_coin_toss_1, prop_coin_toss_2, prop_diff)
## prop_coin_toss_1 prop_coin_toss_2 prop_diff
## 1 0.48 0.5066667 -0.02666667
The confidence interval for the difference…
Given that the interval contains 0, at our confidence level of 95% we can’t say that there is a real difference between the two proportions (the observed difference is ≈ -0.027).
# Confidence interval for the difference between the two sample proportions
prop_diff_ci <- propCI(
  n1 = length(coin_toss_1),
  n2 = length(coin_toss_2),
  p1 = prop_coin_toss_1,
  p2 = prop_coin_toss_2,
  alpha = 0.05 # confidence level = 1 - alpha
)
# Show only the interval bounds, critical value, z statistic and p-value
prop_diff_ci$result %>%
  select(lower, upper, critical, z, pvalue)
## lower upper critical z pvalue
## 1 -0.1531166 0.09978322 1.959964 -0.4131314 0.6795103
https://www.econometrics-with-r.org/3-4-confidence-intervals-for-the-population-mean.html
A 95% confidence interval is given by using the t.test function and pulling out the conf.int element.
Let’s run this on a random sample of 30 and compare the resulting interval for the mean to the mean of the “population” of Palmer penguins:
# Values of the bill_length_mm column, using only the penguin rows that have
# no missing values in any column
bill_length <- penguins %>%
  drop_na() %>%
  pull(bill_length_mm)
mean(bill_length)
## [1] 43.99279
sd(bill_length)
## [1] 5.468668
#hist(bill_length)
# 95% confidence interval for the mean, computed from a random sample of 30
# bill lengths
t.test(sample(bill_length, size = 30))$conf.int
## [1] 40.76406 45.57594
## attr(,"conf.level")
## [1] 0.95
# The interval above is the (µ−, µ+) range for the population mean
We can do a quick one-sample hypothesis test of whether the sample mean differs from the population mean of 43.99279.
The p-value of 0.9836 below suggests there is no evidence against the null hypothesis that the population mean is 43.99279.
# One-sample t-test: does the mean of a fresh random sample of 30 bill lengths
# differ from the mean of the full bill_length vector?
t.test(sample(bill_length, size = 30), mu = mean(bill_length))
##
## One Sample t-test
##
## data: sample(bill_length, size = 30)
## t = -0.020686, df = 29, p-value = 0.9836
## alternative hypothesis: true mean is not equal to 43.99279
## 95 percent confidence interval:
## 42.04938 45.89728
## sample estimates:
## mean of x
## 43.97333
Confidence interval for the Poisson parameter (λ −, λ+) when λn is at least 30
Let’s say Liverpool score an average of 1.5 goals per game (90 minutes), and we simulate 100 games
# Simulate goals scored in 100 games as Poisson draws with a mean of 1.5
liverpool_goals <- rpois(100, lambda = 1.5)
# Goal counts at which to evaluate the Poisson probability mass function
goals <- 0:5
# Spike plot (type "h" = vertical lines) of P(goals = k) for k = 0, ..., 5
plot(
  goals,
  dpois(goals, lambda = 1.5),
  type = "h",
  main = "Poisson PMF Lambda 1.5", # title typo "Lamda" corrected
  ylab = "Probability",
  xlab = "Goals",
  lwd = 3
)
What is the probability of scoring exactly 3 goals in a game?
What is the probability of MORE than 2 goals in a game?
What is the probability of 2 or FEWER goals in a game?
# lower.tail controls which tail ppois() returns:
# TRUE (the default) gives P[X <= x]; FALSE gives P[X > x]
# P(goals = 3)
dpois(3, lambda = 1.5)
## [1] 0.1255107
# P(goals <= 2)
ppois(2, lambda = 1.5)
## [1] 0.8088468
# P(goals > 2)
ppois(2, lambda = 1.5, lower.tail = FALSE)
## [1] 0.1911532
Given that we simulated this data set with a lambda of 1.5, we can calculate a confidence interval using poisson.test to see if there is evidence against this being the actual lambda value.
The observed event rate is actually 1.57, and the test shows there is little evidence against the null hypothesis that the true rate is indeed 1.5 (p-value = 0.5674, and the 95% interval contains 1.5).
# Exact Poisson test of the simulated scoring rate against a hypothesised
# rate of 1.5 goals per game, with a two-sided 95% confidence interval
poisson.test(
  x = sum(liverpool_goals),    # total goals scored (event count)
  T = length(liverpool_goals), # number of games (time base)
  r = 1.5,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## Exact Poisson test
##
## data: sum(liverpool_goals) time base: length(liverpool_goals)
## number of events = 157, time base = 100, p-value = 0.5674
## alternative hypothesis: true event rate is not equal to 1.5
## 95 percent confidence interval:
## 1.334021 1.835694
## sample estimates:
## event rate
## 1.57