knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
set.seed(123)
n_sims <- 10000
n <- 5
µ_true <- 0
sigma <- 1
cover_1a <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma/sqrt(n)
lower <- xbar-qnorm(0.975)*se
upper <- xbar+qnorm(0.975)*se
(lower <= µ_true) && (upper >= µ_true)
})
p_cover_1a <- mean(cover_1a)
round(p_cover_1a, 4)
## [1] 0.9506
n_sims <- 10000
n <- 5
µ_true <- 0
sigma <- 1
alpha <- 0.05
reject_two_1b <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 2 * (1 - pnorm(abs(z)))
pval < alpha
})
reject_prob_two_1b <- mean(reject_two_1b)
round(reject_prob_two_1b, 4)
## [1] 0.0486
n_sims <- 10000
n <- 5
µ_true <- 0
sigma <- 1
alpha <- 0.05
reject_right_1c <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
reject_prob_right_1c <- mean(reject_right_1c)
round(reject_prob_right_1c, 4)
## [1] 0.0503
n_sims <- 10000
n <- 100
µ_true <- 0
sigma <- 1
alpha <- 0.05
cover_1d <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma/sqrt(n)
lower <- xbar-qnorm(0.975)*se
upper <- xbar+qnorm(0.975)*se
(lower <= µ_true) && (upper >= µ_true)
})
p_cover_1d <- mean(cover_1d)
round(p_cover_1d, 4)
## [1] 0.9479
n_sims <- 10000
n <- 100
µ_true <- 0
sigma <- 1
alpha <- 0.05
reject_two_1d <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 2 * (1 - pnorm(abs(z)))
pval < alpha
})
reject_prob_two_1d <- mean(reject_two_1d)
round(reject_prob_two_1d, 4)
## [1] 0.0505
n_sims <- 10000
n <- 100
µ_true <- 0
sigma <- 1
alpha <- 0.05
reject_right_1d <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
reject_prob_right_1d <- mean(reject_right_1d)
round(reject_prob_right_1d, 4)
## [1] 0.0506
Answers: 1. The probability of the estimated interval is very close to 95%, the same as the confident level we choose 2. The probability of rejecting 𝐻0 is around 0.05, closed to the alpha we picked. 3. The sample size has limited influence on the results we already have.
n <- 5
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
cover <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma/sqrt(n)
lower <- xbar-qnorm(0.975)*se
upper <- xbar+qnorm(0.975)*se
(lower <= µ_true) && (upper >= µ_true)
})
p_cover <- mean(cover)
round(p_cover, 4)
## [1] 0.9525
n <- 5
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
reject_two_2b <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 2 * (1 - pnorm(abs(z)))
pval < alpha
})
reject_prob_two <- mean(reject_two_2b)
round(reject_prob_two, 4)
## [1] 0.0497
n <- 5
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
reject_right_2c <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
reject_prob_right <- mean(reject_right_2c)
round(reject_prob_right, 4)
## [1] 0.05
n <- 100
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
cover <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma/sqrt(n)
lower <- xbar-qnorm(0.975)*se
upper <- xbar+qnorm(0.975)*se
(lower <= µ_true) && (upper >= µ_true)
})
p_cover <- mean(cover)
round(p_cover, 4)
## [1] 0.9519
n <- 100
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
reject_two_2d <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 2 * (1 - pnorm(abs(z)))
pval < alpha
})
reject_prob_two <- mean(reject_two_2d)
round(reject_prob_two, 4)
## [1] 0.0553
n <- 100
alpha <- 0.05
µ_true <- 0
sigma <- sqrt(1/3)
reject_right_2d <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
reject_prob_right <- mean(reject_right_2d)
round(reject_prob_right, 4)
## [1] 0.0481
Answers: 1. The probability of the estimated interval is very close to 95%, the same as the confident level we choose 2. The probability of rejecting 𝐻0 is around 0.05, closed to the alpha we picked. 3. The sample size has limited influence on the results we already have.
Comments: The distribution of X doesn’t really affect the results.
µ_true <- 1
sigma <- 2
n <- 10
alpha <- 0.05
rejected_3a <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
power_3a <- mean(rejected_3a)
beta_3a <- 1 - power_3a
c(beta = round(beta_3a,4), power = round(power_3a,4))
## beta power
## 0.5256 0.4744
n <- 10
alpha <- 0.01
rejected_3b <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
power_3b <- mean(rejected_3b)
beta_3b <- 1 - power_3b
c(beta = round(beta_3b,4), power = round(power_3b,4))
## beta power
## 0.772 0.228
n <- 1000
alpha <- 0.05
rejected_3ca <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
power_3ca <- mean(rejected_3ca)
beta_3ca <- 1 - power_3ca
c(beta = round(beta_3ca,4), power = round(power_3ca,4))
## beta power
## 0 1
n <- 1000
alpha <- 0.01
rejected_3cb <- replicate(n_sims, {
x <- rnorm(n, mean = µ_true, sd = sigma)
xbar <- mean(x)
se <- sigma / sqrt(n)
z <- (xbar - 0) / se
pval <- 1 - pnorm(z)
pval < alpha
})
power_3cb <- mean(rejected_3cb)
beta_3cb <- 1 - power_3cb
c(beta = round(beta_3cb,4), power = round(power_3cb,4))
## beta power
## 0 1
Answers: 1. When we choose alpha at 0.05, the probability of having Type II error (Fail to reject wrong Null Hypothesis) is around 50-50 2. As the alpha getting lower (from 0.05 to 0.01), the probability of having Type II error has significantly increase. (Beta increase) 3. As the sample size increase, the probability of having Type II error will decrease.