simulate_confidence_interval <- function(n_simulations = 10000, sample_size = 5, confidence_level = 0.95){
set.seed(102)
contains_mu <- replicate(n_simulations,{
sample_x <- rnorm(sample_size, mean = 0, sd = 1)#generate samples from N(0,1)
x_bar <- mean(sample_x)#count the mean of sample
s <- sd(sample_x)#count standard error
t_critical <- qt((1 + confidence_level)/ 2, df = sample_size - 1)#count critical value
lower_bound <- x_bar - t_critical*(s / sqrt(sample_size))# set CI
upper_bound <- x_bar + t_critical*(s / sqrt(sample_size))
return(lower_bound <= 0 & upper_bound >= 0)#judge wether Ci included mu = 0
})
probability_contain_mu <- mean(contains_mu)#count probability
return(probability_contain_mu)
}
ci_probability_5 <- simulate_confidence_interval(sample_size = 5)
print(ci_probability_5)
## [1] 0.9504
simulate_hypothesis_test <- function(n_simulations= 10000, sample_size = 5, alpha = 0.05, alternative = "two.sided"){
set.seed(102)
reject_h0 <- replicate(n_simulations, {
sample_x <- rnorm(sample_size, mean = 0, sd = 1)
#Do a t-te
test_result <- t.test(sample_x, mu = 0, alternative = alternative)
return(test_result$p.value < alpha)
})
probability_reject_h0 <- mean(reject_h0)
return(probability_reject_h0)
}
hypothesis_test_5 <- simulate_hypothesis_test(sample_size = 5, alternative = "two.sided")
print(hypothesis_test_5)
## [1] 0.0496
simulate_hypothesis_test <- function(n_simulations= 10000, sample_size = 5, alpha = 0.05, alternative = "greater"){
set.seed(102)
reject_h0 <- replicate(n_simulations, {
sample_x <- rnorm(sample_size, mean = 0, sd = 1)
#Do a t-te
test_result <- t.test(sample_x, mu = 0, alternative = alternative)
return(test_result$p.value < alpha)
})
probability_reject_h0 <- mean(reject_h0)
return(probability_reject_h0)
}
hypothesis_test_5 <- simulate_hypothesis_test(sample_size = 5, alternative = "greater")
print(hypothesis_test_5)
## [1] 0.0502
ci_probability_100 <- simulate_confidence_interval(sample_size = 10000)
print(ci_probability_100)
## [1] 0.9521
hypothesis_test_100 <- simulate_hypothesis_test(sample_size = 10000, alternative = "two.sided")
print(hypothesis_test_100)
## [1] 0.0479
hypothesis_test_100_right <- simulate_hypothesis_test(sample_size = 1000, alternative = "greater")
print(hypothesis_test_100_right)
## [1] 0.0515
Anwser: Generally, as the sample size increases, the error rate should stabilize around 0.05, CI should be more closer to 0.95.But my results don’t support this conclusion.
simulate_confidence_interval_uniform <- function(n_simulations = 10000, sample_size = 5, confidence_level = 0.95){
set.seed(102)
contains_mu <- replicate(n_simulations,{
sample_x <- runif(sample_size, min = 0, max = 1)#generate samples from unif(1,0)
x_bar <- mean(sample_x)#count the mean of sample
s <- sd(sample_x)#count standard error
t_critical <- qt((1 + confidence_level)/ 2, df = sample_size - 1)#count critical value
lower_bound <- x_bar - t_critical*(s / sqrt(sample_size))# set CI
upper_bound <- x_bar + t_critical*(s / sqrt(sample_size))
return(lower_bound <= 0.5 & upper_bound >= 0.5)#judge wether Ci included mu = 0
})
probability_contain_mu <- mean(contains_mu)#count probability
return(probability_contain_mu)
}
ci_probability_5_uniform <- simulate_confidence_interval_uniform(sample_size = 5)
print(ci_probability_5_uniform)
## [1] 0.9358
simulate_hypothesis_test_uniform <- function(n_simulations= 10000, sample_size = 5, alpha = 0.05, alternative = "two.sided"){
set.seed(102)
reject_h0 <- replicate(n_simulations, {
sample_x <- runif(sample_size, min = 0, max = 1)
#Do a t-te
test_result <- t.test(sample_x, mu = 0.5, alternative = alternative)
return(test_result$p.value < alpha)
})
probability_reject_h0_uniform <- mean(reject_h0)
return(probability_reject_h0_uniform)
}
hypothesis_test_5_uniform <- simulate_hypothesis_test_uniform(sample_size = 5, alternative = "two.sided")
print(hypothesis_test_5_uniform)
## [1] 0.0642
simulate_hypothesis_test_uniform <- function(n_simulations= 10000, sample_size = 5, alpha = 0.05, alternative = "greater"){
set.seed(102)
reject_h0 <- replicate(n_simulations, {
sample_x <- runif(sample_size, min = 0, max = 1)
#Do a t-te
test_result <- t.test(sample_x, mu = 0.5, alternative = alternative)
return(test_result$p.value < alpha)
})
probability_reject_h0_uniform <- mean(reject_h0)
return(probability_reject_h0_uniform)
}
hypothesis_test_5_uniform <- simulate_hypothesis_test_uniform(sample_size = 5, alternative = "greater")
print(hypothesis_test_5_uniform)
## [1] 0.0573
`` (d) Increase the number of samples to 100 and repeat (a)-(c).
ci_probability_100_uniform <- simulate_confidence_interval_uniform(sample_size = 10000)
print(ci_probability_100_uniform)
## [1] 0.9486
hypothesis_test_100_uniform <- simulate_hypothesis_test_uniform(sample_size = 10000, alternative = "two.sided")
print(hypothesis_test_100_uniform)
## [1] 0.0514
hypothesis_test_100_right_uniform <- simulate_hypothesis_test_uniform(sample_size = 1000, alternative = "greater")
print(hypothesis_test_100_right_uniform)
## [1] 0.0481
Anwser: From the code results, we can see that even though X has different distribution, the mean still follews a normal distribution
simulate_power_test <- function(n_simulations = 10000, sample_size = 10, alpha = 0.05){
set.seed(102)
fail_reject_h0 <- replicate(n_simulations,{
sample_x <- rnorm(sample_size, mean = 1, sd = 2)
test_result <- t.test(sample_x, mu = 0, alternative = "greater")
#count times of not reject h0
return(test_result$p.value >= alpha)
})
beta <- mean(fail_reject_h0)
power <- 1 - beta
return(list(beta = beta,power = power))
}
power_test_10_alpha_5 <- simulate_power_test(sample_size = 10, alpha = 0.05)
print(power_test_10_alpha_5)
## $beta
## [1] 0.5656
##
## $power
## [1] 0.4344
power_test_10_alpha_01 <- simulate_power_test(sample_size = 10, alpha = 0.01)
print(power_test_10_alpha_01)
## $beta
## [1] 0.8349
##
## $power
## [1] 0.1651
power_test_1000_alpha_05 <- simulate_power_test(sample_size = 1000, alpha = 0.05)
print(power_test_1000_alpha_05)
## $beta
## [1] 0
##
## $power
## [1] 1
power_test_1000_alpha_01 <- simulate_power_test(sample_size = 1000, alpha = 0.01)
print(power_test_1000_alpha_01)
## $beta
## [1] 0
##
## $power
## [1] 1
Anwser: Yes, from the results, we can see that as the sample size increases, beta to be 0, means there is no type 2 error, since sd decreases with a larger sample.