# Part (a): coverage of a 95% confidence interval for the mean, n = 5, N(0, 1) data
set.seed(42)
n_simulations <- 10000
n_samples <- 5
coverage <- replicate(n_simulations, {
sample <- rnorm(n_samples, mean=0, sd=1)
ci <- t.test(sample, conf.level=0.95)$conf.int
ci[1] <= 0 & ci[2] >= 0
})
coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9536"
# Part (b): two-sided one-sample t-test of mu = 0, n = 5, N(0, 1) data
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- rnorm(5, mean=0, sd=1)
t.test(sample, mu=0, alternative="two.sided")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0464"
# Part (c): one-sided (greater) one-sample t-test of mu = 0, n = 5, N(0, 1) data
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- rnorm(5, mean=0, sd=1)
t.test(sample, mu=0, alternative="greater")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0492"
Repeat a:
# Repeat (a): coverage of a 95% confidence interval, n = 100, N(0, 1) data
set.seed(42)
n_simulations <- 10000
n_samples <- 100
coverage <- replicate(n_simulations, {
sample <- rnorm(n_samples, mean=0, sd=1)
ci <- t.test(sample, conf.level=0.95)$conf.int
ci[1] <= 0 & ci[2] >= 0
})
coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9482"
Repeat b:
# Repeat (b): two-sided one-sample t-test, n = 100, N(0, 1) data
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- rnorm(100, mean=0, sd=1)
t.test(sample, mu=0, alternative="two.sided")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0518"
Repeat c:
# Repeat (c): one-sided (greater) one-sample t-test, n = 100, N(0, 1) data
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- rnorm(100, mean=0, sd=1)
t.test(sample, mu=0, alternative="greater")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0526"
In the first question, for both n = 5 and n = 100, approximately 95% of the confidence intervals contained the true mean of 0, matching the nominal confidence level, so the intervals rarely led us to reject the true null hypothesis.
For both the two-sided and the one-sided t-test, the rejection rate stayed close to 5% (the nominal Type I error rate) regardless of the sample size, as the sanity check below suggests.
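As a quick sanity check (a sketch added here, not part of the assignment code), we can compare these estimates against the Monte Carlo standard error of a proportion estimated from 10,000 replications: both coverage estimates fall within about two standard errors of 0.95, and both rejection rates fall within about two standard errors of 0.05.
# Added sketch: Monte Carlo standard error of a proportion estimated from
# 10,000 replications, evaluated at the nominal coverage of 0.95.
mc_se <- sqrt(0.95 * 0.05 / 10000)                      # about 0.0022
c(lower = 0.95 - 2 * mc_se, upper = 0.95 + 2 * mc_se)   # roughly 0.946 to 0.954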
# Part (a), uniform data: coverage of a 95% confidence interval, n = 5, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
n_samples <- 5
coverage <- replicate(n_simulations, {
sample <- runif(n_samples, min = -1, max=1)
ci <- t.test(sample, conf.level=0.95)$conf.int
ci[1] <= 0 & ci[2] >= 0
})
coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9325"
# Part (b), uniform data: two-sided one-sample t-test, n = 5, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- runif(5, min = -1, max = 1)
t.test(sample, mu=0, alternative="two.sided")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0675"
# Part (c), uniform data: one-sided (greater) one-sample t-test, n = 5, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- runif(5, min = -1, max=1)
t.test(sample, mu=0, alternative="greater")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0561"
Repeat a:
# Repeat (a), uniform data: coverage of a 95% confidence interval, n = 100, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
n_samples <- 100
coverage <- replicate(n_simulations, {
sample <- runif(n_samples, min = -1, max=1)
ci <- t.test(sample, conf.level=0.95)$conf.int
ci[1] <= 0 & ci[2] >= 0
})
coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9447"
Repeat b:
# Repeat (b), uniform data: two-sided one-sample t-test, n = 100, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- runif(100, min = -1, max=1)
t.test(sample, mu=0, alternative="two.sided")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0553"
Repeat c:
# Repeat (c), uniform data: one-sided (greater) one-sample t-test, n = 100, Uniform(-1, 1)
set.seed(42)
n_simulations <- 10000
alpha <- 0.05
p_values <- replicate(n_simulations, {
sample <- runif(100, min = -1, max=1)
t.test(sample, mu=0, alternative="greater")$p.value
})
rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0485"
As in the first question, for both n = 5 and n = 100, most of the confidence intervals captured the true population mean, and the rejection rates for both the two-sided and one-sided t-tests stayed near 5%. With n = 5 the uniform data give slightly off-nominal results (coverage 0.9325, two-sided rejection rate 0.0675) because the data are not normal, but with n = 100 the coverage and rejection rates are very close to 0.95 and 0.05.
Overall, these results match those from the first question, as the sketch below illustrates.
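To make the convergence concrete, the following sketch (added here; same Uniform(-1, 1) setup, 10,000 replications per sample size) traces the coverage of the 95% t-interval as the sample size grows; by the central limit theorem it should move toward the nominal 0.95.
# Added sketch: coverage of the 95% t-interval for Uniform(-1, 1) data as a
# function of sample size; coverage approaches the nominal 0.95 as n grows.
set.seed(42)
sample_sizes <- c(5, 10, 30, 100)
coverage_by_n <- sapply(sample_sizes, function(n) {
  mean(replicate(10000, {
    x <- runif(n, min = -1, max = 1)
    ci <- t.test(x, conf.level = 0.95)$conf.int
    ci[1] <= 0 & ci[2] >= 0
  }))
})
names(coverage_by_n) <- paste0("n=", sample_sizes)
coverage_by_n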
set.seed(42)
n_simulations <- 10000
n_samples <- 10
alpha <- 0.05
rejections <- replicate(n_simulations, {
# mean=1, variance=4, so sd=2
sample <- rnorm(n_samples, mean=1, sd=2)
p_value <- t.test(sample, mu=0, alternative="greater")$p.value
p_value < alpha
})
power <- mean(rejections) # Proportion of rejections
beta <- 1 - power # Proportion of failures to reject
print(paste("Part (a) - n=10, alpha=0.05:"))
## [1] "Part (a) - n=10, alpha=0.05:"
print(paste("Power:", power))
## [1] "Power: 0.416"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0.584"
set.seed(42)
n_simulations <- 10000
n_samples <- 10
alpha <- 0.01
rejections <- replicate(n_simulations, {
# mean=1, variance=4, so sd=2
sample <- rnorm(n_samples, mean=1, sd=2)
p_value <- t.test(sample, mu=0, alternative="greater")$p.value
p_value < alpha
})
power <- mean(rejections) # Proportion of rejections
beta <- 1 - power # Proportion of failures to reject
print(paste("Part (a) - n=10, alpha=0.01:"))
## [1] "Part (a) - n=10, alpha=0.01:"
print(paste("Power:", power))
## [1] "Power: 0.1604"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0.8396"
Repeat a:
set.seed(42)
n_simulations <- 10000
n_samples <- 1000
alpha <- 0.05
rejections <- replicate(n_simulations, {
# mean=1, variance=4, so sd=2
sample <- rnorm(n_samples, mean=1, sd=2)
p_value <- t.test(sample, mu=0, alternative="greater")$p.value
p_value < alpha
})
power <- mean(rejections) # Proportion of rejections
beta <- 1 - power # Proportion of failures to reject
print(paste("Part (a) - n=10, alpha=0.05:"))
## [1] "Part (a) - n=10, alpha=0.05:"
print(paste("Power:", power))
## [1] "Power: 1"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0"
Repeat b:
set.seed(42)
n_simulations <- 10000
n_samples <- 1000
alpha <- 0.01
rejections <- replicate(n_simulations, {
# mean=1, variance=4, so sd=2
sample <- rnorm(n_samples, mean=1, sd=2)
p_value <- t.test(sample, mu=0, alternative="greater")$p.value
p_value < alpha
})
power <- mean(rejections) # Proportion of rejections
beta <- 1 - power # Proportion of failures to reject
print(paste("Part (a) - n=10, alpha=0.01:"))
## [1] "Part (a) - n=10, alpha=0.01:"
print(paste("Power:", power))
## [1] "Power: 1"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0"
When alpha decreased from 0.05 to 0.01, power decreased and beta (the Type II error rate) increased: a stricter significance level makes it harder to reject the null hypothesis.
Increasing the sample size from n = 10 to n = 1000 dramatically increased power: at n = 1000 the power reached essentially 1.0 and beta dropped to essentially 0, even at the stricter alpha = 0.01. This shows that a larger sample size increases our ability to detect a true effect, as the analytic cross-check below also indicates.
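These simulated values can be cross-checked against the analytic power of the one-sample, one-sided t-test. The sketch below (added here, not part of the original assignment) uses R's built-in power.t.test() under the same assumptions (true mean 1, sd 2, i.e. an effect size of 0.5); its output should agree closely with the simulated power above.
# Added cross-check: analytic power for the same settings as the simulations.
power.t.test(n = 10, delta = 1, sd = 2, sig.level = 0.05,
             type = "one.sample", alternative = "one.sided")$power
power.t.test(n = 10, delta = 1, sd = 2, sig.level = 0.01,
             type = "one.sample", alternative = "one.sided")$power
power.t.test(n = 1000, delta = 1, sd = 2, sig.level = 0.05,
             type = "one.sample", alternative = "one.sided")$power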