Problem 1:

a

# Set data

set.seed(42)
n_simulations <- 10000
n_samples <- 5


coverage <- replicate(n_simulations, {
  sample <- rnorm(n_samples, mean=0, sd=1)
  ci <- t.test(sample, conf.level=0.95)$conf.int
  ci[1] <= 0 & ci[2] >= 0  # TRUE if the 95% CI contains the true mean of 0
})

coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9536"

b

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- rnorm(5, mean=0, sd=1)
  t.test(sample, mu=0, alternative="two.sided")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0464"

c

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- rnorm(5, mean=0, sd=1)
  t.test(sample, mu=0, alternative="greater")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0492"

d

Repeat a:

# Set data

set.seed(42)
n_simulations <- 10000
n_samples <- 100


coverage <- replicate(n_simulations, {
  sample <- rnorm(n_samples, mean=0, sd=1)
  ci <- t.test(sample, conf.level=0.95)$conf.int
  ci[1] <= 0 & ci[2] >= 0
})

coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9482"

Repeat b:

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- rnorm(100, mean=0, sd=1)
  t.test(sample, mu=0, alternative="two.sided")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0518"

Repeat c:

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- rnorm(100, mean=0, sd=1)
  t.test(sample, mu=0, alternative="greater")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0526"

e

For both n = 5 and n = 100, the simulated coverage probability of the 95% confidence interval was close to the nominal 95% (0.9536 and 0.9482, respectively), so the interval contained the true mean mu = 0, and we failed to reject the null hypothesis, about 95% of the time.

For both the two-sided t-test and the one-sided t-test, the rejection rate remained approximately 5%, matching the significance level alpha = 0.05 regardless of the sample size. This is expected because the null hypothesis is true here, so any rejection is a Type I error.
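
As a quick consistency check (my addition, not required by the problem), the coverage from part (a) and the two-sided rejection rate from part (b) can be computed from the same simulated samples: the 95% confidence interval contains 0 exactly when the two-sided test fails to reject the null hypothesis mu = 0 at alpha = 0.05, so the two estimates must sum to 1 (and indeed 0.9536 + 0.0464 = 1 above). A minimal sketch:

set.seed(42)
check <- replicate(10000, {
  sample <- rnorm(5, mean = 0, sd = 1)
  fit <- t.test(sample, mu = 0, conf.level = 0.95)
  c(covers = fit$conf.int[1] <= 0 & fit$conf.int[2] >= 0,
    rejects = fit$p.value < 0.05)
})
rowMeans(check)  # estimated coverage and two-sided rejection rate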

Problem 2

a

# Set data

set.seed(42)
n_simulations <- 10000
n_samples <- 5


coverage <- replicate(n_simulations, {
  sample <- runif(n_samples, min = -1, max=1)
  ci <- t.test(sample, conf.level=0.95)$conf.int
  ci[1] <= 0 & ci[2] >= 0
})

coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9325"

b

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- runif(5, min = -1, max = 1)
  t.test(sample, mu=0, alternative="two.sided")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0675"

c

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- runif(5, min = -1, max=1)
  t.test(sample, mu=0, alternative="greater")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0561"

d

Repeat a:

# Set data

set.seed(42)
n_simulations <- 10000
n_samples <- 100


coverage <- replicate(n_simulations, {
  sample <- runif(n_samples, min = -1, max=1)
  ci <- t.test(sample, conf.level=0.95)$conf.int
  ci[1] <= 0 & ci[2] >= 0
})

coverage_prob <- mean(coverage)
print(paste("Coverage probability:", coverage_prob))
## [1] "Coverage probability: 0.9447"

Repeat b:

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- runif(100, min = -1, max=1)
  t.test(sample, mu=0, alternative="two.sided")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0553"

Repeat c:

# Set data

set.seed(42)
n_simulations <- 10000
alpha <- 0.05

p_values <- replicate(n_simulations, {
  sample <- runif(100, min = -1, max=1)
  t.test(sample, mu=0, alternative="greater")$p.value
})

rejection_rate <- mean(p_values < alpha)
print(paste("Rejection rate:", rejection_rate))
## [1] "Rejection rate: 0.0485"

e

For both n = 5 and n = 100, most of the 95% confidence intervals captured the true population mean of 0, and the rejection rates for the two-sided and one-sided t-tests stayed near 5%. At n = 5 the results deviated slightly from the nominal levels (coverage 0.9325, two-sided rejection rate 0.0675) because the uniform data are not normal and the sample is small; at n = 100 the coverage (0.9447) and rejection rates were much closer to nominal.

These results match the pattern from Problem 1, with the agreement improving as the sample size grows, as the central limit theorem would suggest.
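
A small extension (my addition, not part of the problem) illustrates this convergence: rerunning part (a) for a few intermediate sample sizes shows the coverage for uniform data moving toward the nominal 95% as n grows. A sketch:

set.seed(42)
for (n in c(5, 20, 100)) {
  coverage <- replicate(10000, {
    sample <- runif(n, min = -1, max = 1)
    ci <- t.test(sample, conf.level = 0.95)$conf.int
    ci[1] <= 0 & ci[2] >= 0
  })
  print(paste("n =", n, "coverage:", mean(coverage)))
}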

Problem 3:

a

set.seed(42)
n_simulations <- 10000
n_samples <- 10
alpha <- 0.05

rejections <- replicate(n_simulations, {
  #    mean=1, variance=4, so sd=2
  sample <- rnorm(n_samples, mean=1, sd=2)
  p_value <- t.test(sample, mu=0, alternative="greater")$p.value
  p_value < alpha
})

power <- mean(rejections)  # Proportion of rejections
beta <- 1 - power          # Proportion of failures to reject

print(paste("Part (a) - n=10, alpha=0.05:"))
## [1] "Part (a) - n=10, alpha=0.05:"
print(paste("Power:", power))
## [1] "Power: 0.416"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0.584"

b

set.seed(42)
n_simulations <- 10000
n_samples <- 10
alpha <- 0.01

rejections <- replicate(n_simulations, {
  #    mean=1, variance=4, so sd=2
  sample <- rnorm(n_samples, mean=1, sd=2)
  p_value <- t.test(sample, mu=0, alternative="greater")$p.value
  p_value < alpha
})

power <- mean(rejections)  # Proportion of rejections
beta <- 1 - power          # Proportion of failures to reject

print(paste("Part (b) - n=10, alpha=0.01:"))
## [1] "Part (b) - n=10, alpha=0.01:"
print(paste("Power:", power))
## [1] "Power: 0.1604"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0.8396"

c

Repeat a:

set.seed(42)
n_simulations <- 10000
n_samples <- 1000
alpha <- 0.05

rejections <- replicate(n_simulations, {
  #    mean=1, variance=4, so sd=2
  sample <- rnorm(n_samples, mean=1, sd=2)
  p_value <- t.test(sample, mu=0, alternative="greater")$p.value
  p_value < alpha
})

power <- mean(rejections)  # Proportion of rejections
beta <- 1 - power          # Proportion of failures to reject

print(paste("Part (c), repeat of (a) - n=1000, alpha=0.05:"))
## [1] "Part (c), repeat of (a) - n=1000, alpha=0.05:"
print(paste("Power:", power))
## [1] "Power: 1"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0"

Repeat b:

set.seed(42)
n_simulations <- 10000
n_samples <- 1000
alpha <- 0.01

rejections <- replicate(n_simulations, {
  #    mean=1, variance=4, so sd=2
  sample <- rnorm(n_samples, mean=1, sd=2)
  p_value <- t.test(sample, mu=0, alternative="greater")$p.value
  p_value < alpha
})

power <- mean(rejections)  # Proportion of rejections
beta <- 1 - power          # Proportion of failures to reject

print(paste("Part (c), repeat of (b) - n=1000, alpha=0.01:"))
## [1] "Part (c), repeat of (b) - n=1000, alpha=0.01:"
print(paste("Power:", power))
## [1] "Power: 1"
print(paste("Beta (Type II error):", beta))
## [1] "Beta (Type II error): 0"

d

When alpha decreased from 0.05 to 0.01 (with n = 10), power decreased from 0.416 to 0.1604 and beta increased correspondingly: a stricter significance level makes it harder to reject the null hypothesis, so more Type II errors occur.

Increasing the sample size from n = 10 to n = 1000 dramatically increased power: at n = 1000 the estimated power was 1.0, even with the stricter alpha = 0.01. This shows that a larger sample size greatly improves our ability to detect a true effect.
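
As an optional cross-check (my addition), the simulated power values can be compared against R's analytical power calculation for a one-sample, one-sided t-test with true mean 1, sd 2, and null value mu = 0 (so delta = 1); the results should be close to the simulation estimates above. A sketch:

# Analytical power under the same settings (delta = 1, sd = 2, one-sample, one-sided)
power.t.test(n = 10, delta = 1, sd = 2, sig.level = 0.05,
             type = "one.sample", alternative = "one.sided")$power
power.t.test(n = 10, delta = 1, sd = 2, sig.level = 0.01,
             type = "one.sample", alternative = "one.sided")$power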