Function used to simulate task
# set seed for reproducibility
set.seed(456)
# create function for simulation
est_power <- function(n, reps = 10000,
alpha = 0.05) {
#repeat for amount of reps
hypo_rej <- replicate(reps, {
#generate given values
x <- rnorm(n, 0, 0.01)
eps <- rnorm(n, 0, 0.02)
y <- 0.1 * x + eps
#model it to regression
model <- lm(y ~ x)
#t-stat and crit value
t_stat <- summary(model)$coefficients["x", "t value"]
crit <- qt(1 - alpha / 2, df = n - 2)
#tests significance; 1 if TRUE, 0 if FALSE
abs(t_stat) > crit
})
#averages significance by reps
mean(hypo_rej)
}
Test broad range of sample size, n.
# assign sample size variable
sample_sizes <- seq(1000, 4000, by = 100)
# run simulation on all given sample sizes
power_results <- data.frame(
n = sample_sizes,
power = sapply(sample_sizes, est_power, reps = 10000)
)
#show power of each sample size
power_results
## n power
## 1 1000 0.3587
## 2 1100 0.3906
## 3 1200 0.4187
## 4 1300 0.4423
## 5 1400 0.4633
## 6 1500 0.4787
## 7 1600 0.5132
## 8 1700 0.5418
## 9 1800 0.5616
## 10 1900 0.5934
## 11 2000 0.6076
## 12 2100 0.6252
## 13 2200 0.6575
## 14 2300 0.6733
## 15 2400 0.6880
## 16 2500 0.7047
## 17 2600 0.7167
## 18 2700 0.7353
## 19 2800 0.7541
## 20 2900 0.7627
## 21 3000 0.7868
## 22 3100 0.7924
## 23 3200 0.8074
## 24 3300 0.8176
## 25 3400 0.8291
## 26 3500 0.8421
## 27 3600 0.8515
## 28 3700 0.8628
## 29 3800 0.8703
## 30 3900 0.8795
## 31 4000 0.8848
Narrow down sample size
## n power
## 1 3100 0.7901
## 2 3110 0.8015
## 3 3120 0.7962
## 4 3130 0.8006
## 5 3140 0.8056
## 6 3150 0.8062
## 7 3160 0.8009
## 8 3170 0.8042
## 9 3180 0.8037
## 10 3190 0.8048
## 11 3200 0.8047
In this particular seed the sample size with at least 80% power is around 3130-3140.
We can try other seeds to make sure n is relatively the same.
set.seed(652)
## n power
## 1 3100 0.8064
## 2 3110 0.7958
## 3 3120 0.7998
## 4 3130 0.7948
## 5 3140 0.7964
## 6 3150 0.7973
## 7 3160 0.8025
## 8 3170 0.8039
## 9 3180 0.8051
## 10 3190 0.8007
## 11 3200 0.8086
set.seed(275)
## n power
## 1 3100 0.7961
## 2 3110 0.7969
## 3 3120 0.8026
## 4 3130 0.7999
## 5 3140 0.7992
## 6 3150 0.8023
## 7 3160 0.8093
## 8 3170 0.8064
## 9 3180 0.8116
## 10 3190 0.8022
## 11 3200 0.8070
Given other random sequences 80% power is obtained by at least 3100-3200 range
Change repetition amounts to see if it still holds
sample_sizes <- seq(3100, 3200, by = 10)
power_results <- data.frame(
n = sample_sizes,
power = sapply(sample_sizes, est_power, reps = 5000)
)
power_results
## n power
## 1 3100 0.7910
## 2 3110 0.8046
## 3 3120 0.7990
## 4 3130 0.7954
## 5 3140 0.8032
## 6 3150 0.8074
## 7 3160 0.7984
## 8 3170 0.7980
## 9 3180 0.7978
## 10 3190 0.8014
## 11 3200 0.8034
sample_sizes <- seq(3100, 3200, by = 10)
power_results <- data.frame(
n = sample_sizes,
power = sapply(sample_sizes, est_power, reps = 20000)
)
power_results
## n power
## 1 3100 0.79605
## 2 3110 0.80045
## 3 3120 0.79835
## 4 3130 0.79880
## 5 3140 0.80405
## 6 3150 0.80325
## 7 3160 0.80055
## 8 3170 0.80410
## 9 3180 0.80580
## 10 3190 0.80430
## 11 3200 0.81320
Decreasing and increasing amounts of repetitions still gives the same 3100-3200 range for n