# An illustration of Type 1 error.
# Create a population with mean = 100 and sd = 20.
pop <- rnorm(1e+06, mean = 100, sd = 20)
# Take two random samples of 100 values each from the population.
# `replace = FALSE` is spelled out in full: the abbreviated `repl = F` relied
# on partial argument matching and on `F`, which can be reassigned.
samp1 <- sample(pop, 100, replace = FALSE)
samp2 <- sample(pop, 100, replace = FALSE)
# the samples should be similar but not the same
summary(samp1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 52.1 80.5 97.4 96.7 111.0 139.0
# Test the sample means with a two-sample t-test. Both samples come from the
# same population, so a significant result here would be a Type 1 error.
test.result <- t.test(samp1, samp2)
##
alpha <- 0.05 # significance threshold
counter <- 1 # count repetitions
# Keep drawing pairs of samples from the same population and t-testing them
# until one test comes out significant at `alpha`. Because both samples come
# from `pop`, that significant result is a false positive (Type 1 error).
# After the loop, `counter` holds the number of tests it took and
# `test.result` holds the offending test.
repeat {
  samp1 <- sample(pop, 100, replace = FALSE)
  samp2 <- sample(pop, 100, replace = FALSE)
  test.result <- t.test(samp1, samp2)
  print(counter)
  if (test.result$p.value < alpha) {
    break
  }
  counter <- counter + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
test.result # prints the result of the test containing the Type 1 error
##
## Welch Two Sample t-test
##
## data: samp1 and samp2
## t = 3.437, df = 196.8, p-value = 0.0007165
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.362 16.105
## sample estimates:
## mean of x mean of y
## 106.12 95.88
# An illustration of how alpha affects the prevalence of Type 1 errors
# Simulation: for each alpha value, run `numTests` t-tests on pairs of samples
# drawn from the SAME population and count how many come out significant.
# Every significant result is a Type 1 error (false positive).
numTests <- 1000 # number of t-tests per alpha value
alphaSet <- c(0.001, 0.01, 0.05, 0.1, 0.2) # set of alpha values to test
# One row per (repetition, alpha) pair; columns are: error flag (1/0),
# p-value of the test, and the alpha the p-value was compared against.
sigTests <- matrix(nrow = length(alphaSet) * numTests, ncol = 3)
counter <- 1 # index of the next row of sigTests to fill
for (i in seq_len(numTests)) {
  for (alpha in alphaSet) {
    # take two samples from the same population
    samp1 <- sample(pop, 100, replace = FALSE)
    samp2 <- sample(pop, 100, replace = FALSE)
    # test sample means
    test.result <- t.test(samp1, samp2)
    # record results of test: flag is 1 when the test is (wrongly)
    # significant at this alpha, 0 otherwise
    sigTests[counter, ] <- c(
      as.numeric(test.result$p.value < alpha),
      test.result$p.value,
      alpha
    )
    counter <- counter + 1
  }
}
sigTests <- as.data.frame(sigTests) # convert to data.frame (easier to manipulate)
names(sigTests) <- c("type_1_errors", "p-value", "alpha") # assign column names
# Total number of Type 1 errors observed for each alpha value.
aggregate(sigTests$type_1_errors ~ sigTests$alpha, FUN = sum)
## sigTests$alpha sigTests$type_1_errors
## 1 0.001 0
## 2 0.010 11
## 3 0.050 57
## 4 0.100 111
## 5 0.200 193
# An illustration of how alpha affects the prevalence of Type 2 errors.
# Simulation: for each alpha value, run `numTests` t-tests on pairs of samples
# drawn from populations whose means really differ by `difference`, and count
# how many tests fail to reach significance. Every non-significant result is
# a Type 2 error (false negative).
numTests <- 1000 # number of t-tests per alpha value
difference <- 5 # difference in group means
alphaSet <- c(0.001, 0.01, 0.05, 0.1, 0.2) # set of alpha values to test
# One row per (repetition, alpha) pair; columns are: error flag (1/0),
# p-value of the test, and the alpha the p-value was compared against.
sigTests <- matrix(nrow = length(alphaSet) * numTests, ncol = 3)
counter <- 1 # index of the next row of sigTests to fill
for (i in seq_len(numTests)) {
  for (alpha in alphaSet) {
    # take two samples from DIFFERENT populations
    samp1 <- rnorm(100, mean = 100, sd = 10)
    samp2 <- rnorm(100, mean = 100 + difference, sd = 10)
    test.result <- t.test(samp1, samp2)
    # record results of test: flag is 1 when the test misses the real
    # difference at this alpha (p-value above alpha), 0 otherwise
    sigTests[counter, ] <- c(
      as.numeric(test.result$p.value > alpha),
      test.result$p.value,
      alpha
    )
    counter <- counter + 1
  }
}
sigTests <- as.data.frame(sigTests) # convert to data.frame (easier to manipulate)
names(sigTests) <- c("type_2_errors", "p-value", "alpha") # assign column names
# Total number of Type 2 errors observed for each alpha value.
aggregate(sigTests$type_2_errors ~ sigTests$alpha, FUN = sum)
## sigTests$alpha sigTests$type_2_errors
## 1 0.001 429
## 2 0.010 177
## 3 0.050 60
## 4 0.100 37
## 5 0.200 13