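A t-test is a statistical hypothesis test used to compare the means of two groups. There are two main types of t-tests: the unpaired (independent two-sample) t-test, which compares the means of two separate groups, and the paired t-test, which compares two measurements taken on the same subjects.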
We will demonstrate both types using simulated data.
library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)
# Simulate two independent samples of 30 observations each. Fixing the seed
# makes the random draws (and everything derived from them) reproducible.
set.seed(123)
group1 <- rnorm(n = 30, mean = 100, sd = 10)  # Treatment A
group2 <- rnorm(n = 30, mean = 110, sd = 10)  # Treatment B

# Stack both samples into long format: one row per observation, labelled with
# the treatment group it was drawn for.
unpaired_data <- data.frame(
  value = c(group1, group2),
  group = factor(
    rep(
      c("Treatment A", "Treatment B"),
      times = c(length(group1), length(group2))
    )
  )
)

# Preview the first six rows.
head(unpaired_data, n = 6)
## value group
## 1 94.39524 Treatment A
## 2 97.69823 Treatment A
## 3 115.58708 Treatment A
## 4 100.70508 Treatment A
## 5 101.29288 Treatment A
## 6 117.15065 Treatment A
# NOTE(review): this chunk repeats the earlier simulation verbatim (same seed,
# same draws, same data frame), so it reproduces identical values. It is kept
# so this section can be run on its own; consider dropping one of the copies.
set.seed(123)
group1 <- rnorm(n = 30, mean = 100, sd = 10)  # Treatment A
group2 <- rnorm(n = 30, mean = 110, sd = 10)  # Treatment B

# Long format: one row per observation, labelled with its treatment group.
unpaired_data <- data.frame(
  value = c(group1, group2),
  group = factor(
    rep(
      c("Treatment A", "Treatment B"),
      times = c(length(group1), length(group2))
    )
  )
)

# Preview the first six rows.
head(unpaired_data, n = 6)
## value group
## 1 94.39524 Treatment A
## 2 97.69823 Treatment A
## 3 115.58708 Treatment A
## 4 100.70508 Treatment A
## 5 101.29288 Treatment A
## 6 117.15065 Treatment A
# Two-sample t-test of `value` between the two treatment groups.
# var.equal = TRUE requests the classic pooled-variance (Student) test;
# leaving it at the default FALSE would run Welch's test instead.
t_unpaired <- t.test(
  value ~ group,
  data = unpaired_data,
  var.equal = TRUE
)

# Show the test statistic, degrees of freedom, p-value, confidence interval
# and the two group means.
print(t_unpaired)
##
## Two Sample t-test
##
## data: value by group
## t = -5.2098, df = 58, p-value = 2.616e-06
## alternative hypothesis: true difference in means between group Treatment A and group Treatment B is not equal to 0
## 95 percent confidence interval:
## -16.962870 -7.545972
## sample estimates:
## mean in group Treatment A mean in group Treatment B
## 99.52896 111.78338
# Bar chart of the two group means. Both layers use stat = "summary": the bars
# show mean(value) per group and the error bars show mean_sdl with mult = 1,
# i.e. the mean plus/minus one standard deviation.
ggplot(
  data = unpaired_data,
  mapping = aes(x = group, y = value, fill = group)
) +
  geom_bar(
    stat = "summary",
    fun = mean,
    color = "black",
    width = 0.6
  ) +
  geom_errorbar(
    stat = "summary",
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    width = 0.2
  ) +
  labs(
    title = "Comparison of Means (Unpaired t-test)",
    x = "Group",
    y = "Value"
  ) +
  theme_minimal()
# Simulate paired measurements for 30 subjects; the fixed seed keeps the
# draws reproducible.
set.seed(456)
before <- rnorm(n = 30, mean = 120, sd = 12)

# Each follow-up score is the subject's own baseline minus a random reduction
# (mean 5, sd 8), so scores tend to be lower after treatment.
after <- before - rnorm(n = 30, mean = 5, sd = 8)

# Wide format: one row per subject with both measurements side by side.
paired_data <- data.frame(
  subject = seq_along(before),
  before = before,
  after = after
)

# Preview the first six rows.
head(paired_data, n = 6)
## subject before after
## 1 1 103.8777 103.10810
## 2 2 127.4613 127.21165
## 3 3 129.6105 140.60182
## 4 4 103.3333 95.96407
## 5 5 111.4277 105.06272
## 6 6 116.1113 96.58605
# Paired t-test: tests whether the mean of the within-subject differences
# (before - after) is zero. The two vectors are matched row by row.
t_paired <- t.test(
  x = paired_data$before,
  y = paired_data$after,
  paired = TRUE
)

# Show the test statistic, degrees of freedom, p-value, confidence interval
# and the estimated mean difference.
print(t_paired)
##
## Paired t-test
##
## data: paired_data$before and paired_data$after
## t = 5.0791, df = 29, p-value = 2.036e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 3.732757 8.765539
## sample estimates:
## mean difference
## 6.249148
# Reshape to long format for plotting: the `before` and `after` columns become
# two rows per subject, with the column name stored in `time` and the
# measurement stored in `score`.
paired_long <- pivot_longer(
  paired_data,
  cols = c(before, after),
  names_to = "time",
  values_to = "score"
)
# Paired plot: one gray line per subject joining its two measurements, with
# the overall mean (blue point) and mean +/- 1 SD (error bar) at each time.
#
# The subject grouping is set on geom_line() only. If `group = subject` were
# in the global aes(), the stat_summary() layers would inherit it and
# summarise each subject's single observation separately, drawing one "mean"
# point per subject and no usable error bars (the SD of one value is NA).
ggplot(paired_long, aes(x = time, y = score)) +
  geom_line(aes(group = subject), alpha = 0.4, color = "gray") +
  stat_summary(fun = mean, geom = "point", size = 4, color = "blue") +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    width = 0.2
  ) +
  # `time` is a character column, so the axis would otherwise sort
  # alphabetically ("after" before "before"); force chronological order.
  scale_x_discrete(limits = c("before", "after")) +
  labs(
    title = "Paired Measurements Before and After",
    x = "Time",
    y = "Score"
  ) +
  theme_minimal()