📘 Introduction to t-tests

A t-test is a statistical hypothesis test for comparing means. This tutorial covers the two most common two-group variants:

  1. Independent (unpaired) t-test: Used when comparing means from two different groups.
  2. Paired t-test: Used when comparing means from the same group at two time points (e.g., before and after an intervention).

We will demonstrate both types using simulated data.


📦 Load Packages and Set Options

library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)

# Simulate two independent samples of 30 observations each.
# The seed is fixed so that the draws are reproducible.
set.seed(123)

group1 <- rnorm(n = 30, mean = 100, sd = 10)  # Treatment A
group2 <- rnorm(n = 30, mean = 110, sd = 10)  # Treatment B

# Long format: one row per observation, with a factor recording which
# treatment the value came from (first 30 rows A, last 30 rows B).
unpaired_data <- data.frame(
  value = c(group1, group2),
  group = gl(2, 30, labels = c("Treatment A", "Treatment B"))
)

# Preview the first six rows.
head(unpaired_data, n = 6)
##       value       group
## 1  94.39524 Treatment A
## 2  97.69823 Treatment A
## 3 115.58708 Treatment A
## 4 100.70508 Treatment A
## 5 101.29288 Treatment A
## 6 117.15065 Treatment A
# `unpaired_data` was already simulated above with set.seed(123). Re-seeding
# and re-drawing here would rebuild exactly the same objects, so the
# simulation is not repeated; only the preview of the first rows is shown.
head(unpaired_data)
##       value       group
## 1  94.39524 Treatment A
## 2  97.69823 Treatment A
## 3 115.58708 Treatment A
## 4 100.70508 Treatment A
## 5 101.29288 Treatment A
## 6 117.15065 Treatment A
# Independent two-sample t-test of `value` across the two levels of `group`.
# var.equal = TRUE requests the pooled-variance (Student) form of the test
# rather than t.test()'s default Welch correction.
t_unpaired <- t.test(
  value ~ group,
  var.equal = TRUE,
  data = unpaired_data
)
print(t_unpaired)
## 
##  Two Sample t-test
## 
## data:  value by group
## t = -5.2098, df = 58, p-value = 2.616e-06
## alternative hypothesis: true difference in means between group Treatment A and group Treatment B is not equal to 0
## 95 percent confidence interval:
##  -16.962870  -7.545972
## sample estimates:
## mean in group Treatment A mean in group Treatment B 
##                  99.52896                 111.78338
# Bar chart of the group means with error bars of +/- 1 standard deviation.
# NOTE(review): mean_sdl() is a ggplot2 wrapper around an Hmisc function, so
# Hmisc presumably needs to be installed for the error bars -- confirm.
ggplot(data = unpaired_data, mapping = aes(group, value, fill = group)) +
  # Bar height = mean of `value` within each group.
  geom_bar(stat = "summary", fun = mean, width = 0.6, color = "black") +
  # mult = 1 makes the bars span mean - 1 SD to mean + 1 SD.
  geom_errorbar(
    stat = "summary",
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    width = 0.2
  ) +
  ggtitle("Comparison of Means (Unpaired t-test)") +
  xlab("Group") +
  ylab("Value") +
  theme_minimal()

# Simulate paired measurements for 30 subjects.
# The seed is fixed so that the draws are reproducible.
set.seed(456)

# Baseline scores, then follow-up scores obtained by subtracting a noisy
# per-subject change (mean 5, sd 8) from each subject's own baseline.
before <- rnorm(n = 30, mean = 120, sd = 12)
after <- before - rnorm(n = 30, mean = 5, sd = 8)

# Wide format: one row per subject holding both measurements.
paired_data <- data.frame(subject = seq_len(30), before, after)

# Preview the first six rows.
head(paired_data, n = 6)
##   subject   before     after
## 1       1 103.8777 103.10810
## 2       2 127.4613 127.21165
## 3       3 129.6105 140.60182
## 4       4 103.3333  95.96407
## 5       5 111.4277 105.06272
## 6       6 116.1113  96.58605
# Paired t-test: tests whether the mean within-subject difference
# (before - after) is zero.
t_paired <- t.test(
  x = paired_data$before,
  y = paired_data$after,
  paired = TRUE
)
print(t_paired)
## 
##  Paired t-test
## 
## data:  paired_data$before and paired_data$after
## t = 5.0791, df = 29, p-value = 2.036e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  3.732757 8.765539
## sample estimates:
## mean difference 
##        6.249148
# Reshape to long format: one row per subject per time point.
# `time` is converted to a factor with explicit levels so the x-axis reads
# before -> after; as a plain character column it would be ordered
# alphabetically (after, before), reversing the chronology.
paired_long <- paired_data %>%
  pivot_longer(
    cols = c(before, after),
    names_to = "time",
    values_to = "score"
  ) %>%
  mutate(time = factor(time, levels = c("before", "after")))

# Spaghetti plot of each subject's trajectory, overlaid with the overall mean
# (blue point) and +/- 1 SD error bar at each time point.
#
# `group = subject` is mapped only on the line layer. If it were set in the
# global aes(), the stat_summary() layers would inherit it and summarise each
# subject separately (n = 1 per group): the "mean" points would just be the
# raw values and the SD error bars would be undefined.
ggplot(paired_long, aes(x = time, y = score)) +
  geom_line(aes(group = subject), alpha = 0.4, color = "gray") +
  stat_summary(fun = mean, geom = "point", size = 4, color = "blue") +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),  # mult = 1 -> mean +/- 1 SD
    geom = "errorbar",
    width = 0.2
  ) +
  labs(
    title = "Paired Measurements Before and After",
    x = "Time",
    y = "Score"
  ) +
  theme_minimal()