This RMarkdown document simulates data from two independent groups, similar to the realistic scenario described in the blog post (comparing blood pressure between two treatments). The code performs:
All analysis is contained in a single code block.
# Load required packages
library(car)
library(ggplot2)
library(dplyr)
set.seed(123)
# ---------------------------------------------------------
# 1. SIMULATE DATA (Realistic Scenario)
# ---------------------------------------------------------
# Group 1: Standard medication (lower variance)
n1 <- 50
mean1 <- 120
sd1 <- 8
group1 <- rnorm(n1, mean = mean1, sd = sd1)
# Group 2: New medication (higher variance)
n2 <- 50
mean2 <- 118
sd2 <- 15
group2 <- rnorm(n2, mean = mean2, sd = sd2)
df <- data.frame(
value = c(group1, group2),
group = factor(c(rep("Standard Medication", n1), rep("New Medication", n2)))
)
# ---------------------------------------------------------
# 2. LEVENE'S TEST for homogeneity of variance
# ---------------------------------------------------------
levene_result <- leveneTest(value ~ group, data = df)
print("Levene's Test for Homogeneity of Variance:")
## [1] "Levene's Test for Homogeneity of Variance:"
print(levene_result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 11.128 0.001202 **
## 98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---------------------------------------------------------
# 3. F-TEST for comparing variances under normality
# ---------------------------------------------------------
f_test_result <- var.test(group1, group2)
print("F-Test for equality of variances:")
## [1] "F-Test for equality of variances:"
print(f_test_result)
##
## F test to compare two variances
##
## data: group1 and group2
## F = 0.29742, num df = 49, denom df = 49, p-value = 4.069e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1687792 0.5241113
## sample estimates:
## ratio of variances
## 0.2974207
# ---------------------------------------------------------
# 4. CHOOSE POOLED vs WELCH'S T-TEST
# ---------------------------------------------------------
alpha <- 0.05
if (levene_result$`Pr(>F)`[1] > alpha) {
message("Variances appear equal → Using pooled t-test")
ttest_result <- t.test(group1, group2, var.equal = TRUE)
} else {
message("Variances are unequal → Using Welch's t-test")
ttest_result <- t.test(group1, group2, var.equal = FALSE)
}
print("T-Test Result:")
## [1] "T-Test Result:"
print(ttest_result)
##
## Welch Two Sample t-test
##
## data: group1 and group2
## t = 0.036157, df = 75.778, p-value = 0.9713
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.278505 4.436714
## sample estimates:
## mean of x mean of y
## 120.2752 120.1961
# ---------------------------------------------------------
# 5. VISUALIZATION
# ---------------------------------------------------------
# Plot 1: Density plots showing spread and center
p1 <- ggplot(df, aes(x = value, fill = group, color = group)) +
geom_density(alpha = 0.25, linewidth = 1.2) +
scale_color_manual(values = c("Standard Medication" = "blue", "New Medication" = "red")) +
scale_fill_manual(values = c("Standard Medication" = "blue", "New Medication" = "red")) +
geom_vline(data = df %>% group_by(group) %>% summarize(mean = mean(value)),
aes(xintercept = mean, color = group),
linetype = "dashed", linewidth = 1.1) +
labs(title = "Density Plot of Two Groups",
x = "Measurement",
y = "Density") +
theme_minimal()
print(p1)
# Plot 2: Data spread with means and variance bars
summary_stats <- df %>%
group_by(group) %>%
summarize(
mean = mean(value),
sd = sd(value)
)
p2 <- ggplot(df, aes(x = group, y = value, color = group)) +
geom_jitter(width = 0.15, alpha = 0.4) +
scale_color_manual(values = c("Standard Medication" = "blue", "New Medication" = "red")) +
geom_point(data = summary_stats, aes(x = group, y = mean),
size = 4, color = "black") +
geom_errorbar(data = summary_stats,
aes(x = group,
ymin = mean - sd,
ymax = mean + sd),
inherit.aes = FALSE,
width = 0.15, linewidth = 1.1) +
labs(title = "Group Distributions With Mean and ±1 SD Variance Bars",
y = "Measurement Value") +
theme_minimal()
print(p2)