if echo = false, then no code will show in output

Packages

Loading required packages:

library(ggplot2)
library(dplyr)
#install.packages("reshape2")
library(scales)
library(car)
library(melt)
library(data.table)
library(reshape2)

Setting global theme

theme_set(theme_bw())

Data

set.seed(123)

groupA <- rnorm(30, mean = 50, sd = 5)
groupB <- rnorm(30, mean = 55, sd = 5)
groupC <- rnorm(30, mean = 70, sd = 5)
groupD <- rnorm(30, mean = 74, sd = 5)

my_data <- data.frame(
  value = c(groupA, groupB, groupC, groupD),
  group = factor(rep(c("A", "B", "C", "D"), each = 30))
)

Assumptions Test

\(H_O:\) Data does not deviate from normal distribution \(H_1:\) Data deviates from normal distribution

shapiro.test(groupA)   # null not rejected/ data follows normality

## 
##  Shapiro-Wilk normality test
## 
## data:  groupA
## W = 0.97894, p-value = 0.7966

shapiro.test(groupB)   # null not rejected/ data follows normality

## 
##  Shapiro-Wilk normality test
## 
## data:  groupB
## W = 0.98662, p-value = 0.9614

shapiro.test(groupC)   # null not rejected/ data follows normality

## 
##  Shapiro-Wilk normality test
## 
## data:  groupC
## W = 0.98085, p-value = 0.8478

shapiro.test(groupD)

## 
##  Shapiro-Wilk normality test
## 
## data:  groupD
## W = 0.97106, p-value = 0.5685

Check variance homogeneity (Levene’s test)

\(H_0:\) Group variance are equal or homogeneous \(H_1:\) At least one group variance is unequal or not homogeneous

car::leveneTest(value ~ group, data = my_data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.4176 0.7407
##       116

ANOVA

\[ H_0: \mu_1=\mu_2=\mu_3 \] \(H_1:\) At least one group means in not equal

anova_model <- aov(value ~ group, data = my_data)
summary(anova_model)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## group         3  11565    3855   190.4 <2e-16 ***
## Residuals   116   2348      20                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Visualization

my_data %>%
  ggplot(aes(x= value, fill = group))+
  geom_histogram(alpha = 1)+
  facet_wrap(vars(group), ncol= 1)

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Post hoc analysis

The following procedure is wrong.

my_data_sub <- my_data %>%
  filter(group %in% c("A", "B"))
t.test(value ~ group, my_data_sub, var.equal= TRUE)

## 
##  Two Sample t-test
## 
## data:  value by group
## t = -5.2098, df = 58, p-value = 2.616e-06
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -8.481435 -3.772986
## sample estimates:
## mean in group A mean in group B 
##        49.76448        55.89169

my_data_sub <- my_data %>%
  filter(group %in% c("A", "C"))
t.test(value ~ group, my_data_sub, var.equal= TRUE)

## 
##  Two Sample t-test
## 
## data:  value by group
## t = -17.009, df = 58, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group A and group C is not equal to 0
## 95 percent confidence interval:
##  -22.75339 -17.96185
## sample estimates:
## mean in group A mean in group C 
##        49.76448        70.12210

my_data_sub <- my_data %>%
  filter(group %in% c("B", "C"))
t.test(value ~ group, my_data_sub, var.equal= TRUE)

## 
##  Two Sample t-test
## 
## data:  value by group
## t = -12.928, df = 58, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group B and group C is not equal to 0
## 95 percent confidence interval:
##  -16.43380 -12.02702
## sample estimates:
## mean in group B mean in group C 
##        55.89169        70.12210

We have to run post hoc analysis

TukeyHSD(anova_model, "group", conf.level = .95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = value ~ group, data = my_data)
## 
## $group
##          diff        lwr       upr     p adj
## B-A  6.127210  3.0990917  9.155329 0.0000037
## C-A 20.357621 17.3295020 23.385739 0.0000000
## D-A 23.766074 20.7379554 26.794193 0.0000000
## C-B 14.230410 11.2022915 17.258529 0.0000000
## D-B 17.638864 14.6107449 20.666982 0.0000000
## D-C  3.408453  0.3803346  6.436572 0.0207466

T test

Nazmul

2026-01-26