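The Kruskal-Wallis test is a non-parametric alternative to one-way ANOVA. It tests the null hypothesis that multiple independent groups come from the same distribution; when the group distributions have the same shape, this is equivalent to testing that the group medians are equal. This test is useful when the assumptions of ANOVA (normality and equal variances) are violated, although interpreting a significant result as a difference in medians still requires the groups to have similarly shaped distributions.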
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
The dataset consists of percentages from seven different groups (judges), with five observations per group, as described in the transcript.
# Build the long-format data set: one row per observation, with the judge
# label in `Judge` and the observed percentage in `Proportion`.
# The values are keyed by judge so each label sits next to its own data;
# local() keeps the helper list out of the global environment.
dat <- local({
  by_judge <- list(
    Judge_A     = c(6.4, 8.7, 13.3, 17.7, 17.7),
    Judge_B     = c(24.5, 30.1, 35.6, 40.2, 45.0),
    Judge_C     = c(20.1, 25.3, 27.7, 29.4, 31.2),
    Judge_D     = c(15.8, 18.3, 21.9, 22.5, 26.0),
    Judge_E     = c(10.0, 12.1, 13.7, 15.5, 16.9),
    Judge_F     = c(7.8, 8.5, 9.3, 10.2, 11.0),
    Judge_Spock = c(48.9, 50.1, 55.3, 60.4, 64.8)
  )
  data.frame(
    # Repeat each label once per value held for that judge
    Judge = rep(names(by_judge), times = lengths(by_judge)),
    # Flatten in list order; drop names so they do not leak into the column
    Proportion = unlist(by_judge, use.names = FALSE)
  )
})
# Preview the first six rows
head(dat)
## Judge Proportion
## 1 Judge_A 6.4
## 2 Judge_A 8.7
## 3 Judge_A 13.3
## 4 Judge_A 17.7
## 5 Judge_A 17.7
## 6 Judge_B 24.5
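To perform the Kruskal-Wallis test, the observations are ranked across all groups combined (ignoring group membership); tied values receive the average of the ranks they span.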
# Add a Rank column: each observation's rank within the pooled sample.
# Tied values share the average of the ranks they occupy, which is the
# default behaviour of rank() and is spelled out here for clarity.
dat <- mutate(dat, Rank = rank(Proportion, ties.method = "average"))
# Preview the data with the new Rank column
head(dat)
## Judge Proportion Rank
## 1 Judge_A 6.4 1.0
## 2 Judge_A 8.7 4.0
## 3 Judge_A 13.3 10.0
## 4 Judge_A 17.7 15.5
## 5 Judge_A 17.7 15.5
## 6 Judge_B 24.5 21.0
The average rank for each group is calculated.
# Mean rank per judge. `.groups = "drop"` makes summarise() return an
# ungrouped result, so no grouping carries over to later steps.
group_ranks <- summarise(
  group_by(dat, Judge),
  Avg_Rank = mean(Rank),
  .groups = "drop"
)
# Show the per-judge mean ranks
group_ranks
## # A tibble: 7 × 2
## Judge Avg_Rank
## <chr> <dbl>
## 1 Judge_A 9.2
## 2 Judge_B 26.8
## 3 Judge_C 23.2
## 4 Judge_D 18.4
## 5 Judge_E 10.4
## 6 Judge_F 5
## 7 Judge_Spock 33
The kruskal.test() function is used to perform the test.
# Run the Kruskal-Wallis rank sum test of Proportion across the Judge groups
# (formula interface: response ~ grouping variable)
kruskal_result <- stats::kruskal.test(Proportion ~ Judge, data = dat)
# Show the test statistic, degrees of freedom, and p-value
kruskal_result
##
## Kruskal-Wallis rank sum test
##
## data: Proportion by Judge
## Kruskal-Wallis chi-squared = 30.187, df = 6, p-value = 3.622e-05
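Under the null hypothesis, the test statistic H approximately follows a chi-squared distribution with k-1 degrees of freedom (where k is the number of groups; here k = 7, so df = 6). The p-value is obtained by comparing the observed H to this distribution.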
# Report the test decision at the 5% significance level: the message is
# chosen from the p-value and written to the console in a single cat() call.
cat(
  if (kruskal_result$p.value < 0.05) {
    "Reject the null hypothesis: Significant differences exist between the group medians.\n"
  } else {
    "Fail to reject the null hypothesis: No significant differences between the group medians.\n"
  }
)
## Reject the null hypothesis: Significant differences exist between the group medians.