# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Assignment 3: Stats Analysis 4: One-way ANOVA
# Author: Nguyet Hoang | hoangt@ufl.edu
# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(scales)
library(psych)
library(car)
## Loading required package: carData
library(effectsize)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(nortest)
# Read the data
SA4_reading <- read_sav("SA4_reading_experiment.sav")
view(SA4_reading)
attr(SA4_reading$Treat_group, "labels")
## Balanced Literacy Phonics
## 1 2
## Whole-language Steven's Magical Reading Method
## 3 4
# Descriptive statistics for the three groups
describeBy(SA4_reading$Reading, SA4_reading$Treat_group)
##
## Descriptive statistics by group
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 62.7 10.52 62.82 62.15 13.44 46.78 87.62 40.84 0.35 -0.57 2.1
## ------------------------------------------------------------
## group: 2
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 65.13 9.73 64.68 64.29 10.58 52.41 86.98 34.58 0.68 -0.42 1.95
## ------------------------------------------------------------
## group: 3
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 25 54.25 10.36 57.04 54.9 9.33 32.86 68.73 35.86 -0.64 -0.76
## se
## X1 2.07
## ------------------------------------------------------------
## group: 4
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 69.48 9.07 68.18 69.2 8.14 50.68 89.7 39.02 0.23 -0.39 1.66
# Assess the assumption of homogeneity of variances.
# Use Levene’s test
levene_test_result <- leveneTest(Reading ~ factor(Treat_group), data = SA4_reading)
levene_test_result
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.2784 0.8409
## 101
# Assess the assumption of normality for the one-factor ANOVA
# Calculate residuals
anova_model <- aov(Reading ~ factor(Treat_group), data = SA4_reading)
residuals <- resid(anova_model)
view(residuals)
# Use skewness and kurtosis statistics
skew_val <- skew(residuals, type = 2)
kurt_val <- kurtosi(residuals, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: 0.14"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: -0.28"
# A histogram with a normal curve overlayed on top of it
# Calculate the statistics
m <- mean(residuals, na.rm = TRUE)
md <- median(residuals, na.rm = TRUE)
std <- sd(residuals, na.rm = TRUE)
# Set up the histogram
h <- hist(residuals, plot=FALSE)
plot(h, col="white", border="white", main="",
xlab="", ylab="", axes=FALSE,
ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(residuals, freq=TRUE,
col = "lightblue", border = "black",
xlab="SA3 residuals",
main="Histogram of the residuals",
cex.main = 0.9,
ylim=c(0, max(h$counts) * 1.2))
# Add the normal curve
scaling_factor <- length(residuals) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor,
col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# Add a Legend
legend("topright", legend = c(
paste("Mean =", sprintf("%.2f", m)),
paste("SD =", sprintf("%.2f", std)),
paste("Median =", sprintf("%.2f", md)),
"Normal Curve"),
col = c("darkblue", "transparent", "transparent","darkred"),
lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
bty = "n", cex = 0.8)
box(bty = "l")

# A QQplot
#Draw the points
qqnorm(residuals, main = "Q-Q Plot of data residuals",
xlab = "Theoretical Quantiles (Normal Distribution)",
ylab = "Observed data residuals",
pch = 19, col = "steelblue")
#Add the reference line
qqline(residuals, col = "darkred", lwd = 2)

# The Lilliefors-corrected KS test (n = 105)
lillie_test_result <- lillie.test(residuals)
lillie_test_result
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals
## D = 0.052679, p-value = 0.6736
# One-factor ANOVA model: Tests of Between-Subjects Effects
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Treat_group) 3 3286 1095.3 11.19 2.12e-06 ***
## Residuals 101 9890 97.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate the effect sizes (omega squared)
omega_squared <- omega_squared(anova_model)
## For one-way between subjects designs, partial omega squared is
## equivalent to omega squared. Returning omega squared.
omega_squared
## # Effect Size for ANOVA
##
## Parameter | Omega2 | 95% CI
## -------------------------------------------
## factor(Treat_group) | 0.23 | [0.10, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# Holm-Bonferroni post hoc test
emmeans_model <- emmeans(anova_model, ~ Treat_group)
holm_result <- contrast(emmeans_model, method = "pairwise", adjust = "holm")
final_result <- as.data.frame(summary(holm_result, infer = c(TRUE, TRUE)))
final_result
## contrast estimate SE df lower.CL upper.CL
## Treat_group1 - Treat_group2 -2.428418 2.798899 101 -9.960858 5.104022
## Treat_group1 - Treat_group3 8.453494 2.798899 101 0.921054 15.985935
## Treat_group1 - Treat_group4 -6.778622 2.679741 101 -13.990385 0.433140
## Treat_group2 - Treat_group3 10.881912 2.798899 101 3.349472 18.414353
## Treat_group2 - Treat_group4 -4.350205 2.679741 101 -11.561967 2.861558
## Treat_group3 - Treat_group4 -15.232117 2.679741 101 -22.443879 -8.020354
## t.ratio p.value
## -0.868 0.3877
## 3.020 0.0128
## -2.530 0.0389
## 3.888 0.0009
## -1.623 0.2153
## -5.684 <0.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 6 estimates
## P value adjustment: holm method for 6 tests