# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Assignment 3: Stats Analysis 4: One-way ANOVA 
# Author: Nguyet Hoang | hoangt@ufl.edu

# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(scales)
library(psych)
library(car)
## Loading required package: carData
library(effectsize)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(nortest)

# Read the data
SA4_reading <- read_sav("SA4_reading_experiment.sav")
view(SA4_reading)
attr(SA4_reading$Treat_group, "labels")
##               Balanced Literacy                         Phonics 
##                               1                               2 
##                  Whole-language Steven's Magical Reading Method 
##                               3                               4
# Descriptive statistics for the three groups
describeBy(SA4_reading$Reading, SA4_reading$Treat_group)
## 
##  Descriptive statistics by group 
## group: 1
##    vars  n mean    sd median trimmed   mad   min   max range skew kurtosis  se
## X1    1 25 62.7 10.52  62.82   62.15 13.44 46.78 87.62 40.84 0.35    -0.57 2.1
## ------------------------------------------------------------ 
## group: 2
##    vars  n  mean   sd median trimmed   mad   min   max range skew kurtosis   se
## X1    1 25 65.13 9.73  64.68   64.29 10.58 52.41 86.98 34.58 0.68    -0.42 1.95
## ------------------------------------------------------------ 
## group: 3
##    vars  n  mean    sd median trimmed  mad   min   max range  skew kurtosis
## X1    1 25 54.25 10.36  57.04    54.9 9.33 32.86 68.73 35.86 -0.64    -0.76
##      se
## X1 2.07
## ------------------------------------------------------------ 
## group: 4
##    vars  n  mean   sd median trimmed  mad   min  max range skew kurtosis   se
## X1    1 30 69.48 9.07  68.18    69.2 8.14 50.68 89.7 39.02 0.23    -0.39 1.66
# Assess the assumption of homogeneity of variances.
# Use Levene’s test
levene_test_result <- leveneTest(Reading ~ factor(Treat_group), data = SA4_reading)
levene_test_result
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.2784 0.8409
##       101
# Assess the assumption of normality for the one-factor ANOVA
# Calculate residuals
anova_model <- aov(Reading ~ factor(Treat_group), data = SA4_reading)
residuals <- resid(anova_model)
view(residuals)
# Use skewness and kurtosis statistics
skew_val <- skew(residuals, type = 2)
kurt_val <- kurtosi(residuals, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: 0.14"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: -0.28"
# A histogram with a normal curve overlayed on top of it
# Calculate the statistics
m   <- mean(residuals, na.rm = TRUE)
md  <- median(residuals, na.rm = TRUE)
std <- sd(residuals, na.rm = TRUE)
# Set up the histogram
h <- hist(residuals, plot=FALSE)
plot(h, col="white", border="white", main="", 
     xlab="", ylab="", axes=FALSE,
     ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(residuals, freq=TRUE, 
     col = "lightblue", border = "black",
     xlab="SA3 residuals", 
     main="Histogram of the residuals",
     cex.main = 0.9,
     ylim=c(0, max(h$counts) * 1.2))
# Add the normal curve
scaling_factor <- length(residuals) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor, 
      col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# Add a Legend
legend("topright", legend = c(
  paste("Mean =", sprintf("%.2f", m)),
  paste("SD =", sprintf("%.2f", std)),
  paste("Median =", sprintf("%.2f", md)),
  "Normal Curve"),
  col = c("darkblue", "transparent", "transparent","darkred"),
  lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
  bty = "n", cex = 0.8)  
box(bty = "l")

# A QQplot
#Draw the points
qqnorm(residuals, main = "Q-Q Plot of data residuals",
       xlab = "Theoretical Quantiles (Normal Distribution)", 
       ylab = "Observed data residuals",
       pch = 19, col = "steelblue")
#Add the reference line
qqline(residuals, col = "darkred", lwd = 2) 

# The Lilliefors-corrected KS test (n = 105) 
lillie_test_result <- lillie.test(residuals)
lillie_test_result
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals
## D = 0.052679, p-value = 0.6736
# One-factor ANOVA model: Tests of Between-Subjects Effects
summary(anova_model)
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(Treat_group)   3   3286  1095.3   11.19 2.12e-06 ***
## Residuals           101   9890    97.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate the effect sizes (omega squared)
omega_squared <- omega_squared(anova_model)
## For one-way between subjects designs, partial omega squared is
##   equivalent to omega squared. Returning omega squared.
omega_squared
## # Effect Size for ANOVA
## 
## Parameter           | Omega2 |       95% CI
## -------------------------------------------
## factor(Treat_group) |   0.23 | [0.10, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
# Holm-Bonferroni post hoc test
emmeans_model <- emmeans(anova_model, ~ Treat_group)
holm_result <- contrast(emmeans_model, method = "pairwise", adjust = "holm")
final_result <- as.data.frame(summary(holm_result, infer = c(TRUE, TRUE)))
final_result
##  contrast                      estimate       SE  df   lower.CL  upper.CL
##  Treat_group1 - Treat_group2  -2.428418 2.798899 101  -9.960858  5.104022
##  Treat_group1 - Treat_group3   8.453494 2.798899 101   0.921054 15.985935
##  Treat_group1 - Treat_group4  -6.778622 2.679741 101 -13.990385  0.433140
##  Treat_group2 - Treat_group3  10.881912 2.798899 101   3.349472 18.414353
##  Treat_group2 - Treat_group4  -4.350205 2.679741 101 -11.561967  2.861558
##  Treat_group3 - Treat_group4 -15.232117 2.679741 101 -22.443879 -8.020354
##  t.ratio p.value
##   -0.868  0.3877
##    3.020  0.0128
##   -2.530  0.0389
##    3.888  0.0009
##   -1.623  0.2153
##   -5.684 <0.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 6 estimates 
## P value adjustment: holm method for 6 tests