# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Stats Analysis 7: Factorial ANOVA interaction analysis
# Author: Nguyet Hoang | hoangt@ufl.edu

# Step 0: Load library
library(psych)
library(haven)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(effectsize)
## 
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
## 
##     phi
library(moments)
library(scales)
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
library(nortest)

# Step 1: Read the data
data <- read_sav("SA7_Signaling_Principle.sav")
View(data)
colnames(data)
## [1] "ID"         "Complexity" "Signaling"  "Learning"
# Descriptive statistics for the six groups
describeBy(data$Learning, list(data$Complexity, data$Signaling), mat = TRUE)
##     item group1   group2 vars  n   mean        sd median  trimmed     mad min
## X11    1   High    Heavy    1 10 75.100  7.665942   77.0 75.25000  5.9304  63
## X12    2    Low    Heavy    1 20 82.350 10.800950   85.0 83.50000 10.3782  60
## X13    3   High Moderate    1 20 78.850 10.820424   80.5 79.43750 11.8608  54
## X14    4    Low Moderate    1 40 77.675  9.624721   76.0 77.71875 10.3782  55
## X15    5   High     None    1 10 67.400  8.030497   66.0 67.12500  8.8956  57
## X16    6    Low     None    1 20 79.800  9.758128   81.0 79.93750  8.1543  54
##     max range        skew   kurtosis       se
## X11  86    23 -0.33466677 -1.3951354 2.424184
## X12  96    36 -0.69946873 -0.8040346 2.415166
## X13  94    40 -0.38472246 -0.7007750 2.419520
## X14 100    45 -0.08275037 -0.1181304 1.521802
## X15  80    23  0.27629681 -1.5113277 2.539466
## X16  97    43 -0.45482724  0.5969471 2.181984
# Step 2: Running the model
model = lm(Learning ~ Complexity * Signaling, data = data, contrasts=list(Complexity=contr.sum, Signaling=contr.sum))
anova_model <- Anova(model, type = "III")

# Step 3: Check homogeneity assumption
levene_test_results <- leveneTest(Learning ~ Complexity * Signaling, data = data)
levene_test_results
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  0.4955  0.779
##       114
# Step 4: Checking normality
data$residuals <- residuals(model)
residuals <- data$residuals
# 4.1. Use skewness and kurtosis statistics
skew_val <- skew(residuals, type = 2)
kurt_val <- kurtosi(residuals, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: -0.38"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: 0.03"
# 4.2. A histogram with a normal curve overlayed on top of it
# 4.2.1. Calculate the statistics
m   <- mean(residuals, na.rm = TRUE)
md  <- median(residuals, na.rm = TRUE)
std <- sd(residuals, na.rm = TRUE)
# 4.2.2. Set up the histogram
h <- hist(residuals, plot=FALSE)
plot(h, col="white", border="white", main="",
     xlab="", ylab="", axes=FALSE,
     ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(residuals, freq=TRUE,
     col = "lightblue", border = "black",
     xlab="Deviations of scores from the cell means (residuals)",
     main="Histogram of the residuals",
     cex.main = 0.9,
     ylim=c(0, max(h$counts) * 1.2))
# 4.2.3. Add the normal curve
scaling_factor <- length(residuals) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor,
      col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# 4.2.4. Add a Legend
legend("topright", legend = c(
  paste("Mean =", sprintf("%.2f", m)),
  paste("SD =", sprintf("%.2f", std)),
  paste("Median =", sprintf("%.2f", md)),
  "Normal Curve"),
  col = c("darkblue", "transparent", "transparent","darkred"),
  lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
  bty = "n", cex = 0.8)
box(bty = "l")

# 4.3. A QQplot
# 4.3.1. Draw the points
qqnorm(residuals, main = "Q-Q Plot of data residuals",
       xlab = "Theoretical Quantiles (Normal Distribution)",
       ylab = "Observed data residuals",
       pch = 19, col = "steelblue")
# 4.3.2. Add the reference line
qqline(residuals, col = "darkred", lwd = 2)

# 4.4. The Lilliefors-corrected KS test (n = 120 > 50)
lillie_test_result <- lillie.test(residuals)
lillie_test_result
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  residuals
## D = 0.070801, p-value = 0.1478
# Step 5: ANOVA table and effect size:
anova_model
## Anova Table (Type III tests)
## 
## Response: Learning
##                      Sum Sq  Df   F value    Pr(>F)    
## (Intercept)          567153   1 5892.5660 < 2.2e-16 ***
## Complexity              910   1    9.4567  0.002634 ** 
## Signaling               468   2    2.4302  0.092567 .  
## Complexity:Signaling    895   2    4.6498  0.011449 *  
## Residuals             10972 114                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared <- eta_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
##   are mean-centered and factors are coded with orthogonal contrasts (such
##   as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
##   *not* by the default `contr.treatment`).
eta_squared
## # Effect Size for ANOVA (Type III)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## Complexity           |           0.08 | [0.02, 1.00]
## Signaling            |           0.04 | [0.00, 1.00]
## Complexity:Signaling |           0.08 | [0.01, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
omega_squared <- omega_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
##   are mean-centered and factors are coded with orthogonal contrasts (such
##   as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
##   *not* by the default `contr.treatment`).
omega_squared
## # Effect Size for ANOVA (Type III)
## 
## Parameter            | Omega2 (partial) |       95% CI
## ------------------------------------------------------
## Complexity           |             0.07 | [0.01, 1.00]
## Signaling            |             0.02 | [0.00, 1.00]
## Complexity:Signaling |             0.06 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
# Step 6: Post hoc test
emmeans_model <- emmeans(model, ~ Complexity * Signaling)
# 6.1. Test for simple main effects
joint_tests(emmeans_model, by = "Complexity")
## Complexity = High:
##  model term df1 df2 F.ratio p.value
##  Signaling    2 114   4.541  0.0127
## 
## Complexity = Low:
##  model term df1 df2 F.ratio p.value
##  Signaling    2 114   1.539  0.2191
joint_tests(emmeans_model, by = "Signaling")
## Signaling = Heavy:
##  model term df1 df2 F.ratio p.value
##  Complexity   1 114   3.641  0.0589
## 
## Signaling = Moderate:
##  model term df1 df2 F.ratio p.value
##  Complexity   1 114   0.191  0.6627
## 
## Signaling = None:
##  model term df1 df2 F.ratio p.value
##  Complexity   1 114  10.650  0.0015
# 6.2. Pairwise comparisons with Bonferroni adjustment
bonferroni_result1 <- contrast(emmeans_model, method = "pairwise", adjust = "bonferroni", by="Complexity")
bonferroni_result1
## Complexity = High:
##  contrast         estimate   SE  df t.ratio p.value
##  Heavy - Moderate    -3.75 3.80 114  -0.987  0.9773
##  Heavy - None         7.70 4.39 114   1.755  0.2458
##  Moderate - None     11.45 3.80 114   3.013  0.0096
## 
## Complexity = Low:
##  contrast         estimate   SE  df t.ratio p.value
##  Heavy - Moderate     4.67 2.69 114   1.740  0.2537
##  Heavy - None         2.55 3.10 114   0.822  1.0000
##  Moderate - None     -2.12 2.69 114  -0.791  1.0000
## 
## P value adjustment: bonferroni method for 3 tests
bonferroni_result2 <- contrast(emmeans_model, method = "pairwise", adjust = "bonferroni", by="Signaling")
bonferroni_result2
## Signaling = Heavy:
##  contrast   estimate   SE  df t.ratio p.value
##  High - Low    -7.25 3.80 114  -1.908  0.0589
## 
## Signaling = Moderate:
##  contrast   estimate   SE  df t.ratio p.value
##  High - Low     1.18 2.69 114   0.437  0.6627
## 
## Signaling = None:
##  contrast   estimate   SE  df t.ratio p.value
##  High - Low   -12.40 3.80 114  -3.263  0.0015
# Step 7: Graphs
emm <- emmeans(model, ~ Complexity * Signaling)
emm_df <- as.data.frame(emm)
emm_df
##  Complexity Signaling emmean       SE  df lower.CL upper.CL
##  High       Heavy     75.100 3.102401 114 68.95417 81.24583
##  Low        Heavy     82.350 2.193729 114 78.00424 86.69576
##  High       Moderate  78.850 2.193729 114 74.50424 83.19576
##  Low        Moderate  77.675 1.551200 114 74.60208 80.74792
##  High       None      67.400 3.102401 114 61.25417 73.54583
##  Low        None      79.800 2.193729 114 75.45424 84.14576
## 
## Confidence level used: 0.95
# Complexity on x-axis, emmean on y-axis, lines for each Signaling
ggplot(emm_df, aes(x = Complexity, y = emmean, group = Signaling, color = Signaling)) +
  geom_point(size = 3) +
  geom_line() +
  labs(title = "Interaction Plot: Estimated Marginal Means",
       x = "Complexity",
       y = "Estimated Marginal Means",
       color = "Signaling") +
  theme_minimal()

# Signaling on x-axis, emmean on y-axis, lines for each complexity
ggplot(emm_df, aes(x = Signaling, y = emmean, group = Complexity, color = Complexity)) +
  geom_point(size = 3) +
  geom_line() +
  labs(title = "Interaction Plot: Estimated Marginal Means",
       x = "Signaling",
       y = "Estimated Marginal Means",
       color = "Complexity") +
  theme_minimal()