# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Stats Analysis 7: Factorial ANOVA interaction analysis
# Author: Nguyet Hoang | hoangt@ufl.edu
# Step 0: Load library
library(psych)
library(haven)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(effectsize)
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
library(moments)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
library(nortest)
# Step 1: Read the data
data <- read_sav("SA7_Signaling_Principle.sav")
View(data)
colnames(data)
## [1] "ID" "Complexity" "Signaling" "Learning"
# Descriptive statistics for the six groups
describeBy(data$Learning, list(data$Complexity, data$Signaling), mat = TRUE)
## item group1 group2 vars n mean sd median trimmed mad min
## X11 1 High Heavy 1 10 75.100 7.665942 77.0 75.25000 5.9304 63
## X12 2 Low Heavy 1 20 82.350 10.800950 85.0 83.50000 10.3782 60
## X13 3 High Moderate 1 20 78.850 10.820424 80.5 79.43750 11.8608 54
## X14 4 Low Moderate 1 40 77.675 9.624721 76.0 77.71875 10.3782 55
## X15 5 High None 1 10 67.400 8.030497 66.0 67.12500 8.8956 57
## X16 6 Low None 1 20 79.800 9.758128 81.0 79.93750 8.1543 54
## max range skew kurtosis se
## X11 86 23 -0.33466677 -1.3951354 2.424184
## X12 96 36 -0.69946873 -0.8040346 2.415166
## X13 94 40 -0.38472246 -0.7007750 2.419520
## X14 100 45 -0.08275037 -0.1181304 1.521802
## X15 80 23 0.27629681 -1.5113277 2.539466
## X16 97 43 -0.45482724 0.5969471 2.181984
# Step 2: Running the model
model = lm(Learning ~ Complexity * Signaling, data = data, contrasts=list(Complexity=contr.sum, Signaling=contr.sum))
anova_model <- Anova(model, type = "III")
# Step 3: Check homogeneity assumption
levene_test_results <- leveneTest(Learning ~ Complexity * Signaling, data = data)
levene_test_results
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 0.4955 0.779
## 114
# Step 4: Checking normality
data$residuals <- residuals(model)
residuals <- data$residuals
# 4.1. Use skewness and kurtosis statistics
skew_val <- skew(residuals, type = 2)
kurt_val <- kurtosi(residuals, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: -0.38"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: 0.03"
# 4.2. A histogram with a normal curve overlayed on top of it
# 4.2.1. Calculate the statistics
m <- mean(residuals, na.rm = TRUE)
md <- median(residuals, na.rm = TRUE)
std <- sd(residuals, na.rm = TRUE)
# 4.2.2. Set up the histogram
h <- hist(residuals, plot=FALSE)
plot(h, col="white", border="white", main="",
xlab="", ylab="", axes=FALSE,
ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(residuals, freq=TRUE,
col = "lightblue", border = "black",
xlab="Deviations of scores from the cell means (residuals)",
main="Histogram of the residuals",
cex.main = 0.9,
ylim=c(0, max(h$counts) * 1.2))
# 4.2.3. Add the normal curve
scaling_factor <- length(residuals) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor,
col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# 4.2.4. Add a Legend
legend("topright", legend = c(
paste("Mean =", sprintf("%.2f", m)),
paste("SD =", sprintf("%.2f", std)),
paste("Median =", sprintf("%.2f", md)),
"Normal Curve"),
col = c("darkblue", "transparent", "transparent","darkred"),
lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
bty = "n", cex = 0.8)
box(bty = "l")

# 4.3. A QQplot
# 4.3.1. Draw the points
qqnorm(residuals, main = "Q-Q Plot of data residuals",
xlab = "Theoretical Quantiles (Normal Distribution)",
ylab = "Observed data residuals",
pch = 19, col = "steelblue")
# 4.3.2. Add the reference line
qqline(residuals, col = "darkred", lwd = 2)

# 4.4. The Lilliefors-corrected KS test (n = 120 > 50)
lillie_test_result <- lillie.test(residuals)
lillie_test_result
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals
## D = 0.070801, p-value = 0.1478
# Step 5: ANOVA table and effect size:
anova_model
## Anova Table (Type III tests)
##
## Response: Learning
## Sum Sq Df F value Pr(>F)
## (Intercept) 567153 1 5892.5660 < 2.2e-16 ***
## Complexity 910 1 9.4567 0.002634 **
## Signaling 468 2 2.4302 0.092567 .
## Complexity:Signaling 895 2 4.6498 0.011449 *
## Residuals 10972 114
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared <- eta_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
## are mean-centered and factors are coded with orthogonal contrasts (such
## as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
## *not* by the default `contr.treatment`).
eta_squared
## # Effect Size for ANOVA (Type III)
##
## Parameter | Eta2 (partial) | 95% CI
## ----------------------------------------------------
## Complexity | 0.08 | [0.02, 1.00]
## Signaling | 0.04 | [0.00, 1.00]
## Complexity:Signaling | 0.08 | [0.01, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
omega_squared <- omega_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
## are mean-centered and factors are coded with orthogonal contrasts (such
## as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
## *not* by the default `contr.treatment`).
omega_squared
## # Effect Size for ANOVA (Type III)
##
## Parameter | Omega2 (partial) | 95% CI
## ------------------------------------------------------
## Complexity | 0.07 | [0.01, 1.00]
## Signaling | 0.02 | [0.00, 1.00]
## Complexity:Signaling | 0.06 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# Step 6: Post hoc test
emmeans_model <- emmeans(model, ~ Complexity * Signaling)
# 6.1. Test for simple main effects
joint_tests(emmeans_model, by = "Complexity")
## Complexity = High:
## model term df1 df2 F.ratio p.value
## Signaling 2 114 4.541 0.0127
##
## Complexity = Low:
## model term df1 df2 F.ratio p.value
## Signaling 2 114 1.539 0.2191
joint_tests(emmeans_model, by = "Signaling")
## Signaling = Heavy:
## model term df1 df2 F.ratio p.value
## Complexity 1 114 3.641 0.0589
##
## Signaling = Moderate:
## model term df1 df2 F.ratio p.value
## Complexity 1 114 0.191 0.6627
##
## Signaling = None:
## model term df1 df2 F.ratio p.value
## Complexity 1 114 10.650 0.0015
# 6.2. Pairwise comparisons with Bonferroni adjustment
bonferroni_result1 <- contrast(emmeans_model, method = "pairwise", adjust = "bonferroni", by="Complexity")
bonferroni_result1
## Complexity = High:
## contrast estimate SE df t.ratio p.value
## Heavy - Moderate -3.75 3.80 114 -0.987 0.9773
## Heavy - None 7.70 4.39 114 1.755 0.2458
## Moderate - None 11.45 3.80 114 3.013 0.0096
##
## Complexity = Low:
## contrast estimate SE df t.ratio p.value
## Heavy - Moderate 4.67 2.69 114 1.740 0.2537
## Heavy - None 2.55 3.10 114 0.822 1.0000
## Moderate - None -2.12 2.69 114 -0.791 1.0000
##
## P value adjustment: bonferroni method for 3 tests
bonferroni_result2 <- contrast(emmeans_model, method = "pairwise", adjust = "bonferroni", by="Signaling")
bonferroni_result2
## Signaling = Heavy:
## contrast estimate SE df t.ratio p.value
## High - Low -7.25 3.80 114 -1.908 0.0589
##
## Signaling = Moderate:
## contrast estimate SE df t.ratio p.value
## High - Low 1.18 2.69 114 0.437 0.6627
##
## Signaling = None:
## contrast estimate SE df t.ratio p.value
## High - Low -12.40 3.80 114 -3.263 0.0015
# Step 7: Graphs
emm <- emmeans(model, ~ Complexity * Signaling)
emm_df <- as.data.frame(emm)
emm_df
## Complexity Signaling emmean SE df lower.CL upper.CL
## High Heavy 75.100 3.102401 114 68.95417 81.24583
## Low Heavy 82.350 2.193729 114 78.00424 86.69576
## High Moderate 78.850 2.193729 114 74.50424 83.19576
## Low Moderate 77.675 1.551200 114 74.60208 80.74792
## High None 67.400 3.102401 114 61.25417 73.54583
## Low None 79.800 2.193729 114 75.45424 84.14576
##
## Confidence level used: 0.95
# Complexity on x-axis, emmean on y-axis, lines for each Signaling
ggplot(emm_df, aes(x = Complexity, y = emmean, group = Signaling, color = Signaling)) +
geom_point(size = 3) +
geom_line() +
labs(title = "Interaction Plot: Estimated Marginal Means",
x = "Complexity",
y = "Estimated Marginal Means",
color = "Signaling") +
theme_minimal()

# Signaling on x-axis, emmean on y-axis, lines for each complexity
ggplot(emm_df, aes(x = Signaling, y = emmean, group = Complexity, color = Complexity)) +
geom_point(size = 3) +
geom_line() +
labs(title = "Interaction Plot: Estimated Marginal Means",
x = "Signaling",
y = "Estimated Marginal Means",
color = "Complexity") +
theme_minimal()
