# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Stats Analysis 8: Factorial ANOVA main effects analysis
# Author: Nguyet Hoang | hoangt@ufl.edu
# Step 0: Load library
library(psych)
library(haven)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(effectsize)
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
library(moments)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
library(nortest)
# Step 1: Read the data
data <- read_sav("SA8_Online_and_studying.sav")
View(data)
colnames(data)
## [1] "ID" "Course_type" "Stdy_env" "Score"
attr(data$Course_type, "label")
## In-person Online Hybrid
## 1 2 3
attr(data$Stdy_env, "label")
## Quiet Noisy Library
## 1 2 3
# Labeling the data
data$Course_type <- factor(data$Course_type, levels = c(1, 2, 3), labels = c("In-person", "Online", "Hybrid"))
data$Stdy_env <- factor(data$Stdy_env, levels = c(1, 2, 3), labels = c("Quiet", "Noisy", "Library"))
# Descriptive statistics for the four groups
describeBy(data$Score, list(data$Course_type, data$Stdy_env), mat = TRUE)
## item group1 group2 vars n mean sd median trimmed mad
## X11 1 In-person Quiet 1 16 77.87500 5.536244 78.00 77.92857 6.6717
## X12 2 Online Quiet 1 24 76.33333 7.539269 76.00 76.50000 7.4130
## X13 3 Hybrid Quiet 1 16 76.12500 6.365270 76.00 75.64286 5.1891
## X14 4 In-person Noisy 1 32 74.65625 5.615987 75.25 75.01923 5.9304
## X15 5 Online Noisy 1 48 75.27083 5.869103 75.25 75.27500 5.9304
## X16 6 Hybrid Noisy 1 32 76.18750 5.645081 75.25 76.09615 5.1891
## X17 7 In-person Library 1 16 76.50000 6.099180 77.50 76.14286 6.6717
## X18 8 Online Library 1 24 77.08333 5.740146 77.00 77.25000 5.1891
## X19 9 Hybrid Library 1 16 79.75000 6.212890 81.00 80.14286 7.4130
## min max range skew kurtosis se
## X11 69.00 86.00 17 -0.09192076 -1.41098432 1.3840611
## X12 61.00 89.00 28 -0.15850677 -0.84597835 1.5389469
## X13 67.00 92.00 25 0.77405686 0.17929056 1.5913176
## X14 61.25 83.25 22 -0.52377389 -0.65393990 0.9927757
## X15 60.25 89.25 29 -0.02021683 -0.07810387 0.8471320
## X16 65.25 87.25 22 0.19964028 -0.72550345 0.9979187
## X17 68.00 90.00 22 0.33468934 -0.82263484 1.5247951
## X18 63.00 87.00 24 -0.31227306 -0.34035638 1.1717024
## X19 65.00 89.00 24 -0.69310504 -0.25381418 1.5532225
# Step 2: Running the model
model = lm(Score ~ Course_type * Stdy_env, data = data, contrasts=list(Course_type=contr.sum, Stdy_env=contr.sum))
anova_model <- Anova(model, type = "III")
# Step 3: Check homogeneity assumption
levene_test_results <- leveneTest(Score ~ Course_type * Stdy_env, data = data)
levene_test_results
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 8 0.417 0.9101
## 215
# Step 4: Checking normality
data$residuals <- residuals(model)
residuals <- data$residuals
# 4.1. Use skewness and kurtosis statistics
skew_val <- skew(residuals, type = 2)
kurt_val <- kurtosi(residuals, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: -0.08"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: -0.17"
# 4.2. A histogram with a normal curve overlayed on top of it
# 4.2.1. Calculate the statistics
m <- mean(residuals, na.rm = TRUE)
md <- median(residuals, na.rm = TRUE)
std <- sd(residuals, na.rm = TRUE)
# 4.2.2. Set up the histogram
h <- hist(residuals, plot=FALSE)
plot(h, col="white", border="white", main="",
xlab="", ylab="", axes=FALSE,
ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(residuals, freq=TRUE,
col = "lightblue", border = "black",
xlab="SA3 residuals",
main="Histogram of the residuals",
cex.main = 0.9,
ylim=c(0, max(h$counts) * 1.2))
# 4.2.3. Add the normal curve
scaling_factor <- length(residuals) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor,
col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# 4.2.4. Add a Legend
legend("topright", legend = c(
paste("Mean =", sprintf("%.2f", m)),
paste("SD =", sprintf("%.2f", std)),
paste("Median =", sprintf("%.2f", md)),
"Normal Curve"),
col = c("darkblue", "transparent", "transparent","darkred"),
lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
bty = "n", cex = 0.8)
box(bty = "l")

# 4.3. A QQplot
# 4.3.1. Draw the points
qqnorm(residuals, main = "Q-Q Plot of data residuals",
xlab = "Theoretical Quantiles (Normal Distribution)",
ylab = "Observed data residuals",
pch = 19, col = "steelblue")
# 4.3.2. Add the reference line
qqline(residuals, col = "darkred", lwd = 2)

# 4.4. The Lilliefors-corrected KS test (n = 224 > 50)
lillie_test_result <- lillie.test(residuals)
lillie_test_result
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals
## D = 0.031776, p-value = 0.8407
# Step 5: ANOVA table and effect size:
anova_model
## Anova Table (Type III tests)
##
## Response: Score
## Sum Sq Df F value Pr(>F)
## (Intercept) 1141916 1 31258.5139 < 2e-16 ***
## Course_type 48 2 0.6620 0.51686
## Stdy_env 223 2 3.0552 0.04916 *
## Course_type:Stdy_env 113 4 0.7708 0.54530
## Residuals 7854 215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared <- eta_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
## are mean-centered and factors are coded with orthogonal contrasts (such
## as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
## *not* by the default `contr.treatment`).
eta_squared
## # Effect Size for ANOVA (Type III)
##
## Parameter | Eta2 (partial) | 95% CI
## ----------------------------------------------------
## Course_type | 6.12e-03 | [0.00, 1.00]
## Stdy_env | 0.03 | [0.00, 1.00]
## Course_type:Stdy_env | 0.01 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
omega_squared <- omega_squared(anova_model, partial = TRUE)
## Type 3 ANOVAs only give sensible and informative results when covariates
## are mean-centered and factors are coded with orthogonal contrasts (such
## as those produced by `contr.sum`, `contr.poly`, or `contr.helmert`, but
## *not* by the default `contr.treatment`).
omega_squared
## # Effect Size for ANOVA (Type III)
##
## Parameter | Omega2 (partial) | 95% CI
## ------------------------------------------------------
## Course_type | 0.00 | [0.00, 1.00]
## Stdy_env | 0.02 | [0.00, 1.00]
## Course_type:Stdy_env | 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# Step 6: the following main effects are significant, but the interaction is not.
# Since only study environment is significant, we will run post hoc tests for study environment only.
emmeans_model_SE <- emmeans(model, ~ Stdy_env, weights="cells")
## NOTE: Results may be misleading due to involvement in interactions
bonferroni_result_SE <- contrast(emmeans_model_SE, method = "pairwise", adjust = "Bonferroni")
final_result_SE <- as.data.frame(summary(bonferroni_result_SE, infer = c(TRUE, TRUE)))
final_result_SE
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## Quiet - Noisy 1.3571429 0.9892008 215 -1.029658 3.743944 1.372 0.5145
## Quiet - Library -0.9642857 1.1422307 215 -3.720326 1.791754 -0.844 1.0000
## Noisy - Library -2.3214286 0.9892008 215 -4.708229 0.065372 -2.347 0.0595
##
## Results are averaged over the levels of: Course_type
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 3 estimates
## P value adjustment: bonferroni method for 3 tests
# Step 7: Graphs
emm <- emmeans(model, ~ Course_type * Stdy_env)
emm_df <- as.data.frame(emm)
emm_df
## Course_type Stdy_env emmean SE df lower.CL upper.CL
## In-person Quiet 77.87500 1.5110292 215 74.89667 80.85333
## Online Quiet 76.33333 1.2337502 215 73.90154 78.76513
## Hybrid Quiet 76.12500 1.5110292 215 73.14667 79.10333
## In-person Noisy 74.65625 1.0684590 215 72.55025 76.76225
## Online Noisy 75.27083 0.8723931 215 73.55129 76.99037
## Hybrid Noisy 76.18750 1.0684590 215 74.08150 78.29350
## In-person Library 76.50000 1.5110292 215 73.52167 79.47833
## Online Library 77.08333 1.2337502 215 74.65154 79.51513
## Hybrid Library 79.75000 1.5110292 215 76.77167 82.72833
##
## Confidence level used: 0.95
# Course type on x-axis, emmean on y-axis, lines for each Study environment
ggplot(emm_df, aes(x = Course_type, y = emmean, group = Stdy_env, color = Stdy_env)) +
geom_point(size = 3) +
geom_line() +
labs(title = "Interaction Plot: Estimated Marginal Means",
x = "Course type",
y = "Estimated Marginal Means",
color = "Study environment") +
theme_minimal()

# Study environment on x-axis, emmean on y-axis, lines for each course type
ggplot(emm_df, aes(x = Stdy_env, y = emmean, group = Course_type, color = Course_type)) +
geom_point(size = 3) +
geom_line() +
labs(title = "Interaction Plot: Estimated Marginal Means",
x = "Study environment",
y = "Estimated Marginal Means",
color = "Course type") +
theme_minimal()
