# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Stats Analysis 5: dependent t-test
# Author: Nguyet Hoang | hoangt@ufl.edu
# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(scales)
library(psych)
library(car)
## Loading required package: carData
library(effectsize)
# Read the data
data <- read_sav("SA5_career_intervention.sav")
view(data)
new_data <- data %>% mutate(difference = data$Posttest - data$Pretest)
head(new_data)
## # A tibble: 6 × 4
## ID Pretest Posttest difference
## <dbl> <dbl> <dbl> <dbl>
## 1 1 2 4 2
## 2 2 2 4 2
## 3 3 3 10 7
## 4 4 2 7 5
## 5 5 2 1 -1
## 6 6 5 7 2
difference <- new_data$difference
# Use skewness and kurtosis statistics
skew_val <- skew(difference, type = 2)
kurt_val <- kurtosi(difference, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: 0.47"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: 0.02"
# A histogram with a normal curve overlayed on top of it
# Calculate the statistics
m <- mean(difference, na.rm = TRUE)
md <- median(difference, na.rm = TRUE)
std <- sd(difference, na.rm = TRUE)
# Set up the histogram
h <- hist(difference, plot=FALSE)
plot(h, col="white", border="white", main="",
xlab="", ylab="", axes=FALSE,
ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(difference, freq=TRUE,
col = "lightblue", border = "black",
xlab="SA5 career intervention difference scores",
main="Histogram of the difference scores",
cex.main = 0.9,
ylim=c(0, max(h$counts) * 1.2))
# Add the normal curve
scaling_factor <- length(difference) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor,
col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# Add a Legend
legend("topright", legend = c(
paste("Mean =", sprintf("%.2f", m)),
paste("SD =", sprintf("%.2f", std)),
paste("Median =", sprintf("%.2f", md)),
"Normal Curve"),
col = c("darkblue", "transparent", "transparent","darkred"),
lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
bty = "n", cex = 0.8)
box(bty = "l")

# A QQplot
#Draw the points
qqnorm(difference, main = "Q-Q Plot of difference scores",
xlab = "Theoretical Quantiles (Normal Distribution)",
ylab = "Observed difference scores",
pch = 19, col = "steelblue")
#Add the reference line
qqline(difference, col = "darkred", lwd = 2)

# Run the Shapiro-Wilk test (since the sample size is 20 < 50)
shapiro.test(difference)
##
## Shapiro-Wilk normality test
##
## data: difference
## W = 0.94845, p-value = 0.3441
# Paired sample t-test
pre_post_ttest <- t.test(data$Pretest, data$Posttest, paired = TRUE, alternative = "two.sided")
pre_post_ttest
##
## Paired t-test
##
## data: data$Pretest and data$Posttest
## t = -5.7235, df = 19, p-value = 1.623e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -4.028787 -1.871213
## sample estimates:
## mean difference
## -2.95
# Calculate Cohen's d for effect size
cohens_d <- m / std
print(paste("Cohen's d:", number(cohens_d, accuracy = 0.01)))
## [1] "Cohen's d: 1.28"
# Report students that decreased or had no change in career understanding from pretest to posttest
students_decreased_or_no_change <- new_data %>% filter(difference <= 0)
students_decreased_or_no_change
## # A tibble: 3 × 4
## ID Pretest Posttest difference
## <dbl> <dbl> <dbl> <dbl>
## 1 5 2 1 -1
## 2 9 3 3 0
## 3 14 5 5 0
# Report students' posttest scores - descriptive statistics
posttest_descriptives <- describe(data$Posttest)
posttest_descriptives
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 20 5.2 2.12 5 5.12 1.48 1 10 9 0.35 -0.37 0.47
# Report students that increased in career understanding from pretest to posttest, arrange from high to low
students_increased <- new_data %>% filter(difference > 0) %>% arrange(desc(difference))
students_increased
## # A tibble: 17 × 4
## ID Pretest Posttest difference
## <dbl> <dbl> <dbl> <dbl>
## 1 19 0 8 8
## 2 3 3 10 7
## 3 4 2 7 5
## 4 12 3 8 5
## 5 17 2 7 5
## 6 10 1 5 4
## 7 16 2 6 4
## 8 20 1 5 4
## 9 18 2 5 3
## 10 1 2 4 2
## 11 2 2 4 2
## 12 6 5 7 2
## 13 7 1 3 2
## 14 11 2 4 2
## 15 13 2 4 2
## 16 15 2 4 2
## 17 8 3 4 1
# Students that increased in career understanding from pretest to posttest, at least 3 points
students_increased_at_least_3 <- new_data %>% filter(difference >= 3) %>% arrange(desc(difference))
students_increased_at_least_3
## # A tibble: 9 × 4
## ID Pretest Posttest difference
## <dbl> <dbl> <dbl> <dbl>
## 1 19 0 8 8
## 2 3 3 10 7
## 3 4 2 7 5
## 4 12 3 8 5
## 5 17 2 7 5
## 6 10 1 5 4
## 7 16 2 6 4
## 8 20 1 5 4
## 9 18 2 5 3