# Course: EDF 6403 - Quantitative Foundations of Educational Research
# Stats Analysis 5: dependent t-test
# Author: Nguyet Hoang | hoangt@ufl.edu

# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(scales)
library(psych)
library(car)
## Loading required package: carData
library(effectsize)


# Read the data
data <- read_sav("SA5_career_intervention.sav")
view(data)
new_data <- data %>% mutate(difference = data$Posttest - data$Pretest)
head(new_data)
## # A tibble: 6 × 4
##      ID Pretest Posttest difference
##   <dbl>   <dbl>    <dbl>      <dbl>
## 1     1       2        4          2
## 2     2       2        4          2
## 3     3       3       10          7
## 4     4       2        7          5
## 5     5       2        1         -1
## 6     6       5        7          2
difference <- new_data$difference

# Use skewness and kurtosis statistics
skew_val <- skew(difference, type = 2)
kurt_val <- kurtosi(difference, type = 2)
print(paste("Skewness:", number(skew_val, accuracy = 0.01)))
## [1] "Skewness: 0.47"
print(paste("Kurtosis:", number(kurt_val, accuracy = 0.01)))
## [1] "Kurtosis: 0.02"
# A histogram with a normal curve overlayed on top of it
# Calculate the statistics
m   <- mean(difference, na.rm = TRUE)
md  <- median(difference, na.rm = TRUE)
std <- sd(difference, na.rm = TRUE)
# Set up the histogram
h <- hist(difference, plot=FALSE)
plot(h, col="white", border="white", main="", 
     xlab="", ylab="", axes=FALSE,
     ylim=c(0, max(h$counts) * 1.2))
grid(nx=NA, ny=NULL, col="gray", lty="dotted")
par(new=TRUE)
hist(difference, freq=TRUE, 
     col = "lightblue", border = "black",
     xlab="SA5 career intervention difference scores", 
     main="Histogram of the difference scores",
     cex.main = 0.9,
     ylim=c(0, max(h$counts) * 1.2))
# Add the normal curve
scaling_factor <- length(difference) * diff(h$breaks)[1]
curve(dnorm(x, mean=m, sd=std) * scaling_factor, 
      col="darkred", lwd=2, add=TRUE, yaxt="n")
abline(v = m, col = "darkblue", lwd = 2, lty = 2)
# Add a Legend
legend("topright", legend = c(
  paste("Mean =", sprintf("%.2f", m)),
  paste("SD =", sprintf("%.2f", std)),
  paste("Median =", sprintf("%.2f", md)),
  "Normal Curve"),
  col = c("darkblue", "transparent", "transparent","darkred"),
  lty = c(2, 0, 0, 1), lwd = c(2, 0, 0, 2),
  bty = "n", cex = 0.8)  
box(bty = "l")

# A QQplot
#Draw the points
qqnorm(difference, main = "Q-Q Plot of difference scores",
       xlab = "Theoretical Quantiles (Normal Distribution)", 
       ylab = "Observed difference scores",
       pch = 19, col = "steelblue")
#Add the reference line
qqline(difference, col = "darkred", lwd = 2) 

# Run the Shapiro-Wilk test (since the sample size is 20 < 50)
shapiro.test(difference)
## 
##  Shapiro-Wilk normality test
## 
## data:  difference
## W = 0.94845, p-value = 0.3441
# Paired sample t-test
pre_post_ttest <- t.test(data$Pretest, data$Posttest, paired = TRUE, alternative = "two.sided")
pre_post_ttest
## 
##  Paired t-test
## 
## data:  data$Pretest and data$Posttest
## t = -5.7235, df = 19, p-value = 1.623e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -4.028787 -1.871213
## sample estimates:
## mean difference 
##           -2.95
# Calculate Cohen's d for effect size
cohens_d <- m / std
print(paste("Cohen's d:", number(cohens_d, accuracy = 0.01)))
## [1] "Cohen's d: 1.28"
# Report students that decreased or had no change in career understanding from pretest to posttest
students_decreased_or_no_change <- new_data %>% filter(difference <= 0)
students_decreased_or_no_change
## # A tibble: 3 × 4
##      ID Pretest Posttest difference
##   <dbl>   <dbl>    <dbl>      <dbl>
## 1     5       2        1         -1
## 2     9       3        3          0
## 3    14       5        5          0
# Report students' posttest scores - descriptive statistics
posttest_descriptives <- describe(data$Posttest)
posttest_descriptives
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 20  5.2 2.12      5    5.12 1.48   1  10     9 0.35    -0.37 0.47
# Report students that increased in career understanding from pretest to posttest, arrange from high to low
students_increased <- new_data %>% filter(difference > 0) %>% arrange(desc(difference))
students_increased
## # A tibble: 17 × 4
##       ID Pretest Posttest difference
##    <dbl>   <dbl>    <dbl>      <dbl>
##  1    19       0        8          8
##  2     3       3       10          7
##  3     4       2        7          5
##  4    12       3        8          5
##  5    17       2        7          5
##  6    10       1        5          4
##  7    16       2        6          4
##  8    20       1        5          4
##  9    18       2        5          3
## 10     1       2        4          2
## 11     2       2        4          2
## 12     6       5        7          2
## 13     7       1        3          2
## 14    11       2        4          2
## 15    13       2        4          2
## 16    15       2        4          2
## 17     8       3        4          1
# Students that increased in career understanding from pretest to posttest, at least 3 points
students_increased_at_least_3 <- new_data %>% filter(difference >= 3) %>% arrange(desc(difference)) 
students_increased_at_least_3
## # A tibble: 9 × 4
##      ID Pretest Posttest difference
##   <dbl>   <dbl>    <dbl>      <dbl>
## 1    19       0        8          8
## 2     3       3       10          7
## 3     4       2        7          5
## 4    12       3        8          5
## 5    17       2        7          5
## 6    10       1        5          4
## 7    16       2        6          4
## 8    20       1        5          4
## 9    18       2        5          3