library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(infer)
library(stats)
library(equate)
##
## Attaching package: 'equate'
##
## The following object is masked from 'package:ggplot2':
##
## margin
set.seed(123)
# Simulate scores for Test Form X and Y
scores_y <- round(rnorm(1000, mean = 75, sd = 12))
scores_x <- round(rnorm(1000, mean = 70, sd = 10))
# Set score range
scores_y <- scores_y[scores_y >= 0 & scores_y <= 100]
scores_x <- scores_x[scores_x >= 0 & scores_x <= 100]
freq_y <- table(factor(scores_y, levels = 0:100))
freq_x <- table(factor(scores_x, levels = 0:100))
df_freq <- data.frame(Score = 0:100, Freq_Y = as.numeric(freq_y),
Freq_X = as.numeric(freq_x))
### Plotting original scores
plot(df_freq$Score, df_freq$Freq_X, type = "l", col = "blue", lwd = 2,
main = "Distribution of Test Scores", xlab = "Score", ylab = "Frequency",
ylim = range(df_freq$Freq_X, df_freq$Freq_Y))
lines(df_freq$Score, df_freq$Freq_Y, col = "red", lwd = 2)
legend("topright", legend = c("Test X", "Test Y"), col = c("blue", "red"),
lwd = 2)

#Mean equated score = x + (μY − μX)
# Mean for Test X and Test Y
mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)
mean_x <- sum(df_freq$Score * df_freq$Freq_X) / sum(df_freq$Freq_X)
# Mean difference between Test X and Test Y
mean_diff <- mean_y - mean_x
print(mean_y)
## [1] 74.55624
print(mean_x)
## [1] 70.32497
print(mean_diff)
## [1] 4.231262
#Equate and add mean equated scores to data.frame
df_freq$Mean_Equated_Score <- df_freq$Score + mean_diff
#Compare Equated scores to Y scores
mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)
mean_equated <- sum(df_freq$Mean_Equated_Score * df_freq$Freq_X) /
sum(df_freq$Freq_X)
print(mean_y)
## [1] 74.55624
print(mean_equated)
## [1] 74.55624
#Original score distribution plot
plot(df_freq$Score, df_freq$Freq_X, type = "l", col = "blue", lwd = 2,
ylim = range(c(df_freq$Freq_X, df_freq$Freq_Y)),
xlab = "Score", ylab = "Frequency",
main = "Distributions of Test X and Y")
lines(df_freq$Score, df_freq$Freq_Y, col = "red", lwd = 2)
legend("topright", legend = c("Test X", "Test Y"), col = c("blue", "red"),
lwd = 2)

#Mean Equating Plot Equated scores higher b/c we added 4.23 to the base score
plot(df_freq$Score, df_freq$Mean_Equated_Score,
type = "l", lwd = 2, col = "darkgreen",
xlab = "Original Score on Test X",
ylab = "Mean-Equated Score on Test Y",
main = "Mean Equating Function: Test X to Test Y")
abline(0, 1, col = "red", lty = 2) # Identity line for reference
legend("topleft", legend = c("Mean-Equated Scores", "Identity Line"),
col = c("darkgreen", "red"), lwd = 2, lty = c(1, 2))

#Linear Equating
# Linear_Equated_Score = (SDX/SDY) * (Score x − μX) + μY)
# This is the linear equation formula
mean_x <- sum(df_freq$Score * df_freq$Freq_X) / sum(df_freq$Freq_X)
mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)
# Variance for X
var_x <- sum(df_freq$Freq_X * (df_freq$Score - mean_x)^2) /
sum(df_freq$Freq_X)
sd_x <- sqrt(var_x)
# Variance for Y
var_y <- sum(df_freq$Freq_Y * (df_freq$Score - mean_y)^2) /
sum(df_freq$Freq_Y)
sd_y <- sqrt(var_y)
#Apply Linear Equating Formula
df_freq$Linear_Equated_Score <- ((df_freq$Score - mean_x) * (sd_y / sd_x)) +
mean_y
#Plot Linear Equated Results with X scores
plot(df_freq$Score, df_freq$Linear_Equated_Score,
type = "l", col = "blue", lwd = 2,
xlab = "Score on Test X", ylab = "Linear-Equated Score on Test Y",
main = "Linear Equating Function")
abline(0, 1, col = "red", lty = 2) # identity line
legend("topleft", legend = c("Linear Equating", "Identity Line"),
col = c("blue", "red"), lwd = 2, lty = c(1, 3))

#Plotting both Mean and Linear Equated Results
# Plot the original Score vs Mean Equated Score
plot(df_freq$Score, df_freq$Mean_Equated_Score,
type = "l", col = "darkgreen", lwd = 4,
xlab = "Original Score on Test X",
ylab = "Equated Score on Test Y",
main = "Mean vs Linear Equating")
# Add Linear Equated Score
lines(df_freq$Score, df_freq$Linear_Equated_Score,
col = "blue", lwd = 4, lty = 4)
# Add identity line for reference
abline(0, 1, col = "red", lty = 5, lwd = 4)
# Add legend
legend("topleft", legend = c("Mean Equating", "Linear Equating",
"Identity Line"),
col = c("darkgreen", "blue", "red"), lwd = 2, lty = c(1, 4, 5))

# Equipercentile Equating
# Create frequency tables for X and Y
ft_x <- as.freqtab(table(factor(df_freq$Score, levels = 0:100),
df_freq$Freq_X))
ft_y <- as.freqtab(table(factor(df_freq$Score, levels = 0:100),
df_freq$Freq_Y))
# Run Equiperncentile Equating
eq_equip <- equate(ft_x, ft_y, type = "equipercentile", smooth = "none")
equating_table <- data.frame( Score = eq_equip$concordance$scale,
Equipercentile_Equated_Score = eq_equip$concordance$yx )
df_freq <- merge(df_freq, equating_table, by = "Score", all.x = TRUE)
# Base plot with identity line
plot(df_freq$Score, df_freq$Score, type = "l", col = "gray", lwd = 2, lty = 2,
xlab = "Raw Score on Test X",
ylab = "Equated Score on Test Y",
main = "Comparison of Equating Methods",
ylim = range(c(df_freq$Linear_Equated_Score,
df_freq$Mean_Equated_Score,
df_freq$Equipercentile_Equated_Score)))
# Add Mean Equating
lines(df_freq$Score, df_freq$Mean_Equated_Score, col = "darkgreen", lwd = 2)
# Add Linear Equating
lines(df_freq$Score, df_freq$Linear_Equated_Score, col = "blue", lwd = 2)
# Add Equipercentile Equating
lines(df_freq$Score, df_freq$Equipercentile_Equated_Score, col = "red", lwd = 2)
# Add identity line for reference
#I added this to his code
abline(0, 1, col = "gray", lty = 5, lwd = 4)
# Add Legend
legend("topleft",
legend = c("Identity Line", "Mean Equating", "Linear Equating",
"Equipercentile Equating"),
col = c("gray", "darkgreen", "blue", "red"),
lty = c(2, 1, 1, 1),
lwd = 2)

plot(df_freq$Score, df_freq$Score, type = "l", lwd = 2, col = "black",
xlab = "Observed Score (Form X)", ylab = "Equated Score (Form Y)",
main = "Equated Scores by Method",
ylim = range(c(df_freq$Score,
df_freq$Mean_Equated_Score,
df_freq$Linear_Equated_Score,
df_freq$Equipercentile_Equated_Score),
na.rm = TRUE))
# Add identity line using abline()
abline(a = 0, b = 1, col = "black", lwd = 2, lty = 1) # identity line y = x
# Add equated scores
lines(df_freq$Score, df_freq$Mean_Equated_Score, col = "blue",
lty = 2, lwd = 2)
lines(df_freq$Score, df_freq$Linear_Equated_Score, col = "red",
lty = 3, lwd = 2)
lines(df_freq$Score, df_freq$Equipercentile_Equated_Score, col = "green",
lty = 4, lwd = 2)
# Legend
legend("topleft",
legend = c("Original (Identity)", "Mean Equated", "Linear Equated",
"Equipercentile Equated"),
col = c("black", "blue", "red", "green"),
lty = c(1, 2, 3, 4), lwd = 2)

summary(df_freq[, c("Score",
"Mean_Equated_Score",
"Linear_Equated_Score",
"Equipercentile_Equated_Score")])
## Score Mean_Equated_Score Linear_Equated_Score
## Min. : 0 Min. : 4.231 Min. : -4.452
## 1st Qu.: 25 1st Qu.: 29.231 1st Qu.: 23.635
## Median : 50 Median : 54.231 Median : 51.722
## Mean : 50 Mean : 54.231 Mean : 51.722
## 3rd Qu.: 75 3rd Qu.: 79.231 3rd Qu.: 79.809
## Max. :100 Max. :104.231 Max. :107.895
## Equipercentile_Equated_Score
## Min. : 0
## 1st Qu.: 25
## Median : 50
## Mean : 50
## 3rd Qu.: 75
## Max. :100
write.csv(df_freq, file = "equate1results.csv", row.names = FALSE)