library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(infer)
library(stats)
library(equate)
## 
## Attaching package: 'equate'
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
set.seed(123)

# Simulate scores for Test Form X and Y
scores_y <- round(rnorm(1000, mean = 75, sd = 12))
scores_x <- round(rnorm(1000, mean = 70, sd = 10))

# Set score range
scores_y <- scores_y[scores_y >= 0 & scores_y <= 100]
scores_x <- scores_x[scores_x >= 0 & scores_x <= 100]

freq_y <- table(factor(scores_y, levels = 0:100))
freq_x <- table(factor(scores_x, levels = 0:100))
df_freq <- data.frame(Score = 0:100, Freq_Y = as.numeric(freq_y), 
                        Freq_X = as.numeric(freq_x))
### Plotting original scores

 plot(df_freq$Score, df_freq$Freq_X, type = "l", col = "blue", lwd = 2,
       main = "Distribution of Test Scores", xlab = "Score", ylab = "Frequency",
     ylim = range(df_freq$Freq_X, df_freq$Freq_Y))
  lines(df_freq$Score, df_freq$Freq_Y, col = "red", lwd = 2)
  legend("topright", legend = c("Test X", "Test Y"), col = c("blue", "red"), 
       lwd = 2)

#Mean equated score = x + (μY − μX)
  
# Mean for Test  X and Test Y
  mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)

  mean_x <- sum(df_freq$Score * df_freq$Freq_X) / sum(df_freq$Freq_X)


# Mean difference between Test  X and Test Y
  
  mean_diff <- mean_y - mean_x
  
  print(mean_y)
## [1] 74.55624
  print(mean_x)
## [1] 70.32497
  print(mean_diff)
## [1] 4.231262
#Equate and add mean equated scores to data.frame
  
 df_freq$Mean_Equated_Score <- df_freq$Score + mean_diff
  
#Compare Equated scores to Y scores 
 
 mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)
 mean_equated <- sum(df_freq$Mean_Equated_Score * df_freq$Freq_X) / 
   sum(df_freq$Freq_X)
 
 print(mean_y)
## [1] 74.55624
 print(mean_equated)
## [1] 74.55624
#Original score distribution plot
   
 plot(df_freq$Score, df_freq$Freq_X, type = "l", col = "blue", lwd = 2,
        ylim = range(c(df_freq$Freq_X, df_freq$Freq_Y)),
        xlab = "Score", ylab = "Frequency",
        main = "Distributions of Test X and Y")
 lines(df_freq$Score, df_freq$Freq_Y, col = "red", lwd = 2)
 legend("topright", legend = c("Test X", "Test Y"), col = c("blue", "red"),
        lwd = 2)

#Mean Equating Plot Equated scores higher b/c we added 4.23 to the base score
   
  plot(df_freq$Score, df_freq$Mean_Equated_Score,
     type = "l", lwd = 2, col = "darkgreen",
     xlab = "Original Score on Test X",
     ylab = "Mean-Equated Score on Test Y",
     main = "Mean Equating Function: Test X to Test Y")

  abline(0, 1, col = "red", lty = 2)  # Identity line for reference
  legend("topleft", legend = c("Mean-Equated Scores", "Identity Line"),
       col = c("darkgreen", "red"), lwd = 2, lty = c(1, 2))

#Linear Equating
# Linear_Equated_Score = (SDX/SDY) * (Score x − μX) + μY)
# This is the linear equation formula
  
  mean_x <- sum(df_freq$Score * df_freq$Freq_X) / sum(df_freq$Freq_X)
  mean_y <- sum(df_freq$Score * df_freq$Freq_Y) / sum(df_freq$Freq_Y)

# Variance for X
  var_x <- sum(df_freq$Freq_X * (df_freq$Score - mean_x)^2) / 
    sum(df_freq$Freq_X)
  sd_x <- sqrt(var_x)
  
# Variance for Y
  var_y <- sum(df_freq$Freq_Y * (df_freq$Score - mean_y)^2) / 
    sum(df_freq$Freq_Y)
  sd_y <- sqrt(var_y)
#Apply Linear Equating Formula

 df_freq$Linear_Equated_Score <- ((df_freq$Score - mean_x) * (sd_y / sd_x)) +
    mean_y
  
  
#Plot Linear Equated Results with X scores
    plot(df_freq$Score, df_freq$Linear_Equated_Score,
       type = "l", col = "blue", lwd = 2,
       xlab = "Score on Test X", ylab = "Linear-Equated Score on Test Y",
       main = "Linear Equating Function")
  abline(0, 1, col = "red", lty = 2)  # identity line
  legend("topleft", legend = c("Linear Equating", "Identity Line"),
         col = c("blue", "red"), lwd = 2, lty = c(1, 3))

#Plotting both Mean and Linear Equated Results
  # Plot the original Score vs Mean Equated Score
  plot(df_freq$Score, df_freq$Mean_Equated_Score,
       type = "l", col = "darkgreen", lwd = 4,
       xlab = "Original Score on Test X",
       ylab = "Equated Score on Test Y",
       main = "Mean vs Linear Equating")

# Add Linear Equated Score
lines(df_freq$Score, df_freq$Linear_Equated_Score,
      col = "blue", lwd = 4, lty = 4)

# Add identity line for reference
abline(0, 1, col = "red", lty = 5, lwd = 4)

# Add legend
legend("topleft", legend = c("Mean Equating", "Linear Equating", 
                             "Identity Line"),
       col = c("darkgreen", "blue", "red"), lwd = 2, lty = c(1, 4, 5))

# Equipercentile Equating
  
# Create frequency tables for X and Y
  ft_x <- as.freqtab(table(factor(df_freq$Score, levels = 0:100), 
                           df_freq$Freq_X)) 
  ft_y <- as.freqtab(table(factor(df_freq$Score, levels = 0:100), 
                           df_freq$Freq_Y)) 


# Run Equiperncentile Equating
  eq_equip <- equate(ft_x, ft_y, type = "equipercentile", smooth = "none")
  
    equating_table <- data.frame( Score = eq_equip$concordance$scale,
    Equipercentile_Equated_Score = eq_equip$concordance$yx )

  df_freq <- merge(df_freq, equating_table, by = "Score", all.x = TRUE)
# Base plot with identity line
    
plot(df_freq$Score, df_freq$Score, type = "l", col = "gray", lwd = 2, lty = 2,
     xlab = "Raw Score on Test X",
     ylab = "Equated Score on Test Y",
     main = "Comparison of Equating Methods",
     ylim = range(c(df_freq$Linear_Equated_Score,
                    df_freq$Mean_Equated_Score,
                    df_freq$Equipercentile_Equated_Score)))

# Add Mean Equating
lines(df_freq$Score, df_freq$Mean_Equated_Score, col = "darkgreen", lwd = 2)

# Add Linear Equating
lines(df_freq$Score, df_freq$Linear_Equated_Score, col = "blue", lwd = 2)

# Add Equipercentile Equating
lines(df_freq$Score, df_freq$Equipercentile_Equated_Score, col = "red", lwd = 2)

# Add identity line for reference
#I added this to his code

abline(0, 1, col = "gray", lty = 5, lwd = 4)

# Add Legend
legend("topleft",
       legend = c("Identity Line", "Mean Equating", "Linear Equating", 
                  "Equipercentile Equating"),
       col = c("gray", "darkgreen", "blue", "red"),
       lty = c(2, 1, 1, 1),
       lwd = 2)  

  plot(df_freq$Score, df_freq$Score, type = "l", lwd = 2, col = "black",
         xlab = "Observed Score (Form X)", ylab = "Equated Score (Form Y)",
         main = "Equated Scores by Method",
         ylim = range(c(df_freq$Score,
                        df_freq$Mean_Equated_Score,
                        df_freq$Linear_Equated_Score,
                        df_freq$Equipercentile_Equated_Score),
                      na.rm = TRUE))
  
  # Add identity line using abline()
  abline(a = 0, b = 1, col = "black", lwd = 2, lty = 1)  # identity line y = x
  
  # Add equated scores
  lines(df_freq$Score, df_freq$Mean_Equated_Score, col = "blue", 
        lty = 2, lwd = 2)
  lines(df_freq$Score, df_freq$Linear_Equated_Score, col = "red", 
        lty = 3, lwd = 2)
  lines(df_freq$Score, df_freq$Equipercentile_Equated_Score, col = "green", 
        lty = 4, lwd = 2)
  
  # Legend
  legend("topleft",
         legend = c("Original (Identity)", "Mean Equated", "Linear Equated", 
                    "Equipercentile Equated"),
         col = c("black", "blue", "red", "green"),
         lty = c(1, 2, 3, 4), lwd = 2)

  summary(df_freq[, c("Score", 
                        "Mean_Equated_Score", 
                        "Linear_Equated_Score", 
                       "Equipercentile_Equated_Score")])
##      Score     Mean_Equated_Score Linear_Equated_Score
##  Min.   :  0   Min.   :  4.231    Min.   : -4.452     
##  1st Qu.: 25   1st Qu.: 29.231    1st Qu.: 23.635     
##  Median : 50   Median : 54.231    Median : 51.722     
##  Mean   : 50   Mean   : 54.231    Mean   : 51.722     
##  3rd Qu.: 75   3rd Qu.: 79.231    3rd Qu.: 79.809     
##  Max.   :100   Max.   :104.231    Max.   :107.895     
##  Equipercentile_Equated_Score
##  Min.   :  0                 
##  1st Qu.: 25                 
##  Median : 50                 
##  Mean   : 50                 
##  3rd Qu.: 75                 
##  Max.   :100
    write.csv(df_freq, file = "equate1results.csv", row.names = FALSE)