library(kequate)
## Loading required package: ltm
## Loading required package: MASS
## Loading required package: msm
## Loading required package: polycor
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(truncnorm)
#scores_x <- round(rnorm(50, mean = 73, sd = 16, min_value = 28, max_value = 108))
#Calculate scores_y to match scores_x 

#n <- 50
#target_sd <- 11
#target_mean <- 87
#min_val <- 28
#max_val <- 108

#final_sample <- rtruncnorm(
  #n = n, 
 # a = min_val, 
 # b = max_val, 
  #mean = target_mean, 
 # sd = target_sd
#)

#final_sample <- round(final_sample)
#summary(final_sample)

#fin_samp <- data.frame(final_sample)
#write.csv(fin_samp, file = "fin_samp.csv")

#Code to calculate scores for anchor test on y in a separate markdown file.

#Initial Code to build score files

#results_y <- data.frame(scores_y)
#write.csv(results_y, file = "results_y.csv")
#results_x <- data.frame(scores_x)
#write.csv(results_x, file = "results_x.csv")
# NEAT example using simulated data

#data(simeq)
results_x <- read.csv("results_x.csv")
results_y <- read.csv("results_y.csv")
freq1 <- kefreq(results_x$scores_x, 28:108, results_x$scores_a, 61:100)
freq2 <- kefreq(results_y$scores_y, 28:108, results_y$scores_a, 61:100)
glm1<-glm(frequency~I(X)+I(X^2)+I(X^3)+I(X^4)+I(X^5)+I(A)+I(A^2)+I(A^3)+I(A^4)+
            I(A):I(X)+I(A):I(X^2)+I(A^2):I(X)+I(A^2):I(X^2), family="poisson", data=freq1, x=TRUE)
glm2<-glm(frequency~I(X)+I(X^2)+I(A)+I(A^2)+I(A^3)+I(A^4)+I(A):I(X)+I(A):I(X^2)+
            I(A^2):I(X)+I(A^2):I(X^2), family="poisson", data=freq2, x=TRUE)
keNEATPSE <- kequate("NEAT_PSE", 28:108, 28:108, glm1, glm2)
keNEATCE <- kequate("NEAT_CE", 28:108, 28:108, 61:100, glm1, glm2)
summary(keNEATPSE)
##  Design: NEAT/NEC PSE equipercentile 
## 
##  Kernel: gaussian 
## 
##  Sample Sizes: 
##    Test X: 50 
##    Test Y: 50 
## 
##  Score Ranges: 
##    Test X: 
##        Min = 28 Max = 108
##    Test Y: 
##        Min = 28 Max = 108
## 
##  Bandwidths Used: 
##          hx        hy    hxlin    hylin
## 1 0.5699229 0.6489905 14866.82 9125.499
## 
##  Equating Function and Standard Errors: 
##    Score      eqYx     SEEYx
## 1     28  54.12942 28.897916
## 2     29  59.11636 18.572870
## 3     30  61.12189 15.238585
## 4     31  62.24235 13.571452
## 5     32  62.99195 12.514935
## 6     33  63.55126 11.777302
## 7     34  64.00241 11.184188
## 8     35  64.38915 10.710738
## 9     36  64.73735 10.283651
## 10    37  65.06331  9.897611
## 11    38  65.37899  9.547206
## 12    39  65.69307  9.199519
## 13    40  66.01209  8.857611
## 14    41  66.34203  8.529943
## 15    42  66.68782  8.191434
## 16    43  67.05323  7.848108
## 17    44  67.44216  7.511491
## 18    45  67.85735  7.157262
## 19    46  68.30074  6.811129
## 20    47  68.77414  6.456232
## 21    48  69.27758  6.108293
## 22    49  69.81139  5.759523
## 23    50  70.37409  5.425578
## 24    51  70.96443  5.094006
## 25    52  71.58012  4.786036
## 26    53  72.21846  4.489220
## 27    54  72.87715  4.211558
## 28    55  73.55302  3.960301
## 29    56  74.24329  3.726112
## 30    57  74.94566  3.513443
## 31    58  75.65744  3.327281
## 32    59  76.37621  3.160735
## 33    60  77.10022  3.011185
## 34    61  77.82764  2.882815
## 35    62  78.55648  2.773409
## 36    63  79.28525  2.676339
## 37    64  80.01277  2.591637
## 38    65  80.73755  2.521726
## 39    66  81.45805  2.462154
## 40    67  82.17325  2.408543
## 41    68  82.88210  2.364097
## 42    69  83.58313  2.329268
## 43    70  84.27521  2.298339
## 44    71  84.95762  2.273003
## 45    72  85.62908  2.256963
## 46    73  86.28852  2.244317
## 47    74  86.93552  2.236482
## 48    75  87.56896  2.237548
## 49    76  88.18821  2.240115
## 50    77  88.79312  2.249205
## 51    78  89.38276  2.263370
## 52    79  89.95745  2.278193
## 53    80  90.51679  2.300186
## 54    81  91.06106  2.319806
## 55    82  91.59067  2.345601
## 56    83  92.10597  2.367989
## 57    84  92.60803  2.395142
## 58    85  93.09757  2.418136
## 59    86  93.57616  2.445618
## 60    87  94.04511  2.468255
## 61    88  94.50639  2.496016
## 62    89  94.96213  2.519183
## 63    90  95.41457  2.547762
## 64    91  95.86681  2.573947
## 65    92  96.32142  2.604642
## 66    93  96.78240  2.636868
## 67    94  97.25286  2.672390
## 68    95  97.73752  2.713443
## 69    96  98.24018  2.757952
## 70    97  98.76624  2.810266
## 71    98  99.32017  2.868444
## 72    99  99.90801  2.932295
## 73   100 100.53498  3.005300
## 74   101 101.20734  3.075413
## 75   102 101.93163  3.142317
## 76   103 102.71430  3.191698
## 77   104 103.56260  3.193854
## 78   105 104.48455  3.102853
## 79   106 105.48919  2.837090
## 80   107 106.58739  2.246404
## 81   108 107.78141  1.215317
## 
##  Comparing the Moments: 
##           PREYx
## 1  -0.008230302
## 2  -0.023309141
## 3  -0.039898491
## 4  -0.055959618
## 5  -0.070892407
## 6  -0.084676627
## 7  -0.097495188
## 8  -0.109581813
## 9  -0.121168034
## 10 -0.132470543
summary(keNEATCE)
##  Design: NEAT CE equipercentile 
## 
##  Kernel: gaussian 
## 
##  Sample Sizes: 
##    Test X: 50 
##    Test Y: 50 
## 
##  Score Ranges: 
##    Test X: 
##        Min = 28 Max = 108
##    Test Y: 
##        Min = 28 Max = 108
##    Test A: 
##        Min = 61 Max = 100
## 
##  Bandwidths Used: 
##        hxP       hyQ       haP       haQ   hxPlin   hyQlin   haPlin  haQlin
## 1 0.547137 0.6584138 0.5038546 0.5191245 17032.07 8730.802 10806.05 8935.23
## 
##  Equating Function and Standard Errors: 
##    Score      eqYx     SEEYx
## 1     28  61.26031 7.0013225
## 2     29  65.17861 6.5576303
## 3     30  66.87495 6.5227016
## 4     31  67.89343 6.5530795
## 5     32  68.61364 6.5751627
## 6     33  69.17290 6.5618147
## 7     34  69.63732 6.5301426
## 8     35  70.04366 6.4845121
## 9     36  70.41585 6.4488779
## 10    37  70.77030 6.4154783
## 11    38  71.11935 6.4013420
## 12    39  71.47374 6.4102161
## 13    40  71.84240 6.4254197
## 14    41  72.23315 6.4477336
## 15    42  72.65269 6.4493643
## 16    43  73.10548 6.4250003
## 17    44  73.59731 6.4136488
## 18    45  74.13670 6.4491627
## 19    46  74.73559 6.5155385
## 20    47  75.39740 6.5111402
## 21    48  76.12347 6.5311574
## 22    49  76.92785 6.5845476
## 23    50  77.80607 6.5686574
## 24    51  78.76484 6.5635612
## 25    52  79.79479 6.5364196
## 26    53  80.88773 6.4214946
## 27    54  82.02613 6.2345638
## 28    55  83.18485 5.9713381
## 29    56  84.33457 5.6163569
## 30    57  85.44580 5.1828885
## 31    58  86.49625 4.7577826
## 32    59  87.47536 4.3634389
## 33    60  88.37973 3.9783352
## 34    61  89.21356 3.6665484
## 35    62  89.98729 3.4399140
## 36    63  90.71141 3.2734931
## 37    64  91.39524 3.1505277
## 38    65  92.04745 3.0634380
## 39    66  92.67485 3.0105868
## 40    67  93.28272 2.9759750
## 41    68  93.87581 2.9551070
## 42    69  94.45699 2.9499994
## 43    70  95.02830 2.9576133
## 44    71  95.58984 2.9777467
## 45    72  96.14330 2.9866112
## 46    73  96.69029 3.0119511
## 47    74  97.22749 3.0435805
## 48    75  97.75749 3.0686615
## 49    76  98.27818 3.1127604
## 50    77  98.78894 3.1460038
## 51    78  99.29008 3.1977102
## 52    79  99.77973 3.2380779
## 53    80 100.25830 3.2954395
## 54    81 100.72574 3.3405061
## 55    82 101.17986 3.3953952
## 56    83 101.62481 3.4470441
## 57    84 102.05564 3.4868889
## 58    85 102.47708 3.5395437
## 59    86 102.88900 3.5676707
## 60    87 103.28856 3.5936655
## 61    88 103.68187 3.6183824
## 62    89 104.06710 3.6120935
## 63    90 104.44211 3.5991048
## 64    91 104.81241 3.5728560
## 65    92 105.17587 3.5105794
## 66    93 105.52841 3.4274447
## 67    94 105.87393 3.3212061
## 68    95 106.20956 3.1714662
## 69    96 106.52818 2.9897533
## 70    97 106.83189 2.7830615
## 71    98 107.11716 2.5424394
## 72    99 107.37739 2.2973759
## 73   100 107.61761 2.0690210
## 74   101 107.83867 1.8535536
## 75   102 108.03946 1.6657352
## 76   103 108.22815 1.5088202
## 77   104 108.40992 1.3677659
## 78   105 108.58729 1.2358878
## 79   106 108.77146 1.1111418
## 80   107 108.98734 0.9839232
## 81   108 109.30893 0.8267464
## 
##  Comparing the Moments: 
##          PREAx      PREYa
## 1  -0.01056193 -0.1205837
## 2  -0.02072244 -0.2459774
## 3  -0.02989891 -0.3679492
## 4  -0.03761229 -0.4826418
## 5  -0.04350011 -0.5888530
## 6  -0.04730887 -0.6868952
## 7  -0.04887595 -0.7778583
## 8  -0.04810867 -0.8631506
## 9  -0.04496460 -0.9442261
## 10 -0.03943527 -1.0224324
# Plot Equated Results

# Define the score range 
score_range <- 28:108

# Combine into a single data frame
eq_df <- data.frame(
  Score = score_range,
  PSE = keNEATPSE@equating$eqYx,  # keNEATPSE object
  CE  = keNEATCE@equating$eqYx    # keNEATCE object
)

# ---- Base R plot ----
plot(eq_df$Score, eq_df$PSE, type = "l", col = "blue", lwd = 2,
     ylim = range(c(eq_df$PSE, eq_df$CE), na.rm = TRUE),
     xlab = "Form X Score", ylab = "Equated Form Y Score",
     main = "Kernel Equating: Post-Stratification vs Chain Equating")

lines(eq_df$Score, eq_df$CE, col = "red", lwd = 2, lty = 2)
abline(a = 0, b = 1, col = "black", lwd = 1, lty = 1)

legend("topleft",
       legend = c("Post-Stratification Equating (PSE)", "Chain Equating (CE)", "Identity Line"),
       col = c("blue", "red", "black"),
       lwd = c(2, 2, 1),
       lty = c(1, 2, 1))

# Extract variables from simeq

X <- results_x$scores_x
Y <- results_y$scores_y
A1 <- results_x$scores_a
A2 <- results_y$scores_a

# Set up plotting area
par(mfrow=c(2,2), mar=c(4,4,2,1))

# Histogram of X
hist(X, breaks=20, col="skyblue", main="Distribution of X", xlab="X", ylab="Frequency")

# Histogram of Y
hist(Y, breaks=20, col="lightgreen", main="Distribution of Y", xlab="Y", ylab="Frequency")

# Histogram of common item A (from results_x)
hist(A1, breaks=0:max(A1), col="salmon", main="Distribution of Common Item (A1_x)", xlab="A1", ylab="Frequency")


# Histogram of common item A (from results_y)
hist(A2, breaks=0:max(A1), col="gold", main="Distribution of Common Item (A2_y)", xlab="A2", ylab="Frequency")

# Compare Summary Statistics

# Extract variables
X <- results_x$scores_x
Y <- results_y$scores_y
A_X <- results_x$scores_a
A_Y <- results_y$scores_a

# Create summary table
summary_table <- sapply(list(X, Y, A_X, A_Y), summary)

# Assign proper row names (using the names of the variables)
rownames(summary_table) <- names(summary(X))

# Assign column names (using the variable names)
colnames(summary_table) <- c("X", "Y", "A_X", "A_Y")

# Print summary table
print(summary_table)
##              X      Y    A_X   A_Y
## Min.     28.00  66.00  61.00 62.00
## 1st Qu.  62.00  79.25  77.25 66.00
## Median   73.00  85.00  87.00 73.00
## Mean     72.52  85.24  84.62 73.72
## 3rd Qu.  84.00  90.75  93.00 78.00
## Max.    108.00 108.00 100.00 94.00