Paired Comparison Models with MCMCpack and CDAT Project Data

# Clear the environment and load packages
rm(list = ls())

library(MCMCpack)
library(bayesplot)
library(coda)
library(ggplot2)
library(tidyverse)
library(psych)

Load CDAT paired comparison data

setwd("~/Dropbox/IU25SP-EDUC-Y617 Psychometric Theory/Psychometric Theory Final Project/Paired Comparison MCMC CDAT")
data.folder <- "Data"
load(file.path(data.folder, "pairDataCDAT.Rda"))

# Prepare for use in MCMCpack
colnames(pairDataCDAT) <- c("rater","itemj1", "itemj2","choice")

# Prepare the data for easy matching to student responses as PDFs labeled by 4-digit ID

# Create a named vector for the mapping: names are the original IDs and values are the new numbers.
lookup <- c("60" = 1121, "61" = 1154, "62" = 1222, "63" = 1637,
            "64" = 1995, "65" = 3369, "66" = 4004, "67" = 4209,
            "68" = 4268, "69" = 4841, "70" = 5074, "71" = 5495,
            "72" = 5710, "73" = 8294, "74" = 8980, "75" = 9899)

# Map the itemj1 and itemj2 columns using the lookup vector.
pairDataCDAT$itemj1 <- lookup[as.character(pairDataCDAT$itemj1)]
pairDataCDAT$itemj2 <- lookup[as.character(pairDataCDAT$itemj2)]
pairDataCDAT$choice <- lookup[as.character(pairDataCDAT$choice)]

# Sort for easy matching in theta.start matrix below
pairDataCDAT <- pairDataCDAT[order(pairDataCDAT$itemj1), ] 

save(pairDataCDAT, file = file.path(data.folder, "pairDataCDAT.1.Rda"))

Set initial values for modeling parameters

n_chains <- 3 # Set the number of chains
set.seed(1234) # Set seed for randomized starting values
I <- length(unique(pairDataCDAT$rater))

Initialize respondent-rpecific parameters \(\gamma\)

The vector γ represents each respondent’s perceptual orientation in the 2D latent space. Initially, all respondents are assigned a value of π/4π/4 (45° in radians). The second line randomizes the starting values within a small range # [π/4−0.4,π/4+0.4][π/4−0.4,π/4+0.4] to introduce slight variation.

gamma_start <- rep(pi/4,I) 
# If used, this would replace the constant with I random values from a uniform distribution between 0 and π/2,
# using a broader random initialization instead of the current approach, which uses a small random variation around π/4.
# runif(I)*pi/2. 
gamma_start <- runif(I, pi/4-.4, pi/4+.4) 

Initialize item-specific parameters \(\theta\) for 2d models

Thetas start at 0 for the one-dimensional model

theta_start1d <- matrix(0, 16, 1) # All start at 0 in the model's probit units

Create the empty starting \(\theta\) matrix for 2d models

theta_start2d <- matrix(0, 16, 2) # 16 scripts, two dimensions
# Creates a 16×2 matrix (theta_start2d) to store the latent attributes of statements along two dimensions. Initially, all values are set to 0.

Find the matching indices for the anchor scripts in the matrix by viewing them in order.

unique(pairDataCDAT$itemj1)
##  [1] 1121 1154 1222 1637 1995 3369 4004 4209 4268 4841 5074 5495 5710 8294 8980
## [16] 9899

Set initial values for two anchor items whose place in the matrix is determined above.

# These values anchor certain scripts in the 2d latent space and help define the rotation.
theta_start2d[11,1] <- 1   ## script 5074 is interpreted as having high creative elaboration
theta_start2d[11,2] <- -1  ## script 5074 is interpreted as having low conventional representation
theta_start2d[12,1] <- -1  ## script 5495 is interpreted as having low creative elaboration
theta_start2d[12,2] <- 1   ## script 5495 is interpreted as having high conventional representation

MCMC 1d with normal prior

seeds <- c(0202, 8018, 1304) # Set seeds for each chain
chains1d <- vector("list", n_chains)

for (i in 1:n_chains) {
  chains1d[[i]] <- MCMCpaircompare(
    pairDataCDAT,
    theta.constraints = list('3369' = list("+")),  
    verbose=5000,         # Prints status updates every __ iterations.
    burnin=1000,          # Discards the first __ samples to stabilize.
    mcmc=9000,         # Runs __ iterations for estimation.
    thin=50,           # Keeps every __th sample to reduce autocorrelation.
    seed=seeds[i],        # Ensures reproducibility of results.
    alpha.fixed=FALSE,
    alpha.start=1,
    a=0,               # The default prio mean of alpha is 0
    A=.25,             # The default prior precision is 0.25 (variance of 4)
    store.theta=TRUE,  # Saves item-specfic draws.
    store.alpha=TRUE,  # Saves both rater-specific draws.
    tune=.3,           # Sets the tuning parameter for the Metropolis-Hastings step.
    procrustes=FALSE   # Disables Procrustes alignment
    )
}
## 
## 
## MCMCpaircompare iteration 1 of 10000 
## 
## 
## MCMCpaircompare iteration 5001 of 10000 
## 
## 
## MCMCpaircompare iteration 1 of 10000 
## 
## 
## MCMCpaircompare iteration 5001 of 10000 
## 
## 
## MCMCpaircompare iteration 1 of 10000 
## 
## 
## MCMCpaircompare iteration 5001 of 10000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list1d <- mcmc.list(lapply(chains1d, mcmc)) 

# Save object
save(mcmc_list1d, file = "Out/mcmc_list1d.Rdata")

# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list1d[[1]]), value = TRUE)
alphas <- grep("^alpha", colnames(mcmc_list1d[[1]]), value = TRUE)

Check model convergence

# For 1D model
gelman_diag_1d <- gelman.diag(mcmc_list1d)
max_rhat_1d <- max(gelman_diag_1d$psrf[, "Point est."])
gelman_diag_1d
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## theta.1121       1.05       1.14
## theta.1154       1.05       1.19
## theta.1222       1.02       1.05
## theta.1637       1.02       1.08
## theta.1995       1.02       1.07
## theta.3369       1.01       1.03
## theta.4004       1.02       1.05
## theta.4209       1.03       1.10
## theta.4268       1.03       1.09
## theta.4841       1.02       1.08
## theta.5074       1.01       1.04
## theta.5495       1.04       1.12
## theta.5710       1.04       1.13
## theta.8294       1.02       1.07
## theta.8980       1.00       1.00
## theta.9899       1.02       1.04
## alpha.1          1.09       1.28
## alpha.25         1.01       1.05
## alpha.26         1.02       1.09
## alpha.27         1.00       1.02
## alpha.28         1.01       1.03
## alpha.29         1.02       1.06
## alpha.30         1.01       1.05
## alpha.31         1.02       1.06
## alpha.34         1.01       1.05
## alpha.35         1.01       1.03
## alpha.36         1.02       1.06
## alpha.37         1.03       1.11
## alpha.38         1.00       1.02
## 
## Multivariate psrf
## 
## 1.13

Summary of MCMC 1d with normal prior

View a summary

summary1d <- round(
  cbind(
    as.data.frame(summary(mcmc_list1d)$statistics),
    as.data.frame(summary(mcmc_list1d)$quantiles)
  ), 3) 
summary1d$Model <- "1d"
save(summary1d, file = "Out/summary1d.Rdata")
summary1d
##              Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta.1121 -0.117 0.277    0.012          0.022 -0.687 -0.287 -0.117  0.068
## theta.1154 -0.946 0.328    0.014          0.023 -1.637 -1.142 -0.939 -0.739
## theta.1222  0.128 0.279    0.012          0.023 -0.435 -0.062  0.137  0.321
## theta.1637  0.018 0.283    0.012          0.022 -0.577 -0.155  0.020  0.210
## theta.1995 -0.479 0.292    0.013          0.025 -1.063 -0.653 -0.481 -0.283
## theta.3369  1.722 0.460    0.020          0.030  0.958  1.383  1.701  2.022
## theta.4004  0.372 0.302    0.013          0.024 -0.280  0.189  0.379  0.569
## theta.4209 -0.232 0.288    0.012          0.024 -0.789 -0.430 -0.233 -0.034
## theta.4268  0.282 0.290    0.012          0.023 -0.300  0.098  0.296  0.485
## theta.4841 -0.556 0.293    0.013          0.024 -1.137 -0.732 -0.551 -0.367
## theta.5074 -0.145 0.279    0.012          0.024 -0.715 -0.326 -0.129  0.035
## theta.5495 -0.833 0.315    0.014          0.023 -1.465 -1.052 -0.828 -0.604
## theta.5710 -1.007 0.335    0.014          0.024 -1.650 -1.217 -1.004 -0.778
## theta.8294  0.159 0.288    0.012          0.024 -0.450 -0.024  0.165  0.355
## theta.8980  0.703 0.316    0.014          0.025  0.086  0.487  0.718  0.917
## theta.9899  0.521 0.303    0.013          0.022 -0.098  0.329  0.522  0.705
## alpha.1     2.696 0.731    0.031          0.037  1.444  2.207  2.663  3.131
## alpha.25    1.129 0.340    0.015          0.015  0.562  0.886  1.113  1.349
## alpha.26    1.881 0.554    0.024          0.028  1.039  1.481  1.807  2.175
## alpha.27    1.795 0.531    0.023          0.025  0.924  1.425  1.714  2.101
## alpha.28    0.985 0.339    0.015          0.017  0.431  0.755  0.929  1.211
## alpha.29    0.853 0.305    0.013          0.013  0.365  0.640  0.828  1.025
## alpha.30    0.703 0.260    0.011          0.012  0.249  0.516  0.683  0.848
## alpha.31    0.334 0.188    0.008          0.009  0.019  0.205  0.320  0.448
## alpha.34    1.048 0.313    0.013          0.012  0.519  0.815  1.022  1.249
## alpha.35    0.468 0.203    0.009          0.010  0.130  0.310  0.447  0.614
## alpha.36    1.004 0.354    0.015          0.015  0.428  0.737  0.977  1.209
## alpha.37    1.028 0.357    0.015          0.015  0.466  0.781  0.984  1.227
## alpha.38    0.618 0.222    0.010          0.011  0.257  0.465  0.594  0.756
##             97.5% Model
## theta.1121  0.422    1d
## theta.1154 -0.272    1d
## theta.1222  0.664    1d
## theta.1637  0.553    1d
## theta.1995  0.093    1d
## theta.3369  2.630    1d
## theta.4004  0.952    1d
## theta.4209  0.319    1d
## theta.4268  0.840    1d
## theta.4841  0.006    1d
## theta.5074  0.395    1d
## theta.5495 -0.218    1d
## theta.5710 -0.395    1d
## theta.8294  0.720    1d
## theta.8980  1.293    1d
## theta.9899  1.140    1d
## alpha.1     4.401    1d
## alpha.25    1.851    1d
## alpha.26    3.295    1d
## alpha.27    2.968    1d
## alpha.28    1.713    1d
## alpha.29    1.553    1d
## alpha.30    1.290    1d
## alpha.31    0.730    1d
## alpha.34    1.687    1d
## alpha.35    0.932    1d
## alpha.36    1.863    1d
## alpha.37    1.812    1d
## alpha.38    1.162    1d

See companion page for plots of 1d model “PWC Model 1d MCMCpack CDAT”

https://rpubs.com/steveneheil/mcmc1d

MCMC 2d with normal prior

seeds <- c(1304, 2406, 7916) # Set seeds for each chain
chains2d <- vector("list", n_chains)

for (i in 1:n_chains) {
  chains2d[[i]] <- MCMCpaircompare2d(
    pairDataCDAT,
    theta.constraints=list( 
      # Note that "+" = constrained to be positive and "-" = constrained to negative
      # Note that "n" = fixed value if desired
      # Constraining each item to one quadrant based on theory
      '5074' = list(1, "+"),  
      '5074' = list(2, "-"),
      '5495' = list(1, "-"), 
      '5495' = list(2, "+")   
      ),  
    verbose=5000,         # Prints status updates every __ iterations.
    burnin=1000,      # Discards the first __ samples to stabilize.
    mcmc=10000,        # Runs __ iterations for estimation.
    thin=50,          # Keeps every __th sample to reduce autocorrelation.
    seed=seeds[i],        # Ensures reproducibility of results.
    gamma.start=gamma_start, # Uses the initialized respondent perception values.
    theta.start=theta_start2d,
    store.theta=TRUE,  # Saves item-specfic draws.
    store.gamma=TRUE,  # Saves both rater-specific draws.
    tune=.3,           # Sets the tuning parameter for the Metropolis-Hastings step.
    procrustes=FALSE   # Disables Procrustes alignment
    )
}
## 
## 
## MCMCpaircompare2d iteration 1 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 5001 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 1 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 5001 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 1 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 5001 of 11000 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 11000 
## 
## Summary of acceptance rates for gamma:
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2d <- mcmc.list(lapply(chains2d, mcmc)) 

#Save object
save(mcmc_list2d, file = "Out/mcmc_list2d.Rdata")

# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list2d[[1]]), value = TRUE)
alpha <- grep("^alpha", colnames(mcmc_list2d[[1]]), value = TRUE)

Check model convergence

# For 2D model
gelman_diag_2d <- gelman.diag(mcmc_list2d)
max_rhat_2d <- max(gelman_diag_2d$psrf[, "Point est."])

gelman_diag_2d
## Potential scale reduction factors:
## 
##             Point est. Upper C.I.
## theta1.1121      1.002      1.007
## theta1.1154      0.995      0.996
## theta1.1222      0.997      0.999
## theta1.1637      1.000      1.012
## theta1.1995      0.996      0.998
## theta1.3369      1.007      1.037
## theta1.4004      1.014      1.050
## theta1.4209      0.996      0.996
## theta1.4268      1.001      1.017
## theta1.4841      1.000      1.007
## theta1.5074      1.000      1.009
## theta1.5495      1.003      1.021
## theta1.5710      1.004      1.013
## theta1.8294      0.997      0.998
## theta1.8980      1.009      1.038
## theta1.9899      0.999      1.004
## theta2.1121      1.023      1.087
## theta2.1154      1.001      1.014
## theta2.1222      1.001      1.013
## theta2.1637      1.003      1.019
## theta2.1995      1.031      1.107
## theta2.3369      1.009      1.030
## theta2.4004      1.001      1.016
## theta2.4209      0.998      1.000
## theta2.4268      0.999      1.008
## theta2.4841      1.000      1.012
## theta2.5074      1.011      1.041
## theta2.5495      1.001      1.004
## theta2.5710      0.996      0.999
## theta2.8294      0.999      1.006
## theta2.8980      1.004      1.009
## theta2.9899      1.001      1.015
## gamma.1          1.009      1.041
## gamma.25         1.006      1.025
## gamma.26         1.008      1.025
## gamma.27         1.007      1.035
## gamma.28         1.001      1.012
## gamma.29         1.013      1.055
## gamma.30         0.997      0.997
## gamma.31         1.013      1.040
## gamma.34         1.011      1.046
## gamma.35         1.003      1.023
## gamma.36         1.001      1.013
## gamma.37         1.009      1.035
## gamma.38         0.997      0.999
## 
## Multivariate psrf
## 
## 1.16

Summary of MCMC 2d with normal prior

View a summary of the 2d model parameter posterior distributions

summary2d <- round(
  cbind(
    as.data.frame(summary(mcmc_list2d)$statistics),
    as.data.frame(summary(mcmc_list2d)$quantiles)
  ), 3)
summary2d$Model <- "2d"

save(summary2d, file="Out/summary2d.Rdata")
summary2d
##               Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta1.1121  1.247 0.401    0.016          0.017  0.559  0.955  1.244  1.523
## theta1.1154 -0.019 0.373    0.015          0.018 -0.713 -0.257 -0.030  0.223
## theta1.1222  0.604 0.354    0.014          0.015 -0.107  0.358  0.616  0.859
## theta1.1637  0.606 0.362    0.015          0.016 -0.107  0.362  0.608  0.847
## theta1.1995  0.849 0.406    0.017          0.020  0.088  0.559  0.845  1.120
## theta1.3369  1.686 0.441    0.018          0.021  0.901  1.360  1.682  1.977
## theta1.4004 -0.821 0.395    0.016          0.016 -1.620 -1.097 -0.810 -0.549
## theta1.4209 -1.271 0.407    0.017          0.019 -2.023 -1.547 -1.261 -1.010
## theta1.4268 -0.850 0.396    0.016          0.018 -1.692 -1.101 -0.812 -0.558
## theta1.4841 -0.954 0.374    0.015          0.015 -1.684 -1.194 -0.949 -0.713
## theta1.5074  1.134 0.389    0.016          0.017  0.431  0.857  1.137  1.391
## theta1.5495 -2.323 0.427    0.017          0.017 -3.159 -2.598 -2.317 -2.044
## theta1.5710 -0.952 0.386    0.016          0.016 -1.733 -1.235 -0.943 -0.724
## theta1.8294 -0.138 0.356    0.015          0.016 -0.851 -0.374 -0.136  0.104
## theta1.8980  0.685 0.362    0.015          0.015 -0.006  0.443  0.702  0.935
## theta1.9899  0.419 0.360    0.015          0.016 -0.280  0.186  0.417  0.662
## theta2.1121 -1.168 0.364    0.015          0.016 -1.933 -1.396 -1.153 -0.918
## theta2.1154 -1.536 0.351    0.014          0.017 -2.231 -1.778 -1.531 -1.309
## theta2.1222 -0.160 0.333    0.014          0.015 -0.833 -0.376 -0.168  0.061
## theta2.1637 -0.514 0.322    0.013          0.014 -1.140 -0.735 -0.507 -0.302
## theta2.1995 -1.265 0.351    0.014          0.015 -1.910 -1.528 -1.254 -1.032
## theta2.3369  1.246 0.374    0.015          0.018  0.486  1.014  1.250  1.492
## theta2.4004  1.380 0.333    0.014          0.015  0.755  1.142  1.381  1.581
## theta2.4209  0.779 0.341    0.014          0.014  0.203  0.523  0.754  1.015
## theta2.4268  1.206 0.350    0.014          0.016  0.554  0.970  1.205  1.430
## theta2.4841 -0.250 0.324    0.013          0.013 -0.875 -0.472 -0.255 -0.048
## theta2.5074 -1.068 0.342    0.014          0.017 -1.719 -1.289 -1.067 -0.847
## theta2.5495  0.575 0.333    0.014          0.015  0.073  0.326  0.542  0.793
## theta2.5710 -0.834 0.328    0.013          0.015 -1.516 -1.037 -0.837 -0.637
## theta2.8294  0.728 0.318    0.013          0.014  0.148  0.509  0.727  0.939
## theta2.8980  0.893 0.343    0.014          0.018  0.235  0.650  0.922  1.124
## theta2.9899  0.665 0.331    0.014          0.014 -0.032  0.464  0.666  0.892
## gamma.1      0.872 0.126    0.005          0.006  0.622  0.794  0.872  0.964
## gamma.25     0.725 0.140    0.006          0.008  0.438  0.633  0.726  0.829
## gamma.26     0.830 0.121    0.005          0.006  0.588  0.747  0.828  0.917
## gamma.27     0.837 0.121    0.005          0.006  0.591  0.756  0.837  0.928
## gamma.28     0.454 0.135    0.005          0.007  0.197  0.367  0.457  0.544
## gamma.29     0.917 0.137    0.006          0.007  0.648  0.826  0.920  1.009
## gamma.30     0.317 0.150    0.006          0.007  0.032  0.209  0.317  0.418
## gamma.31     0.076 0.071    0.003          0.003  0.002  0.024  0.050  0.112
## gamma.34     0.944 0.143    0.006          0.007  0.677  0.849  0.942  1.041
## gamma.35     1.341 0.136    0.006          0.005  1.050  1.258  1.349  1.444
## gamma.36     1.313 0.128    0.005          0.006  1.049  1.235  1.325  1.407
## gamma.37     1.326 0.116    0.005          0.005  1.068  1.250  1.332  1.402
## gamma.38     1.450 0.086    0.004          0.004  1.243  1.405  1.465  1.517
##              97.5% Model
## theta1.1121  2.095    2d
## theta1.1154  0.758    2d
## theta1.1222  1.212    2d
## theta1.1637  1.294    2d
## theta1.1995  1.621    2d
## theta1.3369  2.587    2d
## theta1.4004 -0.108    2d
## theta1.4209 -0.468    2d
## theta1.4268 -0.180    2d
## theta1.4841 -0.245    2d
## theta1.5074  1.896    2d
## theta1.5495 -1.540    2d
## theta1.5710 -0.175    2d
## theta1.8294  0.514    2d
## theta1.8980  1.369    2d
## theta1.9899  1.102    2d
## theta2.1121 -0.481    2d
## theta2.1154 -0.823    2d
## theta2.1222  0.486    2d
## theta2.1637  0.141    2d
## theta2.1995 -0.595    2d
## theta2.3369  1.964    2d
## theta2.4004  2.030    2d
## theta2.4209  1.443    2d
## theta2.4268  1.912    2d
## theta2.4841  0.404    2d
## theta2.5074 -0.348    2d
## theta2.5495  1.325    2d
## theta2.5710 -0.165    2d
## theta2.8294  1.367    2d
## theta2.8980  1.565    2d
## theta2.9899  1.278    2d
## gamma.1      1.104    2d
## gamma.25     0.987    2d
## gamma.26     1.062    2d
## gamma.27     1.058    2d
## gamma.28     0.722    2d
## gamma.29     1.191    2d
## gamma.30     0.609    2d
## gamma.31     0.261    2d
## gamma.34     1.203    2d
## gamma.35     1.552    2d
## gamma.36     1.538    2d
## gamma.37     1.538    2d
## gamma.38     1.564    2d

See companion page for plots of 2d model “PWC Model 2d MCMCpack CDAT”

https://rpubs.com/steveneheil/mcmc2d

MCMC 2d DP Prior Paired Comparison model

seeds <- c(4610, 5712, 6814) # Set seeds for each chain
chains2dDP <- vector("list", n_chains)

for (i in 1:n_chains) {
  chains2dDP[[i]] <- MCMCpaircompare2dDP(
    pairDataCDAT,
    theta.constraints=list( 
      # Note that "+" = constrained to be positive and "-" = constrained to negative
      # Note that "n" = fixed value if desired
      # Constraining each item to one quadrant based on theory
      '5074' = list(1, "+"),  
      '5074' = list(2, "-"),
      '5495' = list(1, "-"), 
      '5495' = list(2, "+")   
      ),  
    verbose=5000,         # Prints status updates every __ iterations.
    burnin=5000,      # Discards the first __ samples to stabilize.
    mcmc=15000,        # Runs __ iterations for estimation.
    thin=50,          # Keeps every __th sample to reduce autocorrelation.
    seed=seeds[i],        # Ensures reproducibility of results.
    gamma.start=gamma_start, # Uses the initialized respondent perception values.
    alpha=1,           # Sets the Dirichlet Process concentration parameter.
    cluster.max=4,     # Allows up to n clusters of respondents.
    cluster.mcmc=500,  # Runs __ iterations of MCMC for clustering.
    alpha.fixed=FALSE, # Allows the concentration parameter to be learned from the data.
    a0=1, b0=1,        # Hyperparameters for the Dirichlet Process prior.
    theta.start=theta_start2d,
    store.theta=TRUE,  # Saves item-specfic draws.
    store.gamma=TRUE,  # Saves both rater-specific draws.
    tune=.3,           # Sets the tuning parameter for the Metropolis-Hastings step.
    procrustes=FALSE   # Disables Procrustes alignment
    )
}
## 
## 
## MCMCpaircompare2dDP iteration 1 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 5001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 10001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 15001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 1 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 5001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 10001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 15001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 1 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 5001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 10001 of 20000 
## 
## 
## MCMCpaircompare2dDP iteration 15001 of 20000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2dDP <- mcmc.list(lapply(chains2dDP, mcmc)) 

# Save object
save(mcmc_list2dDP, file = "Out/mcmc_list2dDP.Rdata")

Check model convergence

# For 2D-DP model
gelman_diag_2dDP <- gelman.diag(mcmc_list2dDP)
max_rhat_2dDP <- max(gelman_diag_2dDP$psrf[, "Point est."])

gelman_diag_2dDP
## Potential scale reduction factors:
## 
##             Point est. Upper C.I.
## theta1.1121      1.003       1.02
## theta1.1154      1.004       1.01
## theta1.1222      0.998       1.00
## theta1.1637      1.004       1.01
## theta1.1995      1.007       1.03
## theta1.3369      1.011       1.04
## theta1.4004      1.010       1.02
## theta1.4209      1.004       1.01
## theta1.4268      1.001       1.01
## theta1.4841      1.004       1.01
## theta1.5074      0.998       1.00
## theta1.5495      1.001       1.01
## theta1.5710      1.005       1.01
## theta1.8294      0.999       1.00
## theta1.8980      1.005       1.02
## theta1.9899      0.999       1.00
## theta2.1121      1.007       1.02
## theta2.1154      1.006       1.02
## theta2.1222      1.001       1.01
## theta2.1637      1.001       1.01
## theta2.1995      0.999       1.00
## theta2.3369      1.012       1.04
## theta2.4004      0.999       1.00
## theta2.4209      1.000       1.00
## theta2.4268      1.001       1.01
## theta2.4841      0.999       1.00
## theta2.5074      1.009       1.03
## theta2.5495      1.004       1.01
## theta2.5710      0.999       1.00
## theta2.8294      1.004       1.01
## theta2.8980      1.001       1.01
## theta2.9899      1.010       1.03
## gamma.1          1.007       1.03
## gamma.25         1.002       1.01
## gamma.26         1.008       1.04
## gamma.27         1.007       1.03
## gamma.28         0.998       1.00
## gamma.29         1.009       1.04
## gamma.30         1.011       1.05
## gamma.31         1.006       1.01
## gamma.34         1.008       1.04
## gamma.35         1.019       1.07
## gamma.36         1.012       1.05
## gamma.37         1.016       1.06
## gamma.38         1.014       1.06
## cluster.1        1.046       1.15
## cluster.25       1.038       1.13
## cluster.26       1.051       1.17
## cluster.27       1.041       1.14
## cluster.28       1.000       1.00
## cluster.29       1.050       1.17
## cluster.30       1.004       1.02
## cluster.31       1.006       1.03
## cluster.34       1.044       1.15
## cluster.35       1.045       1.15
## cluster.36       1.044       1.15
## cluster.37       1.044       1.15
## cluster.38       1.038       1.12
## n.clusters       1.009       1.03
## alpha            1.004       1.02
## 
## Multivariate psrf
## 
## 1.15

Show model convergence statistics of all three models– across all parameters max \(\hat{R}\)

data.frame(
  Model = c("1D", "2D", "2D-DP"),
  Max_Rhat = c(max_rhat_1d, max_rhat_2d, max_rhat_2dDP)
)
##   Model Max_Rhat
## 1    1D 1.088734
## 2    2D 1.031031
## 3 2D-DP 1.050652

Summary of MCMC chain output

summary2dDP <- round(
  cbind(
    as.data.frame(summary(mcmc_list2dDP)$statistics),
    as.data.frame(summary(mcmc_list2dDP)$quantiles)
  ), 3)
summary2dDP$Model <- "2dDP"
save(summary2dDP, file = "Out/summary2dDP.Rdata")
summary2dDP
##               Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta1.1121  1.194 0.406    0.014          0.016  0.450  0.912  1.178  1.469
## theta1.1154 -0.012 0.393    0.013          0.014 -0.733 -0.284 -0.029  0.246
## theta1.1222  0.636 0.353    0.012          0.014 -0.036  0.387  0.638  0.874
## theta1.1637  0.608 0.357    0.012          0.013 -0.059  0.370  0.599  0.835
## theta1.1995  0.753 0.414    0.014          0.015  0.040  0.461  0.715  1.007
## theta1.3369  1.579 0.457    0.015          0.016  0.713  1.267  1.570  1.877
## theta1.4004 -0.769 0.430    0.014          0.016 -1.613 -1.058 -0.749 -0.479
## theta1.4209 -1.285 0.419    0.014          0.014 -2.168 -1.560 -1.255 -1.011
## theta1.4268 -0.770 0.440    0.015          0.016 -1.737 -1.047 -0.747 -0.471
## theta1.4841 -0.884 0.368    0.012          0.013 -1.653 -1.125 -0.866 -0.637
## theta1.5074  1.156 0.400    0.013          0.015  0.386  0.881  1.126  1.435
## theta1.5495 -2.339 0.466    0.016          0.016 -3.262 -2.654 -2.323 -2.018
## theta1.5710 -0.864 0.396    0.013          0.013 -1.664 -1.123 -0.863 -0.593
## theta1.8294 -0.184 0.384    0.013          0.014 -0.927 -0.449 -0.165  0.082
## theta1.8980  0.651 0.374    0.012          0.012 -0.120  0.401  0.651  0.914
## theta1.9899  0.471 0.380    0.013          0.013 -0.290  0.222  0.476  0.733
## theta2.1121 -1.101 0.361    0.012          0.014 -1.849 -1.326 -1.090 -0.869
## theta2.1154 -1.554 0.348    0.012          0.012 -2.217 -1.790 -1.557 -1.334
## theta2.1222 -0.193 0.318    0.011          0.011 -0.865 -0.394 -0.184  0.014
## theta2.1637 -0.515 0.328    0.011          0.012 -1.177 -0.734 -0.497 -0.289
## theta2.1995 -1.192 0.355    0.012          0.013 -1.891 -1.424 -1.189 -0.944
## theta2.3369  1.315 0.410    0.014          0.015  0.499  1.026  1.325  1.597
## theta2.4004  1.280 0.336    0.011          0.012  0.658  1.061  1.270  1.488
## theta2.4209  0.762 0.346    0.012          0.012  0.160  0.512  0.730  0.995
## theta2.4268  1.083 0.323    0.011          0.012  0.480  0.869  1.069  1.295
## theta2.4841 -0.339 0.338    0.011          0.013 -0.993 -0.576 -0.331 -0.105
## theta2.5074 -1.071 0.353    0.012          0.014 -1.732 -1.317 -1.062 -0.822
## theta2.5495  0.518 0.342    0.011          0.011  0.025  0.246  0.470  0.747
## theta2.5710 -0.929 0.355    0.012          0.013 -1.615 -1.167 -0.946 -0.679
## theta2.8294  0.737 0.327    0.011          0.010  0.110  0.512  0.730  0.959
## theta2.8980  0.904 0.340    0.011          0.012  0.254  0.658  0.895  1.136
## theta2.9899  0.614 0.332    0.011          0.012 -0.048  0.396  0.621  0.846
## gamma.1      0.854 0.133    0.004          0.006  0.592  0.762  0.857  0.949
## gamma.25     0.838 0.144    0.005          0.006  0.542  0.743  0.844  0.940
## gamma.26     0.854 0.133    0.004          0.006  0.600  0.761  0.855  0.949
## gamma.27     0.854 0.133    0.004          0.006  0.593  0.763  0.859  0.949
## gamma.28     0.378 0.213    0.007          0.008  0.023  0.196  0.382  0.540
## gamma.29     0.856 0.133    0.004          0.006  0.605  0.763  0.860  0.950
## gamma.30     0.290 0.204    0.007          0.008  0.014  0.108  0.249  0.462
## gamma.31     0.129 0.102    0.003          0.003  0.006  0.046  0.106  0.186
## gamma.34     0.857 0.135    0.004          0.006  0.593  0.765  0.861  0.951
## gamma.35     1.420 0.111    0.004          0.004  1.153  1.360  1.442  1.506
## gamma.36     1.418 0.111    0.004          0.004  1.150  1.354  1.439  1.501
## gamma.37     1.417 0.112    0.004          0.004  1.148  1.354  1.439  1.502
## gamma.38     1.424 0.107    0.004          0.004  1.159  1.363  1.445  1.507
## cluster.1    2.388 1.180    0.039          0.114  1.000  1.000  2.000  4.000
## cluster.25   2.401 1.171    0.039          0.101  1.000  1.000  2.000  4.000
## cluster.26   2.381 1.185    0.039          0.114  1.000  1.000  2.000  4.000
## cluster.27   2.396 1.180    0.039          0.111  1.000  1.000  2.000  4.000
## cluster.28   2.532 1.063    0.035          0.056  1.000  2.000  3.000  3.000
## cluster.29   2.376 1.171    0.039          0.108  1.000  1.000  2.000  3.250
## cluster.30   2.559 1.058    0.035          0.066  1.000  2.000  3.000  3.000
## cluster.31   2.634 1.071    0.036          0.071  1.000  2.000  3.000  4.000
## cluster.34   2.386 1.184    0.039          0.108  1.000  1.000  2.000  4.000
## cluster.35   2.371 1.136    0.038          0.106  1.000  1.000  2.000  3.000
## cluster.36   2.374 1.132    0.038          0.089  1.000  1.000  2.000  3.000
## cluster.37   2.373 1.135    0.038          0.109  1.000  1.000  2.000  3.000
## cluster.38   2.399 1.139    0.038          0.090  1.000  1.000  2.000  3.000
## n.clusters   3.790 0.408    0.014          0.015  3.000  4.000  4.000  4.000
## alpha        1.625 0.979    0.033          0.037  0.328  0.915  1.430  2.071
##              97.5% Model
## theta1.1121  1.970  2dDP
## theta1.1154  0.777  2dDP
## theta1.1222  1.283  2dDP
## theta1.1637  1.361  2dDP
## theta1.1995  1.669  2dDP
## theta1.3369  2.484  2dDP
## theta1.4004  0.012  2dDP
## theta1.4209 -0.495  2dDP
## theta1.4268  0.052  2dDP
## theta1.4841 -0.179  2dDP
## theta1.5074  1.956  2dDP
## theta1.5495 -1.467  2dDP
## theta1.5710 -0.086  2dDP
## theta1.8294  0.547  2dDP
## theta1.8980  1.345  2dDP
## theta1.9899  1.153  2dDP
## theta2.1121 -0.456  2dDP
## theta2.1154 -0.901  2dDP
## theta2.1222  0.417  2dDP
## theta2.1637  0.109  2dDP
## theta2.1995 -0.516  2dDP
## theta2.3369  2.128  2dDP
## theta2.4004  1.987  2dDP
## theta2.4209  1.489  2dDP
## theta2.4268  1.752  2dDP
## theta2.4841  0.342  2dDP
## theta2.5074 -0.420  2dDP
## theta2.5495  1.316  2dDP
## theta2.5710 -0.245  2dDP
## theta2.8294  1.410  2dDP
## theta2.8980  1.564  2dDP
## theta2.9899  1.222  2dDP
## gamma.1      1.099  2dDP
## gamma.25     1.099  2dDP
## gamma.26     1.099  2dDP
## gamma.27     1.099  2dDP
## gamma.28     0.775  2dDP
## gamma.29     1.100  2dDP
## gamma.30     0.686  2dDP
## gamma.31     0.368  2dDP
## gamma.34     1.100  2dDP
## gamma.35     1.564  2dDP
## gamma.36     1.563  2dDP
## gamma.37     1.563  2dDP
## gamma.38     1.564  2dDP
## cluster.1    4.000  2dDP
## cluster.25   4.000  2dDP
## cluster.26   4.000  2dDP
## cluster.27   4.000  2dDP
## cluster.28   4.000  2dDP
## cluster.29   4.000  2dDP
## cluster.30   4.000  2dDP
## cluster.31   4.000  2dDP
## cluster.34   4.000  2dDP
## cluster.35   4.000  2dDP
## cluster.36   4.000  2dDP
## cluster.37   4.000  2dDP
## cluster.38   4.000  2dDP
## n.clusters   4.000  2dDP
## alpha        4.047  2dDP

See companion page for plots of 2dDP model “PWC Model 2dDP MCMCpack CDAT”

https://rpubs.com/steveneheil/mcmc2dDP

Model Checking

From Levy & Mislevy (2016) Ch. 10.2.4 & 10.2.5Posterior Predictive Model Checking

  1. What is the feature of the data that is of interest?

The common feature across the 2-dimensional models are (a) posterior thetas of each item’s location in 2-dimensional latent space and precision of that distribution; and (b) the posterior gammas of each rater.

The common feature of all models is the choice of one item over another based on th model-implied latent traits.

  1. What do the models imply about how thetas and gammas ought to be?

The prediction of choices based on model-implied latent variables should be close obserbed choices.

  1. How do choices of items found in the data compare to what the model implies about the raters’ choices?

The selection of the left item (j1) over the second (j2) is the test statistic to be examined with Posterior Predictive Model Checking.

PPMC with proportion of choice of item in a comparison as test statistic

(See Levy & Mislevy, 2016, pp. 237-239). ### 2d models with both thetas and a gamma governing rater preference For each posterior sample of \(\theta_{1}\), \(\theta_{2}\), and \(\gamma\), simulate responses.

A function to simulate responses using 2d model

First, a function that will - combine all chains into one list of samples - iterate 200 sets of simulated choices based on comparisons that match the rater and theta IDs of the observed comparisons - for each iteration draw one value from all chains of \(\theta_{1}\), \(\theta_{2}\), and \(\gamma\). - for each comparison of the 960 observations, extract the two items and the rater for a simulated choice. - find the difference between the two items’ thetas according to the model - add the two thetas in proportion to the gamma for the variable \(\eta\) - use eta as input to the probit link function to find the probability of choosing j1 over j2 - make a random draw of 1 or 0 governed by the binomial distribution and probability computed from eta - add the 1 or 0 to a matrix of choices of j1 over j2 - return the simulated choices for the 960 comparisons 200 times

simulate_pp_data_mcmclist2d <- function(mcmc_list, pair_data) {
  # Combine chains into one matrix
  all_samples <- do.call(rbind, mcmc_list)
  n_obs <- nrow(pair_data)
  n_rep = 200
  sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
  
  # Identify column names for theta1, theta2, gamma
  theta1_names <- grep("^theta1\\.", colnames(all_samples), value = TRUE)
  theta2_names <- grep("^theta2\\.", colnames(all_samples), value = TRUE)
  gamma_names  <- grep("^gamma\\.", colnames(all_samples), value = TRUE)

  for (r in 1:n_rep) {                     # Row by row iteration
    s <- sample(1:nrow(all_samples), 1)    # Draw from any chain
    theta1 <- all_samples[s, theta1_names] # Draw one value for theta1
    theta2 <- all_samples[s, theta2_names] # Draw one value for theta2
    gamma  <- all_samples[s, gamma_names]  # Draw one value for gamma

    for (i in 1:n_obs) {                        # Column by column iteration
      j1 <- as.character(pair_data$itemj1[i])   # Locate the first item number for that row from the observed
      j2 <- as.character(pair_data$itemj2[i])   # Locate the second item number for that row from the observed
      rater <- as.character(pair_data$rater[i]) # Locate the rater for that row from the observed
      diff1 <- theta1[paste0("theta1.", j1)] - theta1[paste0("theta1.", j2)] # Find the model-implied diff between sampled theta1 of 2 items
      diff2 <- theta2[paste0("theta2.", j1)] - theta2[paste0("theta2.", j2)] # Find the model-implied diff between sampled theta2 of 2 items
      g <- gamma[paste0("gamma.", rater)]       # Find the model-implied sampled gamma of the rater
      # eta will be the input to the probit link function
      eta <- g * diff2 + (1 - g) * diff1 # Add thetas in proportion to gamma, representing how much more favorable j1 is than j2
      # If eta is negative, then j2 is preferrable. If close to 0, then it's a toss-up.
      p <- pnorm(eta) # transform eta using the probit link to the probability j1 is chosen over j2.
      # If p is <.5 preference for j2, p = .5 no preference, p > .5 preference for j1.
      # Generate 1 draw of 0 or 1 based on p, and add it to the sim_choices matrix
      sim_choices[r, i] <- rbinom(n = 1, size = 1, p) 
    }
  }
  # return a set of binary choices where 1 is a choice of j1 and 0 is a choice of j2
  return(sim_choices) 
} 

Run the function to simulate 200 sets of observed choices for each 2d model Each data set is a row in a matrix 960 x 200

sim_2d <- simulate_pp_data_mcmclist2d(mcmc_list2d, pairDataCDAT)
sim_2dDP <- simulate_pp_data_mcmclist2d(mcmc_list2dDP, pairDataCDAT)

Convert observed item ID choices to 1/0 for comparison to simulated data. If itemj1 is chosen, then 1, if j2 is chosen, then 0.

#If choice is first item, 1, if choice is second item, 0
observed_binary <- ifelse(pairDataCDAT$choice == pairDataCDAT$itemj1, 1,
                          ifelse(pairDataCDAT$choice == pairDataCDAT$itemj2, 0, NA))

A function to compute the p-value of observed and simulated choices - given the binary version of the observed data, find the mean (proportion of j1 choices) for each simulated data set - compute the proportion of simulated data sets that have greater than or equal to the proportion of j1 choices in observed

compute_ppp_proportion <- function(observed_choices, sim_choices_matrix) {
  obs_prop <- mean(observed_choices)
  
  # The mean of the row is the proportion of same choice for that simulated data set
  sim_prop <- rowMeans(sim_choices_matrix) 
  sim_correct <- rowMeans(sim_choices_matrix == matrix(observed_choices,
                                                       nrow = nrow(sim_choices_matrix), 
                                                       ncol = length(observed_choices), 
                                                       byrow = TRUE))
  
  # Posterior predictive p-value
  ppp <- mean(sim_prop >= obs_prop)
  
  list(
    observed = obs_prop,
    simulated = sim_prop,
    simulated_correct = sim_correct,
    ppp = ppp
  )
}

Compute Posterior Predictive P-value (PPP) for each 2d model

2d with normal prior:

ppp2d <- compute_ppp_proportion(observed_binary, sim_2d)
ppp2d
## $observed
## [1] 0.5145833
## 
## $simulated
##   [1] 0.5156250 0.5281250 0.5156250 0.5197917 0.4937500 0.5250000 0.5114583
##   [8] 0.5020833 0.5072917 0.5250000 0.4916667 0.5114583 0.5135417 0.5312500
##  [15] 0.5031250 0.5385417 0.5114583 0.5125000 0.5197917 0.5270833 0.5322917
##  [22] 0.5197917 0.5208333 0.5145833 0.5197917 0.5041667 0.5125000 0.5354167
##  [29] 0.5343750 0.5218750 0.5500000 0.5312500 0.5187500 0.5062500 0.5010417
##  [36] 0.5052083 0.5458333 0.5218750 0.5270833 0.5020833 0.5187500 0.4979167
##  [43] 0.5166667 0.5270833 0.5239583 0.5114583 0.5322917 0.5177083 0.5031250
##  [50] 0.5187500 0.5197917 0.5020833 0.5218750 0.4854167 0.5145833 0.5218750
##  [57] 0.5458333 0.5052083 0.5510417 0.5218750 0.4968750 0.5239583 0.5250000
##  [64] 0.5375000 0.5270833 0.5281250 0.5114583 0.4916667 0.5197917 0.5281250
##  [71] 0.5437500 0.5104167 0.5229167 0.5114583 0.5364583 0.5270833 0.5031250
##  [78] 0.5156250 0.5406250 0.4979167 0.5135417 0.5218750 0.5177083 0.5333333
##  [85] 0.4979167 0.5156250 0.5239583 0.5218750 0.5125000 0.5041667 0.5333333
##  [92] 0.5260417 0.5125000 0.5322917 0.5218750 0.5229167 0.5458333 0.5375000
##  [99] 0.4979167 0.5177083 0.4968750 0.4895833 0.5312500 0.5281250 0.5333333
## [106] 0.5000000 0.5187500 0.5281250 0.5093750 0.5364583 0.5239583 0.5156250
## [113] 0.5260417 0.5322917 0.5187500 0.5041667 0.5052083 0.5312500 0.4958333
## [120] 0.5145833 0.5218750 0.5250000 0.5052083 0.5062500 0.4927083 0.4968750
## [127] 0.4979167 0.5083333 0.5187500 0.5104167 0.5395833 0.5135417 0.5250000
## [134] 0.5333333 0.5072917 0.5229167 0.5281250 0.5312500 0.5406250 0.5135417
## [141] 0.5510417 0.5166667 0.5156250 0.5302083 0.5041667 0.5208333 0.4989583
## [148] 0.5385417 0.5135417 0.4906250 0.5250000 0.5052083 0.5270833 0.5041667
## [155] 0.5281250 0.5145833 0.5125000 0.5187500 0.4739583 0.5125000 0.5343750
## [162] 0.5281250 0.5197917 0.5291667 0.5218750 0.5270833 0.5510417 0.5135417
## [169] 0.5135417 0.5093750 0.5020833 0.5135417 0.5375000 0.5354167 0.5354167
## [176] 0.4822917 0.5135417 0.5145833 0.5197917 0.4979167 0.5135417 0.5364583
## [183] 0.5302083 0.5395833 0.5041667 0.5250000 0.5208333 0.5125000 0.4854167
## [190] 0.4989583 0.4989583 0.5145833 0.5000000 0.4854167 0.4989583 0.5041667
## [197] 0.5125000 0.5197917 0.5114583 0.5239583
## 
## $simulated_correct
##   [1] 0.6385417 0.6572917 0.6343750 0.6197917 0.6458333 0.6416667 0.6177083
##   [8] 0.6500000 0.6260417 0.6333333 0.6333333 0.6697917 0.6510417 0.6375000
##  [15] 0.6322917 0.6385417 0.6489583 0.6645833 0.6489583 0.6666667 0.6427083
##  [22] 0.6281250 0.6458333 0.6541667 0.6447917 0.6416667 0.6416667 0.6437500
##  [29] 0.6864583 0.6322917 0.6520833 0.6520833 0.6291667 0.6354167 0.6114583
##  [36] 0.6489583 0.6291667 0.6385417 0.6375000 0.6395833 0.6187500 0.6479167
##  [43] 0.6625000 0.6500000 0.6697917 0.6489583 0.6489583 0.6531250 0.6302083
##  [50] 0.6541667 0.6572917 0.6333333 0.6364583 0.6354167 0.6354167 0.6385417
##  [57] 0.6312500 0.6406250 0.6468750 0.6614583 0.6572917 0.6281250 0.6083333
##  [64] 0.6437500 0.6541667 0.6614583 0.6427083 0.6270833 0.6197917 0.6406250
##  [71] 0.6520833 0.6416667 0.6645833 0.6322917 0.6593750 0.6562500 0.6281250
##  [78] 0.6531250 0.6156250 0.6312500 0.6239583 0.6385417 0.6302083 0.6500000
##  [85] 0.6270833 0.6593750 0.5989583 0.6343750 0.6729167 0.6604167 0.6500000
##  [92] 0.6572917 0.6541667 0.6447917 0.6614583 0.6520833 0.5979167 0.6354167
##  [99] 0.6395833 0.6052083 0.6447917 0.6458333 0.6395833 0.6614583 0.6416667
## [106] 0.6333333 0.6458333 0.6385417 0.6552083 0.6552083 0.6593750 0.6260417
## [113] 0.6427083 0.6385417 0.6562500 0.6312500 0.6302083 0.6541667 0.6687500
## [120] 0.6500000 0.6281250 0.6520833 0.6239583 0.6437500 0.6447917 0.6552083
## [127] 0.6479167 0.6437500 0.6666667 0.6479167 0.6291667 0.6697917 0.6395833
## [134] 0.6375000 0.6385417 0.6458333 0.6510417 0.6500000 0.6031250 0.6468750
## [141] 0.6052083 0.6354167 0.6197917 0.6197917 0.6375000 0.6687500 0.6427083
## [148] 0.6614583 0.6406250 0.6489583 0.6375000 0.6364583 0.6291667 0.6375000
## [155] 0.6489583 0.6187500 0.6354167 0.6625000 0.6406250 0.6604167 0.6489583
## [162] 0.6364583 0.6572917 0.6208333 0.6239583 0.6520833 0.6072917 0.6427083
## [169] 0.6364583 0.6593750 0.6562500 0.6510417 0.6208333 0.6479167 0.6541667
## [176] 0.6572917 0.6156250 0.6500000 0.6385417 0.6208333 0.6552083 0.6614583
## [183] 0.6281250 0.6187500 0.6562500 0.6229167 0.6208333 0.6458333 0.6250000
## [190] 0.6406250 0.6572917 0.6645833 0.6625000 0.6312500 0.5989583 0.6729167
## [197] 0.6479167 0.6302083 0.6406250 0.6468750
## 
## $ppp
## [1] 0.595
describe(ppp2d$simulated_correct)
##    vars   n mean   sd median trimmed  mad min  max range  skew kurtosis se
## X1    1 200 0.64 0.02   0.64    0.64 0.02 0.6 0.69  0.09 -0.36     0.14  0

2d model with DP prior and clusters:

ppp2dDP <- compute_ppp_proportion(observed_binary, sim_2dDP)
ppp2dDP
## $observed
## [1] 0.5145833
## 
## $simulated
##   [1] 0.4989583 0.5156250 0.5364583 0.5104167 0.5333333 0.5145833 0.5312500
##   [8] 0.5229167 0.5250000 0.5187500 0.5333333 0.5125000 0.5072917 0.5218750
##  [15] 0.4927083 0.5406250 0.5395833 0.5145833 0.5052083 0.5302083 0.5333333
##  [22] 0.5135417 0.5020833 0.5156250 0.5197917 0.5031250 0.5250000 0.5354167
##  [29] 0.5031250 0.5239583 0.5114583 0.5166667 0.5312500 0.5052083 0.5270833
##  [36] 0.5166667 0.5072917 0.5093750 0.4895833 0.5364583 0.5270833 0.5145833
##  [43] 0.5312500 0.5260417 0.5239583 0.5177083 0.5145833 0.5270833 0.5281250
##  [50] 0.5239583 0.5239583 0.5145833 0.4927083 0.5187500 0.5343750 0.5062500
##  [57] 0.5270833 0.5125000 0.5364583 0.5177083 0.5229167 0.4875000 0.5114583
##  [64] 0.5447917 0.5156250 0.5156250 0.5093750 0.5156250 0.5156250 0.5114583
##  [71] 0.5218750 0.5093750 0.5250000 0.5104167 0.5010417 0.5166667 0.5020833
##  [78] 0.5260417 0.5197917 0.5135417 0.5291667 0.5062500 0.5229167 0.5187500
##  [85] 0.5177083 0.5531250 0.5156250 0.5218750 0.5270833 0.5375000 0.5114583
##  [92] 0.5416667 0.4989583 0.5041667 0.5125000 0.5229167 0.5260417 0.5322917
##  [99] 0.5135417 0.5072917 0.5072917 0.5114583 0.5177083 0.5145833 0.5083333
## [106] 0.5031250 0.5333333 0.5187500 0.5260417 0.5104167 0.5208333 0.5114583
## [113] 0.5114583 0.5177083 0.5156250 0.5437500 0.4875000 0.5385417 0.5312500
## [120] 0.5322917 0.5635417 0.5156250 0.5010417 0.5093750 0.5125000 0.5552083
## [127] 0.5281250 0.5333333 0.5166667 0.5250000 0.5291667 0.5177083 0.5447917
## [134] 0.5010417 0.5114583 0.5312500 0.5229167 0.5083333 0.5270833 0.5062500
## [141] 0.4916667 0.5166667 0.5093750 0.5000000 0.5135417 0.5218750 0.4968750
## [148] 0.5322917 0.5239583 0.5229167 0.5114583 0.5218750 0.5177083 0.5000000
## [155] 0.5114583 0.5239583 0.5145833 0.5281250 0.5166667 0.5156250 0.5437500
## [162] 0.5010417 0.5218750 0.5281250 0.5250000 0.5239583 0.5385417 0.4791667
## [169] 0.5156250 0.5541667 0.5187500 0.5416667 0.5281250 0.5291667 0.5427083
## [176] 0.5114583 0.5343750 0.5218750 0.5135417 0.5291667 0.5291667 0.5135417
## [183] 0.5010417 0.5062500 0.4989583 0.5114583 0.5187500 0.5364583 0.5083333
## [190] 0.5333333 0.4937500 0.5291667 0.5250000 0.5281250 0.4916667 0.5166667
## [197] 0.5156250 0.5250000 0.5145833 0.5250000
## 
## $simulated_correct
##   [1] 0.6364583 0.6447917 0.6677083 0.6479167 0.6291667 0.6312500 0.6208333
##   [8] 0.6333333 0.6145833 0.6666667 0.6208333 0.6583333 0.6614583 0.6531250
##  [15] 0.6531250 0.6468750 0.6500000 0.6354167 0.6489583 0.6447917 0.6791667
##  [22] 0.6510417 0.6229167 0.6593750 0.6510417 0.6614583 0.6500000 0.6312500
##  [29] 0.6177083 0.6281250 0.6010417 0.6312500 0.6354167 0.6531250 0.6729167
##  [36] 0.6520833 0.6385417 0.6447917 0.6729167 0.6468750 0.6395833 0.6291667
##  [43] 0.6458333 0.6364583 0.6656250 0.6427083 0.6604167 0.6708333 0.6343750
##  [50] 0.6114583 0.6552083 0.6250000 0.6656250 0.6250000 0.6385417 0.6208333
##  [57] 0.6541667 0.6458333 0.6260417 0.6552083 0.6541667 0.6500000 0.6343750
##  [64] 0.6343750 0.6697917 0.6197917 0.6364583 0.6572917 0.6281250 0.6531250
##  [71] 0.6385417 0.6364583 0.6708333 0.6250000 0.6385417 0.6666667 0.6583333
##  [78] 0.6552083 0.6239583 0.6322917 0.6416667 0.6500000 0.6229167 0.6458333
##  [85] 0.6385417 0.6510417 0.6385417 0.6656250 0.6187500 0.6625000 0.6656250
##  [92] 0.6479167 0.6531250 0.6437500 0.6583333 0.6250000 0.6385417 0.6385417
##  [99] 0.6343750 0.6197917 0.6427083 0.6593750 0.6385417 0.6312500 0.6416667
## [106] 0.6614583 0.6312500 0.6791667 0.6364583 0.6791667 0.6583333 0.6843750
## [113] 0.6406250 0.6364583 0.6531250 0.6687500 0.6479167 0.6614583 0.6375000
## [120] 0.6302083 0.6468750 0.6510417 0.6093750 0.6302083 0.6583333 0.6385417
## [127] 0.6177083 0.6416667 0.6604167 0.6395833 0.6416667 0.6302083 0.6510417
## [134] 0.6302083 0.6572917 0.6125000 0.6458333 0.6500000 0.6458333 0.6229167
## [141] 0.6354167 0.6458333 0.6260417 0.6229167 0.6239583 0.6302083 0.6302083
## [148] 0.6385417 0.6697917 0.6458333 0.6531250 0.6197917 0.6427083 0.6333333
## [155] 0.6510417 0.6427083 0.6562500 0.6343750 0.6437500 0.6635417 0.6416667
## [162] 0.6593750 0.6552083 0.6531250 0.6562500 0.6239583 0.6239583 0.6250000
## [169] 0.6406250 0.6583333 0.6458333 0.6562500 0.6489583 0.6458333 0.6260417
## [176] 0.6239583 0.6427083 0.6572917 0.6427083 0.6729167 0.6250000 0.6489583
## [183] 0.6447917 0.6312500 0.6781250 0.6197917 0.6541667 0.6489583 0.6229167
## [190] 0.6437500 0.6687500 0.6437500 0.6395833 0.6572917 0.6312500 0.6562500
## [197] 0.6406250 0.6562500 0.6437500 0.6875000
## 
## $ppp
## [1] 0.655
describe(ppp2dDP$simulated_correct)
##    vars   n mean   sd median trimmed  mad min  max range skew kurtosis se
## X1    1 200 0.64 0.02   0.64    0.64 0.02 0.6 0.69  0.09 0.13    -0.31  0

1d model with a single theta and alpha governing choices

Let’s check the prediction of choices using the 1d model next. For each posterior sample of \(\theta\) and \(\alpha\), simulate responses.

A function to simulate responses using 1d model

simulate_pp_data_mcmclist1d <- function(mcmc_list, pair_data) {
  # Combine chains into one matrix
  all_samples <- do.call(rbind, mcmc_list)
  n_obs <- nrow(pair_data)
  n_rep = 200
  sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
  
  # Identify column names for theta1, theta2, gamma
  theta_names <- grep("^theta\\.", colnames(all_samples), value = TRUE)
  alpha_names  <- grep("^alpha\\.", colnames(all_samples), value = TRUE)
  
  for (r in 1:n_rep) {
    s <- sample(1:nrow(all_samples), 1)  # Select one draw from any chain in combined samples
    theta <- all_samples[s, theta_names] # Draw one value for theta
    alpha  <- all_samples[s, alpha_names] # Draw one value for alpha

    for (i in 1:n_obs) {
      j1 <- as.character(pair_data$itemj1[i])
      j2 <- as.character(pair_data$itemj2[i])
      rater <- as.character(pair_data$rater[i])

      # Safely extract theta/gamma values using column names

      diff <- theta[paste0("theta.", j1)] - theta[paste0("theta.", j2)]
      a <- alpha[paste0("alpha.", rater)]

      eta <- a * diff # Add thetas
      p <- pnorm(eta) # probit link is a normal distribution
      sim_choices[r, i] <- rbinom(1, 1, p)
    }
  }
  return(sim_choices)
}

Run the function for 1d model

sim_1d <- simulate_pp_data_mcmclist1d(mcmc_list1d, pairDataCDAT)

Compute Posterior Predictive P-value (PPP) for 1d model

ppp1d <- compute_ppp_proportion(observed_binary, sim_1d)
ppp1d
## $observed
## [1] 0.5145833
## 
## $simulated
##   [1] 0.4927083 0.5135417 0.4718750 0.4927083 0.5000000 0.4906250 0.5000000
##   [8] 0.4958333 0.5104167 0.5010417 0.4802083 0.5020833 0.4875000 0.5135417
##  [15] 0.5062500 0.5250000 0.4833333 0.4843750 0.5218750 0.5010417 0.5145833
##  [22] 0.4916667 0.5010417 0.5010417 0.5114583 0.5083333 0.5145833 0.5020833
##  [29] 0.4989583 0.4947917 0.5343750 0.4958333 0.5020833 0.5083333 0.4593750
##  [36] 0.4937500 0.5114583 0.5291667 0.4979167 0.4802083 0.5093750 0.4812500
##  [43] 0.4864583 0.5145833 0.5166667 0.5218750 0.4875000 0.5281250 0.5062500
##  [50] 0.5031250 0.5208333 0.4958333 0.4958333 0.5135417 0.5229167 0.5000000
##  [57] 0.4854167 0.5104167 0.5031250 0.4833333 0.4885417 0.5260417 0.5052083
##  [64] 0.5020833 0.5083333 0.5072917 0.4916667 0.5104167 0.5145833 0.5281250
##  [71] 0.5302083 0.5145833 0.4958333 0.4895833 0.5302083 0.5020833 0.4833333
##  [78] 0.4947917 0.5000000 0.4927083 0.4906250 0.5072917 0.5239583 0.4958333
##  [85] 0.5041667 0.5000000 0.4947917 0.4822917 0.4916667 0.4885417 0.4947917
##  [92] 0.5218750 0.4927083 0.5177083 0.5229167 0.4854167 0.5041667 0.4822917
##  [99] 0.5031250 0.4718750 0.5010417 0.4916667 0.5322917 0.4770833 0.5156250
## [106] 0.5197917 0.5177083 0.4906250 0.5062500 0.4635417 0.5208333 0.5010417
## [113] 0.5208333 0.5135417 0.4958333 0.5083333 0.4822917 0.4708333 0.4979167
## [120] 0.5145833 0.4875000 0.4750000 0.5052083 0.4979167 0.4781250 0.4958333
## [127] 0.5020833 0.4885417 0.5125000 0.4979167 0.5333333 0.5052083 0.5270833
## [134] 0.5156250 0.5208333 0.5281250 0.5239583 0.5093750 0.5145833 0.5145833
## [141] 0.5125000 0.5187500 0.5093750 0.4968750 0.5052083 0.4916667 0.4979167
## [148] 0.4979167 0.5041667 0.5083333 0.4895833 0.4750000 0.5072917 0.5125000
## [155] 0.4937500 0.5072917 0.5187500 0.4937500 0.4906250 0.5000000 0.5395833
## [162] 0.4916667 0.4927083 0.5093750 0.4937500 0.5187500 0.4916667 0.4958333
## [169] 0.4885417 0.4979167 0.5031250 0.5020833 0.4989583 0.5135417 0.5104167
## [176] 0.4708333 0.4927083 0.4864583 0.4812500 0.5052083 0.4906250 0.4927083
## [183] 0.4979167 0.5250000 0.5218750 0.5177083 0.5052083 0.5145833 0.5135417
## [190] 0.5031250 0.5114583 0.5083333 0.4791667 0.4937500 0.5260417 0.5177083
## [197] 0.4875000 0.5291667 0.5229167 0.5041667
## 
## $simulated_correct
##   [1] 0.6197917 0.6197917 0.6010417 0.6260417 0.6145833 0.6114583 0.6229167
##   [8] 0.6437500 0.6520833 0.6468750 0.6343750 0.6354167 0.6104167 0.6302083
##  [15] 0.6354167 0.6312500 0.6125000 0.6156250 0.6218750 0.6427083 0.6229167
##  [22] 0.6562500 0.6156250 0.5739583 0.6343750 0.6125000 0.6437500 0.6083333
##  [29] 0.6218750 0.6031250 0.6322917 0.6145833 0.6062500 0.6416667 0.6281250
##  [36] 0.6270833 0.6197917 0.6375000 0.6458333 0.6302083 0.6385417 0.6270833
##  [43] 0.6239583 0.6104167 0.6500000 0.6302083 0.6333333 0.6197917 0.6270833
##  [50] 0.6364583 0.6354167 0.6541667 0.6104167 0.6239583 0.6437500 0.6312500
##  [57] 0.6062500 0.6166667 0.6114583 0.6270833 0.6197917 0.6406250 0.6343750
##  [64] 0.6416667 0.6270833 0.6052083 0.6291667 0.6416667 0.6291667 0.6531250
##  [71] 0.6302083 0.6395833 0.6458333 0.5875000 0.6260417 0.6208333 0.6208333
##  [78] 0.5968750 0.6562500 0.6593750 0.6135417 0.6177083 0.6385417 0.6500000
##  [85] 0.6333333 0.6437500 0.6302083 0.6031250 0.6208333 0.6260417 0.6281250
##  [92] 0.6177083 0.6197917 0.6385417 0.6583333 0.6083333 0.6125000 0.6260417
##  [99] 0.6218750 0.6197917 0.6614583 0.6458333 0.6364583 0.6437500 0.6197917
## [106] 0.6093750 0.6427083 0.6197917 0.6416667 0.6364583 0.6020833 0.6239583
## [113] 0.6104167 0.6322917 0.6291667 0.6375000 0.6385417 0.6062500 0.6229167
## [120] 0.6375000 0.6208333 0.6291667 0.6322917 0.6270833 0.6302083 0.6125000
## [127] 0.6270833 0.6218750 0.6395833 0.6229167 0.6583333 0.6114583 0.6375000
## [134] 0.6114583 0.6125000 0.6427083 0.6489583 0.6281250 0.6270833 0.6312500
## [141] 0.6145833 0.6166667 0.6260417 0.6197917 0.6281250 0.6354167 0.6458333
## [148] 0.6041667 0.6312500 0.6270833 0.6395833 0.6437500 0.6302083 0.6270833
## [155] 0.6541667 0.6322917 0.6291667 0.6375000 0.6322917 0.6208333 0.6395833
## [162] 0.6187500 0.6260417 0.6302083 0.6333333 0.6520833 0.6479167 0.6187500
## [169] 0.6031250 0.6250000 0.6156250 0.6208333 0.6406250 0.6343750 0.6104167
## [176] 0.6270833 0.6281250 0.6197917 0.6416667 0.6427083 0.6531250 0.6052083
## [183] 0.6416667 0.6562500 0.6531250 0.6197917 0.6406250 0.6354167 0.6156250
## [190] 0.6281250 0.6135417 0.6395833 0.6333333 0.6333333 0.6468750 0.6302083
## [197] 0.6125000 0.6020833 0.6104167 0.5895833
## 
## $ppp
## [1] 0.245
describe(ppp1d$simulated_correct)
##    vars   n mean   sd median trimmed  mad  min  max range  skew kurtosis se
## X1    1 200 0.63 0.01   0.63    0.63 0.01 0.57 0.66  0.09 -0.23     0.21  0

Plot the observed proportion of selection of itemj1 in each comparison to the distribution of simulated selection of itemj1 in each comparison.

# Density of simulated proportions
dens <- density(ppp1d$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (1D Model)",
     xlab = "Simulated Proportion of itemj1 Chosen",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = ppp1d$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)

Plot simulated and observed proportions

Plot the density of the 2d model simulated proportions and the observed proportion (see Levy & Mislevy, 2016, pp. 238-239)

# Density of simulated proportions
dens <- density(ppp2d$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (2D Normal Prior Model)",
     xlab = "Simulated Proportion of itemj1 Chosen",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = ppp2d$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)

Plot the density of the 2dDP model simulated proportions and the observed proportion

# Density of simulated proportions
dens <- density(ppp2dDP$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (2D DP Prior Model)",
     xlab = "Simulated Proportion of itemj1 Chosen",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = ppp2dDP$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)

Both models have similar PPP. The distribtion of simulated choices’ proportion of j1 preference is nicely amassed around the observed proportion of j1 preferences, based on the rater’s model-implied gamma and the items’ model-implied theta1 and theta2 for that same combination.

This suggests that both of the 2d models produce simulated data that sufficiently resemble actual choices of items by raters.

Additionally, the bulk of simulated choices are within the narrow range of .04 (0.49 to 0.53).

They both consistently predict the proportion of preference for itemj1, but does this mean that the row-by-row choices are the same? For that, it may be interesting to do a rank correlation or correlation of theta1 and theta2 as test statistics in PPP.

The one-dimensional model’s predictive distribution mass of proportion selecting itemj1 is comfortably surrounding the observed proportion of itemj1 selection. Further, the predictive proportion of choices of itemj1 are all quite close to the observed, between 0.49 and 0.52.

This does not compare favorably to the 2d and 2dDP models using the same test statistic of proportion of j1 choices given the same comparison combination.

PPMC with item win test statistics

A function to examine the difference between proportion of observed win rates for each item and simulated win rates for each item, based on the same simulated sets of data above.

Compute observed win rate for each item in observed and simulated, which is the proportion of times it was chosen in comparisons in the observed data.

For each posterior predictive simulated data set: - Compute simulated win rate for each item - Calculate the chi-square-like discrepancy of win rates, which is the sum for all j of the squared difference between observed win proportions and simulated win proportions (+ \(\epsilon\) as small number to avoid divide by 0 errors) - Compare observed vs simulated to get a p-value

compute_ppp_item_chisq <- function(observed_binary, sim_matrix, itemj1, itemj2, epsilon = 1e-6) {
  all_items <- unique(c(itemj1, itemj2))
  n_items <- length(all_items)
  n_reps <- nrow(sim_matrix)

  # Compute observed win counts per item
  obs_wins <- setNames(rep(0, n_items), all_items)
  for (i in seq_along(observed_binary)) {
    winner <- if (observed_binary[i] == 1) itemj1[i] else itemj2[i]
    obs_wins[as.character(winner)] <- obs_wins[as.character(winner)] + 1
  }
  obs_rates <- obs_wins / sum(obs_wins)

  # Compute discrepancy of observed win rates vs each simulated replication
  sim_discrepancies <- numeric(n_reps)
  for (r in 1:n_reps) {
    sim_wins <- setNames(rep(0, n_items), all_items)
    for (i in seq_along(observed_binary)) {
      winner <- if (sim_matrix[r, i] == 1) itemj1[i] else itemj2[i]
      sim_wins[as.character(winner)] <- sim_wins[as.character(winner)] + 1
    }
    sim_rates <- sim_wins / sum(sim_wins)

    # Chi-square style discrepancy
    sim_discrepancies[r] <- sum((obs_rates - sim_rates)^2 / (sim_rates + epsilon))
  }

  obs_discrepancy <- median(sim_discrepancies)  # Treating this as the expected baseline
  ppp <- mean(sim_discrepancies >= obs_discrepancy)

  list(
    observed = obs_discrepancy,
    simulated = sim_discrepancies,
    ppp = ppp
  )
}

First, feed the simulated data for the 2dDP model into it.

itemChisq2dDP <- compute_ppp_item_chisq(
  observed_binary = observed_binary,
  sim_matrix = sim_2dDP,
  itemj1 = pairDataCDAT$itemj1,
  itemj2 = pairDataCDAT$itemj2
)
itemChisq2dDP
## $observed
## [1] 0.09385262
## 
## $simulated
##   [1] 0.16024055 0.10748542 0.05225004 0.06385521 0.10437190 0.05596206
##   [7] 0.10813459 0.16124437 0.10298773 0.10309001 0.16593436 0.07086567
##  [13] 0.10824698 0.05627754 0.08042860 0.10667652 0.06408002 0.08629173
##  [19] 0.07278082 0.09325696 0.06021929 0.09184391 0.15342262 0.06553249
##  [25] 0.06394415 0.06857242 0.08656263 0.14424678 0.18123723 0.06850033
##  [31] 0.04632751 0.07184959 0.07223868 0.09932104 0.05627084 0.06386549
##  [37] 0.09187064 0.09415856 0.04773781 0.14050228 0.09011584 0.05706661
##  [43] 0.10985130 0.14302210 0.06394361 0.10219005 0.10480391 0.07607659
##  [49] 0.12496174 0.18373596 0.06340039 0.11583823 0.03559179 0.10089859
##  [55] 0.19785837 0.14452871 0.07443139 0.05665413 0.05556397 0.05451177
##  [61] 0.12413793 0.09019472 0.11035817 0.11793324 0.12442575 0.14134128
##  [67] 0.07609396 0.12212987 0.10268433 0.09879956 0.06482903 0.09458577
##  [73] 0.08050362 0.07765263 0.11431767 0.07102163 0.03769151 0.06705952
##  [79] 0.15587575 0.16476489 0.09896228 0.13385159 0.08193981 0.09474072
##  [85] 0.11495622 0.09568021 0.08665635 0.09630439 0.13803430 0.08587776
##  [91] 0.05887789 0.10198173 0.11713909 0.12889190 0.04033279 0.17789112
##  [97] 0.10045160 0.08809864 0.11450291 0.18145681 0.08888781 0.05217562
## [103] 0.08268421 0.13033430 0.09252817 0.07077455 0.14699097 0.04271075
## [109] 0.06387100 0.12192739 0.10837012 0.05089175 0.03098332 0.10203697
## [115] 0.09225489 0.06159748 0.06932893 0.13776411 0.07358786 0.14673582
## [121] 0.06878731 0.05770728 0.08652809 0.11820274 0.10491364 0.13237038
## [127] 0.15409561 0.12206241 0.06578734 0.12835960 0.08400606 0.13765595
## [133] 0.12107265 0.04413585 0.07669090 0.15185351 0.06110249 0.05777588
## [139] 0.08851722 0.12504720 0.09432418 0.06584949 0.14618675 0.14320618
## [145] 0.09485829 0.09922802 0.10262366 0.12084504 0.09215294 0.14270360
## [151] 0.06076215 0.06399333 0.13036003 0.11400033 0.08567095 0.09419956
## [157] 0.13509092 0.05805142 0.07852359 0.06258912 0.08817274 0.04541753
## [163] 0.09848758 0.12292388 0.07879318 0.11623911 0.13052464 0.08530631
## [169] 0.08016304 0.08940080 0.11465397 0.04510894 0.11531188 0.10096171
## [175] 0.12749446 0.11132799 0.08209674 0.06281481 0.08050916 0.05229730
## [181] 0.11945068 0.11014619 0.04946752 0.10353438 0.05129473 0.09264959
## [187] 0.10993877 0.07171348 0.06597585 0.11267141 0.08237418 0.09354667
## [193] 0.09459381 0.05210078 0.15310859 0.04652379 0.10811163 0.10039202
## [199] 0.16488683 0.04921671
## 
## $ppp
## [1] 0.5

Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.

# Density of simulated proportions
dens <- density(itemChisq2dDP$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (2D DP Prior Model)",
     xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = itemChisq2dDP$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)

Next, the 2d model

itemChisq2d <- compute_ppp_item_chisq(
  observed_binary = observed_binary,
  sim_matrix = sim_2d,
  itemj1 = pairDataCDAT$itemj1,
  itemj2 = pairDataCDAT$itemj2
)
itemChisq2d
## $observed
## [1] 0.09940797
## 
## $simulated
##   [1] 0.07809597 0.08914133 0.10392175 0.10875848 0.11360061 0.13329543
##   [7] 0.06265255 0.10566095 0.13689743 0.05504987 0.11526860 0.04484410
##  [13] 0.10166480 0.10544106 0.10875041 0.07138641 0.10222590 0.09199420
##  [19] 0.11870838 0.07218083 0.10271261 0.12155499 0.08704563 0.09412740
##  [25] 0.12699533 0.10113244 0.12481945 0.12289664 0.08495404 0.06267766
##  [31] 0.11751010 0.06290414 0.11667657 0.14379092 0.18726353 0.07546757
##  [37] 0.21498165 0.08756300 0.08961252 0.11022765 0.16497601 0.04324774
##  [43] 0.13732930 0.06927240 0.08559715 0.04279772 0.10832250 0.10829262
##  [49] 0.13756401 0.10473573 0.07956408 0.11734722 0.10078298 0.10465922
##  [55] 0.15507876 0.06796212 0.12470595 0.11420893 0.09331612 0.10340674
##  [61] 0.05145746 0.14606248 0.09602228 0.09995751 0.08414963 0.07314756
##  [67] 0.09704919 0.11091686 0.09417464 0.08811748 0.08816063 0.13716852
##  [73] 0.06571872 0.16835918 0.09968145 0.07325927 0.09487817 0.07076405
##  [79] 0.07967885 0.07171194 0.11471183 0.13963763 0.16516127 0.11710970
##  [85] 0.08326477 0.08119605 0.12184078 0.11258484 0.08452500 0.10292369
##  [91] 0.05563178 0.09910395 0.09600589 0.09350684 0.08992263 0.05596385
##  [97] 0.13717801 0.15031693 0.06615366 0.16953474 0.08791319 0.07025507
## [103] 0.19006569 0.08141082 0.10509579 0.10090480 0.10327251 0.06052407
## [109] 0.11066743 0.05780810 0.06648055 0.11552318 0.15086716 0.16955361
## [115] 0.11851834 0.13094584 0.07553799 0.10306290 0.08518856 0.09275750
## [121] 0.10612486 0.08424326 0.17372864 0.06692534 0.12027734 0.04575165
## [127] 0.06905189 0.08956916 0.06008744 0.10954105 0.10240030 0.09104094
## [133] 0.10640590 0.04482490 0.06776578 0.10026892 0.09786834 0.08454646
## [139] 0.16716007 0.07757791 0.10790788 0.15846362 0.12763905 0.13652266
## [145] 0.11459127 0.06751443 0.09913448 0.07818184 0.08278690 0.09173798
## [151] 0.09156226 0.09761459 0.12020689 0.06355210 0.10317038 0.13403759
## [157] 0.11092952 0.10984523 0.09241529 0.04425620 0.12604473 0.08574402
## [163] 0.08738697 0.14677472 0.12510214 0.12697092 0.17186838 0.14061454
## [169] 0.08996704 0.09181870 0.11992176 0.09067149 0.12819759 0.08390346
## [175] 0.10536178 0.05738557 0.11741496 0.12357438 0.09898025 0.07589217
## [181] 0.08746406 0.09524031 0.09067485 0.11977157 0.08186695 0.09291148
## [187] 0.19727734 0.09367730 0.12213148 0.11636823 0.08974960 0.06517384
## [193] 0.07560448 0.11134586 0.12024007 0.08169620 0.05016986 0.13699636
## [199] 0.06436460 0.08334910
## 
## $ppp
## [1] 0.5

Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.

# Density of simulated proportions
dens <- density(itemChisq2d$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (2D Normal Prior Model)",
     xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = itemChisq2d$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)

Finally, the 1d model

itemChisq1d <- compute_ppp_item_chisq(
  observed_binary = observed_binary,
  sim_matrix = sim_1d,
  itemj1 = pairDataCDAT$itemj1,
  itemj2 = pairDataCDAT$itemj2
)
itemChisq1d
## $observed
## [1] 0.01271147
## 
## $simulated
##   [1] 0.008689252 0.012032840 0.011397719 0.021802567 0.010187137 0.015058266
##   [7] 0.020480811 0.009595658 0.017688660 0.013683082 0.014216908 0.010462498
##  [13] 0.010493648 0.014521248 0.012135876 0.019519905 0.007647882 0.014698068
##  [19] 0.015340213 0.008565076 0.016815376 0.025571146 0.008039672 0.018223314
##  [25] 0.010062122 0.008042697 0.015321727 0.018342650 0.008523532 0.013077604
##  [31] 0.016757608 0.010910624 0.005664269 0.011695177 0.012651936 0.011825232
##  [37] 0.013534869 0.005830345 0.013252526 0.010942365 0.007148477 0.018376632
##  [43] 0.021006159 0.018630047 0.015232738 0.012878330 0.031138822 0.015216287
##  [49] 0.022970757 0.005568950 0.015130570 0.018112642 0.014952556 0.006007979
##  [55] 0.008563380 0.006425200 0.014014477 0.010782995 0.008097682 0.016199179
##  [61] 0.018278671 0.011503651 0.013927318 0.016276051 0.013919553 0.008236969
##  [67] 0.012821316 0.016509662 0.012882998 0.009306765 0.014177401 0.013862402
##  [73] 0.012797851 0.007794664 0.009880115 0.011030061 0.011801322 0.013523935
##  [79] 0.007231380 0.021138763 0.016531812 0.021075016 0.022082198 0.018604147
##  [85] 0.012076007 0.008519559 0.008525938 0.006982667 0.019694801 0.009328854
##  [91] 0.008171789 0.016822551 0.009519564 0.014434289 0.011054654 0.008410929
##  [97] 0.008872018 0.018451943 0.011830732 0.007452279 0.010911678 0.013557496
## [103] 0.013760664 0.024099615 0.022481667 0.010334270 0.004468165 0.020554080
## [109] 0.024262635 0.012770997 0.018599948 0.009613005 0.011647050 0.015760199
## [115] 0.011422446 0.015511760 0.015710970 0.006656360 0.007944362 0.012771928
## [121] 0.009694546 0.015520781 0.010304464 0.015166155 0.016742148 0.009540303
## [127] 0.011775197 0.013615666 0.017299293 0.014576820 0.007584575 0.019897335
## [133] 0.018424041 0.011005767 0.019203834 0.007741468 0.023763952 0.011130530
## [139] 0.007739875 0.006451970 0.009760221 0.007836274 0.019368541 0.018887258
## [145] 0.012844088 0.004738793 0.017115208 0.005516234 0.023224779 0.011630040
## [151] 0.012307266 0.008086319 0.008765327 0.016035877 0.007863257 0.015682024
## [157] 0.011619422 0.011785685 0.008585198 0.013044368 0.006421927 0.016372405
## [163] 0.017512142 0.013559031 0.010215444 0.009707851 0.020659325 0.023347285
## [169] 0.013151905 0.008230415 0.019727404 0.028262459 0.010872395 0.011498729
## [175] 0.011080889 0.012077145 0.007085920 0.016546820 0.010973008 0.014005887
## [181] 0.014132815 0.008851235 0.008841829 0.009204350 0.016426698 0.009173632
## [187] 0.009192348 0.014652736 0.010227302 0.014052073 0.010308212 0.017599114
## [193] 0.016517534 0.010213388 0.011548079 0.008414521 0.005474785 0.011522849
## [199] 0.012905385 0.015336770
## 
## $ppp
## [1] 0.5

Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.

# Density of simulated proportions
dens <- density(itemChisq1d$simulated)

# Plot
plot(dens, main = "Posterior Predictive Density (1D Normal Prior Model)",
     xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
     ylab = "Density", lwd = 2, col = "steelblue")

# Add observed value as a vertical red line
abline(v = itemChisq1d$observed, col = "red", lwd = 2, lty = 2)

# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)

Model Comparison with DIC

From Chapter 10.3 of Levy & Mislevy (2016)

The Deviance INformation Criterion is defined a “the difference between the posterior mean of the deviance, \(\overline{D(\theta)}\), and the deviance evaluated at the posterior mean, \(D(\bar{\theta})\)” (p. 248). Which is appropriate when the mean of the posterior is a “reasonable point summary of the parameters” (p. 249) which is the case for these parameters. The formula includes a penalty for complexity \(p_{D}\) based on the number of parameters estimated. Look to Gelman et al. (2013) Gill (2007) and Plummer (2008) for more advanced discussion of DIC.

DIC =\(\overline{D(\theta)} + p_{D} = 2\overline{D(\theta)} - D(\bar{\theta}) +2p_{D}\)

  1. Compute \(\overline{D(\theta)}\) as the posterior mean of the deviance

Loop over all MCMC iterations:

  1. Compute \(D(\bar{\theta})\):
  1. Compute \(p_{D}\) which is \(\overline{D(\theta)}\) - \(D(\bar{\theta})\).

  2. Compute the DIC which 2 times \(\overline{D(\theta)}\) - \(\overline{D(\theta)}\).

First, the posterior mean of the deviance for 2d model. ### 2d DIC

computeDbar2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
  # Extract item and judge IDs
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  S <- nrow(mcmc_matrix)
  N <- nrow(pair_data)
  deviance_vec <- numeric(S)
  
  for (s in 1:S) {
    # Extract current parameter values
    theta1 <- setNames(mcmc_matrix[s, theta1_ids], sub("theta1\\.", "", theta1_ids))
    theta2 <- setNames(mcmc_matrix[s, theta2_ids], sub("theta2\\.", "", theta2_ids))
    gamma  <- setNames(mcmc_matrix[s, gamma_ids],  sub("gamma\\.", "", gamma_ids))
    
    eta <- numeric(N)
    for (n in 1:N) {
      j1 <- itemj1[n]
      j2 <- itemj2[n]
      r  <- rater[n]
      
      g <- gamma[r]
      if (is.na(g)) g <- mean(gamma, na.rm = TRUE)
      
      diff1 <- theta1[j2] - theta1[j1]
      diff2 <- theta2[j2] - theta2[j1]
      eta[n] <- (1 - g) * diff1 + g * diff2
    }
    
    p <- pnorm(eta)
    p <- pmin(pmax(p, 1e-10), 1 - 1e-10)  # avoid log(0)
    
    is_j2_chosen <- choice == pair_data$itemj2
    loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE) # Compute log-probability of the observed binary outcome
    deviance_vec[s] <- -2 * sum(loglik)
  }
  
  mean(deviance_vec)
}

Note that the deviance is defined as minus two times the log-likelihood: \(D(\theta) = -2\log p(y | \theta)\)

So:

Then: \(\overline{D{\theta}}\) is computed by averaging this deviance over MCMC samples \(D(\bar{\theta})\) is computed by evaluating the deviance at the posteriro mean of parameters

Use that function:

mcmc_matrix <- do.call(rbind, mcmc_list2d)  # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids  <- grep("^gamma\\.",  colnames(mcmc_matrix), value = TRUE)

D_bar2d <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)

print(D_bar2d)
## [1] 1239.919

Now a function to compute the deviance at the posterior mean parameters

computeDevianceAtPosteriorMean2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
  # Compute posterior means
  theta1_mean <- setNames(colMeans(mcmc_matrix[, theta1_ids]), sub("theta1\\.", "", theta1_ids))
  theta2_mean <- setNames(colMeans(mcmc_matrix[, theta2_ids]), sub("theta2\\.", "", theta2_ids))
  gamma_mean  <- setNames(colMeans(mcmc_matrix[, gamma_ids]),  sub("gamma\\.",  "", gamma_ids))
  
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  N <- nrow(pair_data)
  eta <- numeric(N)
  
  for (n in 1:N) {
    j1 <- itemj1[n]
    j2 <- itemj2[n]
    r  <- rater[n]
    
    g <- gamma_mean[r]
    if (is.na(g)) g <- mean(gamma_mean, na.rm = TRUE)
    
    diff1 <- theta1_mean[j2] - theta1_mean[j1]
    diff2 <- theta2_mean[j2] - theta2_mean[j1]
    eta[n] <- (1 - g) * diff1 + g * diff2
  }
  
  p <- pnorm(eta)
  p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
  
  is_j2_chosen <- choice == pair_data$itemj2
  loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
  
  D_at_mean <- -2 * sum(loglik)
  return(D_at_mean)
}

2d DIC

Use the function, with the same variables passed to it as created in previous ste.

D_mean2d <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2d)
## [1] 1160.631

As a final step find the difference between twice the D_bar and the D_mean:

p_D2d <- D_bar2d - D_mean2d
DIC2d <- D_bar2d + p_D2d
print(DIC2d)
## [1] 1319.207

Repeat for 2dDP these steps using same functions as for the 2d normal prior model.

2dDP DIC

mcmc_matrix <- do.call(rbind, mcmc_list2dDP)  # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids  <- grep("^gamma\\.",  colnames(mcmc_matrix), value = TRUE)

D_bar2dDP <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)

print(D_bar2dDP)
## [1] 1225.952
D_mean2dDP <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2dDP)
## [1] 1144.647
p_D2dDP <- D_bar2dDP - D_mean2dDP
DIC2dDP <- D_bar2dDP + p_D2dDP
print(DIC2dDP)
## [1] 1307.257

1d DIC

computeDbar1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
  # Extract item and judge IDs
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  S <- nrow(mcmc_matrix)
  N <- nrow(pair_data)
  deviance_vec <- numeric(S)
  
  for (s in 1:S) {
    # Extract current parameter values
    theta <- setNames(mcmc_matrix[s, theta_ids], sub("theta\\.", "", theta_ids))
    alpha <- setNames(mcmc_matrix[s, alpha_ids], sub("alpha\\.", "", alpha_ids))
    
    eta <- numeric(N)
    for (n in 1:N) {
      j1 <- itemj1[n]
      j2 <- itemj2[n]
      r  <- rater[n]
      
      a <- alpha[r]
      if (is.na(a)) a <- mean(alpha, na.rm = TRUE)
      
      diff <- theta[j2] - theta[j1]
      eta[n] <- a * diff
    }
    
    p <- pnorm(eta)
    p <- pmin(pmax(p, 1e-10), 1 - 1e-10)  # avoid log(0)
    
    is_j2_chosen <- choice == pair_data$itemj2
    loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
    deviance_vec[s] <- -2 * sum(loglik)
  }
  
  mean(deviance_vec)
}

Use that function:

mcmc_matrix <- do.call(rbind, mcmc_list1d)  # combine all chains
theta_ids <- grep("^theta\\.", colnames(mcmc_matrix), value = TRUE)
alpha_ids  <- grep("^alpha\\.",  colnames(mcmc_matrix), value = TRUE)

D_bar1d <- computeDbar1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)

print(D_bar1d)
## [1] 1052.434

Now a function to compute the deviance at the posterior mean parameters

computeDevianceAtPosteriorMean1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
  # Compute posterior means
  theta_mean <- setNames(colMeans(mcmc_matrix[, theta_ids]), sub("theta\\.", "", theta_ids))
  alpha_mean  <- setNames(colMeans(mcmc_matrix[, alpha_ids]),  sub("alpha\\.",  "", alpha_ids))
  
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  N <- nrow(pair_data)
  eta <- numeric(N)
  
  for (n in 1:N) {
    j1 <- itemj1[n]
    j2 <- itemj2[n]
    r  <- rater[n]
    
    a <- alpha_mean[r]
    if (is.na(a)) a <- mean(alpha_mean, na.rm = TRUE)
    
    diff <- theta_mean[j2] - theta_mean[j1]
    eta[n] <- a * diff
  }
  
  p <- pnorm(eta)
  p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
  
  is_j2_chosen <- choice == pair_data$itemj2
  loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
  
  D_at_mean <- -2 * sum(loglik)
  return(D_at_mean)
}

Use the function, with the same variables passed to it as created in previous step.

D_mean1d <- computeDevianceAtPosteriorMean1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)
print(D_mean1d)
## [1] 1026.717

As a final step find the difference between twice the D_bar and the D_mean:

p_D1d <- D_bar1d - D_mean1d
DIC1d <- D_bar1d + p_D1d
print(DIC1d)
## [1] 1078.151
# ---- Output comparison table ----
dic_table <- data.frame(
  Model = c("1d", "2d", "2dDP"),
  DIC = c(DIC1d, DIC2d, DIC2dDP),
  D_bar = c(D_bar1d, D_bar2d, D_bar2dDP),
  D_mean = c(D_mean1d, D_mean1d, D_mean2dDP),
  p_D = c(p_D1d, p_D2d, p_D2dDP)
)

print(dic_table)
##   Model      DIC    D_bar   D_mean      p_D
## 1    1d 1078.151 1052.434 1026.717 25.71714
## 2    2d 1319.207 1239.919 1026.717 79.28785
## 3  2dDP 1307.257 1225.952 1144.647 81.30483

The 1d model has the lowest DIC by far, but is less expressive. This statistic considers the trade off between fit and complexity, so even the high complexity of the 2d models reduces the DIC. Between the two 2d models, the additional parameters of the clustering does not diminish the fit of the model by DIC. The 2d and 2dDP models’ parameters have been shown above to be good at predicting the choices of raters when given the two items in a pairwise comparison.

Reflection: It would be good to reconsider the PPMC variables of interest, as they seem to conflict with the Deviance Information Criteria.

Even with the relatively sparse data set of comparisons, the model 2d DP works very well to explain the preferences of raters and the placement of items in 2d latent space.

References

Levy, R., & Mislevy, R. J. (2016). Bayesian psychometric modeling. Chapman & Hall/CRC.

Yu, Q., & Quinn, K. M. (2022). A multidimensional pairwise comparison model for heterogeneous perceptions with an application to modelling the perceived truthfulness of public statements on COVID‐19. Journal of the Royal Statistical Society. Series A, (Statistics in Society), 10.1111/rssa.12810. https://doi.org/10.1111/rssa.12810