# Clear the environment and load packages
rm(list = ls())
library(MCMCpack)
library(bayesplot)
library(coda)
library(ggplot2)
library(tidyverse)
library(psych)
setwd("~/Dropbox/IU25SP-EDUC-Y617 Psychometric Theory/Psychometric Theory Final Project/Paired Comparison MCMC CDAT")
data.folder <- "Data"
load(file.path(data.folder, "pairDataCDAT.Rda"))
# Prepare for use in MCMCpack
colnames(pairDataCDAT) <- c("rater","itemj1", "itemj2","choice")
# Prepare the data for easy matching to student responses as PDFs labeled by 4-digit ID
# Create a named vector for the mapping: names are the original IDs and values are the new numbers.
lookup <- c("60" = 1121, "61" = 1154, "62" = 1222, "63" = 1637,
"64" = 1995, "65" = 3369, "66" = 4004, "67" = 4209,
"68" = 4268, "69" = 4841, "70" = 5074, "71" = 5495,
"72" = 5710, "73" = 8294, "74" = 8980, "75" = 9899)
# Map the itemj1 and itemj2 columns using the lookup vector.
pairDataCDAT$itemj1 <- lookup[as.character(pairDataCDAT$itemj1)]
pairDataCDAT$itemj2 <- lookup[as.character(pairDataCDAT$itemj2)]
pairDataCDAT$choice <- lookup[as.character(pairDataCDAT$choice)]
# Sort for easy matching in theta.start matrix below
pairDataCDAT <- pairDataCDAT[order(pairDataCDAT$itemj1), ]
save(pairDataCDAT, file = file.path(data.folder, "pairDataCDAT.1.Rda"))
n_chains <- 3 # Set the number of chains
set.seed(1234) # Set seed for randomized starting values
I <- length(unique(pairDataCDAT$rater))
The vector γ represents each respondent’s perceptual orientation in the 2D latent space. Initially, all respondents are assigned a value of π/4π/4 (45° in radians). The second line randomizes the starting values within a small range # [π/4−0.4,π/4+0.4][π/4−0.4,π/4+0.4] to introduce slight variation.
gamma_start <- rep(pi/4,I)
# If used, this would replace the constant with I random values from a uniform distribution between 0 and π/2,
# using a broader random initialization instead of the current approach, which uses a small random variation around π/4.
# runif(I)*pi/2.
gamma_start <- runif(I, pi/4-.4, pi/4+.4)
Thetas start at 0 for the one-dimensional model
theta_start1d <- matrix(0, 16, 1) # All start at 0 in the model's probit units
Create the empty starting \(\theta\) matrix for 2d models
theta_start2d <- matrix(0, 16, 2) # 16 scripts, two dimensions
# Creates a 16×2 matrix (theta_start2d) to store the latent attributes of statements along two dimensions. Initially, all values are set to 0.
Find the matching indices for the anchor scripts in the matrix by viewing them in order.
unique(pairDataCDAT$itemj1)
## [1] 1121 1154 1222 1637 1995 3369 4004 4209 4268 4841 5074 5495 5710 8294 8980
## [16] 9899
Set initial values for two anchor items whose place in the matrix is determined above.
# These values anchor certain scripts in the 2d latent space and help define the rotation.
theta_start2d[11,1] <- 1 ## script 5074 is interpreted as having high creative elaboration
theta_start2d[11,2] <- -1 ## script 5074 is interpreted as having low conventional representation
theta_start2d[12,1] <- -1 ## script 5495 is interpreted as having low creative elaboration
theta_start2d[12,2] <- 1 ## script 5495 is interpreted as having high conventional representation
seeds <- c(0202, 8018, 1304) # Set seeds for each chain
chains1d <- vector("list", n_chains)
for (i in 1:n_chains) {
chains1d[[i]] <- MCMCpaircompare(
pairDataCDAT,
theta.constraints = list('3369' = list("+")),
verbose=5000, # Prints status updates every __ iterations.
burnin=1000, # Discards the first __ samples to stabilize.
mcmc=9000, # Runs __ iterations for estimation.
thin=50, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
alpha.fixed=FALSE,
alpha.start=1,
a=0, # The default prio mean of alpha is 0
A=.25, # The default prior precision is 0.25 (variance of 4)
store.theta=TRUE, # Saves item-specfic draws.
store.alpha=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list1d <- mcmc.list(lapply(chains1d, mcmc))
# Save object
save(mcmc_list1d, file = "Out/mcmc_list1d.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list1d[[1]]), value = TRUE)
alphas <- grep("^alpha", colnames(mcmc_list1d[[1]]), value = TRUE)
# For 1D model
gelman_diag_1d <- gelman.diag(mcmc_list1d)
max_rhat_1d <- max(gelman_diag_1d$psrf[, "Point est."])
gelman_diag_1d
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta.1121 1.05 1.14
## theta.1154 1.05 1.19
## theta.1222 1.02 1.05
## theta.1637 1.02 1.08
## theta.1995 1.02 1.07
## theta.3369 1.01 1.03
## theta.4004 1.02 1.05
## theta.4209 1.03 1.10
## theta.4268 1.03 1.09
## theta.4841 1.02 1.08
## theta.5074 1.01 1.04
## theta.5495 1.04 1.12
## theta.5710 1.04 1.13
## theta.8294 1.02 1.07
## theta.8980 1.00 1.00
## theta.9899 1.02 1.04
## alpha.1 1.09 1.28
## alpha.25 1.01 1.05
## alpha.26 1.02 1.09
## alpha.27 1.00 1.02
## alpha.28 1.01 1.03
## alpha.29 1.02 1.06
## alpha.30 1.01 1.05
## alpha.31 1.02 1.06
## alpha.34 1.01 1.05
## alpha.35 1.01 1.03
## alpha.36 1.02 1.06
## alpha.37 1.03 1.11
## alpha.38 1.00 1.02
##
## Multivariate psrf
##
## 1.13
View a summary
summary1d <- round(
cbind(
as.data.frame(summary(mcmc_list1d)$statistics),
as.data.frame(summary(mcmc_list1d)$quantiles)
), 3)
summary1d$Model <- "1d"
save(summary1d, file = "Out/summary1d.Rdata")
summary1d
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta.1121 -0.117 0.277 0.012 0.022 -0.687 -0.287 -0.117 0.068
## theta.1154 -0.946 0.328 0.014 0.023 -1.637 -1.142 -0.939 -0.739
## theta.1222 0.128 0.279 0.012 0.023 -0.435 -0.062 0.137 0.321
## theta.1637 0.018 0.283 0.012 0.022 -0.577 -0.155 0.020 0.210
## theta.1995 -0.479 0.292 0.013 0.025 -1.063 -0.653 -0.481 -0.283
## theta.3369 1.722 0.460 0.020 0.030 0.958 1.383 1.701 2.022
## theta.4004 0.372 0.302 0.013 0.024 -0.280 0.189 0.379 0.569
## theta.4209 -0.232 0.288 0.012 0.024 -0.789 -0.430 -0.233 -0.034
## theta.4268 0.282 0.290 0.012 0.023 -0.300 0.098 0.296 0.485
## theta.4841 -0.556 0.293 0.013 0.024 -1.137 -0.732 -0.551 -0.367
## theta.5074 -0.145 0.279 0.012 0.024 -0.715 -0.326 -0.129 0.035
## theta.5495 -0.833 0.315 0.014 0.023 -1.465 -1.052 -0.828 -0.604
## theta.5710 -1.007 0.335 0.014 0.024 -1.650 -1.217 -1.004 -0.778
## theta.8294 0.159 0.288 0.012 0.024 -0.450 -0.024 0.165 0.355
## theta.8980 0.703 0.316 0.014 0.025 0.086 0.487 0.718 0.917
## theta.9899 0.521 0.303 0.013 0.022 -0.098 0.329 0.522 0.705
## alpha.1 2.696 0.731 0.031 0.037 1.444 2.207 2.663 3.131
## alpha.25 1.129 0.340 0.015 0.015 0.562 0.886 1.113 1.349
## alpha.26 1.881 0.554 0.024 0.028 1.039 1.481 1.807 2.175
## alpha.27 1.795 0.531 0.023 0.025 0.924 1.425 1.714 2.101
## alpha.28 0.985 0.339 0.015 0.017 0.431 0.755 0.929 1.211
## alpha.29 0.853 0.305 0.013 0.013 0.365 0.640 0.828 1.025
## alpha.30 0.703 0.260 0.011 0.012 0.249 0.516 0.683 0.848
## alpha.31 0.334 0.188 0.008 0.009 0.019 0.205 0.320 0.448
## alpha.34 1.048 0.313 0.013 0.012 0.519 0.815 1.022 1.249
## alpha.35 0.468 0.203 0.009 0.010 0.130 0.310 0.447 0.614
## alpha.36 1.004 0.354 0.015 0.015 0.428 0.737 0.977 1.209
## alpha.37 1.028 0.357 0.015 0.015 0.466 0.781 0.984 1.227
## alpha.38 0.618 0.222 0.010 0.011 0.257 0.465 0.594 0.756
## 97.5% Model
## theta.1121 0.422 1d
## theta.1154 -0.272 1d
## theta.1222 0.664 1d
## theta.1637 0.553 1d
## theta.1995 0.093 1d
## theta.3369 2.630 1d
## theta.4004 0.952 1d
## theta.4209 0.319 1d
## theta.4268 0.840 1d
## theta.4841 0.006 1d
## theta.5074 0.395 1d
## theta.5495 -0.218 1d
## theta.5710 -0.395 1d
## theta.8294 0.720 1d
## theta.8980 1.293 1d
## theta.9899 1.140 1d
## alpha.1 4.401 1d
## alpha.25 1.851 1d
## alpha.26 3.295 1d
## alpha.27 2.968 1d
## alpha.28 1.713 1d
## alpha.29 1.553 1d
## alpha.30 1.290 1d
## alpha.31 0.730 1d
## alpha.34 1.687 1d
## alpha.35 0.932 1d
## alpha.36 1.863 1d
## alpha.37 1.812 1d
## alpha.38 1.162 1d
seeds <- c(1304, 2406, 7916) # Set seeds for each chain
chains2d <- vector("list", n_chains)
for (i in 1:n_chains) {
chains2d[[i]] <- MCMCpaircompare2d(
pairDataCDAT,
theta.constraints=list(
# Note that "+" = constrained to be positive and "-" = constrained to negative
# Note that "n" = fixed value if desired
# Constraining each item to one quadrant based on theory
'5074' = list(1, "+"),
'5074' = list(2, "-"),
'5495' = list(1, "-"),
'5495' = list(2, "+")
),
verbose=5000, # Prints status updates every __ iterations.
burnin=1000, # Discards the first __ samples to stabilize.
mcmc=10000, # Runs __ iterations for estimation.
thin=50, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
gamma.start=gamma_start, # Uses the initialized respondent perception values.
theta.start=theta_start2d,
store.theta=TRUE, # Saves item-specfic draws.
store.gamma=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2d <- mcmc.list(lapply(chains2d, mcmc))
#Save object
save(mcmc_list2d, file = "Out/mcmc_list2d.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list2d[[1]]), value = TRUE)
alpha <- grep("^alpha", colnames(mcmc_list2d[[1]]), value = TRUE)
# For 2D model
gelman_diag_2d <- gelman.diag(mcmc_list2d)
max_rhat_2d <- max(gelman_diag_2d$psrf[, "Point est."])
gelman_diag_2d
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta1.1121 1.002 1.007
## theta1.1154 0.995 0.996
## theta1.1222 0.997 0.999
## theta1.1637 1.000 1.012
## theta1.1995 0.996 0.998
## theta1.3369 1.007 1.037
## theta1.4004 1.014 1.050
## theta1.4209 0.996 0.996
## theta1.4268 1.001 1.017
## theta1.4841 1.000 1.007
## theta1.5074 1.000 1.009
## theta1.5495 1.003 1.021
## theta1.5710 1.004 1.013
## theta1.8294 0.997 0.998
## theta1.8980 1.009 1.038
## theta1.9899 0.999 1.004
## theta2.1121 1.023 1.087
## theta2.1154 1.001 1.014
## theta2.1222 1.001 1.013
## theta2.1637 1.003 1.019
## theta2.1995 1.031 1.107
## theta2.3369 1.009 1.030
## theta2.4004 1.001 1.016
## theta2.4209 0.998 1.000
## theta2.4268 0.999 1.008
## theta2.4841 1.000 1.012
## theta2.5074 1.011 1.041
## theta2.5495 1.001 1.004
## theta2.5710 0.996 0.999
## theta2.8294 0.999 1.006
## theta2.8980 1.004 1.009
## theta2.9899 1.001 1.015
## gamma.1 1.009 1.041
## gamma.25 1.006 1.025
## gamma.26 1.008 1.025
## gamma.27 1.007 1.035
## gamma.28 1.001 1.012
## gamma.29 1.013 1.055
## gamma.30 0.997 0.997
## gamma.31 1.013 1.040
## gamma.34 1.011 1.046
## gamma.35 1.003 1.023
## gamma.36 1.001 1.013
## gamma.37 1.009 1.035
## gamma.38 0.997 0.999
##
## Multivariate psrf
##
## 1.16
View a summary of the 2d model parameter posterior distributions
summary2d <- round(
cbind(
as.data.frame(summary(mcmc_list2d)$statistics),
as.data.frame(summary(mcmc_list2d)$quantiles)
), 3)
summary2d$Model <- "2d"
save(summary2d, file="Out/summary2d.Rdata")
summary2d
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta1.1121 1.247 0.401 0.016 0.017 0.559 0.955 1.244 1.523
## theta1.1154 -0.019 0.373 0.015 0.018 -0.713 -0.257 -0.030 0.223
## theta1.1222 0.604 0.354 0.014 0.015 -0.107 0.358 0.616 0.859
## theta1.1637 0.606 0.362 0.015 0.016 -0.107 0.362 0.608 0.847
## theta1.1995 0.849 0.406 0.017 0.020 0.088 0.559 0.845 1.120
## theta1.3369 1.686 0.441 0.018 0.021 0.901 1.360 1.682 1.977
## theta1.4004 -0.821 0.395 0.016 0.016 -1.620 -1.097 -0.810 -0.549
## theta1.4209 -1.271 0.407 0.017 0.019 -2.023 -1.547 -1.261 -1.010
## theta1.4268 -0.850 0.396 0.016 0.018 -1.692 -1.101 -0.812 -0.558
## theta1.4841 -0.954 0.374 0.015 0.015 -1.684 -1.194 -0.949 -0.713
## theta1.5074 1.134 0.389 0.016 0.017 0.431 0.857 1.137 1.391
## theta1.5495 -2.323 0.427 0.017 0.017 -3.159 -2.598 -2.317 -2.044
## theta1.5710 -0.952 0.386 0.016 0.016 -1.733 -1.235 -0.943 -0.724
## theta1.8294 -0.138 0.356 0.015 0.016 -0.851 -0.374 -0.136 0.104
## theta1.8980 0.685 0.362 0.015 0.015 -0.006 0.443 0.702 0.935
## theta1.9899 0.419 0.360 0.015 0.016 -0.280 0.186 0.417 0.662
## theta2.1121 -1.168 0.364 0.015 0.016 -1.933 -1.396 -1.153 -0.918
## theta2.1154 -1.536 0.351 0.014 0.017 -2.231 -1.778 -1.531 -1.309
## theta2.1222 -0.160 0.333 0.014 0.015 -0.833 -0.376 -0.168 0.061
## theta2.1637 -0.514 0.322 0.013 0.014 -1.140 -0.735 -0.507 -0.302
## theta2.1995 -1.265 0.351 0.014 0.015 -1.910 -1.528 -1.254 -1.032
## theta2.3369 1.246 0.374 0.015 0.018 0.486 1.014 1.250 1.492
## theta2.4004 1.380 0.333 0.014 0.015 0.755 1.142 1.381 1.581
## theta2.4209 0.779 0.341 0.014 0.014 0.203 0.523 0.754 1.015
## theta2.4268 1.206 0.350 0.014 0.016 0.554 0.970 1.205 1.430
## theta2.4841 -0.250 0.324 0.013 0.013 -0.875 -0.472 -0.255 -0.048
## theta2.5074 -1.068 0.342 0.014 0.017 -1.719 -1.289 -1.067 -0.847
## theta2.5495 0.575 0.333 0.014 0.015 0.073 0.326 0.542 0.793
## theta2.5710 -0.834 0.328 0.013 0.015 -1.516 -1.037 -0.837 -0.637
## theta2.8294 0.728 0.318 0.013 0.014 0.148 0.509 0.727 0.939
## theta2.8980 0.893 0.343 0.014 0.018 0.235 0.650 0.922 1.124
## theta2.9899 0.665 0.331 0.014 0.014 -0.032 0.464 0.666 0.892
## gamma.1 0.872 0.126 0.005 0.006 0.622 0.794 0.872 0.964
## gamma.25 0.725 0.140 0.006 0.008 0.438 0.633 0.726 0.829
## gamma.26 0.830 0.121 0.005 0.006 0.588 0.747 0.828 0.917
## gamma.27 0.837 0.121 0.005 0.006 0.591 0.756 0.837 0.928
## gamma.28 0.454 0.135 0.005 0.007 0.197 0.367 0.457 0.544
## gamma.29 0.917 0.137 0.006 0.007 0.648 0.826 0.920 1.009
## gamma.30 0.317 0.150 0.006 0.007 0.032 0.209 0.317 0.418
## gamma.31 0.076 0.071 0.003 0.003 0.002 0.024 0.050 0.112
## gamma.34 0.944 0.143 0.006 0.007 0.677 0.849 0.942 1.041
## gamma.35 1.341 0.136 0.006 0.005 1.050 1.258 1.349 1.444
## gamma.36 1.313 0.128 0.005 0.006 1.049 1.235 1.325 1.407
## gamma.37 1.326 0.116 0.005 0.005 1.068 1.250 1.332 1.402
## gamma.38 1.450 0.086 0.004 0.004 1.243 1.405 1.465 1.517
## 97.5% Model
## theta1.1121 2.095 2d
## theta1.1154 0.758 2d
## theta1.1222 1.212 2d
## theta1.1637 1.294 2d
## theta1.1995 1.621 2d
## theta1.3369 2.587 2d
## theta1.4004 -0.108 2d
## theta1.4209 -0.468 2d
## theta1.4268 -0.180 2d
## theta1.4841 -0.245 2d
## theta1.5074 1.896 2d
## theta1.5495 -1.540 2d
## theta1.5710 -0.175 2d
## theta1.8294 0.514 2d
## theta1.8980 1.369 2d
## theta1.9899 1.102 2d
## theta2.1121 -0.481 2d
## theta2.1154 -0.823 2d
## theta2.1222 0.486 2d
## theta2.1637 0.141 2d
## theta2.1995 -0.595 2d
## theta2.3369 1.964 2d
## theta2.4004 2.030 2d
## theta2.4209 1.443 2d
## theta2.4268 1.912 2d
## theta2.4841 0.404 2d
## theta2.5074 -0.348 2d
## theta2.5495 1.325 2d
## theta2.5710 -0.165 2d
## theta2.8294 1.367 2d
## theta2.8980 1.565 2d
## theta2.9899 1.278 2d
## gamma.1 1.104 2d
## gamma.25 0.987 2d
## gamma.26 1.062 2d
## gamma.27 1.058 2d
## gamma.28 0.722 2d
## gamma.29 1.191 2d
## gamma.30 0.609 2d
## gamma.31 0.261 2d
## gamma.34 1.203 2d
## gamma.35 1.552 2d
## gamma.36 1.538 2d
## gamma.37 1.538 2d
## gamma.38 1.564 2d
seeds <- c(4610, 5712, 6814) # Set seeds for each chain
chains2dDP <- vector("list", n_chains)
for (i in 1:n_chains) {
chains2dDP[[i]] <- MCMCpaircompare2dDP(
pairDataCDAT,
theta.constraints=list(
# Note that "+" = constrained to be positive and "-" = constrained to negative
# Note that "n" = fixed value if desired
# Constraining each item to one quadrant based on theory
'5074' = list(1, "+"),
'5074' = list(2, "-"),
'5495' = list(1, "-"),
'5495' = list(2, "+")
),
verbose=5000, # Prints status updates every __ iterations.
burnin=5000, # Discards the first __ samples to stabilize.
mcmc=15000, # Runs __ iterations for estimation.
thin=50, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
gamma.start=gamma_start, # Uses the initialized respondent perception values.
alpha=1, # Sets the Dirichlet Process concentration parameter.
cluster.max=4, # Allows up to n clusters of respondents.
cluster.mcmc=500, # Runs __ iterations of MCMC for clustering.
alpha.fixed=FALSE, # Allows the concentration parameter to be learned from the data.
a0=1, b0=1, # Hyperparameters for the Dirichlet Process prior.
theta.start=theta_start2d,
store.theta=TRUE, # Saves item-specfic draws.
store.gamma=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2dDP <- mcmc.list(lapply(chains2dDP, mcmc))
# Save object
save(mcmc_list2dDP, file = "Out/mcmc_list2dDP.Rdata")
# For 2D-DP model
gelman_diag_2dDP <- gelman.diag(mcmc_list2dDP)
max_rhat_2dDP <- max(gelman_diag_2dDP$psrf[, "Point est."])
gelman_diag_2dDP
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta1.1121 1.003 1.02
## theta1.1154 1.004 1.01
## theta1.1222 0.998 1.00
## theta1.1637 1.004 1.01
## theta1.1995 1.007 1.03
## theta1.3369 1.011 1.04
## theta1.4004 1.010 1.02
## theta1.4209 1.004 1.01
## theta1.4268 1.001 1.01
## theta1.4841 1.004 1.01
## theta1.5074 0.998 1.00
## theta1.5495 1.001 1.01
## theta1.5710 1.005 1.01
## theta1.8294 0.999 1.00
## theta1.8980 1.005 1.02
## theta1.9899 0.999 1.00
## theta2.1121 1.007 1.02
## theta2.1154 1.006 1.02
## theta2.1222 1.001 1.01
## theta2.1637 1.001 1.01
## theta2.1995 0.999 1.00
## theta2.3369 1.012 1.04
## theta2.4004 0.999 1.00
## theta2.4209 1.000 1.00
## theta2.4268 1.001 1.01
## theta2.4841 0.999 1.00
## theta2.5074 1.009 1.03
## theta2.5495 1.004 1.01
## theta2.5710 0.999 1.00
## theta2.8294 1.004 1.01
## theta2.8980 1.001 1.01
## theta2.9899 1.010 1.03
## gamma.1 1.007 1.03
## gamma.25 1.002 1.01
## gamma.26 1.008 1.04
## gamma.27 1.007 1.03
## gamma.28 0.998 1.00
## gamma.29 1.009 1.04
## gamma.30 1.011 1.05
## gamma.31 1.006 1.01
## gamma.34 1.008 1.04
## gamma.35 1.019 1.07
## gamma.36 1.012 1.05
## gamma.37 1.016 1.06
## gamma.38 1.014 1.06
## cluster.1 1.046 1.15
## cluster.25 1.038 1.13
## cluster.26 1.051 1.17
## cluster.27 1.041 1.14
## cluster.28 1.000 1.00
## cluster.29 1.050 1.17
## cluster.30 1.004 1.02
## cluster.31 1.006 1.03
## cluster.34 1.044 1.15
## cluster.35 1.045 1.15
## cluster.36 1.044 1.15
## cluster.37 1.044 1.15
## cluster.38 1.038 1.12
## n.clusters 1.009 1.03
## alpha 1.004 1.02
##
## Multivariate psrf
##
## 1.15
Show model convergence statistics of all three models– across all parameters max \(\hat{R}\)
data.frame(
Model = c("1D", "2D", "2D-DP"),
Max_Rhat = c(max_rhat_1d, max_rhat_2d, max_rhat_2dDP)
)
## Model Max_Rhat
## 1 1D 1.088734
## 2 2D 1.031031
## 3 2D-DP 1.050652
summary2dDP <- round(
cbind(
as.data.frame(summary(mcmc_list2dDP)$statistics),
as.data.frame(summary(mcmc_list2dDP)$quantiles)
), 3)
summary2dDP$Model <- "2dDP"
save(summary2dDP, file = "Out/summary2dDP.Rdata")
summary2dDP
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta1.1121 1.194 0.406 0.014 0.016 0.450 0.912 1.178 1.469
## theta1.1154 -0.012 0.393 0.013 0.014 -0.733 -0.284 -0.029 0.246
## theta1.1222 0.636 0.353 0.012 0.014 -0.036 0.387 0.638 0.874
## theta1.1637 0.608 0.357 0.012 0.013 -0.059 0.370 0.599 0.835
## theta1.1995 0.753 0.414 0.014 0.015 0.040 0.461 0.715 1.007
## theta1.3369 1.579 0.457 0.015 0.016 0.713 1.267 1.570 1.877
## theta1.4004 -0.769 0.430 0.014 0.016 -1.613 -1.058 -0.749 -0.479
## theta1.4209 -1.285 0.419 0.014 0.014 -2.168 -1.560 -1.255 -1.011
## theta1.4268 -0.770 0.440 0.015 0.016 -1.737 -1.047 -0.747 -0.471
## theta1.4841 -0.884 0.368 0.012 0.013 -1.653 -1.125 -0.866 -0.637
## theta1.5074 1.156 0.400 0.013 0.015 0.386 0.881 1.126 1.435
## theta1.5495 -2.339 0.466 0.016 0.016 -3.262 -2.654 -2.323 -2.018
## theta1.5710 -0.864 0.396 0.013 0.013 -1.664 -1.123 -0.863 -0.593
## theta1.8294 -0.184 0.384 0.013 0.014 -0.927 -0.449 -0.165 0.082
## theta1.8980 0.651 0.374 0.012 0.012 -0.120 0.401 0.651 0.914
## theta1.9899 0.471 0.380 0.013 0.013 -0.290 0.222 0.476 0.733
## theta2.1121 -1.101 0.361 0.012 0.014 -1.849 -1.326 -1.090 -0.869
## theta2.1154 -1.554 0.348 0.012 0.012 -2.217 -1.790 -1.557 -1.334
## theta2.1222 -0.193 0.318 0.011 0.011 -0.865 -0.394 -0.184 0.014
## theta2.1637 -0.515 0.328 0.011 0.012 -1.177 -0.734 -0.497 -0.289
## theta2.1995 -1.192 0.355 0.012 0.013 -1.891 -1.424 -1.189 -0.944
## theta2.3369 1.315 0.410 0.014 0.015 0.499 1.026 1.325 1.597
## theta2.4004 1.280 0.336 0.011 0.012 0.658 1.061 1.270 1.488
## theta2.4209 0.762 0.346 0.012 0.012 0.160 0.512 0.730 0.995
## theta2.4268 1.083 0.323 0.011 0.012 0.480 0.869 1.069 1.295
## theta2.4841 -0.339 0.338 0.011 0.013 -0.993 -0.576 -0.331 -0.105
## theta2.5074 -1.071 0.353 0.012 0.014 -1.732 -1.317 -1.062 -0.822
## theta2.5495 0.518 0.342 0.011 0.011 0.025 0.246 0.470 0.747
## theta2.5710 -0.929 0.355 0.012 0.013 -1.615 -1.167 -0.946 -0.679
## theta2.8294 0.737 0.327 0.011 0.010 0.110 0.512 0.730 0.959
## theta2.8980 0.904 0.340 0.011 0.012 0.254 0.658 0.895 1.136
## theta2.9899 0.614 0.332 0.011 0.012 -0.048 0.396 0.621 0.846
## gamma.1 0.854 0.133 0.004 0.006 0.592 0.762 0.857 0.949
## gamma.25 0.838 0.144 0.005 0.006 0.542 0.743 0.844 0.940
## gamma.26 0.854 0.133 0.004 0.006 0.600 0.761 0.855 0.949
## gamma.27 0.854 0.133 0.004 0.006 0.593 0.763 0.859 0.949
## gamma.28 0.378 0.213 0.007 0.008 0.023 0.196 0.382 0.540
## gamma.29 0.856 0.133 0.004 0.006 0.605 0.763 0.860 0.950
## gamma.30 0.290 0.204 0.007 0.008 0.014 0.108 0.249 0.462
## gamma.31 0.129 0.102 0.003 0.003 0.006 0.046 0.106 0.186
## gamma.34 0.857 0.135 0.004 0.006 0.593 0.765 0.861 0.951
## gamma.35 1.420 0.111 0.004 0.004 1.153 1.360 1.442 1.506
## gamma.36 1.418 0.111 0.004 0.004 1.150 1.354 1.439 1.501
## gamma.37 1.417 0.112 0.004 0.004 1.148 1.354 1.439 1.502
## gamma.38 1.424 0.107 0.004 0.004 1.159 1.363 1.445 1.507
## cluster.1 2.388 1.180 0.039 0.114 1.000 1.000 2.000 4.000
## cluster.25 2.401 1.171 0.039 0.101 1.000 1.000 2.000 4.000
## cluster.26 2.381 1.185 0.039 0.114 1.000 1.000 2.000 4.000
## cluster.27 2.396 1.180 0.039 0.111 1.000 1.000 2.000 4.000
## cluster.28 2.532 1.063 0.035 0.056 1.000 2.000 3.000 3.000
## cluster.29 2.376 1.171 0.039 0.108 1.000 1.000 2.000 3.250
## cluster.30 2.559 1.058 0.035 0.066 1.000 2.000 3.000 3.000
## cluster.31 2.634 1.071 0.036 0.071 1.000 2.000 3.000 4.000
## cluster.34 2.386 1.184 0.039 0.108 1.000 1.000 2.000 4.000
## cluster.35 2.371 1.136 0.038 0.106 1.000 1.000 2.000 3.000
## cluster.36 2.374 1.132 0.038 0.089 1.000 1.000 2.000 3.000
## cluster.37 2.373 1.135 0.038 0.109 1.000 1.000 2.000 3.000
## cluster.38 2.399 1.139 0.038 0.090 1.000 1.000 2.000 3.000
## n.clusters 3.790 0.408 0.014 0.015 3.000 4.000 4.000 4.000
## alpha 1.625 0.979 0.033 0.037 0.328 0.915 1.430 2.071
## 97.5% Model
## theta1.1121 1.970 2dDP
## theta1.1154 0.777 2dDP
## theta1.1222 1.283 2dDP
## theta1.1637 1.361 2dDP
## theta1.1995 1.669 2dDP
## theta1.3369 2.484 2dDP
## theta1.4004 0.012 2dDP
## theta1.4209 -0.495 2dDP
## theta1.4268 0.052 2dDP
## theta1.4841 -0.179 2dDP
## theta1.5074 1.956 2dDP
## theta1.5495 -1.467 2dDP
## theta1.5710 -0.086 2dDP
## theta1.8294 0.547 2dDP
## theta1.8980 1.345 2dDP
## theta1.9899 1.153 2dDP
## theta2.1121 -0.456 2dDP
## theta2.1154 -0.901 2dDP
## theta2.1222 0.417 2dDP
## theta2.1637 0.109 2dDP
## theta2.1995 -0.516 2dDP
## theta2.3369 2.128 2dDP
## theta2.4004 1.987 2dDP
## theta2.4209 1.489 2dDP
## theta2.4268 1.752 2dDP
## theta2.4841 0.342 2dDP
## theta2.5074 -0.420 2dDP
## theta2.5495 1.316 2dDP
## theta2.5710 -0.245 2dDP
## theta2.8294 1.410 2dDP
## theta2.8980 1.564 2dDP
## theta2.9899 1.222 2dDP
## gamma.1 1.099 2dDP
## gamma.25 1.099 2dDP
## gamma.26 1.099 2dDP
## gamma.27 1.099 2dDP
## gamma.28 0.775 2dDP
## gamma.29 1.100 2dDP
## gamma.30 0.686 2dDP
## gamma.31 0.368 2dDP
## gamma.34 1.100 2dDP
## gamma.35 1.564 2dDP
## gamma.36 1.563 2dDP
## gamma.37 1.563 2dDP
## gamma.38 1.564 2dDP
## cluster.1 4.000 2dDP
## cluster.25 4.000 2dDP
## cluster.26 4.000 2dDP
## cluster.27 4.000 2dDP
## cluster.28 4.000 2dDP
## cluster.29 4.000 2dDP
## cluster.30 4.000 2dDP
## cluster.31 4.000 2dDP
## cluster.34 4.000 2dDP
## cluster.35 4.000 2dDP
## cluster.36 4.000 2dDP
## cluster.37 4.000 2dDP
## cluster.38 4.000 2dDP
## n.clusters 4.000 2dDP
## alpha 4.047 2dDP
From Levy & Mislevy (2016) Ch. 10.2.4 & 10.2.5Posterior Predictive Model Checking
The common feature across the 2-dimensional models are (a) posterior thetas of each item’s location in 2-dimensional latent space and precision of that distribution; and (b) the posterior gammas of each rater.
The common feature of all models is the choice of one item over another based on th model-implied latent traits.
The prediction of choices based on model-implied latent variables should be close obserbed choices.
The selection of the left item (j1) over the second (j2) is the test statistic to be examined with Posterior Predictive Model Checking.
(See Levy & Mislevy, 2016, pp. 237-239). ### 2d models with both thetas and a gamma governing rater preference For each posterior sample of \(\theta_{1}\), \(\theta_{2}\), and \(\gamma\), simulate responses.
First, a function that will - combine all chains into one list of samples - iterate 200 sets of simulated choices based on comparisons that match the rater and theta IDs of the observed comparisons - for each iteration draw one value from all chains of \(\theta_{1}\), \(\theta_{2}\), and \(\gamma\). - for each comparison of the 960 observations, extract the two items and the rater for a simulated choice. - find the difference between the two items’ thetas according to the model - add the two thetas in proportion to the gamma for the variable \(\eta\) - use eta as input to the probit link function to find the probability of choosing j1 over j2 - make a random draw of 1 or 0 governed by the binomial distribution and probability computed from eta - add the 1 or 0 to a matrix of choices of j1 over j2 - return the simulated choices for the 960 comparisons 200 times
simulate_pp_data_mcmclist2d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta1_names <- grep("^theta1\\.", colnames(all_samples), value = TRUE)
theta2_names <- grep("^theta2\\.", colnames(all_samples), value = TRUE)
gamma_names <- grep("^gamma\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) { # Row by row iteration
s <- sample(1:nrow(all_samples), 1) # Draw from any chain
theta1 <- all_samples[s, theta1_names] # Draw one value for theta1
theta2 <- all_samples[s, theta2_names] # Draw one value for theta2
gamma <- all_samples[s, gamma_names] # Draw one value for gamma
for (i in 1:n_obs) { # Column by column iteration
j1 <- as.character(pair_data$itemj1[i]) # Locate the first item number for that row from the observed
j2 <- as.character(pair_data$itemj2[i]) # Locate the second item number for that row from the observed
rater <- as.character(pair_data$rater[i]) # Locate the rater for that row from the observed
diff1 <- theta1[paste0("theta1.", j1)] - theta1[paste0("theta1.", j2)] # Find the model-implied diff between sampled theta1 of 2 items
diff2 <- theta2[paste0("theta2.", j1)] - theta2[paste0("theta2.", j2)] # Find the model-implied diff between sampled theta2 of 2 items
g <- gamma[paste0("gamma.", rater)] # Find the model-implied sampled gamma of the rater
# eta will be the input to the probit link function
eta <- g * diff2 + (1 - g) * diff1 # Add thetas in proportion to gamma, representing how much more favorable j1 is than j2
# If eta is negative, then j2 is preferrable. If close to 0, then it's a toss-up.
p <- pnorm(eta) # transform eta using the probit link to the probability j1 is chosen over j2.
# If p is <.5 preference for j2, p = .5 no preference, p > .5 preference for j1.
# Generate 1 draw of 0 or 1 based on p, and add it to the sim_choices matrix
sim_choices[r, i] <- rbinom(n = 1, size = 1, p)
}
}
# return a set of binary choices where 1 is a choice of j1 and 0 is a choice of j2
return(sim_choices)
}
Run the function to simulate 200 sets of observed choices for each 2d model Each data set is a row in a matrix 960 x 200
sim_2d <- simulate_pp_data_mcmclist2d(mcmc_list2d, pairDataCDAT)
sim_2dDP <- simulate_pp_data_mcmclist2d(mcmc_list2dDP, pairDataCDAT)
Convert observed item ID choices to 1/0 for comparison to simulated data. If itemj1 is chosen, then 1, if j2 is chosen, then 0.
#If choice is first item, 1, if choice is second item, 0
observed_binary <- ifelse(pairDataCDAT$choice == pairDataCDAT$itemj1, 1,
ifelse(pairDataCDAT$choice == pairDataCDAT$itemj2, 0, NA))
A function to compute the p-value of observed and simulated choices - given the binary version of the observed data, find the mean (proportion of j1 choices) for each simulated data set - compute the proportion of simulated data sets that have greater than or equal to the proportion of j1 choices in observed
compute_ppp_proportion <- function(observed_choices, sim_choices_matrix) {
obs_prop <- mean(observed_choices)
# The mean of the row is the proportion of same choice for that simulated data set
sim_prop <- rowMeans(sim_choices_matrix)
sim_correct <- rowMeans(sim_choices_matrix == matrix(observed_choices,
nrow = nrow(sim_choices_matrix),
ncol = length(observed_choices),
byrow = TRUE))
# Posterior predictive p-value
ppp <- mean(sim_prop >= obs_prop)
list(
observed = obs_prop,
simulated = sim_prop,
simulated_correct = sim_correct,
ppp = ppp
)
}
Compute Posterior Predictive P-value (PPP) for each 2d model
ppp2d <- compute_ppp_proportion(observed_binary, sim_2d)
ppp2d
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.5156250 0.5281250 0.5156250 0.5197917 0.4937500 0.5250000 0.5114583
## [8] 0.5020833 0.5072917 0.5250000 0.4916667 0.5114583 0.5135417 0.5312500
## [15] 0.5031250 0.5385417 0.5114583 0.5125000 0.5197917 0.5270833 0.5322917
## [22] 0.5197917 0.5208333 0.5145833 0.5197917 0.5041667 0.5125000 0.5354167
## [29] 0.5343750 0.5218750 0.5500000 0.5312500 0.5187500 0.5062500 0.5010417
## [36] 0.5052083 0.5458333 0.5218750 0.5270833 0.5020833 0.5187500 0.4979167
## [43] 0.5166667 0.5270833 0.5239583 0.5114583 0.5322917 0.5177083 0.5031250
## [50] 0.5187500 0.5197917 0.5020833 0.5218750 0.4854167 0.5145833 0.5218750
## [57] 0.5458333 0.5052083 0.5510417 0.5218750 0.4968750 0.5239583 0.5250000
## [64] 0.5375000 0.5270833 0.5281250 0.5114583 0.4916667 0.5197917 0.5281250
## [71] 0.5437500 0.5104167 0.5229167 0.5114583 0.5364583 0.5270833 0.5031250
## [78] 0.5156250 0.5406250 0.4979167 0.5135417 0.5218750 0.5177083 0.5333333
## [85] 0.4979167 0.5156250 0.5239583 0.5218750 0.5125000 0.5041667 0.5333333
## [92] 0.5260417 0.5125000 0.5322917 0.5218750 0.5229167 0.5458333 0.5375000
## [99] 0.4979167 0.5177083 0.4968750 0.4895833 0.5312500 0.5281250 0.5333333
## [106] 0.5000000 0.5187500 0.5281250 0.5093750 0.5364583 0.5239583 0.5156250
## [113] 0.5260417 0.5322917 0.5187500 0.5041667 0.5052083 0.5312500 0.4958333
## [120] 0.5145833 0.5218750 0.5250000 0.5052083 0.5062500 0.4927083 0.4968750
## [127] 0.4979167 0.5083333 0.5187500 0.5104167 0.5395833 0.5135417 0.5250000
## [134] 0.5333333 0.5072917 0.5229167 0.5281250 0.5312500 0.5406250 0.5135417
## [141] 0.5510417 0.5166667 0.5156250 0.5302083 0.5041667 0.5208333 0.4989583
## [148] 0.5385417 0.5135417 0.4906250 0.5250000 0.5052083 0.5270833 0.5041667
## [155] 0.5281250 0.5145833 0.5125000 0.5187500 0.4739583 0.5125000 0.5343750
## [162] 0.5281250 0.5197917 0.5291667 0.5218750 0.5270833 0.5510417 0.5135417
## [169] 0.5135417 0.5093750 0.5020833 0.5135417 0.5375000 0.5354167 0.5354167
## [176] 0.4822917 0.5135417 0.5145833 0.5197917 0.4979167 0.5135417 0.5364583
## [183] 0.5302083 0.5395833 0.5041667 0.5250000 0.5208333 0.5125000 0.4854167
## [190] 0.4989583 0.4989583 0.5145833 0.5000000 0.4854167 0.4989583 0.5041667
## [197] 0.5125000 0.5197917 0.5114583 0.5239583
##
## $simulated_correct
## [1] 0.6385417 0.6572917 0.6343750 0.6197917 0.6458333 0.6416667 0.6177083
## [8] 0.6500000 0.6260417 0.6333333 0.6333333 0.6697917 0.6510417 0.6375000
## [15] 0.6322917 0.6385417 0.6489583 0.6645833 0.6489583 0.6666667 0.6427083
## [22] 0.6281250 0.6458333 0.6541667 0.6447917 0.6416667 0.6416667 0.6437500
## [29] 0.6864583 0.6322917 0.6520833 0.6520833 0.6291667 0.6354167 0.6114583
## [36] 0.6489583 0.6291667 0.6385417 0.6375000 0.6395833 0.6187500 0.6479167
## [43] 0.6625000 0.6500000 0.6697917 0.6489583 0.6489583 0.6531250 0.6302083
## [50] 0.6541667 0.6572917 0.6333333 0.6364583 0.6354167 0.6354167 0.6385417
## [57] 0.6312500 0.6406250 0.6468750 0.6614583 0.6572917 0.6281250 0.6083333
## [64] 0.6437500 0.6541667 0.6614583 0.6427083 0.6270833 0.6197917 0.6406250
## [71] 0.6520833 0.6416667 0.6645833 0.6322917 0.6593750 0.6562500 0.6281250
## [78] 0.6531250 0.6156250 0.6312500 0.6239583 0.6385417 0.6302083 0.6500000
## [85] 0.6270833 0.6593750 0.5989583 0.6343750 0.6729167 0.6604167 0.6500000
## [92] 0.6572917 0.6541667 0.6447917 0.6614583 0.6520833 0.5979167 0.6354167
## [99] 0.6395833 0.6052083 0.6447917 0.6458333 0.6395833 0.6614583 0.6416667
## [106] 0.6333333 0.6458333 0.6385417 0.6552083 0.6552083 0.6593750 0.6260417
## [113] 0.6427083 0.6385417 0.6562500 0.6312500 0.6302083 0.6541667 0.6687500
## [120] 0.6500000 0.6281250 0.6520833 0.6239583 0.6437500 0.6447917 0.6552083
## [127] 0.6479167 0.6437500 0.6666667 0.6479167 0.6291667 0.6697917 0.6395833
## [134] 0.6375000 0.6385417 0.6458333 0.6510417 0.6500000 0.6031250 0.6468750
## [141] 0.6052083 0.6354167 0.6197917 0.6197917 0.6375000 0.6687500 0.6427083
## [148] 0.6614583 0.6406250 0.6489583 0.6375000 0.6364583 0.6291667 0.6375000
## [155] 0.6489583 0.6187500 0.6354167 0.6625000 0.6406250 0.6604167 0.6489583
## [162] 0.6364583 0.6572917 0.6208333 0.6239583 0.6520833 0.6072917 0.6427083
## [169] 0.6364583 0.6593750 0.6562500 0.6510417 0.6208333 0.6479167 0.6541667
## [176] 0.6572917 0.6156250 0.6500000 0.6385417 0.6208333 0.6552083 0.6614583
## [183] 0.6281250 0.6187500 0.6562500 0.6229167 0.6208333 0.6458333 0.6250000
## [190] 0.6406250 0.6572917 0.6645833 0.6625000 0.6312500 0.5989583 0.6729167
## [197] 0.6479167 0.6302083 0.6406250 0.6468750
##
## $ppp
## [1] 0.595
describe(ppp2d$simulated_correct)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 200 0.64 0.02 0.64 0.64 0.02 0.6 0.69 0.09 -0.36 0.14 0
ppp2dDP <- compute_ppp_proportion(observed_binary, sim_2dDP)
ppp2dDP
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.4989583 0.5156250 0.5364583 0.5104167 0.5333333 0.5145833 0.5312500
## [8] 0.5229167 0.5250000 0.5187500 0.5333333 0.5125000 0.5072917 0.5218750
## [15] 0.4927083 0.5406250 0.5395833 0.5145833 0.5052083 0.5302083 0.5333333
## [22] 0.5135417 0.5020833 0.5156250 0.5197917 0.5031250 0.5250000 0.5354167
## [29] 0.5031250 0.5239583 0.5114583 0.5166667 0.5312500 0.5052083 0.5270833
## [36] 0.5166667 0.5072917 0.5093750 0.4895833 0.5364583 0.5270833 0.5145833
## [43] 0.5312500 0.5260417 0.5239583 0.5177083 0.5145833 0.5270833 0.5281250
## [50] 0.5239583 0.5239583 0.5145833 0.4927083 0.5187500 0.5343750 0.5062500
## [57] 0.5270833 0.5125000 0.5364583 0.5177083 0.5229167 0.4875000 0.5114583
## [64] 0.5447917 0.5156250 0.5156250 0.5093750 0.5156250 0.5156250 0.5114583
## [71] 0.5218750 0.5093750 0.5250000 0.5104167 0.5010417 0.5166667 0.5020833
## [78] 0.5260417 0.5197917 0.5135417 0.5291667 0.5062500 0.5229167 0.5187500
## [85] 0.5177083 0.5531250 0.5156250 0.5218750 0.5270833 0.5375000 0.5114583
## [92] 0.5416667 0.4989583 0.5041667 0.5125000 0.5229167 0.5260417 0.5322917
## [99] 0.5135417 0.5072917 0.5072917 0.5114583 0.5177083 0.5145833 0.5083333
## [106] 0.5031250 0.5333333 0.5187500 0.5260417 0.5104167 0.5208333 0.5114583
## [113] 0.5114583 0.5177083 0.5156250 0.5437500 0.4875000 0.5385417 0.5312500
## [120] 0.5322917 0.5635417 0.5156250 0.5010417 0.5093750 0.5125000 0.5552083
## [127] 0.5281250 0.5333333 0.5166667 0.5250000 0.5291667 0.5177083 0.5447917
## [134] 0.5010417 0.5114583 0.5312500 0.5229167 0.5083333 0.5270833 0.5062500
## [141] 0.4916667 0.5166667 0.5093750 0.5000000 0.5135417 0.5218750 0.4968750
## [148] 0.5322917 0.5239583 0.5229167 0.5114583 0.5218750 0.5177083 0.5000000
## [155] 0.5114583 0.5239583 0.5145833 0.5281250 0.5166667 0.5156250 0.5437500
## [162] 0.5010417 0.5218750 0.5281250 0.5250000 0.5239583 0.5385417 0.4791667
## [169] 0.5156250 0.5541667 0.5187500 0.5416667 0.5281250 0.5291667 0.5427083
## [176] 0.5114583 0.5343750 0.5218750 0.5135417 0.5291667 0.5291667 0.5135417
## [183] 0.5010417 0.5062500 0.4989583 0.5114583 0.5187500 0.5364583 0.5083333
## [190] 0.5333333 0.4937500 0.5291667 0.5250000 0.5281250 0.4916667 0.5166667
## [197] 0.5156250 0.5250000 0.5145833 0.5250000
##
## $simulated_correct
## [1] 0.6364583 0.6447917 0.6677083 0.6479167 0.6291667 0.6312500 0.6208333
## [8] 0.6333333 0.6145833 0.6666667 0.6208333 0.6583333 0.6614583 0.6531250
## [15] 0.6531250 0.6468750 0.6500000 0.6354167 0.6489583 0.6447917 0.6791667
## [22] 0.6510417 0.6229167 0.6593750 0.6510417 0.6614583 0.6500000 0.6312500
## [29] 0.6177083 0.6281250 0.6010417 0.6312500 0.6354167 0.6531250 0.6729167
## [36] 0.6520833 0.6385417 0.6447917 0.6729167 0.6468750 0.6395833 0.6291667
## [43] 0.6458333 0.6364583 0.6656250 0.6427083 0.6604167 0.6708333 0.6343750
## [50] 0.6114583 0.6552083 0.6250000 0.6656250 0.6250000 0.6385417 0.6208333
## [57] 0.6541667 0.6458333 0.6260417 0.6552083 0.6541667 0.6500000 0.6343750
## [64] 0.6343750 0.6697917 0.6197917 0.6364583 0.6572917 0.6281250 0.6531250
## [71] 0.6385417 0.6364583 0.6708333 0.6250000 0.6385417 0.6666667 0.6583333
## [78] 0.6552083 0.6239583 0.6322917 0.6416667 0.6500000 0.6229167 0.6458333
## [85] 0.6385417 0.6510417 0.6385417 0.6656250 0.6187500 0.6625000 0.6656250
## [92] 0.6479167 0.6531250 0.6437500 0.6583333 0.6250000 0.6385417 0.6385417
## [99] 0.6343750 0.6197917 0.6427083 0.6593750 0.6385417 0.6312500 0.6416667
## [106] 0.6614583 0.6312500 0.6791667 0.6364583 0.6791667 0.6583333 0.6843750
## [113] 0.6406250 0.6364583 0.6531250 0.6687500 0.6479167 0.6614583 0.6375000
## [120] 0.6302083 0.6468750 0.6510417 0.6093750 0.6302083 0.6583333 0.6385417
## [127] 0.6177083 0.6416667 0.6604167 0.6395833 0.6416667 0.6302083 0.6510417
## [134] 0.6302083 0.6572917 0.6125000 0.6458333 0.6500000 0.6458333 0.6229167
## [141] 0.6354167 0.6458333 0.6260417 0.6229167 0.6239583 0.6302083 0.6302083
## [148] 0.6385417 0.6697917 0.6458333 0.6531250 0.6197917 0.6427083 0.6333333
## [155] 0.6510417 0.6427083 0.6562500 0.6343750 0.6437500 0.6635417 0.6416667
## [162] 0.6593750 0.6552083 0.6531250 0.6562500 0.6239583 0.6239583 0.6250000
## [169] 0.6406250 0.6583333 0.6458333 0.6562500 0.6489583 0.6458333 0.6260417
## [176] 0.6239583 0.6427083 0.6572917 0.6427083 0.6729167 0.6250000 0.6489583
## [183] 0.6447917 0.6312500 0.6781250 0.6197917 0.6541667 0.6489583 0.6229167
## [190] 0.6437500 0.6687500 0.6437500 0.6395833 0.6572917 0.6312500 0.6562500
## [197] 0.6406250 0.6562500 0.6437500 0.6875000
##
## $ppp
## [1] 0.655
describe(ppp2dDP$simulated_correct)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 200 0.64 0.02 0.64 0.64 0.02 0.6 0.69 0.09 0.13 -0.31 0
Let’s check the prediction of choices using the 1d model next. For each posterior sample of \(\theta\) and \(\alpha\), simulate responses.
A function to simulate responses using 1d model
simulate_pp_data_mcmclist1d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta_names <- grep("^theta\\.", colnames(all_samples), value = TRUE)
alpha_names <- grep("^alpha\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) {
s <- sample(1:nrow(all_samples), 1) # Select one draw from any chain in combined samples
theta <- all_samples[s, theta_names] # Draw one value for theta
alpha <- all_samples[s, alpha_names] # Draw one value for alpha
for (i in 1:n_obs) {
j1 <- as.character(pair_data$itemj1[i])
j2 <- as.character(pair_data$itemj2[i])
rater <- as.character(pair_data$rater[i])
# Safely extract theta/gamma values using column names
diff <- theta[paste0("theta.", j1)] - theta[paste0("theta.", j2)]
a <- alpha[paste0("alpha.", rater)]
eta <- a * diff # Add thetas
p <- pnorm(eta) # probit link is a normal distribution
sim_choices[r, i] <- rbinom(1, 1, p)
}
}
return(sim_choices)
}
Run the function for 1d model
sim_1d <- simulate_pp_data_mcmclist1d(mcmc_list1d, pairDataCDAT)
Compute Posterior Predictive P-value (PPP) for 1d model
ppp1d <- compute_ppp_proportion(observed_binary, sim_1d)
ppp1d
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.4927083 0.5135417 0.4718750 0.4927083 0.5000000 0.4906250 0.5000000
## [8] 0.4958333 0.5104167 0.5010417 0.4802083 0.5020833 0.4875000 0.5135417
## [15] 0.5062500 0.5250000 0.4833333 0.4843750 0.5218750 0.5010417 0.5145833
## [22] 0.4916667 0.5010417 0.5010417 0.5114583 0.5083333 0.5145833 0.5020833
## [29] 0.4989583 0.4947917 0.5343750 0.4958333 0.5020833 0.5083333 0.4593750
## [36] 0.4937500 0.5114583 0.5291667 0.4979167 0.4802083 0.5093750 0.4812500
## [43] 0.4864583 0.5145833 0.5166667 0.5218750 0.4875000 0.5281250 0.5062500
## [50] 0.5031250 0.5208333 0.4958333 0.4958333 0.5135417 0.5229167 0.5000000
## [57] 0.4854167 0.5104167 0.5031250 0.4833333 0.4885417 0.5260417 0.5052083
## [64] 0.5020833 0.5083333 0.5072917 0.4916667 0.5104167 0.5145833 0.5281250
## [71] 0.5302083 0.5145833 0.4958333 0.4895833 0.5302083 0.5020833 0.4833333
## [78] 0.4947917 0.5000000 0.4927083 0.4906250 0.5072917 0.5239583 0.4958333
## [85] 0.5041667 0.5000000 0.4947917 0.4822917 0.4916667 0.4885417 0.4947917
## [92] 0.5218750 0.4927083 0.5177083 0.5229167 0.4854167 0.5041667 0.4822917
## [99] 0.5031250 0.4718750 0.5010417 0.4916667 0.5322917 0.4770833 0.5156250
## [106] 0.5197917 0.5177083 0.4906250 0.5062500 0.4635417 0.5208333 0.5010417
## [113] 0.5208333 0.5135417 0.4958333 0.5083333 0.4822917 0.4708333 0.4979167
## [120] 0.5145833 0.4875000 0.4750000 0.5052083 0.4979167 0.4781250 0.4958333
## [127] 0.5020833 0.4885417 0.5125000 0.4979167 0.5333333 0.5052083 0.5270833
## [134] 0.5156250 0.5208333 0.5281250 0.5239583 0.5093750 0.5145833 0.5145833
## [141] 0.5125000 0.5187500 0.5093750 0.4968750 0.5052083 0.4916667 0.4979167
## [148] 0.4979167 0.5041667 0.5083333 0.4895833 0.4750000 0.5072917 0.5125000
## [155] 0.4937500 0.5072917 0.5187500 0.4937500 0.4906250 0.5000000 0.5395833
## [162] 0.4916667 0.4927083 0.5093750 0.4937500 0.5187500 0.4916667 0.4958333
## [169] 0.4885417 0.4979167 0.5031250 0.5020833 0.4989583 0.5135417 0.5104167
## [176] 0.4708333 0.4927083 0.4864583 0.4812500 0.5052083 0.4906250 0.4927083
## [183] 0.4979167 0.5250000 0.5218750 0.5177083 0.5052083 0.5145833 0.5135417
## [190] 0.5031250 0.5114583 0.5083333 0.4791667 0.4937500 0.5260417 0.5177083
## [197] 0.4875000 0.5291667 0.5229167 0.5041667
##
## $simulated_correct
## [1] 0.6197917 0.6197917 0.6010417 0.6260417 0.6145833 0.6114583 0.6229167
## [8] 0.6437500 0.6520833 0.6468750 0.6343750 0.6354167 0.6104167 0.6302083
## [15] 0.6354167 0.6312500 0.6125000 0.6156250 0.6218750 0.6427083 0.6229167
## [22] 0.6562500 0.6156250 0.5739583 0.6343750 0.6125000 0.6437500 0.6083333
## [29] 0.6218750 0.6031250 0.6322917 0.6145833 0.6062500 0.6416667 0.6281250
## [36] 0.6270833 0.6197917 0.6375000 0.6458333 0.6302083 0.6385417 0.6270833
## [43] 0.6239583 0.6104167 0.6500000 0.6302083 0.6333333 0.6197917 0.6270833
## [50] 0.6364583 0.6354167 0.6541667 0.6104167 0.6239583 0.6437500 0.6312500
## [57] 0.6062500 0.6166667 0.6114583 0.6270833 0.6197917 0.6406250 0.6343750
## [64] 0.6416667 0.6270833 0.6052083 0.6291667 0.6416667 0.6291667 0.6531250
## [71] 0.6302083 0.6395833 0.6458333 0.5875000 0.6260417 0.6208333 0.6208333
## [78] 0.5968750 0.6562500 0.6593750 0.6135417 0.6177083 0.6385417 0.6500000
## [85] 0.6333333 0.6437500 0.6302083 0.6031250 0.6208333 0.6260417 0.6281250
## [92] 0.6177083 0.6197917 0.6385417 0.6583333 0.6083333 0.6125000 0.6260417
## [99] 0.6218750 0.6197917 0.6614583 0.6458333 0.6364583 0.6437500 0.6197917
## [106] 0.6093750 0.6427083 0.6197917 0.6416667 0.6364583 0.6020833 0.6239583
## [113] 0.6104167 0.6322917 0.6291667 0.6375000 0.6385417 0.6062500 0.6229167
## [120] 0.6375000 0.6208333 0.6291667 0.6322917 0.6270833 0.6302083 0.6125000
## [127] 0.6270833 0.6218750 0.6395833 0.6229167 0.6583333 0.6114583 0.6375000
## [134] 0.6114583 0.6125000 0.6427083 0.6489583 0.6281250 0.6270833 0.6312500
## [141] 0.6145833 0.6166667 0.6260417 0.6197917 0.6281250 0.6354167 0.6458333
## [148] 0.6041667 0.6312500 0.6270833 0.6395833 0.6437500 0.6302083 0.6270833
## [155] 0.6541667 0.6322917 0.6291667 0.6375000 0.6322917 0.6208333 0.6395833
## [162] 0.6187500 0.6260417 0.6302083 0.6333333 0.6520833 0.6479167 0.6187500
## [169] 0.6031250 0.6250000 0.6156250 0.6208333 0.6406250 0.6343750 0.6104167
## [176] 0.6270833 0.6281250 0.6197917 0.6416667 0.6427083 0.6531250 0.6052083
## [183] 0.6416667 0.6562500 0.6531250 0.6197917 0.6406250 0.6354167 0.6156250
## [190] 0.6281250 0.6135417 0.6395833 0.6333333 0.6333333 0.6468750 0.6302083
## [197] 0.6125000 0.6020833 0.6104167 0.5895833
##
## $ppp
## [1] 0.245
describe(ppp1d$simulated_correct)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 200 0.63 0.01 0.63 0.63 0.01 0.57 0.66 0.09 -0.23 0.21 0
Plot the observed proportion of selection of itemj1 in each comparison to the distribution of simulated selection of itemj1 in each comparison.
# Density of simulated proportions
dens <- density(ppp1d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (1D Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp1d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
Plot the density of the 2d model simulated proportions and the observed proportion (see Levy & Mislevy, 2016, pp. 238-239)
# Density of simulated proportions
dens <- density(ppp2d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D Normal Prior Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp2d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
Plot the density of the 2dDP model simulated proportions and the observed proportion
# Density of simulated proportions
dens <- density(ppp2dDP$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D DP Prior Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp2dDP$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
Both models have similar PPP. The distribtion of simulated choices’ proportion of j1 preference is nicely amassed around the observed proportion of j1 preferences, based on the rater’s model-implied gamma and the items’ model-implied theta1 and theta2 for that same combination.
This suggests that both of the 2d models produce simulated data that sufficiently resemble actual choices of items by raters.
Additionally, the bulk of simulated choices are within the narrow range of .04 (0.49 to 0.53).
They both consistently predict the proportion of preference for itemj1, but does this mean that the row-by-row choices are the same? For that, it may be interesting to do a rank correlation or correlation of theta1 and theta2 as test statistics in PPP.
The one-dimensional model’s predictive distribution mass of proportion selecting itemj1 is comfortably surrounding the observed proportion of itemj1 selection. Further, the predictive proportion of choices of itemj1 are all quite close to the observed, between 0.49 and 0.52.
This does not compare favorably to the 2d and 2dDP models using the same test statistic of proportion of j1 choices given the same comparison combination.
A function to examine the difference between proportion of observed win rates for each item and simulated win rates for each item, based on the same simulated sets of data above.
Compute observed win rate for each item in observed and simulated, which is the proportion of times it was chosen in comparisons in the observed data.
For each posterior predictive simulated data set: - Compute simulated win rate for each item - Calculate the chi-square-like discrepancy of win rates, which is the sum for all j of the squared difference between observed win proportions and simulated win proportions (+ \(\epsilon\) as small number to avoid divide by 0 errors) - Compare observed vs simulated to get a p-value
compute_ppp_item_chisq <- function(observed_binary, sim_matrix, itemj1, itemj2, epsilon = 1e-6) {
all_items <- unique(c(itemj1, itemj2))
n_items <- length(all_items)
n_reps <- nrow(sim_matrix)
# Compute observed win counts per item
obs_wins <- setNames(rep(0, n_items), all_items)
for (i in seq_along(observed_binary)) {
winner <- if (observed_binary[i] == 1) itemj1[i] else itemj2[i]
obs_wins[as.character(winner)] <- obs_wins[as.character(winner)] + 1
}
obs_rates <- obs_wins / sum(obs_wins)
# Compute discrepancy of observed win rates vs each simulated replication
sim_discrepancies <- numeric(n_reps)
for (r in 1:n_reps) {
sim_wins <- setNames(rep(0, n_items), all_items)
for (i in seq_along(observed_binary)) {
winner <- if (sim_matrix[r, i] == 1) itemj1[i] else itemj2[i]
sim_wins[as.character(winner)] <- sim_wins[as.character(winner)] + 1
}
sim_rates <- sim_wins / sum(sim_wins)
# Chi-square style discrepancy
sim_discrepancies[r] <- sum((obs_rates - sim_rates)^2 / (sim_rates + epsilon))
}
obs_discrepancy <- median(sim_discrepancies) # Treating this as the expected baseline
ppp <- mean(sim_discrepancies >= obs_discrepancy)
list(
observed = obs_discrepancy,
simulated = sim_discrepancies,
ppp = ppp
)
}
First, feed the simulated data for the 2dDP model into it.
itemChisq2dDP <- compute_ppp_item_chisq(
observed_binary = observed_binary,
sim_matrix = sim_2dDP,
itemj1 = pairDataCDAT$itemj1,
itemj2 = pairDataCDAT$itemj2
)
itemChisq2dDP
## $observed
## [1] 0.09385262
##
## $simulated
## [1] 0.16024055 0.10748542 0.05225004 0.06385521 0.10437190 0.05596206
## [7] 0.10813459 0.16124437 0.10298773 0.10309001 0.16593436 0.07086567
## [13] 0.10824698 0.05627754 0.08042860 0.10667652 0.06408002 0.08629173
## [19] 0.07278082 0.09325696 0.06021929 0.09184391 0.15342262 0.06553249
## [25] 0.06394415 0.06857242 0.08656263 0.14424678 0.18123723 0.06850033
## [31] 0.04632751 0.07184959 0.07223868 0.09932104 0.05627084 0.06386549
## [37] 0.09187064 0.09415856 0.04773781 0.14050228 0.09011584 0.05706661
## [43] 0.10985130 0.14302210 0.06394361 0.10219005 0.10480391 0.07607659
## [49] 0.12496174 0.18373596 0.06340039 0.11583823 0.03559179 0.10089859
## [55] 0.19785837 0.14452871 0.07443139 0.05665413 0.05556397 0.05451177
## [61] 0.12413793 0.09019472 0.11035817 0.11793324 0.12442575 0.14134128
## [67] 0.07609396 0.12212987 0.10268433 0.09879956 0.06482903 0.09458577
## [73] 0.08050362 0.07765263 0.11431767 0.07102163 0.03769151 0.06705952
## [79] 0.15587575 0.16476489 0.09896228 0.13385159 0.08193981 0.09474072
## [85] 0.11495622 0.09568021 0.08665635 0.09630439 0.13803430 0.08587776
## [91] 0.05887789 0.10198173 0.11713909 0.12889190 0.04033279 0.17789112
## [97] 0.10045160 0.08809864 0.11450291 0.18145681 0.08888781 0.05217562
## [103] 0.08268421 0.13033430 0.09252817 0.07077455 0.14699097 0.04271075
## [109] 0.06387100 0.12192739 0.10837012 0.05089175 0.03098332 0.10203697
## [115] 0.09225489 0.06159748 0.06932893 0.13776411 0.07358786 0.14673582
## [121] 0.06878731 0.05770728 0.08652809 0.11820274 0.10491364 0.13237038
## [127] 0.15409561 0.12206241 0.06578734 0.12835960 0.08400606 0.13765595
## [133] 0.12107265 0.04413585 0.07669090 0.15185351 0.06110249 0.05777588
## [139] 0.08851722 0.12504720 0.09432418 0.06584949 0.14618675 0.14320618
## [145] 0.09485829 0.09922802 0.10262366 0.12084504 0.09215294 0.14270360
## [151] 0.06076215 0.06399333 0.13036003 0.11400033 0.08567095 0.09419956
## [157] 0.13509092 0.05805142 0.07852359 0.06258912 0.08817274 0.04541753
## [163] 0.09848758 0.12292388 0.07879318 0.11623911 0.13052464 0.08530631
## [169] 0.08016304 0.08940080 0.11465397 0.04510894 0.11531188 0.10096171
## [175] 0.12749446 0.11132799 0.08209674 0.06281481 0.08050916 0.05229730
## [181] 0.11945068 0.11014619 0.04946752 0.10353438 0.05129473 0.09264959
## [187] 0.10993877 0.07171348 0.06597585 0.11267141 0.08237418 0.09354667
## [193] 0.09459381 0.05210078 0.15310859 0.04652379 0.10811163 0.10039202
## [199] 0.16488683 0.04921671
##
## $ppp
## [1] 0.5
Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.
# Density of simulated proportions
dens <- density(itemChisq2dDP$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D DP Prior Model)",
xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = itemChisq2dDP$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)
Next, the 2d model
itemChisq2d <- compute_ppp_item_chisq(
observed_binary = observed_binary,
sim_matrix = sim_2d,
itemj1 = pairDataCDAT$itemj1,
itemj2 = pairDataCDAT$itemj2
)
itemChisq2d
## $observed
## [1] 0.09940797
##
## $simulated
## [1] 0.07809597 0.08914133 0.10392175 0.10875848 0.11360061 0.13329543
## [7] 0.06265255 0.10566095 0.13689743 0.05504987 0.11526860 0.04484410
## [13] 0.10166480 0.10544106 0.10875041 0.07138641 0.10222590 0.09199420
## [19] 0.11870838 0.07218083 0.10271261 0.12155499 0.08704563 0.09412740
## [25] 0.12699533 0.10113244 0.12481945 0.12289664 0.08495404 0.06267766
## [31] 0.11751010 0.06290414 0.11667657 0.14379092 0.18726353 0.07546757
## [37] 0.21498165 0.08756300 0.08961252 0.11022765 0.16497601 0.04324774
## [43] 0.13732930 0.06927240 0.08559715 0.04279772 0.10832250 0.10829262
## [49] 0.13756401 0.10473573 0.07956408 0.11734722 0.10078298 0.10465922
## [55] 0.15507876 0.06796212 0.12470595 0.11420893 0.09331612 0.10340674
## [61] 0.05145746 0.14606248 0.09602228 0.09995751 0.08414963 0.07314756
## [67] 0.09704919 0.11091686 0.09417464 0.08811748 0.08816063 0.13716852
## [73] 0.06571872 0.16835918 0.09968145 0.07325927 0.09487817 0.07076405
## [79] 0.07967885 0.07171194 0.11471183 0.13963763 0.16516127 0.11710970
## [85] 0.08326477 0.08119605 0.12184078 0.11258484 0.08452500 0.10292369
## [91] 0.05563178 0.09910395 0.09600589 0.09350684 0.08992263 0.05596385
## [97] 0.13717801 0.15031693 0.06615366 0.16953474 0.08791319 0.07025507
## [103] 0.19006569 0.08141082 0.10509579 0.10090480 0.10327251 0.06052407
## [109] 0.11066743 0.05780810 0.06648055 0.11552318 0.15086716 0.16955361
## [115] 0.11851834 0.13094584 0.07553799 0.10306290 0.08518856 0.09275750
## [121] 0.10612486 0.08424326 0.17372864 0.06692534 0.12027734 0.04575165
## [127] 0.06905189 0.08956916 0.06008744 0.10954105 0.10240030 0.09104094
## [133] 0.10640590 0.04482490 0.06776578 0.10026892 0.09786834 0.08454646
## [139] 0.16716007 0.07757791 0.10790788 0.15846362 0.12763905 0.13652266
## [145] 0.11459127 0.06751443 0.09913448 0.07818184 0.08278690 0.09173798
## [151] 0.09156226 0.09761459 0.12020689 0.06355210 0.10317038 0.13403759
## [157] 0.11092952 0.10984523 0.09241529 0.04425620 0.12604473 0.08574402
## [163] 0.08738697 0.14677472 0.12510214 0.12697092 0.17186838 0.14061454
## [169] 0.08996704 0.09181870 0.11992176 0.09067149 0.12819759 0.08390346
## [175] 0.10536178 0.05738557 0.11741496 0.12357438 0.09898025 0.07589217
## [181] 0.08746406 0.09524031 0.09067485 0.11977157 0.08186695 0.09291148
## [187] 0.19727734 0.09367730 0.12213148 0.11636823 0.08974960 0.06517384
## [193] 0.07560448 0.11134586 0.12024007 0.08169620 0.05016986 0.13699636
## [199] 0.06436460 0.08334910
##
## $ppp
## [1] 0.5
Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.
# Density of simulated proportions
dens <- density(itemChisq2d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D Normal Prior Model)",
xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = itemChisq2d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)
Finally, the 1d model
itemChisq1d <- compute_ppp_item_chisq(
observed_binary = observed_binary,
sim_matrix = sim_1d,
itemj1 = pairDataCDAT$itemj1,
itemj2 = pairDataCDAT$itemj2
)
itemChisq1d
## $observed
## [1] 0.01271147
##
## $simulated
## [1] 0.008689252 0.012032840 0.011397719 0.021802567 0.010187137 0.015058266
## [7] 0.020480811 0.009595658 0.017688660 0.013683082 0.014216908 0.010462498
## [13] 0.010493648 0.014521248 0.012135876 0.019519905 0.007647882 0.014698068
## [19] 0.015340213 0.008565076 0.016815376 0.025571146 0.008039672 0.018223314
## [25] 0.010062122 0.008042697 0.015321727 0.018342650 0.008523532 0.013077604
## [31] 0.016757608 0.010910624 0.005664269 0.011695177 0.012651936 0.011825232
## [37] 0.013534869 0.005830345 0.013252526 0.010942365 0.007148477 0.018376632
## [43] 0.021006159 0.018630047 0.015232738 0.012878330 0.031138822 0.015216287
## [49] 0.022970757 0.005568950 0.015130570 0.018112642 0.014952556 0.006007979
## [55] 0.008563380 0.006425200 0.014014477 0.010782995 0.008097682 0.016199179
## [61] 0.018278671 0.011503651 0.013927318 0.016276051 0.013919553 0.008236969
## [67] 0.012821316 0.016509662 0.012882998 0.009306765 0.014177401 0.013862402
## [73] 0.012797851 0.007794664 0.009880115 0.011030061 0.011801322 0.013523935
## [79] 0.007231380 0.021138763 0.016531812 0.021075016 0.022082198 0.018604147
## [85] 0.012076007 0.008519559 0.008525938 0.006982667 0.019694801 0.009328854
## [91] 0.008171789 0.016822551 0.009519564 0.014434289 0.011054654 0.008410929
## [97] 0.008872018 0.018451943 0.011830732 0.007452279 0.010911678 0.013557496
## [103] 0.013760664 0.024099615 0.022481667 0.010334270 0.004468165 0.020554080
## [109] 0.024262635 0.012770997 0.018599948 0.009613005 0.011647050 0.015760199
## [115] 0.011422446 0.015511760 0.015710970 0.006656360 0.007944362 0.012771928
## [121] 0.009694546 0.015520781 0.010304464 0.015166155 0.016742148 0.009540303
## [127] 0.011775197 0.013615666 0.017299293 0.014576820 0.007584575 0.019897335
## [133] 0.018424041 0.011005767 0.019203834 0.007741468 0.023763952 0.011130530
## [139] 0.007739875 0.006451970 0.009760221 0.007836274 0.019368541 0.018887258
## [145] 0.012844088 0.004738793 0.017115208 0.005516234 0.023224779 0.011630040
## [151] 0.012307266 0.008086319 0.008765327 0.016035877 0.007863257 0.015682024
## [157] 0.011619422 0.011785685 0.008585198 0.013044368 0.006421927 0.016372405
## [163] 0.017512142 0.013559031 0.010215444 0.009707851 0.020659325 0.023347285
## [169] 0.013151905 0.008230415 0.019727404 0.028262459 0.010872395 0.011498729
## [175] 0.011080889 0.012077145 0.007085920 0.016546820 0.010973008 0.014005887
## [181] 0.014132815 0.008851235 0.008841829 0.009204350 0.016426698 0.009173632
## [187] 0.009192348 0.014652736 0.010227302 0.014052073 0.010308212 0.017599114
## [193] 0.016517534 0.010213388 0.011548079 0.008414521 0.005474785 0.011522849
## [199] 0.012905385 0.015336770
##
## $ppp
## [1] 0.5
Plot the observed proportion of item wins to the distribution of simulated item wins in each comparison.
# Density of simulated proportions
dens <- density(itemChisq1d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (1D Normal Prior Model)",
xlab = "Simulated Chi-square Discrepancy of Proportion of Item Wins",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = itemChisq1d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Median of Discrepancies (Simulated to Observed)", col = "red", lty = 2, lwd = 2)
From Chapter 10.3 of Levy & Mislevy (2016)
The Deviance INformation Criterion is defined a “the difference between the posterior mean of the deviance, \(\overline{D(\theta)}\), and the deviance evaluated at the posterior mean, \(D(\bar{\theta})\)” (p. 248). Which is appropriate when the mean of the posterior is a “reasonable point summary of the parameters” (p. 249) which is the case for these parameters. The formula includes a penalty for complexity \(p_{D}\) based on the number of parameters estimated. Look to Gelman et al. (2013) Gill (2007) and Plummer (2008) for more advanced discussion of DIC.
DIC =\(\overline{D(\theta)} + p_{D} = 2\overline{D(\theta)} - D(\bar{\theta}) +2p_{D}\)
Loop over all MCMC iterations:
Compute \(p_{D}\) which is \(\overline{D(\theta)}\) - \(D(\bar{\theta})\).
Compute the DIC which 2 times \(\overline{D(\theta)}\) - \(\overline{D(\theta)}\).
First, the posterior mean of the deviance for 2d model. ### 2d DIC
computeDbar2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta1 <- setNames(mcmc_matrix[s, theta1_ids], sub("theta1\\.", "", theta1_ids))
theta2 <- setNames(mcmc_matrix[s, theta2_ids], sub("theta2\\.", "", theta2_ids))
gamma <- setNames(mcmc_matrix[s, gamma_ids], sub("gamma\\.", "", gamma_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma[r]
if (is.na(g)) g <- mean(gamma, na.rm = TRUE)
diff1 <- theta1[j2] - theta1[j1]
diff2 <- theta2[j2] - theta2[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE) # Compute log-probability of the observed binary outcome
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
Note that the deviance is defined as minus two times the log-likelihood: \(D(\theta) = -2\log p(y | \theta)\)
So:
Then: \(\overline{D{\theta}}\) is computed by averaging this deviance over MCMC samples \(D(\bar{\theta})\) is computed by evaluating the deviance at the posteriro mean of parameters
Use that function:
mcmc_matrix <- do.call(rbind, mcmc_list2d) # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids <- grep("^gamma\\.", colnames(mcmc_matrix), value = TRUE)
D_bar2d <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_bar2d)
## [1] 1239.919
Now a function to compute the deviance at the posterior mean parameters
computeDevianceAtPosteriorMean2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Compute posterior means
theta1_mean <- setNames(colMeans(mcmc_matrix[, theta1_ids]), sub("theta1\\.", "", theta1_ids))
theta2_mean <- setNames(colMeans(mcmc_matrix[, theta2_ids]), sub("theta2\\.", "", theta2_ids))
gamma_mean <- setNames(colMeans(mcmc_matrix[, gamma_ids]), sub("gamma\\.", "", gamma_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma_mean[r]
if (is.na(g)) g <- mean(gamma_mean, na.rm = TRUE)
diff1 <- theta1_mean[j2] - theta1_mean[j1]
diff2 <- theta2_mean[j2] - theta2_mean[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
Use the function, with the same variables passed to it as created in previous ste.
D_mean2d <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2d)
## [1] 1160.631
As a final step find the difference between twice the D_bar and the D_mean:
p_D2d <- D_bar2d - D_mean2d
DIC2d <- D_bar2d + p_D2d
print(DIC2d)
## [1] 1319.207
Repeat for 2dDP these steps using same functions as for the 2d normal prior model.
mcmc_matrix <- do.call(rbind, mcmc_list2dDP) # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids <- grep("^gamma\\.", colnames(mcmc_matrix), value = TRUE)
D_bar2dDP <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_bar2dDP)
## [1] 1225.952
D_mean2dDP <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2dDP)
## [1] 1144.647
p_D2dDP <- D_bar2dDP - D_mean2dDP
DIC2dDP <- D_bar2dDP + p_D2dDP
print(DIC2dDP)
## [1] 1307.257
computeDbar1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta <- setNames(mcmc_matrix[s, theta_ids], sub("theta\\.", "", theta_ids))
alpha <- setNames(mcmc_matrix[s, alpha_ids], sub("alpha\\.", "", alpha_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha[r]
if (is.na(a)) a <- mean(alpha, na.rm = TRUE)
diff <- theta[j2] - theta[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
Use that function:
mcmc_matrix <- do.call(rbind, mcmc_list1d) # combine all chains
theta_ids <- grep("^theta\\.", colnames(mcmc_matrix), value = TRUE)
alpha_ids <- grep("^alpha\\.", colnames(mcmc_matrix), value = TRUE)
D_bar1d <- computeDbar1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)
print(D_bar1d)
## [1] 1052.434
Now a function to compute the deviance at the posterior mean parameters
computeDevianceAtPosteriorMean1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Compute posterior means
theta_mean <- setNames(colMeans(mcmc_matrix[, theta_ids]), sub("theta\\.", "", theta_ids))
alpha_mean <- setNames(colMeans(mcmc_matrix[, alpha_ids]), sub("alpha\\.", "", alpha_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha_mean[r]
if (is.na(a)) a <- mean(alpha_mean, na.rm = TRUE)
diff <- theta_mean[j2] - theta_mean[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
Use the function, with the same variables passed to it as created in previous step.
D_mean1d <- computeDevianceAtPosteriorMean1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)
print(D_mean1d)
## [1] 1026.717
As a final step find the difference between twice the D_bar and the D_mean:
p_D1d <- D_bar1d - D_mean1d
DIC1d <- D_bar1d + p_D1d
print(DIC1d)
## [1] 1078.151
# ---- Output comparison table ----
dic_table <- data.frame(
Model = c("1d", "2d", "2dDP"),
DIC = c(DIC1d, DIC2d, DIC2dDP),
D_bar = c(D_bar1d, D_bar2d, D_bar2dDP),
D_mean = c(D_mean1d, D_mean1d, D_mean2dDP),
p_D = c(p_D1d, p_D2d, p_D2dDP)
)
print(dic_table)
## Model DIC D_bar D_mean p_D
## 1 1d 1078.151 1052.434 1026.717 25.71714
## 2 2d 1319.207 1239.919 1026.717 79.28785
## 3 2dDP 1307.257 1225.952 1144.647 81.30483
The 1d model has the lowest DIC by far, but is less expressive. This statistic considers the trade off between fit and complexity, so even the high complexity of the 2d models reduces the DIC. Between the two 2d models, the additional parameters of the clustering does not diminish the fit of the model by DIC. The 2d and 2dDP models’ parameters have been shown above to be good at predicting the choices of raters when given the two items in a pairwise comparison.
Reflection: It would be good to reconsider the PPMC variables of interest, as they seem to conflict with the Deviance Information Criteria.
Even with the relatively sparse data set of comparisons, the model 2d DP works very well to explain the preferences of raters and the placement of items in 2d latent space.
Levy, R., & Mislevy, R. J. (2016). Bayesian psychometric modeling. Chapman & Hall/CRC.
Yu, Q., & Quinn, K. M. (2022). A multidimensional pairwise comparison model for heterogeneous perceptions with an application to modelling the perceived truthfulness of public statements on COVID‐19. Journal of the Royal Statistical Society. Series A, (Statistics in Society), 10.1111/rssa.12810. https://doi.org/10.1111/rssa.12810