# Clear the environment and load packages
rm(list = ls())
library(MCMCpack)
library(bayesplot)
library(coda)
library(ggplot2)
library(tidyverse)
library(psych)
setwd("~/Dropbox/IU25SP-EDUC-Y617 Psychometric Theory/Psychometric Theory Final Project/Paired Comparison MCMC CDAT")
data.folder <- "Data"
load(file.path(data.folder, "pairDataCDAT.Rda"))
# Prepare for use in MCMCpack
colnames(pairDataCDAT) <- c("rater","itemj1", "itemj2","choice")
# Prepare the data for easy matching to student responses as PDFs labeled by 4-digit ID
# Create a named vector for the mapping: names are the original IDs and values are the new numbers.
lookup <- c("60" = 1121, "61" = 1154, "62" = 1222, "63" = 1637,
"64" = 1995, "65" = 3369, "66" = 4004, "67" = 4209,
"68" = 4268, "69" = 4841, "70" = 5074, "71" = 5495,
"72" = 5710, "73" = 8294, "74" = 8980, "75" = 9899)
# Map the itemj1 and itemj2 columns using the lookup vector.
pairDataCDAT$itemj1 <- lookup[as.character(pairDataCDAT$itemj1)]
pairDataCDAT$itemj2 <- lookup[as.character(pairDataCDAT$itemj2)]
pairDataCDAT$choice <- lookup[as.character(pairDataCDAT$choice)]
# Sort for easy matching in theta.start matrix below
pairDataCDAT <- pairDataCDAT[order(pairDataCDAT$itemj1), ]
save(pairDataCDAT, file = file.path(data.folder, "pairDataCDAT.1.Rda"))
n_chains <- 3 # Set the number of chains
set.seed(1234) # Set seed for randomized starting values
I <- length(unique(pairDataCDAT$rater))
The vector γ represents each respondent’s perceptual orientation in the 2D latent space in radians. Initially, all respondents are assigned a value of π/4π/4 (45° in radians). The second line randomizes the starting values within a small range [π/4−0.4,π/4+0.4][π/4−0.4,π/4+0.4] to introduce slight variation.
gamma_start <- rep(pi/4,I)
# If used, this would replace the constant with I random values from a uniform distribution between 0 and π/2,
# using a broader random initialization instead of the current approach, which uses a small random variation around π/4.
# runif(I)*pi/2.
gamma_start <- runif(I, pi/4-.4, pi/4+.4)
Thetas start at 0 for the one-dimensional model
theta_start1d <- matrix(0, 16, 1) # All start at 0 in the model's probit units
Create the empty starting \(\theta\) matrix for 2d models
theta_start2d <- matrix(0, 16, 2) # 16 scripts, two dimensions
# Creates a 16×2 matrix (theta_start2d) to store the latent attributes of statements along two dimensions. Initially, all values are set to 0.
Find the matching indices for the anchor scripts in the matrix by viewing them in order.
unique(pairDataCDAT$itemj1)
## [1] 1121 1154 1222 1637 1995 3369 4004 4209 4268 4841 5074 5495 5710 8294 8980
## [16] 9899
Set initial values for two anchor items whose place in the matrix is determined above.
# These values anchor certain scripts in the 2d latent space and help define the rotation.
theta_start2d[11,1] <- 1 ## script 5074 is interpreted as having high creative elaboration
theta_start2d[11,2] <- -1 ## script 5074 is interpreted as having low conventional representation
theta_start2d[12,1] <- -1 ## script 5495 is interpreted as having low creative elaboration
theta_start2d[12,2] <- 1 ## script 5495 is interpreted as having high conventional representation
seeds <- c(0202, 8018, 1304) # Set seeds for each chain
chains1d <- vector("list", n_chains)
for (i in 1:n_chains) {
chains1d[[i]] <- MCMCpaircompare(
pairDataCDAT,
verbose=5000, # Prints status updates every __ iterations.
burnin=1000, # Discards the first __ samples to stabilize.
mcmc=9000, # Runs __ iterations for estimation.
thin=20, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
alpha.fixed=FALSE,
alpha.start=1,
a=0, # The default prio mean of alpha is 0
A=.25, # The default prior precision is 0.25 (variance of 4)
store.theta=TRUE, # Saves item-specfic draws.
store.alpha=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
##
##
## MCMCpaircompare iteration 1 of 10000
##
##
## MCMCpaircompare iteration 5001 of 10000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list1d <- mcmc.list(lapply(chains1d, mcmc))
# Save object
save(mcmc_list1d, file = "Out/mcmc_list1d.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list1d[[1]]), value = TRUE)
alphas <- grep("^alpha", colnames(mcmc_list1d[[1]]), value = TRUE)
# For 1D model
gelman_diag_1d <- gelman.diag(mcmc_list1d)
max_rhat_1d <- max(gelman_diag_1d$psrf[, "Point est."])
gelman_diag_1d
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta.1121 1.00 1.01
## theta.1154 1.02 1.05
## theta.1222 1.00 1.01
## theta.1637 1.01 1.02
## theta.1995 1.01 1.03
## theta.3369 1.01 1.03
## theta.4004 1.01 1.01
## theta.4209 1.01 1.02
## theta.4268 1.00 1.01
## theta.4841 1.01 1.04
## theta.5074 1.01 1.03
## theta.5495 1.02 1.05
## theta.5710 1.02 1.06
## theta.8294 1.01 1.02
## theta.8980 1.00 1.01
## theta.9899 1.00 1.01
## alpha.1 1.00 1.01
## alpha.25 1.01 1.02
## alpha.26 1.02 1.07
## alpha.27 1.02 1.06
## alpha.28 1.01 1.04
## alpha.29 1.02 1.08
## alpha.30 1.00 1.01
## alpha.31 1.00 1.01
## alpha.34 1.00 1.01
## alpha.35 1.00 1.01
## alpha.36 1.01 1.03
## alpha.37 1.00 1.02
## alpha.38 1.00 1.01
##
## Multivariate psrf
##
## 1.05
View a summary
summary1d <- round(
cbind(
as.data.frame(summary(mcmc_list1d)$statistics),
as.data.frame(summary(mcmc_list1d)$quantiles)
), 3)
summary1d$Model <- "1d"
save(summary1d, file = "Out/summary1d.Rdata")
summary1d
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta.1121 -0.068 0.272 0.007 0.024 -0.583 -0.255 -0.077 0.111
## theta.1154 -0.923 0.341 0.009 0.024 -1.614 -1.153 -0.915 -0.682
## theta.1222 0.167 0.266 0.007 0.023 -0.304 -0.026 0.153 0.350
## theta.1637 0.069 0.267 0.007 0.022 -0.411 -0.119 0.060 0.247
## theta.1995 -0.451 0.293 0.008 0.023 -1.003 -0.657 -0.450 -0.260
## theta.3369 1.803 0.476 0.013 0.027 0.955 1.474 1.802 2.095
## theta.4004 0.417 0.287 0.008 0.022 -0.121 0.215 0.397 0.625
## theta.4209 -0.190 0.274 0.007 0.023 -0.691 -0.383 -0.195 -0.006
## theta.4268 0.324 0.273 0.007 0.023 -0.171 0.132 0.317 0.503
## theta.4841 -0.524 0.295 0.008 0.024 -1.121 -0.719 -0.529 -0.339
## theta.5074 -0.115 0.272 0.007 0.023 -0.650 -0.307 -0.118 0.070
## theta.5495 -0.796 0.334 0.009 0.024 -1.449 -1.019 -0.793 -0.555
## theta.5710 -0.981 0.343 0.009 0.023 -1.660 -1.223 -0.958 -0.738
## theta.8294 0.203 0.272 0.007 0.022 -0.320 0.014 0.187 0.395
## theta.8980 0.750 0.309 0.008 0.021 0.183 0.527 0.738 0.948
## theta.9899 0.563 0.294 0.008 0.021 0.027 0.354 0.555 0.762
## alpha.1 2.719 0.813 0.022 0.037 1.393 2.156 2.636 3.180
## alpha.25 1.129 0.365 0.010 0.014 0.541 0.886 1.084 1.321
## alpha.26 1.876 0.578 0.016 0.026 0.937 1.455 1.810 2.233
## alpha.27 1.781 0.526 0.014 0.023 0.942 1.403 1.703 2.107
## alpha.28 0.978 0.339 0.009 0.014 0.438 0.737 0.934 1.175
## alpha.29 0.833 0.317 0.009 0.013 0.322 0.608 0.795 1.014
## alpha.30 0.705 0.277 0.008 0.010 0.268 0.502 0.681 0.882
## alpha.31 0.336 0.201 0.005 0.006 -0.025 0.205 0.319 0.449
## alpha.34 1.035 0.341 0.009 0.012 0.500 0.802 0.994 1.230
## alpha.35 0.473 0.213 0.006 0.007 0.116 0.326 0.450 0.596
## alpha.36 1.000 0.363 0.010 0.012 0.416 0.740 0.956 1.214
## alpha.37 1.022 0.366 0.010 0.014 0.461 0.752 0.985 1.214
## alpha.38 0.606 0.240 0.007 0.008 0.200 0.437 0.590 0.745
## 97.5% Model
## theta.1121 0.499 1d
## theta.1154 -0.284 1d
## theta.1222 0.702 1d
## theta.1637 0.599 1d
## theta.1995 0.131 1d
## theta.3369 2.786 1d
## theta.4004 0.999 1d
## theta.4209 0.379 1d
## theta.4268 0.877 1d
## theta.4841 0.080 1d
## theta.5074 0.445 1d
## theta.5495 -0.166 1d
## theta.5710 -0.352 1d
## theta.8294 0.745 1d
## theta.8980 1.367 1d
## theta.9899 1.170 1d
## alpha.1 4.639 1d
## alpha.25 1.950 1d
## alpha.26 3.148 1d
## alpha.27 3.023 1d
## alpha.28 1.760 1d
## alpha.29 1.519 1d
## alpha.30 1.344 1d
## alpha.31 0.791 1d
## alpha.34 1.864 1d
## alpha.35 0.954 1d
## alpha.36 1.801 1d
## alpha.37 1.900 1d
## alpha.38 1.161 1d
Consider thinning less than 50.
Is there a way to back up that the model statistically speaking is
seeds <- c(1304, 2406, 7916) # Set seeds for each chain
chains2d <- vector("list", n_chains)
for (i in 1:n_chains) {
chains2d[[i]] <- MCMCpaircompare2d(
pairDataCDAT,
theta.constraints=list(
# Note that "+" = constrained to be positive and "-" = constrained to negative
# Note that "n" = fixed value if desired
# Constraining each item to one quadrant based on theory
'5074' = list(1, "+"),
'5074' = list(2, "-"),
'5495' = list(1, "-"),
'5495' = list(2, "+")
),
verbose=5000, # Prints status updates every __ iterations.
burnin=1000, # Discards the first __ samples to stabilize.
mcmc=10000, # Runs __ iterations for estimation.
thin=20, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
gamma.start=gamma_start, # Uses the initialized respondent perception values.
theta.start=theta_start2d,
store.theta=TRUE, # Saves item-specfic draws.
store.gamma=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 5001 of 11000
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 11000
##
## Summary of acceptance rates for gamma:
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2d <- mcmc.list(lapply(chains2d, mcmc))
#Save object
save(mcmc_list2d, file = "Out/mcmc_list2d.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list2d[[1]]), value = TRUE)
alpha <- grep("^alpha", colnames(mcmc_list2d[[1]]), value = TRUE)
# For 2D model
gelman_diag_2d <- gelman.diag(mcmc_list2d)
max_rhat_2d <- max(gelman_diag_2d$psrf[, "Point est."])
gelman_diag_2d
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta1.1121 1.007 1.029
## theta1.1154 1.021 1.075
## theta1.1222 0.998 0.999
## theta1.1637 1.007 1.027
## theta1.1995 0.999 1.001
## theta1.3369 1.001 1.007
## theta1.4004 0.998 1.000
## theta1.4209 1.000 1.003
## theta1.4268 1.002 1.010
## theta1.4841 1.001 1.009
## theta1.5074 1.009 1.030
## theta1.5495 1.005 1.024
## theta1.5710 1.016 1.056
## theta1.8294 0.999 1.002
## theta1.8980 0.999 0.999
## theta1.9899 1.005 1.015
## theta2.1121 1.016 1.057
## theta2.1154 0.999 1.000
## theta2.1222 1.012 1.042
## theta2.1637 1.003 1.016
## theta2.1995 1.029 1.105
## theta2.3369 1.002 1.008
## theta2.4004 1.002 1.007
## theta2.4209 1.004 1.019
## theta2.4268 1.001 1.004
## theta2.4841 1.009 1.037
## theta2.5074 1.006 1.025
## theta2.5495 1.004 1.019
## theta2.5710 1.008 1.027
## theta2.8294 1.003 1.016
## theta2.8980 1.004 1.014
## theta2.9899 1.002 1.008
## gamma.1 1.008 1.034
## gamma.25 1.010 1.037
## gamma.26 1.002 1.010
## gamma.27 1.004 1.018
## gamma.28 1.007 1.025
## gamma.29 1.008 1.032
## gamma.30 1.001 1.006
## gamma.31 1.017 1.049
## gamma.34 1.011 1.044
## gamma.35 1.006 1.020
## gamma.36 1.002 1.010
## gamma.37 1.002 1.006
## gamma.38 1.000 1.005
##
## Multivariate psrf
##
## 1.09
View a summary of the 2d model parameter posterior distributions
summary2d <- round(
cbind(
as.data.frame(summary(mcmc_list2d)$statistics),
as.data.frame(summary(mcmc_list2d)$quantiles)
), 3)
summary2d$Model <- "2d"
save(summary2d, file="Out/summary2d.Rdata")
summary2d
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta1.1121 1.257 0.394 0.010 0.015 0.545 0.982 1.250 1.518
## theta1.1154 -0.012 0.376 0.010 0.014 -0.722 -0.260 -0.029 0.233
## theta1.1222 0.611 0.355 0.009 0.013 -0.075 0.370 0.601 0.846
## theta1.1637 0.607 0.366 0.009 0.015 -0.101 0.379 0.594 0.833
## theta1.1995 0.851 0.408 0.011 0.016 0.112 0.559 0.833 1.111
## theta1.3369 1.685 0.459 0.012 0.015 0.835 1.375 1.683 1.989
## theta1.4004 -0.793 0.378 0.010 0.014 -1.524 -1.057 -0.789 -0.514
## theta1.4209 -1.227 0.402 0.010 0.017 -2.024 -1.496 -1.219 -0.952
## theta1.4268 -0.839 0.395 0.010 0.013 -1.636 -1.101 -0.834 -0.553
## theta1.4841 -0.928 0.377 0.010 0.013 -1.668 -1.166 -0.919 -0.681
## theta1.5074 1.136 0.392 0.010 0.014 0.413 0.865 1.133 1.394
## theta1.5495 -2.312 0.442 0.011 0.017 -3.184 -2.601 -2.307 -2.022
## theta1.5710 -0.922 0.393 0.010 0.013 -1.687 -1.190 -0.921 -0.654
## theta1.8294 -0.130 0.353 0.009 0.012 -0.822 -0.369 -0.131 0.115
## theta1.8980 0.692 0.381 0.010 0.013 -0.082 0.426 0.697 0.946
## theta1.9899 0.438 0.370 0.010 0.012 -0.290 0.184 0.438 0.689
## theta2.1121 -1.159 0.361 0.009 0.014 -1.913 -1.367 -1.149 -0.905
## theta2.1154 -1.532 0.345 0.009 0.012 -2.216 -1.773 -1.533 -1.296
## theta2.1222 -0.145 0.328 0.008 0.013 -0.804 -0.364 -0.134 0.083
## theta2.1637 -0.491 0.331 0.009 0.012 -1.140 -0.704 -0.498 -0.268
## theta2.1995 -1.240 0.357 0.009 0.014 -1.953 -1.475 -1.242 -1.007
## theta2.3369 1.259 0.376 0.010 0.013 0.505 1.022 1.274 1.500
## theta2.4004 1.376 0.338 0.009 0.012 0.721 1.142 1.372 1.594
## theta2.4209 0.769 0.334 0.009 0.011 0.144 0.531 0.765 0.990
## theta2.4268 1.206 0.346 0.009 0.012 0.552 0.968 1.205 1.431
## theta2.4841 -0.251 0.332 0.009 0.012 -0.878 -0.477 -0.256 -0.028
## theta2.5074 -1.047 0.349 0.009 0.013 -1.726 -1.276 -1.043 -0.821
## theta2.5495 0.578 0.335 0.009 0.012 0.043 0.322 0.543 0.805
## theta2.5710 -0.840 0.343 0.009 0.012 -1.519 -1.072 -0.833 -0.610
## theta2.8294 0.740 0.321 0.008 0.011 0.145 0.512 0.743 0.953
## theta2.8980 0.909 0.350 0.009 0.013 0.211 0.670 0.910 1.162
## theta2.9899 0.661 0.347 0.009 0.013 -0.036 0.431 0.667 0.892
## gamma.1 0.871 0.128 0.003 0.005 0.613 0.793 0.872 0.953
## gamma.25 0.720 0.138 0.004 0.006 0.434 0.632 0.722 0.812
## gamma.26 0.832 0.125 0.003 0.006 0.579 0.747 0.836 0.922
## gamma.27 0.839 0.123 0.003 0.005 0.591 0.760 0.841 0.924
## gamma.28 0.451 0.138 0.004 0.005 0.184 0.355 0.453 0.550
## gamma.29 0.914 0.137 0.004 0.006 0.641 0.819 0.916 1.009
## gamma.30 0.308 0.147 0.004 0.006 0.044 0.200 0.301 0.410
## gamma.31 0.077 0.065 0.002 0.002 0.002 0.026 0.061 0.110
## gamma.34 0.942 0.143 0.004 0.006 0.639 0.852 0.944 1.037
## gamma.35 1.347 0.140 0.004 0.004 1.039 1.260 1.365 1.455
## gamma.36 1.314 0.124 0.003 0.004 1.061 1.234 1.319 1.409
## gamma.37 1.329 0.115 0.003 0.004 1.081 1.250 1.336 1.412
## gamma.38 1.451 0.087 0.002 0.003 1.245 1.396 1.468 1.521
## 97.5% Model
## theta1.1121 2.054 2d
## theta1.1154 0.759 2d
## theta1.1222 1.327 2d
## theta1.1637 1.350 2d
## theta1.1995 1.689 2d
## theta1.3369 2.581 2d
## theta1.4004 -0.096 2d
## theta1.4209 -0.452 2d
## theta1.4268 -0.115 2d
## theta1.4841 -0.225 2d
## theta1.5074 1.922 2d
## theta1.5495 -1.486 2d
## theta1.5710 -0.139 2d
## theta1.8294 0.529 2d
## theta1.8980 1.436 2d
## theta1.9899 1.151 2d
## theta2.1121 -0.512 2d
## theta2.1154 -0.868 2d
## theta2.1222 0.473 2d
## theta2.1637 0.140 2d
## theta2.1995 -0.556 2d
## theta2.3369 1.986 2d
## theta2.4004 2.051 2d
## theta2.4209 1.419 2d
## theta2.4268 1.897 2d
## theta2.4841 0.411 2d
## theta2.5074 -0.363 2d
## theta2.5495 1.288 2d
## theta2.5710 -0.161 2d
## theta2.8294 1.390 2d
## theta2.8980 1.575 2d
## theta2.9899 1.347 2d
## gamma.1 1.127 2d
## gamma.25 0.984 2d
## gamma.26 1.064 2d
## gamma.27 1.081 2d
## gamma.28 0.713 2d
## gamma.29 1.180 2d
## gamma.30 0.607 2d
## gamma.31 0.241 2d
## gamma.34 1.212 2d
## gamma.35 1.557 2d
## gamma.36 1.535 2d
## gamma.37 1.533 2d
## gamma.38 1.566 2d
seeds <- c(4610, 5712, 6814) # Set seeds for each chain
chains2dDP <- vector("list", n_chains)
for (i in 1:n_chains) {
chains2dDP[[i]] <- MCMCpaircompare2dDP(
pairDataCDAT,
# Main sampling chain defined:
theta.constraints=list(
# Note that "+" = constrained to be positive and "-" = constrained to negative
# Note that "n" = fixed value if desired
# Constraining each item to one quadrant based on theory
'5074' = list(1, "+"),
'5074' = list(2, "-"),
'5495' = list(1, "-"),
'5495' = list(2, "+")
),
verbose=5000, # Prints status updates every __ iterations.
burnin=5000, # Discards the first __ samples to stabilize.
mcmc=15000, # Runs __ iterations for estimation.
thin=25, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
gamma.start=gamma_start, # Uses the initialized respondent perception values.
theta.start=theta_start2d,
store.theta=TRUE, # Saves item-specfic draws.
store.gamma=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE, # Disables Procrustes alignment
# Cluster MCMC at each stored iteration of the main chain:
alpha=1, # Sets the Dirichlet Process concentration parameter.
cluster.max=4, # Allows up to n clusters of respondents.
cluster.mcmc=100, # Runs __ iterations of MCMC for clustering at each stored iteration of main chain
alpha.fixed=FALSE, # Allows the concentration parameter to be learned from the data.
a0=1, b0=1 # Hyperparameters for the Dirichlet Process prior for cluster sampling.
)
}
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
##
##
## MCMCpaircompare2dDP iteration 1 of 20000
##
##
## MCMCpaircompare2dDP iteration 5001 of 20000
##
##
## MCMCpaircompare2dDP iteration 10001 of 20000
##
##
## MCMCpaircompare2dDP iteration 15001 of 20000
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2dDP <- mcmc.list(lapply(chains2dDP, mcmc))
# Save object
save(mcmc_list2dDP, file = "Out/mcmc_list2dDP.Rdata")
# For 2D-DP model
gelman_diag_2dDP <- gelman.diag(mcmc_list2dDP)
max_rhat_2dDP <- max(gelman_diag_2dDP$psrf[, "Point est."])
gelman_diag_2dDP
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta1.1121 1.011 1.04
## theta1.1154 1.013 1.04
## theta1.1222 1.011 1.04
## theta1.1637 1.010 1.04
## theta1.1995 1.016 1.06
## theta1.3369 1.013 1.05
## theta1.4004 1.004 1.02
## theta1.4209 1.002 1.01
## theta1.4268 1.010 1.04
## theta1.4841 1.009 1.03
## theta1.5074 1.015 1.06
## theta1.5495 1.007 1.03
## theta1.5710 1.003 1.01
## theta1.8294 1.005 1.02
## theta1.8980 1.002 1.01
## theta1.9899 1.003 1.01
## theta2.1121 1.004 1.01
## theta2.1154 1.003 1.01
## theta2.1222 1.001 1.00
## theta2.1637 0.999 1.00
## theta2.1995 1.001 1.01
## theta2.3369 1.004 1.01
## theta2.4004 1.005 1.02
## theta2.4209 1.003 1.02
## theta2.4268 1.003 1.01
## theta2.4841 1.007 1.02
## theta2.5074 1.002 1.01
## theta2.5495 1.002 1.01
## theta2.5710 1.000 1.00
## theta2.8294 1.001 1.01
## theta2.8980 1.001 1.01
## theta2.9899 1.001 1.00
## gamma.1 1.005 1.02
## gamma.25 1.001 1.00
## gamma.26 1.004 1.02
## gamma.27 1.004 1.02
## gamma.28 1.007 1.03
## gamma.29 1.002 1.01
## gamma.30 1.004 1.02
## gamma.31 1.000 1.00
## gamma.34 1.004 1.01
## gamma.35 1.007 1.02
## gamma.36 1.002 1.01
## gamma.37 1.002 1.01
## gamma.38 1.002 1.01
## cluster.1 1.018 1.05
## cluster.25 1.013 1.04
## cluster.26 1.017 1.05
## cluster.27 1.018 1.05
## cluster.28 1.039 1.14
## cluster.29 1.018 1.05
## cluster.30 1.044 1.15
## cluster.31 1.031 1.09
## cluster.34 1.016 1.04
## cluster.35 1.096 1.30
## cluster.36 1.095 1.30
## cluster.37 1.080 1.26
## cluster.38 1.092 1.29
## n.clusters 1.002 1.01
## alpha 1.006 1.03
##
## Multivariate psrf
##
## 1.14
Show model convergence statistics of all three models– across all parameters max \(\hat{R}\)
data.frame(
Model = c("1D", "2D", "2D-DP"),
Max_Rhat = c(max_rhat_1d, max_rhat_2d, max_rhat_2dDP)
)
## Model Max_Rhat
## 1 1D 1.023782
## 2 2D 1.029444
## 3 2D-DP 1.095794
summary2dDP <- round(
cbind(
as.data.frame(summary(mcmc_list2dDP)$statistics),
as.data.frame(summary(mcmc_list2dDP)$quantiles)
), 3)
summary2dDP$Model <- "2dDP"
save(summary2dDP, file = "Out/summary2dDP.Rdata")
summary2dDP
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta1.1121 1.214 0.403 0.009 0.013 0.477 0.938 1.189 1.477
## theta1.1154 0.020 0.412 0.010 0.014 -0.736 -0.270 -0.003 0.304
## theta1.1222 0.680 0.353 0.008 0.010 0.013 0.443 0.672 0.906
## theta1.1637 0.623 0.350 0.008 0.011 -0.027 0.380 0.614 0.852
## theta1.1995 0.770 0.428 0.010 0.015 -0.015 0.467 0.754 1.048
## theta1.3369 1.601 0.453 0.011 0.013 0.729 1.297 1.597 1.910
## theta1.4004 -0.748 0.414 0.010 0.014 -1.614 -1.024 -0.735 -0.467
## theta1.4209 -1.267 0.401 0.009 0.013 -2.040 -1.536 -1.265 -1.002
## theta1.4268 -0.768 0.446 0.011 0.015 -1.699 -1.048 -0.744 -0.458
## theta1.4841 -0.866 0.384 0.009 0.011 -1.647 -1.120 -0.865 -0.610
## theta1.5074 1.172 0.392 0.009 0.013 0.446 0.902 1.165 1.429
## theta1.5495 -2.329 0.460 0.011 0.016 -3.289 -2.611 -2.319 -2.019
## theta1.5710 -0.831 0.390 0.009 0.011 -1.578 -1.085 -0.835 -0.574
## theta1.8294 -0.169 0.374 0.009 0.012 -0.955 -0.415 -0.159 0.083
## theta1.8980 0.697 0.385 0.009 0.012 -0.072 0.431 0.700 0.965
## theta1.9899 0.483 0.378 0.009 0.012 -0.232 0.235 0.481 0.743
## theta2.1121 -1.064 0.370 0.009 0.012 -1.812 -1.308 -1.053 -0.807
## theta2.1154 -1.532 0.357 0.008 0.010 -2.215 -1.763 -1.534 -1.291
## theta2.1222 -0.162 0.322 0.008 0.010 -0.825 -0.377 -0.156 0.060
## theta2.1637 -0.483 0.332 0.008 0.011 -1.156 -0.700 -0.493 -0.257
## theta2.1995 -1.141 0.354 0.008 0.010 -1.849 -1.373 -1.127 -0.893
## theta2.3369 1.357 0.402 0.009 0.012 0.532 1.104 1.373 1.633
## theta2.4004 1.320 0.326 0.008 0.009 0.696 1.100 1.310 1.528
## theta2.4209 0.787 0.326 0.008 0.010 0.188 0.561 0.773 1.002
## theta2.4268 1.129 0.326 0.008 0.009 0.513 0.905 1.122 1.353
## theta2.4841 -0.306 0.324 0.008 0.009 -0.906 -0.534 -0.319 -0.093
## theta2.5074 -1.022 0.369 0.009 0.012 -1.777 -1.252 -1.020 -0.759
## theta2.5495 0.532 0.347 0.008 0.011 0.034 0.267 0.482 0.732
## theta2.5710 -0.909 0.338 0.008 0.009 -1.565 -1.133 -0.918 -0.694
## theta2.8294 0.776 0.317 0.007 0.009 0.162 0.564 0.763 0.994
## theta2.8980 0.930 0.337 0.008 0.010 0.283 0.707 0.922 1.157
## theta2.9899 0.650 0.337 0.008 0.011 -0.026 0.429 0.662 0.876
## gamma.1 0.861 0.134 0.003 0.005 0.595 0.774 0.867 0.956
## gamma.25 0.840 0.150 0.004 0.006 0.515 0.752 0.852 0.948
## gamma.26 0.861 0.133 0.003 0.006 0.595 0.774 0.866 0.955
## gamma.27 0.861 0.133 0.003 0.006 0.595 0.775 0.867 0.956
## gamma.28 0.386 0.211 0.005 0.008 0.025 0.222 0.398 0.531
## gamma.29 0.863 0.133 0.003 0.005 0.595 0.775 0.869 0.957
## gamma.30 0.299 0.205 0.005 0.008 0.012 0.115 0.279 0.467
## gamma.31 0.129 0.102 0.002 0.002 0.005 0.047 0.105 0.185
## gamma.34 0.865 0.137 0.003 0.006 0.595 0.776 0.869 0.959
## gamma.35 1.424 0.110 0.003 0.004 1.164 1.365 1.444 1.510
## gamma.36 1.423 0.110 0.003 0.004 1.164 1.362 1.444 1.510
## gamma.37 1.423 0.112 0.003 0.004 1.164 1.360 1.443 1.511
## gamma.38 1.428 0.106 0.002 0.004 1.175 1.370 1.446 1.511
## cluster.1 2.260 1.184 0.028 0.117 1.000 1.000 2.000 3.000
## cluster.25 2.283 1.180 0.028 0.110 1.000 1.000 2.000 3.000
## cluster.26 2.253 1.188 0.028 0.116 1.000 1.000 2.000 3.000
## cluster.27 2.269 1.188 0.028 0.117 1.000 1.000 2.000 3.000
## cluster.28 2.527 1.084 0.026 0.066 1.000 2.000 3.000 3.000
## cluster.29 2.266 1.187 0.028 0.116 1.000 1.000 2.000 3.000
## cluster.30 2.569 1.069 0.025 0.068 1.000 2.000 3.000 3.000
## cluster.31 2.638 1.049 0.025 0.070 1.000 2.000 3.000 4.000
## cluster.34 2.266 1.180 0.028 0.115 1.000 1.000 2.000 3.000
## cluster.35 2.517 1.155 0.027 0.114 1.000 1.000 2.000 4.000
## cluster.36 2.513 1.151 0.027 0.110 1.000 1.000 2.000 4.000
## cluster.37 2.508 1.151 0.027 0.113 1.000 1.000 2.000 4.000
## cluster.38 2.509 1.147 0.027 0.117 1.000 1.000 2.000 4.000
## n.clusters 3.817 0.387 0.009 0.010 3.000 4.000 4.000 4.000
## alpha 1.668 1.029 0.024 0.029 0.339 0.957 1.435 2.143
## 97.5% Model
## theta1.1121 2.066 2dDP
## theta1.1154 0.858 2dDP
## theta1.1222 1.426 2dDP
## theta1.1637 1.323 2dDP
## theta1.1995 1.634 2dDP
## theta1.3369 2.471 2dDP
## theta1.4004 0.028 2dDP
## theta1.4209 -0.494 2dDP
## theta1.4268 0.015 2dDP
## theta1.4841 -0.112 2dDP
## theta1.5074 1.993 2dDP
## theta1.5495 -1.454 2dDP
## theta1.5710 -0.076 2dDP
## theta1.8294 0.546 2dDP
## theta1.8980 1.443 2dDP
## theta1.9899 1.222 2dDP
## theta2.1121 -0.378 2dDP
## theta2.1154 -0.827 2dDP
## theta2.1222 0.449 2dDP
## theta2.1637 0.154 2dDP
## theta2.1995 -0.481 2dDP
## theta2.3369 2.104 2dDP
## theta2.4004 1.986 2dDP
## theta2.4209 1.456 2dDP
## theta2.4268 1.775 2dDP
## theta2.4841 0.379 2dDP
## theta2.5074 -0.332 2dDP
## theta2.5495 1.362 2dDP
## theta2.5710 -0.222 2dDP
## theta2.8294 1.394 2dDP
## theta2.8980 1.605 2dDP
## theta2.9899 1.301 2dDP
## gamma.1 1.115 2dDP
## gamma.25 1.096 2dDP
## gamma.26 1.115 2dDP
## gamma.27 1.115 2dDP
## gamma.28 0.802 2dDP
## gamma.29 1.115 2dDP
## gamma.30 0.700 2dDP
## gamma.31 0.386 2dDP
## gamma.34 1.121 2dDP
## gamma.35 1.565 2dDP
## gamma.36 1.565 2dDP
## gamma.37 1.565 2dDP
## gamma.38 1.565 2dDP
## cluster.1 4.000 2dDP
## cluster.25 4.000 2dDP
## cluster.26 4.000 2dDP
## cluster.27 4.000 2dDP
## cluster.28 4.000 2dDP
## cluster.29 4.000 2dDP
## cluster.30 4.000 2dDP
## cluster.31 4.000 2dDP
## cluster.34 4.000 2dDP
## cluster.35 4.000 2dDP
## cluster.36 4.000 2dDP
## cluster.37 4.000 2dDP
## cluster.38 4.000 2dDP
## n.clusters 4.000 2dDP
## alpha 4.268 2dDP
With a max of 5 clusters and 100 iterations for a cluster at each stored iteration if the main MCMC chain, the final number of clusters matches the visual plot of gammas perfectly.
From Levy & Mislevy (2016) Ch. 10.2.4 & 10.2.5Posterior Predictive Model Checking
The common feature across the 2-dimensional models are (a) posterior thetas of each item’s location in 2-dimensional latent space and precision of that distribution; and (b) the posterior gammas of each rater.
The common feature of all models is the choice of one item over another based on th model-implied latent traits.
The prediction of choices based on model-implied latent variables should be close obserbed choices.
The selection of the left item (j1) over the second (j2) is the test statistic to be examined with Posterior Predictive Model Checking.
(See Levy & Mislevy, 2016, pp. 237-239).
First, a function that will - combine all chains into one list of samples - iterate 200 sets of simulated choices based on comparisons that match the rater and theta IDs of the observed comparisons - for each iteration draw one value from all chains of \(\theta_{1}\), \(\theta_{2}\), and \(\gamma\). - for each comparison of the 960 observations, extract the two items and the rater for a simulated choice. - find the difference between the two items’ thetas according to the model - add the two thetas in proportion to the gamma for the variable \(\eta\) - use eta as input to the probit link function to find the probability of choosing j1 over j2 - make a random draw of 1 or 0 governed by the binomial distribution and probability computed from eta - add the 1 or 0 to a matrix of choices of j1 over j2 - return the simulated choices for the 960 comparisons 200 times
simulate_pp_data_mcmclist2d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta1_names <- grep("^theta1\\.", colnames(all_samples), value = TRUE)
theta2_names <- grep("^theta2\\.", colnames(all_samples), value = TRUE)
gamma_names <- grep("^gamma\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) { # Row by row iteration
s <- sample(1:nrow(all_samples), 1) # Draw from any chain
theta1 <- all_samples[s, theta1_names] # Draw one value for theta1
theta2 <- all_samples[s, theta2_names] # Draw one value for theta2
gamma <- all_samples[s, gamma_names] # Draw one value for gamma
for (i in 1:n_obs) { # Column by column iteration
j1 <- as.character(pair_data$itemj1[i]) # Locate the first item number for that row from the observed
j2 <- as.character(pair_data$itemj2[i]) # Locate the second item number for that row from the observed
rater <- as.character(pair_data$rater[i]) # Locate the rater for that row from the observed
# Find the model-implied diff between sampled theta1 of 2 items
diff1 <- theta1[paste0("theta1.", j1)] - theta1[paste0("theta1.", j2)]
# Find the model-implied diff between sampled theta2 of 2 items
diff2 <- theta2[paste0("theta2.", j1)] - theta2[paste0("theta2.", j2)]
g <- gamma[paste0("gamma.", rater)] # Find the model-implied sampled gamma of the rater
# eta will be the input to the probit link function
# cos() & sin() assume angles are in radians as they are
zi1 <- cos(g)
zi2 <- sin(g)
# Add thetas in proportion to sine or cosine of gamma, representing how much more favorable j1 is than j2
eta <- zi1 * diff1 + zi2 * diff2
# If eta is negative, then j2 is preferrable. If close to 0, then it's a toss-up.
p <- pnorm(eta) # transform eta using the probit link to the probability j1 is chosen over j2.
# If p is <.5 preference for j2, p = .5 no preference, p > .5 preference for j1.
# Generate 1 draw of 0 or 1 based on p, and add it to the sim_choices matrix
sim_choices[r, i] <- rbinom(n = 1, size = 1, p)
}
}
# return a set of binary choices where 1 is a choice of j1 and 0 is a choice of j2
return(sim_choices)
}
Run the function to simulate 200 sets of observed choices for each 2d model Each data set is a row in a matrix 960 x 200
First simulated data sets for the 2d without clusters
sim_2d <- simulate_pp_data_mcmclist2d(mcmc_list2d, pairDataCDAT)
Next simulate data sets for the 2d with clusters
sim_2dDP <- simulate_pp_data_mcmclist2d(mcmc_list2dDP, pairDataCDAT)
Convert observed item ID choices to 1/0 for comparison to simulated data. If itemj1 is chosen, then 1, if j2 is chosen, then 0.
#If choice is first item, 1, if choice is second item, 0
observed_binary <- ifelse(pairDataCDAT$choice == pairDataCDAT$itemj1, 1,
ifelse(pairDataCDAT$choice == pairDataCDAT$itemj2, 0, NA))
A function to compute the p-value of observed and simulated choices - given the binary version of the observed data, find the mean (proportion of j1 choices) for each simulated data set - compute the proportion of simulated data sets that have greater than or equal to the proportion of j1 choices in observed
compute_ppp_proportion <- function(observed_choices, sim_choices_matrix) {
obs_prop <- mean(observed_choices)
# The mean of the row is the proportion of same choice for that simulated data set
sim_prop <- rowMeans(sim_choices_matrix)
sim_correct <- rowMeans(sim_choices_matrix == matrix(observed_choices,
nrow = nrow(sim_choices_matrix),
ncol = length(observed_choices),
byrow = TRUE))
# Posterior predictive p-value
ppp <- mean(sim_prop >= obs_prop)
list(
observed = obs_prop,
simulated = sim_prop,
simulated_correct = sim_correct,
ppp = ppp
)
}
Compute Posterior Predictive P-value (PPP) for each 2d model
ppp2d <- compute_ppp_proportion(observed_binary, sim_2d)
ppp2d
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.5333333 0.5093750 0.5000000 0.4958333 0.4989583 0.5375000 0.5291667
## [8] 0.5354167 0.5020833 0.5083333 0.5020833 0.5177083 0.4989583 0.5197917
## [15] 0.5229167 0.5114583 0.4989583 0.5239583 0.5145833 0.5031250 0.5322917
## [22] 0.4864583 0.5093750 0.5062500 0.5156250 0.4770833 0.5270833 0.4968750
## [29] 0.5343750 0.5218750 0.5135417 0.5052083 0.5052083 0.4885417 0.4885417
## [36] 0.5114583 0.5208333 0.5031250 0.5041667 0.5156250 0.5041667 0.4802083
## [43] 0.5093750 0.4968750 0.4979167 0.4927083 0.5281250 0.4864583 0.5125000
## [50] 0.5072917 0.5208333 0.4989583 0.5156250 0.5114583 0.4895833 0.4916667
## [57] 0.5166667 0.5145833 0.4864583 0.4979167 0.4770833 0.5145833 0.5125000
## [64] 0.5010417 0.5072917 0.5052083 0.4947917 0.5093750 0.5260417 0.4875000
## [71] 0.5197917 0.4958333 0.4864583 0.5260417 0.5208333 0.4937500 0.4958333
## [78] 0.4781250 0.5020833 0.5104167 0.5322917 0.5135417 0.5083333 0.4812500
## [85] 0.5114583 0.4916667 0.5197917 0.5020833 0.5041667 0.5114583 0.5020833
## [92] 0.5197917 0.5114583 0.5072917 0.5041667 0.4822917 0.5177083 0.4947917
## [99] 0.5104167 0.5291667 0.5010417 0.5041667 0.4989583 0.5093750 0.5052083
## [106] 0.5145833 0.4729167 0.5104167 0.5156250 0.4875000 0.5208333 0.5135417
## [113] 0.4927083 0.4802083 0.5260417 0.5177083 0.5145833 0.5020833 0.5270833
## [120] 0.5145833 0.4906250 0.4875000 0.4854167 0.4916667 0.5187500 0.5260417
## [127] 0.4916667 0.5125000 0.4947917 0.5000000 0.5156250 0.4979167 0.4979167
## [134] 0.4958333 0.5156250 0.5208333 0.5395833 0.4697917 0.4958333 0.5229167
## [141] 0.4791667 0.4854167 0.5000000 0.5218750 0.4989583 0.4906250 0.5104167
## [148] 0.5020833 0.5166667 0.4958333 0.5083333 0.5031250 0.4979167 0.5197917
## [155] 0.4958333 0.5145833 0.4968750 0.5114583 0.5000000 0.5000000 0.4927083
## [162] 0.5197917 0.4989583 0.5302083 0.5020833 0.4947917 0.4989583 0.5177083
## [169] 0.5145833 0.5104167 0.4906250 0.5125000 0.5083333 0.5093750 0.4854167
## [176] 0.5072917 0.5052083 0.5177083 0.4895833 0.5041667 0.5000000 0.5343750
## [183] 0.4937500 0.5052083 0.4781250 0.5062500 0.5000000 0.5062500 0.5041667
## [190] 0.4916667 0.5072917 0.5093750 0.5093750 0.5083333 0.4947917 0.5104167
## [197] 0.5343750 0.5010417 0.5218750 0.5083333
##
## $simulated_correct
## [1] 0.6750000 0.6739583 0.6708333 0.6666667 0.6697917 0.7020833 0.6895833
## [8] 0.6708333 0.6729167 0.6708333 0.6833333 0.6677083 0.6927083 0.6718750
## [15] 0.6708333 0.6552083 0.6677083 0.6739583 0.7020833 0.6718750 0.6635417
## [22] 0.6677083 0.6718750 0.6770833 0.6531250 0.6937500 0.6958333 0.6906250
## [29] 0.6802083 0.6947917 0.6989583 0.6989583 0.6989583 0.6718750 0.6802083
## [36] 0.6656250 0.6562500 0.6677083 0.6791667 0.6885417 0.6854167 0.6802083
## [43] 0.6802083 0.6906250 0.6687500 0.6760417 0.6927083 0.6718750 0.6812500
## [50] 0.6697917 0.6645833 0.6656250 0.6468750 0.6718750 0.6625000 0.6645833
## [57] 0.6750000 0.6541667 0.6739583 0.6854167 0.6875000 0.6875000 0.6958333
## [64] 0.6760417 0.6927083 0.6531250 0.6739583 0.6864583 0.6864583 0.6770833
## [71] 0.7010417 0.6687500 0.6947917 0.6447917 0.6770833 0.6916667 0.6854167
## [78] 0.6718750 0.6625000 0.6854167 0.6531250 0.6718750 0.7062500 0.6583333
## [85] 0.6739583 0.6833333 0.6885417 0.7125000 0.6791667 0.6802083 0.6854167
## [92] 0.6843750 0.6552083 0.6968750 0.6666667 0.6885417 0.6802083 0.6906250
## [99] 0.6895833 0.6770833 0.6781250 0.6645833 0.6593750 0.6718750 0.6843750
## [106] 0.6812500 0.6812500 0.6666667 0.6885417 0.7020833 0.6666667 0.6593750
## [113] 0.6947917 0.6697917 0.6781250 0.6822917 0.6958333 0.6750000 0.7000000
## [120] 0.6812500 0.6677083 0.6791667 0.7187500 0.6750000 0.6875000 0.6947917
## [127] 0.6916667 0.6916667 0.6552083 0.6875000 0.6739583 0.6541667 0.6604167
## [134] 0.6812500 0.6697917 0.6625000 0.6958333 0.6614583 0.6875000 0.6875000
## [141] 0.6812500 0.6562500 0.6812500 0.6864583 0.6822917 0.6718750 0.6958333
## [148] 0.6708333 0.6625000 0.6854167 0.6791667 0.6510417 0.6500000 0.6864583
## [155] 0.6770833 0.6687500 0.6718750 0.6885417 0.6770833 0.6729167 0.6864583
## [162] 0.7010417 0.6822917 0.6697917 0.6750000 0.6968750 0.6572917 0.6906250
## [169] 0.6708333 0.7000000 0.6802083 0.6791667 0.6854167 0.6906250 0.7062500
## [176] 0.6760417 0.6802083 0.6989583 0.6854167 0.6604167 0.6687500 0.6781250
## [183] 0.6791667 0.6677083 0.6864583 0.7145833 0.6791667 0.6500000 0.6812500
## [190] 0.6500000 0.6739583 0.6947917 0.6781250 0.6729167 0.6843750 0.6729167
## [197] 0.6927083 0.6781250 0.6885417 0.6916667
##
## $ppp
## [1] 0.29
summary(ppp2d$simulated_correct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6448 0.6698 0.6792 0.6786 0.6878 0.7188
ppp2dDP <- compute_ppp_proportion(observed_binary, sim_2dDP)
ppp2dDP
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.4885417 0.4875000 0.4979167 0.5208333 0.5312500 0.5208333 0.5177083
## [8] 0.5187500 0.5104167 0.5010417 0.5125000 0.5020833 0.4989583 0.5197917
## [15] 0.4989583 0.5197917 0.4906250 0.4895833 0.5208333 0.5052083 0.5083333
## [22] 0.5083333 0.5062500 0.4802083 0.5208333 0.5125000 0.5052083 0.5072917
## [29] 0.4947917 0.4937500 0.4895833 0.4937500 0.5156250 0.5260417 0.5020833
## [36] 0.5083333 0.5010417 0.5072917 0.4979167 0.4906250 0.4937500 0.4729167
## [43] 0.4875000 0.4760417 0.4968750 0.5093750 0.5250000 0.4791667 0.4822917
## [50] 0.5250000 0.5000000 0.5104167 0.5041667 0.5187500 0.5031250 0.5072917
## [57] 0.4593750 0.5114583 0.5000000 0.4927083 0.4822917 0.5250000 0.4968750
## [64] 0.5031250 0.5041667 0.4854167 0.5135417 0.5062500 0.4885417 0.4843750
## [71] 0.5093750 0.5145833 0.4875000 0.5062500 0.5062500 0.5041667 0.4718750
## [78] 0.5156250 0.5239583 0.4979167 0.4864583 0.5270833 0.5041667 0.4760417
## [85] 0.5177083 0.4781250 0.5000000 0.5083333 0.4854167 0.5093750 0.4979167
## [92] 0.4968750 0.5166667 0.4864583 0.5156250 0.5072917 0.5135417 0.5083333
## [99] 0.5135417 0.5010417 0.5145833 0.4864583 0.4916667 0.5218750 0.5104167
## [106] 0.5166667 0.5187500 0.5041667 0.5020833 0.5291667 0.5083333 0.4937500
## [113] 0.5250000 0.5135417 0.5104167 0.5062500 0.4885417 0.5104167 0.5020833
## [120] 0.5062500 0.5135417 0.5197917 0.5187500 0.5125000 0.5020833 0.5010417
## [127] 0.5177083 0.5062500 0.5145833 0.5041667 0.4875000 0.5052083 0.5312500
## [134] 0.5000000 0.5208333 0.5010417 0.5145833 0.5135417 0.5177083 0.5052083
## [141] 0.5062500 0.4916667 0.5062500 0.5208333 0.5135417 0.5010417 0.5104167
## [148] 0.4864583 0.5010417 0.5208333 0.5041667 0.4864583 0.5062500 0.5395833
## [155] 0.4958333 0.5177083 0.5135417 0.5072917 0.5458333 0.4958333 0.4979167
## [162] 0.5114583 0.5135417 0.4833333 0.5187500 0.5145833 0.5145833 0.4875000
## [169] 0.4750000 0.4968750 0.5062500 0.5229167 0.5197917 0.4968750 0.5062500
## [176] 0.5062500 0.5062500 0.4822917 0.4833333 0.5010417 0.5083333 0.4854167
## [183] 0.5041667 0.5135417 0.5197917 0.5114583 0.5041667 0.5083333 0.5052083
## [190] 0.5343750 0.5104167 0.5093750 0.5322917 0.5375000 0.4958333 0.5093750
## [197] 0.5072917 0.5166667 0.4875000 0.5052083
##
## $simulated_correct
## [1] 0.6614583 0.6750000 0.6791667 0.6687500 0.6937500 0.6687500 0.6906250
## [8] 0.6833333 0.6791667 0.6760417 0.6895833 0.6875000 0.6843750 0.6781250
## [15] 0.6927083 0.6677083 0.6760417 0.7104167 0.6687500 0.7031250 0.6833333
## [22] 0.6895833 0.6583333 0.6781250 0.6708333 0.6937500 0.6760417 0.6677083
## [29] 0.6739583 0.6666667 0.6625000 0.6854167 0.6614583 0.6885417 0.6687500
## [36] 0.6895833 0.6885417 0.6927083 0.6812500 0.6718750 0.6875000 0.6979167
## [43] 0.6791667 0.6760417 0.6635417 0.6677083 0.6979167 0.6625000 0.6697917
## [50] 0.6812500 0.6895833 0.6854167 0.6645833 0.6791667 0.6885417 0.6781250
## [57] 0.6802083 0.6843750 0.6729167 0.6885417 0.6635417 0.6750000 0.6864583
## [64] 0.6697917 0.6666667 0.6541667 0.6781250 0.6854167 0.7135417 0.6677083
## [71] 0.6885417 0.6833333 0.6812500 0.6916667 0.6875000 0.6916667 0.6906250
## [78] 0.6677083 0.6677083 0.7020833 0.6802083 0.6770833 0.6520833 0.6677083
## [85] 0.6614583 0.6781250 0.6625000 0.6833333 0.6833333 0.6760417 0.6770833
## [92] 0.6593750 0.6791667 0.6781250 0.6760417 0.7052083 0.6614583 0.6875000
## [99] 0.6739583 0.6968750 0.6875000 0.6802083 0.6500000 0.6781250 0.6979167
## [106] 0.6958333 0.6750000 0.6583333 0.6500000 0.6895833 0.6708333 0.6958333
## [113] 0.6791667 0.6885417 0.6750000 0.6916667 0.6635417 0.6708333 0.6770833
## [120] 0.7208333 0.6864583 0.6760417 0.6666667 0.6833333 0.6708333 0.6927083
## [127] 0.6760417 0.6645833 0.6666667 0.6687500 0.6791667 0.6947917 0.6875000
## [134] 0.6791667 0.6666667 0.7031250 0.6833333 0.7010417 0.6864583 0.6864583
## [141] 0.6500000 0.6625000 0.6937500 0.7020833 0.6510417 0.6885417 0.6604167
## [148] 0.6802083 0.6656250 0.6916667 0.6750000 0.6885417 0.7083333 0.7000000
## [155] 0.6958333 0.6843750 0.6802083 0.6864583 0.6645833 0.6645833 0.6750000
## [162] 0.6843750 0.6760417 0.6812500 0.6645833 0.6895833 0.6875000 0.6541667
## [169] 0.7062500 0.6989583 0.6812500 0.6645833 0.6864583 0.6864583 0.6916667
## [176] 0.6958333 0.6604167 0.6531250 0.6750000 0.7052083 0.6750000 0.6729167
## [183] 0.7020833 0.6906250 0.6968750 0.6677083 0.6708333 0.6687500 0.7177083
## [190] 0.6739583 0.6875000 0.6989583 0.6739583 0.6520833 0.6687500 0.6927083
## [197] 0.6531250 0.6645833 0.6770833 0.6739583
##
## $ppp
## [1] 0.255
summary(ppp2dDP$simulated_correct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6500 0.6687 0.6792 0.6796 0.6885 0.7208
Let’s check the prediction of choices using the 1d model next. For each posterior sample of \(\theta\) and \(\alpha\), simulate responses.
A function to simulate responses using 1d model
simulate_pp_data_mcmclist1d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta_names <- grep("^theta\\.", colnames(all_samples), value = TRUE)
alpha_names <- grep("^alpha\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) {
s <- sample(1:nrow(all_samples), 1) # Select one draw from any chain in combined samples
theta <- all_samples[s, theta_names] # Draw one value for theta
alpha <- all_samples[s, alpha_names] # Draw one value for alpha
for (i in 1:n_obs) {
j1 <- as.character(pair_data$itemj1[i])
j2 <- as.character(pair_data$itemj2[i])
rater <- as.character(pair_data$rater[i])
# Safely extract theta/gamma values using column names
diff <- theta[paste0("theta.", j1)] - theta[paste0("theta.", j2)]
a <- alpha[paste0("alpha.", rater)]
eta <- a * diff # Add thetas
p <- pnorm(eta) # probit link is a normal distribution
sim_choices[r, i] <- rbinom(1, 1, p)
}
}
return(sim_choices)
}
Run the function for 1d model
sim_1d <- simulate_pp_data_mcmclist1d(mcmc_list1d, pairDataCDAT)
Compute Posterior Predictive P-value (PPP) for 1d model
ppp1d <- compute_ppp_proportion(observed_binary, sim_1d)
ppp1d
## $observed
## [1] 0.5145833
##
## $simulated
## [1] 0.4833333 0.5229167 0.5000000 0.4916667 0.5239583 0.5375000 0.4989583
## [8] 0.4833333 0.4927083 0.5083333 0.5177083 0.5010417 0.5239583 0.4979167
## [15] 0.4802083 0.5177083 0.4854167 0.5177083 0.4812500 0.4739583 0.5218750
## [22] 0.4937500 0.5156250 0.5062500 0.4937500 0.4895833 0.5072917 0.5104167
## [29] 0.5229167 0.5166667 0.5239583 0.4750000 0.4854167 0.5166667 0.4833333
## [36] 0.4864583 0.5104167 0.5187500 0.4979167 0.5239583 0.5062500 0.4833333
## [43] 0.5218750 0.5145833 0.4947917 0.4937500 0.4968750 0.5020833 0.5177083
## [50] 0.4895833 0.4989583 0.4927083 0.4947917 0.5145833 0.5062500 0.4812500
## [57] 0.4937500 0.4885417 0.4916667 0.5187500 0.5125000 0.4895833 0.5083333
## [64] 0.4937500 0.4885417 0.5052083 0.5000000 0.5156250 0.5041667 0.5031250
## [71] 0.4958333 0.4916667 0.5031250 0.5062500 0.5135417 0.4875000 0.4864583
## [78] 0.5177083 0.5218750 0.5052083 0.5166667 0.4906250 0.5218750 0.4916667
## [85] 0.5291667 0.5072917 0.5062500 0.5041667 0.5010417 0.5062500 0.4895833
## [92] 0.4885417 0.4968750 0.5250000 0.5000000 0.4854167 0.4833333 0.5020833
## [99] 0.4739583 0.5354167 0.4968750 0.5156250 0.5135417 0.4958333 0.4885417
## [106] 0.4989583 0.5281250 0.5010417 0.5062500 0.5135417 0.4843750 0.5083333
## [113] 0.5104167 0.5177083 0.4947917 0.5104167 0.4885417 0.5302083 0.5187500
## [120] 0.4927083 0.5031250 0.4802083 0.4781250 0.5156250 0.5010417 0.5010417
## [127] 0.5239583 0.4864583 0.5114583 0.5229167 0.4885417 0.4989583 0.5083333
## [134] 0.5083333 0.4979167 0.4979167 0.5062500 0.4989583 0.5114583 0.5104167
## [141] 0.5062500 0.4979167 0.4916667 0.4947917 0.4864583 0.4947917 0.4968750
## [148] 0.5072917 0.5125000 0.4989583 0.5062500 0.4927083 0.5072917 0.4979167
## [155] 0.4843750 0.5041667 0.4906250 0.4531250 0.4968750 0.5052083 0.5177083
## [162] 0.4958333 0.4927083 0.5031250 0.5270833 0.4958333 0.4937500 0.4979167
## [169] 0.5093750 0.4937500 0.5072917 0.4843750 0.5041667 0.5052083 0.5000000
## [176] 0.4979167 0.5062500 0.5135417 0.5083333 0.5052083 0.5104167 0.5072917
## [183] 0.5104167 0.4979167 0.4906250 0.5000000 0.4979167 0.5187500 0.5093750
## [190] 0.4927083 0.4947917 0.4812500 0.5010417 0.5156250 0.4989583 0.5281250
## [197] 0.4906250 0.4770833 0.4968750 0.5125000
##
## $simulated_correct
## [1] 0.6312500 0.6270833 0.6187500 0.6250000 0.6406250 0.6145833 0.6302083
## [8] 0.6395833 0.6281250 0.6541667 0.6239583 0.6281250 0.6385417 0.6062500
## [15] 0.6322917 0.6114583 0.6333333 0.6302083 0.6208333 0.6343750 0.6406250
## [22] 0.6312500 0.6239583 0.6437500 0.6229167 0.6250000 0.6302083 0.6458333
## [29] 0.6000000 0.6354167 0.6406250 0.6000000 0.6375000 0.6020833 0.6333333
## [36] 0.6322917 0.6583333 0.6083333 0.6375000 0.6281250 0.6395833 0.6187500
## [43] 0.6552083 0.6166667 0.6302083 0.6312500 0.6114583 0.6208333 0.6281250
## [50] 0.6520833 0.6010417 0.6260417 0.6177083 0.6104167 0.6583333 0.6187500
## [57] 0.6395833 0.6135417 0.6145833 0.6166667 0.6208333 0.6125000 0.6479167
## [64] 0.6104167 0.6260417 0.6239583 0.6395833 0.6572917 0.6145833 0.5968750
## [71] 0.6395833 0.6145833 0.6302083 0.6145833 0.6239583 0.6145833 0.6093750
## [78] 0.6239583 0.6135417 0.6156250 0.6270833 0.6343750 0.6406250 0.6416667
## [85] 0.6270833 0.6239583 0.6270833 0.6458333 0.6093750 0.6250000 0.6166667
## [92] 0.6197917 0.6135417 0.6125000 0.6187500 0.6250000 0.6145833 0.6125000
## [99] 0.6135417 0.6375000 0.6364583 0.6197917 0.6343750 0.6250000 0.6427083
## [106] 0.6177083 0.6302083 0.6197917 0.6291667 0.6406250 0.6364583 0.6437500
## [113] 0.6437500 0.6302083 0.6260417 0.6145833 0.6322917 0.6364583 0.6229167
## [120] 0.6177083 0.6260417 0.6593750 0.6218750 0.6281250 0.6010417 0.6489583
## [127] 0.6281250 0.6197917 0.6239583 0.6104167 0.6447917 0.6114583 0.6541667
## [134] 0.6250000 0.6062500 0.6333333 0.6312500 0.6302083 0.6385417 0.6145833
## [141] 0.6208333 0.6416667 0.6062500 0.6364583 0.6281250 0.6427083 0.6260417
## [148] 0.6218750 0.6312500 0.6260417 0.6062500 0.6302083 0.6177083 0.6458333
## [155] 0.6427083 0.6437500 0.6260417 0.6218750 0.6614583 0.6322917 0.6135417
## [162] 0.6270833 0.6135417 0.6322917 0.6229167 0.6437500 0.6208333 0.6104167
## [169] 0.6197917 0.6187500 0.6281250 0.6406250 0.6437500 0.6218750 0.6291667
## [176] 0.6562500 0.6229167 0.5989583 0.6250000 0.6260417 0.6187500 0.6135417
## [183] 0.6416667 0.6270833 0.6218750 0.6625000 0.6229167 0.6354167 0.6260417
## [190] 0.6364583 0.6385417 0.6250000 0.6302083 0.6427083 0.6135417 0.6322917
## [197] 0.6447917 0.6166667 0.6093750 0.6250000
##
## $ppp
## [1] 0.205
summary(ppp1d$simulated_correct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5969 0.6177 0.6260 0.6273 0.6365 0.6625
Plot the observed proportion of selection of itemj1 in each comparison to the distribution of simulated selection of itemj1 in each comparison.
# Density of simulated proportions
dens <- density(ppp1d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (1D Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp1d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
Plot the density of the 2d model simulated proportions and the observed proportion (see Levy & Mislevy, 2016, pp. 238-239)
# Density of simulated proportions
dens <- density(ppp2d$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D Normal Prior Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp2d$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
Plot the density of the 2dDP model simulated proportions and the observed proportion
# Density of simulated proportions
dens <- density(ppp2dDP$simulated)
# Plot
plot(dens, main = "Posterior Predictive Density (2D DP Prior Model)",
xlab = "Simulated Proportion of itemj1 Chosen",
ylab = "Density", lwd = 2, col = "steelblue")
# Add observed value as a vertical red line
abline(v = ppp2dDP$observed, col = "red", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = "Observed Proportion", col = "red", lty = 2, lwd = 2)
All models have similar PPP values (~.25) indicating that the distribution of simulated choices’ proportion of j1 preference is sufficiently amassed around the observed proportion of j1 preferences, based on the rater’s model-implied gamma and the items’ model-implied theta1 and theta2 for that same combination.
This begins to suggest that both of the 2d models produce simulated data that sufficiently resemble actual choices of items by raters.
Additionally, the bulk of simulated choices are within the narrow range of .05 (0.48 to 0.53).
They both consistently predict the proportion of preference for itemj1, but this does not mean that the row-by-row choices are the same. For that, the percent of correct choices was added to the ppp function.
The simulated choices using the model-implied latent variables produced twice as many correct as incorrect choices for each combination of rater and paired items, and slightly better for the 2-dimensional models (0.68) than the one-dimensional mode (0.63). This suggests that the two-dimensional models are just plain better at predicting actual choices, whatever other statistics we come up with.
From Chapter 10.3 of Levy & Mislevy (2016)
The Deviance Information Criterion is defined as “the difference between the posterior mean of the deviance, \(\overline{D(\theta)}\), and the deviance evaluated at the posterior mean, \(D(\bar{\theta})\)” (p. 248). Which is appropriate when the mean of the posterior is a “reasonable point summary of the parameters” (p. 249) which is the case for these parameters. The formula includes a penalty for complexity \(p_{D}\) based on the number of parameters estimated. Look to Gelman et al. (2013) Gill (2007) and Plummer (2008) for more advanced discussion of DIC.
DIC =\(\overline{D(\theta)} + p_{D} = 2\overline{D(\theta)} - D(\bar{\theta}) +2p_{D}\)
Loop over all MCMC iterations:
Compute \(p_{D}\) which is \(\overline{D(\theta)}\) - \(D(\bar{\theta})\).
Compute the DIC which 2 times \(\overline{D(\theta)}\) - \(\overline{D(\theta)}\).
First, the posterior mean of the deviance for 2d model.
computeDbar2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta1 <- setNames(mcmc_matrix[s, theta1_ids], sub("theta1\\.", "", theta1_ids))
theta2 <- setNames(mcmc_matrix[s, theta2_ids], sub("theta2\\.", "", theta2_ids))
gamma <- setNames(mcmc_matrix[s, gamma_ids], sub("gamma\\.", "", gamma_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma[r]
if (is.na(g)) g <- mean(gamma, na.rm = TRUE)
diff1 <- theta1[j2] - theta1[j1]
diff2 <- theta2[j2] - theta2[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE) # Compute log-probability of the observed binary outcome
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
Note that the deviance is defined as minus two times the log-likelihood: \(D(\theta) = -2\log p(y | \theta)\)
So:
Then: \(\overline{D{\theta}}\) is computed by averaging this deviance over MCMC samples \(D(\bar{\theta})\) is computed by evaluating the deviance at the posterior mean of parameters
Use that function:
mcmc_matrix <- do.call(rbind, mcmc_list2d) # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids <- grep("^gamma\\.", colnames(mcmc_matrix), value = TRUE)
D_bar2d <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_bar2d)
## [1] 1233.462
Now a function to compute the deviance at the posterior mean parameters
computeDevianceAtPosteriorMean2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Compute posterior means
theta1_mean <- setNames(colMeans(mcmc_matrix[, theta1_ids]), sub("theta1\\.", "", theta1_ids))
theta2_mean <- setNames(colMeans(mcmc_matrix[, theta2_ids]), sub("theta2\\.", "", theta2_ids))
gamma_mean <- setNames(colMeans(mcmc_matrix[, gamma_ids]), sub("gamma\\.", "", gamma_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma_mean[r]
if (is.na(g)) g <- mean(gamma_mean, na.rm = TRUE)
diff1 <- theta1_mean[j2] - theta1_mean[j1]
diff2 <- theta2_mean[j2] - theta2_mean[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
Use the function, with the same variables passed to it as created in previous ste.
D_mean2d <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2d)
## [1] 1153.236
As a final step find the difference between twice the D_bar and the D_mean:
p_D2d <- D_bar2d - D_mean2d
DIC2d <- D_bar2d + p_D2d
print(DIC2d)
## [1] 1313.688
Repeat for 2dDP these steps using same functions as for the 2d normal prior model.
mcmc_matrix <- do.call(rbind, mcmc_list2dDP) # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix), value = TRUE)
gamma_ids <- grep("^gamma\\.", colnames(mcmc_matrix), value = TRUE)
D_bar2dDP <- computeDbar2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_bar2dDP)
## [1] 1227.389
D_mean2dDP <- computeDevianceAtPosteriorMean2d(mcmc_matrix, pairDataCDAT, theta1_ids, theta2_ids, gamma_ids)
print(D_mean2dDP)
## [1] 1147.307
p_D2dDP <- D_bar2dDP - D_mean2dDP
DIC2dDP <- D_bar2dDP + p_D2dDP
print(DIC2dDP)
## [1] 1307.471
computeDbar1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta <- setNames(mcmc_matrix[s, theta_ids], sub("theta\\.", "", theta_ids))
alpha <- setNames(mcmc_matrix[s, alpha_ids], sub("alpha\\.", "", alpha_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha[r]
if (is.na(a)) a <- mean(alpha, na.rm = TRUE)
diff <- theta[j2] - theta[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
Use that function:
mcmc_matrix <- do.call(rbind, mcmc_list1d) # combine all chains
theta_ids <- grep("^theta\\.", colnames(mcmc_matrix), value = TRUE)
alpha_ids <- grep("^alpha\\.", colnames(mcmc_matrix), value = TRUE)
D_bar1d <- computeDbar1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)
print(D_bar1d)
## [1] 1053.268
Now a function to compute the deviance at the posterior mean parameters
computeDevianceAtPosteriorMean1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Compute posterior means
theta_mean <- setNames(colMeans(mcmc_matrix[, theta_ids]), sub("theta\\.", "", theta_ids))
alpha_mean <- setNames(colMeans(mcmc_matrix[, alpha_ids]), sub("alpha\\.", "", alpha_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha_mean[r]
if (is.na(a)) a <- mean(alpha_mean, na.rm = TRUE)
diff <- theta_mean[j2] - theta_mean[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
Use the function, with the same variables passed to it as created in previous step.
D_mean1d <- computeDevianceAtPosteriorMean1d(mcmc_matrix, pairDataCDAT, theta_ids, alpha_ids)
print(D_mean1d)
## [1] 1026.522
As a final step find the difference between twice the D_bar and the D_mean, then
p_D1d <- D_bar1d - D_mean1d
DIC1d <- D_bar1d + p_D1d
print(DIC1d)
## [1] 1080.014
dic_table <- data.frame(
Model = c("1d", "2d", "2dDP"),
DIC = c(DIC1d, DIC2d, DIC2dDP),
D_bar = c(D_bar1d, D_bar2d, D_bar2dDP),
D_mean = c(D_mean1d, D_mean2d, D_mean2dDP),
p_D = c(p_D1d, p_D2d, p_D2dDP)
)
print(dic_table)
## Model DIC D_bar D_mean p_D
## 1 1d 1080.014 1053.268 1026.522 26.74626
## 2 2d 1313.688 1233.462 1153.236 80.22597
## 3 2dDP 1307.471 1227.389 1147.307 80.08181
The 1d model has the lowest DIC by far, but is less expressive. This statistic considers the trade off between fit and complexity, so the high complexity of the 2d models increases the DIC. Between the two 2d models, the additional parameters of the clustering does increase the deviance statistics of the 2dDP model, with the main difference beeing in the deviance at the posterior mean. The 2d and 2dDP models’ parameters have been shown above to be good at predicting the choices of raters when given the two items in a pairwise comparison.
The 2D-DP model now has virtually the same predictive deviance (D̄ = 1237.04) as before reducing clusters from 6 to 5, confirming that predictive performance is unchanged.
The small drop in p_D (from 81.89 → 81.17) from 6 to 5 clusters still translates into a meaningful DIC reduction, because DIC is sensitive to cumulative model complexity.
The fact that cluster assignments didn’t change confirms that the sixth cluster was effectively unused — but still counted in complexity due to the flexible prior.
Reflection: It would be good to reconsider the PPMC variables of interest, as they seem to conflict with the Deviance Information Criteria.
Even with the relatively sparse data set of comparisons, the model 2d DP works very well to explain the preferences of raters and the placement of items in 2d latent space.
Notes: Set yourself up for further analysis, but not in this paper, more next steps.
Say which defaults you did not use in the functions, such as 3 chains, etc. Maybe say something about “we recognize that winbugs is standard software, but we used this because. . .”
No meeting until after returning July 14th, o
Levy, R., & Mislevy, R. J. (2016). Bayesian psychometric modeling. Chapman & Hall/CRC.
Yu, Q., & Quinn, K. M. (2022). A multidimensional pairwise comparison model for heterogeneous perceptions with an application to modelling the perceived truthfulness of public statements on COVID‐19. Journal of the Royal Statistical Society. Series A, (Statistics in Society), 10.1111/rssa.12810. https://doi.org/10.1111/rssa.12810