library(MCMCpack)
library(bayesplot)
library(coda)
library(ggplot2)
library(tidyverse)

Load the prepared data set

load("Data/pairDataCDAT.1.Rda")

Filter to include only the middle cluster of raters and set up parameters for model function

best_raters <- c(25,26,27,29,34)
n_chains <- 3
pairDataCDAT_select <- pairDataCDAT[pairDataCDAT$rater %in% best_raters, ]

Re-shape the gamma start values for 2d model with only 5 raters

I_select <- length(best_raters)
gamma_start_select <- runif(I_select, pi/4-.4, pi/4+.4)

MCMC 1d model

seeds <- c(0202, 8018, 1304) # Set seeds for each chain
chains1d_select <- vector("list", n_chains)

for (i in 1:n_chains) {
  chains1d_select[[i]] <- MCMCpaircompare(
    pairDataCDAT_select,
    verbose=10000,      # Prints status updates every __ iterations.
    burnin=500,       # Discards the first __ samples to stabilize.
    mcmc=50000,         # Runs __ iterations for estimation.
    thin=20,           # Keeps every __th sample to reduce autocorrelation.
    seed=seeds[i],     # Ensures reproducibility of results.
    alpha.fixed=FALSE,
    alpha.start=1,
    a=0,               # The default prio mean of alpha is 0
    A=.25,             # The default prior precision is 0.25 (variance of 4)
    store.theta=TRUE,  # Saves item-specfic draws.
    store.alpha=TRUE,  # Saves both rater-specific draws.
    tune=.3,           # Sets the tuning parameter for the Metropolis-Hastings step.
    procrustes=FALSE   # Disables Procrustes alignment
    )
}

## 
## 
## MCMCpaircompare iteration 1 of 50500 
## 
## 
## MCMCpaircompare iteration 10001 of 50500 
## 
## 
## MCMCpaircompare iteration 20001 of 50500 
## 
## 
## MCMCpaircompare iteration 30001 of 50500 
## 
## 
## MCMCpaircompare iteration 40001 of 50500 
## 
## 
## MCMCpaircompare iteration 50001 of 50500 
## 
## 
## MCMCpaircompare iteration 1 of 50500 
## 
## 
## MCMCpaircompare iteration 10001 of 50500 
## 
## 
## MCMCpaircompare iteration 20001 of 50500 
## 
## 
## MCMCpaircompare iteration 30001 of 50500 
## 
## 
## MCMCpaircompare iteration 40001 of 50500 
## 
## 
## MCMCpaircompare iteration 50001 of 50500 
## 
## 
## MCMCpaircompare iteration 1 of 50500 
## 
## 
## MCMCpaircompare iteration 10001 of 50500 
## 
## 
## MCMCpaircompare iteration 20001 of 50500 
## 
## 
## MCMCpaircompare iteration 30001 of 50500 
## 
## 
## MCMCpaircompare iteration 40001 of 50500 
## 
## 
## MCMCpaircompare iteration 50001 of 50500

# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list1d_select <- mcmc.list(lapply(chains1d_select, mcmc)) 

# Save object
save(mcmc_list1d_select, file = "Out/mcmc_list1d.Rdata")

# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list1d_select[[1]]), value = TRUE)
alphas <- grep("^alpha", colnames(mcmc_list1d_select[[1]]), value = TRUE)

Check convergence

gelman_diag_1d_select <- gelman.diag(mcmc_list1d_select)
max_rhat_1d_select <- max(gelman_diag_1d_select$psrf[, "Point est."])
gelman_diag_1d_select

## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## theta.1121          1       1.00
## theta.1154          1       1.00
## theta.1222          1       1.00
## theta.1637          1       1.00
## theta.1995          1       1.00
## theta.3369          1       1.00
## theta.4004          1       1.00
## theta.4209          1       1.00
## theta.4268          1       1.00
## theta.4841          1       1.00
## theta.5074          1       1.00
## theta.5495          1       1.00
## theta.5710          1       1.00
## theta.8294          1       1.00
## theta.8980          1       1.00
## theta.9899          1       1.00
## alpha.25            1       1.01
## alpha.26            1       1.00
## alpha.27            1       1.00
## alpha.29            1       1.00
## alpha.34            1       1.01
## 
## Multivariate psrf
## 
## 1

Summarize the model

summary1d_select <- round(
  cbind(
    as.data.frame(summary(mcmc_list1d_select)$statistics),
    as.data.frame(summary(mcmc_list1d_select)$quantiles)
  ), 3) 
summary1d_select$Model <- "1d"
save(summary1d_select, file = "Out/summary1d_select.Rdata")
summary1d_select

##              Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta.1121 -0.276 0.316    0.004          0.006 -0.900 -0.487 -0.276 -0.061
## theta.1154 -1.272 0.406    0.005          0.007 -2.099 -1.535 -1.263 -0.988
## theta.1222  0.429 0.318    0.004          0.005 -0.175  0.206  0.421  0.639
## theta.1637  0.107 0.302    0.003          0.005 -0.479 -0.093  0.105  0.304
## theta.1995 -0.438 0.322    0.004          0.006 -1.073 -0.656 -0.436 -0.217
## theta.3369  1.709 0.480    0.006          0.008  0.840  1.371  1.682  2.019
## theta.4004  0.618 0.342    0.004          0.006 -0.036  0.386  0.610  0.843
## theta.4209 -0.247 0.317    0.004          0.005 -0.875 -0.459 -0.246 -0.029
## theta.4268  0.278 0.310    0.004          0.005 -0.328  0.072  0.276  0.481
## theta.4841 -0.676 0.339    0.004          0.006 -1.362 -0.906 -0.666 -0.443
## theta.5074  0.137 0.298    0.003          0.005 -0.457 -0.066  0.139  0.338
## theta.5495 -1.215 0.397    0.005          0.007 -2.030 -1.475 -1.200 -0.942
## theta.5710 -1.251 0.409    0.005          0.007 -2.088 -1.518 -1.230 -0.969
## theta.8294  0.310 0.309    0.004          0.006 -0.279  0.100  0.305  0.513
## theta.8980  1.088 0.381    0.004          0.006  0.374  0.831  1.071  1.333
## theta.9899  0.690 0.343    0.004          0.006  0.037  0.456  0.683  0.911
## alpha.25    0.784 0.276    0.003          0.004  0.338  0.590  0.753  0.941
## alpha.26    1.576 0.470    0.005          0.007  0.824  1.238  1.525  1.848
## alpha.27    1.331 0.407    0.005          0.006  0.687  1.043  1.273  1.560
## alpha.29    0.876 0.301    0.003          0.004  0.399  0.665  0.841  1.049
## alpha.34    0.784 0.266    0.003          0.004  0.351  0.595  0.754  0.939
##             97.5% Model
## theta.1121  0.334    1d
## theta.1154 -0.514    1d
## theta.1222  1.076    1d
## theta.1637  0.708    1d
## theta.1995  0.172    1d
## theta.3369  2.720    1d
## theta.4004  1.317    1d
## theta.4209  0.372    1d
## theta.4268  0.903    1d
## theta.4841 -0.037    1d
## theta.5074  0.708    1d
## theta.5495 -0.475    1d
## theta.5710 -0.501    1d
## theta.8294  0.922    1d
## theta.8980  1.882    1d
## theta.9899  1.399    1d
## alpha.25    1.426    1d
## alpha.26    2.672    1d
## alpha.27    2.295    1d
## alpha.29    1.566    1d
## alpha.34    1.408    1d

MCMC 2d with normal prior

seeds <- c(1304, 2406, 7916) # Set seeds for each chain
chains2d_select <- vector("list", n_chains)
theta_start2d <- matrix(0, 16, 2) # 16 scripts, two dimensions


for (i in 1:n_chains) {
  chains2d_select[[i]] <- MCMCpaircompare2d(
    pairDataCDAT_select,
    theta.constraints=list(
      '5074' = list(1, "+"),  
      '5074' = list(2, "-"),
      '5495' = list(1, "-"), 
      '5495' = list(2, "+")   
      ),  
    verbose=10000,         # Prints status updates every __ iterations.
    burnin=200,      # Discards the first __ samples to stabilize.
    mcmc=20000,        # Runs __ iterations for estimation.
    thin=20,          # Keeps every __th sample to reduce autocorrelation.
    seed=seeds[i],        # Ensures reproducibility of results.
    gamma.start=gamma_start_select, # Uses the initialized respondent perception values.
    theta.start=theta_start2d,
    store.theta=TRUE,  # Saves item-specfic draws.
    store.gamma=TRUE,  # Saves both rater-specific draws.
    tune=.25,           # Sets the tuning parameter for the Metropolis-Hastings step.
    procrustes=FALSE   # Disables Procrustes alignment
    )
}

## 
## 
## MCMCpaircompare2d iteration 1 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 20001 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 1 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 20001 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 1 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 10001 of 20200 
## 
## Summary of acceptance rates for gamma:
## 
## 
## MCMCpaircompare2d iteration 20001 of 20200 
## 
## Summary of acceptance rates for gamma:

# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2d_select <- mcmc.list(lapply(chains2d_select, mcmc)) 

#Save object
save(mcmc_list2d_select, file = "Out/mcmc_list2d_select.Rdata")

# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list2d_select[[1]]), value = TRUE)
alpha <- grep("^alpha", colnames(mcmc_list2d_select[[1]]), value = TRUE)

Check model convergence

# For 2D model
gelman_diag_2d_select <- gelman.diag(mcmc_list2d_select)
max_rhat_2d_select <- max(gelman_diag_2d_select$psrf[, "Point est."])

gelman_diag_2d_select

## Potential scale reduction factors:
## 
##             Point est. Upper C.I.
## theta1.1121      1.009      1.032
## theta1.1154      1.001      1.004
## theta1.1222      1.006      1.022
## theta1.1637      1.009      1.032
## theta1.1995      1.011      1.041
## theta1.3369      1.006      1.010
## theta1.4004      1.004      1.012
## theta1.4209      1.009      1.032
## theta1.4268      1.004      1.013
## theta1.4841      1.006      1.021
## theta1.5074      1.002      1.007
## theta1.5495      1.005      1.020
## theta1.5710      0.999      0.999
## theta1.8294      1.001      1.005
## theta1.8980      1.001      1.004
## theta1.9899      1.000      1.001
## theta2.1121      1.008      1.031
## theta2.1154      1.003      1.012
## theta2.1222      1.003      1.014
## theta2.1637      1.002      1.006
## theta2.1995      1.010      1.037
## theta2.3369      1.002      1.007
## theta2.4004      1.003      1.010
## theta2.4209      1.008      1.031
## theta2.4268      1.002      1.008
## theta2.4841      1.000      1.003
## theta2.5074      1.001      1.006
## theta2.5495      1.006      1.014
## theta2.5710      1.000      1.002
## theta2.8294      1.003      1.013
## theta2.8980      1.002      1.003
## theta2.9899      0.999      0.999
## gamma.25         1.008      1.033
## gamma.26         1.015      1.051
## gamma.27         1.002      1.011
## gamma.29         1.010      1.035
## gamma.34         1.005      1.021
## 
## Multivariate psrf
## 
## 1.03

Summary of MCMC 2d with normal prior

View a summary of the 2d model parameter posterior distributions

summary2d_select <- round(
  cbind(
    as.data.frame(summary(mcmc_list2d_select)$statistics),
    as.data.frame(summary(mcmc_list2d_select)$quantiles)
  ), 3)
summary2d_select$Model <- "2d"

save(summary2d_select, file="Out/summary2d_select.Rdata")
summary2d_select

##               Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta1.1121 -0.133 0.762    0.014          0.040 -1.540 -0.700 -0.148  0.427
## theta1.1154 -1.038 0.585    0.011          0.018 -2.084 -1.447 -1.068 -0.659
## theta1.1222  0.383 0.616    0.011          0.023 -0.850 -0.019  0.410  0.805
## theta1.1637  0.097 0.479    0.009          0.012 -0.840 -0.220  0.105  0.416
## theta1.1995 -0.133 0.602    0.011          0.023 -1.271 -0.543 -0.169  0.264
## theta1.3369  1.532 0.613    0.011          0.013  0.308  1.133  1.543  1.936
## theta1.4004  0.814 0.615    0.011          0.020 -0.369  0.409  0.789  1.200
## theta1.4209 -0.058 0.645    0.012          0.030 -1.278 -0.504 -0.055  0.387
## theta1.4268  0.259 0.514    0.009          0.013 -0.803 -0.074  0.271  0.591
## theta1.4841 -0.730 0.564    0.010          0.017 -1.877 -1.083 -0.718 -0.359
## theta1.5074  0.508 0.329    0.006          0.009  0.030  0.252  0.466  0.714
## theta1.5495 -1.736 0.481    0.009          0.014 -2.773 -2.031 -1.701 -1.394
## theta1.5710 -1.189 0.615    0.011          0.016 -2.382 -1.597 -1.196 -0.809
## theta1.8294  0.339 0.471    0.009          0.009 -0.576  0.019  0.340  0.659
## theta1.8980  1.050 0.535    0.010          0.011 -0.063  0.724  1.067  1.394
## theta1.9899  0.791 0.552    0.010          0.012 -0.344  0.438  0.804  1.162
## theta2.1121 -0.040 1.088    0.020          0.054 -1.974 -0.912 -0.067  0.833
## theta2.1154 -0.662 0.816    0.015          0.029 -2.206 -1.238 -0.660 -0.138
## theta2.1222  0.200 0.851    0.016          0.037 -1.470 -0.408  0.243  0.807
## theta2.1637 -0.009 0.684    0.012          0.018 -1.389 -0.458 -0.003  0.449
## theta2.1995 -0.417 0.876    0.016          0.028 -2.064 -1.034 -0.425  0.194
## theta2.3369  0.989 0.883    0.016          0.021 -0.770  0.398  0.976  1.591
## theta2.4004  0.057 0.879    0.016          0.024 -1.697 -0.521  0.076  0.629
## theta2.4209 -0.039 0.946    0.017          0.043 -1.810 -0.741 -0.030  0.659
## theta2.4268  0.177 0.702    0.013          0.020 -1.229 -0.276  0.189  0.655
## theta2.4841 -0.320 0.754    0.014          0.023 -1.794 -0.833 -0.314  0.183
## theta2.5074 -0.474 0.406    0.007          0.011 -1.514 -0.674 -0.369 -0.167
## theta2.5495  0.377 0.352    0.006          0.007  0.012  0.119  0.277  0.512
## theta2.5710 -0.324 0.848    0.015          0.025 -2.000 -0.888 -0.310  0.248
## theta2.8294  0.103 0.668    0.012          0.013 -1.158 -0.355  0.087  0.547
## theta2.8980  0.708 0.716    0.013          0.015 -0.765  0.240  0.725  1.196
## theta2.9899  0.628 0.787    0.014          0.016 -0.943  0.086  0.642  1.164
## gamma.25     0.533 0.346    0.006          0.017  0.028  0.224  0.501  0.818
## gamma.26     0.490 0.267    0.005          0.013  0.049  0.275  0.478  0.689
## gamma.27     0.608 0.262    0.005          0.013  0.115  0.413  0.605  0.804
## gamma.29     0.516 0.275    0.005          0.012  0.046  0.308  0.497  0.710
## gamma.34     0.666 0.279    0.005          0.010  0.139  0.463  0.671  0.864
##              97.5% Model
## theta1.1121  1.322    2d
## theta1.1154  0.188    2d
## theta1.1222  1.559    2d
## theta1.1637  1.016    2d
## theta1.1995  1.077    2d
## theta1.3369  2.698    2d
## theta1.4004  2.057    2d
## theta1.4209  1.154    2d
## theta1.4268  1.262    2d
## theta1.4841  0.368    2d
## theta1.5074  1.252    2d
## theta1.5495 -0.879    2d
## theta1.5710  0.066    2d
## theta1.8294  1.262    2d
## theta1.8980  2.057    2d
## theta1.9899  1.833    2d
## theta2.1121  1.870    2d
## theta2.1154  0.992    2d
## theta2.1222  1.770    2d
## theta2.1637  1.315    2d
## theta2.1995  1.255    2d
## theta2.3369  2.700    2d
## theta2.4004  1.727    2d
## theta2.4209  1.677    2d
## theta2.4268  1.519    2d
## theta2.4841  1.158    2d
## theta2.5074 -0.013    2d
## theta2.5495  1.321    2d
## theta2.5710  1.314    2d
## theta2.8294  1.437    2d
## theta2.8980  2.036    2d
## theta2.9899  2.191    2d
## gamma.25     1.203    2d
## gamma.26     1.014    2d
## gamma.27     1.088    2d
## gamma.29     1.069    2d
## gamma.34     1.205    2d

Next simulate data sets for the 2d with clusters

simulate_pp_data_mcmclist2d <- function(mcmc_list, pair_data) {
  # Combine chains into one matrix
  all_samples <- do.call(rbind, mcmc_list)
  n_obs <- nrow(pair_data)
  n_rep = 200
  sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
  
  # Identify column names for theta1, theta2, gamma
  theta1_names <- grep("^theta1\\.", colnames(all_samples), value = TRUE)
  theta2_names <- grep("^theta2\\.", colnames(all_samples), value = TRUE)
  gamma_names  <- grep("^gamma\\.", colnames(all_samples), value = TRUE)

  for (r in 1:n_rep) {                     # Row by row iteration
    s <- sample(1:nrow(all_samples), 1)    # Draw from any chain
    theta1 <- all_samples[s, theta1_names] # Draw one value for theta1
    theta2 <- all_samples[s, theta2_names] # Draw one value for theta2
    gamma  <- all_samples[s, gamma_names]  # Draw one value for gamma

    for (i in 1:n_obs) {                        # Column by column iteration
      j1 <- as.character(pair_data$itemj1[i])   # Locate the first item number for that row from the observed
      j2 <- as.character(pair_data$itemj2[i])   # Locate the second item number for that row from the observed
      rater <- as.character(pair_data$rater[i]) # Locate the rater for that row from the observed
      # Find the model-implied diff between sampled theta1 of 2 items
      diff1 <- theta1[paste0("theta1.", j1)] - theta1[paste0("theta1.", j2)] 
      # Find the model-implied diff between sampled theta2 of 2 items
      diff2 <- theta2[paste0("theta2.", j1)] - theta2[paste0("theta2.", j2)] 
      g <- gamma[paste0("gamma.", rater)]       # Find the model-implied sampled gamma of the rater
      # eta will be the input to the probit link function
      # cos() & sin() assume angles are in radians as they are
      zi1 <- cos(g) 
      zi2 <- sin(g)
      # Add thetas in proportion to sine or cosine of gamma, representing how much more favorable j1 is than j2
      eta <- zi1 * diff1 + zi2 * diff2 
      # If eta is negative, then j2 is preferrable. If close to 0, then it's a toss-up.
      p <- pnorm(eta) # transform eta using the probit link to the probability j1 is chosen over j2.
      # If p is <.5 preference for j2, p = .5 no preference, p > .5 preference for j1.
      # Generate 1 draw of 0 or 1 based on p, and add it to the sim_choices matrix
      sim_choices[r, i] <- rbinom(n = 1, size = 1, p) 
    }
  }
  # return a set of binary choices where 1 is a choice of j1 and 0 is a choice of j2
  return(sim_choices) 
}

sim_2d_select <- simulate_pp_data_mcmclist2d(mcmc_list2d_select, pairDataCDAT_select)

Convert observed item ID choices to 1/0 for comparison to simulated data. If itemj1 is chosen, then 1, if j2 is chosen, then 0.

#If choice is first item, 1, if choice is second item, 0
observed_binary_select <- ifelse(pairDataCDAT_select$choice == pairDataCDAT_select$itemj1, 1,
                          ifelse(pairDataCDAT_select$choice == pairDataCDAT_select$itemj2, 0, NA))

Model checking with ppp and simulated correct proportion

2d PPP

compute_ppp_proportion <- function(observed_choices, sim_choices_matrix) {
  obs_prop <- mean(observed_choices)
  
  # The mean of the row is the proportion of same choice for that simulated data set
  sim_prop <- rowMeans(sim_choices_matrix) 
  sim_correct <- rowMeans(sim_choices_matrix == matrix(observed_choices,
                                                       nrow = nrow(sim_choices_matrix), 
                                                       ncol = length(observed_choices), 
                                                       byrow = TRUE))
  
  # Posterior predictive p-value
  ppp <- mean(sim_prop >= obs_prop)
  
  list(
    observed = obs_prop,
    simulated = sim_prop,
    simulated_correct = sim_correct,
    ppp = ppp
  )
}

ppp2d_select <- compute_ppp_proportion(observed_binary_select, sim_2d_select)
ppp2d_select

## $observed
## [1] 0.5091864
## 
## $simulated
##   [1] 0.5144357 0.4698163 0.4908136 0.4881890 0.4855643 0.4750656 0.5275591
##   [8] 0.5039370 0.4776903 0.5118110 0.5065617 0.4619423 0.5275591 0.4881890
##  [15] 0.5196850 0.4934383 0.5196850 0.5275591 0.4803150 0.5301837 0.4750656
##  [22] 0.5065617 0.4881890 0.5065617 0.4645669 0.4986877 0.5223097 0.4855643
##  [29] 0.5144357 0.4934383 0.4855643 0.4566929 0.4986877 0.4671916 0.5091864
##  [36] 0.5091864 0.4619423 0.4776903 0.4881890 0.4908136 0.5249344 0.4803150
##  [43] 0.4750656 0.5223097 0.5223097 0.4881890 0.4776903 0.4881890 0.5065617
##  [50] 0.4908136 0.4881890 0.4934383 0.4881890 0.4593176 0.4803150 0.4855643
##  [57] 0.5118110 0.5196850 0.5039370 0.4881890 0.4829396 0.4671916 0.4776903
##  [64] 0.4829396 0.4776903 0.5328084 0.4960630 0.5091864 0.4986877 0.4986877
##  [71] 0.4881890 0.5144357 0.5118110 0.4986877 0.5249344 0.5354331 0.5196850
##  [78] 0.4960630 0.5091864 0.4881890 0.4776903 0.4908136 0.4986877 0.5380577
##  [85] 0.5196850 0.4934383 0.5196850 0.5013123 0.5223097 0.5118110 0.4671916
##  [92] 0.5091864 0.5091864 0.4776903 0.4960630 0.5091864 0.5301837 0.5301837
##  [99] 0.4593176 0.4855643 0.5433071 0.5118110 0.4750656 0.5065617 0.5249344
## [106] 0.4724409 0.5249344 0.5170604 0.4881890 0.4829396 0.5013123 0.5170604
## [113] 0.4750656 0.4960630 0.4383202 0.5118110 0.5065617 0.4934383 0.4829396
## [120] 0.4671916 0.4776903 0.5406824 0.4566929 0.5354331 0.5039370 0.4855643
## [127] 0.5144357 0.5013123 0.5118110 0.4593176 0.4461942 0.5144357 0.5380577
## [134] 0.5459318 0.5039370 0.4671916 0.4960630 0.4986877 0.5144357 0.5118110
## [141] 0.4986877 0.5196850 0.5039370 0.4855643 0.5039370 0.5144357 0.4986877
## [148] 0.5065617 0.4855643 0.4986877 0.4645669 0.4934383 0.4855643 0.4986877
## [155] 0.5144357 0.4619423 0.5196850 0.4934383 0.4724409 0.4855643 0.5013123
## [162] 0.5354331 0.4435696 0.4724409 0.5118110 0.5223097 0.4619423 0.4960630
## [169] 0.4908136 0.4776903 0.4698163 0.4593176 0.4776903 0.5354331 0.5118110
## [176] 0.5065617 0.4750656 0.5223097 0.4803150 0.4829396 0.5118110 0.4829396
## [183] 0.4724409 0.4986877 0.4881890 0.4803150 0.5564304 0.5170604 0.4934383
## [190] 0.4671916 0.5223097 0.4986877 0.4724409 0.4908136 0.5196850 0.4934383
## [197] 0.5091864 0.5196850 0.5301837 0.5013123
## 
## $simulated_correct
##   [1] 0.6850394 0.6666667 0.6981627 0.6062992 0.6404199 0.6929134 0.6876640
##   [8] 0.6430446 0.6482940 0.6824147 0.7086614 0.6640420 0.6719160 0.6692913
##  [15] 0.6377953 0.6797900 0.6640420 0.6561680 0.7086614 0.6482940 0.6561680
##  [22] 0.6561680 0.6535433 0.6666667 0.6666667 0.6797900 0.6351706 0.6666667
##  [29] 0.6955381 0.6640420 0.6614173 0.6377953 0.6797900 0.6850394 0.6640420
##  [36] 0.6640420 0.7270341 0.6850394 0.6325459 0.6876640 0.6587927 0.6299213
##  [43] 0.6719160 0.6824147 0.6509186 0.6430446 0.6850394 0.6640420 0.6456693
##  [50] 0.7191601 0.6850394 0.7112861 0.6640420 0.6246719 0.6981627 0.6509186
##  [57] 0.6299213 0.7007874 0.7060367 0.6587927 0.7060367 0.6587927 0.6850394
##  [64] 0.6797900 0.6535433 0.6719160 0.6876640 0.6587927 0.6797900 0.6902887
##  [71] 0.6745407 0.6220472 0.6824147 0.6430446 0.6587927 0.6430446 0.6535433
##  [78] 0.6666667 0.7270341 0.6955381 0.6587927 0.6456693 0.6325459 0.7191601
##  [85] 0.7007874 0.6482940 0.6640420 0.6351706 0.6666667 0.6719160 0.6272966
##  [92] 0.6482940 0.6587927 0.6797900 0.6509186 0.6325459 0.6902887 0.6482940
##  [99] 0.6771654 0.6981627 0.6719160 0.6719160 0.6929134 0.6929134 0.6692913
## [106] 0.6850394 0.6850394 0.6509186 0.6272966 0.6692913 0.6929134 0.6509186
## [113] 0.6981627 0.6561680 0.6614173 0.7034121 0.6614173 0.6325459 0.7112861
## [120] 0.6535433 0.6640420 0.6587927 0.6692913 0.6745407 0.6587927 0.6876640
## [127] 0.6640420 0.6614173 0.6666667 0.6876640 0.6640420 0.6482940 0.6614173
## [134] 0.7112861 0.6220472 0.6482940 0.6456693 0.6850394 0.6692913 0.6719160
## [141] 0.6640420 0.7217848 0.6955381 0.6456693 0.6587927 0.6902887 0.6535433
## [148] 0.7034121 0.6509186 0.6745407 0.6824147 0.6797900 0.7086614 0.6797900
## [155] 0.6745407 0.6850394 0.6482940 0.6902887 0.6797900 0.6771654 0.7139108
## [162] 0.7007874 0.7139108 0.6902887 0.6771654 0.6876640 0.6482940 0.7034121
## [169] 0.6614173 0.6850394 0.6771654 0.6824147 0.6797900 0.6797900 0.7454068
## [176] 0.6876640 0.6614173 0.7034121 0.6456693 0.6377953 0.6876640 0.6745407
## [183] 0.6797900 0.6797900 0.6377953 0.6561680 0.6640420 0.6614173 0.6692913
## [190] 0.6167979 0.6719160 0.6692913 0.7165354 0.6194226 0.6430446 0.6640420
## [197] 0.6745407 0.6850394 0.6587927 0.7034121
## 
## $ppp
## [1] 0.345

summary(ppp2d_select$simulated_correct)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6063  0.6535  0.6693  0.6707  0.6850  0.7454

Run the function for 1d model ## 1d PPP

simulate_pp_data_mcmclist1d <- function(mcmc_list, pair_data) {
  # Combine chains into one matrix
  all_samples <- do.call(rbind, mcmc_list)
  n_obs <- nrow(pair_data)
  n_rep = 200
  sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
  
  # Identify column names for theta1, theta2, gamma
  theta_names <- grep("^theta\\.", colnames(all_samples), value = TRUE)
  alpha_names  <- grep("^alpha\\.", colnames(all_samples), value = TRUE)
  
  for (r in 1:n_rep) {
    s <- sample(1:nrow(all_samples), 1)  # Select one draw from any chain in combined samples
    theta <- all_samples[s, theta_names] # Draw one value for theta
    alpha  <- all_samples[s, alpha_names] # Draw one value for alpha

    for (i in 1:n_obs) {
      j1 <- as.character(pair_data$itemj1[i])
      j2 <- as.character(pair_data$itemj2[i])
      rater <- as.character(pair_data$rater[i])

      # Safely extract theta/gamma values using column names

      diff <- theta[paste0("theta.", j1)] - theta[paste0("theta.", j2)]
      a <- alpha[paste0("alpha.", rater)]

      eta <- a * diff # Add thetas
      p <- pnorm(eta) # probit link is a normal distribution
      sim_choices[r, i] <- rbinom(1, 1, p)
    }
  }
  return(sim_choices)
}

sim_1d_select <- simulate_pp_data_mcmclist1d(mcmc_list1d_select, pairDataCDAT_select)

Compute Posterior Predictive P-value (PPP) for 1d model

ppp1d_select <- compute_ppp_proportion(observed_binary_select, sim_1d_select)
ppp1d_select

## $observed
## [1] 0.5091864
## 
## $simulated
##   [1] 0.5118110 0.4934383 0.5380577 0.4461942 0.5275591 0.5170604 0.4645669
##   [8] 0.4881890 0.5065617 0.4803150 0.4960630 0.4960630 0.4803150 0.5170604
##  [15] 0.4960630 0.4881890 0.4776903 0.4986877 0.4829396 0.4986877 0.4698163
##  [22] 0.5039370 0.4750656 0.5065617 0.5354331 0.5091864 0.4619423 0.4881890
##  [29] 0.4855643 0.4829396 0.5144357 0.5223097 0.4593176 0.5301837 0.4908136
##  [36] 0.4934383 0.4986877 0.5013123 0.4803150 0.5039370 0.5065617 0.5695538
##  [43] 0.4540682 0.5091864 0.5144357 0.5039370 0.5170604 0.4750656 0.4855643
##  [50] 0.4960630 0.4803150 0.4908136 0.4724409 0.4829396 0.5091864 0.5013123
##  [57] 0.4566929 0.4908136 0.5459318 0.5223097 0.5223097 0.4986877 0.4724409
##  [64] 0.4671916 0.4908136 0.4881890 0.4383202 0.4750656 0.5013123 0.5144357
##  [71] 0.4803150 0.5013123 0.4750656 0.5065617 0.4934383 0.5091864 0.5170604
##  [78] 0.5091864 0.4934383 0.5065617 0.4908136 0.5039370 0.4934383 0.5039370
##  [85] 0.5170604 0.5013123 0.4435696 0.5170604 0.4750656 0.4829396 0.5223097
##  [92] 0.5091864 0.5328084 0.5013123 0.5485564 0.4803150 0.5039370 0.5013123
##  [99] 0.4724409 0.4619423 0.4776903 0.4724409 0.5039370 0.4960630 0.5459318
## [106] 0.4829396 0.5065617 0.4881890 0.5013123 0.4750656 0.5091864 0.4960630
## [113] 0.5013123 0.4776903 0.5039370 0.4540682 0.4724409 0.4934383 0.5091864
## [120] 0.4986877 0.4934383 0.5065617 0.5196850 0.4881890 0.4671916 0.4908136
## [127] 0.5223097 0.4671916 0.5118110 0.4855643 0.4960630 0.5144357 0.4645669
## [134] 0.5223097 0.4776903 0.5039370 0.4934383 0.4698163 0.4934383 0.4750656
## [141] 0.4671916 0.4724409 0.4934383 0.4908136 0.4829396 0.4934383 0.4960630
## [148] 0.5301837 0.4750656 0.4750656 0.5223097 0.5065617 0.5118110 0.4356955
## [155] 0.4881890 0.5406824 0.4986877 0.4698163 0.5118110 0.4776903 0.5013123
## [162] 0.5039370 0.4908136 0.4908136 0.5144357 0.4750656 0.4908136 0.5196850
## [169] 0.4803150 0.4803150 0.5249344 0.4881890 0.4829396 0.4934383 0.4881890
## [176] 0.5196850 0.5223097 0.5091864 0.4803150 0.4829396 0.4960630 0.5223097
## [183] 0.4724409 0.5118110 0.4986877 0.5196850 0.4671916 0.5013123 0.5170604
## [190] 0.4960630 0.4855643 0.5118110 0.4855643 0.4960630 0.5118110 0.5170604
## [197] 0.5170604 0.5065617 0.4671916 0.4908136
## 
## $simulated_correct
##   [1] 0.6719160 0.6797900 0.6771654 0.7217848 0.7034121 0.6719160 0.6666667
##   [8] 0.6115486 0.6194226 0.6351706 0.6299213 0.6561680 0.6404199 0.6981627
##  [15] 0.6666667 0.6745407 0.6482940 0.6692913 0.6692913 0.6797900 0.6719160
##  [22] 0.6640420 0.6771654 0.6771654 0.6482940 0.6797900 0.6377953 0.6692913
##  [29] 0.6351706 0.6850394 0.6640420 0.6299213 0.6404199 0.6482940 0.6719160
##  [36] 0.6850394 0.6377953 0.6036745 0.6666667 0.6902887 0.6771654 0.6719160
##  [43] 0.6771654 0.6640420 0.6587927 0.7007874 0.6089239 0.6404199 0.6561680
##  [50] 0.6719160 0.6719160 0.6404199 0.6167979 0.6325459 0.6692913 0.6666667
##  [57] 0.6482940 0.6404199 0.6692913 0.6246719 0.6824147 0.6115486 0.6745407
##  [64] 0.6850394 0.6719160 0.7217848 0.6456693 0.6456693 0.6929134 0.6587927
##  [71] 0.6929134 0.6876640 0.6456693 0.6561680 0.7007874 0.6797900 0.6509186
##  [78] 0.6745407 0.6797900 0.6561680 0.6089239 0.6640420 0.6430446 0.6482940
##  [85] 0.6719160 0.6719160 0.6404199 0.6456693 0.6614173 0.6745407 0.6509186
##  [92] 0.6587927 0.6614173 0.6614173 0.6929134 0.6509186 0.6640420 0.6614173
##  [99] 0.6745407 0.6430446 0.6797900 0.7007874 0.6640420 0.6771654 0.6272966
## [106] 0.6430446 0.6771654 0.6745407 0.6509186 0.6929134 0.6430446 0.6561680
## [113] 0.6719160 0.6902887 0.6955381 0.6876640 0.6587927 0.6587927 0.6797900
## [120] 0.6640420 0.6377953 0.6509186 0.6955381 0.6797900 0.6167979 0.6929134
## [127] 0.6089239 0.6692913 0.6509186 0.6561680 0.6929134 0.6482940 0.6666667
## [134] 0.6404199 0.6430446 0.6745407 0.6377953 0.6771654 0.6482940 0.6404199
## [141] 0.6797900 0.7007874 0.6430446 0.6771654 0.6850394 0.6797900 0.6404199
## [148] 0.6640420 0.6509186 0.7139108 0.7034121 0.6299213 0.6404199 0.6377953
## [155] 0.6850394 0.6902887 0.6902887 0.6246719 0.6561680 0.6587927 0.6509186
## [162] 0.6797900 0.6876640 0.6876640 0.6220472 0.6614173 0.6719160 0.6797900
## [169] 0.6089239 0.6771654 0.6902887 0.6010499 0.7112861 0.6587927 0.6535433
## [176] 0.6535433 0.6981627 0.6272966 0.6614173 0.6062992 0.6299213 0.6771654
## [183] 0.6482940 0.6509186 0.6377953 0.6745407 0.6062992 0.6456693 0.6351706
## [190] 0.7086614 0.6456693 0.6719160 0.6666667 0.6771654 0.6246719 0.6456693
## [197] 0.7086614 0.6824147 0.6062992 0.6614173
## 
## $ppp
## [1] 0.275

summary(ppp1d_select$simulated_correct)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6010  0.6450  0.6640  0.6616  0.6778  0.7218

Interpretation: The one-dimensional model matches the predictiveness of the 2-dimensional model when using only the five middle-gamma raters comparisons. # DIC

computeDbar2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
  # Extract item and judge IDs
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  S <- nrow(mcmc_matrix)
  N <- nrow(pair_data)
  deviance_vec <- numeric(S)
  
  for (s in 1:S) {
    # Extract current parameter values
    theta1 <- setNames(mcmc_matrix[s, theta1_ids], sub("theta1\\.", "", theta1_ids))
    theta2 <- setNames(mcmc_matrix[s, theta2_ids], sub("theta2\\.", "", theta2_ids))
    gamma  <- setNames(mcmc_matrix[s, gamma_ids],  sub("gamma\\.", "", gamma_ids))
    
    eta <- numeric(N)
    for (n in 1:N) {
      j1 <- itemj1[n]
      j2 <- itemj2[n]
      r  <- rater[n]
      
      g <- gamma[r]
      if (is.na(g)) g <- mean(gamma, na.rm = TRUE)
      
      diff1 <- theta1[j2] - theta1[j1]
      diff2 <- theta2[j2] - theta2[j1]
      eta[n] <- (1 - g) * diff1 + g * diff2
    }
    
    p <- pnorm(eta)
    p <- pmin(pmax(p, 1e-10), 1 - 1e-10)  # avoid log(0)
    
    is_j2_chosen <- choice == pair_data$itemj2
    loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE) # Compute log-probability of the observed binary outcome
    deviance_vec[s] <- -2 * sum(loglik)
  }
  
  mean(deviance_vec)
}

computeDevianceAtPosteriorMean2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
  # Compute posterior means
  theta1_mean <- setNames(colMeans(mcmc_matrix[, theta1_ids]), sub("theta1\\.", "", theta1_ids))
  theta2_mean <- setNames(colMeans(mcmc_matrix[, theta2_ids]), sub("theta2\\.", "", theta2_ids))
  gamma_mean  <- setNames(colMeans(mcmc_matrix[, gamma_ids]),  sub("gamma\\.",  "", gamma_ids))
  
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  N <- nrow(pair_data)
  eta <- numeric(N)
  
  for (n in 1:N) {
    j1 <- itemj1[n]
    j2 <- itemj2[n]
    r  <- rater[n]
    
    g <- gamma_mean[r]
    if (is.na(g)) g <- mean(gamma_mean, na.rm = TRUE)
    
    diff1 <- theta1_mean[j2] - theta1_mean[j1]
    diff2 <- theta2_mean[j2] - theta2_mean[j1]
    eta[n] <- (1 - g) * diff1 + g * diff2
  }
  
  p <- pnorm(eta)
  p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
  
  is_j2_chosen <- choice == pair_data$itemj2
  loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
  
  D_at_mean <- -2 * sum(loglik)
  return(D_at_mean)
}

2d model

mcmc_matrix_select <- do.call(rbind, mcmc_list2d_select)  # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix_select), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix_select), value = TRUE)
gamma_ids  <- grep("^gamma\\.",  colnames(mcmc_matrix_select), value = TRUE)

D_bar2d_select <- computeDbar2d(mcmc_matrix_select, pairDataCDAT_select, theta1_ids, theta2_ids, gamma_ids)

D_mean2d_select <- computeDevianceAtPosteriorMean2d(mcmc_matrix_select, pairDataCDAT_select, theta1_ids, theta2_ids, gamma_ids)

p_D2d_select <- D_bar2d_select - D_mean2d_select
DIC2d_select <- D_bar2d_select + p_D2d_select
print(DIC2d_select)

## [1] 477.1262

1d DIC

computeDbar1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
  # Extract item and judge IDs
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  S <- nrow(mcmc_matrix)
  N <- nrow(pair_data)
  deviance_vec <- numeric(S)
  
  for (s in 1:S) {
    # Extract current parameter values
    theta <- setNames(mcmc_matrix[s, theta_ids], sub("theta\\.", "", theta_ids))
    alpha <- setNames(mcmc_matrix[s, alpha_ids], sub("alpha\\.", "", alpha_ids))
    
    eta <- numeric(N)
    for (n in 1:N) {
      j1 <- itemj1[n]
      j2 <- itemj2[n]
      r  <- rater[n]
      
      a <- alpha[r]
      if (is.na(a)) a <- mean(alpha, na.rm = TRUE)
      
      diff <- theta[j2] - theta[j1]
      eta[n] <- a * diff
    }
    
    p <- pnorm(eta)
    p <- pmin(pmax(p, 1e-10), 1 - 1e-10)  # avoid log(0)
    
    is_j2_chosen <- choice == pair_data$itemj2
    loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
    deviance_vec[s] <- -2 * sum(loglik)
  }
  
  mean(deviance_vec)
}

computeDevianceAtPosteriorMean1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
  # Compute posterior means
  theta_mean <- setNames(colMeans(mcmc_matrix[, theta_ids]), sub("theta\\.", "", theta_ids))
  alpha_mean  <- setNames(colMeans(mcmc_matrix[, alpha_ids]),  sub("alpha\\.",  "", alpha_ids))
  
  itemj1 <- as.character(pair_data$itemj1)
  itemj2 <- as.character(pair_data$itemj2)
  choice <- pair_data$choice
  rater  <- as.character(pair_data$rater)
  
  N <- nrow(pair_data)
  eta <- numeric(N)
  
  for (n in 1:N) {
    j1 <- itemj1[n]
    j2 <- itemj2[n]
    r  <- rater[n]
    
    a <- alpha_mean[r]
    if (is.na(a)) a <- mean(alpha_mean, na.rm = TRUE)
    
    diff <- theta_mean[j2] - theta_mean[j1]
    eta[n] <- a * diff
  }
  
  p <- pnorm(eta)
  p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
  
  is_j2_chosen <- choice == pair_data$itemj2
  loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
  
  D_at_mean <- -2 * sum(loglik)
  return(D_at_mean)
}

mcmc_matrix_select <- do.call(rbind, mcmc_list1d_select)  # combine all chains
theta_ids <- grep("^theta\\.", colnames(mcmc_matrix_select), value = TRUE)
alpha_ids  <- grep("^alpha\\.",  colnames(mcmc_matrix_select), value = TRUE)

D_bar1d_select <- computeDbar1d(mcmc_matrix_select, pairDataCDAT_select, theta_ids, alpha_ids)
D_mean1d_select <- computeDevianceAtPosteriorMean1d(mcmc_matrix_select, pairDataCDAT_select, theta_ids, alpha_ids)

p_D1d_select <- D_bar1d_select - D_mean1d_select
DIC1d_select <- D_bar1d_select + p_D1d_select
print(DIC1d_select)

## [1] 405.6239

Interpretation: No question that the one-dimensional model fits better than the two0dimensional model when the raters all share the same perceptual orientation. Also the 1D model is much improved in predictiveness when just the five best raters are included. This shows that having raters with differing perceptual orientations is a problem when considering only one dimension, but disappears when modeling both dimensions. Could there be more dimensions that would yet increase predictiveness? Also check to see that the posterior distributions of theta more precise? Check out the interquartile range of the 1d_select and the 1_d to compare.

load("Out/summary1d.Rdata")
summary1d_thetas <- summary1d[1:16,]
summary1d_thetas

##              Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta.1121 -0.177 0.277    0.003          0.008 -0.730 -0.359 -0.173  0.010
## theta.1154 -0.947 0.343    0.004          0.009 -1.650 -1.172 -0.931 -0.711
## theta.1222  0.208 0.278    0.003          0.008 -0.342  0.022  0.204  0.398
## theta.1637 -0.029 0.270    0.003          0.008 -0.571 -0.209 -0.028  0.153
## theta.1995 -0.401 0.285    0.003          0.008 -0.971 -0.591 -0.400 -0.206
## theta.3369  1.719 0.480    0.006          0.011  0.845  1.379  1.696  2.028
## theta.4004  0.348 0.293    0.003          0.008 -0.224  0.145  0.345  0.543
## theta.4209 -0.211 0.279    0.003          0.008 -0.767 -0.390 -0.206 -0.022
## theta.4268  0.204 0.280    0.003          0.008 -0.347  0.015  0.205  0.391
## theta.4841 -0.485 0.288    0.003          0.008 -1.066 -0.668 -0.479 -0.287
## theta.5074 -0.064 0.273    0.003          0.008 -0.607 -0.244 -0.064  0.120
## theta.5495 -0.721 0.321    0.004          0.008 -1.387 -0.931 -0.713 -0.499
## theta.5710 -0.958 0.342    0.004          0.009 -1.664 -1.178 -0.945 -0.723
## theta.8294  0.290 0.279    0.003          0.008 -0.266  0.101  0.290  0.475
## theta.8980  0.803 0.333    0.004          0.009  0.167  0.572  0.793  1.021
## theta.9899  0.405 0.296    0.003          0.008 -0.177  0.203  0.402  0.609
##             97.5% Model
## theta.1121  0.356    1d
## theta.1154 -0.310    1d
## theta.1222  0.763    1d
## theta.1637  0.496    1d
## theta.1995  0.144    1d
## theta.3369  2.742    1d
## theta.4004  0.935    1d
## theta.4209  0.328    1d
## theta.4268  0.750    1d
## theta.4841  0.066    1d
## theta.5074  0.465    1d
## theta.5495 -0.103    1d
## theta.5710 -0.317    1d
## theta.8294  0.843    1d
## theta.8980  1.484    1d
## theta.9899  0.985    1d

summary1d_select_thetas <- summary1d_select[1:16,]
summary1d_select_thetas$Model <- "1d Select"
summary1d_select_thetas

##              Mean    SD Naive SE Time-series SE   2.5%    25%    50%    75%
## theta.1121 -0.276 0.316    0.004          0.006 -0.900 -0.487 -0.276 -0.061
## theta.1154 -1.272 0.406    0.005          0.007 -2.099 -1.535 -1.263 -0.988
## theta.1222  0.429 0.318    0.004          0.005 -0.175  0.206  0.421  0.639
## theta.1637  0.107 0.302    0.003          0.005 -0.479 -0.093  0.105  0.304
## theta.1995 -0.438 0.322    0.004          0.006 -1.073 -0.656 -0.436 -0.217
## theta.3369  1.709 0.480    0.006          0.008  0.840  1.371  1.682  2.019
## theta.4004  0.618 0.342    0.004          0.006 -0.036  0.386  0.610  0.843
## theta.4209 -0.247 0.317    0.004          0.005 -0.875 -0.459 -0.246 -0.029
## theta.4268  0.278 0.310    0.004          0.005 -0.328  0.072  0.276  0.481
## theta.4841 -0.676 0.339    0.004          0.006 -1.362 -0.906 -0.666 -0.443
## theta.5074  0.137 0.298    0.003          0.005 -0.457 -0.066  0.139  0.338
## theta.5495 -1.215 0.397    0.005          0.007 -2.030 -1.475 -1.200 -0.942
## theta.5710 -1.251 0.409    0.005          0.007 -2.088 -1.518 -1.230 -0.969
## theta.8294  0.310 0.309    0.004          0.006 -0.279  0.100  0.305  0.513
## theta.8980  1.088 0.381    0.004          0.006  0.374  0.831  1.071  1.333
## theta.9899  0.690 0.343    0.004          0.006  0.037  0.456  0.683  0.911
##             97.5%     Model
## theta.1121  0.334 1d Select
## theta.1154 -0.514 1d Select
## theta.1222  1.076 1d Select
## theta.1637  0.708 1d Select
## theta.1995  0.172 1d Select
## theta.3369  2.720 1d Select
## theta.4004  1.317 1d Select
## theta.4209  0.372 1d Select
## theta.4268  0.903 1d Select
## theta.4841 -0.037 1d Select
## theta.5074  0.708 1d Select
## theta.5495 -0.475 1d Select
## theta.5710 -0.501 1d Select
## theta.8294  0.922 1d Select
## theta.8980  1.882 1d Select
## theta.9899  1.399 1d Select

library(ggplot2)

# Assuming these are matched and in the same order
df_compare <- data.frame(
  item  = rownames(summary1d_select_thetas),
  median1 = summary1d_thetas$`50%`[1:nrow(summary1d_select_thetas)],
  median2 = summary1d_select_thetas$`50%`,
  CI1 = summary1d_thetas$`97.5%` - summary1d_thetas$`2.5%`,
  CI2 = summary1d_select_thetas$`97.5%` - summary1d_select_thetas$`2.5%`
)

cor(df_compare$median1, df_compare$median2)

## [1] 0.9770371

cor(df_compare$CI1, df_compare$CI2)

## [1] 0.9406444

df_compare$diff <- df_compare$median1 - df_compare$median2
df_compare$avg <- (df_compare$median1 + df_compare$median2) / 2

ggplot(df_compare, aes(x = avg, y = diff)) +
  geom_point() +
  geom_hline(yintercept = mean(df_compare$diff), linetype = "dashed") +
  geom_hline(yintercept = mean(df_compare$diff) + 1.96 * sd(df_compare$diff), color = "red") +
  geom_hline(yintercept = mean(df_compare$diff) - 1.96 * sd(df_compare$diff), color = "red") +
  labs(x = "Average Theta", y = "Difference in Theta", title = "Bland-Altman Plot")

t.test(df_compare$median1, df_compare$median2, paired = TRUE)

## 
##  Paired t-test
## 
## data:  df_compare$median1 and df_compare$median2
## t = 0.022523, df = 15, p-value = 0.9823
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1228939  0.1255189
## sample estimates:
## mean difference 
##       0.0013125

The distribution of posterior medians are not significantly different.

t.test(df_compare$CI1, df_compare$CI2, paired = TRUE)

## 
##  Paired t-test
## 
## data:  df_compare$CI1 and df_compare$CI2
## t = -8.9684, df = 15, p-value = 2.052e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1958599 -0.1206401
## sample estimates:
## mean difference 
##        -0.15825

There is a significant difference in the credible intervals of the two models.

ggplot(df_compare, aes(x = median1, y = median2)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "blue") +
  labs(x = "Full Model Theta", y = "Select Raters Theta", title = "Theta Comparison")

ggplot(df_compare, aes(x = CI1, y = CI2)) +
  geom_point() +
  labs(x = "Full Model Theta CI", y = "Select Raters Theta CI", title = "Theta Comparison")

ci_reg <- lm(CI2 ~ CI1, data = df_compare)
summary(ci_reg)

## 
## Call:
## lm(formula = CI2 ~ CI1, data = df_compare)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10873 -0.04128 -0.01999  0.03846  0.11979 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.27580    0.10693   2.579   0.0218 *  
## CI1          0.90297    0.08707  10.370 5.94e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07002 on 14 degrees of freedom
## Multiple R-squared:  0.8848, Adjusted R-squared:  0.8766 
## F-statistic: 107.5 on 1 and 14 DF,  p-value: 5.944e-08

CIs are significantly but slightly smaller for the selected judges in the center of the range of gamma distributions.

Selected Raters Models

Steve Heil

2025-06-29