library(MCMCpack)
library(bayesplot)
library(coda)
library(ggplot2)
library(tidyverse)
Load the prepared data set
load("Data/pairDataCDAT.1.Rda")
Filter to include only the middle cluster of raters and set up parameters for model function
best_raters <- c(25,26,27,29,34)
n_chains <- 3
pairDataCDAT_select <- pairDataCDAT[pairDataCDAT$rater %in% best_raters, ]
Re-shape the gamma start values for 2d model with only 5 raters
I_select <- length(best_raters)
gamma_start_select <- runif(I_select, pi/4-.4, pi/4+.4)
seeds <- c(0202, 8018, 1304) # Set seeds for each chain
chains1d_select <- vector("list", n_chains)
for (i in 1:n_chains) {
chains1d_select[[i]] <- MCMCpaircompare(
pairDataCDAT_select,
verbose=10000, # Prints status updates every __ iterations.
burnin=500, # Discards the first __ samples to stabilize.
mcmc=50000, # Runs __ iterations for estimation.
thin=20, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
alpha.fixed=FALSE,
alpha.start=1,
a=0, # The default prio mean of alpha is 0
A=.25, # The default prior precision is 0.25 (variance of 4)
store.theta=TRUE, # Saves item-specfic draws.
store.alpha=TRUE, # Saves both rater-specific draws.
tune=.3, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare iteration 1 of 50500
##
##
## MCMCpaircompare iteration 10001 of 50500
##
##
## MCMCpaircompare iteration 20001 of 50500
##
##
## MCMCpaircompare iteration 30001 of 50500
##
##
## MCMCpaircompare iteration 40001 of 50500
##
##
## MCMCpaircompare iteration 50001 of 50500
##
##
## MCMCpaircompare iteration 1 of 50500
##
##
## MCMCpaircompare iteration 10001 of 50500
##
##
## MCMCpaircompare iteration 20001 of 50500
##
##
## MCMCpaircompare iteration 30001 of 50500
##
##
## MCMCpaircompare iteration 40001 of 50500
##
##
## MCMCpaircompare iteration 50001 of 50500
##
##
## MCMCpaircompare iteration 1 of 50500
##
##
## MCMCpaircompare iteration 10001 of 50500
##
##
## MCMCpaircompare iteration 20001 of 50500
##
##
## MCMCpaircompare iteration 30001 of 50500
##
##
## MCMCpaircompare iteration 40001 of 50500
##
##
## MCMCpaircompare iteration 50001 of 50500
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list1d_select <- mcmc.list(lapply(chains1d_select, mcmc))
# Save object
save(mcmc_list1d_select, file = "Out/mcmc_list1d.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list1d_select[[1]]), value = TRUE)
alphas <- grep("^alpha", colnames(mcmc_list1d_select[[1]]), value = TRUE)
gelman_diag_1d_select <- gelman.diag(mcmc_list1d_select)
max_rhat_1d_select <- max(gelman_diag_1d_select$psrf[, "Point est."])
gelman_diag_1d_select
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta.1121 1 1.00
## theta.1154 1 1.00
## theta.1222 1 1.00
## theta.1637 1 1.00
## theta.1995 1 1.00
## theta.3369 1 1.00
## theta.4004 1 1.00
## theta.4209 1 1.00
## theta.4268 1 1.00
## theta.4841 1 1.00
## theta.5074 1 1.00
## theta.5495 1 1.00
## theta.5710 1 1.00
## theta.8294 1 1.00
## theta.8980 1 1.00
## theta.9899 1 1.00
## alpha.25 1 1.01
## alpha.26 1 1.00
## alpha.27 1 1.00
## alpha.29 1 1.00
## alpha.34 1 1.01
##
## Multivariate psrf
##
## 1
summary1d_select <- round(
cbind(
as.data.frame(summary(mcmc_list1d_select)$statistics),
as.data.frame(summary(mcmc_list1d_select)$quantiles)
), 3)
summary1d_select$Model <- "1d"
save(summary1d_select, file = "Out/summary1d_select.Rdata")
summary1d_select
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta.1121 -0.276 0.316 0.004 0.006 -0.900 -0.487 -0.276 -0.061
## theta.1154 -1.272 0.406 0.005 0.007 -2.099 -1.535 -1.263 -0.988
## theta.1222 0.429 0.318 0.004 0.005 -0.175 0.206 0.421 0.639
## theta.1637 0.107 0.302 0.003 0.005 -0.479 -0.093 0.105 0.304
## theta.1995 -0.438 0.322 0.004 0.006 -1.073 -0.656 -0.436 -0.217
## theta.3369 1.709 0.480 0.006 0.008 0.840 1.371 1.682 2.019
## theta.4004 0.618 0.342 0.004 0.006 -0.036 0.386 0.610 0.843
## theta.4209 -0.247 0.317 0.004 0.005 -0.875 -0.459 -0.246 -0.029
## theta.4268 0.278 0.310 0.004 0.005 -0.328 0.072 0.276 0.481
## theta.4841 -0.676 0.339 0.004 0.006 -1.362 -0.906 -0.666 -0.443
## theta.5074 0.137 0.298 0.003 0.005 -0.457 -0.066 0.139 0.338
## theta.5495 -1.215 0.397 0.005 0.007 -2.030 -1.475 -1.200 -0.942
## theta.5710 -1.251 0.409 0.005 0.007 -2.088 -1.518 -1.230 -0.969
## theta.8294 0.310 0.309 0.004 0.006 -0.279 0.100 0.305 0.513
## theta.8980 1.088 0.381 0.004 0.006 0.374 0.831 1.071 1.333
## theta.9899 0.690 0.343 0.004 0.006 0.037 0.456 0.683 0.911
## alpha.25 0.784 0.276 0.003 0.004 0.338 0.590 0.753 0.941
## alpha.26 1.576 0.470 0.005 0.007 0.824 1.238 1.525 1.848
## alpha.27 1.331 0.407 0.005 0.006 0.687 1.043 1.273 1.560
## alpha.29 0.876 0.301 0.003 0.004 0.399 0.665 0.841 1.049
## alpha.34 0.784 0.266 0.003 0.004 0.351 0.595 0.754 0.939
## 97.5% Model
## theta.1121 0.334 1d
## theta.1154 -0.514 1d
## theta.1222 1.076 1d
## theta.1637 0.708 1d
## theta.1995 0.172 1d
## theta.3369 2.720 1d
## theta.4004 1.317 1d
## theta.4209 0.372 1d
## theta.4268 0.903 1d
## theta.4841 -0.037 1d
## theta.5074 0.708 1d
## theta.5495 -0.475 1d
## theta.5710 -0.501 1d
## theta.8294 0.922 1d
## theta.8980 1.882 1d
## theta.9899 1.399 1d
## alpha.25 1.426 1d
## alpha.26 2.672 1d
## alpha.27 2.295 1d
## alpha.29 1.566 1d
## alpha.34 1.408 1d
seeds <- c(1304, 2406, 7916) # Set seeds for each chain
chains2d_select <- vector("list", n_chains)
theta_start2d <- matrix(0, 16, 2) # 16 scripts, two dimensions
for (i in 1:n_chains) {
chains2d_select[[i]] <- MCMCpaircompare2d(
pairDataCDAT_select,
theta.constraints=list(
'5074' = list(1, "+"),
'5074' = list(2, "-"),
'5495' = list(1, "-"),
'5495' = list(2, "+")
),
verbose=10000, # Prints status updates every __ iterations.
burnin=200, # Discards the first __ samples to stabilize.
mcmc=20000, # Runs __ iterations for estimation.
thin=20, # Keeps every __th sample to reduce autocorrelation.
seed=seeds[i], # Ensures reproducibility of results.
gamma.start=gamma_start_select, # Uses the initialized respondent perception values.
theta.start=theta_start2d,
store.theta=TRUE, # Saves item-specfic draws.
store.gamma=TRUE, # Saves both rater-specific draws.
tune=.25, # Sets the tuning parameter for the Metropolis-Hastings step.
procrustes=FALSE # Disables Procrustes alignment
)
}
##
##
## MCMCpaircompare2d iteration 1 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 20001 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 20001 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 1 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 10001 of 20200
##
## Summary of acceptance rates for gamma:
##
##
## MCMCpaircompare2d iteration 20001 of 20200
##
## Summary of acceptance rates for gamma:
# Apply the mcmc() funciton to each chain and combine into one mcmc_list2dDP
mcmc_list2d_select <- mcmc.list(lapply(chains2d_select, mcmc))
#Save object
save(mcmc_list2d_select, file = "Out/mcmc_list2d_select.Rdata")
# Create a list of parameters for each type of parameter
thetas <- grep("^theta", colnames(mcmc_list2d_select[[1]]), value = TRUE)
alpha <- grep("^alpha", colnames(mcmc_list2d_select[[1]]), value = TRUE)
# For 2D model
gelman_diag_2d_select <- gelman.diag(mcmc_list2d_select)
max_rhat_2d_select <- max(gelman_diag_2d_select$psrf[, "Point est."])
gelman_diag_2d_select
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## theta1.1121 1.009 1.032
## theta1.1154 1.001 1.004
## theta1.1222 1.006 1.022
## theta1.1637 1.009 1.032
## theta1.1995 1.011 1.041
## theta1.3369 1.006 1.010
## theta1.4004 1.004 1.012
## theta1.4209 1.009 1.032
## theta1.4268 1.004 1.013
## theta1.4841 1.006 1.021
## theta1.5074 1.002 1.007
## theta1.5495 1.005 1.020
## theta1.5710 0.999 0.999
## theta1.8294 1.001 1.005
## theta1.8980 1.001 1.004
## theta1.9899 1.000 1.001
## theta2.1121 1.008 1.031
## theta2.1154 1.003 1.012
## theta2.1222 1.003 1.014
## theta2.1637 1.002 1.006
## theta2.1995 1.010 1.037
## theta2.3369 1.002 1.007
## theta2.4004 1.003 1.010
## theta2.4209 1.008 1.031
## theta2.4268 1.002 1.008
## theta2.4841 1.000 1.003
## theta2.5074 1.001 1.006
## theta2.5495 1.006 1.014
## theta2.5710 1.000 1.002
## theta2.8294 1.003 1.013
## theta2.8980 1.002 1.003
## theta2.9899 0.999 0.999
## gamma.25 1.008 1.033
## gamma.26 1.015 1.051
## gamma.27 1.002 1.011
## gamma.29 1.010 1.035
## gamma.34 1.005 1.021
##
## Multivariate psrf
##
## 1.03
View a summary of the 2d model parameter posterior distributions
summary2d_select <- round(
cbind(
as.data.frame(summary(mcmc_list2d_select)$statistics),
as.data.frame(summary(mcmc_list2d_select)$quantiles)
), 3)
summary2d_select$Model <- "2d"
save(summary2d_select, file="Out/summary2d_select.Rdata")
summary2d_select
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta1.1121 -0.133 0.762 0.014 0.040 -1.540 -0.700 -0.148 0.427
## theta1.1154 -1.038 0.585 0.011 0.018 -2.084 -1.447 -1.068 -0.659
## theta1.1222 0.383 0.616 0.011 0.023 -0.850 -0.019 0.410 0.805
## theta1.1637 0.097 0.479 0.009 0.012 -0.840 -0.220 0.105 0.416
## theta1.1995 -0.133 0.602 0.011 0.023 -1.271 -0.543 -0.169 0.264
## theta1.3369 1.532 0.613 0.011 0.013 0.308 1.133 1.543 1.936
## theta1.4004 0.814 0.615 0.011 0.020 -0.369 0.409 0.789 1.200
## theta1.4209 -0.058 0.645 0.012 0.030 -1.278 -0.504 -0.055 0.387
## theta1.4268 0.259 0.514 0.009 0.013 -0.803 -0.074 0.271 0.591
## theta1.4841 -0.730 0.564 0.010 0.017 -1.877 -1.083 -0.718 -0.359
## theta1.5074 0.508 0.329 0.006 0.009 0.030 0.252 0.466 0.714
## theta1.5495 -1.736 0.481 0.009 0.014 -2.773 -2.031 -1.701 -1.394
## theta1.5710 -1.189 0.615 0.011 0.016 -2.382 -1.597 -1.196 -0.809
## theta1.8294 0.339 0.471 0.009 0.009 -0.576 0.019 0.340 0.659
## theta1.8980 1.050 0.535 0.010 0.011 -0.063 0.724 1.067 1.394
## theta1.9899 0.791 0.552 0.010 0.012 -0.344 0.438 0.804 1.162
## theta2.1121 -0.040 1.088 0.020 0.054 -1.974 -0.912 -0.067 0.833
## theta2.1154 -0.662 0.816 0.015 0.029 -2.206 -1.238 -0.660 -0.138
## theta2.1222 0.200 0.851 0.016 0.037 -1.470 -0.408 0.243 0.807
## theta2.1637 -0.009 0.684 0.012 0.018 -1.389 -0.458 -0.003 0.449
## theta2.1995 -0.417 0.876 0.016 0.028 -2.064 -1.034 -0.425 0.194
## theta2.3369 0.989 0.883 0.016 0.021 -0.770 0.398 0.976 1.591
## theta2.4004 0.057 0.879 0.016 0.024 -1.697 -0.521 0.076 0.629
## theta2.4209 -0.039 0.946 0.017 0.043 -1.810 -0.741 -0.030 0.659
## theta2.4268 0.177 0.702 0.013 0.020 -1.229 -0.276 0.189 0.655
## theta2.4841 -0.320 0.754 0.014 0.023 -1.794 -0.833 -0.314 0.183
## theta2.5074 -0.474 0.406 0.007 0.011 -1.514 -0.674 -0.369 -0.167
## theta2.5495 0.377 0.352 0.006 0.007 0.012 0.119 0.277 0.512
## theta2.5710 -0.324 0.848 0.015 0.025 -2.000 -0.888 -0.310 0.248
## theta2.8294 0.103 0.668 0.012 0.013 -1.158 -0.355 0.087 0.547
## theta2.8980 0.708 0.716 0.013 0.015 -0.765 0.240 0.725 1.196
## theta2.9899 0.628 0.787 0.014 0.016 -0.943 0.086 0.642 1.164
## gamma.25 0.533 0.346 0.006 0.017 0.028 0.224 0.501 0.818
## gamma.26 0.490 0.267 0.005 0.013 0.049 0.275 0.478 0.689
## gamma.27 0.608 0.262 0.005 0.013 0.115 0.413 0.605 0.804
## gamma.29 0.516 0.275 0.005 0.012 0.046 0.308 0.497 0.710
## gamma.34 0.666 0.279 0.005 0.010 0.139 0.463 0.671 0.864
## 97.5% Model
## theta1.1121 1.322 2d
## theta1.1154 0.188 2d
## theta1.1222 1.559 2d
## theta1.1637 1.016 2d
## theta1.1995 1.077 2d
## theta1.3369 2.698 2d
## theta1.4004 2.057 2d
## theta1.4209 1.154 2d
## theta1.4268 1.262 2d
## theta1.4841 0.368 2d
## theta1.5074 1.252 2d
## theta1.5495 -0.879 2d
## theta1.5710 0.066 2d
## theta1.8294 1.262 2d
## theta1.8980 2.057 2d
## theta1.9899 1.833 2d
## theta2.1121 1.870 2d
## theta2.1154 0.992 2d
## theta2.1222 1.770 2d
## theta2.1637 1.315 2d
## theta2.1995 1.255 2d
## theta2.3369 2.700 2d
## theta2.4004 1.727 2d
## theta2.4209 1.677 2d
## theta2.4268 1.519 2d
## theta2.4841 1.158 2d
## theta2.5074 -0.013 2d
## theta2.5495 1.321 2d
## theta2.5710 1.314 2d
## theta2.8294 1.437 2d
## theta2.8980 2.036 2d
## theta2.9899 2.191 2d
## gamma.25 1.203 2d
## gamma.26 1.014 2d
## gamma.27 1.088 2d
## gamma.29 1.069 2d
## gamma.34 1.205 2d
Next simulate data sets for the 2d with clusters
simulate_pp_data_mcmclist2d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta1_names <- grep("^theta1\\.", colnames(all_samples), value = TRUE)
theta2_names <- grep("^theta2\\.", colnames(all_samples), value = TRUE)
gamma_names <- grep("^gamma\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) { # Row by row iteration
s <- sample(1:nrow(all_samples), 1) # Draw from any chain
theta1 <- all_samples[s, theta1_names] # Draw one value for theta1
theta2 <- all_samples[s, theta2_names] # Draw one value for theta2
gamma <- all_samples[s, gamma_names] # Draw one value for gamma
for (i in 1:n_obs) { # Column by column iteration
j1 <- as.character(pair_data$itemj1[i]) # Locate the first item number for that row from the observed
j2 <- as.character(pair_data$itemj2[i]) # Locate the second item number for that row from the observed
rater <- as.character(pair_data$rater[i]) # Locate the rater for that row from the observed
# Find the model-implied diff between sampled theta1 of 2 items
diff1 <- theta1[paste0("theta1.", j1)] - theta1[paste0("theta1.", j2)]
# Find the model-implied diff between sampled theta2 of 2 items
diff2 <- theta2[paste0("theta2.", j1)] - theta2[paste0("theta2.", j2)]
g <- gamma[paste0("gamma.", rater)] # Find the model-implied sampled gamma of the rater
# eta will be the input to the probit link function
# cos() & sin() assume angles are in radians as they are
zi1 <- cos(g)
zi2 <- sin(g)
# Add thetas in proportion to sine or cosine of gamma, representing how much more favorable j1 is than j2
eta <- zi1 * diff1 + zi2 * diff2
# If eta is negative, then j2 is preferrable. If close to 0, then it's a toss-up.
p <- pnorm(eta) # transform eta using the probit link to the probability j1 is chosen over j2.
# If p is <.5 preference for j2, p = .5 no preference, p > .5 preference for j1.
# Generate 1 draw of 0 or 1 based on p, and add it to the sim_choices matrix
sim_choices[r, i] <- rbinom(n = 1, size = 1, p)
}
}
# return a set of binary choices where 1 is a choice of j1 and 0 is a choice of j2
return(sim_choices)
}
sim_2d_select <- simulate_pp_data_mcmclist2d(mcmc_list2d_select, pairDataCDAT_select)
Convert observed item ID choices to 1/0 for comparison to simulated data. If itemj1 is chosen, then 1, if j2 is chosen, then 0.
#If choice is first item, 1, if choice is second item, 0
observed_binary_select <- ifelse(pairDataCDAT_select$choice == pairDataCDAT_select$itemj1, 1,
ifelse(pairDataCDAT_select$choice == pairDataCDAT_select$itemj2, 0, NA))
compute_ppp_proportion <- function(observed_choices, sim_choices_matrix) {
obs_prop <- mean(observed_choices)
# The mean of the row is the proportion of same choice for that simulated data set
sim_prop <- rowMeans(sim_choices_matrix)
sim_correct <- rowMeans(sim_choices_matrix == matrix(observed_choices,
nrow = nrow(sim_choices_matrix),
ncol = length(observed_choices),
byrow = TRUE))
# Posterior predictive p-value
ppp <- mean(sim_prop >= obs_prop)
list(
observed = obs_prop,
simulated = sim_prop,
simulated_correct = sim_correct,
ppp = ppp
)
}
ppp2d_select <- compute_ppp_proportion(observed_binary_select, sim_2d_select)
ppp2d_select
## $observed
## [1] 0.5091864
##
## $simulated
## [1] 0.5144357 0.4698163 0.4908136 0.4881890 0.4855643 0.4750656 0.5275591
## [8] 0.5039370 0.4776903 0.5118110 0.5065617 0.4619423 0.5275591 0.4881890
## [15] 0.5196850 0.4934383 0.5196850 0.5275591 0.4803150 0.5301837 0.4750656
## [22] 0.5065617 0.4881890 0.5065617 0.4645669 0.4986877 0.5223097 0.4855643
## [29] 0.5144357 0.4934383 0.4855643 0.4566929 0.4986877 0.4671916 0.5091864
## [36] 0.5091864 0.4619423 0.4776903 0.4881890 0.4908136 0.5249344 0.4803150
## [43] 0.4750656 0.5223097 0.5223097 0.4881890 0.4776903 0.4881890 0.5065617
## [50] 0.4908136 0.4881890 0.4934383 0.4881890 0.4593176 0.4803150 0.4855643
## [57] 0.5118110 0.5196850 0.5039370 0.4881890 0.4829396 0.4671916 0.4776903
## [64] 0.4829396 0.4776903 0.5328084 0.4960630 0.5091864 0.4986877 0.4986877
## [71] 0.4881890 0.5144357 0.5118110 0.4986877 0.5249344 0.5354331 0.5196850
## [78] 0.4960630 0.5091864 0.4881890 0.4776903 0.4908136 0.4986877 0.5380577
## [85] 0.5196850 0.4934383 0.5196850 0.5013123 0.5223097 0.5118110 0.4671916
## [92] 0.5091864 0.5091864 0.4776903 0.4960630 0.5091864 0.5301837 0.5301837
## [99] 0.4593176 0.4855643 0.5433071 0.5118110 0.4750656 0.5065617 0.5249344
## [106] 0.4724409 0.5249344 0.5170604 0.4881890 0.4829396 0.5013123 0.5170604
## [113] 0.4750656 0.4960630 0.4383202 0.5118110 0.5065617 0.4934383 0.4829396
## [120] 0.4671916 0.4776903 0.5406824 0.4566929 0.5354331 0.5039370 0.4855643
## [127] 0.5144357 0.5013123 0.5118110 0.4593176 0.4461942 0.5144357 0.5380577
## [134] 0.5459318 0.5039370 0.4671916 0.4960630 0.4986877 0.5144357 0.5118110
## [141] 0.4986877 0.5196850 0.5039370 0.4855643 0.5039370 0.5144357 0.4986877
## [148] 0.5065617 0.4855643 0.4986877 0.4645669 0.4934383 0.4855643 0.4986877
## [155] 0.5144357 0.4619423 0.5196850 0.4934383 0.4724409 0.4855643 0.5013123
## [162] 0.5354331 0.4435696 0.4724409 0.5118110 0.5223097 0.4619423 0.4960630
## [169] 0.4908136 0.4776903 0.4698163 0.4593176 0.4776903 0.5354331 0.5118110
## [176] 0.5065617 0.4750656 0.5223097 0.4803150 0.4829396 0.5118110 0.4829396
## [183] 0.4724409 0.4986877 0.4881890 0.4803150 0.5564304 0.5170604 0.4934383
## [190] 0.4671916 0.5223097 0.4986877 0.4724409 0.4908136 0.5196850 0.4934383
## [197] 0.5091864 0.5196850 0.5301837 0.5013123
##
## $simulated_correct
## [1] 0.6850394 0.6666667 0.6981627 0.6062992 0.6404199 0.6929134 0.6876640
## [8] 0.6430446 0.6482940 0.6824147 0.7086614 0.6640420 0.6719160 0.6692913
## [15] 0.6377953 0.6797900 0.6640420 0.6561680 0.7086614 0.6482940 0.6561680
## [22] 0.6561680 0.6535433 0.6666667 0.6666667 0.6797900 0.6351706 0.6666667
## [29] 0.6955381 0.6640420 0.6614173 0.6377953 0.6797900 0.6850394 0.6640420
## [36] 0.6640420 0.7270341 0.6850394 0.6325459 0.6876640 0.6587927 0.6299213
## [43] 0.6719160 0.6824147 0.6509186 0.6430446 0.6850394 0.6640420 0.6456693
## [50] 0.7191601 0.6850394 0.7112861 0.6640420 0.6246719 0.6981627 0.6509186
## [57] 0.6299213 0.7007874 0.7060367 0.6587927 0.7060367 0.6587927 0.6850394
## [64] 0.6797900 0.6535433 0.6719160 0.6876640 0.6587927 0.6797900 0.6902887
## [71] 0.6745407 0.6220472 0.6824147 0.6430446 0.6587927 0.6430446 0.6535433
## [78] 0.6666667 0.7270341 0.6955381 0.6587927 0.6456693 0.6325459 0.7191601
## [85] 0.7007874 0.6482940 0.6640420 0.6351706 0.6666667 0.6719160 0.6272966
## [92] 0.6482940 0.6587927 0.6797900 0.6509186 0.6325459 0.6902887 0.6482940
## [99] 0.6771654 0.6981627 0.6719160 0.6719160 0.6929134 0.6929134 0.6692913
## [106] 0.6850394 0.6850394 0.6509186 0.6272966 0.6692913 0.6929134 0.6509186
## [113] 0.6981627 0.6561680 0.6614173 0.7034121 0.6614173 0.6325459 0.7112861
## [120] 0.6535433 0.6640420 0.6587927 0.6692913 0.6745407 0.6587927 0.6876640
## [127] 0.6640420 0.6614173 0.6666667 0.6876640 0.6640420 0.6482940 0.6614173
## [134] 0.7112861 0.6220472 0.6482940 0.6456693 0.6850394 0.6692913 0.6719160
## [141] 0.6640420 0.7217848 0.6955381 0.6456693 0.6587927 0.6902887 0.6535433
## [148] 0.7034121 0.6509186 0.6745407 0.6824147 0.6797900 0.7086614 0.6797900
## [155] 0.6745407 0.6850394 0.6482940 0.6902887 0.6797900 0.6771654 0.7139108
## [162] 0.7007874 0.7139108 0.6902887 0.6771654 0.6876640 0.6482940 0.7034121
## [169] 0.6614173 0.6850394 0.6771654 0.6824147 0.6797900 0.6797900 0.7454068
## [176] 0.6876640 0.6614173 0.7034121 0.6456693 0.6377953 0.6876640 0.6745407
## [183] 0.6797900 0.6797900 0.6377953 0.6561680 0.6640420 0.6614173 0.6692913
## [190] 0.6167979 0.6719160 0.6692913 0.7165354 0.6194226 0.6430446 0.6640420
## [197] 0.6745407 0.6850394 0.6587927 0.7034121
##
## $ppp
## [1] 0.345
summary(ppp2d_select$simulated_correct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6063 0.6535 0.6693 0.6707 0.6850 0.7454
Run the function for 1d model ## 1d PPP
simulate_pp_data_mcmclist1d <- function(mcmc_list, pair_data) {
# Combine chains into one matrix
all_samples <- do.call(rbind, mcmc_list)
n_obs <- nrow(pair_data)
n_rep = 200
sim_choices <- matrix(NA, nrow = n_rep, ncol = n_obs)
# Identify column names for theta1, theta2, gamma
theta_names <- grep("^theta\\.", colnames(all_samples), value = TRUE)
alpha_names <- grep("^alpha\\.", colnames(all_samples), value = TRUE)
for (r in 1:n_rep) {
s <- sample(1:nrow(all_samples), 1) # Select one draw from any chain in combined samples
theta <- all_samples[s, theta_names] # Draw one value for theta
alpha <- all_samples[s, alpha_names] # Draw one value for alpha
for (i in 1:n_obs) {
j1 <- as.character(pair_data$itemj1[i])
j2 <- as.character(pair_data$itemj2[i])
rater <- as.character(pair_data$rater[i])
# Safely extract theta/gamma values using column names
diff <- theta[paste0("theta.", j1)] - theta[paste0("theta.", j2)]
a <- alpha[paste0("alpha.", rater)]
eta <- a * diff # Add thetas
p <- pnorm(eta) # probit link is a normal distribution
sim_choices[r, i] <- rbinom(1, 1, p)
}
}
return(sim_choices)
}
sim_1d_select <- simulate_pp_data_mcmclist1d(mcmc_list1d_select, pairDataCDAT_select)
Compute Posterior Predictive P-value (PPP) for 1d model
ppp1d_select <- compute_ppp_proportion(observed_binary_select, sim_1d_select)
ppp1d_select
## $observed
## [1] 0.5091864
##
## $simulated
## [1] 0.5118110 0.4934383 0.5380577 0.4461942 0.5275591 0.5170604 0.4645669
## [8] 0.4881890 0.5065617 0.4803150 0.4960630 0.4960630 0.4803150 0.5170604
## [15] 0.4960630 0.4881890 0.4776903 0.4986877 0.4829396 0.4986877 0.4698163
## [22] 0.5039370 0.4750656 0.5065617 0.5354331 0.5091864 0.4619423 0.4881890
## [29] 0.4855643 0.4829396 0.5144357 0.5223097 0.4593176 0.5301837 0.4908136
## [36] 0.4934383 0.4986877 0.5013123 0.4803150 0.5039370 0.5065617 0.5695538
## [43] 0.4540682 0.5091864 0.5144357 0.5039370 0.5170604 0.4750656 0.4855643
## [50] 0.4960630 0.4803150 0.4908136 0.4724409 0.4829396 0.5091864 0.5013123
## [57] 0.4566929 0.4908136 0.5459318 0.5223097 0.5223097 0.4986877 0.4724409
## [64] 0.4671916 0.4908136 0.4881890 0.4383202 0.4750656 0.5013123 0.5144357
## [71] 0.4803150 0.5013123 0.4750656 0.5065617 0.4934383 0.5091864 0.5170604
## [78] 0.5091864 0.4934383 0.5065617 0.4908136 0.5039370 0.4934383 0.5039370
## [85] 0.5170604 0.5013123 0.4435696 0.5170604 0.4750656 0.4829396 0.5223097
## [92] 0.5091864 0.5328084 0.5013123 0.5485564 0.4803150 0.5039370 0.5013123
## [99] 0.4724409 0.4619423 0.4776903 0.4724409 0.5039370 0.4960630 0.5459318
## [106] 0.4829396 0.5065617 0.4881890 0.5013123 0.4750656 0.5091864 0.4960630
## [113] 0.5013123 0.4776903 0.5039370 0.4540682 0.4724409 0.4934383 0.5091864
## [120] 0.4986877 0.4934383 0.5065617 0.5196850 0.4881890 0.4671916 0.4908136
## [127] 0.5223097 0.4671916 0.5118110 0.4855643 0.4960630 0.5144357 0.4645669
## [134] 0.5223097 0.4776903 0.5039370 0.4934383 0.4698163 0.4934383 0.4750656
## [141] 0.4671916 0.4724409 0.4934383 0.4908136 0.4829396 0.4934383 0.4960630
## [148] 0.5301837 0.4750656 0.4750656 0.5223097 0.5065617 0.5118110 0.4356955
## [155] 0.4881890 0.5406824 0.4986877 0.4698163 0.5118110 0.4776903 0.5013123
## [162] 0.5039370 0.4908136 0.4908136 0.5144357 0.4750656 0.4908136 0.5196850
## [169] 0.4803150 0.4803150 0.5249344 0.4881890 0.4829396 0.4934383 0.4881890
## [176] 0.5196850 0.5223097 0.5091864 0.4803150 0.4829396 0.4960630 0.5223097
## [183] 0.4724409 0.5118110 0.4986877 0.5196850 0.4671916 0.5013123 0.5170604
## [190] 0.4960630 0.4855643 0.5118110 0.4855643 0.4960630 0.5118110 0.5170604
## [197] 0.5170604 0.5065617 0.4671916 0.4908136
##
## $simulated_correct
## [1] 0.6719160 0.6797900 0.6771654 0.7217848 0.7034121 0.6719160 0.6666667
## [8] 0.6115486 0.6194226 0.6351706 0.6299213 0.6561680 0.6404199 0.6981627
## [15] 0.6666667 0.6745407 0.6482940 0.6692913 0.6692913 0.6797900 0.6719160
## [22] 0.6640420 0.6771654 0.6771654 0.6482940 0.6797900 0.6377953 0.6692913
## [29] 0.6351706 0.6850394 0.6640420 0.6299213 0.6404199 0.6482940 0.6719160
## [36] 0.6850394 0.6377953 0.6036745 0.6666667 0.6902887 0.6771654 0.6719160
## [43] 0.6771654 0.6640420 0.6587927 0.7007874 0.6089239 0.6404199 0.6561680
## [50] 0.6719160 0.6719160 0.6404199 0.6167979 0.6325459 0.6692913 0.6666667
## [57] 0.6482940 0.6404199 0.6692913 0.6246719 0.6824147 0.6115486 0.6745407
## [64] 0.6850394 0.6719160 0.7217848 0.6456693 0.6456693 0.6929134 0.6587927
## [71] 0.6929134 0.6876640 0.6456693 0.6561680 0.7007874 0.6797900 0.6509186
## [78] 0.6745407 0.6797900 0.6561680 0.6089239 0.6640420 0.6430446 0.6482940
## [85] 0.6719160 0.6719160 0.6404199 0.6456693 0.6614173 0.6745407 0.6509186
## [92] 0.6587927 0.6614173 0.6614173 0.6929134 0.6509186 0.6640420 0.6614173
## [99] 0.6745407 0.6430446 0.6797900 0.7007874 0.6640420 0.6771654 0.6272966
## [106] 0.6430446 0.6771654 0.6745407 0.6509186 0.6929134 0.6430446 0.6561680
## [113] 0.6719160 0.6902887 0.6955381 0.6876640 0.6587927 0.6587927 0.6797900
## [120] 0.6640420 0.6377953 0.6509186 0.6955381 0.6797900 0.6167979 0.6929134
## [127] 0.6089239 0.6692913 0.6509186 0.6561680 0.6929134 0.6482940 0.6666667
## [134] 0.6404199 0.6430446 0.6745407 0.6377953 0.6771654 0.6482940 0.6404199
## [141] 0.6797900 0.7007874 0.6430446 0.6771654 0.6850394 0.6797900 0.6404199
## [148] 0.6640420 0.6509186 0.7139108 0.7034121 0.6299213 0.6404199 0.6377953
## [155] 0.6850394 0.6902887 0.6902887 0.6246719 0.6561680 0.6587927 0.6509186
## [162] 0.6797900 0.6876640 0.6876640 0.6220472 0.6614173 0.6719160 0.6797900
## [169] 0.6089239 0.6771654 0.6902887 0.6010499 0.7112861 0.6587927 0.6535433
## [176] 0.6535433 0.6981627 0.6272966 0.6614173 0.6062992 0.6299213 0.6771654
## [183] 0.6482940 0.6509186 0.6377953 0.6745407 0.6062992 0.6456693 0.6351706
## [190] 0.7086614 0.6456693 0.6719160 0.6666667 0.6771654 0.6246719 0.6456693
## [197] 0.7086614 0.6824147 0.6062992 0.6614173
##
## $ppp
## [1] 0.275
summary(ppp1d_select$simulated_correct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6010 0.6450 0.6640 0.6616 0.6778 0.7218
Interpretation: The one-dimensional model matches the predictiveness of the 2-dimensional model when using only the five middle-gamma raters comparisons. # DIC
computeDbar2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta1 <- setNames(mcmc_matrix[s, theta1_ids], sub("theta1\\.", "", theta1_ids))
theta2 <- setNames(mcmc_matrix[s, theta2_ids], sub("theta2\\.", "", theta2_ids))
gamma <- setNames(mcmc_matrix[s, gamma_ids], sub("gamma\\.", "", gamma_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma[r]
if (is.na(g)) g <- mean(gamma, na.rm = TRUE)
diff1 <- theta1[j2] - theta1[j1]
diff2 <- theta2[j2] - theta2[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE) # Compute log-probability of the observed binary outcome
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
computeDevianceAtPosteriorMean2d <- function(mcmc_matrix, pair_data, theta1_ids, theta2_ids, gamma_ids) {
# Compute posterior means
theta1_mean <- setNames(colMeans(mcmc_matrix[, theta1_ids]), sub("theta1\\.", "", theta1_ids))
theta2_mean <- setNames(colMeans(mcmc_matrix[, theta2_ids]), sub("theta2\\.", "", theta2_ids))
gamma_mean <- setNames(colMeans(mcmc_matrix[, gamma_ids]), sub("gamma\\.", "", gamma_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
g <- gamma_mean[r]
if (is.na(g)) g <- mean(gamma_mean, na.rm = TRUE)
diff1 <- theta1_mean[j2] - theta1_mean[j1]
diff2 <- theta2_mean[j2] - theta2_mean[j1]
eta[n] <- (1 - g) * diff1 + g * diff2
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
mcmc_matrix_select <- do.call(rbind, mcmc_list2d_select) # combine all chains
theta1_ids <- grep("^theta1\\.", colnames(mcmc_matrix_select), value = TRUE)
theta2_ids <- grep("^theta2\\.", colnames(mcmc_matrix_select), value = TRUE)
gamma_ids <- grep("^gamma\\.", colnames(mcmc_matrix_select), value = TRUE)
D_bar2d_select <- computeDbar2d(mcmc_matrix_select, pairDataCDAT_select, theta1_ids, theta2_ids, gamma_ids)
D_mean2d_select <- computeDevianceAtPosteriorMean2d(mcmc_matrix_select, pairDataCDAT_select, theta1_ids, theta2_ids, gamma_ids)
p_D2d_select <- D_bar2d_select - D_mean2d_select
DIC2d_select <- D_bar2d_select + p_D2d_select
print(DIC2d_select)
## [1] 477.1262
computeDbar1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Extract item and judge IDs
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
S <- nrow(mcmc_matrix)
N <- nrow(pair_data)
deviance_vec <- numeric(S)
for (s in 1:S) {
# Extract current parameter values
theta <- setNames(mcmc_matrix[s, theta_ids], sub("theta\\.", "", theta_ids))
alpha <- setNames(mcmc_matrix[s, alpha_ids], sub("alpha\\.", "", alpha_ids))
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha[r]
if (is.na(a)) a <- mean(alpha, na.rm = TRUE)
diff <- theta[j2] - theta[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10) # avoid log(0)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
deviance_vec[s] <- -2 * sum(loglik)
}
mean(deviance_vec)
}
computeDevianceAtPosteriorMean1d <- function(mcmc_matrix, pair_data, theta_ids, alpha_ids) {
# Compute posterior means
theta_mean <- setNames(colMeans(mcmc_matrix[, theta_ids]), sub("theta\\.", "", theta_ids))
alpha_mean <- setNames(colMeans(mcmc_matrix[, alpha_ids]), sub("alpha\\.", "", alpha_ids))
itemj1 <- as.character(pair_data$itemj1)
itemj2 <- as.character(pair_data$itemj2)
choice <- pair_data$choice
rater <- as.character(pair_data$rater)
N <- nrow(pair_data)
eta <- numeric(N)
for (n in 1:N) {
j1 <- itemj1[n]
j2 <- itemj2[n]
r <- rater[n]
a <- alpha_mean[r]
if (is.na(a)) a <- mean(alpha_mean, na.rm = TRUE)
diff <- theta_mean[j2] - theta_mean[j1]
eta[n] <- a * diff
}
p <- pnorm(eta)
p <- pmin(pmax(p, 1e-10), 1 - 1e-10)
is_j2_chosen <- choice == pair_data$itemj2
loglik <- dbinom(is_j2_chosen, size = 1, prob = p, log = TRUE)
D_at_mean <- -2 * sum(loglik)
return(D_at_mean)
}
mcmc_matrix_select <- do.call(rbind, mcmc_list1d_select) # combine all chains
theta_ids <- grep("^theta\\.", colnames(mcmc_matrix_select), value = TRUE)
alpha_ids <- grep("^alpha\\.", colnames(mcmc_matrix_select), value = TRUE)
D_bar1d_select <- computeDbar1d(mcmc_matrix_select, pairDataCDAT_select, theta_ids, alpha_ids)
D_mean1d_select <- computeDevianceAtPosteriorMean1d(mcmc_matrix_select, pairDataCDAT_select, theta_ids, alpha_ids)
p_D1d_select <- D_bar1d_select - D_mean1d_select
DIC1d_select <- D_bar1d_select + p_D1d_select
print(DIC1d_select)
## [1] 405.6239
Interpretation: No question that the one-dimensional model fits better than the two0dimensional model when the raters all share the same perceptual orientation. Also the 1D model is much improved in predictiveness when just the five best raters are included. This shows that having raters with differing perceptual orientations is a problem when considering only one dimension, but disappears when modeling both dimensions. Could there be more dimensions that would yet increase predictiveness? Also check to see that the posterior distributions of theta more precise? Check out the interquartile range of the 1d_select and the 1_d to compare.
load("Out/summary1d.Rdata")
summary1d_thetas <- summary1d[1:16,]
summary1d_thetas
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta.1121 -0.177 0.277 0.003 0.008 -0.730 -0.359 -0.173 0.010
## theta.1154 -0.947 0.343 0.004 0.009 -1.650 -1.172 -0.931 -0.711
## theta.1222 0.208 0.278 0.003 0.008 -0.342 0.022 0.204 0.398
## theta.1637 -0.029 0.270 0.003 0.008 -0.571 -0.209 -0.028 0.153
## theta.1995 -0.401 0.285 0.003 0.008 -0.971 -0.591 -0.400 -0.206
## theta.3369 1.719 0.480 0.006 0.011 0.845 1.379 1.696 2.028
## theta.4004 0.348 0.293 0.003 0.008 -0.224 0.145 0.345 0.543
## theta.4209 -0.211 0.279 0.003 0.008 -0.767 -0.390 -0.206 -0.022
## theta.4268 0.204 0.280 0.003 0.008 -0.347 0.015 0.205 0.391
## theta.4841 -0.485 0.288 0.003 0.008 -1.066 -0.668 -0.479 -0.287
## theta.5074 -0.064 0.273 0.003 0.008 -0.607 -0.244 -0.064 0.120
## theta.5495 -0.721 0.321 0.004 0.008 -1.387 -0.931 -0.713 -0.499
## theta.5710 -0.958 0.342 0.004 0.009 -1.664 -1.178 -0.945 -0.723
## theta.8294 0.290 0.279 0.003 0.008 -0.266 0.101 0.290 0.475
## theta.8980 0.803 0.333 0.004 0.009 0.167 0.572 0.793 1.021
## theta.9899 0.405 0.296 0.003 0.008 -0.177 0.203 0.402 0.609
## 97.5% Model
## theta.1121 0.356 1d
## theta.1154 -0.310 1d
## theta.1222 0.763 1d
## theta.1637 0.496 1d
## theta.1995 0.144 1d
## theta.3369 2.742 1d
## theta.4004 0.935 1d
## theta.4209 0.328 1d
## theta.4268 0.750 1d
## theta.4841 0.066 1d
## theta.5074 0.465 1d
## theta.5495 -0.103 1d
## theta.5710 -0.317 1d
## theta.8294 0.843 1d
## theta.8980 1.484 1d
## theta.9899 0.985 1d
summary1d_select_thetas <- summary1d_select[1:16,]
summary1d_select_thetas$Model <- "1d Select"
summary1d_select_thetas
## Mean SD Naive SE Time-series SE 2.5% 25% 50% 75%
## theta.1121 -0.276 0.316 0.004 0.006 -0.900 -0.487 -0.276 -0.061
## theta.1154 -1.272 0.406 0.005 0.007 -2.099 -1.535 -1.263 -0.988
## theta.1222 0.429 0.318 0.004 0.005 -0.175 0.206 0.421 0.639
## theta.1637 0.107 0.302 0.003 0.005 -0.479 -0.093 0.105 0.304
## theta.1995 -0.438 0.322 0.004 0.006 -1.073 -0.656 -0.436 -0.217
## theta.3369 1.709 0.480 0.006 0.008 0.840 1.371 1.682 2.019
## theta.4004 0.618 0.342 0.004 0.006 -0.036 0.386 0.610 0.843
## theta.4209 -0.247 0.317 0.004 0.005 -0.875 -0.459 -0.246 -0.029
## theta.4268 0.278 0.310 0.004 0.005 -0.328 0.072 0.276 0.481
## theta.4841 -0.676 0.339 0.004 0.006 -1.362 -0.906 -0.666 -0.443
## theta.5074 0.137 0.298 0.003 0.005 -0.457 -0.066 0.139 0.338
## theta.5495 -1.215 0.397 0.005 0.007 -2.030 -1.475 -1.200 -0.942
## theta.5710 -1.251 0.409 0.005 0.007 -2.088 -1.518 -1.230 -0.969
## theta.8294 0.310 0.309 0.004 0.006 -0.279 0.100 0.305 0.513
## theta.8980 1.088 0.381 0.004 0.006 0.374 0.831 1.071 1.333
## theta.9899 0.690 0.343 0.004 0.006 0.037 0.456 0.683 0.911
## 97.5% Model
## theta.1121 0.334 1d Select
## theta.1154 -0.514 1d Select
## theta.1222 1.076 1d Select
## theta.1637 0.708 1d Select
## theta.1995 0.172 1d Select
## theta.3369 2.720 1d Select
## theta.4004 1.317 1d Select
## theta.4209 0.372 1d Select
## theta.4268 0.903 1d Select
## theta.4841 -0.037 1d Select
## theta.5074 0.708 1d Select
## theta.5495 -0.475 1d Select
## theta.5710 -0.501 1d Select
## theta.8294 0.922 1d Select
## theta.8980 1.882 1d Select
## theta.9899 1.399 1d Select
library(ggplot2)
# Assuming these are matched and in the same order
df_compare <- data.frame(
item = rownames(summary1d_select_thetas),
median1 = summary1d_thetas$`50%`[1:nrow(summary1d_select_thetas)],
median2 = summary1d_select_thetas$`50%`,
CI1 = summary1d_thetas$`97.5%` - summary1d_thetas$`2.5%`,
CI2 = summary1d_select_thetas$`97.5%` - summary1d_select_thetas$`2.5%`
)
cor(df_compare$median1, df_compare$median2)
## [1] 0.9770371
cor(df_compare$CI1, df_compare$CI2)
## [1] 0.9406444
df_compare$diff <- df_compare$median1 - df_compare$median2
df_compare$avg <- (df_compare$median1 + df_compare$median2) / 2
ggplot(df_compare, aes(x = avg, y = diff)) +
geom_point() +
geom_hline(yintercept = mean(df_compare$diff), linetype = "dashed") +
geom_hline(yintercept = mean(df_compare$diff) + 1.96 * sd(df_compare$diff), color = "red") +
geom_hline(yintercept = mean(df_compare$diff) - 1.96 * sd(df_compare$diff), color = "red") +
labs(x = "Average Theta", y = "Difference in Theta", title = "Bland-Altman Plot")
t.test(df_compare$median1, df_compare$median2, paired = TRUE)
##
## Paired t-test
##
## data: df_compare$median1 and df_compare$median2
## t = 0.022523, df = 15, p-value = 0.9823
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1228939 0.1255189
## sample estimates:
## mean difference
## 0.0013125
The distribution of posterior medians are not significantly different.
t.test(df_compare$CI1, df_compare$CI2, paired = TRUE)
##
## Paired t-test
##
## data: df_compare$CI1 and df_compare$CI2
## t = -8.9684, df = 15, p-value = 2.052e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1958599 -0.1206401
## sample estimates:
## mean difference
## -0.15825
There is a significant difference in the credible intervals of the two models.
ggplot(df_compare, aes(x = median1, y = median2)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "blue") +
labs(x = "Full Model Theta", y = "Select Raters Theta", title = "Theta Comparison")
ggplot(df_compare, aes(x = CI1, y = CI2)) +
geom_point() +
labs(x = "Full Model Theta CI", y = "Select Raters Theta CI", title = "Theta Comparison")
ci_reg <- lm(CI2 ~ CI1, data = df_compare)
summary(ci_reg)
##
## Call:
## lm(formula = CI2 ~ CI1, data = df_compare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.10873 -0.04128 -0.01999 0.03846 0.11979
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.27580 0.10693 2.579 0.0218 *
## CI1 0.90297 0.08707 10.370 5.94e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07002 on 14 degrees of freedom
## Multiple R-squared: 0.8848, Adjusted R-squared: 0.8766
## F-statistic: 107.5 on 1 and 14 DF, p-value: 5.944e-08
CIs are significantly but slightly smaller for the selected judges in the center of the range of gamma distributions.