Load data

# Read the Experiment 2 evaluation data; the first row of the CSV supplies
# the column names.
data <- readr::read_csv(
  file = "evaluation-exp2-long.csv",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   Duration = col_double(),
##   ReadCheck2 = col_double(),
##   Age = col_double(),
##   Race2 = col_logical(),
##   Correct = col_double(),
##   Error = col_double(),
##   Unsure = col_double()
## )
## See spec(...) for full column specifications.

Incorrect lure and correct answer descriptive stats for summaries in Table 2

# Descriptive statistics for Table 2, computed in two steps:
#   1. per-participant mean of Error (lure) and Correct responses within each
#      Instructions x Validity x Difficulty cell;
#   2. mean, SD and standard error of those participant-level means per cell.
# `.groups = "drop"` is set on both steps so the result is an ungrouped tibble
# (no residual grouping carried into later mutate()/plotting code) and dplyr
# does not emit its "has grouped output by ..." message.
summary <- data %>%
  group_by(ID, Instructions, Validity, Difficulty) %>%
  summarise(
    mean_error = mean(Error, na.rm = TRUE),
    mean_correct = mean(Correct, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(Instructions, Validity, Difficulty) %>%
  summarise(
    mean_err = mean(mean_error),
    sd_err = sd(mean_error),
    # n() is the number of participant-level rows in the cell
    se_error = sd(mean_error) / sqrt(n()),
    mean_corr = mean(mean_correct),
    sd_corr = sd(mean_correct),
    se_corr = sd(mean_correct) / sqrt(n()),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'ID', 'Instructions', 'Validity'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Instructions', 'Validity'. You can override using the `.groups` argument.
# Display the per-condition descriptive statistics.
print(summary)
## # A tibble: 8 x 9
## # Groups:   Instructions, Validity [4]
##   Instructions Validity   Difficulty mean_err sd_err se_error mean_corr sd_corr
##   <chr>        <chr>      <chr>         <dbl>  <dbl>    <dbl>     <dbl>   <dbl>
## 1 Accuracy     Accurate   Easy        0.00128 0.0113  0.00128     0.851   0.239
## 2 Accuracy     Accurate   Hard        0.0295  0.0459  0.00520     0.497   0.303
## 3 Accuracy     Inaccurate Easy        0.0936  0.129   0.0146      0.692   0.270
## 4 Accuracy     Inaccurate Hard        0.197   0.179   0.0202      0.299   0.277
## 5 Interest     Accurate   Easy        0.00256 0.0159  0.00180     0.829   0.254
## 6 Interest     Accurate   Hard        0.0295  0.0512  0.00580     0.467   0.301
## 7 Interest     Inaccurate Easy        0.0936  0.144   0.0163      0.691   0.273
## 8 Interest     Inaccurate Hard        0.197   0.173   0.0196      0.269   0.253
## # … with 1 more variable: se_corr <dbl>

Models to analyze incorrect lure responses

# Mixed-effects logistic regression of lure (Error) responses on
# Difficulty x Validity. Both predictors are recoded to -0.5 / +0.5
# (Accurate and Easy = -0.5, everything else = +0.5). Random effects:
# by-participant intercepts with Difficulty and Validity slopes, and
# by-item intercepts.
modelerror1 <- data %>%
  dplyr::mutate(
    Validity = ifelse(Validity == "Accurate", -0.5, 0.5),
    Difficulty = ifelse(Difficulty == "Easy", -0.5, 0.5)
  ) %>%
  glmer(
    Error ~ Difficulty * Validity +
      (1 + Difficulty + Validity | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(
      optimizer = "bobyqa",
      optCtrl = list(maxfun = 2e5)
    )
  )
## boundary (singular) fit: see ?isSingular
summary(modelerror1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty * Validity + (1 + Difficulty + Validity |  
##     ID) + (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   2602.5   2676.6  -1290.2   2580.5     6229 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7860 -0.2494 -0.1221 -0.0403 16.5770 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr       
##  Item   (Intercept) 1.1169   1.0568              
##  ID     (Intercept) 0.6465   0.8040              
##         Difficulty  0.7531   0.8678   -0.34      
##         Validity    0.7968   0.8926    0.80  0.30
## Number of obs: 6240, groups:  Item, 80; ID, 78
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -4.0907     0.2228 -18.358  < 2e-16 ***
## Difficulty            2.0435     0.4111   4.971 6.65e-07 ***
## Validity              2.9538     0.3205   9.218  < 2e-16 ***
## Difficulty:Validity  -1.8025     0.6017  -2.996  0.00274 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Valdty
## Difficulty  -0.490              
## Validity    -0.487  0.581       
## Dffclty:Vld  0.548 -0.668 -0.779
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Tabulate the model: fixed effects as odds ratios with confidence intervals
# and p-values, followed by random-effect variances, group counts and R2.
tab_model(modelerror1)
  Error
Predictors Odds Ratios CI p
(Intercept) 0.02 0.01 – 0.03 <0.001
Difficulty 7.72 3.45 – 17.27 <0.001
Validity 19.18 10.23 – 35.94 <0.001
Difficulty * Validity 0.16 0.05 – 0.54 0.003
Random Effects
σ2 3.29
τ00 Item 1.12
τ00 ID 0.65
τ11 ID.Difficulty 0.75
τ11 ID.Validity 0.80
ρ01 ID.Difficulty -0.34
ρ01 ID.Validity 0.80
N ID 78
N Item 80
Observations 6240
Marginal R2 / Conditional R2 0.510 / NA
# Mixed-effects logistic regression of lure (Error) responses on
# Difficulty x Instructions, restricted to the Inaccurate items. Predictors
# are recoded to -0.5 / +0.5 (Easy and Accuracy = -0.5, everything else =
# +0.5). Random effects: by-participant intercepts with a Difficulty slope,
# and by-item intercepts.
modelerror2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(
    Difficulty = ifelse(Difficulty == "Easy", -0.5, 0.5),
    Instructions = ifelse(Instructions == "Accuracy", -0.5, 0.5)
  ) %>%
  glmer(
    Error ~ Difficulty * Instructions + (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(
      optimizer = "bobyqa",
      optCtrl = list(maxfun = 2e5)
    )
  )

summary(modelerror2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty * Instructions + (1 + Difficulty | ID) + (1 |  
##     Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   2225.0   2273.3  -1104.5   2209.0     3112 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9675 -0.3802 -0.2318 -0.1237  6.5245 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  Item   (Intercept) 0.915    0.9565        
##  ID     (Intercept) 1.333    1.1545        
##         Difficulty  0.861    0.9279   -0.23
## Number of obs: 3120, groups:  Item, 80; ID, 78
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -2.56248    0.19224 -13.329  < 2e-16 ***
## Difficulty               1.21085    0.29337   4.127 3.67e-05 ***
## Instructions            -0.01679    0.11596  -0.145    0.885    
## Difficulty:Instructions  0.03632    0.23199   0.157    0.876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Instrc
## Difficulty  -0.173              
## Instructins  0.007 -0.008       
## Dffclty:Ins -0.007  0.008 -0.261
# Tabulate the model: fixed effects as odds ratios with confidence intervals
# and p-values, followed by random-effect variances, ICC, group counts and R2.
tab_model(modelerror2)
  Error
Predictors Odds Ratios CI p
(Intercept) 0.08 0.05 – 0.11 <0.001
Difficulty 3.36 1.89 – 5.96 <0.001
Instructions 0.98 0.78 – 1.23 0.885
Difficulty * Instructions 1.04 0.66 – 1.63 0.876
Random Effects
σ2 3.29
τ00 Item 0.91
τ00 ID 1.33
τ11 ID.Difficulty 0.86
ρ01 ID -0.23
ICC 0.43
N ID 78
N Item 80
Observations 3120
Marginal R2 / Conditional R2 0.060 / 0.462
# Simple effects for the lure model: contrast the two Validity codes
# (0.5 minus -0.5, i.e. Inaccurate minus Accurate under this model's coding)
# separately at each level of Difficulty. Estimates are on the log odds
# ratio scale.
pairs(
  emmeans(modelerror1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     3.86 0.586 Inf 6.578   <.0001 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     2.05 0.207 Inf 9.905   <.0001 
## 
## Results are given on the log odds ratio (not the response) scale.

Models to analyze correct answer responses

# Mixed-effects logistic regression of correct-answer (Correct) responses on
# Difficulty x Validity. Note the coding is the reverse of the lure models:
# Accurate and Easy = +0.5, everything else = -0.5. Random effects:
# by-participant intercepts with Difficulty and Validity slopes, and
# by-item intercepts.
modelcorrect1 <- data %>%
  dplyr::mutate(
    Validity = ifelse(Validity == "Accurate", 0.5, -0.5),
    Difficulty = ifelse(Difficulty == "Easy", 0.5, -0.5)
  ) %>%
  glmer(
    Correct ~ Difficulty * Validity +
      (1 + Difficulty + Validity | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(
      optimizer = "bobyqa",
      optCtrl = list(maxfun = 2e5)
    )
  )

summary(modelcorrect1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ Difficulty * Validity + (1 + Difficulty + Validity |  
##     ID) + (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   5815.2   5889.3  -2896.6   5793.2     6229 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.5764 -0.4961  0.1583  0.4842  5.0050 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr     
##  Item   (Intercept) 0.338    0.5814            
##  ID     (Intercept) 1.942    1.3934            
##         Difficulty  1.869    1.3673   0.20     
##         Validity    1.243    1.1148   0.23 0.64
## Number of obs: 6240, groups:  Item, 80; ID, 78
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.5513     0.1761   3.131  0.00174 ** 
## Difficulty            2.6312     0.2201  11.953  < 2e-16 ***
## Validity              1.3952     0.1494   9.340  < 2e-16 ***
## Difficulty:Validity   0.2887     0.1602   1.802  0.07157 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Valdty
## Difficulty  0.155               
## Validity    0.197  0.453        
## Dffclty:Vld 0.083  0.075  0.120
# Tabulate the model: fixed effects as odds ratios with confidence intervals
# and p-values, followed by random-effect variances, ICC, group counts and R2.
tab_model(modelcorrect1)
  Correct
Predictors Odds Ratios CI p
(Intercept) 1.74 1.23 – 2.45 0.002
Difficulty 13.89 9.02 – 21.38 <0.001
Validity 4.04 3.01 – 5.41 <0.001
Difficulty * Validity 1.33 0.97 – 1.83 0.072
Random Effects
σ2 3.29
τ00 Item 0.34
τ00 ID 1.94
τ11 ID.Difficulty 1.87
τ11 ID.Validity 1.24
ρ01 ID.Difficulty 0.20
ρ01 ID.Validity 0.23
ICC 0.48
N ID 78
N Item 80
Observations 6240
Marginal R2 / Conditional R2 0.259 / 0.616
# Mixed-effects logistic regression of correct-answer (Correct) responses on
# Difficulty x Instructions, restricted to the Inaccurate items. Predictors
# are recoded to +0.5 / -0.5 (Easy and Accuracy = +0.5, everything else =
# -0.5). Random effects: by-participant intercepts with a Difficulty slope,
# and by-item intercepts.
modelcorrect2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(
    Difficulty = ifelse(Difficulty == "Easy", 0.5, -0.5),
    Instructions = ifelse(Instructions == "Accuracy", 0.5, -0.5)
  ) %>%
  glmer(
    Correct ~ Difficulty * Instructions + (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(
      optimizer = "bobyqa",
      optCtrl = list(maxfun = 2e5)
    )
  )

summary(modelcorrect2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ Difficulty * Instructions + (1 + Difficulty | ID) +  
##     (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   3165.3   3213.6  -1574.6   3149.3     3112 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8545 -0.5251 -0.1165  0.5201  3.8278 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  Item   (Intercept) 0.261    0.5109        
##  ID     (Intercept) 1.957    1.3989        
##         Difficulty  1.581    1.2573   -0.19
## Number of obs: 3120, groups:  Item, 80; ID, 78
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -0.15550    0.17762  -0.875    0.381    
## Difficulty               2.51123    0.21443  11.711   <2e-16 ***
## Instructions             0.11409    0.09217   1.238    0.216    
## Difficulty:Instructions -0.20820    0.18433  -1.130    0.259    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Instrc
## Difficulty  -0.144              
## Instructins -0.008  0.015       
## Dffclty:Ins  0.008 -0.014 -0.034
# Tabulate the model: fixed effects as odds ratios with confidence intervals
# and p-values, followed by random-effect variances, ICC, group counts and R2.
tab_model(modelcorrect2)
  Correct
Predictors Odds Ratios CI p
(Intercept) 0.86 0.60 – 1.21 0.381
Difficulty 12.32 8.09 – 18.76 <0.001
Instructions 1.12 0.94 – 1.34 0.216
Difficulty * Instructions 0.81 0.57 – 1.17 0.259
Random Effects
σ2 3.29
τ00 Item 0.26
τ00 ID 1.96
τ11 ID.Difficulty 1.58
ρ01 ID -0.19
ICC 0.44
N ID 78
N Item 80
Observations 3120
Marginal R2 / Conditional R2 0.211 / 0.561
# Simple effects for the correct-answer model: contrast the two Validity
# codes (0.5 minus -0.5, i.e. Accurate minus Inaccurate under this model's
# coding) separately at each level of Difficulty. Estimates are on the log
# odds ratio scale.
pairs(
  emmeans(modelcorrect1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     1.25 0.161 Inf 7.780   <.0001 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     1.54 0.178 Inf 8.659   <.0001 
## 
## Results are given on the log odds ratio (not the response) scale.

Incorrect lure responses by within-subject judgment type (Figure 3)

# Figure 3: mean proportion of lure responses per condition, drawn as dodged
# bars (x = item accuracy, fill = judgment instructions) with +/- 1 SE error
# bars and two-decimal value labels, one panel per Difficulty level.
Evaluation_Exp2_Figure3 <- summary %>%
  mutate(Validity = ifelse(Validity == "Accurate", "True", "False")) %>%
  ggplot(.) +
  aes(
    x = reorder(Validity, mean_err),
    y = mean_err,
    fill = reorder(Instructions, -mean_err)
  ) +
  # `summary` already holds exactly one row per bar, so plot the values
  # directly with geom_col() rather than re-averaging them through
  # stat = "summary" and its deprecated `fun.y` argument.
  geom_col(position = "dodge") +
  xlab("Accuracy of Item") +
  ylab("Proportion Lure Reported") +
  labs(fill = "Judgment") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_err - se_error, ymax = mean_err + se_error),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  # vjust lifts the label clear of the upper error-bar cap
  geom_text(
    aes(label = sprintf("%0.2f", round(mean_err, digits = 2))),
    position = position_dodge(width = .9),
    vjust = -2.7
  ) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))

print(Evaluation_Exp2_Figure3)

Correct answer responses by within-subject judgment type (Figure 4)

# Figure 4: mean proportion of correct-answer responses per condition, drawn
# as dodged bars (x = item accuracy, fill = judgment instructions) with
# +/- 1 SE error bars and two-decimal value labels, one panel per Difficulty
# level.
Evaluation_Exp2_Figure4 <- summary %>%
  mutate(Validity = ifelse(Validity == "Accurate", "True", "False")) %>%
  ggplot(.) +
  aes(
    x = reorder(Validity, -mean_corr),
    y = mean_corr,
    fill = reorder(Instructions, mean_corr)
  ) +
  # `summary` already holds exactly one row per bar, so plot the values
  # directly with geom_col() rather than re-averaging them through
  # stat = "summary" and its deprecated `fun.y` argument.
  geom_col(position = "dodge") +
  xlab("Accuracy of Item") +
  ylab("Proportion Correct Answer Reported") +
  labs(fill = "Judgment") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_corr - se_corr, ymax = mean_corr + se_corr),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  # positive vjust places the label inside the bar, below the error bar
  geom_text(
    aes(label = sprintf("%0.2f", round(mean_corr, digits = 2))),
    position = position_dodge(width = .9),
    vjust = 3
  ) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))

print(Evaluation_Exp2_Figure4)