Load data

# Read the Experiment 1 data file (long format, per its file name) into
# `data`; the first row of the CSV supplies the column names.
data <- readr::read_csv(
  file = "evaluation-exp1-long.csv",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
##   ID = col_character(),
##   CheckStove = col_character(),
##   Check2 = col_double(),
##   LookUp = col_character(),
##   Age = col_double(),
##   Gender = col_character(),
##   Education = col_character(),
##   Race = col_character(),
##   Comments = col_character(),
##   Version = col_double(),
##   Item = col_character(),
##   Response = col_character(),
##   Correct = col_double(),
##   Error = col_double(),
##   Unsure = col_double(),
##   Instructions = col_character(),
##   Difficulty = col_character(),
##   Validity = col_character()
## )

Descriptive statistics for incorrect lure and correct answer responses (summarized in Table 1)

# Descriptive statistics for Table 1.
# Step 1: per participant (ID), the proportion of lure (Error) and correct
#   (Correct) responses within each Instructions x Validity x Difficulty
#   cell, ignoring missing responses.
# Step 2: across participants, the mean, SD and standard error
#   (SD / sqrt(number of participant rows in the cell)) of those proportions.
# `.groups = "drop"` states the output grouping explicitly, so neither step
# leaves an implicit grouping behind (and dplyr no longer emits its
# "has grouped output by ..." message). The result is an ungrouped tibble
# with one row per Instructions x Validity x Difficulty cell.
# Note: the object is named `summary`, like base::summary(); calls such as
# summary(model) still resolve to the function.
summary <- data %>%
  group_by(ID, Instructions, Validity, Difficulty) %>%
  summarise(
    mean_error = mean(Error, na.rm = TRUE),
    mean_correct = mean(Correct, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(Instructions, Validity, Difficulty) %>%
  summarise(
    mean_err = mean(mean_error),
    sd_err = sd(mean_error),
    se_error = sd_err / sqrt(n()),
    mean_corr = mean(mean_correct),
    sd_corr = sd(mean_correct),
    se_corr = sd_corr / sqrt(n()),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'ID', 'Instructions', 'Validity'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Instructions', 'Validity'. You can override using the `.groups` argument.
# Show the cell-level descriptive statistics computed above.
print(summary)
## # A tibble: 12 x 9
## # Groups:   Instructions, Validity [6]
##    Instructions Validity   Difficulty mean_err sd_err se_error mean_corr sd_corr
##    <chr>        <chr>      <chr>         <dbl>  <dbl>    <dbl>     <dbl>   <dbl>
##  1 Accuracy     Accurate   Easy        0.00577 0.0203  0.00228     0.925   0.106
##  2 Accuracy     Accurate   Hard        0.0321  0.0428  0.00482     0.448   0.276
##  3 Accuracy     Filler     Easy        0.0191  0.0394  0.00443     0.825   0.197
##  4 Accuracy     Filler     Hard        0.0483  0.0687  0.00773     0.280   0.245
##  5 Accuracy     Inaccurate Easy        0.0788  0.106   0.0119      0.773   0.218
##  6 Accuracy     Inaccurate Hard        0.191   0.141   0.0158      0.251   0.248
##  7 Interest     Accurate   Easy        0.00669 0.0218  0.00227     0.824   0.278
##  8 Interest     Accurate   Hard        0.0312  0.0429  0.00447     0.477   0.295
##  9 Interest     Filler     Easy        0.0115  0.0297  0.00309     0.763   0.259
## 10 Interest     Filler     Hard        0.0424  0.0662  0.00690     0.294   0.254
## 11 Interest     Inaccurate Easy        0.155   0.205   0.0214      0.651   0.309
## 12 Interest     Inaccurate Hard        0.241   0.193   0.0201      0.259   0.230
## # … with 1 more variable: se_corr <dbl>

Analysis of filler items

# Filler items only: do correct answers differ between easy and hard
# statements? Difficulty is dummy coded (Easy = 1, anything else = 0).
# Logistic mixed model with by-participant random intercepts and Difficulty
# slopes, and by-item random intercepts; the model summary is printed.
data %>%
  dplyr::filter(Validity == "Filler") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", 1, 0)) %>%
  glmer(
    formula = Correct ~ Difficulty + (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial
  ) %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ Difficulty + (1 + Difficulty | ID) + (1 | Item)
##    Data: .
## 
##      AIC      BIC   logLik deviance df.resid 
##   4246.6   4285.3  -2117.3   4234.6     4611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0368 -0.4451  0.1485  0.4324  5.4979 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  ID     (Intercept) 2.4078   1.5517        
##         Difficulty  1.2820   1.1323   -0.29
##  Item   (Intercept) 0.4508   0.6714        
## Number of obs: 4617, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.3950     0.1726  -8.084 6.27e-16 ***
## Difficulty    3.3902     0.2031  16.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## Difficulty -0.533
# Filler items only: do Error responses differ between easy and hard
# statements? Same dummy coding (Easy = 1, anything else = 0) and the same
# random-effects structure as the Correct model for filler items.
data %>%
  dplyr::filter(Validity == "Filler") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", 1, 0)) %>%
  glmer(
    formula = Error ~ Difficulty + (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial
  ) %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty + (1 + Difficulty | ID) + (1 | Item)
##    Data: .
## 
##      AIC      BIC   logLik deviance df.resid 
##   1053.9   1092.5   -521.0   1041.9     4611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1960 -0.1465 -0.0672 -0.0493  8.4412 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  ID     (Intercept) 0.9657   0.9827        
##         Difficulty  0.9725   0.9861   -0.80
##  Item   (Intercept) 2.9912   1.7295        
## Number of obs: 4617, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.5584     0.4021 -11.337   <2e-16 ***
## Difficulty   -1.0833     0.5853  -1.851   0.0642 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## Difficulty -0.512

Models to analyze incorrect lure responses

# Lure (Error) responses as a function of Difficulty x Validity, using the
# Accurate and Inaccurate items only (fillers excluded).
# Both predictors are effect coded at +/- .5:
#   Validity:   Accurate = -.5, Inaccurate = .5
#   Difficulty: Easy = -.5, anything else = .5
# Random effects: by-participant intercepts plus Difficulty and Validity
# slopes; by-item intercepts. Fitted with the bobyqa optimizer and a raised
# evaluation limit. The recorded output reports a singular fit.
modelerror1 <- data %>%
  dplyr::filter(Validity %in% c("Accurate", "Inaccurate")) %>%
  dplyr::mutate(
    Validity = ifelse(Validity == "Accurate", -.5, .5),
    Difficulty = ifelse(Difficulty == "Easy", -.5, .5)
  ) %>%
  glmer(
    formula = Error ~ Difficulty * Validity +
      (1 + Difficulty + Validity | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )
## boundary (singular) fit: see ?isSingular
summary(modelerror1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty * Validity + (1 + Difficulty + Validity |  
##     ID) + (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   4206.2   4284.7  -2092.1   4184.2     9223 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6644 -0.2580 -0.1317 -0.0534 10.3142 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr       
##  ID     (Intercept) 0.7646   0.8744              
##         Difficulty  1.1070   1.0521   -0.62      
##         Validity    1.0686   1.0338    0.78  0.01
##  Item   (Intercept) 1.2982   1.1394              
## Number of obs: 9234, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -3.7955     0.1759 -21.583  < 2e-16 ***
## Difficulty            1.6702     0.3303   5.056 4.29e-07 ***
## Validity              2.5915     0.1864  13.905  < 2e-16 ***
## Difficulty:Validity  -0.9270     0.3347  -2.770  0.00561 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Valdty
## Difficulty  -0.287              
## Validity    -0.248  0.298       
## Dffclty:Vld  0.314 -0.438 -0.596
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Tabulate modelerror1; the rendered table below reports odds ratios,
# confidence intervals and p-values plus the random-effects summary.
tab_model(modelerror1)
  Error
Predictors Odds Ratios CI p
(Intercept) 0.02 0.02 – 0.03 <0.001
Difficulty 5.31 2.78 – 10.15 <0.001
Validity 13.35 9.27 – 19.24 <0.001
Difficulty * Validity 0.40 0.21 – 0.76 0.006
Random Effects
σ2 3.29
τ00 ID 0.76
τ00 Item 1.30
τ11 ID.Difficulty 1.11
τ11 ID.Validity 1.07
ρ01 ID.Difficulty -0.62
ρ01 ID.Validity 0.78
ICC 0.44
N ID 171
N Item 81
Observations 9234
Marginal R2 / Conditional R2 0.291 / 0.604
# Lure (Error) responses to Inaccurate items only, as a function of
# Difficulty x Instructions. Effect coding at +/- .5:
#   Difficulty:   Easy = -.5, anything else = .5
#   Instructions: Accuracy = -.5, anything else = .5
# Random effects: by-participant intercepts and Difficulty slopes; by-item
# intercepts. Same optimizer settings as modelerror1.
modelerror2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(
    Difficulty = ifelse(Difficulty == "Easy", -.5, .5),
    Instructions = ifelse(Instructions == "Accuracy", -.5, .5)
  ) %>%
  glmer(
    formula = Error ~ Difficulty * Instructions +
      (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )

summary(modelerror2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty * Instructions + (1 + Difficulty | ID) + (1 |  
##     Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   3480.0   3531.5  -1732.0   3464.0     4609 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7395 -0.4015 -0.2280 -0.1064  6.9292 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  ID     (Intercept) 1.606    1.267         
##         Difficulty  1.108    1.053    -0.51
##  Item   (Intercept) 1.116    1.057         
## Number of obs: 4617, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.4876     0.1697 -14.661  < 2e-16 ***
## Difficulty                1.2999     0.2856   4.552 5.31e-06 ***
## Instructions              0.5620     0.2231   2.519   0.0118 *  
## Difficulty:Instructions  -0.5180     0.2634  -1.967   0.0492 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Instrc
## Difficulty  -0.188              
## Instructins -0.075  0.037       
## Dffclty:Ins  0.055 -0.081 -0.415
# Tabulate modelerror2; the rendered table below reports odds ratios,
# confidence intervals and p-values plus the random-effects summary.
tab_model(modelerror2)
  Error
Predictors Odds Ratios CI p
(Intercept) 0.08 0.06 – 0.12 <0.001
Difficulty 3.67 2.10 – 6.42 <0.001
Instructions 1.75 1.13 – 2.72 0.012
Difficulty * Instructions 0.60 0.36 – 1.00 0.049
Random Effects
σ2 3.29
τ00 ID 1.61
τ00 Item 1.12
τ11 ID.Difficulty 1.11
ρ01 ID -0.51
ICC 0.48
N ID 171
N Item 81
Observations 4617
Marginal R2 / Conditional R2 0.074 / 0.515
# Simple effects for modelerror1: the Validity contrast at each level of
# Difficulty. With reverse = TRUE the contrast is the .5 level minus the
# -.5 level, i.e. Inaccurate minus Accurate under the coding used to fit
# the model. Estimates are on the log odds ratio scale.
modelerror1 %>%
  emmeans(specs = ~ Validity * Difficulty) %>%
  pairs(simple = "Validity", reverse = TRUE)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     3.06 0.316 Inf  9.666  <.0001 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     2.13 0.160 Inf 13.304  <.0001 
## 
## Results are given on the log odds ratio (not the response) scale.
# Simple effects for modelerror2: the Instructions contrast (.5 level minus
# -.5 level, where Accuracy was coded -.5) at each level of Difficulty.
# Estimates are on the log odds ratio scale.
modelerror2 %>%
  emmeans(specs = ~ Instructions * Difficulty) %>%
  pairs(simple = "Instructions", reverse = TRUE)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5    0.821 0.303 Inf 2.714   0.0067 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5    0.303 0.207 Inf 1.466   0.1427 
## 
## Results are given on the log odds ratio (not the response) scale.

Models to analyze correct answer responses

# Correct-answer responses as a function of Difficulty x Validity, using the
# Accurate and Inaccurate items only (fillers excluded).
# Effect coding at +/- .5; note the sign is the reverse of the error models:
#   Validity:   Accurate = .5, Inaccurate = -.5
#   Difficulty: Easy = .5, anything else = -.5
# Random effects: by-participant intercepts plus Difficulty and Validity
# slopes; by-item intercepts. bobyqa optimizer with a raised evaluation limit.
modelcorrect1 <- data %>%
  dplyr::filter(Validity %in% c("Accurate", "Inaccurate")) %>%
  dplyr::mutate(
    Validity = ifelse(Validity == "Accurate", .5, -.5),
    Difficulty = ifelse(Difficulty == "Easy", .5, -.5)
  ) %>%
  glmer(
    formula = Correct ~ Difficulty * Validity +
      (1 + Difficulty + Validity | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )

summary(modelcorrect1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ Difficulty * Validity + (1 + Difficulty + Validity |  
##     ID) + (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   8147.0   8225.5  -4062.5   8125.0     9223 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.1623 -0.4492  0.1137  0.4359  6.0528 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr     
##  ID     (Intercept) 2.2486   1.4995            
##         Difficulty  2.0031   1.4153   0.31     
##         Validity    1.2693   1.1266   0.26 0.52
##  Item   (Intercept) 0.5467   0.7394            
## Number of obs: 9234, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.7064     0.1472   4.800 1.59e-06 ***
## Difficulty            3.2696     0.2139  15.287  < 2e-16 ***
## Validity              1.6221     0.1121  14.476  < 2e-16 ***
## Difficulty:Validity   0.5220     0.1476   3.537 0.000404 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Valdty
## Difficulty  0.178               
## Validity    0.210  0.305        
## Dffclty:Vld 0.117  0.103  0.214
# Tabulate modelcorrect1; the rendered table below reports odds ratios,
# confidence intervals and p-values plus the random-effects summary.
tab_model(modelcorrect1)
  Correct
Predictors Odds Ratios CI p
(Intercept) 2.03 1.52 – 2.70 <0.001
Difficulty 26.30 17.29 – 40.00 <0.001
Validity 5.06 4.07 – 6.31 <0.001
Difficulty * Validity 1.69 1.26 – 2.25 <0.001
Random Effects
σ2 3.29
τ00 ID 2.25
τ00 Item 0.55
τ11 ID.Difficulty 2.00
τ11 ID.Validity 1.27
ρ01 ID.Difficulty 0.31
ρ01 ID.Validity 0.26
ICC 0.52
N ID 171
N Item 81
Observations 9234
Marginal R2 / Conditional R2 0.327 / 0.679
# Correct-answer responses to Inaccurate items only, as a function of
# Difficulty x Instructions. Effect coding at +/- .5:
#   Difficulty:   Easy = .5, anything else = -.5
#   Instructions: Accuracy = .5, anything else = -.5
# Random effects: by-participant intercepts and Difficulty slopes; by-item
# intercepts. Same optimizer settings as the other models.
modelcorrect2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(
    Difficulty = ifelse(Difficulty == "Easy", .5, -.5),
    Instructions = ifelse(Instructions == "Accuracy", .5, -.5)
  ) %>%
  glmer(
    formula = Correct ~ Difficulty * Instructions +
      (1 + Difficulty | ID) + (1 | Item),
    data = .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )

summary(modelcorrect2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ Difficulty * Instructions + (1 + Difficulty | ID) +  
##     (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   4419.6   4471.1  -2201.8   4403.6     4609 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0815 -0.4778 -0.1399  0.4650  4.7603 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  ID     (Intercept) 2.1679   1.4724       
##         Difficulty  1.8944   1.3764   0.01
##  Item   (Intercept) 0.4949   0.7035       
## Number of obs: 4617, groups:  ID, 171; Item, 81
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.1161     0.1466  -0.792 0.428173    
## Difficulty                3.0790     0.2151  14.311  < 2e-16 ***
## Instructions              0.3672     0.2428   1.512 0.130501    
## Difficulty:Instructions   0.9225     0.2787   3.310 0.000934 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt Instrc
## Difficulty  -0.002              
## Instructins  0.072  0.006       
## Dffclty:Ins  0.007  0.078  0.009
# Tabulate modelcorrect2; the rendered table below reports odds ratios,
# confidence intervals and p-values plus the random-effects summary.
tab_model(modelcorrect2)
  Correct
Predictors Odds Ratios CI p
(Intercept) 0.89 0.67 – 1.19 0.428
Difficulty 21.74 14.26 – 33.14 <0.001
Instructions 1.44 0.90 – 2.32 0.131
Difficulty * Instructions 2.52 1.46 – 4.34 0.001
Random Effects
σ2 3.29
τ00 ID 2.17
τ00 Item 0.49
τ11 ID.Difficulty 1.89
ρ01 ID 0.01
ICC 0.49
N ID 171
N Item 81
Observations 4617
Marginal R2 / Conditional R2 0.272 / 0.627
# Simple effects for modelcorrect1: the Validity contrast at each level of
# Difficulty. With reverse = TRUE the contrast is the .5 level minus the
# -.5 level, i.e. Accurate minus Inaccurate under the coding used to fit
# the model. Estimates are on the log odds ratio scale.
modelcorrect1 %>%
  emmeans(specs = ~ Validity * Difficulty) %>%
  pairs(simple = "Validity", reverse = TRUE)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     1.36 0.120 Inf 11.317  <.0001 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5     1.88 0.147 Inf 12.831  <.0001 
## 
## Results are given on the log odds ratio (not the response) scale.
# Simple effects for modelcorrect2: the Instructions contrast (.5 level
# minus -.5 level, where Accuracy was coded .5) at each level of Difficulty
# (Easy was coded .5). Estimates are on the log odds ratio scale.
modelcorrect2 %>%
  emmeans(specs = ~ Instructions * Difficulty) %>%
  pairs(simple = "Instructions", reverse = TRUE)
## Difficulty = -0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5  -0.0941 0.279 Inf -0.337  0.7360 
## 
## Difficulty =  0.5:
##  contrast   estimate    SE  df z.ratio p.value
##  0.5 - -0.5   0.8285 0.281 Inf  2.948  0.0032 
## 
## Results are given on the log odds ratio (not the response) scale.

Incorrect lure responses by between-subjects judgment type (Figure 1)

# Figure 1: mean proportion of lure responses (mean_err, +/- 1 SE) for
# Accurate vs. Inaccurate items, filled by judgment instructions and
# faceted by Difficulty.
# `summary` already has exactly one row per Instructions x Validity x
# Difficulty cell, so the bars are drawn directly from the stored means with
# geom_col(). The previous geom_bar(stat = "summary", fun.y = "mean") form
# re-averaged single values and relied on the deprecated `fun.y` argument.
# No grouping is needed for plotting, so any grouping carried by `summary`
# is dropped instead of re-applied.
Evaluation_Exp1_Figure1 <- summary %>%
  dplyr::filter(Validity %in% c("Accurate", "Inaccurate")) %>%
  ungroup() %>%
  ggplot(.) +
  aes(
    # x levels ordered by increasing lure rate; fill levels by decreasing
    # lure rate, which fixes which instruction gets which grey below.
    x = reorder(Validity, mean_err),
    y = mean_err,
    fill = reorder(Instructions, -mean_err)
  ) +
  geom_col(position = "dodge") +
  xlab("Accuracy of Item") +
  ylab("Proportion Lure Reported") +
  labs(fill = "Judgment") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Dodge width 0.9 matches the default bar width so error bars and labels
  # sit centred on their bars.
  geom_errorbar(
    aes(ymin = mean_err - se_error, ymax = mean_err + se_error),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_err, digits = 2)),
    position = position_dodge(width = .9),
    vjust = -2.4
  ) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))

print(Evaluation_Exp1_Figure1)

Correct answer responses by between-subjects judgment type (Figure 2)

# Figure 2: mean proportion of correct answers (mean_corr, +/- 1 SE) for
# Accurate ("True") vs. Inaccurate ("False") items, filled by judgment
# instructions and faceted by Difficulty.
# `summary` already has exactly one row per Instructions x Validity x
# Difficulty cell, so the bars are drawn directly from the stored means with
# geom_col(). The previous geom_bar(stat = "summary", fun.y = "mean") form
# re-averaged single values and relied on the deprecated `fun.y` argument.
# The data are ungrouped before Validity is relabelled so that the recode
# operates on an ordinary column rather than on a grouping variable.
Evaluation_Exp1_Figure2 <- summary %>%
  dplyr::filter(Validity %in% c("Accurate", "Inaccurate")) %>%
  ungroup() %>%
  mutate(Validity = ifelse(Validity == "Accurate", "True", "False")) %>%
  ggplot(.) +
  aes(
    # x levels ordered by decreasing correct rate; fill levels by increasing
    # correct rate, which fixes which instruction gets which grey below.
    x = reorder(Validity, -mean_corr),
    y = mean_corr,
    fill = reorder(Instructions, mean_corr)
  ) +
  geom_col(position = "dodge") +
  xlab("Accuracy of Item") +
  ylab("Proportion Correct Answer Reported") +
  labs(fill = "Judgment") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Dodge width 0.9 matches the default bar width so error bars and labels
  # sit centred on their bars.
  geom_errorbar(
    aes(ymin = mean_corr - se_corr, ymax = mean_corr + se_corr),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_corr, digits = 2)),
    position = position_dodge(width = .9),
    vjust = 3
  ) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))

print(Evaluation_Exp1_Figure2)