Load data
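The analyses below assume the following packages are attached; this setup chunk is reconstructed from the functions used in this script rather than copied from the original source.

# Reconstructed setup: packages implied by the calls used below
library(readr)    # read_csv
library(dplyr)    # %>%, filter, mutate, group_by, summarise
library(lme4)     # glmer, glmerControl
library(sjPlot)   # tab_model
library(emmeans)  # emmeans, pairs
library(ggplot2)  # figures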
data <- readr::read_csv("evaluation-exp1-long.csv", col_names = TRUE)
## Parsed with column specification:
## cols(
## ID = col_character(),
## CheckStove = col_character(),
## Check2 = col_double(),
## LookUp = col_character(),
## Age = col_double(),
## Gender = col_character(),
## Education = col_character(),
## Race = col_character(),
## Comments = col_character(),
## Version = col_double(),
## Item = col_character(),
## Response = col_character(),
## Correct = col_double(),
## Error = col_double(),
## Unsure = col_double(),
## Instructions = col_character(),
## Difficulty = col_character(),
## Validity = col_character()
## )
Descriptive statistics for incorrect lure and correct answer responses (summarized in Table 1)
summary <- data %>%
  group_by(ID, Instructions, Validity, Difficulty) %>%
  summarise(
    mean_error = mean(Error, na.rm = TRUE),
    mean_correct = mean(Correct, na.rm = TRUE)
  ) %>%
  group_by(Instructions, Validity, Difficulty) %>%
  summarise(
    mean_err = mean(mean_error),
    sd_err = sd(mean_error),
    se_error = sd(mean_error) / sqrt(n()),
    mean_corr = mean(mean_correct),
    sd_corr = sd(mean_correct),
    se_corr = sd(mean_correct) / sqrt(n())
  )
## `summarise()` has grouped output by 'ID', 'Instructions', 'Validity'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Instructions', 'Validity'. You can override using the `.groups` argument.
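These messages are informational: summarise() keeps the earlier grouping variables by default. They can be silenced by stating the .groups argument explicitly; a minimal sketch (the object name per_id is illustrative, not from the original script):

# Illustrative: same per-participant means as above, with .groups stated explicitly
per_id <- data %>%
  group_by(ID, Instructions, Validity, Difficulty) %>%
  summarise(mean_error = mean(Error, na.rm = TRUE),
            mean_correct = mean(Correct, na.rm = TRUE),
            .groups = "drop_last")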
print(summary)
## # A tibble: 12 x 9
## # Groups: Instructions, Validity [6]
## Instructions Validity Difficulty mean_err sd_err se_error mean_corr sd_corr
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Accuracy Accurate Easy 0.00577 0.0203 0.00228 0.925 0.106
## 2 Accuracy Accurate Hard 0.0321 0.0428 0.00482 0.448 0.276
## 3 Accuracy Filler Easy 0.0191 0.0394 0.00443 0.825 0.197
## 4 Accuracy Filler Hard 0.0483 0.0687 0.00773 0.280 0.245
## 5 Accuracy Inaccurate Easy 0.0788 0.106 0.0119 0.773 0.218
## 6 Accuracy Inaccurate Hard 0.191 0.141 0.0158 0.251 0.248
## 7 Interest Accurate Easy 0.00669 0.0218 0.00227 0.824 0.278
## 8 Interest Accurate Hard 0.0312 0.0429 0.00447 0.477 0.295
## 9 Interest Filler Easy 0.0115 0.0297 0.00309 0.763 0.259
## 10 Interest Filler Hard 0.0424 0.0662 0.00690 0.294 0.254
## 11 Interest Inaccurate Easy 0.155 0.205 0.0214 0.651 0.309
## 12 Interest Inaccurate Hard 0.241 0.193 0.0201 0.259 0.230
## # … with 1 more variable: se_corr <dbl>
Analysis of filler items
# Difference in correct answers for easy and hard filler statements
data %>%
  filter(Validity == "Filler") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", 1, 0)) %>%
  glmer(Correct ~ Difficulty + (1 + Difficulty | ID) + (1 | Item), ., family = binomial) %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Difficulty + (1 + Difficulty | ID) + (1 | Item)
## Data: .
##
## AIC BIC logLik deviance df.resid
## 4246.6 4285.3 -2117.3 4234.6 4611
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0368 -0.4451 0.1485 0.4324 5.4979
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 2.4078 1.5517
## Difficulty 1.2820 1.1323 -0.29
## Item (Intercept) 0.4508 0.6714
## Number of obs: 4617, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3950 0.1726 -8.084 6.27e-16 ***
## Difficulty 3.3902 0.2031 16.694 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Difficulty -0.533
# Difference in errors between easy and hard filler statements
data %>%
  filter(Validity == "Filler") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", 1, 0)) %>%
  glmer(Error ~ Difficulty + (1 + Difficulty | ID) + (1 | Item), ., family = binomial) %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Error ~ Difficulty + (1 + Difficulty | ID) + (1 | Item)
## Data: .
##
## AIC BIC logLik deviance df.resid
## 1053.9 1092.5 -521.0 1041.9 4611
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1960 -0.1465 -0.0672 -0.0493 8.4412
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.9657 0.9827
## Difficulty 0.9725 0.9861 -0.80
## Item (Intercept) 2.9912 1.7295
## Number of obs: 4617, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.5584 0.4021 -11.337 <2e-16 ***
## Difficulty -1.0833 0.5853 -1.851 0.0642 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Difficulty -0.512
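The filler models above are piped directly into summary(); if a formatted odds-ratio table like the ones reported for the lure models below is wanted, the fit can be stored first and passed to tab_model(). A sketch (the object name filler_correct_model is illustrative, not from the original script):

# Illustrative: store the filler accuracy model so it can be passed to tab_model()
filler_correct_model <- data %>%
  filter(Validity == "Filler") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", 1, 0)) %>%
  glmer(Correct ~ Difficulty + (1 + Difficulty | ID) + (1 | Item), ., family = binomial)
tab_model(filler_correct_model)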
Models to analyze incorrect lure responses
modelerror1 <- data %>%
  dplyr::filter(Validity == "Accurate" | Validity == "Inaccurate") %>%
  dplyr::mutate(Validity = ifelse(Validity == "Accurate", -.5, .5)) %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", -.5, .5)) %>%
  glmer(Error ~ Difficulty*Validity + (1 + Difficulty + Validity | ID) + (1 | Item), .,
        family = binomial,
        control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
## boundary (singular) fit: see ?isSingular
summary(modelerror1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Error ~ Difficulty * Validity + (1 + Difficulty + Validity |
## ID) + (1 | Item)
## Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 4206.2 4284.7 -2092.1 4184.2 9223
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6644 -0.2580 -0.1317 -0.0534 10.3142
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.7646 0.8744
## Difficulty 1.1070 1.0521 -0.62
## Validity 1.0686 1.0338 0.78 0.01
## Item (Intercept) 1.2982 1.1394
## Number of obs: 9234, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.7955 0.1759 -21.583 < 2e-16 ***
## Difficulty 1.6702 0.3303 5.056 4.29e-07 ***
## Validity 2.5915 0.1864 13.905 < 2e-16 ***
## Difficulty:Validity -0.9270 0.3347 -2.770 0.00561 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Dffclt Valdty
## Difficulty -0.287
## Validity -0.248 0.298
## Dffclty:Vld 0.314 -0.438 -0.596
## convergence code: 0
## boundary (singular) fit: see ?isSingular
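The boundary (singular) fit message means at least one random-effects (co)variance component is estimated at the edge of its parameter space (a variance near zero, a correlation near ±1, or a rank-deficient correlation matrix among the random slopes). It does not affect the fixed-effect estimates reported below, and lme4 provides helpers to inspect it, for example:

# Inspect the random-effects structure flagged as singular
isSingular(modelerror1, tol = 1e-4)   # TRUE if the fit is on the boundary
VarCorr(modelerror1)                  # variances and correlations of the random effects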
tab_model(modelerror1)
| Predictors (DV: Error) | Odds Ratios | CI | p |
|---|---|---|---|
| (Intercept) | 0.02 | 0.02 – 0.03 | <0.001 |
| Difficulty | 5.31 | 2.78 – 10.15 | <0.001 |
| Validity | 13.35 | 9.27 – 19.24 | <0.001 |
| Difficulty * Validity | 0.40 | 0.21 – 0.76 | 0.006 |
| Random Effects | | | |
| σ2 | 3.29 | | |
| τ00 ID | 0.76 | | |
| τ00 Item | 1.30 | | |
| τ11 ID.Difficulty | 1.11 | | |
| τ11 ID.Validity | 1.07 | | |
| ρ01 ID.Difficulty | -0.62 | | |
| ρ01 ID.Validity | 0.78 | | |
| ICC | 0.44 | | |
| N ID | 171 | | |
| N Item | 81 | | |
| Observations | 9234 | | |
| Marginal R2 / Conditional R2 | 0.291 / 0.604 | | |
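The odds ratios in this table are simply the exponentiated fixed-effect estimates from summary(modelerror1) above, which makes for a quick consistency check:

# Consistency check: log-odds from summary(modelerror1) -> odds ratios in tab_model()
exp(2.5915)                             # Validity: ~13.35, as reported above
exp(2.5915 + c(-1.96, 1.96) * 0.1864)   # approximate Wald 95% CI: ~9.27 to ~19.24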
modelerror2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", -.5, .5)) %>%
  dplyr::mutate(Instructions = ifelse(Instructions == "Accuracy", -.5, .5)) %>%
  glmer(Error ~ Difficulty*Instructions + (1 + Difficulty | ID) + (1 | Item), .,
        family = binomial,
        control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
summary(modelerror2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Error ~ Difficulty * Instructions + (1 + Difficulty | ID) + (1 |
## Item)
## Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 3480.0 3531.5 -1732.0 3464.0 4609
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7395 -0.4015 -0.2280 -0.1064 6.9292
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 1.606 1.267
## Difficulty 1.108 1.053 -0.51
## Item (Intercept) 1.116 1.057
## Number of obs: 4617, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4876 0.1697 -14.661 < 2e-16 ***
## Difficulty 1.2999 0.2856 4.552 5.31e-06 ***
## Instructions 0.5620 0.2231 2.519 0.0118 *
## Difficulty:Instructions -0.5180 0.2634 -1.967 0.0492 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Dffclt Instrc
## Difficulty -0.188
## Instructins -0.075 0.037
## Dffclty:Ins 0.055 -0.081 -0.415
tab_model(modelerror2)
| Predictors (DV: Error) | Odds Ratios | CI | p |
|---|---|---|---|
| (Intercept) | 0.08 | 0.06 – 0.12 | <0.001 |
| Difficulty | 3.67 | 2.10 – 6.42 | <0.001 |
| Instructions | 1.75 | 1.13 – 2.72 | 0.012 |
| Difficulty * Instructions | 0.60 | 0.36 – 1.00 | 0.049 |
| Random Effects | | | |
| σ2 | 3.29 | | |
| τ00 ID | 1.61 | | |
| τ00 Item | 1.12 | | |
| τ11 ID.Difficulty | 1.11 | | |
| ρ01 ID | -0.51 | | |
| ICC | 0.48 | | |
| N ID | 171 | | |
| N Item | 81 | | |
| Observations | 4617 | | |
| Marginal R2 / Conditional R2 | 0.074 / 0.515 | | |
# test of simple effects
emmeans(modelerror1, ~ Validity * Difficulty) %>%
  pairs(., simple = "Validity", reverse = TRUE)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 3.06 0.316 Inf 9.666 <.0001
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 2.13 0.160 Inf 13.304 <.0001
##
## Results are given on the log odds ratio (not the response) scale.
emmeans(modelerror2, ~ Instructions * Difficulty) %>%
  pairs(., simple = "Instructions", reverse = TRUE)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 0.821 0.303 Inf 2.714 0.0067
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 0.303 0.207 Inf 1.466 0.1427
##
## Results are given on the log odds ratio (not the response) scale.
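The contrasts above are on the log odds ratio scale. If odds-ratio-scale estimates are preferred, the same contrast object can be back-transformed through summary()'s type argument; a minimal sketch using the models fitted above (standard emmeans back-transformation assumed):

# Sketch: Instructions simple effects as odds ratios with 95% CIs
emmeans(modelerror2, ~ Instructions * Difficulty) %>%
  pairs(., simple = "Instructions", reverse = TRUE) %>%
  summary(type = "response", infer = TRUE)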
Models to analyze correct answer responses
modelcorrect1 <- data %>%
  dplyr::filter(Validity == "Accurate" | Validity == "Inaccurate") %>%
  dplyr::mutate(Validity = ifelse(Validity == "Accurate", .5, -.5)) %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", .5, -.5)) %>%
  glmer(Correct ~ Difficulty*Validity + (1 + Difficulty + Validity | ID) + (1 | Item), .,
        family = binomial,
        control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
summary(modelcorrect1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Difficulty * Validity + (1 + Difficulty + Validity |
## ID) + (1 | Item)
## Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 8147.0 8225.5 -4062.5 8125.0 9223
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.1623 -0.4492 0.1137 0.4359 6.0528
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 2.2486 1.4995
## Difficulty 2.0031 1.4153 0.31
## Validity 1.2693 1.1266 0.26 0.52
## Item (Intercept) 0.5467 0.7394
## Number of obs: 9234, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7064 0.1472 4.800 1.59e-06 ***
## Difficulty 3.2696 0.2139 15.287 < 2e-16 ***
## Validity 1.6221 0.1121 14.476 < 2e-16 ***
## Difficulty:Validity 0.5220 0.1476 3.537 0.000404 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Dffclt Valdty
## Difficulty 0.178
## Validity 0.210 0.305
## Dffclty:Vld 0.117 0.103 0.214
tab_model(modelcorrect1)
| Predictors (DV: Correct) | Odds Ratios | CI | p |
|---|---|---|---|
| (Intercept) | 2.03 | 1.52 – 2.70 | <0.001 |
| Difficulty | 26.30 | 17.29 – 40.00 | <0.001 |
| Validity | 5.06 | 4.07 – 6.31 | <0.001 |
| Difficulty * Validity | 1.69 | 1.26 – 2.25 | <0.001 |
| Random Effects | | | |
| σ2 | 3.29 | | |
| τ00 ID | 2.25 | | |
| τ00 Item | 0.55 | | |
| τ11 ID.Difficulty | 2.00 | | |
| τ11 ID.Validity | 1.27 | | |
| ρ01 ID.Difficulty | 0.31 | | |
| ρ01 ID.Validity | 0.26 | | |
| ICC | 0.52 | | |
| N ID | 171 | | |
| N Item | 81 | | |
| Observations | 9234 | | |
| Marginal R2 / Conditional R2 | 0.327 / 0.679 | | |
modelcorrect2 <- data %>%
  dplyr::filter(Validity == "Inaccurate") %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", .5, -.5)) %>%
  dplyr::mutate(Instructions = ifelse(Instructions == "Accuracy", .5, -.5)) %>%
  glmer(Correct ~ Difficulty*Instructions + (1 + Difficulty | ID) + (1 | Item), .,
        family = binomial,
        control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
summary(modelcorrect2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Difficulty * Instructions + (1 + Difficulty | ID) +
## (1 | Item)
## Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 4419.6 4471.1 -2201.8 4403.6 4609
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0815 -0.4778 -0.1399 0.4650 4.7603
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 2.1679 1.4724
## Difficulty 1.8944 1.3764 0.01
## Item (Intercept) 0.4949 0.7035
## Number of obs: 4617, groups: ID, 171; Item, 81
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1161 0.1466 -0.792 0.428173
## Difficulty 3.0790 0.2151 14.311 < 2e-16 ***
## Instructions 0.3672 0.2428 1.512 0.130501
## Difficulty:Instructions 0.9225 0.2787 3.310 0.000934 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Dffclt Instrc
## Difficulty -0.002
## Instructins 0.072 0.006
## Dffclty:Ins 0.007 0.078 0.009
tab_model(modelcorrect2)
| Predictors (DV: Correct) | Odds Ratios | CI | p |
|---|---|---|---|
| (Intercept) | 0.89 | 0.67 – 1.19 | 0.428 |
| Difficulty | 21.74 | 14.26 – 33.14 | <0.001 |
| Instructions | 1.44 | 0.90 – 2.32 | 0.131 |
| Difficulty * Instructions | 2.52 | 1.46 – 4.34 | 0.001 |
| Random Effects | | | |
| σ2 | 3.29 | | |
| τ00 ID | 2.17 | | |
| τ00 Item | 0.49 | | |
| τ11 ID.Difficulty | 1.89 | | |
| ρ01 ID | 0.01 | | |
| ICC | 0.49 | | |
| N ID | 171 | | |
| N Item | 81 | | |
| Observations | 4617 | | |
| Marginal R2 / Conditional R2 | 0.272 / 0.627 | | |
# test of simple effects
emmeans(modelcorrect1, ~ Validity * Difficulty) %>%
  pairs(., simple = "Validity", reverse = TRUE)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 1.36 0.120 Inf 11.317 <.0001
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 1.88 0.147 Inf 12.831 <.0001
##
## Results are given on the log odds ratio (not the response) scale.
emmeans(modelcorrect2, ~ Instructions * Difficulty) %>%
  pairs(., simple = "Instructions", reverse = TRUE)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 -0.0941 0.279 Inf -0.337 0.7360
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## 0.5 - -0.5 0.8285 0.281 Inf 2.948 0.0032
##
## Results are given on the log odds ratio (not the response) scale.
Incorrect lure responses by between-subjects judgment type (Figure 1)
Evaluation_Exp1_Figure1 <- summary %>%
  dplyr::filter(Validity == "Accurate" | Validity == "Inaccurate") %>%
  group_by(Instructions, Validity, Difficulty) %>%
  ggplot(.) +
  aes(x = reorder(Validity, mean_err), y = mean_err, fill = reorder(Instructions, -mean_err)) +
  geom_bar(stat = "summary", fun.y = "mean", position = "dodge") +
  xlab("Accuracy of Item") + ylab("Proportion Lure Reported") + labs(fill = "Judgment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) +
  geom_errorbar(aes(ymin = mean_err - se_error, ymax = mean_err + se_error),
                position = position_dodge(width = 0.9), width = .1) +
  geom_text(aes(label = round(mean_err, digits = 2)),
            position = position_dodge(width = .9), vjust = -2.4) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))
print(Evaluation_Exp1_Figure1)
Correct answer responses by between-subjects judgment type (Figure 2)
Evaluation_Exp1_Figure2 <- summary %>%
  dplyr::filter(Validity == "Accurate" | Validity == "Inaccurate") %>%
  group_by(Instructions, Validity, Difficulty) %>%
  mutate(Validity = ifelse(Validity == "Accurate", "True", "False")) %>%
  ggplot(.) +
  aes(x = reorder(Validity, -mean_corr), y = mean_corr, fill = reorder(Instructions, mean_corr)) +
  geom_bar(stat = "summary", fun.y = "mean", position = "dodge") +
  xlab("Accuracy of Item") + ylab("Proportion Correct Answer Reported") + labs(fill = "Judgment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) +
  geom_errorbar(aes(ymin = mean_corr - se_corr, ymax = mean_corr + se_corr),
                position = position_dodge(width = 0.9), width = .1) +
  geom_text(aes(label = round(mean_corr, digits = 2)),
            position = position_dodge(width = .9), vjust = 3) +
  scale_fill_manual(values = c("#f0f0f0", "#636363")) +
  facet_wrap(vars(Difficulty))
print(Evaluation_Exp1_Figure2)
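If the figures need to be written to disk for the manuscript, ggsave() can be used; the file names and dimensions below are placeholders rather than values from the original script:

# Placeholder file names and sizes -- adjust to the journal's requirements
ggsave("Evaluation_Exp1_Figure1.png", Evaluation_Exp1_Figure1, width = 8, height = 4, dpi = 300)
ggsave("Evaluation_Exp1_Figure2.png", Evaluation_Exp1_Figure2, width = 8, height = 4, dpi = 300)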