Load data and function
# Read the experiment data from CSV; the first row supplies the column names.
# NOTE(review): readr reports parsing issues for this file (see the warning
# below) - inspect them with problems(data). The three columns guessed as
# logical are presumably mostly-empty free-text fields; consider declaring
# them as character via `col_types` (TODO confirm).
data <- readr::read_csv("twitter-exp1-long.csv", col_names = TRUE)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 6480 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): ID, gender, race, social_use, social_time, lookup, comments, Judgm...
## dbl (6): age, Correct, Error, Unsure, Duration (in seconds), Read2
## lgl (3): gender_4_TEXT, race_6_TEXT, social_use_other
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardized fixed-effect estimates and standard errors for a fitted model.
#
# Each fixed-effect estimate (and its standard error) is multiplied by the SD
# of the matching column of getME(object, "X") and divided by the SD of
# getME(object, "y"). A column with zero SD (e.g. a constant intercept column)
# therefore yields 0 for both values.
#
# Returns a data.frame with columns `stdcoef` and `stdse`, one row per
# fixed effect.
stdCoef.merMod <- function(object) {
  response_sd <- sd(getME(object, "y"))
  predictor_sd <- apply(getME(object, "X"), 2, sd)
  coef_table <- coef(summary(object))
  data.frame(
    stdcoef = fixef(object) * predictor_sd / response_sd,
    stdse = coef_table[, "Std. Error"] * predictor_sd / response_sd
  )
}
Incorrect lure and correct answer descriptive stats for summaries in Table 1
# Descriptive statistics for Table 1.
# Step 1: average Error and Correct within every participant (ID) x Validity x
#         Difficulty cell, ignoring missing values.
# Step 2: describe those participant-level means (mean, SD, SE) for each
#         Validity x Difficulty cell; n() is the number of rows (participant
#         means) in the cell.
# Note: the result is stored under the name `summary`; calls such as
# summary(model) still reach the function because R skips non-function
# objects when resolving a call.
summary <- data %>%
  group_by(ID, Validity, Difficulty) %>%
  summarise(
    p_error = mean(Error, na.rm = TRUE),
    p_correct = mean(Correct, na.rm = TRUE)
  ) %>%
  group_by(Validity, Difficulty) %>%
  summarise(
    mean_err = mean(p_error),
    sd_err = sd(p_error),
    se_error = sd_err / sqrt(n()),
    mean_corr = mean(p_correct),
    sd_corr = sd(p_correct),
    se_corr = sd_corr / sqrt(n())
  )
## `summarise()` has grouped output by 'ID', 'Validity'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Validity'. You can override using the
## `.groups` argument.
print(summary)
## # A tibble: 6 × 8
## # Groups: Validity [3]
## Validity Difficulty mean_err sd_err se_error mean_corr sd_corr se_corr
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE Easy 0.0861 0.112 0.0118 0.673 0.277 0.0292
## 2 FALSE Hard 0.140 0.136 0.0143 0.256 0.272 0.0287
## 3 FILLER Easy 0.0157 0.0351 0.00370 0.765 0.272 0.0287
## 4 FILLER Hard 0.0343 0.0528 0.00557 0.257 0.273 0.0287
## 5 TRUE Easy 0.0102 0.0274 0.00289 0.819 0.275 0.0290
## 6 TRUE Hard 0.0361 0.0500 0.00528 0.333 0.304 0.0320
# Supplemental material: response counts, plus the proportion each response
# type makes up within its Difficulty level.
# The denominator is the number of rows observed for that Difficulty level
# (sum(n) within the group) rather than a hard-coded 3240, and mutate() is
# used because summarise() is meant to return one row per group.
data %>%
  count(Difficulty, Response) %>%
  group_by(Difficulty) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()
## # A tibble: 8 × 4
## Difficulty Response n prop
## <chr> <chr> <int> <dbl>
## 1 Easy Correct 2438 0.752
## 2 Easy Incorrect-Lure 121 0.0373
## 3 Easy Incorrect-Other 154 0.0475
## 4 Easy Unsure 527 0.163
## 5 Hard Correct 914 0.282
## 6 Hard Incorrect-Lure 227 0.0701
## 7 Hard Incorrect-Other 471 0.145
## 8 Hard Unsure 1628 0.502
Models to analyze incorrect lure responses
# Make FALSE the referent (baseline) level of Validity
data$Validity <- relevel(as.factor(data$Validity), ref = "FALSE")

# Logistic mixed model for lure errors. Difficulty is recoded numerically
# (Easy = -.5, anything else = .5) before fitting; random intercepts and
# Difficulty/Validity slopes by participant (ID), random intercepts by Item.
modelerror1 <- data %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", -.5, .5)) %>%
  glmer(
    Error ~ Difficulty * Validity + (1 + Difficulty + Validity | ID) + (1 | Item),
    .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )
## boundary (singular) fit: see help('isSingular')
summary(modelerror1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Error ~ Difficulty * Validity + (1 + Difficulty + Validity |
## ID) + (1 | Item)
## Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 2086.5 2201.7 -1026.2 2052.5 6463
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5875 -0.1922 -0.1020 -0.0592 13.8836
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 1.4113 1.1880
## Difficulty 0.5953 0.7715 -0.18
## ValidityFILLER 0.6568 0.8104 -0.82 0.13
## ValidityTRUE 0.7850 0.8860 -0.97 0.29 0.92
## Item (Intercept) 2.0439 1.4296
## Number of obs: 6480, groups: ID, 90; Item, 72
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.24495 0.26136 -12.416 < 2e-16 ***
## Difficulty 0.91156 0.43146 2.113 0.0346 *
## ValidityFILLER -1.69648 0.25599 -6.627 3.43e-11 ***
## ValidityTRUE -1.76454 0.24223 -7.285 3.23e-13 ***
## Difficulty:ValidityFILLER 0.02256 0.41430 0.054 0.9566
## Difficulty:ValidityTRUE 0.51484 0.44695 1.152 0.2494
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Dffclt VFILLE VlTRUE D:VFIL
## Difficulty -0.141
## VldtyFILLER -0.277 0.049
## ValidtyTRUE -0.340 0.050 0.353
## Dff:VFILLER 0.035 -0.172 -0.297 -0.080
## Dffcl:VTRUE 0.035 -0.169 -0.082 -0.370 0.266
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Simple effects: pairwise Validity contrasts (reversed direction) computed
# separately at each Difficulty value
pairs(
  emmeans(modelerror1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## FILLER - FALSE -1.708 0.374 Inf -4.566 <.0001
## TRUE - FALSE -2.022 0.386 Inf -5.244 <.0001
## TRUE - FILLER -0.314 0.450 Inf -0.698 0.7649
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## FILLER - FALSE -1.685 0.277 Inf -6.074 <.0001
## TRUE - FALSE -1.507 0.262 Inf -5.757 <.0001
## TRUE - FILLER 0.178 0.307 Inf 0.579 0.8313
##
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
# Standardized betas and standard errors for the lure-error model.
# The intercept row is 0 because stdCoef.merMod() scales each estimate by the
# SD of its design-matrix column, and the intercept column is constant.
stdCoef.merMod(modelerror1)
## stdcoef stdse
## (Intercept) 0.00000000 0.0000000
## Difficulty 2.02181686 0.9569581
## ValidityFILLER -3.54753107 0.5353142
## ValidityTRUE -3.68985314 0.5065251
## Difficulty:ValidityFILLER 0.02889506 0.5305311
## Difficulty:ValidityTRUE 0.65926797 0.5723404
Models to analyze correct answer responses
# Logistic mixed model for correct answers, with the same fixed- and
# random-effects structure as the lure-error model. Note the Difficulty
# recoding here is Easy = .5, anything else = -.5 (the opposite sign
# convention from the one used for modelerror1).
modelcorrect1 <- data %>%
  dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy", .5, -.5)) %>%
  glmer(
    Correct ~ Difficulty * Validity + (1 + Difficulty + Validity | ID) + (1 | Item),
    .,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  )
# Simple effects: pairwise Validity contrasts (reversed direction) computed
# separately at each Difficulty value
pairs(
  emmeans(modelcorrect1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)
## Difficulty = -0.5:
## contrast estimate SE df z.ratio p.value
## FILLER - FALSE -0.0298 0.137 Inf -0.217 0.9744
## TRUE - FALSE 0.4822 0.151 Inf 3.184 0.0041
## TRUE - FILLER 0.5120 0.144 Inf 3.546 0.0011
##
## Difficulty = 0.5:
## contrast estimate SE df z.ratio p.value
## FILLER - FALSE 0.7651 0.132 Inf 5.800 <.0001
## TRUE - FALSE 1.4137 0.164 Inf 8.610 <.0001
## TRUE - FILLER 0.6485 0.166 Inf 3.907 0.0003
##
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
# Standardized betas and standard errors for the correct-answer model.
# The intercept row is 0 because stdCoef.merMod() scales each estimate by the
# SD of its design-matrix column, and the intercept column is constant.
stdCoef.merMod(modelcorrect1)
## stdcoef stdse
## (Intercept) 0.0000000 0.00000000
## Difficulty 2.8313958 0.25004343
## ValidityFILLER 0.3468401 0.08622827
## ValidityTRUE 0.8942719 0.11184599
## Difficulty:ValidityFILLER 0.4592420 0.11436075
## Difficulty:ValidityTRUE 0.5380997 0.12058109
Graph of incorrect lure responses by tweet accuracy
# Figure 1: proportion of lure responses per Difficulty x Validity cell, with
# +/- 1 SE error bars and the rounded cell mean printed above each bar.
#
# `summary` already holds one row per cell, so the bars are drawn directly
# with geom_col(). The previous geom_bar(stat = "summary", fun.y = "mean")
# passed an argument ggplot2 no longer recognises, which only produced a
# warning and a silent fallback to mean_se().
#
# Validity is converted to a factor with explicit levels and labels so the
# legend text and fill colours are tied to the condition itself instead of
# depending on a data-driven reorder(). ungroup() is required first because
# `summary` is still grouped by Validity and grouping columns cannot be
# modified.
Twitter_Pilot_Figure1 <- summary %>%
  ungroup() %>%
  mutate(
    Validity = factor(
      Validity,
      levels = c("TRUE", "FILLER", "FALSE"),
      labels = c("True", "Filler", "False")
    )
  ) %>%
  ggplot(aes(x = reorder(Difficulty, mean_err), y = mean_err, fill = Validity)) +
  geom_col(position = "dodge") +
  xlab("Difficulty of Item") +
  ylab("Proportion Lure Reported") +
  labs(fill = "Accuracy of Item") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_err - se_error, ymax = mean_err + se_error),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_err, digits = 2)),
    position = position_dodge(width = .9),
    vjust = -3
  ) +
  scale_fill_manual(
    values = c(True = "#f0f0f0", Filler = "#bdbdbd", False = "#636363")
  )
print(Twitter_Pilot_Figure1)
Correct answer responses by tweet accuracy
# Figure 2: proportion of correct answers per Difficulty x Validity cell, with
# +/- 1 SE error bars and the rounded cell mean printed inside each bar.
#
# Bug fix: the fill was previously reorder(Validity, mean_corr), which orders
# the levels by increasing mean correct (FALSE, FILLER, TRUE for these data),
# while scale_fill_manual() hard-coded labels = c("True", "Filler", "False").
# The legend therefore labelled the FALSE bars "True" and the TRUE bars
# "False". Validity is now a factor with explicit levels and labels, so
# legend text and colours always match the condition.
#
# `summary` already holds one row per cell, so geom_col() replaces
# geom_bar(stat = "summary", fun.y = "mean"), whose `fun.y` argument ggplot2
# no longer recognises. ungroup() is required before mutate() because
# `summary` is still grouped by Validity.
Twitter_Pilot_Figure2 <- summary %>%
  ungroup() %>%
  mutate(
    Validity = factor(
      Validity,
      levels = c("TRUE", "FILLER", "FALSE"),
      labels = c("True", "Filler", "False")
    )
  ) %>%
  ggplot(aes(x = reorder(Difficulty, -mean_corr), y = mean_corr, fill = Validity)) +
  geom_col(position = "dodge") +
  xlab("Difficulty of Item") +
  ylab("Proportion Correct Answer Reported") +
  labs(fill = "Accuracy of Item") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_corr - se_corr, ymax = mean_corr + se_corr),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_corr, digits = 2)),
    position = position_dodge(width = .9),
    vjust = 3
  ) +
  scale_fill_manual(
    values = c(True = "#f0f0f0", Filler = "#bdbdbd", False = "#636363")
  )
print(Twitter_Pilot_Figure2)
Demographics (incl social media use)
# Age: mean taken over every row of `data`
# (presumably each participant contributes the same number of rows, so this
# equals the participant-level mean - verify)
summarise(data, mean(age))
## # A tibble: 1 × 1
## `mean(age)`
## <dbl>
## 1 37.4
# Gender: row counts divided by 72 to turn trial rows into participant counts
# (presumably 72 rows per participant: 6480 rows / 90 IDs)
count(data, gender) %>%
  mutate(n = n / 72)
## # A tibble: 3 × 2
## gender n
## <chr> <dbl>
## 1 Female 28
## 2 Male 60
## 3 Prefer not to answer 2
# Proportion of rows whose `social_use` text mentions each platform.
# Each indicator is 1 when the platform name occurs in `social_use`, else 0.
data %>%
  mutate(
    use_twitter = as.numeric(grepl("Twitter", social_use)),
    use_facebook = as.numeric(grepl("Facebook", social_use)),
    use_reddit = as.numeric(grepl("Reddit", social_use)),
    use_youtube = as.numeric(grepl("Youtube", social_use)),
    use_snapchat = as.numeric(grepl("Snapchat", social_use)),
    use_instagram = as.numeric(grepl("Instagram", social_use)),
    use_pinterest = as.numeric(grepl("Pinterest", social_use))
  ) %>%
  summarise(across(use_twitter:use_pinterest, mean))
## # A tibble: 1 × 7
## use_twitter use_facebook use_reddit use_youtube use_snapchat use_instagram
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.578 0.678 0.622 0.811 0.189 0.544
## # … with 1 more variable: use_pinterest <dbl>
# Average number of apps used: number of commas in `social_use` plus one
# (presumably `social_use` is a comma-separated list of apps - confirm)
data %>%
  mutate(social_use_num = str_count(social_use, ",") + 1) %>%
  summarise(mean(social_use_num))
## # A tibble: 1 × 1
## `mean(social_use_num)`
## <dbl>
## 1 3.67
# Time spent on social media per day: participant counts per category
# (row counts / 72) and their share of the sample (counts / 90), sorted from
# the least to the most common category.
# (presumably 72 rows per participant and 90 participants - confirm)
data %>%
  count(social_time) %>%
  mutate(
    n = n / 72,
    prop = n / 90
  ) %>%
  arrange(n)
## # A tibble: 9 × 3
## social_time n prop
## <chr> <dbl> <dbl>
## 1 More than 6 hours per day 2 0.0222
## 2 5–6 hours per day 3 0.0333
## 3 4–5 hours per day 6 0.0667
## 4 2–3 hours per day 7 0.0778
## 5 3–4 hours per day 10 0.111
## 6 Less than 10 minutes per day 10 0.111
## 7 10–30 minutes per day 16 0.178
## 8 1–2 hours per day 17 0.189
## 9 31–60 minutes per day 19 0.211