Evaluating Tweets Reduces the Influence of Inaccurate Content, But Does “Liking” a Tweet Reflect Evaluation?

Load data and define helper function

# Read twitter-exp1-long.csv into `data`; col_names = TRUE takes the column
# names from the first row of the file.
data <- readr::read_csv("twitter-exp1-long.csv", col_names = TRUE)

## Warning: One or more parsing issues, see `problems()` for details

## Rows: 6480 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): ID, gender, race, social_use, social_time, lookup, comments, Judgm...
## dbl  (6): age, Correct, Error, Unsure, Duration (in seconds), Read2
## lgl  (3): gender_4_TEXT, race_6_TEXT, social_use_other
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Approximate standardized fixed-effect coefficients for a fitted mixed model.
#
# Every fixed-effect estimate and its standard error is multiplied by the
# standard deviation of the matching column of getME(object, "X") and divided
# by the standard deviation of getME(object, "y"). Constant columns (e.g. the
# intercept) have a standard deviation of 0, so their standardized values are 0.
#
# object: a fitted model accepted by getME(), fixef() and summary().
# Returns a data frame with columns `stdcoef` and `stdse`, one row per
# fixed effect.
stdCoef.merMod <- function(object) {
  response_sd <- sd(getME(object, "y"))
  predictor_sd <- apply(getME(object, "X"), 2, sd)
  fixed_se <- coef(summary(object))[, "Std. Error"]
  data.frame(
    stdcoef = fixef(object) * predictor_sd / response_sd,
    stdse = fixed_se * predictor_sd / response_sd
  )
}

Descriptive statistics for incorrect-lure and correct-answer responses (summarized in Table 1)

# Two-stage descriptives: first average Error and Correct within every
# ID x Validity x Difficulty cell, then summarise those per-ID means
# (mean, SD, SE) for each Validity x Difficulty condition.
summary <- data %>%
  group_by(ID, Validity, Difficulty) %>%
  summarise(
    id_error = mean(Error, na.rm = TRUE),
    id_correct = mean(Correct, na.rm = TRUE)
  ) %>%
  group_by(Validity, Difficulty) %>%
  summarise(
    mean_err = mean(id_error),
    sd_err = sd(id_error),
    # SE = SD of the per-ID means over the square root of the group size
    se_error = sd_err / sqrt(n()),
    mean_corr = mean(id_correct),
    sd_corr = sd(id_correct),
    se_corr = sd_corr / sqrt(n())
  )

## `summarise()` has grouped output by 'ID', 'Validity'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Validity'. You can override using the
## `.groups` argument.

# Show the condition-level descriptives computed above.
print(summary)

## # A tibble: 6 × 8
## # Groups:   Validity [3]
##   Validity Difficulty mean_err sd_err se_error mean_corr sd_corr se_corr
##   <chr>    <chr>         <dbl>  <dbl>    <dbl>     <dbl>   <dbl>   <dbl>
## 1 FALSE    Easy         0.0861 0.112   0.0118      0.673   0.277  0.0292
## 2 FALSE    Hard         0.140  0.136   0.0143      0.256   0.272  0.0287
## 3 FILLER   Easy         0.0157 0.0351  0.00370     0.765   0.272  0.0287
## 4 FILLER   Hard         0.0343 0.0528  0.00557     0.257   0.273  0.0287
## 5 TRUE     Easy         0.0102 0.0274  0.00289     0.819   0.275  0.0290
## 6 TRUE     Hard         0.0361 0.0500  0.00528     0.333   0.304  0.0320

# supplemental material, response count
#
# Number of rows per Difficulty x Response category; `prop` is each category's
# share of the rows at that difficulty level. The denominator is derived from
# the counts themselves instead of a hard-coded 3240, and mutate() replaces a
# summarise() call that returned several rows per group (deprecated since
# dplyr 1.1.0).

data %>%
  count(Difficulty, Response) %>%
  group_by(Difficulty) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()

## # A tibble: 8 × 4
##   Difficulty Response            n   prop
##   <chr>      <chr>           <int>  <dbl>
## 1 Easy       Correct          2438 0.752 
## 2 Easy       Incorrect-Lure    121 0.0373
## 3 Easy       Incorrect-Other   154 0.0475
## 4 Easy       Unsure            527 0.163 
## 5 Hard       Correct           914 0.282 
## 6 Hard       Incorrect-Lure    227 0.0701
## 7 Hard       Incorrect-Other   471 0.145 
## 8 Hard       Unsure           1628 0.502

Models to analyze incorrect lure responses

# Make Validity a factor whose reference (first) level is "FALSE"

data$Validity <- relevel(as.factor(data$Validity), ref = "FALSE")

# Binomial mixed model for incorrect-lure responses (Error).
# Difficulty is recoded numerically before fitting: "Easy" = -0.5, any other
# non-missing value = 0.5.
# Fixed effects: Difficulty, Validity and their interaction.
# Random effects: by-ID intercept with Difficulty and Validity slopes, plus a
# by-Item intercept.
# The bobyqa optimizer is allowed up to 2e5 function evaluations.
modelerror1 <- data %>%
    dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy",-.5,.5)) %>%
    glmer(Error ~ Difficulty*Validity + (1 + Difficulty + Validity | ID) + (1 | Item), ., family = binomial, control = glmerControl(optimizer = "bobyqa", optCtrl=list(maxfun=2e5)))

## boundary (singular) fit: see help('isSingular')

# Print the fitted model summary for the incorrect-lure model.
summary(modelerror1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Error ~ Difficulty * Validity + (1 + Difficulty + Validity |  
##     ID) + (1 | Item)
##    Data: .
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   2086.5   2201.7  -1026.2   2052.5     6463 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5875 -0.1922 -0.1020 -0.0592 13.8836 
## 
## Random effects:
##  Groups Name           Variance Std.Dev. Corr             
##  ID     (Intercept)    1.4113   1.1880                    
##         Difficulty     0.5953   0.7715   -0.18            
##         ValidityFILLER 0.6568   0.8104   -0.82  0.13      
##         ValidityTRUE   0.7850   0.8860   -0.97  0.29  0.92
##  Item   (Intercept)    2.0439   1.4296                    
## Number of obs: 6480, groups:  ID, 90; Item, 72
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -3.24495    0.26136 -12.416  < 2e-16 ***
## Difficulty                 0.91156    0.43146   2.113   0.0346 *  
## ValidityFILLER            -1.69648    0.25599  -6.627 3.43e-11 ***
## ValidityTRUE              -1.76454    0.24223  -7.285 3.23e-13 ***
## Difficulty:ValidityFILLER  0.02256    0.41430   0.054   0.9566    
## Difficulty:ValidityTRUE    0.51484    0.44695   1.152   0.2494    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Dffclt VFILLE VlTRUE D:VFIL
## Difficulty  -0.141                            
## VldtyFILLER -0.277  0.049                     
## ValidtyTRUE -0.340  0.050  0.353              
## Dff:VFILLER  0.035 -0.172 -0.297 -0.080       
## Dffcl:VTRUE  0.035 -0.169 -0.082 -0.370  0.266
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

# Simple effects: pairwise Validity contrasts (reversed direction) computed
# separately at each level of Difficulty for the incorrect-lure model

pairs(
  emmeans(modelerror1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)

## Difficulty = -0.5:
##  contrast       estimate    SE  df z.ratio p.value
##  FILLER - FALSE   -1.708 0.374 Inf  -4.566  <.0001
##  TRUE - FALSE     -2.022 0.386 Inf  -5.244  <.0001
##  TRUE - FILLER    -0.314 0.450 Inf  -0.698  0.7649
## 
## Difficulty =  0.5:
##  contrast       estimate    SE  df z.ratio p.value
##  FILLER - FALSE   -1.685 0.277 Inf  -6.074  <.0001
##  TRUE - FALSE     -1.507 0.262 Inf  -5.757  <.0001
##  TRUE - FILLER     0.178 0.307 Inf   0.579  0.8313
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates

# standardized betas and their standard errors for the incorrect-lure model

stdCoef.merMod(modelerror1)

##                               stdcoef     stdse
## (Intercept)                0.00000000 0.0000000
## Difficulty                 2.02181686 0.9569581
## ValidityFILLER            -3.54753107 0.5353142
## ValidityTRUE              -3.68985314 0.5065251
## Difficulty:ValidityFILLER  0.02889506 0.5305311
## Difficulty:ValidityTRUE    0.65926797 0.5723404

Models to analyze correct answer responses

# Binomial mixed model for correct-answer responses (Correct), with the same
# fixed- and random-effects structure and optimizer settings as modelerror1.
# NOTE: Difficulty is coded with the opposite sign from modelerror1 here:
# "Easy" = 0.5, any other non-missing value = -0.5. In output for this model,
# Difficulty = -0.5 therefore refers to the non-Easy items.
modelcorrect1 <- data %>%
    dplyr::mutate(Difficulty = ifelse(Difficulty == "Easy",.5,-.5)) %>%
    glmer(Correct ~ Difficulty*Validity + (1 + Difficulty + Validity | ID) + (1 | Item), ., family = binomial, control = glmerControl(optimizer = "bobyqa", optCtrl=list(maxfun=2e5)))

# Simple effects: pairwise Validity contrasts (reversed direction) computed
# separately at each level of Difficulty for the correct-answer model

pairs(
  emmeans(modelcorrect1, ~ Validity * Difficulty),
  simple = "Validity",
  reverse = TRUE
)

## Difficulty = -0.5:
##  contrast       estimate    SE  df z.ratio p.value
##  FILLER - FALSE  -0.0298 0.137 Inf  -0.217  0.9744
##  TRUE - FALSE     0.4822 0.151 Inf   3.184  0.0041
##  TRUE - FILLER    0.5120 0.144 Inf   3.546  0.0011
## 
## Difficulty =  0.5:
##  contrast       estimate    SE  df z.ratio p.value
##  FILLER - FALSE   0.7651 0.132 Inf   5.800  <.0001
##  TRUE - FALSE     1.4137 0.164 Inf   8.610  <.0001
##  TRUE - FILLER    0.6485 0.166 Inf   3.907  0.0003
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates

# standardized betas and their standard errors for the correct-answer model

stdCoef.merMod(modelcorrect1)

##                             stdcoef      stdse
## (Intercept)               0.0000000 0.00000000
## Difficulty                2.8313958 0.25004343
## ValidityFILLER            0.3468401 0.08622827
## ValidityTRUE              0.8942719 0.11184599
## Difficulty:ValidityFILLER 0.4592420 0.11436075
## Difficulty:ValidityTRUE   0.5380997 0.12058109

Graph of incorrect lure responses by tweet accuracy

# Figure 1: mean proportion of incorrect-lure responses by item difficulty
# (x axis) and tweet accuracy (fill), with +/- 1 SE error bars and rounded
# value labels above the bars.
# `summary` holds one row per Validity x Difficulty cell, so the "mean"
# summary reproduces mean_err for each bar. `fun` replaces `fun.y`, which
# current ggplot2 ignores with an "unknown parameters" warning.
# Levels are ordered by increasing mean_err; the fill colours and labels in
# scale_fill_manual() are listed in that same order.
Twitter_Pilot_Figure1 <- summary %>%
  group_by(Validity, Difficulty) %>%
  ggplot(.) +
  aes(
    x = reorder(Difficulty, mean_err),
    y = mean_err,
    fill = reorder(Validity, mean_err)
  ) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  xlab("Difficulty of Item") +
  ylab("Proportion Lure Reported") +
  labs(fill = "Accuracy of Item") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_err - se_error, ymax = mean_err + se_error),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_err, digits = 2)),
    position = position_dodge(width = .9),
    vjust = -3
  ) +
  scale_fill_manual(
    values = c("#f0f0f0", "#bdbdbd", "#636363"),
    labels = c("True", "Filler", "False")
  )

## Warning: Ignoring unknown parameters: fun.y

# Render Figure 1 (incorrect-lure responses).
print(Twitter_Pilot_Figure1)

## No summary function supplied, defaulting to `mean_se()`

Correct answer responses by tweet accuracy

# Figure 2: mean proportion of correct-answer responses by item difficulty
# (x axis) and tweet accuracy (fill), with +/- 1 SE error bars and rounded
# value labels.
# `summary` holds one row per Validity x Difficulty cell, so the "mean"
# summary reproduces mean_corr for each bar. `fun` replaces `fun.y`, which
# current ggplot2 ignores with an "unknown parameters" warning.
# The fill factor is ordered by DEcreasing mean_corr so that its levels come
# out as TRUE, FILLER, FALSE, matching the label order given to
# scale_fill_manual(). Ordering by increasing mean_corr put FALSE first and
# attached the "True" label to the FALSE bars (and vice versa).
Twitter_Pilot_Figure2 <- summary %>%
  group_by(Validity, Difficulty) %>%
  ggplot(.) +
  aes(
    x = reorder(Difficulty, -mean_corr),
    y = mean_corr,
    fill = reorder(Validity, -mean_corr)
  ) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  xlab("Difficulty of Item") +
  ylab("Proportion Correct Answer Reported") +
  labs(fill = "Accuracy of Item") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_errorbar(
    aes(ymin = mean_corr - se_corr, ymax = mean_corr + se_corr),
    position = position_dodge(width = 0.9),
    width = .1
  ) +
  geom_text(
    aes(label = round(mean_corr, digits = 2)),
    position = position_dodge(width = .9),
    vjust = 3
  ) +
  scale_fill_manual(
    values = c("#f0f0f0", "#bdbdbd", "#636363"),
    labels = c("True", "Filler", "False")
  )

## Warning: Ignoring unknown parameters: fun.y

# Render Figure 2 (correct-answer responses).
print(Twitter_Pilot_Figure2)

## No summary function supplied, defaulting to `mean_se()`

Demographics (incl social media use)

# age: mean of the age column over all rows of `data`

summarise(data, mean(age))

## # A tibble: 1 × 1
##   `mean(age)`
##         <dbl>
## 1        37.4

# gender: number of participants in each gender category.
# Each participant (ID) is counted once, instead of dividing row counts by a
# hard-coded 72 rows per participant.

data %>%
  distinct(ID, gender) %>%
  count(gender)

## # A tibble: 3 × 2
##   gender                   n
##   <chr>                <dbl>
## 1 Female                  28
## 2 Male                    60
## 3 Prefer not to answer     2

# prop of sample who uses social media apps:
# one 0/1 indicator per platform (1 when the platform name appears in
# social_use), then the mean of each indicator over all rows of `data`

data %>%
  mutate(
    use_twitter = as.numeric(grepl("Twitter", social_use)),
    use_facebook = as.numeric(grepl("Facebook", social_use)),
    use_reddit = as.numeric(grepl("Reddit", social_use)),
    use_youtube = as.numeric(grepl("Youtube", social_use)),
    use_snapchat = as.numeric(grepl("Snapchat", social_use)),
    use_instagram = as.numeric(grepl("Instagram", social_use)),
    use_pinterest = as.numeric(grepl("Pinterest", social_use))
  ) %>%
  summarise(across(use_twitter:use_pinterest, mean))

## # A tibble: 1 × 7
##   use_twitter use_facebook use_reddit use_youtube use_snapchat use_instagram
##         <dbl>        <dbl>      <dbl>       <dbl>        <dbl>         <dbl>
## 1       0.578        0.678      0.622       0.811        0.189         0.544
## # … with 1 more variable: use_pinterest <dbl>

# avg number of different apps used:
# number of commas in social_use plus one, averaged over all rows of `data`

summarise(
  mutate(data, social_use_num = as.numeric(str_count(social_use, ",") + 1)),
  mean(social_use_num)
)

## # A tibble: 1 × 1
##   `mean(social_use_num)`
##                    <dbl>
## 1                   3.67

# count number of hours spent on social per day:
# participants per social_time category (n), sorted by increasing n, with
# `prop` as each category's share of all participants.
# Each participant (ID) is counted once and the denominator is the total
# count, instead of the hard-coded 72 rows per participant and 90 participants.

data %>%
  distinct(ID, social_time) %>%
  count(social_time) %>%
  arrange(n) %>%
  mutate(prop = n / sum(n))

## # A tibble: 9 × 3
##   social_time                      n   prop
##   <chr>                        <dbl>  <dbl>
## 1 More than 6 hours per day        2 0.0222
## 2 5–6 hours per day                3 0.0333
## 3 4–5 hours per day                6 0.0667
## 4 2–3 hours per day                7 0.0778
## 5 3–4 hours per day               10 0.111 
## 6 Less than 10 minutes per day    10 0.111 
## 7 10–30 minutes per day           16 0.178 
## 8 1–2 hours per day               17 0.189 
## 9 31–60 minutes per day           19 0.211

Evaluating Tweets Reduces the Influence of Inaccurate Content, But Does “Liking” a Tweet Reflect Evaluation? - Experiment 1