MPA 2026 - Experts and Everyone Else: The Evaluation of Mental Health Information Online

Author

Heather Perkins, Shin Loubriel, & Sophia Sullivan

Presentation

Code

Load Libraries

Code

library(psych)
library(kableExtra)
library(ggplot2)
library(ggrepel)
library(tidyr)
library(dplyr)
# library(gtools)
library(nFactors)
library(corrplot)
# library(sjPlot)
library(afex)
library(naniar)
library(stringr)
library(emmeans)
library(lavaan)
library(sjPlot)
library(purrr)
library(DT)
library(paletteer)
library(tidyverse)

Load Data

Code

df1 <- read.csv(file="data/Learning About Psychology (SONA)_August 22, 2025_20.13.csv", header=T, na.strings = c(""," "))
labels1 <- data.frame(t(cbind(df1[1,],samp="Sample Source")))
df1 <- df1[-c(1,2), ]
df1 <- subset(df1, Progress == "100", select=-c(4,14:15))
df1$samp <- "SONA"
colnames(df1)[250] <- "race"
colnames(df1)[252] <- "eth"

df2 <- read.csv(file="data/Learning About Psychology (Prolific)_August 22, 2025_20.14.csv", header=T, na.strings = c(""," "))
labels2 <- data.frame(t(cbind(df2[1,],samp="Sample Source")))
df2 <- df2[-c(1,2), ]
# df2_quality <- subset(df2, select=c(8,173:175))
df2 <- subset(df2, Progress == "100" & Q1.1 == 2 & Q2 < 4, select=-c(4,14:15))
df2$id <- df2$ResponseId
df2$samp <- "Prolific"
colnames(df2)[253] <- "race"
colnames(df2)[255] <- "eth"

common_cols <- intersect(names(df1), names(df2))

df <- bind_rows(
  select(df1, all_of(common_cols)),
  select(df2, all_of(common_cols))
)

flag <- labels1 %>%
  filter(grepl("truthful", X1))
flag2 <- rownames(flag)

a_list <- c("23","34","36","38","48","52","56","60","64","70","72","76","80","84","88")
m_list <- c("40","42","44","46","50","54","58","62","66","68","74","78","82","86","90")

# Identify target columns
target_cols <- grep(
  paste(c(a_list, m_list), collapse = "|"),
  names(df),
  value = TRUE
)

# Recode only for Prolific participants
df[df$samp == "Prolific", target_cols] <- lapply(
  df[df$samp == "Prolific", target_cols],
  function(x) {
    x <- as.numeric(x)
    x[x == 4] <- 3
    x[x == 5] <- 4
    x
  }
)

Missing Data

Code

d <- subset(df, select=c(15:79,200:248))

# total percentage of missing data
# mean(is.na(d))

# total number of participants (rows) with any missing data
# (sum(rowSums(is.na(d)) > 0))/nrow(d)

d <- na.omit(subset(df, select=c(id,15:79,200:248)))
backup <- df
df <- subset(df, df$id %in% d$id)

Attention Checks

Code

# attention check
attn <- subset(labels1, grepl("attention", X1))
attn_chk <- rownames(attn)

# Q18_12 == 4
df$Q18_12 <- as.numeric(df$Q18_12)
# table(df$Q18_12)

# Q28_4 > 9
df$Q28_4 <- as.numeric(df$Q28_4)
# table(df$Q28_4)
# table(df$Q28_4 <= 9, useNA = "ifany")

backup <- df
df <- subset(df, df$Q28_4 > 9 & Q18_12 == 4)

Demographics

Code

# table(df$Q4, df$samp, useNA = "always")
# table(df$race, df$samp, useNA = "always")

# ── Recode lookups ──────────────────────────────────────────────────────────
race_labels <- c(
  "1" = "American Indian or Alaska Native",
  "2" = "Asian",
  "3" = "Black or African American",
  "4" = "Hispanic or Spanish Origin",
  "5" = "Latino, Latina, or Latinx",
  "6" = "Middle Eastern or North African",
  "7" = "Native Hawaiian or Other Pacific Islander",
  "8" = "White",
  "9" = "Another race/ethnicity not listed"
)

# ── Gender: recode to Woman/Man only, drop rest ──────────────────────────────
expand_gender_grouped <- function(col, samp) {
  tibble(raw = col, samp = samp) %>%
    filter(!is.na(raw)) %>%
    mutate(
      raw = str_replace_all(raw, "\\s", ""),
      response = case_when(
        str_detect(raw, "\\b1\\b") ~ "Woman",
        str_detect(raw, "\\b2\\b") ~ "Man"
      )
    ) %>%
    filter(!is.na(response)) %>%
    count(samp, response) %>%
    group_by(samp) %>%
    mutate(pct = n / sum(n) * 100) %>%
    ungroup()
}

# ── Race: single selection = recode label, multiple = "Multiple options selected"
expand_race_grouped <- function(col, samp, labels) {
  tibble(raw = col, samp = samp) %>%
    filter(!is.na(raw)) %>%
    mutate(
      raw = str_replace_all(raw, "\\s", ""),
      response = if_else(
        str_detect(raw, ","),
        "Multiple options selected",
        recode(raw, !!!labels)
      )
    ) %>%
    count(samp, response) %>%
    group_by(samp) %>%
    mutate(pct = n / sum(n) * 100) %>%
    ungroup()
}

# ── Build count tables ───────────────────────────────────────────────────────
gender_counts <- expand_gender_grouped(df$Q4,   df$samp)
race_counts   <- expand_race_grouped(df$race, df$samp, race_labels)

# ── N per group ───────────────────────────────────────────────────────────────
group_ns <- df %>%
  filter(!is.na(samp)) %>%
  count(samp) %>%
  mutate(label = paste0(samp, " (n=", n, ")"))

# ── Plot function ─────────────────────────────────────────────────────────────
demo_bar_grouped <- function(counts, title, group_ns, show_legend = TRUE,
                             samp_names = NULL) {

  samp_levels <- as.character(group_ns$samp)

  samp_labels <- if (!is.null(samp_names)) {
    paste0(samp_names, " (n=", group_ns$n, ")")
  } else {
    paste0(group_ns$samp, " (n=", group_ns$n, ")")
  }

  fill_colors  <- c("#5B8DB8", "#E07B54")[seq_along(samp_levels)]
  names(fill_colors) <- samp_levels

  counts %>%
    mutate(
      response = fct_reorder(response, n, .fun = sum),
      samp     = factor(samp, levels = samp_levels)
    ) %>%
    ggplot(aes(x = n, y = response, fill = samp)) +
    geom_col(position = position_dodge2(width = 0.7, preserve = "single"),
             width = 0.65) +
    geom_text(
      aes(label = paste0(n, " (", round(pct, 0), "%)")),
      position = position_dodge2(width = 0.7, preserve = "single"),
      hjust = -0.1, size = 3
    ) +
    scale_x_continuous(
      expand = expansion(mult = c(0, 0)),
      limits = c(0, max(counts$n) * 1.35)
    ) +
    scale_fill_manual(values = fill_colors, labels = samp_labels) +
    labs(
      title    = title,
      x        = NULL,
      y        = NULL,
      fill     = NULL
    ) +
    theme_minimal(base_size = 13) +
    theme(
      plot.title          = element_text(face = "bold", hjust = 1),
      plot.title.position = "plot",
      panel.grid.major.y  = element_blank(),
      legend.position     = if (show_legend) "top" else "none"
    )
}

# ── Draw plots ────────────────────────────────────────────────────────────────
p_gender <- demo_bar_grouped(gender_counts, "Gender Identity",  group_ns,
                             samp_names = c("Non-College", "Students"))
p_race   <- demo_bar_grouped(race_counts,   "Race / Ethnicity", group_ns,
                             show_legend = FALSE,
                             samp_names = c("Online Sample", "Student Sample"))

print(p_gender)

Code

print(p_race)

Code

# ── Optional: save ────────────────────────────────────────────────────────────
# ggsave("gender_grouped.png", p_gender, width = 5.29, height = 2.15, dpi = 300)
# ggsave("race_grouped.png",   p_race,   width = 5.29, height = 4.35, dpi = 300)

Measure Checking

Source Credibility

EFA

Code

describe(subset(df, select=c(grep("Q27_",colnames(df)), grep("Q94_",colnames(df)))), type=2)

       vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
Q27_1*    1 184 3.92 1.18      4    3.95 1.48   1   6     5 -0.23    -0.26 0.09
Q27_2*    2 184 3.94 1.44      4    3.95 1.48   1   7     6 -0.05    -0.41 0.11
Q27_3*    3 184 3.87 1.36      4    3.86 1.48   1   7     6 -0.02    -0.22 0.10
Q27_4*    4 184 4.64 1.43      5    4.66 1.48   1   7     6 -0.25    -0.55 0.11
Q27_5*    5 184 5.01 1.36      5    5.07 1.48   1   7     6 -0.37    -0.26 0.10
Q27_6*    6 184 3.98 1.21      4    3.99 1.48   1   6     5 -0.12    -0.25 0.09
Q94_1*    7 184 4.70 1.37      5    4.71 1.48   1   7     6 -0.19    -0.57 0.10
Q94_2*    8 184 4.41 1.50      4    4.43 1.48   1   7     6 -0.10    -0.57 0.11
Q94_3*    9 184 4.59 1.32      5    4.63 1.48   1   7     6 -0.28    -0.16 0.10

Code

df[paste0("Q27_", 1:6)] <- lapply(df[paste0("Q27_", 1:6)], as.numeric)
df[paste0("Q94_", 1:3)] <- lapply(df[paste0("Q94_", 1:3)], as.numeric)

d <- subset(df, select=c(grep("Q27_", colnames(df)),grep("Q94_", colnames(df))))

# mapping of labels to text
labels <- c(
  Q27_1 = "Unintelligent : Intelligent",
  Q27_2 = "Untrained : Trained",
  Q27_3 = "Inexpert : Expert",
  Q27_4 = "Uninformed : Informed",
  Q27_5 = "Incompetent : Competent",
  Q27_6 = "Stupid : Bright",
  Q94_1 = "Dishonest : Honest",
  Q94_2 = "Untrustworthy : Trustworthy",
  Q94_3 = "Dishonorable : Honorable"
)

# shorten labels to max 40 characters (you can adjust the number)
labels <- str_trunc(labels, width = 60, side = "right")

# replace column names
names(d) <- ifelse(names(d) %in% names(labels),
                           labels[names(d)],
                           names(d))


ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

Code

EFA <- factanal(d, factors = 1, rotation = "promax")
print(EFA, digits=3, cutoff=.4, sort=T)


Call:
factanal(x = d, factors = 1, rotation = "promax")

Uniquenesses:
Unintelligent : Intelligent         Untrained : Trained 
                      0.287                       0.353 
          Inexpert : Expert       Uninformed : Informed 
                      0.392                       0.226 
    Incompetent : Competent             Stupid : Bright 
                      0.280                       0.283 
         Dishonest : Honest Untrustworthy : Trustworthy 
                      0.224                       0.205 
   Dishonorable : Honorable 
                      0.269 

Loadings:
[1] 0.844 0.804 0.780 0.880 0.849 0.847 0.881 0.892 0.855

               Factor1
SS loadings       6.48
Proportion Var    0.72

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 298.38 on 27 degrees of freedom.
The p-value is 1.52e-47

Code

EFA <- factanal(d, factors = 2, rotation = "promax")
print(EFA, digits=3, cutoff=.4, sort=T)


Call:
factanal(x = d, factors = 2, rotation = "promax")

Uniquenesses:
Unintelligent : Intelligent         Untrained : Trained 
                      0.217                       0.372 
          Inexpert : Expert       Uninformed : Informed 
                      0.408                       0.200 
    Incompetent : Competent             Stupid : Bright 
                      0.220                       0.202 
         Dishonest : Honest Untrustworthy : Trustworthy 
                      0.155                       0.055 
   Dishonorable : Honorable 
                      0.234 

Loadings:
                            Factor1 Factor2
Unintelligent : Intelligent  0.916         
Uninformed : Informed        0.753         
Incompetent : Competent      0.859         
Stupid : Bright              0.917         
Dishonest : Honest                   0.835 
Untrustworthy : Trustworthy          1.019 
Dishonorable : Honorable             0.731 
Untrained : Trained          0.465         
Inexpert : Expert            0.497         

               Factor1 Factor2
SS loadings      3.489   2.526
Proportion Var   0.388   0.281
Cumulative Var   0.388   0.668

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.829
Factor2   0.829   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 156.68 on 19 degrees of freedom.
The p-value is 1.12e-23

Code

d <- subset(df, select=c(grep("Q27_", colnames(df)),grep("Q94_", colnames(df))))
d <- subset(d, select=-c(Q27_2, Q27_3))

# mapping of labels to text
labels <- c(
  Q27_1 = "Unintelligent : Intelligent",
  Q27_4 = "Uninformed : Informed",
  Q27_5 = "Incompetent : Competent",
  Q27_6 = "Stupid : Bright",
  Q94_1 = "Dishonest : Honest",
  Q94_2 = "Untrustworthy : Trustworthy",
  Q94_3 = "Dishonorable : Honorable"
)

# shorten labels to max 40 characters (you can adjust the number)
labels <- str_trunc(labels, width = 60, side = "right")

# replace column names
names(d) <- ifelse(names(d) %in% names(labels),
                           labels[names(d)],
                           names(d))


ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

Code

EFA <- factanal(d, factors = 1, rotation = "promax")
print(EFA, digits=3, cutoff=.4, sort=T)


Call:
factanal(x = d, factors = 1, rotation = "promax")

Uniquenesses:
Unintelligent : Intelligent       Uninformed : Informed 
                      0.305                       0.241 
    Incompetent : Competent             Stupid : Bright 
                      0.271                       0.268 
         Dishonest : Honest Untrustworthy : Trustworthy 
                      0.205                       0.204 
   Dishonorable : Honorable 
                      0.251 

Loadings:
[1] 0.834 0.871 0.854 0.855 0.892 0.892 0.866

               Factor1
SS loadings      5.256
Proportion Var   0.751

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 167.68 on 14 degrees of freedom.
The p-value is 2.02e-28

Code

EFA <- factanal(d, factors = 2, rotation = "promax")
print(EFA, digits=3, cutoff=.4, sort=T)


Call:
factanal(x = d, factors = 2, rotation = "promax")

Uniquenesses:
Unintelligent : Intelligent       Uninformed : Informed 
                      0.245                       0.211 
    Incompetent : Competent             Stupid : Bright 
                      0.202                       0.172 
         Dishonest : Honest Untrustworthy : Trustworthy 
                      0.130                       0.086 
   Dishonorable : Honorable 
                      0.218 

Loadings:
                            Factor1 Factor2
Unintelligent : Intelligent  0.823         
Uninformed : Informed        0.729         
Incompetent : Competent      0.853         
Stupid : Bright              0.918         
Dishonest : Honest                   0.849 
Untrustworthy : Trustworthy          0.933 
Dishonorable : Honorable             0.730 

               Factor1 Factor2
SS loadings      2.826   2.166
Proportion Var   0.404   0.309
Cumulative Var   0.404   0.713

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.795
Factor2   0.795   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 22.81 on 8 degrees of freedom.
The p-value is 0.00362

Plots

Histograms

Code

labels <- c(
  Q27_1 = "Unintelligent : Intelligent",
  Q27_2 = "Untrained : Trained",
  Q27_3 = "Inexpert : Expert",
  Q27_4 = "Uninformed : Informed",
  Q27_5 = "Incompetent : Competent",
  Q27_6 = "Stupid : Bright",
  Q94_1 = "Dishonest : Honest",
  Q94_2 = "Untrustworthy : Trustworthy",
  Q94_3 = "Dishonorable : Honorable"
)

# Reshape to long format
df_long <- df %>%
  pivot_longer(
    cols = matches("Q27_|Q94_"),
    names_to = "item",
    values_to = "response"
  )

# Plot histograms for all items
ggplot(df_long, aes(x = response, fill = item)) +
  geom_histogram(alpha = 0.8, bins = 6, color = "white", linewidth = 0.3) +
  facet_wrap(~ item, labeller = as_labeller(labels)) +
  scale_fill_paletteer_d("MetBrewer::Cross") +
  labs(
    x = "Response",
    y = "Count",
    title = "Distributions of Competence and Trustworthiness Ratings"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = "#f0f0f0", color = NA),
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.text = element_text(color = "gray30"),
    text = element_text(size = 10)
  )

Code

ggsave("hist_source_cred.png", width = 5.29, height = 5.29, dpi = 300)

Calculation

Code

df$source_comp <- scale(rowMeans(df[, paste0("Q27_", c(1, 4, 5, 6))], na.rm = TRUE), center=T, scale=T)
df$source_exp <- scale(rowMeans(df[, paste0("Q27_", c(2, 3))], na.rm = TRUE), center=T, scale=T)
df$source_trust <- scale(rowMeans(df[, paste0("Q94_", 1:3)], na.rm = TRUE), center=T, scale=T)

Third Person Effect

Item Analysis

Code

describe(subset(df, select=c(grep("Q28_",colnames(df)))))

       vars   n  mean   sd median trimmed  mad min max range  skew kurtosis
Q28_1*    1 184  8.01 2.89      8    8.18 2.97   1  12    11 -0.46    -0.74
Q28_2*    2 184  7.41 3.00      8    7.57 2.97   1  12    11 -0.40    -0.79
Q28_3*    3 184  8.30 3.06      9    8.59 2.97   1  12    11 -0.68    -0.64
Q28_4     4 184 10.68 0.47     11   10.72 0.00  10  11     1 -0.76    -1.43
Q28_5*    5 184  7.93 2.68      9    8.28 1.48   1  11    10 -1.04     0.11
         se
Q28_1* 0.21
Q28_2* 0.22
Q28_3* 0.23
Q28_4  0.03
Q28_5* 0.20

Code

df$Q28_1 <- as.numeric(df$Q28_1)
df$Q28_2 <- as.numeric(df$Q28_2)
df$Q28_3 <- as.numeric(df$Q28_3)
df$Q28_5 <- as.numeric(df$Q28_5)

d <- na.omit(subset(df, select=c(grep("Q28",colnames(df)))))
d <- subset(d, select=-c(Q28_4))

cor_out <- corr.test(d)
cor_out

Call:corr.test(x = d)
Correlation matrix 
      Q28_1 Q28_2 Q28_3 Q28_5
Q28_1  1.00  0.68  0.79  0.49
Q28_2  0.68  1.00  0.66  0.50
Q28_3  0.79  0.66  1.00  0.54
Q28_5  0.49  0.50  0.54  1.00
Sample Size 
[1] 184
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
      Q28_1 Q28_2 Q28_3 Q28_5
Q28_1     0     0     0     0
Q28_2     0     0     0     0
Q28_3     0     0     0     0
Q28_5     0     0     0     0

 To see confidence intervals of the correlations, print with the short=FALSE option

Code

df$tpe_self <- (df$Q28_1 + df$Q28_2)/2
df$tpe_othr <- (df$Q28_3 + df$Q28_5)/2
df <- df %>%
  mutate(Q28_6 = case_when(
    Q28_1 == Q28_3 ~ 0,
    Q28_1 <  Q28_3 ~ 1,
    Q28_1 >  Q28_3 ~ -1
  )) %>%
  mutate(Q28_7 = case_when(
    Q28_2 == Q28_5 ~ 0,
    Q28_2 <  Q28_5 ~ 1,
    Q28_2 >  Q28_5 ~ -1
  ))
df$tpe_diff <- (df$Q28_6 + df$Q28_7)

Difference Heatmaps

Code

tab <- as.data.frame(table(df$Q28_2, df$Q28_5))
names(tab) <- c("Q28_2", "Q28_5", "Freq")
tab$Q28_2 <- as.numeric(as.character(tab$Q28_2))
tab$Q28_5 <- as.numeric(as.character(tab$Q28_5))
tab$agreement <- ifelse(tab$Q28_2 > tab$Q28_5, "more self",
                        ifelse(tab$Q28_2 < tab$Q28_5, "more other", "same"))
tab2 <- subset(tab, Freq > 0)

ggplot(tab2, aes(Q28_2, Q28_5, fill = Freq)) +
  geom_tile(color = "white") +
  geom_tile(
    data = subset(tab2, agreement == "same"),
    fill = "black", alpha = 0.3
  ) +
  geom_tile(
    data = subset(tab2, agreement == "more other"),
    aes(alpha = Freq),
    fill = "red"
  ) +
  geom_tile(
    data = subset(tab2, agreement == "more self"),
    aes(alpha = Freq),
    fill = "blue"
  ) +
  scale_alpha(range = c(0.05, 0.7), guide = "none") +
  scale_fill_gradient(low = "white", high = "black") +
  coord_equal() +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank()
  ) +
  labs(title = "Effect of Posts on Attitudes", x = str_wrap("You Influenced", width = 40), y = str_wrap("Average Person Influenced", width = 40)) +
  scale_x_continuous(
  breaks = -1:12,
  expand = c(0, 0),
  limits = c(-1, 12)
  ) +
  scale_y_continuous(
  breaks = -1:12,
  expand = c(0, 0),
  limits = c(-1, 12)
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "gray90", size = 0.3),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

Code

# ggsave("tpe_attitudes.png", width = 5.38, height = 5.38, dpi = 300)

SciPop

Factor Analysis

Code

df$Q29_1 <- as.numeric(df$Q29_1)
df$Q29_2 <- as.numeric(df$Q29_2)
df$Q29_3 <- as.numeric(df$Q29_3)
df$Q29_4 <- as.numeric(df$Q29_4)
df$Q29_5 <- as.numeric(df$Q29_5)
df$Q29_6 <- as.numeric(df$Q29_6)
df$Q29_7 <- as.numeric(df$Q29_7)
df$Q29_8 <- as.numeric(df$Q29_8)

describe(subset(df, select=c(grep("Q29_",colnames(df)))))

      vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
Q29_1    1 184 2.86 0.66      3    2.86 0.00   1   4     3 -0.51     0.69 0.05
Q29_2    2 184 2.95 0.67      3    2.96 0.00   1   4     3 -0.27     0.07 0.05
Q29_3    3 184 1.77 0.80      2    1.68 1.48   1   4     3  0.76    -0.14 0.06
Q29_4    4 184 2.02 0.95      2    1.93 1.48   1   4     3  0.53    -0.72 0.07
Q29_5    5 184 2.27 0.95      2    2.21 1.48   1   4     3  0.14    -0.98 0.07
Q29_6    6 184 2.28 0.96      2    2.23 1.48   1   4     3  0.15    -1.00 0.07
Q29_7    7 184 2.09 0.91      2    2.03 1.48   1   4     3  0.34    -0.86 0.07
Q29_8    8 184 1.87 0.88      2    1.78 1.48   1   4     3  0.63    -0.60 0.07

Code

# Specify CFA model
model <- '
  ppl =~ Q29_1 + Q29_2
  eli =~ Q29_3 + Q29_4
  dec =~ Q29_5 + Q29_6
  tru =~ Q29_7 + Q29_8
'

# Fit CFA model
fit <- cfa(model, data = df, std.lv = TRUE)

# View summary with fit indices and standardized loadings
summary(fit, fit.measures = TRUE, standardized = TRUE)

lavaan 0.6-21 ended normally after 29 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        22

  Number of observations                           184

Model Test User Model:
                                                      
  Test statistic                                24.826
  Degrees of freedom                                14
  P-value (Chi-square)                           0.036

Model Test Baseline Model:

  Test statistic                               490.930
  Degrees of freedom                                28
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.977
  Tucker-Lewis Index (TLI)                       0.953

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1594.557
  Loglikelihood unrestricted model (H1)      -1582.144
                                                      
  Akaike (AIC)                                3233.114
  Bayesian (BIC)                              3303.843
  Sample-size adjusted Bayesian (SABIC)       3234.163

Root Mean Square Error of Approximation:

  RMSEA                                          0.065
  90 Percent confidence interval - lower         0.016
  90 Percent confidence interval - upper         0.106
  P-value H_0: RMSEA <= 0.050                    0.251
  P-value H_0: RMSEA >= 0.080                    0.301

Standardized Root Mean Square Residual:

  SRMR                                           0.036

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ppl =~                                                                
    Q29_1             0.511    0.205    2.497    0.013    0.511    0.774
    Q29_2             0.252    0.110    2.296    0.022    0.252    0.376
  eli =~                                                                
    Q29_3             0.640    0.059   10.910    0.000    0.640    0.803
    Q29_4             0.704    0.070   10.125    0.000    0.704    0.746
  dec =~                                                                
    Q29_5             0.808    0.067   12.023    0.000    0.808    0.856
    Q29_6             0.801    0.068   11.703    0.000    0.801    0.835
  tru =~                                                                
    Q29_7             0.747    0.063   11.824    0.000    0.747    0.820
    Q29_8             0.692    0.061   11.268    0.000    0.692    0.785

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ppl ~~                                                                
    eli              -0.067    0.109   -0.614    0.539   -0.067   -0.067
    dec               0.152    0.111    1.363    0.173    0.152    0.152
    tru               0.154    0.115    1.346    0.178    0.154    0.154
  eli ~~                                                                
    dec               0.532    0.074    7.213    0.000    0.532    0.532
    tru               0.726    0.062   11.735    0.000    0.726    0.726
  dec ~~                                                                
    tru               0.628    0.064    9.811    0.000    0.628    0.628

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .Q29_1             0.175    0.206    0.848    0.396    0.175    0.401
   .Q29_2             0.385    0.064    6.013    0.000    0.385    0.858
   .Q29_3             0.225    0.049    4.633    0.000    0.225    0.355
   .Q29_4             0.396    0.066    6.000    0.000    0.396    0.444
   .Q29_5             0.237    0.066    3.574    0.000    0.237    0.267
   .Q29_6             0.279    0.067    4.148    0.000    0.279    0.303
   .Q29_7             0.272    0.055    4.946    0.000    0.272    0.328
   .Q29_8             0.297    0.051    5.834    0.000    0.297    0.383
    ppl               1.000                               1.000    1.000
    eli               1.000                               1.000    1.000
    dec               1.000                               1.000    1.000
    tru               1.000                               1.000    1.000

Code

df$scipop <- scale((df$Q29_1 + df$Q29_2 + df$Q29_3 + df$Q29_4 + df$Q29_5 + df$Q29_6 + df$Q29_7 + df$Q29_8)/8, center=T, scale=T)
df$scipop_ppl <- scale((df$Q29_1 + df$Q29_2) / 2, center=T, scale=T)
df$scipop_eli <- scale((df$Q29_3 + df$Q29_4) / 2, center=T, scale=T)
df$scipop_dec <- scale((df$Q29_5 + df$Q29_6) / 2, center=T, scale=T)
df$scipop_tru <- scale((df$Q29_7 + df$Q29_8) / 2, center=T, scale=T)

Descriptives

Code

describe(subset(df, select=c(grep("scipop",colnames(df)))))

           vars   n mean sd median trimmed  mad   min  max range  skew kurtosis
scipop        1 184    0  1  -0.02   -0.05 1.06 -2.16 2.59  4.75  0.37    -0.43
scipop_ppl    2 184    0  1   0.18    0.01 1.38 -2.62 2.04  4.66 -0.10     0.10
scipop_eli    3 184    0  1   0.14   -0.09 0.95 -1.14 2.70  3.84  0.56    -0.64
scipop_dec    4 184    0  1   0.26   -0.04 0.84 -1.44 1.95  3.40  0.10    -0.90
scipop_tru    5 184    0  1   0.03   -0.07 0.91 -1.20 2.48  3.68  0.39    -0.86
             se
scipop     0.07
scipop_ppl 0.07
scipop_eli 0.07
scipop_dec 0.07
scipop_tru 0.07

Item Histograms

Code

# --- 1. Histograms for the 8 individual items ---
df_items_long <- df %>%
  select(Q29_1:Q29_8) %>%
  pivot_longer(cols = everything(), names_to = "item", values_to = "value")

ggplot(df_items_long, aes(x = value)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white", boundary = 0.5) +
  facet_wrap(~ item, scales = "free_y") +
  labs(
    x = "Response",
    y = "Count",
    title = "Distributions of Science Popularity Items (Q29_1–Q29_8)"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 9),
    plot.title = element_text(size = 12, face = "bold")
  )

Composite Histograms

Code

# --- 2. Histograms for the 5 composite scores ---
df_scores_long <- df %>%
  select(scipop, scipop_ppl, scipop_eli, scipop_dec, scipop_tru) %>%
  pivot_longer(cols = everything(), names_to = "score", values_to = "value")

ggplot(df_scores_long, aes(x = value)) +
  geom_histogram(binwidth = 1, fill = "darkorange", color = "white", boundary = 0.5) +
  facet_wrap(~ score, scales = "free_y") +
  labs(
    x = "Average Score",
    y = "Count",
    title = "Distributions of Science Popularity Composite Scores"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 9),
    plot.title = element_text(size = 12, face = "bold")
  )

Misinformation Flagging

Code

df_temp_subset <- select(df, all_of(flag2))

df_temp_subset[] <- lapply(df_temp_subset, function(x) {
  x[x == ""] <- NA
  as.numeric(x)
})

df_temp_subset_a <- df_temp_subset[ , grepl(paste(a_list, collapse="|"), names(df_temp_subset))]
df_temp_subset_m <- df_temp_subset[ , grepl(paste(m_list, collapse="|"), names(df_temp_subset))]

df_long_a <- pivot_longer(df_temp_subset_a, everything(), names_to = "variable", values_to = "value")

ggplot(df_long_a, aes(x = value)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
  facet_wrap(~variable, scales = "free_y") +
  theme_minimal()

Code

df_long_m <- pivot_longer(df_temp_subset_m, everything(), names_to = "variable", values_to = "value")

ggplot(df_long_m, aes(x = value)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
  facet_wrap(~variable, scales = "free_y") +
  theme_minimal()

Code

df$mis_rating <- rowMeans(df_temp_subset_m, na.rm = T)
df$acc_rating <- rowMeans(df_temp_subset_a, na.rm = T)
df$diff_rating <- df$mis_rating - df$acc_rating
# higher diff_rating means higher misinformation acceptance

Analysis

Correlation Matrix

Code

# write.csv(df, file="forshin.csv", row.names = F)

# ppl = conceptions of the ordinary people
# eli = conceptions of the academic elite
# dec = demands for decision making sovereignty
# tru = demands for truth speaking sovereignty

df$source_cred <- rowMeans(subset(df, select=c(source_exp, source_comp, source_trust)))

corrout1 <- corr.test(subset(df, select=c(mis_rating, acc_rating, grep("scipop_",colnames(df)),grep("tpe_",colnames(df)),grep("source_cred",colnames(df)))))

corrplot(
  corrout1$r,
  p.mat = corrout1$p,          # add p-values
  sig.level = 0.05,           # hide correlations above this p-value
  insig = "blank",            # or "pch" to mark nonsignificant ones
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.8)

Regressions

TPE Diff

Code

df$samp <- as.factor(df$samp)

t.test(df$tpe_self, df$tpe_othr, paired = TRUE)


    Paired t-test

data:  df$tpe_self and df$tpe_othr
t = -9.0498, df = 183, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3007631 -0.8351065
sample estimates:
mean of the differences 
              -1.067935

Code

regout4 <- lm(data=df, source_cred ~ tpe_diff + samp)
plot_model(regout4, type="diag")

[[1]]


[[2]]


[[3]]


[[4]]

Code

plot(regout4, 5)

Code

summary(regout4)


Call:
lm(formula = source_cred ~ tpe_diff + samp, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.36809 -0.50817  0.02176  0.58227  2.26239 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.34249    0.07987   4.288 2.93e-05 ***
tpe_diff    -0.20218    0.05330  -3.793 0.000202 ***
sampSONA    -0.63685    0.13280  -4.796 3.37e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8268 on 181 degrees of freedom
Multiple R-squared:  0.1959,    Adjusted R-squared:  0.187 
F-statistic: 22.04 on 2 and 181 DF,  p-value: 2.707e-09

Plot

Code

p <- plot_model(regout4, type = "eff")
p[[1]] + ylim(-2, 2)

Code

# get the effect data from plot_model
effect_data <- p[[1]]$data

ggplot() +
  # raw data points behind
  geom_point(data = df, aes(x = tpe_diff, y = source_cred),
             alpha = 0.3, color = "gray40", size = 3) +
  # regression line + CI ribbon
  geom_ribbon(data = effect_data, aes(x = x, ymin = conf.low, ymax = conf.high),
              alpha = 0.2, fill = "#E6B10E") +
  geom_line(data = effect_data, aes(x = x, y = predicted),
            color = "#E6B10E", linewidth = 1) +
  ylim(-2, 2) +
  labs(x = "Third Person Effect", y = "Source Credibility",
       title = "Effect of Third Person Effect on Source Credibility") +
  theme_minimal()

Code

# ggsave("tpe_reg.png", width = 5.38, height = 5.38, dpi = 300)

SciPop

Code

df$scipop_ppl <- as.numeric(df$scipop_ppl)
df$scipop_eli <- as.numeric(df$scipop_eli)
df$scipop_dec <- as.numeric(df$scipop_dec)
df$scipop_tru <- as.numeric(df$scipop_tru)

regout4 <- lm(data=df, source_cred ~ scipop_ppl + scipop_eli + scipop_dec + scipop_tru + samp)
car::vif(regout4)

scipop_ppl scipop_eli scipop_dec scipop_tru       samp 
  1.043541   1.547102   1.408563   1.711523   1.040051

Code

plot_model(regout4, type="diag")

[[1]]


[[2]]


[[3]]


[[4]]

Code

plot(regout4, 5)

Code

summary(regout4)


Call:
lm(formula = source_cred ~ scipop_ppl + scipop_eli + scipop_dec + 
    scipop_tru + samp, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.03499 -0.49947 -0.01502  0.49718  2.77403 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.19708    0.07279   2.708  0.00744 ** 
scipop_ppl   0.19528    0.06131   3.185  0.00171 ** 
scipop_eli   0.01576    0.07465   0.211  0.83308    
scipop_dec   0.16731    0.07123   2.349  0.01993 *  
scipop_tru   0.06080    0.07852   0.774  0.43975    
sampSONA    -0.62522    0.13139  -4.759 4.02e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8119 on 178 degrees of freedom
Multiple R-squared:  0.2374,    Adjusted R-squared:  0.216 
F-statistic: 11.08 on 5 and 178 DF,  p-value: 2.633e-09

Plot - Ordinary People Subscale

Code

p <- plot_model(regout4, type = "eff")
p[[1]] + ylim(-2, 2)

Code

# get the effect data from plot_model
effect_data <- p[[1]]$data

ggplot() +
  # raw data points behind
  geom_point(data = df, aes(x = scipop_ppl, y = source_cred),
             alpha = 0.3, color = "gray40", size = 3) +
  # regression line + CI ribbon
  geom_ribbon(data = effect_data, aes(x = x, ymin = conf.low, ymax = conf.high),
              alpha = 0.2, fill = "#E6B10E") +
  geom_line(data = effect_data, aes(x = x, y = predicted),
            color = "#E6B10E", linewidth = 1) +
  ylim(-2, 2) +
  labs(x = "Science Populism - Ordinary People", y = "Source Credibility",
       title = "Effect of Science Populism on Source Credibility") +
  theme_minimal()

Code

# ggsave("scipop_ppl.png", width = 5.38, height = 5.38, dpi = 300)

Plot - Ordinary People Subscale

Code

p <- plot_model(regout4, type = "eff")
# p[[3]] + ylim(-2, 2)

# get the effect data from plot_model
effect_data <- p[[3]]$data

ggplot() +
  # raw data points behind
  geom_point(data = df, aes(x = scipop_dec, y = source_cred),
             alpha = 0.3, color = "gray40", size = 3) +
  # regression line + CI ribbon
  geom_ribbon(data = effect_data, aes(x = x, ymin = conf.low, ymax = conf.high),
              alpha = 0.2, fill = "#E6B10E") +
  geom_line(data = effect_data, aes(x = x, y = predicted),
            color = "#E6B10E", linewidth = 1) +
  ylim(-2, 2) +
  labs(x = "Science Populism - Decision Making", y = "Source Credibility",
       title = "Effect of Science Populism on Source Credibility") +
  theme_minimal()

Code

# ggsave("scipop_dec.png", width = 5.38, height = 5.38, dpi = 300)

Sample Difference

Code

regout4 <- lm(data=df, source_cred ~ scipop_ppl + scipop_eli + scipop_dec + scipop_tru + tpe_diff + samp)
car::vif(regout4)

scipop_ppl scipop_eli scipop_dec scipop_tru   tpe_diff       samp 
  1.061551   1.550070   1.408708   1.714771   1.043690   1.063383

Code

plot_model(regout4, type="diag")

[[1]]


[[2]]


[[3]]


[[4]]

Code

plot(regout4, 5)

Code

summary(regout4)


Call:
lm(formula = source_cred ~ scipop_ppl + scipop_eli + scipop_dec + 
    scipop_tru + tpe_diff + samp, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.86452 -0.49143 -0.00839  0.47987  2.55330 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.303450   0.076575   3.963 0.000107 ***
scipop_ppl   0.167494   0.059904   2.796 0.005745 ** 
scipop_eli   0.004476   0.072387   0.062 0.950761    
scipop_dec   0.164817   0.069007   2.388 0.017975 *  
scipop_tru   0.072604   0.076136   0.954 0.341580    
tpe_diff    -0.182247   0.051172  -3.561 0.000474 ***
sampSONA    -0.557326   0.128696  -4.331 2.49e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7865 on 177 degrees of freedom
Multiple R-squared:  0.2884,    Adjusted R-squared:  0.2643 
F-statistic: 11.96 on 6 and 177 DF,  p-value: 2.984e-11

Plot

Code

ggplot(df, aes(x = factor(samp), y = as.numeric(unlist(source_cred)), fill = factor(samp))) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.4, alpha = 0.4) +
  geom_boxplot(width = 0.3, alpha = 0.6, outlier.shape = NA) +
  scale_fill_manual(values = c("Prolific" = "black", "SONA" = "#E6B10E")) +
  scale_x_discrete(labels = c("Prolific" = "Non-College Sample", "SONA" = "Student Sample")) +
  labs(x = "Sample", y = "Source Credibility",
       title = "Distribution of Source Credibility by Sample") +
  theme_minimal() +
  theme(legend.position = "none")

Code

ggsave("samp.png", width = 5.38, height = 5.38, dpi = 300)

Misinformation Acceptance

Code

df$mis_rating_std <- as.numeric(scale(df$mis_rating, center=T, scale=T))
df$acc_rating_std <- as.numeric(scale(df$acc_rating, center=T, scale=T))

regout2 <- lm(data=df, mis_rating_std ~ source_cred*samp + acc_rating_std)
car::vif(regout2, type="predictor")

                   GVIF Df GVIF^(1/(2*Df)) Interacts With  Other Predictors
source_cred    1.289424  3        1.043276           samp    acc_rating_std
samp           1.289424  3        1.043276    source_cred    acc_rating_std
acc_rating_std 1.289424  1        1.135528           --   source_cred, samp

Code

plot_model(regout2, type="diag")

[[1]]


[[2]]


[[3]]


[[4]]

Code

plot(regout2, 5)

Code

summary(regout2)


Call:
lm(formula = mis_rating_std ~ source_cred * samp + acc_rating_std, 
    data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.41972 -0.45901 -0.01354  0.52481  1.85586 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)           0.03726    0.06840   0.545 0.586631    
source_cred           0.28254    0.08365   3.378 0.000897 ***
sampSONA             -0.28757    0.13347  -2.155 0.032535 *  
acc_rating_std        0.54016    0.06179   8.742 1.63e-15 ***
source_cred:sampSONA -0.34595    0.13459  -2.570 0.010970 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7361 on 179 degrees of freedom
Multiple R-squared:   0.47, Adjusted R-squared:  0.4582 
F-statistic: 39.69 on 4 and 179 DF,  p-value: < 2.2e-16

Plot - Ordinary People Subscale

Code

p <- plot_model(regout2, type = "int")
effect_data <- p$data

ggplot() +
  # raw data points behind, colored by group
  geom_point(data = df, aes(x = source_cred, y = mis_rating_std, color = samp),
             alpha = 0.3, size = 3) +
  # CI ribbon per group
  geom_ribbon(data = effect_data, aes(x = x, ymin = conf.low, ymax = conf.high,
              group = group_col, fill = group_col), alpha = 0.2) +
  # regression line per group
  geom_line(data = effect_data, aes(x = x, y = predicted,
            color = group_col, group = group_col), linewidth = 1) +
  scale_color_manual(values = c("Prolific" = "#E6B10E", "SONA" = "black"),
                     labels = c("Prolific" = "Non-College Sample", "SONA" = "Student Sample")) +
  scale_fill_manual(values = c("Prolific" = "#E6B10E", "SONA" = "black"),
                    labels = c("Prolific" = "Non-College Sample", "SONA" = "Student Sample")) +
  labs(x = "Source Credibility", y = "Misinformation Acceptance",
       title = "Moderation: Source Credibility, College Experience, and \nMisinformation Acceptance",
       color = "Moderator", fill = "Moderator") +
  theme_minimal() +
  theme(
    legend.position = c(0.80, 0.85),
    legend.background = element_rect(fill = "white", color = "black"),
    plot.title = element_text(lineheight = 1.1)
  )

Code

# ggsave("mis_mod.png", width = 5.38, height = 5.38, dpi = 300)