Confidence by news quality across posts

ALL_Platforms_cleaned <- na.omit(ALL_Platforms[, c("pc1", "certainty_avg", "username","platform","lean1")])

results_model1 <- list()
results_model2 <- list()


for (platform in platforms) {
  data_subset <- ALL_Platforms_cleaned[ALL_Platforms_cleaned$platform == platform, ]
  
  # Model 1: Without control
  model1 <- feols(scale(pc1) ~ scale(certainty_avg) | username , cluster = ~username, data = data_subset)
  coef1 <- coef(model1)[1]
  confint1 <- confint(model1)[1, ]
  
  
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    estimate = coef1,
    ci_lb = confint1[,1],
    ci_ub = confint1[,2])
  
  # Model 2: With control
  model2 <- feols(scale(pc1) ~ scale(certainty_avg) + scale(lean1) | username , cluster = ~username, data = data_subset)
  coef2 <- coef(model2)["scale(certainty_avg)"]
  confint2 <- confint(model2)["scale(certainty_avg)", ]
  
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    estimate = coef2,
    ci_lb = confint2[,1],
    ci_ub = confint2[,2])}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2

res_model1 <- rma(estimate, vi, data=results_df_model1)
res_model2 <- rma(estimate, vi, data=results_df_model2)

results_df <- rbind(results_df_model1, results_df_model2)
results_df <- results_df[order(results_df$platform, results_df$model), ]
results_df <- results_df[order(results_df$estimate, results_df$platform), ]
with_control <- results_df[results_df$model == "With Control", ]
platform_order <- with_control$platform[order(with_control$estimate)]
results_df <- results_df[order(factor(results_df$platform, levels = platform_order), results_df$model), ]
results_df$color <- ifelse(results_df$model == "Without Control", "black", "black")
results_df$label <- paste(results_df$platform)
Ceratainty_bins <- ALL_Platforms %>%
  select(pc1, username, platform, certainty_avg) %>%
  mutate( certainty_bin = ifelse(is.na(certainty_avg), "Missing", as.character(round(certainty_avg / 200, 2) * 200)))

Ceratainty_bins_summary <- Ceratainty_bins %>% group_by(certainty_bin) %>% summarise(avg_pc1 = mean(pc1, na.rm = TRUE), .groups = "drop")

Ceratainty_bins_summary$certainty_bin <- factor(Ceratainty_bins_summary$certainty_bin, levels = c("Missing", sort(unique(Ceratainty_bins_summary$certainty_bin)[unique(Ceratainty_bins_summary$certainty_bin) != "Missing"])))

Ceratainty_bins_plot <- ggplot(Ceratainty_bins_summary, aes(x = certainty_bin, y = avg_pc1, group = 1)) +
  geom_line() +
  geom_point() +
  labs(x = "Certainty Bin", y = "Average Quality (pc1)")

Confidence by news quality across headlines

data_headlines_cleaned <- na.omit(data_headlines[, c("pc1", "certainty_avg", "username","platform","lean1")])

results_model1 <- list()
results_model2 <- list()


for (platform in platforms) {
  data_subset <- data_headlines_cleaned[data_headlines_cleaned$platform == platform, ]
  
  # Model 1: Without control
  model1 <- feols(scale(pc1) ~ scale(certainty_avg) | username , cluster = ~username, data = data_subset)
  coef1 <- coef(model1)[1]
  confint1 <- confint(model1)[1, ]
  
  
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    estimate = coef1,
    ci_lb = confint1[,1],
    ci_ub = confint1[,2])
  
  # Model 2: With control
  model2 <- feols(scale(pc1) ~ scale(certainty_avg) + scale(lean1) | username , cluster = ~username, data = data_subset)
  coef2 <- coef(model2)["scale(certainty_avg)"]
  confint2 <- confint(model2)["scale(certainty_avg)", ]
  
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    estimate = coef2,
    ci_lb = confint2[,1],
    ci_ub = confint2[,2])}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2

res_model1 <- rma(estimate, vi, data=results_df_model1)
res_model2 <- rma(estimate, vi, data=results_df_model2)

results_df <- rbind(results_df_model1, results_df_model2)
results_df <- results_df[order(results_df$platform, results_df$model), ]
results_df <- results_df[order(results_df$estimate, results_df$platform), ]
with_control <- results_df[results_df$model == "With Control", ]
platform_order <- with_control$platform[order(with_control$estimate)]
results_df <- results_df[order(factor(results_df$platform, levels = platform_order), results_df$model), ]
results_df$color <- ifelse(results_df$model == "Without Control", "black", "black")
results_df$label <- paste(results_df$platform)
headlines_cleaned <- na.omit(data_headlines[, c("pc1", "certainty_avg","lean1")])

headline_Ceratainty_bins <- data_headlines %>%
  select(pc1, certainty_avg) %>%
  mutate( certainty_bin = ifelse(is.na(certainty_avg), "Missing", as.character(round(certainty_avg / 200, 2) * 200)))

headline_Ceratainty_bins_summary <- headline_Ceratainty_bins %>% group_by(certainty_bin) %>% summarise(avg_pc1 = mean(pc1, na.rm = TRUE), .groups = "drop")

headline_Ceratainty_bins_summary$certainty_bin <- factor(headline_Ceratainty_bins_summary$certainty_bin, levels = c("Missing", sort(unique(headline_Ceratainty_bins_summary$certainty_bin)[unique(headline_Ceratainty_bins_summary$certainty_bin) != "Missing"])))


Ceratainty_bins_headlines_plot <- ggplot(headline_Ceratainty_bins_summary, aes(x = certainty_bin, y = avg_pc1, group = 1)) +
      geom_line() +
      geom_point() +
      labs(x = "Certainty Bin",y = "Average Quality (pc1)",title = "News quality across confidence levels for headlines") +theme(plot.title = element_text(hjust = 0.5))

Confidence by engagement across posts

ALL_Platforms_cleaned <- na.omit(ALL_Platforms[, c("engagement", "certainty_avg", "username","platform","lean1")])

results_model1 <- list()
results_model2 <- list()


for (platform in platforms) {
  data_subset <- ALL_Platforms_cleaned[ALL_Platforms_cleaned$platform == platform, ]
  
  # Model 1: Without control
  model1 <- feols(scale(engagement) ~ scale(certainty_avg)  | username , cluster = ~username, data = data_subset)
  coef1 <- coef(model1)[1]
  confint1 <- confint(model1)[1, ]
  
  
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    estimate = coef1,
    ci_lb = confint1[,1],
    ci_ub = confint1[,2])
  
  # Model 2: With control
  model2 <- feols(scale(engagement) ~ scale(certainty_avg) + scale(lean1)  | username , cluster = ~username, data = data_subset)
  coef2 <- coef(model2)["scale(certainty_avg)"]
  confint2 <- confint(model2)["scale(certainty_avg)", ]
  
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    estimate = coef2,
    ci_lb = confint2[,1],
    ci_ub = confint2[,2])}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2

res_model1 <- rma(estimate, vi, data=results_df_model1)
res_model2 <- rma(estimate, vi, data=results_df_model2)

results_df <- rbind(results_df_model1, results_df_model2)
results_df <- results_df[order(results_df$platform, results_df$model), ]
results_df <- results_df[order(results_df$estimate, results_df$platform), ]
with_control <- results_df[results_df$model == "With Control", ]
platform_order <- with_control$platform[order(with_control$estimate)]
results_df <- results_df[order(factor(results_df$platform, levels = platform_order), results_df$model), ]
results_df$color <- ifelse(results_df$model == "Without Control", "black", "black")
results_df$label <- paste(results_df$platform)


forest(results_df$estimate, ci.lb = results_df$ci_lb, ci.ub = results_df$ci_ub,slab = results_df$label,  xlab = "Effect Size",refline = 0,  shade = TRUE,col = results_df$color,ylim = c(-3, nrow(results_df) + 4), digits=3)
# Aggregated effect for Model 1
addpoly(res_model1, col = "black",cex = 1,border = "black", lwd = 2,row = nrow(results_df) - 17,  mlab='Confidence without political lean control')
# Aggregated effect for Model 2
addpoly(res_model2, col = "black",   cex = 1, border = "black", lwd = 1, row = nrow(results_df) - 19,  mlab='Confidence with political lean control')

title(main = "Association between confidence and engagemnet")

Confidence, quality, and toxicity coefficients in separate and joint model

ALL_Platforms$pc1 <- 1- ALL_Platforms$pc1

# We fit models once with all variables in a joint model and once for each variable separately
fit_model <- function(var, label, with_control = TRUE, joint = FALSE) {
  
  if (joint) {
    if (with_control) {
      form <- as.formula("scale(log10(engagement + 1)) ~ scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) + scale(lean1) | username")
    } else {
      form <- as.formula("scale(log10(engagement + 1)) ~ scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) | username")
    }
    vars <- c("scale(certainty_avg)", "scale(toxicity_ntile)", "scale(pc1)")
    labels <- c("Confidence", "Toxicity", "Quality")
  } else {
    if (with_control) {
      form <- as.formula(paste0("scale(log10(engagement + 1)) ~ ", var, " + scale(lean1) | username"))
    } else {
      form <- as.formula(paste0("scale(log10(engagement + 1)) ~ ", var, " | username"))
    }
    vars <- var
    labels <- label
  }
  
  model <- feglm(form, cluster = "username", data = ALL_Platforms)
  coef_ <- coef(model)[vars]
  ci_ <- confint(model)[vars, ]
  
  tibble(
    variable = labels,
    estimate = coef_,
    ci_lb = ci_[, 1],
    ci_ub = ci_[, 2],
    model_type = ifelse(joint, "Joint", "Separate"),
    control = ifelse(with_control, "With Control", "Without Control")
  )
}


all_results <- bind_rows(
  # Joint
  fit_model(NULL, NULL, with_control = TRUE, joint = TRUE),
  fit_model(NULL, NULL, with_control = FALSE, joint = TRUE),
  
  # Separate
  fit_model("scale(certainty_avg)", "Confidence", with_control = TRUE),
  fit_model("scale(toxicity_ntile)", "Toxicity", with_control = TRUE),
  fit_model("scale(pc1)", "Quality", with_control = TRUE),
  fit_model("scale(certainty_avg)", "Confidence", with_control = FALSE),
  fit_model("scale(toxicity_ntile)", "Toxicity", with_control = FALSE),
  fit_model("scale(pc1)", "Quality", with_control = FALSE)
)
## NOTE: 18,531,484 observations removed because of NA values (LHS: 13,702, RHS: 18,526,578).
## NOTE: 18,529,177 observations removed because of NA values (LHS: 13,702, RHS: 18,524,268).
## NOTE: 18,531,484 observations removed because of NA values (LHS: 13,702, RHS: 18,526,578).
## NOTE: 2,706,586 observations removed because of NA values (LHS: 13,702, RHS: 2,692,899).
## NOTE: 2,706,586 observations removed because of NA values (LHS: 13,702, RHS: 2,692,899).
## NOTE: 17,999,669 observations removed because of NA values (LHS: 13,702, RHS: 17,994,760).
## NOTE: 13,702 observations removed because of NA values (LHS: 13,702).
## NOTE: 2,698,180 observations removed because of NA values (LHS: 13,702, RHS: 2,684,478).
my_theme <- theme_bw() +
  theme(text = element_text(size = 18),
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.position = c(.3, .92),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    panel.spacing = unit(.2, "lines"),
    strip.background = element_rect(colour = 'blue', fill = 'lightblue'),
    strip.placement = "outside",
    axis.line.x = element_line(),
    panel.border = element_blank())

plot_coef <- function(df, title) {
  ggplot(df, aes(x = estimate, y = variable, color = variable, shape = model_type)) +
    geom_point(size = 3, position = position_dodge(width = 0.5)) +
    geom_errorbarh(aes(xmin = ci_lb, xmax = ci_ub),
                   height = 0.2, position = position_dodge(width = 0.5)) +
    scale_color_manual(values = c("Confidence" = "darkblue",
                                  "Toxicity" = "darkgreen",
                                  "Quality" = "darkred")) +
    labs(x = "Coefficient estimate", y = "",
         title = title, color = "Variable", shape = "Model") +
    theme_minimal(base_size = 14) + my_theme
}


plot_with_control <- plot_coef(all_results %>% filter(control == "With Control"), "With political lean")
plot_without_control <- plot_coef(all_results %>% filter(control == "Without Control"), "Without political lean")


plot_with_control <- plot_with_control + theme(legend.position = "none")
plot_without_control <- plot_without_control + theme(legend.position = "right") 

combined_plot <- plot_with_control | plot_without_control + plot_layout(guides = "collect")
combined_plot