Plot1 : Comparison of average quality of news with average toxicity of posts shared across each platform.
Plot2 : Comparison of average quality of news with average political lean of news across platforms.
Plot3 : Comparison of average quality of news with average confidence of posts shared across each platform.
# Plot 1: one point per platform, mean toxicity (x) against mean pc1 (y).
platform_means1 <- ALL_Platforms %>%
  group_by(platform) %>%
  summarise(
    mean_pc1 = mean(pc1, na.rm = TRUE),
    mean_toxicity = mean(toxicity, na.rm = TRUE)
  )

# Extremes of the platform means; the axis hints below are anchored to them.
x_min <- min(platform_means1[["mean_toxicity"]], na.rm = TRUE)
x_max <- max(platform_means1[["mean_toxicity"]], na.rm = TRUE)
y_min <- min(platform_means1[["mean_pc1"]], na.rm = TRUE)
y_max <- max(platform_means1[["mean_pc1"]], na.rm = TRUE)

plot1 <- ggplot(
  data = platform_means1,
  mapping = aes(x = mean_toxicity, y = mean_pc1, label = platform)
) +
  geom_point(color = "blue", size = 3) +
  geom_text(hjust = 0.5, vjust = -0.5, size = 3) +
  labs(
    x = "Average toxicity of posts shared",
    y = "Average quality of news shared",
    title = "Toxicity Vs. News Quality"
  ) +
  # Green hints: low/high ends of the x-axis, nudged below the lowest point.
  annotate(
    "text", x = x_min, y = y_min, label = "Low",
    hjust = 0, vjust = 2, color = "darkgreen"
  ) +
  annotate(
    "text", x = x_max, y = y_min, label = "High",
    hjust = 1, vjust = 2, color = "darkgreen"
  ) +
  # Blue hints: low/high ends of the y-axis, both placed at the left edge.
  annotate(
    "text", x = x_min, y = y_min, label = "Low",
    hjust = 0.9, vjust = 0, color = "darkblue"
  ) +
  annotate(
    "text", x = x_min, y = y_max, label = "High",
    hjust = 0.8, vjust = -1.1, color = "darkblue"
  ) +
  theme_minimal()
# Plot 2: one point per platform, mean lean1 (x) against mean pc1 (y).
platform_means2 <- ALL_Platforms %>%
  group_by(platform) %>%
  summarise(
    mean_pc1 = mean(pc1, na.rm = TRUE),
    mean_lean1_political = mean(lean1, na.rm = TRUE)
  )

# Extremes of the platform means; the axis hints below are anchored to them.
x_min <- min(platform_means2[["mean_lean1_political"]], na.rm = TRUE)
x_max <- max(platform_means2[["mean_lean1_political"]], na.rm = TRUE)
y_min <- min(platform_means2[["mean_pc1"]], na.rm = TRUE)
y_max <- max(platform_means2[["mean_pc1"]], na.rm = TRUE)

plot2 <- ggplot(
  data = platform_means2,
  mapping = aes(x = mean_lean1_political, y = mean_pc1, label = platform)
) +
  geom_point(color = "blue", size = 3) +
  geom_text(hjust = 0.5, vjust = -0.5, size = 3) +
  labs(
    x = "Average political lean of news shared",
    y = "Average quality of news shared",
    title = "Political Lean Vs. News Quality"
  ) +
  # Green hints label the two ends of the x-axis.
  annotate(
    "text", x = x_min, y = y_min, label = "Liberal",
    hjust = 0, vjust = 2, color = "darkgreen"
  ) +
  annotate(
    "text", x = x_max, y = y_min, label = "Conservative",
    hjust = 1, vjust = 2, color = "darkgreen"
  ) +
  # Blue hints: low/high ends of the y-axis, both placed at the left edge.
  annotate(
    "text", x = x_min, y = y_min, label = "Low",
    hjust = 0.1, vjust = 0, color = "darkblue"
  ) +
  annotate(
    "text", x = x_min, y = y_max, label = "High",
    hjust = 0.7, vjust = -1.1, color = "darkblue"
  ) +
  theme_minimal()
# Plot 3: one point per platform, mean certainty_avg (x) against mean pc1 (y).
platform_means3 <- ALL_Platforms %>%
  group_by(platform) %>%
  summarise(
    mean_pc1 = mean(pc1, na.rm = TRUE),
    mean_certainty = mean(certainty_avg, na.rm = TRUE)
  )

# Extremes of the platform means; the axis hints below are anchored to them.
x_min <- min(platform_means3[["mean_certainty"]], na.rm = TRUE)
x_max <- max(platform_means3[["mean_certainty"]], na.rm = TRUE)
y_min <- min(platform_means3[["mean_pc1"]], na.rm = TRUE)
y_max <- max(platform_means3[["mean_pc1"]], na.rm = TRUE)

plot3 <- ggplot(
  data = platform_means3,
  mapping = aes(x = mean_certainty, y = mean_pc1, label = platform)
) +
  geom_point(color = "blue", size = 3) +
  geom_text(hjust = 0.5, vjust = -0.5, size = 3) +
  labs(
    x = "Average Certainty of posts shared",
    y = "Average news quality of news shared",
    title = "Certainty Vs. News Quality"
  ) +
  # Green hints: low/high ends of the x-axis, nudged below the lowest point.
  annotate(
    "text", x = x_min, y = y_min, label = "Low",
    hjust = 0, vjust = 2, color = "darkgreen"
  ) +
  annotate(
    "text", x = x_max, y = y_min, label = "High",
    hjust = 1, vjust = 2, color = "darkgreen"
  ) +
  # Blue hints: low/high ends of the y-axis, both placed at the left edge.
  annotate(
    "text", x = x_min, y = y_min, label = "Low",
    hjust = 0.1, vjust = 0, color = "darkblue"
  ) +
  annotate(
    "text", x = x_min, y = y_max, label = "High",
    hjust = 0.7, vjust = -1.1, color = "darkblue"
  ) +
  theme_minimal()

# Combine the three panels with the `/` operator, in plot order.
plot1 / plot2 / plot3
\[ Model_1: Quality_i = \beta_1 Toxicity_i + \epsilon_i \] \[ Model_2: Quality_i = \beta_1 Toxicity_i + \beta_2 PoliticalLean_i + \epsilon_i \]
# Per-platform association between toxicity and news quality (pc1), estimated
# with and without a control for political lean (lean1), pooled across
# platforms with rma() and drawn as a single forest plot.
ALL_Platforms_cleaned <- na.omit(
  ALL_Platforms[, c("platform", "pc1", "toxicity", "username", "lean1")]
)
# Rank-based split of toxicity into 100 buckets over the pooled complete cases.
ALL_Platforms_cleaned$toxicity_ntile <- ntile(ALL_Platforms_cleaned$toxicity, 100)

# Coefficient of interest in both models.
toxicity_term <- "scale(toxicity_ntile)"

results_model1 <- list()
results_model2 <- list()
for (platform in platforms) {
  data_subset <- ALL_Platforms_cleaned[ALL_Platforms_cleaned$platform == platform, ]

  # Model 1: Without control
  model1 <- feols(
    scale(pc1) ~ scale(toxicity_ntile),
    cluster = ~username, data = data_subset
  )
  # Pick the slope by name (as Model 2 does) instead of by position, so the
  # extraction does not depend on where the intercept sits.
  coef1 <- coef(model1)[toxicity_term]
  confint1 <- confint(model1)[toxicity_term, ]
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    estimate = coef1,
    ci_lb = confint1[, 1],
    ci_ub = confint1[, 2]
  )

  # Model 2: With control on Political lean
  model2 <- feols(
    scale(pc1) ~ scale(toxicity_ntile) + scale(lean1),
    cluster = ~username, data = data_subset
  )
  coef2 <- coef(model2)[toxicity_term]
  confint2 <- confint(model2)[toxicity_term, ]
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    estimate = coef2,
    ci_lb = confint2[, 1],
    ci_ub = confint2[, 2]
  )
}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

# Sampling variances backed out of the interval width, treating each interval
# as estimate +/- 1.96 * SE.
results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2
res_model1 <- rma(estimate, vi, data = results_df_model1)
res_model2 <- rma(estimate, vi, data = results_df_model2)

# Row order for the plot: platforms sorted by their "With Control" estimate,
# and within a platform by model label. (Two earlier order() calls were
# removed: this final ordering overwrote them.)
results_df <- rbind(results_df_model1, results_df_model2)
with_control <- results_df[results_df$model == "With Control", ]
platform_order <- with_control$platform[order(with_control$estimate)]
results_df <- results_df[
  order(factor(results_df$platform, levels = platform_order), results_df$model),
]
results_df$color <- ifelse(results_df$model == "Without Control", "grey", "black")
results_df$label <- paste(results_df$platform)

forest(
  results_df$estimate,
  ci.lb = results_df$ci_lb, ci.ub = results_df$ci_ub,
  slab = results_df$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df$color,
  ylim = c(-3, nrow(results_df) + 4), digits = 3
)

# The y-axis runs down to -3 (see ylim above), so the pooled estimates go on
# fixed rows under the individual estimates. The previous offsets
# (nrow - 17, nrow - 19) produced these same rows only for exactly 16 estimates.
# Aggregated effect for Model 1
addpoly(
  res_model1, col = "grey", cex = 1, border = "grey", lwd = 2,
  row = -1, mlab = "toxicity without political lean"
)
# Aggregated effect for Model 2
addpoly(
  res_model2, col = "black", cex = 1, border = "black", lwd = 1,
  row = -3, mlab = "toxicity with political lean"
)
legend(
  "topright",
  legend = c("Without Political lean control", "With Political lean control"),
  col = c("grey", "black"), pch = 15, bty = "n", cex = 1.2
)
title(main = "Association between toxicity and news quality")
\[ Model_1: Quality_i = \beta_1 Confidence_i + \epsilon_i \] \[ Model_2: Quality_i = \beta_1 Confidence_i + \beta_2 PoliticalLean_i + \epsilon_i \]
# Per-platform association between certainty_avg and news quality (pc1),
# estimated with and without a control for political lean (lean1), pooled
# across platforms with rma() and drawn as a single forest plot.
ALL_Platforms_cleaned <- na.omit(
  ALL_Platforms[, c("pc1", "certainty_avg", "username", "platform", "lean1")]
)

# Coefficient of interest in both models.
certainty_term <- "scale(certainty_avg)"

results_model1 <- list()
results_model2 <- list()
for (platform in platforms) {
  data_subset <- ALL_Platforms_cleaned[ALL_Platforms_cleaned$platform == platform, ]

  # Model 1: Without control
  model1 <- feols(
    scale(pc1) ~ scale(certainty_avg),
    cluster = ~username, data = data_subset
  )
  # Pick the slope by name (as Model 2 does) instead of by position, so the
  # extraction does not depend on where the intercept sits.
  coef1 <- coef(model1)[certainty_term]
  confint1 <- confint(model1)[certainty_term, ]
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    estimate = coef1,
    ci_lb = confint1[, 1],
    ci_ub = confint1[, 2]
  )

  # Model 2: With control
  model2 <- feols(
    scale(pc1) ~ scale(certainty_avg) + scale(lean1),
    cluster = ~username, data = data_subset
  )
  coef2 <- coef(model2)[certainty_term]
  confint2 <- confint(model2)[certainty_term, ]
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    estimate = coef2,
    ci_lb = confint2[, 1],
    ci_ub = confint2[, 2]
  )
}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

# Sampling variances backed out of the interval width, treating each interval
# as estimate +/- 1.96 * SE.
results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2
res_model1 <- rma(estimate, vi, data = results_df_model1)
res_model2 <- rma(estimate, vi, data = results_df_model2)

# Row order for the plot: platforms sorted by their "With Control" estimate,
# and within a platform by model label. (Two earlier order() calls were
# removed: this final ordering overwrote them.)
results_df <- rbind(results_df_model1, results_df_model2)
with_control <- results_df[results_df$model == "With Control", ]
platform_order <- with_control$platform[order(with_control$estimate)]
results_df <- results_df[
  order(factor(results_df$platform, levels = platform_order), results_df$model),
]
results_df$color <- ifelse(results_df$model == "Without Control", "grey", "black")
results_df$label <- paste(results_df$platform)

forest(
  results_df$estimate,
  ci.lb = results_df$ci_lb, ci.ub = results_df$ci_ub,
  slab = results_df$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df$color,
  ylim = c(-3, nrow(results_df) + 4), digits = 3
)

# The y-axis runs down to -3 (see ylim above), so the pooled estimates go on
# fixed rows under the individual estimates. The previous offsets
# (nrow - 17, nrow - 19) produced these same rows only for exactly 16 estimates.
# Aggregated effect for Model 1
addpoly(
  res_model1, col = "grey", cex = 1, border = "grey", lwd = 2,
  row = -1, mlab = "confidence without political lean"
)
# Aggregated effect for Model 2
addpoly(
  res_model2, col = "black", cex = 1, border = "black", lwd = 1,
  row = -3, mlab = "confidence with political lean"
)
legend(
  "topright",
  legend = c("Without Political lean control", "With Political lean control"),
  col = c("grey", "black"), pch = 15, bty = "n", cex = 1.2
)
title(main = "Association between confidence and news quality")
\[ Engagement_i = \beta_1 Confidence_i + \beta_2 Toxicity_{ntile,i} + \beta_3 Quality_i + \beta_4 PoliticalLean_i + \lambda_{UserName_i} + \epsilon_i \]
# Per-platform engagement models with username fixed effects: standardised
# log10(engagement + 1) on certainty, toxicity_ntile and pc1, fitted with and
# without the lean1 control. The three coefficients are pooled across
# platforms with rma() and shown in two side-by-side forest plots.
ALL_Platforms_eng <- na.omit(
  ALL_Platforms[, c(
    "pc1", "certainty_avg", "toxicity_ntile", "username",
    "platform", "lean1", "engagement"
  )]
)

# Coefficients reported for every platform, with one fixed colour each
# (same order as `desired_vars`).
desired_vars <- c("scale(certainty_avg)", "scale(toxicity_ntile)", "scale(pc1)")
variable_colors <- c("darkblue", "darkgreen", "darkred")

results_model1 <- list()
results_model2 <- list()
par(mfrow = c(1, 2))
for (platform in platforms) {
  data_subset <- ALL_Platforms_eng[ALL_Platforms_eng$platform == platform, ]

  # Model 1: With control
  model1 <- feglm(
    scale(log10(engagement + 1)) ~
      scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) + scale(lean1) | username,
    cluster = "username", data = data_subset
  )
  coef1 <- coef(model1)[desired_vars]
  confint1 <- confint(model1)[desired_vars, ]
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    variable = desired_vars,
    estimate = coef1,
    ci_lb = confint1[, 1],
    ci_ub = confint1[, 2],
    color = variable_colors,
    shape = 15
  )

  # Model 2: Without control
  model2 <- feglm(
    scale(log10(engagement + 1)) ~
      scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) | username,
    cluster = "username", data = data_subset
  )
  coef2 <- coef(model2)[desired_vars]
  confint2 <- confint(model2)[desired_vars, ]
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    variable = desired_vars,
    estimate = coef2,
    ci_lb = confint2[, 1],
    ci_ub = confint2[, 2],
    color = variable_colors,
    shape = 16
  )
}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

# Sampling variances backed out of the interval width, treating each interval
# as estimate +/- 1.96 * SE.
results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2
variables <- unique(results_df_model1$variable)

# One pooled estimate per predictor and per specification.
res_model1_pc1 <- rma(estimate, vi, data = results_df_model1, subset = (variable == "scale(pc1)"))
res_model1_toxicity <- rma(estimate, vi, data = results_df_model1, subset = (variable == "scale(toxicity_ntile)"))
res_model1_certainty <- rma(estimate, vi, data = results_df_model1, subset = (variable == "scale(certainty_avg)"))
res_model2_pc1 <- rma(estimate, vi, data = results_df_model2, subset = (variable == "scale(pc1)"))
res_model2_toxicity <- rma(estimate, vi, data = results_df_model2, subset = (variable == "scale(toxicity_ntile)"))
res_model2_certainty <- rma(estimate, vi, data = results_df_model2, subset = (variable == "scale(certainty_avg)"))

# The `color` column is already filled per variable when the rows are built,
# so the former nested ifelse() that recomputed the same values is dropped.
results_df_model1$label <- paste(results_df_model1$platform, results_df_model1$variable)
results_df_model2$label <- paste(results_df_model2$platform, results_df_model2$variable)

# Both panels list platforms in the order of their certainty coefficient from
# the "With Control" model (every row of results_df_model1 is that model).
certainty_avg_model1 <- results_df_model1[results_df_model1$variable == "scale(certainty_avg)", ]
certainty_avg_model1 <- certainty_avg_model1[order(certainty_avg_model1$estimate), ]
sorted_platforms <- certainty_avg_model1$platform
results_df_model1 <- results_df_model1[order(factor(results_df_model1$platform, levels = sorted_platforms)), ]
results_df_model2 <- results_df_model2[order(factor(results_df_model2$platform, levels = sorted_platforms)), ]

# Pooled estimates go on rows -1, -2, -3, inside the ylim lower bound of -3.
# The earlier nrow - 25/26/27 offsets gave these rows only for 24 estimates.
# Model 1(With Political lean control)
forest(
  results_df_model1$estimate,
  ci.lb = results_df_model1$ci_lb, ci.ub = results_df_model1$ci_ub,
  slab = results_df_model1$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df_model1$color,
  ylim = c(-3, nrow(results_df_model1) + 4), digits = 3
)
addpoly(
  res_model1_pc1, col = "darkred", cex = 1, border = "darkred", lwd = 2,
  row = -1, mlab = "quality with political lean"
)
# A stray empty argument (", ,") in this call has been removed.
addpoly(
  res_model1_toxicity, col = "darkgreen", cex = 1, border = "darkgreen", lwd = 2,
  row = -2, mlab = "toxicity with political lean"
)
addpoly(
  res_model1_certainty, col = "darkblue", cex = 1, border = "darkblue", lwd = 2,
  row = -3, mlab = "certainty with political lean"
)
title(main = "With Political lean control")

# Model 2(Without Political lean control)
forest(
  results_df_model2$estimate,
  ci.lb = results_df_model2$ci_lb, ci.ub = results_df_model2$ci_ub,
  slab = results_df_model2$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df_model2$color,
  ylim = c(-3, nrow(results_df_model2) + 4), digits = 3
)
addpoly(
  res_model2_pc1, col = "darkred", cex = 1, border = "darkred", lwd = 2,
  row = -1, mlab = "quality without political lean"
)
addpoly(
  res_model2_toxicity, col = "darkgreen", cex = 1, border = "darkgreen", lwd = 2,
  row = -2, mlab = "toxicity without political lean"
)
addpoly(
  res_model2_certainty, col = "darkblue", cex = 1, border = "darkblue", lwd = 2,
  row = -3, mlab = "certainty without political lean"
)
title(main = "Without Political lean control")
# NOTE(review): hand-tuned legend position in plot coordinates; re-check it
# if the number of rows or the x-range of the estimates changes.
legend(
  x = 0.023, y = 28.5,
  legend = c("News Quality", "Toxicity", "Certainty"),
  col = c("darkred", "darkgreen", "darkblue"),
  pch = 16, bty = "n", cex = 1, y.intersp = 0.8
)
# Per-platform engagement models with username fixed effects: standardised
# log10(engagement + 1) on pc1 only, fitted with and without the lean1
# control. The pc1 coefficient is pooled across platforms with rma() and shown
# in two side-by-side forest plots.
ALL_Platforms_eng <- na.omit(
  ALL_Platforms[, c(
    "pc1", "certainty_avg", "toxicity_ntile", "username",
    "platform", "lean1", "engagement"
  )]
)

# Only the news-quality coefficient is reported here.
desired_vars <- c("scale(pc1)")

results_model1 <- list()
results_model2 <- list()
par(mfrow = c(1, 2))
for (platform in platforms) {
  data_subset <- ALL_Platforms_eng[ALL_Platforms_eng$platform == platform, ]

  # Model 1: With control
  model1 <- feglm(
    scale(log10(engagement + 1)) ~ scale(pc1) + scale(lean1) | username,
    cluster = "username", data = data_subset
  )
  coef1 <- coef(model1)[desired_vars]
  confint1 <- confint(model1)[desired_vars, ]
  results_model1[[platform]] <- data.frame(
    platform = platform,
    model = "With Control",
    variable = desired_vars,
    estimate = coef1,
    ci_lb = confint1[, 1],
    ci_ub = confint1[, 2],
    color = c("darkred"),
    shape = 15
  )

  # Model 2: Without control
  model2 <- feglm(
    scale(log10(engagement + 1)) ~ scale(pc1) | username,
    cluster = "username", data = data_subset
  )
  coef2 <- coef(model2)[desired_vars]
  confint2 <- confint(model2)[desired_vars, ]
  results_model2[[platform]] <- data.frame(
    platform = platform,
    model = "Without Control",
    variable = desired_vars,
    estimate = coef2,
    ci_lb = confint2[, 1],
    ci_ub = confint2[, 2],
    color = c("darkred"),
    shape = 16
  )
}

results_df_model1 <- do.call(rbind, results_model1)
results_df_model2 <- do.call(rbind, results_model2)

# Sampling variances backed out of the interval width, treating each interval
# as estimate +/- 1.96 * SE.
results_df_model1$vi <- ((results_df_model1$ci_ub - results_df_model1$ci_lb) / (2 * 1.96))^2
results_df_model2$vi <- ((results_df_model2$ci_ub - results_df_model2$ci_lb) / (2 * 1.96))^2
variables <- unique(results_df_model1$variable)

res_model1_pc1 <- rma(estimate, vi, data = results_df_model1, subset = (variable == "scale(pc1)"))
res_model2_pc1 <- rma(estimate, vi, data = results_df_model2, subset = (variable == "scale(pc1)"))

# The `color` column is already "darkred" for every row. The former
# ifelse(test, "darkred") calls had no `no` argument and only worked because
# the test was never FALSE, so they are dropped rather than repaired.
results_df_model1$label <- paste(results_df_model1$platform, results_df_model1$variable)
results_df_model2$label <- paste(results_df_model2$platform, results_df_model2$variable)

# Both panels list platforms in the order of their pc1 coefficient from the
# "With Control" model (every row of results_df_model1 is that model).
pc1_avg_model1 <- results_df_model1[results_df_model1$variable == "scale(pc1)", ]
pc1_avg_model1 <- pc1_avg_model1[order(pc1_avg_model1$estimate), ]
sorted_platforms <- pc1_avg_model1$platform
results_df_model1 <- results_df_model1[order(factor(results_df_model1$platform, levels = sorted_platforms)), ]
results_df_model2 <- results_df_model2[order(factor(results_df_model2$platform, levels = sorted_platforms)), ]

# The pooled estimate goes on row -2, inside the ylim lower bound of -3. The
# earlier nrow - 10 offset gave this row only for exactly 8 estimates.
# Model 1(With Political lean control)
forest(
  results_df_model1$estimate,
  ci.lb = results_df_model1$ci_lb, ci.ub = results_df_model1$ci_ub,
  slab = results_df_model1$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df_model1$color,
  ylim = c(-3, nrow(results_df_model1) + 4), digits = 3
)
addpoly(
  res_model1_pc1, col = "darkred", cex = 1, border = "darkred", lwd = 2,
  row = -2, mlab = "quality with political lean"
)
title(main = "With Political lean control")

# Model 2(Without Political lean control)
forest(
  results_df_model2$estimate,
  ci.lb = results_df_model2$ci_lb, ci.ub = results_df_model2$ci_ub,
  slab = results_df_model2$label, xlab = "Effect Size",
  refline = 0, shade = TRUE, col = results_df_model2$color,
  ylim = c(-3, nrow(results_df_model2) + 4), digits = 3
)
addpoly(
  res_model2_pc1, col = "darkred", cex = 1, border = "darkred", lwd = 2,
  row = -2, mlab = "quality without political lean"
)
title(main = "Without Political lean control")
# Keyword placement replaces the hard-coded (0.023, 28.5): y = 28.5 lies above
# this panel's upper y-limit (nrow + 4), so the legend was drawn off-plot.
legend(
  "topright",
  legend = c("News Quality"), col = c("darkred"),
  pch = 16, bty = "n", cex = 1, y.intersp = 0.8
)
# Standardised certainty_avg regressed on standardised pc1, using data_headlines.
Headline_1 <- feglm(
  scale(certainty_avg) ~ scale(pc1),
  data = data_headlines
)
## NOTE: 746,866 observations removed because of NA values (LHS: 678,258, RHS: 559,391).
summary(Headline_1)
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 54,014
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0101 0.00418 2.41 0.016064 *
## scale(pc1) -0.1017 0.00404 -25.15 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -74,725.0 Adj. Pseudo R2: 0.004177
## BIC: 149,471.7 Squared Cor.: 0.011576
# Standardised toxicity regressed on standardised pc1, using data_headlines.
Headline_2 <- feglm(
  scale(toxicity) ~ scale(pc1),
  data = data_headlines
)
## NOTE: 559,391 observations removed because of NA values (RHS: 559,391).
summary(Headline_2)
## GLM estimation, family = gaussian, Dep. Var.: scale(toxicity)
## Observations: 241,489
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.124 0.00261 47.4 < 2.2e-16 ***
## scale(pc1) -0.124 0.00261 -47.4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -402,844.8 Adj. Pseudo R2: 0.002767
## BIC: 805,714.4 Squared Cor.: 0.009222
# Baseline engagement model on all rows of ALL_Platforms: standardised
# log10(engagement + 1) on certainty_avg, toxicity_ntile and lean1, with
# username fixed effects and username-clustered standard errors.
Model_engagement_base <- feglm(
  scale(log10(engagement + 1)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = ALL_Platforms,
  cluster = "username"
)
## NOTE: 18,531,484 observations removed because of NA values (LHS: 13,702, RHS: 18,526,578).
summary(Model_engagement_base)
## GLM estimation, family = gaussian
## Dep. Var.: scale(log10(engagement + 1))
## Observations: 4,764,040
## Fixed-effects: username: 659,895
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.00939 0.00211 4.45 8.5979e-06 ***
## scale(toxicity_ntile) 0.01552 0.00202 7.68 1.5351e-14 ***
## scale(lean1) -0.00692 0.00299 -2.31 2.0683e-02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -3,217,870.4 Adj. Pseudo R2: 0.483642
## BIC: 16,582,732.8 Squared Cor.: 0.835001
# Same specification as the baseline engagement model, restricted to rows
# selected by the headline_avail flag.
Model_engagement_headline <- feglm(
  scale(log10(engagement + 1)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = ALL_Platforms[(ALL_Platforms$headline_avail), ],
  cluster = "username"
)
## NOTE: 1,822,165 observations removed because of NA values (LHS: 8,544, RHS: 1,819,247).
summary(Model_engagement_headline)
## GLM estimation, family = gaussian
## Dep. Var.: scale(log10(engagement + 1))
## Observations: 585,624
## Fixed-effects: username: 169,173
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.00775 0.00102 7.57 3.6744e-14 ***
## scale(toxicity_ntile) 0.02193 0.00150 14.61 < 2.2e-16 ***
## scale(lean1) 0.00336 0.00223 1.51 1.3165e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -328,757.7 Adj. Pseudo R2: 0.440373
## BIC: 2,904,246.0 Squared Cor.: 0.852792
# Engagement model on the headline_avail rows that adds
# headline_certainty_avg alongside the post-level certainty_avg.
Model_engagement_headline_certainty <- feglm(
  scale(log10(engagement + 1)) ~
    scale(certainty_avg) + scale(headline_certainty_avg) +
    scale(toxicity_ntile) + scale(lean1) | username,
  data = ALL_Platforms[(ALL_Platforms$headline_avail), ],
  cluster = "username"
)
## NOTE: 2,140,570 observations removed because of NA values (LHS: 8,544, RHS: 2,139,666).
summary(Model_engagement_headline_certainty)
## GLM estimation, family = gaussian
## Dep. Var.: scale(log10(engagement + 1))
## Observations: 267,219
## Fixed-effects: username: 82,123
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.00106 0.00252 0.422 6.7319e-01
## scale(headline_certainty_avg) 0.00512 0.00255 2.006 4.4888e-02 *
## scale(toxicity_ntile) 0.01624 0.00226 7.176 7.2438e-13 ***
## scale(lean1) 0.00256 0.00313 0.820 4.1249e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -122,201.6 Adj. Pseudo R2: 0.479
## BIC: 1,270,647.8 Squared Cor.: 0.867434
# pc1 on certainty_avg and lean1 over the whole data set; standard errors
# clustered by username, no fixed effects.
ALL_Platforms_pc1 <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = ALL_Platforms,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 2,684,478, RHS: 18,526,578).
summary(ALL_Platforms_pc1)
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 4,768,946
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0332 0.00600 -5.54 3.0996e-08 ***
## scale(certainty_avg) -0.0279 0.00194 -14.40 < 2.2e-16 ***
## scale(lean1) -0.4777 0.00797 -59.91 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,251,503.9 Adj. Pseudo R2: 0.096426
## BIC: 12,503,053.8 Squared Cor.: 0.244052
# pc1 on certainty_avg and lean1, restricted to posts with a headline
# (selected here as rows whose headline_toxic is not NA).
ALL_Platforms_headline <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = ALL_Platforms[!is.na(ALL_Platforms$headline_toxic), ],
  cluster = "username"
)
## NOTE: 1,820,750 observations removed because of NA values (LHS: 5,097, RHS: 1,820,750).
summary(ALL_Platforms_headline)
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 589,198
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0347 0.00623 -5.57 2.5777e-08 ***
## scale(certainty_avg) -0.0252 0.00277 -9.10 < 2.2e-16 ***
## scale(lean1) -0.5851 0.00636 -91.99 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -704,860.1 Adj. Pseudo R2: 0.161102
## BIC: 1,409,760.1 Squared Cor.: 0.368391
# pc1 on certainty_avg for posts with a headline (rows whose headline_toxic
# is not NA), controlling for headline_certainty_avg; no lean1 control.
All_platfroms_headline_certainty <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(headline_certainty_avg),
  data = ALL_Platforms[!is.na(ALL_Platforms$headline_toxic), ],
  cluster = "username"
)
## NOTE: 2,141,817 observations removed because of NA values (LHS: 5,097, RHS: 2,141,502).
summary(All_platfroms_headline_certainty)
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 268,131
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1270 0.01028 -12.36 < 2.2e-16 ***
## scale(certainty_avg) -0.0314 0.00549 -5.73 1.0004e-08 ***
## scale(headline_certainty_avg) -0.0529 0.00594 -8.91 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -385,495.1 Adj. Pseudo R2: 0.002167
## BIC: 771,027.8 Squared Cor.: 0.00624
# pc1 on certainty_avg for posts with a headline (rows selected by the
# headline_avail flag), controlling for headline_certainty_avg and lean1.
All_platfroms_headline_lean <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(headline_certainty_avg) + scale(lean1),
  data = ALL_Platforms[ALL_Platforms$headline_avail, ],
  cluster = "username"
)
## NOTE: 2,139,666 observations removed because of NA values (LHS: 5,084, RHS: 2,139,666).
summary(All_platfroms_headline_lean)
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 268,123
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07135 0.00734 -9.73 < 2.2e-16 ***
## scale(certainty_avg) -0.00873 0.00407 -2.15 3.1721e-02 *
## scale(headline_certainty_avg) -0.02926 0.00458 -6.39 1.7106e-10 ***
## scale(lean1) -0.57434 0.00836 -68.66 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -325,244.9 Adj. Pseudo R2: 0.158158
## BIC: 650,539.7 Squared Cor.: 0.366071
# pc1 on certainty_avg and lean1 for posts with a headline whose text is not
# similar to that headline (text_headline_similarity below 0.5).
All_platfroms_headline2 <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = ALL_Platforms[
    (ALL_Platforms$headline_avail) & (ALL_Platforms$text_headline_similarity < .5),
  ],
  cluster = "username"
)
## NOTE: 1,136,997 observations removed because of NA values (LHS: 4,108, RHS: 1,136,997).
summary(All_platfroms_headline2)
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 403,449
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0422 0.00749 -5.63 1.7987e-08 ***
## scale(certainty_avg) -0.0244 0.00375 -6.51 7.3905e-11 ***
## scale(lean1) -0.5697 0.00738 -77.17 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -491,625.4 Adj. Pseudo R2: 0.147961
## BIC: 983,289.6 Squared Cor.: 0.345072
# Models whether certainty_avg is missing (the outcome is is.na(certainty_avg))
# as a function of toxicity_ntile, pc1, lean1 and log engagement.
certainty_avail <- feglm(
  is.na(certainty_avg) ~
    scale(toxicity_ntile) + scale(pc1) + scale(lean1) + scale(log10(1 + engagement)),
  data = ALL_Platforms,
  cluster = "username"
)
## NOTE: 2,706,586 observations removed because of NA values (RHS: 2,706,586).
summary(certainty_avail)
## GLM estimation, family = gaussian, Dep. Var.: is.na(certainty_avg)
## Observations: 20,588,938
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77515 0.00160 484.18 < 2.2e-16 ***
## scale(toxicity_ntile) -0.06622 0.00104 -63.71 < 2.2e-16 ***
## scale(pc1) 0.00521 0.00108 4.83 1.3877e-06 ***
## scale(lean1) 0.00496 0.00108 4.61 4.1235e-06 ***
## scale(log10(1 + engagement)) -0.03694 0.00224 -16.51 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -11,105,784.0 Adj. Pseudo R2: 0.029016
## BIC: 22,211,652.2 Squared Cor.: 0.031725
# Engagement on certainty_avg alone, with username fixed effects and
# username-clustered standard errors.
certainty_engagement_FE <- feglm(
  scale(log10(1 + engagement)) ~ scale(certainty_avg) | username,
  data = ALL_Platforms,
  cluster = "username"
)
## NOTE: 17,999,669 observations removed because of NA values (LHS: 13,702, RHS: 17,994,760).
summary(certainty_engagement_FE)
## GLM estimation, family = gaussian
## Dep. Var.: scale(log10(1 + engagement))
## Observations: 5,295,855
## Fixed-effects: username: 696,456
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.00918 0.0019 4.83 1.3675e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -3,496,293.8 Adj. Pseudo R2: 0.488073
## BIC: 17,775,437.8 Squared Cor.: 0.830117
# Mean of the `sumFE` component stored on the fitted model.
print(mean(certainty_engagement_FE[["sumFE"]]))
## [1] 0.173
# Engagement on a 0/1 indicator for certainty_avg being missing, with
# username fixed effects and username-clustered standard errors.
missing_certainty_engagement <- feglm(
  scale(log10(1 + engagement)) ~ as.numeric(is.na(certainty_avg) == TRUE) | username,
  data = ALL_Platforms,
  cluster = "username"
)
## NOTE: 13,702 observations removed because of NA values (LHS: 13,702).
print(missing_certainty_engagement)
## GLM estimation, family = gaussian
## Dep. Var.: scale(log10(1 + engagement))
## Observations: 23,281,822
## Fixed-effects: username: 1,922,971
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## as.numeric(is.na(certainty_avg) == TRUE) -0.0301 0.0018 -16.7 < 2.2e-16
##
## as.numeric(is.na(certainty_avg) == TRUE) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -13,792,392.2 Adj. Pseudo R2: 0.524288
## BIC: 60,204,511.1 Squared Cor.: 0.808536
# Mean of the `sumFE` component stored on the fitted model.
print(mean(missing_certainty_engagement[["sumFE"]]))
## [1] 0.0232
# Certainty level per row: 0 when certainty_avg is missing, otherwise the
# quintile (1-5) that ntile() assigns to the observed values.
ALL_Platforms <- ALL_Platforms %>%
  mutate(
    certainty_avg_dummy = if_else(
      is.na(certainty_avg),
      0,
      as.numeric(ntile(certainty_avg, 5))
    )
  )
# Engagement coefficient of each certainty level, plotted against that
# level's average certainty.
#
# The join below needs `Model_uncertainty_coefficients_df`, but the code that
# builds it had been commented out, so a clean run stopped with "object not
# found". The model is restored here and only fitted when the table is not
# already in the workspace, so an existing table is reused as before.
if (!exists("Model_uncertainty_coefficients_df")) {
  Model_uncertainty <- feglm(
    log10(1 + engagement) ~ factor(certainty_avg_dummy) | username,
    data = ALL_Platforms
  )
  summary(Model_uncertainty)
  Model_uncertainty_ci <- confint(Model_uncertainty)
  Model_uncertainty_coefficients_df <- data.frame(
    Variable = names(coef(Model_uncertainty)),
    Coefficient = coef(Model_uncertainty),
    CI_lower = Model_uncertainty_ci[, 1],
    CI_upper = Model_uncertainty_ci[, 2]
  ) %>%
    # Strip the factor() prefix so the level number can be joined on.
    mutate(dummy_level = as.numeric(gsub("factor\\(certainty_avg_dummy\\)", "", Variable)))
}

# Average certainty within each level; rows with missing certainty (level 0)
# are excluded.
certainty_avg_summary <- ALL_Platforms %>%
  filter(!is.na(certainty_avg)) %>%
  group_by(certainty_avg_dummy) %>%
  summarise(avg_certainty = mean(certainty_avg, na.rm = TRUE)) %>%
  ungroup()

# Keep only the levels present in both the coefficient table and the summary.
certainty_avg_dummy <- Model_uncertainty_coefficients_df %>%
  inner_join(certainty_avg_summary, by = c("dummy_level" = "certainty_avg_dummy"))

ggplot(certainty_avg_dummy, aes(x = avg_certainty, y = Coefficient)) +
  geom_point(size = 3, color = "blue") +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2, color = "blue") +
  geom_text(aes(label = dummy_level), vjust = -1, size = 3.5, color = "black") +
  labs(
    title = "Average Certainty for each dummy level",
    x = "Average Certainty of each level",
    y = "Coefficient"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_blank()
  )