NOTE:
- This paper studies certainty and engagement using controlled experiments in a branding context: https://journals.sagepub.com/doi/full/10.1016/j.intmar.2020.06.005
- This paper studies engagement and different language features: https://journals.sagepub.com/doi/full/10.1177/00222429231152880
# Histograms (20 bins each) of the main columns of data_posts, plus
# log10(1 + x) versions of toxicity and engage.
p1 <- ggplot(data_posts, aes(x = lean1)) +
  geom_histogram(bins = 20, fill = "skyblue") +
  theme_minimal()
p2 <- ggplot(data_posts, aes(x = certainty_avg)) +
  geom_histogram(bins = 20, fill = "salmon") +
  theme_minimal()
p3 <- ggplot(data_posts, aes(x = toxicity)) +
  geom_histogram(bins = 20, fill = "lightgreen") +
  theme_minimal()
p4 <- ggplot(data_posts, aes(x = pc1)) +
  geom_histogram(bins = 20, fill = "blue") +
  theme_minimal()
p5 <- ggplot(data_posts, aes(x = log10(1+toxicity))) +
  geom_histogram(bins = 20, fill = "blue") +
  theme_minimal()
p6 <- ggplot(data_posts, aes(x = log10(1+engage))) +
  geom_histogram(bins = 20, fill = "blue") +
  theme_minimal()
p7 <- ggplot(data_posts, aes(x = engage)) +
  geom_histogram(bins = 20, fill = "blue") +
  theme_minimal()
# Arrange the seven histograms in a grid with four rows (not a single row).
grid.arrange(p1, p2, p3, p4, p5, p6, p7, nrow = 4)
## Warning: Removed 2692899 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 17994760 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 2684478 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 582384 rows containing non-finite values (`stat_bin()`).
## Removed 582384 rows containing non-finite values (`stat_bin()`).
# Per-platform means of lean1, certainty_avg, pc1 and toxicity (NAs ignored).
d_ <- data_posts |>
  group_by(platform) |>
  summarise(
    lean = mean(lean1, na.rm = TRUE),
    certainty_avg = mean(certainty_avg, na.rm = TRUE),
    pc1 = mean(pc1, na.rm = TRUE),
    toxicity = mean(toxicity, na.rm = TRUE)
  )

# Scatter plot of two columns of `means`, one labelled point per platform.
#   means : data frame with a `platform` column and the two plotted columns
#   x_var, y_var : column names (strings) mapped to the x and y axes
platform_scatter <- function(means, x_var, y_var) {
  ggplot(means, aes(x = .data[[x_var]], y = .data[[y_var]])) +
    geom_point(color = "red") +
    geom_text(aes(label = platform), vjust = -0.5, color = "blue") +
    # clip = "off" lets labels near the panel edge draw outside the panel
    coord_cartesian(clip = "off") +
    labs(x = x_var, y = y_var) +
    theme_minimal()
}

p1 <- platform_scatter(d_, "pc1", "toxicity")
p2 <- platform_scatter(d_, "pc1", "certainty_avg")
p3 <- platform_scatter(d_, "lean", "toxicity")
p4 <- platform_scatter(d_, "lean", "certainty_avg")
p5 <- platform_scatter(d_, "toxicity", "certainty_avg")

# grid.arrange() has no width/height/units arguments: when `grobs` is given,
# such extras are swallowed by `...` and ignored, so they are dropped here.
# Set the figure size on the graphics device or the chunk options instead.
grid.arrange(grobs = list(p1, p2, p3, p4, p5), nrow = 3)
# List the distinct platform values present in data_posts.
print(unique(data_posts[["platform"]]))
## [1] "Bluesky" "Gettr" "Truthsocial" "Gab" "Mastodon"
## [6] "LinkedIn" "Telegram" "X"
# Standardized certainty_avg on standardized pc1; SEs clustered by username.
feglm(
  scale(certainty_avg) ~ scale(pc1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,524,268 observations removed because of NA values (LHS: 17,994,760, RHS: 2,684,478).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,771,256
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004749 0.005308 0.894709 0.37094
## scale(pc1) -0.050577 0.002505 -20.188669 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,741,374.8 Adj. Pseudo R2: 9.749e-4
## BIC: 13,482,780.4 Squared Cor.: 0.002754
# Same outcome, adding standardized lean1 as a control; SEs clustered by
# username.
feglm(
  scale(certainty_avg) ~ scale(pc1) + scale(lean1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 17,994,760, RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,768,946
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005769 0.005284 1.09187 0.27489
## scale(pc1) -0.034188 0.002322 -14.72135 < 2.2e-16 ***
## scale(lean1) 0.032164 0.002360 13.62781 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,736,320.9 Adj. Pseudo R2: 0.001288
## BIC: 13,472,687.9 Squared Cor.: 0.003638
# As above, with username fixed effects (the `| username` part of the formula)
# and SEs clustered by username.
feglm(
  scale(certainty_avg) ~ scale(pc1) + scale(lean1) | username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 17,994,760, RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,768,946
## Fixed-effects: username: 662,123
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(pc1) -0.011607 0.001979 -5.86498 4.4939e-09 ***
## scale(lean1) 0.005990 0.001362 4.39829 1.0913e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,071,559.8 Adj. Pseudo R2: 0.00168
## BIC: 22,325,036.8 Squared Cor.: 0.246052
# Per-platform models of toxicity_ntile on pc1, without and with the lean1
# control. get_param() (defined elsewhere) is called with the coefficient
# position 2, i.e. scale(pc1), which follows the intercept in these models.
# Telegram is excluded from these toxicity models.
# NOTE(review): presumably Telegram has no usable toxicity scores - confirm.
platforms <- setdiff(as.character(unique(data_posts$platform)), "Telegram")

# Collect each platform's rows in a list and bind once at the end instead of
# growing `results` with rbind() on every iteration.
pieces <- vector("list", length(platforms))
for (k in seq_along(platforms)) {
  platform <- platforms[[k]]
  print(platform)
  d <- data_posts[data_posts$platform == platform, ]

  r <- feglm(scale(toxicity_ntile) ~ scale(pc1), data = d, cluster = "username")
  no_control <- get_param("toxic_pc1", 2, r, platform)

  r <- feglm(
    scale(toxicity_ntile) ~ scale(pc1) + scale(lean1),
    data = d,
    cluster = "username"
  )
  with_control <- get_param("toxic_pc1_control", 2, r, platform)

  pieces[[k]] <- rbind(no_control, with_control)
}
results <- do.call(rbind, c(list(data.frame()), pieces))
## [1] "Bluesky"
## NOTE: 16 observations removed because of NA values (RHS: 16).
## [1] "Gettr"
## NOTE: 26 observations removed because of NA values (RHS: 26).
## [1] "Truthsocial"
## NOTE: 312 observations removed because of NA values (RHS: 312).
## [1] "Gab"
## NOTE: 5,612 observations removed because of NA values (RHS: 5,612).
## [1] "Mastodon"
## NOTE: 2,684,361 observations removed because of NA values (RHS: 2,684,361).
## NOTE: 2,684,369 observations removed because of NA values (RHS: 2,684,369).
## [1] "LinkedIn"
## NOTE: 117 observations removed because of NA values (RHS: 117).
## NOTE: 519 observations removed because of NA values (RHS: 519).
## [1] "X"
## NOTE: 1,312 observations removed because of NA values (RHS: 1,312).
# Forest plot of the per-platform toxic_pc1 / toxic_pc1_control estimates
# stored in `results`, plus one pooled estimate per specification.

# One row per (platform, measure), with `estimate` and `se` as columns.
d_ <- results |>
  filter(var %in% c("estimate", "se")) |>
  filter(measure %in% c("toxic_pc1", "toxic_pc1_control")) |>
  pivot_wider(names_from = var, values_from = value) |>
  arrange(estimate)

# Display order of the two specifications; the colours follow the same order.
group_levels <- c("With Political Lean Control", "No Control")
group_colors <- c("black", "gray")
d_ <- d_ |>
  mutate(
    group = ifelse(
      grepl("control", measure, fixed = TRUE),
      group_levels[1],
      group_levels[2]
    )
  )

# Pooled effect across platforms, fitted separately for each specification.
is_controlled <- d_$group == group_levels[1]
res_A <- rma(yi = d_$estimate[is_controlled], sei = d_$se[is_controlled])
res_B <- rma(yi = d_$estimate[!is_controlled], sei = d_$se[!is_controlled])

# Platforms sorted by their lean-controlled estimate, smallest first.
group_A_effects <- d_[is_controlled, ]
order_indices <- order(group_A_effects$estimate)
ordered_studies <- group_A_effects$platform[order_indices]

# The factor levels fix the order within a platform: the controlled row comes
# first, then the uncontrolled row.
d_$group <- factor(d_$group, levels = group_levels)
d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]

# Show the platform name only on the first row of each platform.
slab_labels <- ifelse(duplicated(d_$platform), "", d_$platform)

#png("../../figs/fig3a.png",width =25, height = 25,units = "cm", res = 300) # Adjust size and resolution
forest(
  x = d_$estimate,
  sei = d_$se,
  slab = slab_labels,
  # Colour by specification rather than by row position, so the colours stay
  # correct even if a platform is missing one of its two rows.
  col = group_colors[as.integer(d_$group)],
  xlab = "Effect Size",
  main = "Association toxicity and quality",
  # Extra rows below the studies leave room for the pooled effects.
  ylim = c(-5, nrow(d_) + 3),
  psize = 0.8,
  digits = 3
)
# Separator between the per-platform rows and the pooled rows, and a zero line.
abline(h = 0.5, col = "gray")
abline(v = 0, col = "black")#, lty = 1)
# Pooled effects drawn as diamonds on rows -1 and -2.
addpoly(res_A, atransf = FALSE, row = -1, mlab = "With Political Lean Control", col = "black", cex = 1.2)
addpoly(res_B, atransf = FALSE, row = -2, mlab = "No Control", col = "gray", cex = 1.2)
#dev.off()
# Pooled (all platforms) model: standardized toxicity_ntile on standardized
# pc1; SEs clustered by username.
feglm(
  scale(toxicity_ntile) ~ scale(pc1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 2,684,478 observations removed because of NA values (RHS: 2,684,478).
## GLM estimation, family = gaussian, Dep. Var.: scale(toxicity_ntile)
## Observations: 20,611,046
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.079589 0.004716 16.8764 < 2.2e-16 ***
## scale(pc1) -0.066464 0.003253 -20.4328 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -28,773,361.0 Adj. Pseudo R2: 0.00165
## BIC: 57,546,755.7 Squared Cor.: 0.004603
# Same outcome, adding standardized lean1 as a control; SEs clustered by
# username.
feglm(
  scale(toxicity_ntile) ~ scale(pc1) + scale(lean1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 2,692,899 observations removed because of NA values (RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(toxicity_ntile)
## Observations: 20,602,625
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.079610 0.004704 16.9230 < 2.2e-16 ***
## scale(pc1) -0.087116 0.003788 -22.9961 < 2.2e-16 ***
## scale(lean1) -0.044158 0.003667 -12.0404 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -28,743,817.6 Adj. Pseudo R2: 0.002214
## BIC: 57,487,685.7 Squared Cor.: 0.006171
# As above, with username fixed effects and SEs clustered by username.
feglm(
  scale(toxicity_ntile) ~ scale(pc1) + scale(lean1) | username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 2,692,899 observations removed because of NA values (RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(toxicity_ntile)
## Observations: 20,602,625
## Fixed-effects: username: 1,862,255
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(pc1) -0.019835 0.001482 -13.38314 < 2.2e-16 ***
## scale(lean1) -0.005808 0.001818 -3.19496 0.0013985 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -22,127,757.9 Adj. Pseudo R2: 0.167233
## BIC: 75,617,653.8 Squared Cor.: 0.477142
# Per-platform models of certainty_avg on pc1, without and with the lean1
# control. get_param() (defined elsewhere) is called with the coefficient
# position 2, i.e. scale(pc1), which follows the intercept in these models.
platforms <- as.character(unique(data_posts$platform))

# Collect each platform's rows in a list and bind once at the end instead of
# growing `results` with rbind() on every iteration.
pieces <- vector("list", length(platforms))
for (k in seq_along(platforms)) {
  platform <- platforms[[k]]
  print(platform)
  d <- data_posts[data_posts$platform == platform, ]

  r <- feglm(scale(certainty_avg) ~ scale(pc1), data = d, cluster = "username")
  no_control <- get_param("certainty_pc1", 2, r, platform)

  r <- feglm(
    scale(certainty_avg) ~ scale(pc1) + scale(lean1),
    data = d,
    cluster = "username"
  )
  with_control <- get_param("certainty_pc1_control", 2, r, platform)

  pieces[[k]] <- rbind(no_control, with_control)
}
results <- do.call(rbind, c(list(data.frame()), pieces))
## [1] "Bluesky"
## NOTE: 1,182,479 observations removed because of NA values (LHS: 1,182,479).
## NOTE: 1,182,481 observations removed because of NA values (LHS: 1,182,479, RHS: 16).
## [1] "Gettr"
## NOTE: 14,226 observations removed because of NA values (LHS: 14,226).
## NOTE: 14,230 observations removed because of NA values (LHS: 14,226, RHS: 26).
## [1] "Truthsocial"
## NOTE: 1,305,002 observations removed because of NA values (LHS: 1,305,002).
## NOTE: 1,305,024 observations removed because of NA values (LHS: 1,305,002, RHS: 312).
## [1] "Gab"
## NOTE: 379,792 observations removed because of NA values (LHS: 379,792).
## NOTE: 381,707 observations removed because of NA values (LHS: 379,792, RHS: 5,612).
## [1] "Mastodon"
## NOTE: 3,344,319 observations removed because of NA values (LHS: 2,814,848, RHS: 2,684,361).
## NOTE: 3,344,320 observations removed because of NA values (LHS: 2,814,848, RHS: 2,684,369).
## [1] "LinkedIn"
## NOTE: 16,024 observations removed because of NA values (LHS: 15,987, RHS: 117).
## NOTE: 16,288 observations removed because of NA values (LHS: 15,987, RHS: 519).
## [1] "Telegram"
## NOTE: 615,783 observations removed because of NA values (LHS: 615,783).
## NOTE: 615,794 observations removed because of NA values (LHS: 615,783, RHS: 733).
## [1] "X"
## NOTE: 11,666,643 observations removed because of NA values (LHS: 11,666,643).
## NOTE: 11,666,734 observations removed because of NA values (LHS: 11,666,643, RHS: 1,312).
# Forest plot of the per-platform certainty_pc1 / certainty_pc1_control
# estimates stored in `results`, plus one pooled estimate per specification.

# One row per (platform, measure), with `estimate` and `se` as columns.
d_ <- results |>
  filter(var %in% c("estimate", "se")) |>
  filter(measure %in% c("certainty_pc1", "certainty_pc1_control")) |>
  pivot_wider(names_from = var, values_from = value) |>
  arrange(estimate)

# Display order of the two specifications; the colours follow the same order.
group_levels <- c("With Political Lean Control", "No Control")
group_colors <- c("black", "gray")
d_ <- d_ |>
  mutate(
    group = ifelse(
      grepl("control", measure, fixed = TRUE),
      group_levels[1],
      group_levels[2]
    )
  )

# Pooled effect across platforms, fitted separately for each specification.
is_controlled <- d_$group == group_levels[1]
res_A <- rma(yi = d_$estimate[is_controlled], sei = d_$se[is_controlled])
res_B <- rma(yi = d_$estimate[!is_controlled], sei = d_$se[!is_controlled])

# Platforms sorted by their lean-controlled estimate, smallest first.
group_A_effects <- d_[is_controlled, ]
order_indices <- order(group_A_effects$estimate)
ordered_studies <- group_A_effects$platform[order_indices]

# The factor levels fix the order within a platform: the controlled row comes
# first, then the uncontrolled row.
d_$group <- factor(d_$group, levels = group_levels)
d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]

# Show the platform name only on the first row of each platform.
slab_labels <- ifelse(duplicated(d_$platform), "", d_$platform)

#png("../../figs/fig3a.png",width =25, height = 25,units = "cm", res = 300) # Adjust size and resolution
forest(
  x = d_$estimate,
  sei = d_$se,
  slab = slab_labels,
  # Colour by specification rather than by row position, so the colours stay
  # correct even if a platform is missing one of its two rows.
  col = group_colors[as.integer(d_$group)],
  xlab = "Effect Size",
  main = "association confidence and quality",
  # Extra rows below the studies leave room for the pooled effects.
  ylim = c(-5, nrow(d_) + 3),
  psize = 0.8,
  digits = 3
)
# Separator between the per-platform rows and the pooled rows, and a zero line.
abline(h = 0.5, col = "gray")
abline(v = 0, col = "black")#, lty = 1)
# Pooled effects drawn as diamonds on rows -1 and -2.
addpoly(res_A, atransf = FALSE, row = -1, mlab = "With Political Lean Control", col = "black", cex = 1.2)
addpoly(res_B, atransf = FALSE, row = -2, mlab = "No Control", col = "gray", cex = 1.2)
#dev.off()
# Pooled (all platforms) model: standardized certainty_avg on standardized
# pc1; SEs clustered by username. Same specification as an earlier call in
# this file.
feglm(
  scale(certainty_avg) ~ scale(pc1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,524,268 observations removed because of NA values (LHS: 17,994,760, RHS: 2,684,478).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,771,256
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004749 0.005308 0.894709 0.37094
## scale(pc1) -0.050577 0.002505 -20.188669 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,741,374.8 Adj. Pseudo R2: 9.749e-4
## BIC: 13,482,780.4 Squared Cor.: 0.002754
# Pooled model with standardized lean1 added as a control; SEs clustered by
# username.
feglm(
  scale(certainty_avg) ~ scale(pc1) + scale(lean1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 17,994,760, RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,768,946
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005769 0.005284 1.09187 0.27489
## scale(pc1) -0.034188 0.002322 -14.72135 < 2.2e-16 ***
## scale(lean1) 0.032164 0.002360 13.62781 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,736,320.9 Adj. Pseudo R2: 0.001288
## BIC: 13,472,687.9 Squared Cor.: 0.003638
# Pooled model with the lean1 control and username fixed effects; SEs
# clustered by username.
feglm(
  scale(certainty_avg) ~ scale(pc1) + scale(lean1) | username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 17,994,760, RHS: 2,692,899).
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 4,768,946
## Fixed-effects: username: 662,123
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(pc1) -0.011607 0.001979 -5.86498 4.4939e-09 ***
## scale(lean1) 0.005990 0.001362 4.39829 1.0913e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,071,559.8 Adj. Pseudo R2: 0.00168
## BIC: 22,325,036.8 Squared Cor.: 0.246052
# Engagement model on the rows of data_posts where platform is "X":
# standardized log10(1 + engage) on certainty_avg, toxicity_ntile and lean1,
# with username fixed effects and SEs clustered by username.
r <- feglm(
  scale(log10(1 + engage)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = data_posts[data_posts$platform == "X", ],
  cluster = "username"
)
## NOTE: 11,666,734 observations removed because of NA values (RHS: 11,666,734).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(1 + engage))
## Observations: 3,291,774
## Fixed-effects: username: 498,674
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.008283 0.002737 3.02669 0.0024726 **
## scale(toxicity_ntile) 0.010315 0.002703 3.81572 0.0001358 ***
## scale(lean1) -0.008901 0.003906 -2.27867 0.0226873 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -1,944,357.5 Adj. Pseudo R2: 0.524344
## BIC: 11,372,329.3 Squared Cor.: 0.856187
# Engagement model on all platforms, adding pc1 to the regressors; username
# fixed effects and SEs clustered by username.
r <- feglm(
  scale(log10(1 + engage)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) + scale(lean1) |
      username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,668,219 observations removed because of NA values (LHS: 582,384, RHS: 18,526,578).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(1 + engage))
## Observations: 4,627,305
## Fixed-effects: username: 658,283
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.009484 0.002156 4.39808 1.0923e-05 ***
## scale(toxicity_ntile) 0.015517 0.002090 7.42559 1.1242e-13 ***
## scale(pc1) -0.014081 0.001301 -10.82548 < 2.2e-16 ***
## scale(lean1) -0.011040 0.003167 -3.48554 4.9117e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -3,109,735.0 Adj. Pseudo R2: 0.483355
## BIC: 16,322,520.0 Squared Cor.: 0.836049
# Engagement model with the full set of two- and three-way interactions
# between certainty_avg, toxicity_ntile and pc1, plus lean1 as a control;
# username fixed effects and SEs clustered by username.
r <- feglm(
  scale(log10(1 + engage)) ~
    scale(certainty_avg) * scale(toxicity_ntile) * scale(pc1) + scale(lean1) |
      username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,668,219 observations removed because of NA values (LHS: 582,384, RHS: 18,526,578).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(1 + engage))
## Observations: 4,627,305
## Fixed-effects: username: 658,283
## Standard-errors: Clustered (username)
## Estimate Std. Error
## scale(certainty_avg) 0.011602 0.002034
## scale(toxicity_ntile) 0.015750 0.002103
## scale(pc1) -0.014507 0.001292
## scale(lean1) -0.010948 0.003177
## scale(certainty_avg):scale(toxicity_ntile) -0.005228 0.001330
## scale(certainty_avg):scale(pc1) 0.000621 0.000479
## scale(toxicity_ntile):scale(pc1) 0.000995 0.000597
## scale(certainty_avg):scale(toxicity_ntile):scale(pc1) 0.000158 0.000405
## t value Pr(>|t|)
## scale(certainty_avg) 5.703620 1.1734e-08 ***
## scale(toxicity_ntile) 7.490527 6.8682e-14 ***
## scale(pc1) -11.225761 < 2.2e-16 ***
## scale(lean1) -3.446076 5.6883e-04 ***
## scale(certainty_avg):scale(toxicity_ntile) -3.930737 8.4695e-05 ***
## scale(certainty_avg):scale(pc1) 1.296267 1.9488e-01
## scale(toxicity_ntile):scale(pc1) 1.666427 9.5629e-02 .
## scale(certainty_avg):scale(toxicity_ntile):scale(pc1) 0.389646 6.9680e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -3,109,541.3 Adj. Pseudo R2: 0.483381
## BIC: 16,322,194.0 Squared Cor.: 0.836062
# Per-platform engagement models with username fixed effects. These models
# have no intercept row, so coefficient position 1 is scale(certainty_avg) and
# position 2 is scale(toxicity_ntile); get_param() is defined elsewhere.
platforms <- as.character(unique(data_posts$platform))

# Collect each platform's rows in a list and bind once at the end instead of
# growing `results` with rbind() on every iteration.
pieces <- vector("list", length(platforms))
for (k in seq_along(platforms)) {
  platform <- platforms[[k]]
  print(platform)
  d <- data_posts[data_posts$platform == platform, ]

  r <- feglm(
    scale(log10(engage + 1)) ~
      scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
    data = d,
    cluster = "username"
  )

  pieces[[k]] <- rbind(
    get_param("engage_certainty", 1, r, platform),
    get_param("engage_toxicity", 2, r, platform)
  )
}
results <- do.call(rbind, c(list(data.frame()), pieces))
## [1] "Bluesky"
## NOTE: 1,182,481 observations removed because of NA values (RHS: 1,182,481).
## [1] "Gettr"
## NOTE: 20,259 observations removed because of NA values (LHS: 16,998, RHS: 14,230).
## [1] "Truthsocial"
## NOTE: 1,305,024 observations removed because of NA values (RHS: 1,305,024).
## [1] "Gab"
## NOTE: 381,707 observations removed because of NA values (RHS: 381,707).
## [1] "Mastodon"
## NOTE: 3,344,320 observations removed because of NA values (RHS: 3,344,320).
## [1] "LinkedIn"
## NOTE: 16,289 observations removed because of NA values (LHS: 4, RHS: 16,288).
## [1] "Telegram"
## NOTE: 751,405 observations removed because of NA values (LHS: 565,382, RHS: 615,794).
## [1] "X"
## NOTE: 11,666,734 observations removed because of NA values (RHS: 11,666,734).
# Forest plot of the per-platform engage_certainty / engage_toxicity estimates
# stored in `results`, plus one pooled estimate per measure.

# One row per (platform, measure), with `estimate` and `se` as columns.
d_ <- results |>
  filter(var %in% c("estimate", "se")) |>
  filter(measure %in% c("engage_certainty", "engage_toxicity")) |>
  pivot_wider(names_from = var, values_from = value) |>
  arrange(estimate)

# Display order of the two measures; the colours follow the same order.
group_levels <- c("Toxicity", "Certainty")
group_colors <- c("black", "gray")
d_ <- d_ |>
  mutate(
    group = ifelse(
      grepl("engage_certainty", measure, fixed = TRUE),
      "Certainty",
      "Toxicity"
    )
  )

# Pooled effect across platforms, fitted separately for each measure.
is_toxicity <- d_$group == "Toxicity"
res_A <- rma(yi = d_$estimate[is_toxicity], sei = d_$se[is_toxicity])
res_B <- rma(yi = d_$estimate[!is_toxicity], sei = d_$se[!is_toxicity])

# Platforms sorted by their toxicity estimate, smallest first.
group_A_effects <- d_[is_toxicity, ]
order_indices <- order(group_A_effects$estimate)
ordered_studies <- group_A_effects$platform[order_indices]

# The factor levels fix the order within a platform: the Toxicity row comes
# first, then the Certainty row.
d_$group <- factor(d_$group, levels = group_levels)
d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]

# Show the platform name only on the first row of each platform.
slab_labels <- ifelse(duplicated(d_$platform), "", d_$platform)

forest(
  x = d_$estimate,
  sei = d_$se,
  slab = slab_labels,
  # Colour by measure rather than by row position, so the colours stay
  # correct even if a platform is missing one of its two rows.
  col = group_colors[as.integer(d_$group)],
  xlab = "Effect Size",
  # Title typos fixed ("engagment", "toxcity").
  main = "association engagement, confidence and toxicity (w/ lean control)",
  # Extra rows below the studies leave room for the pooled effects.
  ylim = c(-5, nrow(d_) + 3),
  psize = 0.8,
  digits = 3
)
# Separator between the per-platform rows and the pooled rows, and a zero line.
abline(h = 0.5, col = "gray")
abline(v = 0, col = "black")#, lty = 1)
# Pooled effects drawn as diamonds on rows -1 and -2.
addpoly(res_A, atransf = FALSE, row = -1, mlab = "Toxicity", col = "black", cex = 1.2)
addpoly(res_B, atransf = FALSE, row = -2, mlab = "Certainty", col = "gray", cex = 1.2)
#dev.off()
# Per-platform engagement models that also include pc1, with username fixed
# effects. These models have no intercept row, so coefficient positions 1, 2
# and 3 are scale(certainty_avg), scale(toxicity_ntile) and scale(pc1);
# get_param() is defined elsewhere.
platforms <- as.character(unique(data_posts$platform))

# Collect each platform's rows in a list and bind once at the end instead of
# growing `results` with rbind() on every iteration.
pieces <- vector("list", length(platforms))
for (k in seq_along(platforms)) {
  platform <- platforms[[k]]
  print(platform)
  d <- data_posts[data_posts$platform == platform, ]

  r <- feglm(
    scale(log10(engage + 1)) ~
      scale(certainty_avg) + scale(toxicity_ntile) + scale(pc1) + scale(lean1) |
        username,
    data = d,
    cluster = "username"
  )

  pieces[[k]] <- rbind(
    get_param("Certainty", 1, r, platform),
    get_param("Toxicity", 2, r, platform),
    get_param("Quality", 3, r, platform)
  )
}
results <- do.call(rbind, c(list(data.frame()), pieces))
## [1] "Bluesky"
## NOTE: 1,182,481 observations removed because of NA values (RHS: 1,182,481).
## [1] "Gettr"
## NOTE: 20,259 observations removed because of NA values (LHS: 16,998, RHS: 14,230).
## [1] "Truthsocial"
## NOTE: 1,305,024 observations removed because of NA values (RHS: 1,305,024).
## [1] "Gab"
## NOTE: 381,707 observations removed because of NA values (RHS: 381,707).
## [1] "Mastodon"
## NOTE: 3,344,320 observations removed because of NA values (RHS: 3,344,320).
## [1] "LinkedIn"
## NOTE: 16,289 observations removed because of NA values (LHS: 4, RHS: 16,288).
## [1] "Telegram"
## NOTE: 751,405 observations removed because of NA values (LHS: 565,382, RHS: 615,794).
## [1] "X"
## NOTE: 11,666,734 observations removed because of NA values (RHS: 11,666,734).
# Forest plot of the per-platform Toxicity / Certainty / Quality estimates
# stored in `results`, plus one pooled estimate per measure.

# One row per (platform, measure), with `estimate` and `se` as columns.
d_ <- results |>
  filter(var %in% c("estimate", "se")) |>
  pivot_wider(names_from = var, values_from = value) |>
  arrange(estimate)

# Display order of the three measures; the colours follow the same order.
group_levels <- c("Toxicity", "Certainty", "Quality")
group_colors <- c("blue", "green", "red")
d_$group <- d_$measure

# Pooled effect across platforms, fitted separately for each measure.
res_A <- rma(yi = d_$estimate[d_$group == "Toxicity"], sei = d_$se[d_$group == "Toxicity"])
res_B <- rma(yi = d_$estimate[d_$group == "Certainty"], sei = d_$se[d_$group == "Certainty"])
res_C <- rma(yi = d_$estimate[d_$group == "Quality"], sei = d_$se[d_$group == "Quality"])

# Platforms sorted by their toxicity estimate, smallest first.
group_A_effects <- d_[d_$group == "Toxicity", ]
order_indices <- order(group_A_effects$estimate)
ordered_studies <- group_A_effects$platform[order_indices]

# The factor levels fix the order within a platform: Toxicity, then Certainty,
# then Quality.
d_$group <- factor(d_$group, levels = group_levels)
d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]

# Show the platform name only on the first row of each platform.
slab_labels <- ifelse(duplicated(d_$platform), "", d_$platform)

forest(
  x = d_$estimate,
  sei = d_$se,
  slab = slab_labels,
  # Colour by measure rather than by row position, so the colours stay
  # correct even if a platform is missing one of its three rows.
  col = group_colors[as.integer(d_$group)],
  xlab = "Effect Size",
  # Title typos fixed ("engagment", "toxcity").
  main = "association engagement, confidence, toxicity, and quality (w/ lean control)",
  # Extra rows below the studies leave room for the pooled effects.
  ylim = c(-5, nrow(d_) + 3),
  psize = 0.8,
  digits = 3
)
# Separator between the per-platform rows and the pooled rows, and a zero line.
abline(h = 0.5, col = "gray")
abline(v = 0, col = "black")#, lty = 1)
# Pooled effects drawn as diamonds on rows -1 to -3.
addpoly(res_A, atransf = FALSE, row = -1, mlab = "Toxicity", col = "blue", cex = 1.2)
addpoly(res_B, atransf = FALSE, row = -2, mlab = "Certainty", col = "green", cex = 1.2)
addpoly(res_C, atransf = FALSE, row = -3, mlab = "Quality", col = "red", cex = 1.2)
# Model fitted on data_headlines: standardized certainty_avg on standardized
# pc1. No clustering is requested; the original call carried the argument only
# as a commented-out fragment (#,cluster='username').
r <- feglm(scale(certainty_avg) ~ scale(pc1), data = data_headlines)
## NOTE: 746,866 observations removed because of NA values (LHS: 678,258, RHS: 559,391).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(certainty_avg)
## Observations: 54,014
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.010071 0.004183 2.40753 0.016064 *
## scale(pc1) -0.101662 0.004042 -25.15059 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -74,725.0 Adj. Pseudo R2: 0.004177
## BIC: 149,471.7 Squared Cor.: 0.011576
# Model fitted on data_headlines: standardized toxicity on standardized pc1.
# No clustering is requested; the original call carried the argument only as a
# commented-out fragment (#,cluster='username').
r <- feglm(scale(toxicity) ~ scale(pc1), data = data_headlines)
## NOTE: 559,391 observations removed because of NA values (RHS: 559,391).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(toxicity)
## Observations: 241,489
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.123659 0.002611 47.3622 < 2.2e-16 ***
## scale(pc1) -0.123782 0.002611 -47.4094 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -402,844.8 Adj. Pseudo R2: 0.002767
## BIC: 805,714.4 Squared Cor.: 0.009222
# Engagement model on all posts without pc1: certainty_avg, toxicity_ntile and
# lean1, with username fixed effects and SEs clustered by username.
r <- feglm(
  scale(log10(engage + 1)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,668,219 observations removed because of NA values (LHS: 582,384, RHS: 18,526,578).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(engage + 1))
## Observations: 4,627,305
## Fixed-effects: username: 658,283
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.009554 0.002157 4.43014 9.4189e-06 ***
## scale(toxicity_ntile) 0.015852 0.002092 7.57670 3.5492e-14 ***
## scale(lean1) -0.006831 0.003064 -2.22938 2.5789e-02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -3,110,528.2 Adj. Pseudo R2: 0.483246
## BIC: 16,324,090.9 Squared Cor.: 0.835992
# Same engagement model, restricted to rows where headline_toxic is not NA.
r <- feglm(
  scale(log10(engage + 1)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = data_posts[!is.na(data_posts$headline_toxic), ],
  cluster = "username"
)
## NOTE: 1,833,345 observations removed because of NA values (LHS: 43,041, RHS: 1,820,750).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(engage + 1))
## Observations: 576,603
## Fixed-effects: username: 168,491
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.007829 0.001037 7.54781 4.4486e-14 ***
## scale(toxicity_ntile) 0.022431 0.001518 14.78066 < 2.2e-16 ***
## scale(lean1) 0.003570 0.002257 1.58195 1.1366e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -319,516.3 Adj. Pseudo R2: 0.442941
## BIC: 2,874,090.1 Squared Cor.: 0.854906
# Engagement model restricted to rows where headline_toxic is not NA.
# NOTE(review): this call is identical to the one immediately before it in
# this file (same formula, same subset) - possibly a regressor such as
# headline_toxic was meant to be added here; confirm or remove.
r <- feglm(
  scale(log10(engage + 1)) ~
    scale(certainty_avg) + scale(toxicity_ntile) + scale(lean1) | username,
  data = data_posts[!is.na(data_posts$headline_toxic), ],
  cluster = "username"
)
## NOTE: 1,833,345 observations removed because of NA values (LHS: 43,041, RHS: 1,820,750).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(engage + 1))
## Observations: 576,603
## Fixed-effects: username: 168,491
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.007829 0.001037 7.54781 4.4486e-14 ***
## scale(toxicity_ntile) 0.022431 0.001518 14.78066 < 2.2e-16 ***
## scale(lean1) 0.003570 0.002257 1.58195 1.1366e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -319,516.3 Adj. Pseudo R2: 0.442941
## BIC: 2,874,090.1 Squared Cor.: 0.854906
# Engagement model on rows where headline_toxic is not NA, adding
# headline_certainty_avg alongside certainty_avg; username fixed effects and
# SEs clustered by username.
r <- feglm(
  scale(log10(engage + 1)) ~
    scale(certainty_avg) + scale(headline_certainty_avg) +
    scale(toxicity_ntile) + scale(lean1) | username,
  data = data_posts[!is.na(data_posts$headline_toxic), ],
  cluster = "username"
)
## NOTE: 2,147,177 observations removed because of NA values (LHS: 43,041, RHS: 2,141,825).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(log10(engage + 1))
## Observations: 262,771
## Fixed-effects: username: 81,643
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## scale(certainty_avg) 0.001359 0.002538 0.535733 5.9214e-01
## scale(headline_certainty_avg) 0.004979 0.002567 1.939943 5.2390e-02 .
## scale(toxicity_ntile) 0.016472 0.002298 7.166434 7.7625e-13 ***
## scale(lean1) 0.002893 0.003158 0.916246 3.5954e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -118,355.3 Adj. Pseudo R2: 0.481684
## BIC: 1,255,586.6 Squared Cor.: 0.869462
# pc1 as the outcome: standardized pc1 on certainty_avg and lean1, all posts,
# no fixed effects; SEs clustered by username.
r <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = data_posts,
  cluster = "username"
)
## NOTE: 18,526,578 observations removed because of NA values (LHS: 2,684,478, RHS: 18,526,578).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 4,768,946
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.033244 0.006005 -5.53578 3.0996e-08 ***
## scale(certainty_avg) -0.027898 0.001937 -14.40392 < 2.2e-16 ***
## scale(lean1) -0.477683 0.007974 -59.90871 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,251,503.9 Adj. Pseudo R2: 0.096426
## BIC: 12,503,053.8 Squared Cor.: 0.244052
# Same pc1 model, restricted to rows where headline_toxic is not NA.
r <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = data_posts[!is.na(data_posts$headline_toxic), ],
  cluster = "username"
)
## NOTE: 1,820,750 observations removed because of NA values (LHS: 5,097, RHS: 1,820,750).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 589,198
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.034685 0.006229 -5.56820 2.5777e-08 ***
## scale(certainty_avg) -0.025217 0.002771 -9.09928 < 2.2e-16 ***
## scale(lean1) -0.585104 0.006360 -91.99306 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -704,860.1 Adj. Pseudo R2: 0.161102
## BIC: 1,409,760.1 Squared Cor.: 0.368391
# pc1 on certainty_avg and headline_certainty_avg (no lean1 control), on rows
# where headline_toxic is not NA; SEs clustered by username.
r <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(headline_certainty_avg),
  data = data_posts[!is.na(data_posts$headline_toxic), ],
  cluster = "username"
)
## NOTE: 2,141,817 observations removed because of NA values (LHS: 5,097, RHS: 2,141,502).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 268,131
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.126977 0.010276 -12.35659 < 2.2e-16 ***
## scale(certainty_avg) -0.031447 0.005487 -5.73125 1.0004e-08 ***
## scale(headline_certainty_avg) -0.052913 0.005937 -8.91244 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -385,495.1 Adj. Pseudo R2: 0.002167
## BIC: 771,027.8 Squared Cor.: 0.00624
# pc1 on certainty_avg, headline_certainty_avg and lean1; SEs clustered by
# username. The subset here is selected with headline_avail, not with
# !is.na(headline_toxic) as in the neighbouring models.
r <- feglm(
  scale(pc1) ~
    scale(certainty_avg) + scale(headline_certainty_avg) + scale(lean1),
  data = data_posts[data_posts$headline_avail, ],
  cluster = "username"
)
## NOTE: 2,139,666 observations removed because of NA values (LHS: 5,084, RHS: 2,139,666).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 268,123
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.071351 0.007336 -9.72591 < 2.2e-16 ***
## scale(certainty_avg) -0.008731 0.004065 -2.14795 3.1721e-02 *
## scale(headline_certainty_avg) -0.029257 0.004581 -6.38610 1.7106e-10 ***
## scale(lean1) -0.574338 0.008365 -68.65833 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -325,244.9 Adj. Pseudo R2: 0.158158
## BIC: 650,539.7 Squared Cor.: 0.366071
# pc1 on certainty_avg and lean1, on rows where headline_avail is TRUE and
# text_headline_similarity is below 0.5; SEs clustered by username.
r <- feglm(
  scale(pc1) ~ scale(certainty_avg) + scale(lean1),
  data = data_posts[
    data_posts$headline_avail & data_posts$text_headline_similarity < 0.5,
  ],
  cluster = "username"
)
## NOTE: 1,136,997 observations removed because of NA values (LHS: 4,108, RHS: 1,136,997).
r
## GLM estimation, family = gaussian, Dep. Var.: scale(pc1)
## Observations: 403,449
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.042191 0.007493 -5.63065 1.7987e-08 ***
## scale(certainty_avg) -0.024388 0.003745 -6.51301 7.3905e-11 ***
## scale(lean1) -0.569686 0.007382 -77.16790 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -491,625.4 Adj. Pseudo R2: 0.147961
## BIC: 983,289.6 Squared Cor.: 0.345072
# Missingness check: the outcome is the indicator is.na(certainty_avg),
# regressed on toxicity_ntile, lean1 and log10(1 + engage) with the default
# gaussian family (i.e. a linear model on a TRUE/FALSE outcome); SEs clustered
# by username.
r <- feglm(
  is.na(certainty_avg) ~
    scale(toxicity_ntile) + scale(lean1) + scale(log10(1 + engage)),
  data = data_posts,
  cluster = "username"
)
## NOTE: 3,274,625 observations removed because of NA values (RHS: 3,274,625).
r
## GLM estimation, family = gaussian, Dep. Var.: is.na(certainty_avg)
## Observations: 20,020,899
## Standard-errors: Clustered (username)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.776474 0.001356 572.47117 < 2.2e-16 ***
## scale(toxicity_ntile) -0.065841 0.001013 -64.99559 < 2.2e-16 ***
## scale(lean1) 0.005318 0.000918 5.79385 6.8802e-09 ***
## scale(log10(1 + engage)) -0.038115 0.002209 -17.25483 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -10,796,082.7 Adj. Pseudo R2: 0.028614
## BIC: 21,592,232.6 Squared Cor.: 0.031269