- we didn’t find sig relation between certainty and misinfo in headline datasets from fact-checkers websites. but could be different dataset and how veracity is coded.
1. Reading data
2. Preprocess data
2.1 filtering
2.2 PCA
3. Descriptives
3.1 histograms
p1 <-ggplot(data_posts, aes(x = lean1)) +geom_histogram(bins =20, fill ="skyblue") +theme_minimal()p2 <-ggplot(data_posts, aes(x = certainty_avg)) +geom_histogram(bins =20, fill ="salmon") +theme_minimal()p3 <-ggplot(data_posts, aes(x = toxicity)) +geom_histogram(bins =20, fill ="lightgreen")+theme_minimal()p4 <-ggplot(data_posts, aes(x = pc1)) +geom_histogram(bins =20, fill ="blue") +theme_minimal()p5 <-ggplot(data_posts, aes(x =log10(1+toxicity))) +geom_histogram(bins =20, fill ="blue") +theme_minimal()p6 <-ggplot(data_posts, aes(x =log10(1+engage))) +geom_histogram(bins =20, fill ="blue") +theme_minimal()p7 <-ggplot(data_posts, aes(x = engage)) +geom_histogram(bins =20, fill ="blue") +theme_minimal()# Arrange them in one rowgrid.arrange(p1, p2, p3,p4,p5,p6,p7, nrow =4)
NOTE: 16 observations removed because of NA values (RHS: 16).
[1] "Gettr"
NOTE: 26 observations removed because of NA values (RHS: 26).
[1] "Truthsocial"
NOTE: 312 observations removed because of NA values (RHS: 312).
[1] "Gab"
NOTE: 5,612 observations removed because of NA values (RHS: 5,612).
[1] "Mastodon"
NOTE: 2,684,361 observations removed because of NA values (RHS: 2,684,361).
NOTE: 2,684,369 observations removed because of NA values (RHS: 2,684,369).
[1] "LinkedIn"
NOTE: 117 observations removed because of NA values (RHS: 117).
NOTE: 519 observations removed because of NA values (RHS: 519).
[1] "Telegram"
NOTE: 733 observations removed because of NA values (RHS: 733).
[1] "X"
NOTE: 1,312 observations removed because of NA values (RHS: 1,312).
d_<- results|>filter(var=='estimate'|var=='se')|>filter(measure=='toxic_pc1'|measure=='toxic_pc1_control' )|>pivot_wider( names_from = var, values_from = value# names_glue = "{measure}_{var}" )|>arrange(estimate)d_<-d_|>mutate(group=ifelse (grepl( 'control', measure, fixed =TRUE),'With Political Lean Control', 'No Control'))# Calculate the aggregate effect for each groupres_A <-rma(yi = d_$estimate[d_$group =='With Political Lean Control'], sei = d_$se[d_$group =='With Political Lean Control'])res_B <-rma(yi = d_$estimate[d_$group =='No Control'], sei = d_$se[d_$group =='No Control'])group_A_effects <- d_[d_$group =="With Political Lean Control", ]# Order data by group A effect sizesorder_indices <-order(group_A_effects$estimate)ordered_studies <- group_A_effects$platform[order_indices]# Create a factor for group with levels reordered to put B before Ad_$group <-factor(d_$group, levels =c("With Political Lean Control", "No Control"))# Reorder data by study and then by group (with B first)d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]# Modify slab labels to only show the study name for the first entry of each studyslab_labels <-ifelse(duplicated(d_$platform), "", d_$platform)#png("../../figs/fig3a.png",width =25, height = 25,units = "cm", res = 300) # Adjust size and resolution# Create a forest plot with additional space for aggregate effectsforest(x = d_$estimate,sei = d_$se,slab = slab_labels,col =rep(c("black", "gray"), length.out =nrow(d_)),xlab ="Effect Size",main ="Association toxicity and quality",ylim =c(-5, nrow(d_) +3),psize=.8,# Add space at the bottom,digits =3)# Add a horizontal lineabline(h =0.5, col ="gray")abline(v =0, col ="black")#, lty = 1)# Add aggregate effects with diamond shapes manuallyaddpoly(res_A, atransf =FALSE, row =-1, mlab ="With Political Lean Control", col ="black", cex =1.2)addpoly(res_B, atransf =FALSE, row =-2, mlab ="No Control", col ="gray", cex =1.2)
NOTE: 1,182,479 observations removed because of NA values (LHS: 1,182,479).
NOTE: 1,182,481 observations removed because of NA values (LHS: 1,182,479, RHS: 16).
[1] "Gettr"
NOTE: 14,226 observations removed because of NA values (LHS: 14,226).
NOTE: 14,230 observations removed because of NA values (LHS: 14,226, RHS: 26).
[1] "Truthsocial"
NOTE: 1,305,002 observations removed because of NA values (LHS: 1,305,002).
NOTE: 1,305,024 observations removed because of NA values (LHS: 1,305,002, RHS: 312).
[1] "Gab"
NOTE: 379,792 observations removed because of NA values (LHS: 379,792).
NOTE: 381,707 observations removed because of NA values (LHS: 379,792, RHS: 5,612).
[1] "Mastodon"
NOTE: 3,344,319 observations removed because of NA values (LHS: 2,814,848, RHS: 2,684,361).
NOTE: 3,344,320 observations removed because of NA values (LHS: 2,814,848, RHS: 2,684,369).
[1] "LinkedIn"
NOTE: 16,024 observations removed because of NA values (LHS: 15,987, RHS: 117).
NOTE: 16,288 observations removed because of NA values (LHS: 15,987, RHS: 519).
[1] "Telegram"
NOTE: 615,783 observations removed because of NA values (LHS: 615,783).
NOTE: 615,794 observations removed because of NA values (LHS: 615,783, RHS: 733).
[1] "X"
NOTE: 11,666,643 observations removed because of NA values (LHS: 11,666,643).
NOTE: 11,666,734 observations removed because of NA values (LHS: 11,666,643, RHS: 1,312).
d_<- results|>filter(var=='estimate'|var=='se')|>filter(measure=='certainty_pc1'|measure=='certainty_pc1_control' )|>pivot_wider( names_from = var, values_from = value# names_glue = "{measure}_{var}" )|>arrange(estimate)d_<-d_|>mutate(group=ifelse (grepl( 'control', measure, fixed =TRUE),'With Political Lean Control', 'No Control'))# Calculate the aggregate effect for each groupres_A <-rma(yi = d_$estimate[d_$group =='With Political Lean Control'], sei = d_$se[d_$group =='With Political Lean Control'])res_B <-rma(yi = d_$estimate[d_$group =='No Control'], sei = d_$se[d_$group =='No Control'])group_A_effects <- d_[d_$group =="With Political Lean Control", ]# Order data by group A effect sizesorder_indices <-order(group_A_effects$estimate)ordered_studies <- group_A_effects$platform[order_indices]# Create a factor for group with levels reordered to put B before Ad_$group <-factor(d_$group, levels =c("With Political Lean Control", "No Control"))# Reorder data by study and then by group (with B first)d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]# Modify slab labels to only show the study name for the first entry of each studyslab_labels <-ifelse(duplicated(d_$platform), "", d_$platform)#png("../../figs/fig3a.png",width =25, height = 25,units = "cm", res = 300) # Adjust size and resolution# Create a forest plot with additional space for aggregate effectsforest(x = d_$estimate,sei = d_$se,slab = slab_labels,col =rep(c("black", "gray"), length.out =nrow(d_)),xlab ="Effect Size",main ="association confidence and quality",ylim =c(-5, nrow(d_) +3),psize=.8,# Add space at the bottom,digits =3)# Add a horizontal lineabline(h =0.5, col ="gray")abline(v =0, col ="black")#, lty = 1)# Add aggregate effects with diamond shapes manuallyaddpoly(res_A, atransf =FALSE, row =-1, mlab ="With Political Lean Control", col ="black", cex =1.2)addpoly(res_B, atransf =FALSE, row =-2, mlab ="No Control", col ="gray", cex =1.2)
NOTE: 1,182,481 observations removed because of NA values (RHS: 1,182,481).
[1] "Gettr"
NOTE: 20,259 observations removed because of NA values (LHS: 16,998, RHS: 14,230).
[1] "Truthsocial"
NOTE: 1,305,024 observations removed because of NA values (RHS: 1,305,024).
[1] "Gab"
NOTE: 381,707 observations removed because of NA values (RHS: 381,707).
[1] "Mastodon"
NOTE: 3,344,320 observations removed because of NA values (RHS: 3,344,320).
[1] "LinkedIn"
NOTE: 16,289 observations removed because of NA values (LHS: 4, RHS: 16,288).
[1] "Telegram"
NOTE: 751,405 observations removed because of NA values (LHS: 565,382, RHS: 615,794).
[1] "X"
NOTE: 11,666,734 observations removed because of NA values (RHS: 11,666,734).
d_<- results|>filter(var=='estimate'|var=='se')|>filter(measure=='engage_certainty'|measure=='engage_toxicity' )|>pivot_wider( names_from = var, values_from = value# names_glue = "{measure}_{var}" )|>arrange(estimate)d_<-d_|>mutate(group=ifelse (grepl( 'engage_certainty', measure, fixed =TRUE),'Certainty', 'Toxicity'))# Calculate the aggregate effect for each groupres_A <-rma(yi = d_$estimate[d_$group =='Toxicity'], sei = d_$se[d_$group =='Toxicity'])res_B <-rma(yi = d_$estimate[d_$group =='Certainty'], sei = d_$se[d_$group =='Certainty'])group_A_effects <- d_[d_$group =="Toxicity", ]# Order data by group A effect sizesorder_indices <-order(group_A_effects$estimate)ordered_studies <- group_A_effects$platform[order_indices]# Create a factor for group with levels reordered to put B before Ad_$group <-factor(d_$group, levels =c("Toxicity", "Certainty"))# Reorder data by study and then by group (with B first)d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]# Modify slab labels to only show the study name for the first entry of each studyslab_labels <-ifelse(duplicated(d_$platform), "", d_$platform)forest(x = d_$estimate,sei = d_$se,slab = slab_labels,col =rep(c("black", "gray"), length.out =nrow(d_)),xlab ="Effect Size",main ="association engagment, confidence and toxcity (w/ lean control)",ylim =c(-5, nrow(d_) +3),psize=.8,# Add space at the bottom,digits =3)# Add a horizontal lineabline(h =0.5, col ="gray")abline(v =0, col ="black")#, lty = 1)# Add aggregate effects with diamond shapes manuallyaddpoly(res_A, atransf =FALSE, row =-1, mlab ="Toxicity", col ="black", cex =1.2)addpoly(res_B, atransf =FALSE, row =-2, mlab ="Certainty", col ="gray", cex =1.2)
# Calculate the aggregate effect for each groupd_$group <- d_$measureres_A <-rma(yi = d_$estimate[d_$group =='Toxicity'], sei = d_$se[d_$group =='Toxicity'])res_B <-rma(yi = d_$estimate[d_$group =='Certainty'], sei = d_$se[d_$group =='Certainty'])res_C <-rma(yi = d_$estimate[d_$group =='Quality'], sei = d_$se[d_$group =='Quality'])group_A_effects <- d_[d_$group =="Toxicity", ]# Order data by group A effect sizesorder_indices <-order(group_A_effects$estimate)ordered_studies <- group_A_effects$platform[order_indices]# Create a factor for group with levels reordered to put B before Ad_$group <-factor(d_$group, levels =c("Toxicity", "Certainty","Quality"))# Reorder data by study and then by group (with B first)d_ <- d_[order(match(d_$platform, ordered_studies), d_$group), ]# Modify slab labels to only show the study name for the first entry of each studyslab_labels <-ifelse(duplicated(d_$platform), "", d_$platform)forest(x = d_$estimate,sei = d_$se,slab = slab_labels,col =rep(c("blue", "green",'red'), length.out =nrow(d_)),xlab ="Effect Size",main ="association engagment, confidence, toxcity, and quality (w/ lean control)",ylim =c(-5, nrow(d_) +3),psize=.8,# Add space at the bottom,digits =3)# Add a horizontal lineabline(h =0.5, col ="gray")abline(v =0, col ="black")#, lty = 1)# Add aggregate effects with diamond shapes manuallyaddpoly(res_A, atransf =FALSE, row =-1, mlab ="Toxicity", col ="blue", cex =1.2)addpoly(res_B, atransf =FALSE, row =-2, mlab ="Certainty", col ="green", cex =1.2)addpoly(res_C, atransf =FALSE, row =-3, mlab ="Quality", col ="red", cex =1.2)
5. analysis - headline
r <-feglm( scale(certainty_avg)~scale(pc1)#,cluster='username' ,data_headlines)
NOTE: 746,866 observations removed because of NA values (LHS: 678,258, RHS: 559,391).