# Packages
library(tidyverse)
library(ggthemes)
library(ggsci)
library(ggpubr)
library(cowplot)
library(Zelig)
library(texreg)
# What Type of Content are Different News Sources Publishing on Facebook?
# Load the fact-check dataset from the working directory.
facebook <- read.csv("facebook_fact.csv")

# Count rows for every Category/Rating pair. summarise() peels off only the
# innermost grouping level (Rating), so the mutate() that follows divides each
# count by the total for its Category.
question_1 <- facebook %>%
  group_by(Category, Rating) %>%
  summarise(count = n()) %>%
  mutate(percent = count / sum(count))
# Theme fragment that horizontally centres a plot title; added to each figure.
center_title <- theme(plot.title = element_text(hjust = 0.5))

# One bar per news category, filled by fact-check rating.
question_1_plot <- ggplot(
  question_1,
  aes(x = Category, y = percent, fill = Rating)
) +
  geom_bar(stat = "identity") +
  labs(title = "Content Rating by News Category", y = NULL, x = NULL) +
  theme_minimal() +
  theme(text = element_text(size = 12)) +
  center_title +
  scale_fill_npg()
# By Outlet, Who is Publishing Non-factual Stories?
# Tabulate fact-check ratings per Facebook page for one news category.
#
# data:          data frame with Category, Page and Rating columns.
# category_name: value of Category to keep (e.g. "left").
#
# Returns one row per Page/Rating pair with `count` (number of rows) and
# `percent` (that rating's share of the page's rows). summarise() drops only
# the innermost grouping level, so the result stays grouped by Category and
# Page and sum(count) is taken within each page.
summarise_page_ratings <- function(data, category_name) {
  data %>%
    # `!!` forces the argument's value, so a column that happened to be named
    # `category_name` could never shadow it inside filter().
    filter(Category == !!category_name) %>%
    group_by(Category, Page, Rating) %>%
    summarise(count = n()) %>%
    mutate(percent = count / sum(count))
}

# Same summary for each of the three news categories.
question_2_left <- summarise_page_ratings(facebook, "left")
question_2_main <- summarise_page_ratings(facebook, "mainstream")
question_2_right <- summarise_page_ratings(facebook, "right")
# Horizontal bar chart of rating shares per page for one news category.
#
# data:  per-page rating summary with Page, percent and Rating columns.
# title: panel title.
#
# Every panel is built from the same layers in the same order, and the
# title-centring theme is applied exactly once.
plot_page_ratings <- function(data, title) {
  ggplot(data, aes(x = Page, y = percent, fill = Rating)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    scale_fill_npg() +
    labs(title = title, x = NULL, y = NULL) +
    theme_minimal() +
    theme(text = element_text(size = 12)) +
    center_title
}

question_2_plot_l <- plot_page_ratings(question_2_left, "Left")
question_2_plot_m <- plot_page_ratings(question_2_main, "Mainstream")
question_2_plot_r <- plot_page_ratings(question_2_right, "Right")

# Stack the three panels vertically with legends on the right, then add an
# overall title above the arrangement.
question_2_plot_combined <- ggarrange(
  question_2_plot_l,
  question_2_plot_m,
  question_2_plot_r,
  nrow = 3,
  legend = "right"
) %>%
  annotate_figure(
    top = text_grob("Content Published by News Outlet", size = 16, hjust = 0.6)
  )
# Linear Model
# Encode fact-check outcomes as 0/1 integers: `false_int` flags rows rated
# "mostly false"; `false_true_int` also flags rows rated
# "mixture of true and false". %in% never yields NA, so the logical result
# converts directly to 0L/1L.
facebook <- facebook %>%
  mutate(
    false_int = as.integer(Rating %in% "mostly false"),
    false_true_int = as.integer(
      Rating %in% c("mostly false", "mixture of true and false")
    )
  )

# Least-squares regression of share count on news category.
share_by_category <- zelig(
  share_count ~ Category,
  model = "ls",
  data = facebook,
  cite = FALSE
)
# Logistic Models (1)
# Logistic regression to see the probability of a category producing fake
# content (outcome: the `false_int` indicator).
truth_by_category <- zelig(
  false_int ~ Category,
  model = "logit",
  data = facebook,
  cite = FALSE
)

# Simulate two scenarios: `x` is the left category, `x1` is the right category.
x.left <- setx(truth_by_category, Category = "left")
x.right <- setx(truth_by_category, Category = "right")
false_model <- sim(truth_by_category, x = x.left, x1 = x.right)

# Simulated expected values per scenario. The left scenario is stored under
# "x" and the right under "x1" (see the sim() call), so each object is named
# after the scenario it actually holds.
left_fd <- false_model$get_qi(xvalue = "x", qi = "ev")
right_fd <- false_model$get_qi(xvalue = "x1", qi = "ev")

# Column order is left then right; the code below addresses them as V1 and V2.
combined_fd <- as.data.frame(cbind(left_fd, right_fd))

# Long-format frames (class, simv) for the per-category histograms.
left_hist <- combined_fd %>%
  select(V1) %>%
  gather(key = "class", value = "simv")
right_hist <- combined_fd %>%
  select(V2) %>%
  gather(key = "class", value = "simv")

right_plot <- ggplot(right_hist, aes(simv)) +
  geom_histogram(color = "white", fill = "red", bins = 25) +
  theme_minimal() +
  labs(title = "Right", x = NULL, y = NULL) +
  center_title
left_plot <- ggplot(left_hist, aes(simv)) +
  geom_histogram(color = "white", fill = "blue", bins = 25) +
  theme_minimal() +
  labs(title = "Left", x = NULL, y = NULL) +
  center_title

# Side-by-side histograms with shared axis labels.
not_included_1 <- ggarrange(left_plot, right_plot, ncol = 2) %>%
  annotate_figure(left = "Count", bottom = "Simulated Values")

# Simulated first differences between the two scenarios.
fd <- false_model$get_qi(xvalue = "x1", qi = "fd")
# Logistic Models (2)
# Logistic regression to see the probability of a category producing fake or
# mixed fake/true content (outcome: the `false_true_int` indicator).
false_true <- zelig(
  false_true_int ~ Category,
  model = "logit",
  data = facebook,
  cite = FALSE
)

# Scenario `x` is the left category and `x1` is the right category.
x.left2 <- setx(false_true, Category = "left")
x.right2 <- setx(false_true, Category = "right")
false_model_sim <- sim(false_true, x = x.left2, x1 = x.right2)

# Simulated expected values for each scenario.
left_fd2 <- false_model_sim$get_qi(xvalue = "x", qi = "ev")
right_fd2 <- false_model_sim$get_qi(xvalue = "x1", qi = "ev")

# Column order is left then right; the code below addresses them as V1 and V2.
combined_fd2 <- as.data.frame(cbind(left_fd2, right_fd2))

# Long-format frames (class, simv) for the per-category histograms.
left_hist2 <- gather(select(combined_fd2, V1), key = "class", value = "simv")
right_hist2 <- gather(select(combined_fd2, V2), key = "class", value = "simv")

left_plot2 <- ggplot(left_hist2, aes(simv)) +
  geom_histogram(color = "white", fill = "blue", bins = 25) +
  theme_minimal() +
  labs(title = "Left", x = NULL, y = NULL) +
  center_title
right_plot2 <- ggplot(right_hist2, aes(simv)) +
  geom_histogram(color = "white", fill = "red", bins = 25) +
  theme_minimal() +
  labs(title = "Right", x = NULL, y = NULL) +
  center_title

# Side-by-side histograms with shared axis labels.
not_included <- annotate_figure(
  ggarrange(left_plot2, right_plot2, ncol = 2),
  left = "Count",
  bottom = "Simulated Values"
)

# Simulated first differences between the two scenarios.
fd2 <- false_model_sim$get_qi(xvalue = "x1", qi = "fd")