Introduction

The visualizations in this file were created to support the Final Project, in which we analyze news articles classified as “Real” news vs. “Fake” news.
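
The chunks below rely on several packages that are not attached until later (or not at all) in this file. A setup chunk along these lines is assumed; the list is inferred from the function calls in this file rather than copied from the original source:

#packages assumed to be attached for the chunks below (inferred from the function calls used)
library(here)       #here(): project-relative file paths
library(dplyr)      #pipes, count(), top_n(), filter(), mutate()
library(tidytext)   #unnest_tokens(), get_stopwords(), get_sentiments()
library(wordcloud)  #comparison.cloud() in the Wordclouds section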

r2d3 Top Words

Create dataframes of word counts for the true and fake news articles to use with r2d3

#load true and fake data into dataframes
true_df <- read.csv(here('data', 'csv', 'true_df.csv'), encoding = "ascii", stringsAsFactors = FALSE)
fake_df <- read.csv(here('data', 'csv', 'fake_df.csv'), encoding = "ascii", stringsAsFactors = FALSE)


#total count of words in true df
true_df_words <- true_df %>%
  unnest_tokens(word,text) %>%
  anti_join(get_stopwords()) %>%
  count(type, word, sort = TRUE)

#top 20 true words df
top_true_df_words <- true_df_words %>%
  #select(-c(type)) %>%
  filter(!word %in% c("s","â")) %>%
  top_n(20, n)

#create .tsv file to use in r2d3 testing
con <- file(here('model_scripts', 'data.tsv'), encoding = "UTF-8")
write.table(top_true_df_words, file = con, row.names = FALSE, sep = "\t")
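
As a quick sanity check (not part of the original workflow), the .tsv can be read back to confirm the columns the d3 script will receive:

#read the .tsv back to verify the columns written for r2d3 testing (sanity check only)
check_tsv <- read.delim(here('model_scripts', 'data.tsv'), fileEncoding = "UTF-8")
head(check_tsv)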

#total count of words in fake df
fake_df_words <- fake_df %>%
  unnest_tokens(word,text) %>%
  anti_join(get_stopwords()) %>%
  count(type, word, sort = TRUE)

#top 20 fake words df
top_fake_df_words <- fake_df_words %>%
  #select(-c(type)) %>%
  filter(!word %in% c("s","â","t")) %>%
  top_n(20, n)

D3 Bar Graphs

#call r2d3 javascript code to display top 20 words

#fake words
r2d3::r2d3(data=top_fake_df_words, script = "r2d3/d3_scripts.js", d3_version = "3", container = "div")
#true words
r2d3::r2d3(data=top_true_df_words, script = "r2d3/d3_scripts.js", d3_version = "3", container = "div")
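
Since r2d3() returns an htmlwidget, each chart can also be saved as a standalone HTML page with htmlwidgets (loaded later in this file). A minimal sketch; the output file name is illustrative, not from the project:

#optionally save one of the r2d3 charts as a standalone HTML file (file name is illustrative)
library(htmlwidgets)
fake_chart <- r2d3::r2d3(data = top_fake_df_words, script = "r2d3/d3_scripts.js",
                         d3_version = "3", container = "div")
saveWidget(fake_chart, "fake_top_words.html", selfcontained = TRUE)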

Sentiment Analysis

Create a dataframe to use with the Shiny app

#top 20 fake news words
top_words_all <- fake_df_words %>%
  filter(!word %in% c("s","â","t")) %>%
  top_n(20, n)

#top 20 true news words
x <- true_df_words %>%
  filter(!word %in% c("s","â","t")) %>%
  top_n(20, n)

#top 20 real and fake news words df to use in shiny app
top_words_all <- rbind(top_words_all,x)
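
The project's Shiny app itself is not reproduced here. The sketch below only illustrates how a data frame with type, word, and n columns such as top_words_all could drive a simple bar chart; the app layout and input names are assumptions, not the project's actual code:

#minimal Shiny sketch using top_words_all (illustrative only, not the project's app)
library(shiny)
library(ggplot2)

ui <- fluidPage(
  selectInput("news_type", "News type:", choices = unique(top_words_all$type)),
  plotOutput("word_plot")
)

server <- function(input, output) {
  output$word_plot <- renderPlot({
    top_words_all %>%
      filter(type == input$news_type) %>%
      ggplot(aes(x = reorder(word, n), y = n)) +
      geom_col() +
      coord_flip() +
      labs(x = NULL, y = "word count")
  })
}

#run interactively with shinyApp(ui, server)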

Tables for sentiment analysis: true vs. fake news

library(reshape2)
library(dplyr)
library(htmlwidgets)

#ensure the text column is character before tokenizing
fake_df <- mutate(fake_df, text = as.character(text))

#retokenize the true and fake articles (overwriting the counted versions above) so sentiment can be joined per token
true_df_words <- true_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords())

fake_df_words <- fake_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords())


#top 10 bing sentiment words in fake news
fake_df_words_top <- fake_df_words %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment) %>%
  top_n(10, n) %>%
  arrange(desc(n))

#top 10 bing sentiment words in true news
true_df_words_top <- true_df_words %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment) %>%
  top_n(10, n) %>%
  arrange(desc(n))

# table of top 10 sentiment words in true news
knitr::kable(true_df_words_top)
word          sentiment        n
------------  ---------  -------
trump         positive     42795
support       positive      7124
like          positive      4664
work          positive      4653
top           positive      4410
opposition    negative      4008
right         positive      3917
well          positive      3848
intelligence  positive      3829
led           positive      3641
# table of top 10 sentiment words in fake news
knitr::kable(fake_df_words_top)
word      sentiment        n
--------  ---------  -------
trump     positive     75423
like      positive     17892
right     positive     10636
well      positive      7773
support   positive      5859
good      positive      5370
work      positive      5118
great     positive      4235
attack    negative      4124
won       positive      3726

Wordclouds

#fake and true wordclouds comparing positive and negative words
fake_df_words %>%
  #select(word, type) %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray10", "gray50"),
                   max.words = 175)

true_df_words %>%
  #select(word, type) %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray10", "gray50"),
                   max.words = 175)
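
Note that comparison.cloud() places words with an element of randomness, so the clouds can shift between knits. Setting a seed immediately before each wordcloud chunk (a small addition, not in the original code) keeps the figures reproducible:

#fix the random word placement so the clouds render the same on every knit
set.seed(1234)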