Visualizations for Final Project

Introduction

Visualizations in this file were created to support the Final Project where we analyze news articles classified as “Real” news vs. “Fake” news.

r2d3 Top Words

Create dataframe of true and fake words to use with r2d3

# Load the real-news and fake-news article tables from data/csv/.
# NOTE(review): read.csv() documents `encoding` as a way to mark strings as
# Latin-1 or UTF-8; "ascii" is neither of those, so confirm it has the
# intended effect. The stray "â" tokens filtered out further down suggest the
# files may need `fileEncoding = "UTF-8"` instead -- TODO confirm.
true_df <- read.csv(
  here("data", "csv", "true_df.csv"),
  encoding = "ascii",
  stringsAsFactors = FALSE
)
fake_df <- read.csv(
  here("data", "csv", "fake_df.csv"),
  encoding = "ascii",
  stringsAsFactors = FALSE
)


# Word frequencies for the real-news articles: split `text` into one token per
# row, drop stop words, then count each (type, word) pair, most frequent first.
true_df_words <- true_df %>%
  unnest_tokens(word, text) %>%
  # Name the join key explicitly so the join cannot silently pick up another
  # column that happens to share a name with the stop-word table.
  anti_join(get_stopwords(), by = "word") %>%
  count(type, word, sort = TRUE)

# Top 20 real-news words by count (top_n() keeps ties, so more than 20 rows
# are possible).
# "s", "â" and "t" are presumably tokenisation/encoding debris rather than
# real words. "t" is excluded here as well so that this table uses the same
# filter as the fake-news table and the combined Shiny data frame.
top_true_df_words <- true_df_words %>%
  filter(!word %in% c("s", "â", "t")) %>%
  top_n(20, n)

# Export the top real-news words as a tab-separated file for r2d3 testing.
# The file is written through a UTF-8 connection so that non-ASCII tokens are
# encoded as UTF-8 on disk.
tsv_path <- here("model_scripts", "data.tsv")
con <- file(tsv_path, encoding = "UTF-8")
write.table(
  top_true_df_words,
  file = con,
  sep = "\t",
  row.names = FALSE
)

# Word frequencies for the fake-news articles: split `text` into one token per
# row, drop stop words, then count each (type, word) pair, most frequent first.
fake_df_words <- fake_df %>%
  unnest_tokens(word, text) %>%
  # Name the join key explicitly so the join cannot silently pick up another
  # column that happens to share a name with the stop-word table.
  anti_join(get_stopwords(), by = "word") %>%
  count(type, word, sort = TRUE)

# Top 20 fake-news words by count (ties are kept by top_n(), so more than 20
# rows are possible). The listed tokens are dropped before ranking.
fake_noise_tokens <- c("s", "â", "t")
top_fake_df_words <- fake_df_words %>%
  filter(!(word %in% fake_noise_tokens)) %>%
  top_n(n = 20, wt = n)

D3 bar graphs

# Display the top-20 word tables with the r2d3 JavaScript script. Both calls
# share the same script and r2d3 settings and differ only in the data passed.
d3_bar_script <- "r2d3/d3_scripts.js"

# fake words
r2d3::r2d3(
  data = top_fake_df_words,
  script = d3_bar_script,
  d3_version = "3",
  container = "div"
)

# true words
r2d3::r2d3(
  data = top_true_df_words,
  script = d3_bar_script,
  d3_version = "3",
  container = "div"
)

Sentiment Analysis

Create dataframe to use with shiny app

# Data frame for the Shiny app: the top 20 fake-news words stacked on top of
# the top 20 real-news words, with the same noise tokens removed from both.
# Built in one expression so no throwaway intermediate is left in the
# workspace and the token list is written only once.
shiny_noise_tokens <- c("s", "â", "t")

top_words_all <- rbind(
  fake_df_words %>%
    filter(!word %in% shiny_noise_tokens) %>%
    top_n(20, n),
  true_df_words %>%
    filter(!word %in% shiny_noise_tokens) %>%
    top_n(20, n)
)

Tables for the sentiment analysis: True vs. Fake

library(reshape2)
library(dplyr)
library(htmlwidgets)

# Make sure the fake-news text column is character before tokenising. The
# column is referenced directly inside mutate() rather than through
# `fake_df$`, which is the form mutate() is designed for.
fake_df <- mutate(fake_df, text = as.character(text))

# Re-tokenise both corpora WITHOUT counting: one row per remaining token.
# These assignments replace the earlier per-word count tables that were stored
# under the same names.
true_df_words <- true_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords(), by = "word")

fake_df_words <- fake_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords(), by = "word")


# Ten most frequent sentiment-bearing words per corpus: keep only tokens found
# in the Bing lexicon, count each (word, sentiment) pair, keep the 10 largest
# counts (ties included) and sort them in descending order.
# The join key is named explicitly so that only `word` is used for matching.
fake_df_words_top <- fake_df_words %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment) %>%
  top_n(10, n) %>%
  arrange(desc(n))

true_df_words_top <- true_df_words %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment) %>%
  top_n(10, n) %>%
  arrange(desc(n))

# Table of the top 10 sentiment words in the real-news articles
true_df_words_top %>%
  knitr::kable()

word	sentiment	n
trump	positive	42795
support	positive	7124
like	positive	4664
work	positive	4653
top	positive	4410
opposition	negative	4008
right	positive	3917
well	positive	3848
intelligence	positive	3829
led	positive	3641

# Table of the top 10 sentiment words in the fake-news articles
fake_df_words_top %>%
  knitr::kable()

word	sentiment	n
trump	positive	75423
like	positive	17892
right	positive	10636
well	positive	7773
support	positive	5859
good	positive	5370
work	positive	5118
great	positive	4235
attack	negative	4124
won	positive	3726

Wordclouds

# Draw a positive-vs-negative comparison word cloud for a tokenised corpus.
#
# words_df: data frame with one token per row in a `word` column.
#
# Tokens are matched to the Bing lexicon on `word`, counted per
# (word, sentiment) pair, and reshaped into a word-by-sentiment count matrix
# (missing combinations filled with 0), which comparison.cloud() then plots.
plot_sentiment_cloud <- function(words_df) {
  words_df %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    count(word, sentiment, sort = TRUE) %>%
    acast(word ~ sentiment, value.var = "n", fill = 0) %>%
    comparison.cloud(
      colors = c("gray10", "gray50"),
      max.words = 175
    )
}

# Fake-news cloud first, then real-news cloud.
plot_sentiment_cloud(fake_df_words)

plot_sentiment_cloud(true_df_words)

Visualizations for Final Project

LeTicia Cancel

George Cruz

Jack Wright

12/5/2020

Introduction

r2d3 Top Words

D3 bar graphs

Sentiment Analysis