Loading libraries

library(readr)
library(rtweet)
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1     v dplyr   0.8.4
## v tibble  2.1.3     v stringr 1.4.0
## v tidyr   1.0.2     v forcats 0.4.0
## v purrr   0.3.3
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()  masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag()     masks stats::lag()
library(tidytext)
library(viridis)
## Loading required package: viridisLite
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

August 2016

NB: Gab was launched on 15 August 2016.

Reading in data from .csv

# Read the August 2016 Gab export.
# Left to type guessing, readr treats conversation_parent_id as logical and the
# numeric ids further down the file fail to parse (they are turned into NA).
# Declaring that one column as numeric keeps those ids; every other column is
# still guessed as before.
gab_data_1 <- read_csv(
  "gab data/gab_data_1.csv",
  col_types = cols(conversation_parent_id = col_double())
)
## Warning: Missing column names filled in: 'X1' [1]
## Warning: 22 parsing failures.
##   row                    col           expected actual                      file
## 21760 conversation_parent_id 1/0/T/F/TRUE/FALSE  31368 'gab data/gab_data_1.csv'
## 23241 conversation_parent_id 1/0/T/F/TRUE/FALSE  31823 'gab data/gab_data_1.csv'
## 23275 conversation_parent_id 1/0/T/F/TRUE/FALSE  33933 'gab data/gab_data_1.csv'
## 23654 conversation_parent_id 1/0/T/F/TRUE/FALSE  34227 'gab data/gab_data_1.csv'
## 30736 conversation_parent_id 1/0/T/F/TRUE/FALSE  44076 'gab data/gab_data_1.csv'
## ..... ...................... .................. ...... .........................
## See problems(...) for more details.

Timeseries of posts

# Hourly count of posts across the month, drawn as a thin dark-blue line.
# NOTE(review): ts_plot() comes from rtweet; this assumes gab_data_1 carries
# the timestamp column it looks for - confirm against the csv schema.
ts_plot(gab_data_1, "1 hours", color = "dark blue", lwd = 0.5) +
  theme_minimal() +
  labs(
    title = "Gab posts August 2016",
    x = "Date of Posting",
    y = "Count of Posts"
  )

Extracting text (“body”)

# Keep only the post bodies, as a one-column data frame named `text`.
gab_text_1 <- data.frame(text = gab_data_1$body, stringsAsFactors = FALSE)

# Load the tidytext stop-word list into the workspace.
data("stop_words")

# Split every post into one-word-per-row tokens, then drop the stop words.
# The join key is left implicit, so dplyr reports the column it joined on.
gab_tokens_1 <- gab_text_1 %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words)
## Joining, by = "word"
# Frequency of each remaining word across all posts.
gab_tokens_count_1 <- gab_tokens_1 %>%
  count(word)

# The ten highest-count words (top_n() ranks by the last column, `n`, and
# keeps ties, so more than ten rows are possible).
top_10 <- gab_tokens_count_1 %>%
  top_n(10)
## Selecting by n

The most frequent words can be previewed with `head(top_10)`.

# Horizontal bar chart of the most frequent words, ordered by count.
# Columns are referenced by bare name inside aes() so ggplot2 resolves them
# from `top_10` itself instead of from vectors pulled out with `$`, which
# bypasses the data argument and is discouraged by ggplot2.
ggplot(top_10, aes(x = reorder(word, n), y = n, fill = n)) +
  geom_col() +
  coord_flip() +
  xlab("Top 10 words in Gab posts") +
  ylab("Count") +
  ggtitle("Top 10 words in Gab posts by count") +
  theme_minimal() +
  scale_fill_viridis(option = "D") +
  theme(legend.position = "none")

# AFINN lexicon: one row per word with its sentiment `value`.
sent <- get_sentiments(lexicon = "afinn")

# Keep only the counted words that appear in the lexicon.
gab_sentiments_1 <- gab_tokens_count_1 %>%
  inner_join(sent, by = "word")

# Weight each word's sentiment value by how often it occurs, then attach the
# result as an extra `weight` column.
weight <- gab_sentiments_1[["n"]] * gab_sentiments_1[["value"]]
gab_sentiments_1 <- cbind(gab_sentiments_1, weight)

# Sentiment-bearing words that occur more than 500 times.
often_used <- gab_sentiments_1 %>%
  filter(n > 500)

# Horizontal bars of the count-weighted sentiment score for each such word.
ggplot(often_used, aes(x = word, y = weight, fill = weight)) +
  geom_col() +
  scale_fill_viridis(option = "D") +
  coord_flip() +
  labs(
    title = "Gab Sentiment Analysis",
    x = "Most Common Words in Gab Dataset",
    y = "Sentiment score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Top Users August 2016

# Number of posts per user name, most active first.
user_count <- gab_data_1 %>%
  count(user.name, sort = TRUE)

# The ten users with the highest post counts (ties are kept by top_n()).
top10_users <- user_count %>%
  top_n(10, n)

# Render the result as a table.
kable(top10_users)
user.name n
Tony H 2249
Andrew Torba 2194
Don 1501
Zeren 1387
Claire Jordan 1303
NimbleCentipede ✔️ 1224
James Calhoun 1176
redacted 1137
Jesus Christ 1127
Shannon Montague 1122
# Restrict the data to posts written by the most active users.
# `%in%` tests membership in the set of names. `==` would recycle the short
# name vector along the rows and keep only rows whose position happens to
# line up with the matching name, silently dropping most of their posts.
user_vector <- top10_users$user.name
top10_posts <- filter(gab_data_1, user.name %in% user_vector)

# Posting activity over time, grouped by user, in a single panel.
top10_posts %>%
  group_by(user.name) %>%
  ts_plot() + theme_minimal()

# The same series split into one panel per user.
top10_posts %>%
  group_by(user.name) %>%
  ts_plot() + facet_wrap(~ user.name) + theme_minimal()

## Posts per User

# Post count per user, sorted from most to least active.
no_of_posts_by_user <- count(gab_data_1, user.name, sort = TRUE)

# Replace the user names with their rank (1 = most active) so the x axis is
# numeric. The rank vector is sized from the data rather than a hard-coded
# user count, so it stays correct if the input file changes.
nums <- seq_len(nrow(no_of_posts_by_user))
no_of_posts_by_user$user.name <- as.numeric(nums)

# One bar per user, in rank order; x tick labels and the legend are hidden.
ggplot(no_of_posts_by_user, aes(x = user.name, y = n, fill = n)) +
  geom_col() +
  scale_fill_viridis(option = "A") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  xlab("Users") +
  theme(legend.position = "none")

Analyse users’ gab text by treating each user as a “document” - their postings can be the terms.