August 2016

NB Gab was launched 15 August 2016

Reading in data from .csv

# Load the August 2016 Gab export.
# conversation_parent_id is declared numeric explicitly: left to type
# guessing, readr picks logical for this column and then reports parsing
# failures for the numeric IDs that appear further down (e.g. 31368).
# All other columns are still guessed.
gab_data_1 <- read_csv(
  "gab data/gab_data_1.csv",
  col_types = cols(conversation_parent_id = col_double())
)

## Warning: Missing column names filled in: 'X1' [1]

## Warning: 22 parsing failures.
##   row                    col           expected actual                      file
## 21760 conversation_parent_id 1/0/T/F/TRUE/FALSE  31368 'gab data/gab_data_1.csv'
## 23241 conversation_parent_id 1/0/T/F/TRUE/FALSE  31823 'gab data/gab_data_1.csv'
## 23275 conversation_parent_id 1/0/T/F/TRUE/FALSE  33933 'gab data/gab_data_1.csv'
## 23654 conversation_parent_id 1/0/T/F/TRUE/FALSE  34227 'gab data/gab_data_1.csv'
## 30736 conversation_parent_id 1/0/T/F/TRUE/FALSE  44076 'gab data/gab_data_1.csv'
## ..... ...................... .................. ...... .........................
## See problems(...) for more details.

Timeseries of posts

# Time series of all posts, aggregated in one-hour intervals.
gab_data_1 %>%
  ts_plot("1 hours", color = "dark blue", lwd = 0.5) +
  theme_minimal() +
  labs(
    title = "Gab posts August 2016",
    y = "Count of Posts",
    x = "Date of Posting"
  )

Extracting text (“body”)

# Keep only the post bodies, as a one-column data frame whose column is
# named `text` (the name the tokenising step expects).
gab_text_1 <- data.frame(text = gab_data_1$body, stringsAsFactors = FALSE)

# Tokenise the `text` column into a `word` column, then drop every row
# whose word appears in the `stop_words` dataset.
data("stop_words")
gab_tokens_1 <- gab_text_1 %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words)

## Joining, by = "word"

# Frequency of each remaining word, and the ten highest-count words.
gab_tokens_count_1 <- gab_tokens_1 %>% count(word)
top_10 <- gab_tokens_count_1 %>% top_n(10)

## Selecting by n

The most frequent words can be inspected with head(top_10).

# Horizontal bar chart of the top words, ordered and coloured by count.
# Columns are referenced by bare name inside aes() so they are looked up in
# the data passed to ggplot(); `top_10$...` bypasses that lookup (ggplot2
# warns that it is discouraged) and breaks if the plot data is ever
# filtered, reordered or faceted.
ggplot(top_10, aes(x = reorder(word, n), y = n, fill = n)) +
  geom_col() +
  coord_flip() +
  xlab("Top 10 words in Gab posts") +
  ylab("Count") +
  ggtitle("Top 10 words in Gab posts by count") +
  theme_minimal() +
  scale_fill_viridis(option = "D") +
  theme(legend.position = "none")

# Attach AFINN sentiment values to the word counts (only words present in
# the lexicon survive the inner join), then add a `weight` column equal to
# count times sentiment value.
sent <- get_sentiments(lexicon = "afinn")
gab_sentiments_1 <- gab_tokens_count_1 %>%
  inner_join(sent, by = "word")
weight <- with(gab_sentiments_1, n * value)
gab_sentiments_1 <- cbind(gab_sentiments_1, weight)

# Weighted sentiment of the lexicon words that occur more than 500 times.
often_used <- gab_sentiments_1 %>% filter(n > 500)
ggplot(often_used, aes(word, weight, fill = weight)) +
  geom_col() +
  scale_fill_viridis(option = "D") +
  coord_flip() +
  labs(
    x = "Most Common Words in Gab Dataset",
    y = "Sentiment score",
    title = "Gab Sentiment Analysis"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Top Users August 2016

# Number of posts per user, sorted by count, and the ten users with the
# most posts rendered as a table.
user_count <- gab_data_1 %>% count(user.name, sort = TRUE)
top10_users <- user_count %>% top_n(10, n)

kable(top10_users)

user.name	n
Tony H	2249
Andrew Torba	2194
Don	1501
Zeren	1387
Claire Jordan	1303
NimbleCentipede ✔️	1224
James Calhoun	1176
redacted	1137
Jesus Christ	1127
Shannon Montague	1122

# Keep every post written by one of the top-ten users and plot one time
# series per user.
# `%in%` tests set membership. `==` would recycle the ten names down the
# column and keep only the rows whose position happens to line up with the
# matching name, silently dropping most of these users' posts.
user_vector <- top10_users$user.name
top10_posts <- filter(gab_data_1, user.name %in% user_vector)
top10_posts %>%
  group_by(user.name) %>%
  ts_plot() +
  theme_minimal()

# Per-user time series of the top users' posts, one panel per user.
ts_plot(group_by(top10_posts, user.name)) +
  facet_wrap(~ user.name) +
  theme_minimal()

## Posts per User

# Post count per user, sorted from most to fewest posts. The user names are
# then replaced by their numeric rank (1 = most posts) so the users can be
# laid out along a continuous x axis.
# The rank vector is sized from the data rather than hard-coded (previously
# 1:2696), so the code keeps working when the number of users changes.
no_of_posts_by_user <- count(gab_data_1, user.name, sort = TRUE)
nums <- seq_len(nrow(no_of_posts_by_user))
no_of_posts_by_user$user.name <- as.numeric(nums)

# Bar per user (x is the user's rank column, y and fill the post count),
# with the x tick labels and the fill legend hidden.
ggplot(no_of_posts_by_user, aes(x = user.name, y = n, fill = n)) +
  geom_col() +
  scale_fill_viridis(option = "A") +
  theme_minimal() +
  labs(x = "Users") +
  theme(axis.text.x = element_blank(), legend.position = "none")

Gab Text Analysis

August 2016

NB Gab was launched 15 August 2016

Timeseries of posts

Top Users August 2016

Next step: analyse users’ Gab text by treating each user as a “document”, with their postings supplying the terms.