Loading libraries
library(readr)
library(rtweet)
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1 v dplyr 0.8.4
## v tibble 2.1.3 v stringr 1.4.0
## v tidyr 1.0.2 v forcats 0.4.0
## v purrr 0.3.3
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
library(tidytext)
library(viridis)
## Loading required package: viridisLite
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
Reading in data from .csv
# Read the Gab posts export.
# Left to type guessing, `conversation_parent_id` is inferred as logical
# (apparently because the rows sampled for guessing hold no ids), and the
# numeric ids further down the file then fail to parse and are turned into NA.
# Declaring the column as double keeps those ids; every other column is still
# guessed as before.
gab_data_1 <- read_csv(
  "gab data/gab_data_1.csv",
  col_types = cols(conversation_parent_id = col_double())
)
## Warning: Missing column names filled in: 'X1' [1]
## Warning: 22 parsing failures.
## row col expected actual file
## 21760 conversation_parent_id 1/0/T/F/TRUE/FALSE 31368 'gab data/gab_data_1.csv'
## 23241 conversation_parent_id 1/0/T/F/TRUE/FALSE 31823 'gab data/gab_data_1.csv'
## 23275 conversation_parent_id 1/0/T/F/TRUE/FALSE 33933 'gab data/gab_data_1.csv'
## 23654 conversation_parent_id 1/0/T/F/TRUE/FALSE 34227 'gab data/gab_data_1.csv'
## 30736 conversation_parent_id 1/0/T/F/TRUE/FALSE 44076 'gab data/gab_data_1.csv'
## ..... ...................... .................. ...... .........................
## See problems(...) for more details.
# Time series of post counts aggregated in one-hour intervals.
ts_plot(gab_data_1, by = "1 hours", color = "dark blue", lwd = 0.5) +
  theme_minimal() +
  labs(
    title = "Gab posts August 2016",
    y = "Count of Posts",
    x = "Date of Posting"
  )
Extracting text (“body”)
# Put the post bodies into a one-column data frame (`text`), then split the
# text into one token per row in a column called `word`.
gab_text_1 <- data.frame(text = gab_data_1$body, stringsAsFactors = FALSE)
gab_tokens_1 <- unnest_tokens(gab_text_1, output = word, input = text)
# Load the `stop_words` dataset and drop every token that appears in it.
# The join key is left implicit, so dplyr reports the column it joined on.
data(stop_words)
gab_tokens_1 <- gab_tokens_1 %>%
  anti_join(stop_words)
## Joining, by = "word"
# Frequency of each remaining word, then the rows with the highest counts.
# No weighting column is given, so top_n() falls back to the last column (`n`).
gab_tokens_count_1 <- gab_tokens_1 %>%
  count(word)
top_10 <- gab_tokens_count_1 %>%
  top_n(10)
## Selecting by n
there are head(top_10)
# Horizontal bar chart of the most frequent words, ordered by count.
# Columns are referenced by bare name inside aes(): writing `top_10$word`
# there bypasses ggplot2's data masking, is flagged as discouraged by ggplot2,
# and breaks as soon as the plot is faceted or given different data.
ggplot(top_10, aes(x = reorder(word, n), y = n, fill = n)) +
  geom_col() +
  coord_flip() +
  xlab("Top 10 words in Gab posts") +
  ylab("Count") +
  ggtitle("Top 10 words in Gab posts by count") +
  theme_minimal() +
  scale_fill_viridis(option = "D") +
  # Must come after theme_minimal(), which would otherwise restore the legend.
  theme(legend.position = "none")
# Attach AFINN sentiment values to the word counts; words without a lexicon
# entry are dropped by the inner join.
sent <- get_sentiments("afinn")
gab_sentiments_1 <- gab_tokens_count_1 %>%
  inner_join(sent, by = "word")

# Weight each word's sentiment value by how often the word occurs.
weight <- with(gab_sentiments_1, n * value)
gab_sentiments_1 <- cbind(gab_sentiments_1, weight)

# Only plot words that occur more than 500 times.
often_used <- gab_sentiments_1 %>%
  filter(n > 500)

ggplot(often_used, aes(x = word, y = weight, fill = weight)) +
  geom_col() +
  scale_fill_viridis(option = "D") +
  coord_flip() +
  labs(
    x = "Most Common Words in Gab Dataset",
    y = "Sentiment score",
    title = "Gab Sentiment Analysis"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Number of posts per user, sorted from most to fewest, then the top 10 by
# post count rendered as a table.
user_count <- gab_data_1 %>%
  count(user.name, sort = TRUE)
top10_users <- user_count %>%
  top_n(10, wt = n)
kable(top10_users)
| user.name | n |
|---|---|
| Tony H | 2249 |
| Andrew Torba | 2194 |
| Don | 1501 |
| Zeren | 1387 |
| Claire Jordan | 1303 |
| NimbleCentipede ✔️ | 1224 |
| James Calhoun | 1176 |
| redacted | 1137 |
| Jesus Christ | 1127 |
| Shannon Montague | 1122 |
# Keep every post written by any of the most active users.
# `%in%` tests set membership. The previous `==` recycled `user_vector` along
# the rows, so each row was compared with only one of the names (chosen by row
# position) and most of these users' posts were silently dropped.
user_vector <- top10_users$user.name
top10_posts <- filter(gab_data_1, user.name %in% user_vector)
# Posting activity over time for the selected users, grouped by user name.
ts_plot(group_by(top10_posts, user.name)) +
  theme_minimal()

# The same grouped series, split into one panel per user name.
ts_plot(group_by(top10_posts, user.name)) +
  facet_wrap(~ user.name) +
  theme_minimal()
## Posts per User
# Count posts per user, most active first.
no_of_posts_by_user <- count(gab_data_1, user.name, sort = TRUE)

# Replace the user names with their numeric position in the sorted table so
# the x axis is a plain index. The index length is taken from the data instead
# of a hard-coded user count, so the assignment no longer fails when the
# number of distinct users changes.
nums <- seq_len(nrow(no_of_posts_by_user))
no_of_posts_by_user$user.name <- as.numeric(nums)

# One bar per user; x tick labels and the legend are suppressed.
ggplot(no_of_posts_by_user, aes(x = user.name, y = n, fill = n)) +
  geom_col() +
  scale_fill_viridis(option = "A") +
  theme_minimal() +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  xlab("Users")