library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1 v purrr 0.3.2
## v tibble 2.1.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
# Load the first IRA tweet file and keep a short alias `d` for it.
#
# `tco3_step1` is pinned to character. When readr guessed its type, an earlier
# run of this script reported it as logical and logged 2803 parsing failures on
# rows that hold URLs (e.g. rows 1149, 1397, 2153, 2164, 4319); readr reads
# such values as NA. Every other column is still type-guessed by readr.
#
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running on another machine.
IRAhandle_tweets_1_csv <- read_csv(
  "C:/Users/Nicholas/Downloads/IRAhandle_tweets_1.csv.txt",
  col_types = cols(tco3_step1 = col_character())
)
head(IRAhandle_tweets_1_csv)
d <- IRAhandle_tweets_1_csv
# Pull every hashtag out of the tweet text and count how often each one occurs.
hashtags_column <- d$content
hashtags_column <- as.data.frame(hashtags_column)
head(hashtags_column)
# The negative lookbehind `(?<!&)` skips a `#` that directly follows `&`, so
# HTML numeric character references such as `&#039;` are not counted as
# hashtags. An earlier run with the unguarded pattern listed `#038` (2) and
# `#039` (22) among its first counts; these are presumably the references
# `&#038;` and `&#039;` -- TODO confirm against the raw text.
hashtags <- str_extract_all(
  hashtags_column$hashtags_column,
  "(?<!&)#[a-zA-Z0-9]{1,}"
)
# Flatten the per-tweet list of matches into one character vector.
hashtags <- unlist(hashtags)
hashtags.table <- table(hashtags)
head(hashtags.table)
# Build `top100`: the (up to) 100 most frequent hashtags, stamped with today's
# date.
hashtags_df <- as.data.frame(hashtags.table)
# Sort by frequency BEFORE truncating. `hashtags_df` comes out of `table()` in
# alphabetical order, so slicing rows 1:100 first would keep the first 100
# tags alphabetically rather than the 100 most frequent ones.
top100 <- head(arrange(hashtags_df, desc(Freq)), 100)
head(top100)
# Size the date column from the actual row count so that fewer than 100
# distinct hashtags does not break the `cbind()` below.
date <- rep(Sys.Date(), nrow(top100))
top100 <- cbind(top100, date)
head(top100)
# Horizontal bar chart of `Freq` per hashtag for the rows held in `top100`.
ggplot(data = top100, mapping = aes(x = hashtags, y = Freq)) +
  geom_col(position = position_dodge(width = 0.9), width = 0.5) +
  coord_flip()
