Twitter Sentiment Analysis #CapitolRiot

## Load rtweet and check that a Twitter API token is available for this session
library(rtweet)
get_token()

Background

Sentiment analysis refers to the use of natural language processing, text analysis, computational linguistics, and biometrics to systematically identify, extract, quantify, and study affective states and subjective information. Because social media platforms are so widely used, their publicly available data can be used to gauge sentiment about topics, people, places, and things. This analysis uses the Twitter Application Programming Interface (API), which allows developers to read and write Twitter data. With it, you can compose tweets, read profiles, and access your followers’ data as well as a high volume of tweets on particular subjects in specific locations.

The Twitter data collected for this analysis focuses on the #CapitolRiot topic: the search query used below is the phrase "capitol riot", limited to 2,500 recent tweets with retweets excluded.

Data Exploration

## Search for up to 2500 tweets, ignoring retweets, and store them in a dataframe.
## NOTE(review): the query is the quoted phrase "capitol riot", not the
## #capitolriot hashtag this report refers to elsewhere -- confirm which is intended.
tweet_df <- search_tweets(q = '"capitol riot"', n = 2500, include_rts = FALSE, type = "mixed")

# view the first 10 rows of the dataframe
head(tweet_df, n = 10)

## # A tibble: 10 x 90
##    user_id   status_id   created_at          screen_name text            source 
##    <chr>     <chr>       <dttm>              <chr>       <chr>           <chr>  
##  1 32871086  1374028092~ 2021-03-22 16:00:00 kylegriffi~ "Democratic Re~ TweetD~
##  2 32871086  1374096041~ 2021-03-22 20:30:00 kylegriffi~ "A police dete~ TweetD~
##  3 32871086  1374065887~ 2021-03-22 18:30:11 kylegriffi~ "Prosecutors a~ Twitte~
##  4 807095    1373780623~ 2021-03-21 23:36:38 nytimes     "Breaking News~ Social~
##  5 10975919~ 1374120536~ 2021-03-22 22:07:20 O19928734   "Former top Ca~ Twitte~
##  6 10975919~ 1374107544~ 2021-03-22 21:15:42 O19928734   "Evidence in C~ Twitte~
##  7 290785720 1374120180~ 2021-03-22 22:05:55 jrefior     "Criminal who ~ Twitte~
##  8 290785720 1374012690~ 2021-03-22 14:58:47 jrefior     "“Prosecutors ~ Twitte~
##  9 203646071 1374111758~ 2021-03-22 21:32:27 donnlovett  "Corrections o~ Twitte~
## 10 203646071 1374120082~ 2021-03-22 22:05:32 donnlovett  "UCLA Student ~ Twitte~
## # ... with 84 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, quote_count <int>,
## #   reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## #   urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## #   media_t.co <list>, media_expanded_url <list>, media_type <list>,
## #   ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <list>,
## #   lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## #   quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <int>,
## #   quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <chr>,
## #   quoted_name <chr>, quoted_followers_count <int>,
## #   quoted_friends_count <int>, quoted_statuses_count <int>,
## #   quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## #   retweet_source <chr>, retweet_favorite_count <int>,
## #   retweet_retweet_count <int>, retweet_user_id <chr>,
## #   retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>, country <chr>,
## #   country_code <chr>, geo_coords <list>, coords_coords <list>,
## #   bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## #   description <chr>, url <chr>, protected <lgl>, followers_count <int>,
## #   friends_count <int>, listed_count <int>, statuses_count <int>,
## #   favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## #   profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>

# list every column name in the dataframe
names(tweet_df)

##  [1] "user_id"                 "status_id"              
##  [3] "created_at"              "screen_name"            
##  [5] "text"                    "source"                 
##  [7] "display_text_width"      "reply_to_status_id"     
##  [9] "reply_to_user_id"        "reply_to_screen_name"   
## [11] "is_quote"                "is_retweet"             
## [13] "favorite_count"          "retweet_count"          
## [15] "quote_count"             "reply_count"            
## [17] "hashtags"                "symbols"                
## [19] "urls_url"                "urls_t.co"              
## [21] "urls_expanded_url"       "media_url"              
## [23] "media_t.co"              "media_expanded_url"     
## [25] "media_type"              "ext_media_url"          
## [27] "ext_media_t.co"          "ext_media_expanded_url" 
## [29] "ext_media_type"          "mentions_user_id"       
## [31] "mentions_screen_name"    "lang"                   
## [33] "quoted_status_id"        "quoted_text"            
## [35] "quoted_created_at"       "quoted_source"          
## [37] "quoted_favorite_count"   "quoted_retweet_count"   
## [39] "quoted_user_id"          "quoted_screen_name"     
## [41] "quoted_name"             "quoted_followers_count" 
## [43] "quoted_friends_count"    "quoted_statuses_count"  
## [45] "quoted_location"         "quoted_description"     
## [47] "quoted_verified"         "retweet_status_id"      
## [49] "retweet_text"            "retweet_created_at"     
## [51] "retweet_source"          "retweet_favorite_count" 
## [53] "retweet_retweet_count"   "retweet_user_id"        
## [55] "retweet_screen_name"     "retweet_name"           
## [57] "retweet_followers_count" "retweet_friends_count"  
## [59] "retweet_statuses_count"  "retweet_location"       
## [61] "retweet_description"     "retweet_verified"       
## [63] "place_url"               "place_name"             
## [65] "place_full_name"         "place_type"             
## [67] "country"                 "country_code"           
## [69] "geo_coords"              "coords_coords"          
## [71] "bbox_coords"             "status_url"             
## [73] "name"                    "location"               
## [75] "description"             "url"                    
## [77] "protected"               "followers_count"        
## [79] "friends_count"           "listed_count"           
## [81] "statuses_count"          "favourites_count"       
## [83] "account_created_at"      "verified"               
## [85] "profile_url"             "profile_expanded_url"   
## [87] "account_lang"            "profile_banner_url"     
## [89] "profile_background_url"  "profile_image_url"

# view the text of the first 15 tweets; the `text` column holds the raw tweet content
head(tweet_df$text, n = 15)

##  [1] "Democratic Rep. Ruben Gallego, a Marine Corps veteran, is calling on Veterans Affairs Secretary Denis McDonough to withdraw benefits from active-duty service members, veterans, or military retirees who participated in the Capitol riot. https://t.co/2yqlrLcVbd"                       
##  [2] "A police detective filed for divorce from his wife after she was photographed with another man at the Capitol riot. https://t.co/av66PbJbmO"                                                                                                                                               
##  [3] "Prosecutors are preparing to start plea discussions as early as this week with many of the more than 300 suspects charged in the Capitol riot.\n\nProsecutors have wanted to sort \"misdemeanor plea offers, felonies, felonies with substantial [prison] time.\"\nhttps://t.co/3FGZj7mWL8"
##  [4] "Breaking News: Evidence in the Capitol riot investigation most likely supports sedition charges for some of the suspects, a federal prosecutor told \"60 Minutes.\" https://t.co/Aca2s68xDl"                                                                                               
##  [5] "Former top Capitol riot prosecutor says 'maybe the President is culpable' when asked about Trump\nhttps://t.co/7zNC0QYLPN"                                                                                                                                                                 
##  [6] "Evidence in Capitol riot 'trending towards' sedition charges, prosecutor says https://t.co/Aq9feP8JK4"                                                                                                                                                                                     
##  [7] "Criminal who was only free to conspire against the US because he was pardoned by another criminal: Donald Trump (who may only free because he had presidential immunity)\nhttps://t.co/TYIHE3OQqE"                                                                                         
##  [8] "“Prosecutors and the FBI also have accused 10 members and affiliates of the Oath Keepers with conspiring to obstruct Congress”\n\nWasn’t everyone who stormed the Capitol grounds attempting to obstruct Congress? https://t.co/TqYrFfQYbv"                                                
##  [9] "Corrections officer from Raiford faces charges connected to Capitol riot https://t.co/LUYaaScI25"                                                                                                                                                                                          
## [10] "UCLA Student Christian Secor Sat In VP Chair During Capitol Riot https://t.co/31EnnFYQIG"                                                                                                                                                                                                  
## [11] "Thomasville woman suspended after Capitol riot charges https://t.co/VlztjEpMz7"                                                                                                                                                                                                            
## [12] "'Bullhorn Lady' Rachel Powell Indicted For Role In Capitol Riot https://t.co/rOZMDKWRug"                                                                                                                                                                                                   
## [13] "@brikeilarcnn calls out Kayleigh McEnany's Capitol riot claims https://t.co/pOnuq3ThWJ"                                                                                                                                                                                                    
## [14] "Idaho man facing US Capitol riot charges pleads not guilty https://t.co/irYZhW26TU"                                                                                                                                                                                                        
## [15] "DOJ: Man accused in US Capitol riot appears in Tampa courtroom https://t.co/gpyBlDBFpC"

# show the screen names attached to the first 10 tweets (one entry per tweet, so accounts can repeat)
head(tweet_df$screen_name, n=10)

##  [1] "kylegriffin1" "kylegriffin1" "kylegriffin1" "nytimes"      "O19928734"   
##  [6] "O19928734"    "jrefior"      "jrefior"      "donnlovett"   "donnlovett"

# The per-tweet list repeats accounts that tweeted more than once,
# so show the first 20 distinct screen names instead
head(unique(tweet_df$screen_name), n=20)

##  [1] "kylegriffin1"    "nytimes"         "O19928734"       "jrefior"        
##  [5] "donnlovett"      "profreedomTom"   "wallisweaver"    "donholio"       
##  [9] "JacobHeilbrunn"  "poohtoo2"        "preachingterp"   "aJSDecepida"    
## [13] "Scout20202"      "ShamrockBlessed" "photoframd"      "PCUnitedPage"   
## [17] "tempkt"          "txsue1"          "ReporterCioffi"  "thomasleedunla1"

# look up user accounts matching "#capitolriot"
# NOTE(review): search_users() is a user search rather than a tweet search, and
# the result printed below is a single account whose screen name is CapitolRiot,
# so this may not answer "which users are tweeting the hashtag" -- confirm intent.
users <- search_users("#capitolriot", n = 10)

## Searching for users...

## Finished collecting users!

# view the first rows of the user search result
head(users)

## # A tibble: 1 x 90
##   user_id    status_id  created_at screen_name text  source display_text_width
##   <chr>      <chr>      <chr>      <chr>       <chr> <chr>               <int>
## 1 3142333469 3142333469 <NA>       CapitolRiot <NA>  <NA>                   NA
## # ... with 83 more variables: reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, quote_count <int>,
## #   reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## #   urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## #   media_t.co <list>, media_expanded_url <list>, media_type <list>,
## #   ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <list>,
## #   lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## #   quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <int>,
## #   quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <chr>,
## #   quoted_name <chr>, quoted_followers_count <int>,
## #   quoted_friends_count <int>, quoted_statuses_count <int>,
## #   quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## #   retweet_source <chr>, retweet_favorite_count <int>,
## #   retweet_retweet_count <int>, retweet_user_id <chr>,
## #   retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>, country <chr>,
## #   country_code <chr>, geo_coords <list>, coords_coords <list>,
## #   bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## #   description <chr>, url <lgl>, protected <lgl>, followers_count <int>,
## #   friends_count <int>, listed_count <int>, statuses_count <int>,
## #   favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## #   profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## #   profile_banner_url <lgl>, profile_background_url <chr>,
## #   profile_image_url <chr>

# count the distinct values of the free-text `location` field in the selected tweets
# (blank strings, which some profiles have, count as a value too)
length(unique(tweet_df$location))

## [1] 938

# Bar chart: number of selected tweets for every distinct user location
ggplot(data = tweet_df, mapping = aes(x = location)) +
  geom_bar(na.rm = TRUE) +
  labs(
    title = "Count of Tweets by Unique Locations",
    x = "Location",
    y = "Count"
  ) +
  coord_flip() +
  theme_bw()

# Top 20 locations by number of selected tweets.
# Some profiles carry an empty-string location rather than NA, so blank values
# are dropped together with NA; otherwise "no location" would rank as a place.
tweet_df %>%
  filter(!is.na(location), nzchar(trimws(location))) %>%
  count(location, sort = TRUE) %>%
  # name the weighting column explicitly instead of relying on "Selecting by n"
  top_n(20, n) %>%
  # order the bars by frequency
  mutate(location = reorder(location, n)) %>%
  ggplot(aes(x = location, y = n)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(x = "Location",
       y = "Count",
       title = "Frequency of Unique Locations Which Tweets Are From")

## Selecting by n

# Bar chart: number of selected tweets per country, skipping tweets without a country
ggplot(data = filter(tweet_df, !is.na(country)), mapping = aes(x = country)) +
  geom_bar() +
  labs(
    title = "Count of Unique Countries From Tweets",
    x = "Country",
    y = "Count"
  ) +
  coord_flip() +
  theme_bw()

Data Cleaning and Preparation

# Keep only the user, text, location and hashtag columns used in the rest of the analysis
tweets_ez <- select(
  tweet_df,
  user_id, screen_name, name, description, text, location, country,
  friends_count, source, geo_coords, coords_coords, hashtags
)

tweets_ez

## # A tibble: 2,498 x 12
##    user_id  screen_name name  description  text   location country friends_count
##    <chr>    <chr>       <chr> <chr>        <chr>  <chr>    <chr>           <int>
##  1 32871086 kylegriffi~ Kyle~ "Senior Pro~ "Demo~ "Manhat~ <NA>              827
##  2 32871086 kylegriffi~ Kyle~ "Senior Pro~ "A po~ "Manhat~ <NA>              827
##  3 32871086 kylegriffi~ Kyle~ "Senior Pro~ "Pros~ "Manhat~ <NA>              827
##  4 807095   nytimes     The ~ "News tips?~ "Brea~ "New Yo~ <NA>              902
##  5 1097591~ O19928734   O     ""           "Form~ ""       <NA>               51
##  6 1097591~ O19928734   O     ""           "Evid~ ""       <NA>               51
##  7 2907857~ jrefior     John~ "Software E~ "Crim~ "Waltha~ <NA>             1678
##  8 2907857~ jrefior     John~ "Software E~ "“Pro~ "Waltha~ <NA>             1678
##  9 2036460~ donnlovett  Donn~ "President/~ "Corr~ "Calgar~ <NA>             2605
## 10 2036460~ donnlovett  Donn~ "President/~ "UCLA~ "Calgar~ <NA>             2605
## # ... with 2,488 more rows, and 4 more variables: source <chr>,
## #   geo_coords <list>, coords_coords <list>, hashtags <list>

# preview the raw tweet text kept in the subset
head(tweets_ez$text)

## [1] "Democratic Rep. Ruben Gallego, a Marine Corps veteran, is calling on Veterans Affairs Secretary Denis McDonough to withdraw benefits from active-duty service members, veterans, or military retirees who participated in the Capitol riot. https://t.co/2yqlrLcVbd"                       
## [2] "A police detective filed for divorce from his wife after she was photographed with another man at the Capitol riot. https://t.co/av66PbJbmO"                                                                                                                                               
## [3] "Prosecutors are preparing to start plea discussions as early as this week with many of the more than 300 suspects charged in the Capitol riot.\n\nProsecutors have wanted to sort \"misdemeanor plea offers, felonies, felonies with substantial [prison] time.\"\nhttps://t.co/3FGZj7mWL8"
## [4] "Breaking News: Evidence in the Capitol riot investigation most likely supports sedition charges for some of the suspects, a federal prosecutor told \"60 Minutes.\" https://t.co/Aca2s68xDl"                                                                                               
## [5] "Former top Capitol riot prosecutor says 'maybe the President is culpable' when asked about Trump\nhttps://t.co/7zNC0QYLPN"                                                                                                                                                                 
## [6] "Evidence in Capitol riot 'trending towards' sedition charges, prosecutor says https://t.co/Aq9feP8JK4"

# Remove URLs. The pattern covers both http:// and https:// links and is the
# same one sentiment_bing() uses, so both code paths clean tweets identically.
tweets_ez$stripped_text <- gsub("http\\S+", "", tweets_ez$text)

# Split each tweet into one word per row; unnest_tokens() converts the words to
# lowercase and removes punctuation.
# NOTE: despite the `_stem` suffix, no stemming is applied here.
tweets_ez_stem <- tweets_ez %>%
  select(stripped_text) %>%
  unnest_tokens(word, stripped_text)

head(tweets_ez_stem, n = 10)

## # A tibble: 10 x 1
##    word      
##    <chr>     
##  1 democratic
##  2 rep       
##  3 ruben     
##  4 gallego   
##  5 a         
##  6 marine    
##  7 corps     
##  8 veteran   
##  9 is        
## 10 calling

# Drop every token that appears in the `stop_words` list
cleaned_tweets <- anti_join(x = tweets_ez_stem, y = stop_words)

## Joining, by = "word"

# preview the first 10 tokens that remain after stop-word removal
head(cleaned_tweets, n = 10)

## # A tibble: 10 x 1
##    word      
##    <chr>     
##  1 democratic
##  2 rep       
##  3 ruben     
##  4 gallego   
##  5 marine    
##  6 corps     
##  7 veteran   
##  8 calling   
##  9 veterans  
## 10 affairs

# Top 15 most frequent words in the collected tweets (stop words already removed).
# `word` is mapped to x and the count `n` to y, so the axis labels follow that
# mapping; coord_flip() only changes where each axis is drawn.
cleaned_tweets %>%
  count(word, sort = TRUE) %>%
  # name the weighting column explicitly instead of relying on "Selecting by n"
  top_n(15, n) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(x = "Unique Words", y = "Count",
       title = "Counts of Top 15 Words Found in #CapitolRiot Tweets")

## Selecting by n

Performing Sentiment Analysis

# Preview the words the Bing lexicon labels as positive
filter(get_sentiments("bing"), sentiment == "positive")

## # A tibble: 2,005 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abound      positive 
##  2 abounds     positive 
##  3 abundance   positive 
##  4 abundant    positive 
##  5 accessable  positive 
##  6 accessible  positive 
##  7 acclaim     positive 
##  8 acclaimed   positive 
##  9 acclamation positive 
## 10 accolade    positive 
## # ... with 1,995 more rows

# Preview the words the Bing lexicon labels as negative
filter(get_sentiments("bing"), sentiment == "negative")

## # A tibble: 4,781 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 4,771 more rows

# Keep only the tokens found in the Bing lexicon, then tally every
# word/sentiment pair, most frequent first
bing_tweets <- ungroup(
  count(
    inner_join(cleaned_tweets, get_sentiments("bing")),
    word, sentiment,
    sort = TRUE
  )
)

## Joining, by = "word"

# Plot the most frequent words for each sentiment (top 15 per sentiment),
# one panel per sentiment

bing_tweets %>%
  group_by(sentiment) %>%
  top_n(15) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n, fill = sentiment)) +
  facet_wrap(~sentiment, scales = "free_y") +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_bw() +
  labs(
    x = NULL,
    y = "Contribution to Sentiment",
    title = "Tweets Containing #capitolriot"
  )

## Selecting by n

# Determine a Bing-lexicon sentiment score for a single tweet.
#
# Args:
#   twt: the raw text of one tweet.
#
# Returns a list with:
#   score   - occurrences of positive words minus occurrences of negative
#             words (0 when no word matched the lexicon)
#   type    - "Type 1" when no word of the tweet matched the Bing lexicon,
#             "Type 2" when at least one word matched
#   twt_tbl - tibble of the matched words with columns word, sentiment, n, score
sentiment_bing <- function(twt) {
  twt_tbl <- tibble(text = twt) %>%
    mutate(stripped_text = gsub("http\\S+", "", text)) %>%
    unnest_tokens(word, stripped_text) %>%
    # explicit join keys: without `by`, each join prints a "Joining, by = ..."
    # message for every tweet the function is applied to
    anti_join(stop_words, by = "word") %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    count(word, sentiment, sort = TRUE) %>%
    # score column: -1 per occurrence of a negative word, +1 per positive one
    mutate(
      score = case_when(
        sentiment == "negative" ~ n * (-1),
        sentiment == "positive" ~ n * 1
      )
    )

  # nrow() is a scalar, so plain if/else is used instead of case_when()
  has_match <- nrow(twt_tbl) > 0

  # total score for the tweet
  total_score <- if (has_match) sum(twt_tbl$score) else 0

  # track tweets that did not have a match in the Bing list
  zero_word <- if (has_match) "Type 2" else "Type 1"

  list(score = total_score, type = zero_word, twt_tbl = twt_tbl)
}

# Score every collected tweet with sentiment_bing()
sentiment_riot <- lapply(tweet_df$text, sentiment_bing)

# Collect the per-tweet total score and match type into one table
capitol_sentiment <- tibble(
  tweet = '#CapitolRiot',
  score = unlist(lapply(sentiment_riot, `[[`, "score")),
  type = unlist(lapply(sentiment_riot, `[[`, "type"))
)
capitol_sentiment

## # A tibble: 2,498 x 3
##    tweet        score type  
##    <chr>        <dbl> <chr> 
##  1 #CapitolRiot     1 Type 2
##  2 #CapitolRiot     0 Type 1
##  3 #CapitolRiot    -3 Type 2
##  4 #CapitolRiot     0 Type 2
##  5 #CapitolRiot     1 Type 2
##  6 #CapitolRiot     0 Type 1
##  7 #CapitolRiot     0 Type 2
##  8 #CapitolRiot    -2 Type 2
##  9 #CapitolRiot     0 Type 1
## 10 #CapitolRiot     0 Type 1
## # ... with 2,488 more rows

# Histogram (15 bins) of the total sentiment score per tweet
ggplot(data = capitol_sentiment, mapping = aes(x = score, fill = tweet)) +
  geom_histogram(alpha = .6, bins = 15) +
  theme_bw() +
  labs(title = "Distribution of Total Sentiment Score for Tweets")

The distribution is skewed toward negative sentiment: negative scores have a larger spread than positive scores, and tweets scoring -1 to -4 are more frequent than tweets with the corresponding positive scores (+1 to +4).

# using a different sentiment analysis scoring method (NRC lexicon) and plotting the total score per sentiment

# Converting the cleaned words to ASCII to tackle strange characters.
# iconv() works on a character vector, so the `word` column is passed rather
# than the whole tibble (which would be coerced into one deparsed string).
tweets <- iconv(cleaned_tweets$word, from = "UTF-8", to = "ASCII", sub = "")
# removing retweets, in case needed
tweets <- gsub("(RT|via)((?:\\b\\w*@\\w+)+)", "", tweets)
# removing mentions, in case needed
tweets <- gsub("@\\w+", "", tweets)
# words that became empty after the clean-up carry no sentiment; skip them
tweets <- tweets[!is.na(tweets) & nzchar(tweets)]

ew_sentiment <- get_nrc_sentiment(tweets)

# one row per sentiment category with its total score
sentimentscores <- data.frame(colSums(ew_sentiment[, ]))
names(sentimentscores) <- "Score"
sentimentscores <- cbind("sentiment" = rownames(sentimentscores), sentimentscores)
rownames(sentimentscores) <- NULL

# theme_minimal() is a complete theme and resets earlier theme() settings, so
# it has to come before the legend is switched off
ggplot(data = sentimentscores, aes(x = sentiment, y = Score)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme_minimal() +
  theme(legend.position = "none") +
  xlab("Sentiments") + ylab("Scores") +
  ggtitle("Total sentiment based on scores")

Word Clustering Analytics

# Build a word cloud from tweets matching a search term.
#
# Args:
#   topic: search query passed to search_tweets().
#   n:     maximum number of tweets to request (default 800).
#
# Returns the wordcloud2 widget built from the word frequencies of the
# cleaned tweet text. Stops with an error when the search returns no tweets.
TweetsToWordcloud <- function(topic, n = 800) {

  tweets <- search_tweets(topic, n = n, include_rts = FALSE)
  if (nrow(tweets) == 0) {
    stop("No tweets found for topic: ", topic, call. = FALSE)
  }

  # Clean the data
  text <- str_c(tweets$text, collapse = " ") %>%  # a space keeps adjacent tweets from fusing words
    str_replace_all("\\n", " ") %>%         # replace every line break, not only the first
    rm_twitter_url() %>%                    # Remove URLS
    rm_url() %>%
    str_remove_all("#\\S+") %>%             # Remove any hashtags
    str_remove_all("@\\S+") %>%             # Remove any @ mentions
    tolower() %>%                           # lower-case first so capitalised stop words match
    removeWords(stopwords("en")) %>%        # Remove common words (a, the, it etc.)
    removeNumbers() %>%
    removePunctuation() %>%
    stripWhitespace() %>%
    removeWords(c("amp"))                   # Final cleanup of other small changes

  # Convert the data into a summary table
  term_matrix <-
    Corpus(VectorSource(text)) %>%
    TermDocumentMatrix() %>%
    as.matrix()

  # word frequencies, most frequent first
  term_freq <- sort(rowSums(term_matrix), decreasing = TRUE)
  term_table <- data.frame(word = names(term_freq), freq = term_freq, row.names = NULL)

  wordcloud2(data = term_table, minRotation = 0, maxRotation = 0, ellipticity = 0.5, color = 'random-dark')
}

# Build the word cloud for tweets matching the search term "capitolriot"
TweetsToWordcloud(topic = "capitolriot")