Twitter Sentiment Analysis

library(twitteR)
library(tidytext)
library(stringr)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:twitteR':
## 
##     id, location

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

generosity

# Fetch the most recent tweets tagged #Trump and flatten the status list into
# a data frame with one row per tweet.
# NOTE(review): assumes twitteR OAuth credentials were registered earlier in
# the session -- that setup is not shown in this file.
num_tweets <- 1000
Fd <- searchTwitter("#Trump", n = num_tweets)
Fd_df <- twListToDF(Fd)

# Ten most common raw client strings (still wrapped in their HTML anchor).
# Rank explicitly and cut with head(): top_n() without `wt` silently ranks by
# whichever column happens to be last and keeps every tie, so it can return
# more than ten rows.
Fd_df %>%
  group_by(statusSource) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)

## Selecting by n

## # A tibble: 10 × 2
##                                                                   statusSource
##                                                                          <chr>
## 1  <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPh
## 2  <a href="http://twitter.com/download/android" rel="nofollow">Twitter for An
## 3           <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
## 4  <a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iP
## 5  <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">Tweet
## 6      <a href="https://mobile.twitter.com" rel="nofollow">Mobile Web (M5)</a>
## 7                          <a href="http://ifttt.com" rel="nofollow">IFTTT</a>
## 8                        <a href="http://paper.li" rel="nofollow">Paper.li</a>
## 9  <a href="http://linkis.com" rel="nofollow">Put your button on any page! </a
## 10             <a href="http://www.hootsuite.com" rel="nofollow">Hootsuite</a>
## # ... with 1 more variables: n <int>

# Extract the platform name from the HTML anchor held in statusSource, e.g.
#   <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
# becomes "Twitter Web Client": everything between the first '>' and the first
# '</a>'. sub() leaves a value untouched when it is not wrapped that way, so
# re-running this chunk (or meeting a plain-text source) no longer blanks the
# column as the positional substr()/regexpr() arithmetic did.
Fd_df$statusSource <- sub("^[^>]*>(.*?)</a>.*$", "\\1", Fd_df$statusSource,
                          perl = TRUE)

# Tweets per platform plus each platform's share of the sample.
# percent_of_tweets is a 0-1 fraction (n / sum(n)) despite its name.
Fd_platform <- Fd_df %>%
  group_by(statusSource) %>%
  summarize(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))

# Fd_platform is already sorted by n, so head() yields exactly the ten largest
# platforms; top_n(10) would rank by the last column and keep ties.
Fd_platform %>% head(10)

## Selecting by percent_of_tweets

## # A tibble: 10 × 3
##                     statusSource     n percent_of_tweets
##                            <chr> <int>             <dbl>
## 1             Twitter for iPhone   311             0.311
## 2            Twitter for Android   281             0.281
## 3             Twitter Web Client   238             0.238
## 4               Twitter for iPad    53             0.053
## 5                      TweetDeck    20             0.020
## 6                Mobile Web (M5)    17             0.017
## 7                          IFTTT    15             0.015
## 8                       Paper.li     8             0.008
## 9                      Hootsuite     5             0.005
## 10 Put your button on any page!      5             0.005

# Most active accounts in the #Trump sample and each account's share of the
# tweets (0-1 fraction). head() after the sort caps the listing at ten rows;
# top_n(10) kept every account tied with the tenth one and printed 48 rows.
Fd_df %>%
  group_by(screenName) %>%
  summarise(n = n()) %>%
  mutate(percent_of_tweets_Fd = n / sum(n)) %>%
  arrange(desc(n)) %>%
  head(10)

## Selecting by percent_of_tweets_Fd

## # A tibble: 48 × 3
##         screenName     n percent_of_tweets_Fd
##              <chr> <int>                <dbl>
## 1     TheUSofFail_     6                0.006
## 2           lbljm1     4                0.004
## 3  obssessedlawley     4                0.004
## 4  OnceUpo00786525     4                0.004
## 5       ChadKMills     3                0.003
## 6   DaNastysFinest     3                0.003
## 7         FilsFoto     3                0.003
## 8  MichaelMcGreev5     3                0.003
## 9          Nini116     3                0.003
## 10      _izadoraf_     2                0.002
## # ... with 38 more rows

# Token separator used by unnest_tokens(): any character that is not a letter,
# digit, '#', '@' or apostrophe, plus an apostrophe that is not followed by one
# of those characters. Hashtags, mentions and contractions stay in one piece.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"

# One row per token for the #Trump tweets.
Fd_words <- Fd_df %>%
  # drop tweets whose text starts with a double quote
  filter(!str_detect(text, '^"')) %>%
  # strip t.co short links and the HTML-escaped ampersand before tokenising
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # discard stop words and tokens that contain no lowercase letter
  filter(!word %in% stop_words$word,
         str_detect(word, "[a-z]"))

# Twenty most frequent tokens. head() after the sort returns exactly twenty
# rows; top_n(20) kept ties with the twentieth token and returned 21.
Fd_words %>%
  group_by(word) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  head(20)

## Selecting by n

## # A tibble: 21 × 2
##          word     n
##         <chr> <int>
## 1      #trump   937
## 2          rt   689
## 3       trump   111
## 4       https   103
## 5  protesters    96
## 6   @wdfx2eu8    72
## 7     pilling    71
## 8         red    71
## 9        @the    70
## 10         de    70
## # ... with 11 more rows

# Word -> sentiment lookup restricted to the NRC lexicon rows of tidytext's
# `sentiments` table; only the two columns needed for the join are kept.
# NOTE(review): this relies on `sentiments` carrying a `lexicon` column. Newer
# tidytext releases may expose lexicons differently (get_sentiments()) --
# confirm against the installed version.
nrc <- select(filter(sentiments, lexicon == "nrc"), word, sentiment)
head(nrc, n = 6)

## # A tibble: 6 × 2
##        word sentiment
##       <chr>     <chr>
## 1    abacus     trust
## 2   abandon      fear
## 3   abandon  negative
## 4   abandon   sadness
## 5 abandoned     anger
## 6 abandoned      fear

# Label every #Trump token with its NRC sentiment(s). The inner join drops
# tokens that are not in the lexicon and yields one row per (token, sentiment)
# pair when a word carries several labels.
Fd_words_sentiments <- inner_join(Fd_words, nrc, by = "word")

# Number of labelled tokens per sentiment, largest first
Fd_words_sentiments %>%
  group_by(sentiment) %>%
  summarize(n = n()) %>%
  arrange(-n)

## # A tibble: 10 × 2
##       sentiment     n
##           <chr> <int>
## 1      positive   727
## 2         trust   616
## 3      negative   497
## 4  anticipation   359
## 5           joy   329
## 6         anger   293
## 7          fear   278
## 8       sadness   234
## 9      surprise   223
## 10      disgust   191

# Ids of tweets that contain at least one token labelled "positive"
pos_tw_ids <- distinct(
  filter(Fd_words_sentiments, sentiment == "positive"),
  id
)

# Text of the first ten such tweets; the inner join on id keeps only tweets
# whose id appears in pos_tw_ids
Fd_df %>%
  inner_join(pos_tw_ids, by = "id") %>%
  select(text) %>%
  slice(seq_len(10))

##                                                                                                                                                   text
## 1  RT @ToDropADime2: #Trump less than 48 hrs:\n\nRespects Obama\nNot Repealing OC\nWants Bill Clinton as Advisor\nNo Wall\nNo Deportation\n#ChrisChri<U+0085>
## 2       Also relevant, my follow-up tweet from last June @marcmaron\n#Election2016 #Hillary #Trump #notmypresident #safetypin\nhttps://t.co/GbR1Vy1eye
## 3         RT @RVAwonk: Instead of telling us that not all #Trump voters are racist, how about condemning the acts of racism being committed in his na<U+0085>
## 4                   A madman has been given the keys to the surveillance state | Electing #Trump is a major #FAIL for #America https://t.co/WM9DrBnqCi
## 5                                                       RT @immigrant4trump: Trump: No Salary for Me as President #maga #trump https://t.co/edrQuTS5HT
## 6             Humanity strikes back at @realDonaldTrump. Join the #WorldToTrump open letter that<U+0092>s sweeping the planet! #Trump https://t.co/q8HKnoZ59N
## 7         RT @YaleE360: With #Trump win, the Paris agreement will go on, but without U.S. as a key leader, policy expert David Victor writes https://<U+0085>
## 8          @PrisonPlanet Obviously the #trump supporters were not #whitetrash enough for their opposition, could not get down on their low brow level.
## 9         RT @Diplomat_APAC: The 'Pivot' Gets Trumped: The #Asia-Pacific Under President #Trump https://t.co/yhi09Dfqst @nktpnd and @TheAsianist disc<U+0085>
## 10    RT @The_NewRight: When Obama won, we didn't riot. We waited 8 years &amp; protested at the ballot box! #TrumpProtest #TrumpRiot #Trump #Trump20<U+0085>

# Distinct (tweet id, token) pairs for tokens labelled "disgust".
# NOTE(review): the variable is named neg_* but the filter selects "disgust",
# not "negative" -- confirm which sentiment was intended.
neg_id_words <- distinct(
  filter(Fd_words_sentiments, sentiment == "disgust"),
  id, word
)

# First ten tweet texts alongside the token that triggered the label; a tweet
# shows up once for each distinct matching token it contains
Fd_df %>%
  inner_join(neg_id_words, by = "id") %>%
  select(text, word) %>%
  slice(seq_len(10))

##                                                                                                                                                                                     text
## 1                                                   @SharylAttkisson #CognitiveDissonance is the only explanation. They should look in a mirror when saying how bad #Trump is! #clueless
## 2                                              #HealingAmerica  #POTUS why won't Obama or #trump or #HillaryClinton  (the supposed leaders)  say anything to help  the scared and angry.
## 3                                                     RT @ToDropADime2: #Trump gets crushed and screws his #AltRight #nazi followers all in 1 shot. #VeteransDay https://t.co/tM0wxMIkYd
## 4                                              RT @WDFx2EU8: Stay safe, folks, this weekend is when the #Trump protesters unleash hell, this is the GREATEST RED-PILLING IN USA HISTORY.
## 5  @MatthewKick @EspeOppenheimer \nRage! <ed><U+00A0><U+00BD><ed><U+00B8><U+00A1>\nThere's a difference between sour grapes and sheer terror, and the differenc<U+0085> https://t.co/iTfPTW2pXy
## 6                                              RT @WDFx2EU8: Stay safe, folks, this weekend is when the #Trump protesters unleash hell, this is the GREATEST RED-PILLING IN USA HISTORY.
## 7                                         RT @ToDropADime2: Here's the CRAZY LUNATIC U idiots just elected.\n#Trump tweets in 2012 when he Thought #MittRomney won\nPopular vote:  SMH.<U+0085>
## 8                                                                  Hate filled orange #POTUS #Trump doesn't know beauty #NewBluehand #Bluehand #USA #DonaldTrump https://t.co/yFwCuF2ckT
## 9                                           RT @OutFrontCNN: "This is what happens when you elect a fascist, racist bigot" - Nationwide protests erupt following #Trump's win https://t<U+0085>
## 10                                         RT @shomaristone: Calls Grow for Trump to Respond to Hate Incidents, KKK Rally. #Trump #TrumpPence16 @nbcwashington \nhttps://t.co/hDlaTM0y3w
##       word
## 1      bad
## 2    angry
## 3  crushed
## 4     hell
## 5     sour
## 6     hell
## 7  lunatic
## 8     hate
## 9    bigot
## 10    hate

guilt

# Fetch the most recent tweets tagged #Obama (same sample size as the #Trump
# pull, via num_tweets) and flatten them into a data frame.
Bl <- searchTwitter("#Obama", n = num_tweets)
Bl_df <- twListToDF(Bl)

# Extract the platform name from the HTML anchor held in statusSource:
# everything between the first '>' and the first '</a>'. sub() leaves a value
# untouched when it is not wrapped that way, so re-running this chunk no longer
# blanks the column as the positional substr()/regexpr() arithmetic did.
Bl_df$statusSource <- sub("^[^>]*>(.*?)</a>.*$", "\\1", Bl_df$statusSource,
                          perl = TRUE)

# Tweets per platform plus each platform's share of the sample.
# percent_of_tweets is a 0-1 fraction (n / sum(n)) despite its name.
Bl_platform <- Bl_df %>%
  group_by(statusSource) %>%
  summarize(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))

# Bl_platform is already sorted by n, so head() yields exactly ten rows;
# top_n(10) kept the platforms tied for tenth place and printed 12.
Bl_platform %>% head(10)

## Selecting by percent_of_tweets

## # A tibble: 12 × 3
##                     statusSource     n percent_of_tweets
##                            <chr> <int>             <dbl>
## 1             Twitter for iPhone   488             0.488
## 2            Twitter for Android   186             0.186
## 3             Twitter Web Client   184             0.184
## 4               Twitter for iPad    49             0.049
## 5                Mobile Web (M5)    24             0.024
## 6                      Instagram     9             0.009
## 7                       Facebook     5             0.005
## 8              PatriotJournalist     5             0.005
## 9  Put your button on any page!      4             0.004
## 10       TweetCaster for Android     4             0.004
## 11               Twitter for Mac     4             0.004
## 12                VenezuelaFlash     4             0.004

# Extract the words from the #Obama tweets and join them to the NRC sentiment
# words. Tweets starting with a double quote are skipped, t.co links and
# "&amp;" are stripped, and text is split on the `reg` separator pattern.
Bl_words <- Bl_df %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # keep tokens with at least one lowercase letter that are not stop words
  filter(str_detect(word, "[a-z]")) %>%
  filter(!(word %in% stop_words$word))

# One row per (token, sentiment) pair; tokens missing from nrc are dropped
Bl_words_sentiments <- inner_join(Bl_words, nrc, by = "word")

# Tag each per-hashtag table with its source hashtag, then stack the pairs so
# the two samples can be compared in a single plot.
Fd_platform[["hashtag"]] <- "#Trump"
Bl_platform[["hashtag"]] <- "#Obama"
platform <- rbind(Fd_platform, Bl_platform)

Fd_words_sentiments[["hashtag"]] <- "#Trump"
Bl_words_sentiments[["hashtag"]] <- "#Obama"
words_sentiments <- rbind(Fd_words_sentiments, Bl_words_sentiments)

# Restrict the platform comparison to a hand-picked set of clients
pf <- c(
  "Twitter Web Client",
  "Twitter for iPhone",
  "Twitter for Android",
  "Hootsuite",
  "Instagram"
)
pf_df <- filter(platform, statusSource %in% pf)

# Side-by-side bars: share of each hashtag's tweets sent from each platform.
# percent_of_tweets is stored as a 0-1 fraction, so it is scaled by 100 here
# to make the "Percent of tweets" axis label true.
ggplot(pf_df, aes(x = statusSource, y = 100 * percent_of_tweets, fill = hashtag)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Platform") +
  ylab("Percent of tweets") +
  # rotate platform names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Count of sentiment-labelled tokens per (hashtag, sentiment) and the share
# each sentiment holds within its hashtag. summarise() drops only the last
# grouping level, so sum(n) in the mutate() is taken per hashtag. The result
# is explicitly ungrouped so later verbs do not silently run per hashtag.
sent_df <- words_sentiments %>%
  group_by(hashtag, sentiment) %>%
  summarise(n = n()) %>%
  mutate(frequency = n / sum(n)) %>%
  ungroup()

# Side-by-side bars: how each hashtag's sentiment-labelled tokens split across
# the NRC sentiments. `frequency` is a 0-1 share of labelled tokens within a
# hashtag (not a share of tweets), so it is scaled by 100 and the axis is
# labelled accordingly.
ggplot(sent_df, aes(x = sentiment, y = 100 * frequency, fill = hashtag)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Percent of sentiment-tagged words") +
  # rotate sentiment names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Twitter Sentiment Analysis

Christine

November 11, 2016

generosity

guilt