library(twitteR)
library(tidytext)
library(stringr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# generosity
# Search Twitter for "#Trump", asking for `num_tweets` results, and flatten the
# returned list of statuses into a data frame.
# NOTE(review): no twitteR authentication call is visible in this script;
# presumably the OAuth setup happens before this point -- confirm.
num_tweets <- 1000
Fd <- searchTwitter(
  "#Trump",
  n = num_tweets
)
Fd_df <- Fd %>% twListToDF()
# Most frequent raw `statusSource` values (still full HTML anchors here).
# `wt = n` is given explicitly so the ranking does not depend on `n` happening
# to be the last column. top_n() keeps ties, so more than ten rows can come
# back.
Fd_df %>%
  group_by(statusSource) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  top_n(10, wt = n)
## # A tibble: 10 × 2
## statusSource
## <chr>
## 1 <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPh
## 2 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for An
## 3 <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
## 4 <a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iP
## 5 <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">Tweet
## 6 <a href="https://mobile.twitter.com" rel="nofollow">Mobile Web (M5)</a>
## 7 <a href="http://ifttt.com" rel="nofollow">IFTTT</a>
## 8 <a href="http://paper.li" rel="nofollow">Paper.li</a>
## 9 <a href="http://linkis.com" rel="nofollow">Put your button on any page! </a
## 10 <a href="http://www.hootsuite.com" rel="nofollow">Hootsuite</a>
## # ... with 1 more variables: n <int>
# Extract the platform: replace the HTML anchor in `statusSource` with the
# text found between the first ">" and the closing "</a>".
Fd_df$statusSource <- local({
  anchor <- Fd_df$statusSource
  name_start <- regexpr(">", anchor) + 1
  name_stop <- regexpr("</a>", anchor) - 1
  substr(anchor, name_start, name_stop)
})
# Per-platform tweet count for the #Trump sample, most used platform first.
# `percent_of_tweets` is stored as a proportion of all fetched tweets
# (n / sum(n)), not multiplied by 100.
Fd_platform <- Fd_df %>%
  group_by(statusSource) %>%
  summarize(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))
# Rank on `n` explicitly instead of letting top_n() pick the last column.
# The order is the same as ranking on `percent_of_tweets`, and ties are kept.
Fd_platform %>% top_n(10, wt = n)
## # A tibble: 10 × 3
## statusSource n percent_of_tweets
## <chr> <int> <dbl>
## 1 Twitter for iPhone 311 0.311
## 2 Twitter for Android 281 0.281
## 3 Twitter Web Client 238 0.238
## 4 Twitter for iPad 53 0.053
## 5 TweetDeck 20 0.020
## 6 Mobile Web (M5) 17 0.017
## 7 IFTTT 15 0.015
## 8 Paper.li 8 0.008
## 9 Hootsuite 5 0.005
## 10 Put your button on any page! 5 0.005
# Accounts with the most tweets in the #Trump sample, with each account's
# share of all fetched tweets. `wt = n` makes the ranking column explicit.
# top_n() keeps every row tied with the 10th value, which is why far more
# than ten accounts are returned when many accounts share the same count.
Fd_df %>%
  group_by(screenName) %>%
  summarise(n = n()) %>%
  mutate(percent_of_tweets_Fd = n / sum(n)) %>%
  arrange(desc(n)) %>%
  top_n(10, wt = n)
## # A tibble: 48 × 3
## screenName n percent_of_tweets_Fd
## <chr> <int> <dbl>
## 1 TheUSofFail_ 6 0.006
## 2 lbljm1 4 0.004
## 3 obssessedlawley 4 0.004
## 4 OnceUpo00786525 4 0.004
## 5 ChadKMills 3 0.003
## 6 DaNastysFinest 3 0.003
## 7 FilsFoto 3 0.003
## 8 MichaelMcGreev5 3 0.003
## 9 Nini116 3 0.003
## 10 _izadoraf_ 2 0.002
## # ... with 38 more rows
# Token delimiter for unnest_tokens(): any character that is not a letter,
# digit, "#", "@" or apostrophe, plus an apostrophe that is not followed by
# one of those. Hashtags, mentions and contractions stay single tokens.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"
# One row per word of each #Trump tweet.
Fd_words <- Fd_df %>%
  # Skip tweets whose text begins with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Remove t.co links and ampersands. The HTML entity "&amp;" is matched as a
  # whole; stripping only "&" would leave a stray "amp" token behind.
  # NOTE(review): links that are truncated or not of the form
  # https://t.co/<id> are not removed and can surface as an "https" token.
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Drop stop words and tokens that contain no lowercase letter.
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )
# Most frequent words in the #Trump sample. `wt = n` makes the ranking column
# explicit; top_n() keeps ties, so slightly more than 20 rows may be returned.
Fd_words %>%
  group_by(word) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  top_n(20, wt = n)
## # A tibble: 21 × 2
## word n
## <chr> <int>
## 1 #trump 937
## 2 rt 689
## 3 trump 111
## 4 https 103
## 5 protesters 96
## 6 @wdfx2eu8 72
## 7 pilling 71
## 8 red 71
## 9 @the 70
## 10 de 70
## # ... with 11 more rows
# NRC lexicon as (word, sentiment) pairs; a word can carry several sentiments.
# get_sentiments() replaces filtering `sentiments` on a `lexicon` column,
# because current tidytext releases ship `sentiments` without that column.
# NOTE(review): recent tidytext versions fetch NRC through the `textdata`
# package on first use -- confirm it is available where this runs.
nrc <- get_sentiments("nrc") %>%
  select(word, sentiment)
head(nrc)
## # A tibble: 6 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
# Keep only the tweet words found in the NRC lexicon; a word is repeated once
# for every sentiment it is tagged with.
Fd_words_sentiments <- inner_join(Fd_words, nrc, by = "word")
# Number of word/sentiment matches per sentiment, largest first.
Fd_words_sentiments %>%
  group_by(sentiment) %>%
  summarize(n = n()) %>%
  arrange(desc(n))
## # A tibble: 10 × 2
## sentiment n
## <chr> <int>
## 1 positive 727
## 2 trust 616
## 3 negative 497
## 4 anticipation 359
## 5 joy 329
## 6 anger 293
## 7 fear 278
## 8 sadness 234
## 9 surprise 223
## 10 disgust 191
# Ids of tweets containing at least one word tagged "positive".
pos_tw_ids <- distinct(
  filter(Fd_words_sentiments, sentiment == "positive"),
  id
)
# Text of the first ten such tweets. `pos_tw_ids` only carries `id`, so a
# filtering join yields the same rows and columns as an inner join.
Fd_df %>%
  semi_join(pos_tw_ids, by = "id") %>%
  select(text) %>%
  slice(1:10)
## text
## 1 RT @ToDropADime2: #Trump less than 48 hrs:\n\nRespects Obama\nNot Repealing OC\nWants Bill Clinton as Advisor\nNo Wall\nNo Deportation\n#ChrisChri<U+0085>
## 2 Also relevant, my follow-up tweet from last June @marcmaron\n#Election2016 #Hillary #Trump #notmypresident #safetypin\nhttps://t.co/GbR1Vy1eye
## 3 RT @RVAwonk: Instead of telling us that not all #Trump voters are racist, how about condemning the acts of racism being committed in his na<U+0085>
## 4 A madman has been given the keys to the surveillance state | Electing #Trump is a major #FAIL for #America https://t.co/WM9DrBnqCi
## 5 RT @immigrant4trump: Trump: No Salary for Me as President #maga #trump https://t.co/edrQuTS5HT
## 6 Humanity strikes back at @realDonaldTrump. Join the #WorldToTrump open letter that<U+0092>s sweeping the planet! #Trump https://t.co/q8HKnoZ59N
## 7 RT @YaleE360: With #Trump win, the Paris agreement will go on, but without U.S. as a key leader, policy expert David Victor writes https://<U+0085>
## 8 @PrisonPlanet Obviously the #trump supporters were not #whitetrash enough for their opposition, could not get down on their low brow level.
## 9 RT @Diplomat_APAC: The 'Pivot' Gets Trumped: The #Asia-Pacific Under President #Trump https://t.co/yhi09Dfqst @nktpnd and @TheAsianist disc<U+0085>
## 10 RT @The_NewRight: When Obama won, we didn't riot. We waited 8 years & protested at the ballot box! #TrumpProtest #TrumpRiot #Trump #Trump20<U+0085>
# Despite the `neg_` prefix, this holds the distinct (tweet id, word) pairs
# tagged with the NRC "disgust" sentiment, not "negative".
neg_id_words <- distinct(
  filter(Fd_words_sentiments, sentiment == "disgust"),
  id, word
)
# First ten matches: tweet text next to the word that triggered the match.
# A tweet appears once per distinct matching word.
Fd_df %>%
  inner_join(neg_id_words, by = "id") %>%
  select(text, word) %>%
  slice(1:10)
## text
## 1 @SharylAttkisson #CognitiveDissonance is the only explanation. They should look in a mirror when saying how bad #Trump is! #clueless
## 2 #HealingAmerica #POTUS why won't Obama or #trump or #HillaryClinton (the supposed leaders) say anything to help the scared and angry.
## 3 RT @ToDropADime2: #Trump gets crushed and screws his #AltRight #nazi followers all in 1 shot. #VeteransDay https://t.co/tM0wxMIkYd
## 4 RT @WDFx2EU8: Stay safe, folks, this weekend is when the #Trump protesters unleash hell, this is the GREATEST RED-PILLING IN USA HISTORY.
## 5 @MatthewKick @EspeOppenheimer \nRage! <ed><U+00A0><U+00BD><ed><U+00B8><U+00A1>\nThere's a difference between sour grapes and sheer terror, and the differenc<U+0085> https://t.co/iTfPTW2pXy
## 6 RT @WDFx2EU8: Stay safe, folks, this weekend is when the #Trump protesters unleash hell, this is the GREATEST RED-PILLING IN USA HISTORY.
## 7 RT @ToDropADime2: Here's the CRAZY LUNATIC U idiots just elected.\n#Trump tweets in 2012 when he Thought #MittRomney won\nPopular vote: SMH.<U+0085>
## 8 Hate filled orange #POTUS #Trump doesn't know beauty #NewBluehand #Bluehand #USA #DonaldTrump https://t.co/yFwCuF2ckT
## 9 RT @OutFrontCNN: "This is what happens when you elect a fascist, racist bigot" - Nationwide protests erupt following #Trump's win https://t<U+0085>
## 10 RT @shomaristone: Calls Grow for Trump to Respond to Hate Incidents, KKK Rally. #Trump #TrumpPence16 @nbcwashington \nhttps://t.co/hDlaTM0y3w
## word
## 1 bad
## 2 angry
## 3 crushed
## 4 hell
## 5 sour
## 6 hell
## 7 lunatic
## 8 hate
## 9 bigot
## 10 hate
# guilt
# Search Twitter for "#Obama", asking for `num_tweets` results, and flatten
# the returned list of statuses into a data frame.
Bl <- searchTwitter(
  "#Obama",
  n = num_tweets
)
Bl_df <- Bl %>% twListToDF()
# Extract the platform: replace the HTML anchor in `statusSource` with the
# text found between the first ">" and the closing "</a>".
Bl_df$statusSource <- local({
  anchor <- Bl_df$statusSource
  name_start <- regexpr(">", anchor) + 1
  name_stop <- regexpr("</a>", anchor) - 1
  substr(anchor, name_start, name_stop)
})
# Per-platform tweet count for the #Obama sample, most used platform first.
# `percent_of_tweets` is stored as a proportion of all fetched tweets
# (n / sum(n)), not multiplied by 100.
Bl_platform <- Bl_df %>%
  group_by(statusSource) %>%
  summarize(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))
# Rank on `n` explicitly instead of letting top_n() pick the last column.
# top_n() keeps ties, so more than ten rows can be returned.
Bl_platform %>% top_n(10, wt = n)
## # A tibble: 12 × 3
## statusSource n percent_of_tweets
## <chr> <int> <dbl>
## 1 Twitter for iPhone 488 0.488
## 2 Twitter for Android 186 0.186
## 3 Twitter Web Client 184 0.184
## 4 Twitter for iPad 49 0.049
## 5 Mobile Web (M5) 24 0.024
## 6 Instagram 9 0.009
## 7 Facebook 5 0.005
## 8 PatriotJournalist 5 0.005
## 9 Put your button on any page! 4 0.004
## 10 TweetCaster for Android 4 0.004
## 11 Twitter for Mac 4 0.004
## 12 VenezuelaFlash 4 0.004
# Extract the words and join them to the NRC sentiment words.
Bl_words <- Bl_df %>%
  # Skip tweets whose text begins with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Remove t.co links and ampersands. The HTML entity "&amp;" is matched as a
  # whole; stripping only "&" would leave a stray "amp" token behind.
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Drop stop words and tokens that contain no lowercase letter.
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )
# A word is repeated once for every NRC sentiment it is tagged with.
Bl_words_sentiments <- Bl_words %>% inner_join(nrc, by = "word")
# Tag each hashtag's tables with the hashtag they came from.
Fd_platform[["hashtag"]] <- "#Trump"
Fd_words_sentiments[["hashtag"]] <- "#Trump"
Bl_platform[["hashtag"]] <- "#Obama"
Bl_words_sentiments[["hashtag"]] <- "#Obama"
# Stack the two samples so they can be compared in a single plot.
platform <- rbind(Fd_platform, Bl_platform)
words_sentiments <- rbind(Fd_words_sentiments, Bl_words_sentiments)
# Platforms to compare between the two hashtags.
pf <- c(
  "Twitter Web Client",
  "Twitter for iPhone",
  "Twitter for Android",
  "Hootsuite",
  "Instagram"
)
pf_df <- filter(platform, statusSource %in% pf)
# Side-by-side bars of each platform's share of tweets, one bar per hashtag.
ggplot(pf_df, aes(x = statusSource, y = percent_of_tweets, fill = hashtag)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Platform", y = "Percent of tweets") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Share of each NRC sentiment among the sentiment-tagged words of each hashtag.
# After summarise() the data is still grouped by `hashtag`, so sum(n) is the
# per-hashtag total; ungroup() then stops that grouping leaking into later use.
sent_df <- words_sentiments %>%
  group_by(hashtag, sentiment) %>%
  summarise(n = n()) %>%
  mutate(frequency = n / sum(n)) %>%
  ungroup()
# The y axis shows a proportion of word/sentiment matches, not of tweets, so
# the label says so.
ggplot(sent_df, aes(x = sentiment, y = frequency, fill = hashtag)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Proportion of sentiment-tagged words") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
