# Upper bound on the number of tweets requested per search.
num_tweets <- 1000
# Search Twitter for 'Gael_Monfils' and convert the returned list of
# statuses into a data frame.
GM <- searchTwitter('Gael_Monfils', n = num_tweets)
GM_df <- twListToDF(GM)
# Tally tweets per raw statusSource value, largest first. top_n() is called
# without an explicit weight, so it ranks by the last column (n).
GM_df %>%
  group_by(statusSource) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  top_n(10)
## Selecting by n
## # A tibble: 10 × 2
## statusSource
## <chr>
## 1 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for An
## 2 <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPh
## 3 <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
## 4 <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">Tweet
## 5 <a href="http://ifttt.com" rel="nofollow">IFTTT</a>
## 6 <a href="http://dlvr.it" rel="nofollow">dlvr.it</a>
## 7 <a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iP
## 8 <a href="http://www.hootsuite.com" rel="nofollow">Hootsuite</a>
## 9 <a href="http://www.twitter.com" rel="nofollow">Twitter for Windows Phone</
## 10 <a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>
## # ... with 1 more variables: n <int>
# Reduce each statusSource anchor tag to its link text: keep the characters
# after the first '>' and before the first '</a>'.
GM_df$statusSource <- with(
  GM_df,
  substr(
    statusSource,
    regexpr('>', statusSource) + 1,
    regexpr('</a>', statusSource) - 1
  )
)
# Tweets per client plus each client's share of all fetched tweets,
# ordered from most to least used.
GM_platform <- GM_df %>%
  group_by(statusSource) %>%
  summarise(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))
# Show the ten largest rows; with no explicit weight, top_n() ranks by the
# last column (percent_of_tweets).
GM_platform %>%
  top_n(10)
## Selecting by percent_of_tweets
## # A tibble: 10 × 3
## statusSource n percent_of_tweets
## <chr> <int> <dbl>
## 1 Twitter for Android 324 0.324
## 2 Twitter for iPhone 251 0.251
## 3 Twitter Web Client 142 0.142
## 4 TweetDeck 52 0.052
## 5 IFTTT 44 0.044
## 6 dlvr.it 26 0.026
## 7 Twitter for iPad 25 0.025
## 8 Hootsuite 16 0.016
## 9 Twitter for Windows Phone 12 0.012
## 10 Facebook 9 0.009
# Tweets per account and each account's share of the fetched tweets.
# top_n() keeps every row tied at the cut-off, so more than 10 rows can print.
GM_df %>%
  group_by(screenName) %>%
  summarize(n = n()) %>%
  mutate(percent_of_tweets_GM = n / sum(n)) %>%
  arrange(desc(n)) %>%
  top_n(10)
## Selecting by percent_of_tweets_GM
## # A tibble: 12 × 3
## screenName n percent_of_tweets_GM
## <chr> <int> <dbl>
## 1 eazeee2004 16 0.016
## 2 SkySportsTennis 12 0.012
## 3 ESPNtenis 8 0.008
## 4 racket_swing 8 0.008
## 5 AndrewT90024305 6 0.006
## 6 Milos_Raonic_ 6 0.006
## 7 rosigcua 6 0.006
## 8 SadiQBiCHi 6 0.006
## 9 Chrisovglyn14 5 0.005
## 10 estellewenes 5 0.005
## 11 Guti99 5 0.005
## 12 SkySports 5 0.005
# Token delimiter: any character that is not a letter, digit, '#', '@' or
# apostrophe, or an apostrophe that is not followed by one of those. This
# keeps hashtags, mentions and contractions together as single tokens.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"
GM_words <- GM_df %>%
  # Drop tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and the HTML-escaped ampersand. The entity must be
  # removed whole ("&amp;"); removing only "&" leaves a stray "amp" token.
  mutate(text = str_replace_all(
    text, "https://t.co/[A-Za-z\\d]+|&amp;", ""
  )) %>%
  # One row per token, split on the delimiter pattern above.
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Discard stop words and tokens containing no lowercase letter.
  filter(!word %in% stop_words$word,
         str_detect(word, "[a-z]"))
# Most frequent tokens; top_n() ranks by the last column (n).
GM_words %>%
  group_by(word) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  top_n(20)
## Selecting by n
## # A tibble: 21 × 2
## word n
## <chr> <int>
## 1 monfils 880
## 2 rt 530
## 3 milos 431
## 4 raonic 426
## 5 gael 404
## 6 @gael 372
## 7 #atpfinals 347
## 8 en 236
## 9 @milosraonic 193
## 10 ga 150
## # ... with 11 more rows
I looked at the ATP World Tour Twitter account and found very few updates about the ongoing match. Therefore, to make sure these tweets are current, I check the dates on which they were created.
# Collapse each token's tweet timestamp to a calendar date, then peek at the
# first few values.
GM_words[["created"]] <- as.Date(GM_words[["created"]])
head(GM_words[["created"]])
## [1] "2016-11-13" "2016-11-13" "2016-11-13" "2016-11-13" "2016-11-13"
## [6] "2016-11-13"
# Count the token rows whose tweet is dated 2016-11-13.
GM_words %>%
  filter(created == "2016-11-13") %>%
  summarize(n = n())
## # A tibble: 1 × 1
## n
## <int>
## 1 12226
# Keep only the rows of `sentiments` tagged with the "nrc" lexicon, reduced
# to word/sentiment pairs.
# NOTE(review): presumably relies on a tidytext release whose `sentiments`
# data still carries a `lexicon` column - confirm against the installed
# version.
nrc <- sentiments %>%
  filter(lexicon == "nrc") %>%
  select(word, sentiment)
head(nrc)
## # A tibble: 6 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
# Attach NRC sentiment labels to the tokens. A word listed under several
# sentiments yields one row per label; words absent from the lexicon are
# dropped by the inner join.
GM_words_sentiments <- inner_join(GM_words, nrc, by = "word")
# Frequency of every (sentiment, word) pair, most frequent first.
GM_words_sentiments %>%
  group_by(sentiment, word) %>%
  summarize(n = n()) %>%
  arrange(desc(n))
## Source: local data frame [292 x 3]
## Groups: sentiment [10]
##
## sentiment word n
## <chr> <chr> <int>
## 1 anticipation watch 67
## 2 fear watch 67
## 3 anticipation start 42
## 4 anger smash 41
## 5 fear smash 41
## 6 negative smash 41
## 7 anticipation grin 33
## 8 joy grin 33
## 9 positive grin 33
## 10 surprise grin 33
## # ... with 282 more rows
# IDs of tweets that contain at least one word labelled "positive".
pos_tw_ids <- GM_words_sentiments %>%
  filter(sentiment == "positive") %>%
  distinct(id)
# Print the text of the first ten matching tweets.
GM_df %>%
  inner_join(pos_tw_ids, by = "id") %>%
  select(text) %>%
  slice(seq_len(10))
## text
## 1 RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 2 RT @SkySportsEFL: BREAKING: Milos Raonic beats Gael Monf, 6-3, 6-4. Reaction on Sky Sports 3 HD now: https://t.co/gbt2L5mOJ5 https://t.co/o<U+0085>
## 3 Milos Raonic makes winning start to ATP World Tour Finals with straight-sets win over Gael Monfils at O2 Arena<U+0085> https://t.co/dDg1rqA1rN
## 4 Milos Raonic makes winning start to ATP World Tour Finals with straight-sets win over Gael Monfils at O2 Arena<U+0085> https://t.co/H7oxQbNn0R
## 5 Tennis - ATP - Master https://t.co/IU6nVxdNgM
## 6 RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 7 RT @SkySportsTennis: LIVE: @3gerardpique is enjoying Milos Raonic v Gael Monfils. Join us on Sky Sports 3. More here: https://t.co/KEoKgnki<U+0085>
## 8 RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 9 RT @infosportplus: [Masters - Londres] <ed><U+00A0><U+00BC><ed><U+00BE><U+00BE>\nPour le premier match de sa carrière en Masters, @Gael_Monfils s'incline face à @milosraonic, 6/3<U+0085>
## 10 Milos Raonic gets his World Tour Finals campaign off to a winning start with a 6-3 6-4 victory over Gael Monfils. https://t.co/n5p0ZMKTc2
# Distinct (tweet id, word) pairs where the word is labelled "negative".
neg_id_words <- GM_words_sentiments %>%
  filter(sentiment == "negative") %>%
  distinct(id, word)
# Print the first ten tweet texts next to the negative word found in each.
# A tweet with several negative words appears once per word.
GM_df %>%
  inner_join(neg_id_words, by = "id") %>%
  select(text, word) %>%
  slice(seq_len(10))
## text
## 1 RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 2 RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 3 Milos Raonic shrugs off injury concerns to defeat Gael Monfils https://t.co/vpAbPKnij0 #wots https://t.co/ribMP6lBos
## 4 Milos Raonic shrugs off injury concerns to defeat Gael Monfils https://t.co/vpAbPKnij0 #wots https://t.co/ribMP6lBos
## 5 RT @ATPWorldTour: .@Gael_Monfils goes airborne <ed><U+00A0><U+00BD><ed><U+00B1><U+0080> for a smash against #Raonic at the #ATPFinals. Watch Hot Shot: https://t.co/fcpNrXnJWo htt<U+0085>
## 6 RT @ATPWorldTour: .@Gael_Monfils goes airborne <ed><U+00A0><U+00BD><ed><U+00B1><U+0080> for a smash against #Raonic at the #ATPFinals. Watch Hot Shot: https://t.co/fcpNrXnJWo htt<U+0085>
## 7 RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 8 RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 9 RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 10 Milos Raonic shrugs off injury concerns to defeat Gael Monfils in the ATP World Tour Finals -<U+0085> https://t.co/d41mfLZtKD
## word
## 1 crack
## 2 crack
## 3 injury
## 4 defeat
## 5 smash
## 6 shot
## 7 crack
## 8 crack
## 9 crack
## 10 injury
# Search Twitter for 'milosraonic' (same tweet cap as the first search) and
# convert the returned list of statuses into a data frame.
MR <- searchTwitter('milosraonic', n = num_tweets)
MR_df <- twListToDF(MR)
# Reduce each statusSource anchor tag to its link text: keep the characters
# after the first '>' and before the first '</a>'.
MR_df$statusSource <- with(
  MR_df,
  substr(
    statusSource,
    regexpr('>', statusSource) + 1,
    regexpr('</a>', statusSource) - 1
  )
)
# Tweets per client plus each client's share of all fetched tweets,
# ordered from most to least used.
MR_platform <- MR_df %>%
  group_by(statusSource) %>%
  summarise(n = n()) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))
# Show the ten largest rows; with no explicit weight, top_n() ranks by the
# last column (percent_of_tweets).
MR_platform %>%
  top_n(10)
## Selecting by percent_of_tweets
## # A tibble: 10 × 3
## statusSource n percent_of_tweets
## <chr> <int> <dbl>
## 1 Twitter for iPhone 280 0.280
## 2 Twitter for Android 277 0.277
## 3 Twitter Web Client 179 0.179
## 4 TweetDeck 49 0.049
## 5 Twitter for iPad 45 0.045
## 6 Google 41 0.041
## 7 dlvr.it 29 0.029
## 8 IFTTT 13 0.013
## 9 Twitter for Windows 10 0.010
## 10 twicca 9 0.009
# Extract the words and join them to the NRC sentiment words.
MR_words <- MR_df %>%
  # Drop tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and the HTML-escaped ampersand. The entity must be
  # removed whole ("&amp;"); removing only "&" leaves a stray "amp" token.
  mutate(text = str_replace_all(
    text, "https://t.co/[A-Za-z\\d]+|&amp;", ""
  )) %>%
  # One row per token, split on the delimiter pattern stored in `reg`.
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Discard stop words and tokens containing no lowercase letter.
  filter(!word %in% stop_words$word,
         str_detect(word, "[a-z]"))
# Keep only tokens present in the lexicon, one row per (token, sentiment).
MR_words_sentiments <- MR_words %>%
  inner_join(nrc, by = "word")
# List the distinct tokens in reverse alphabetical order and keep the 20 that
# rank highest by `word`, the only column left for top_n() to rank on.
# NOTE(review): this ranks alphabetically, not by frequency - confirm that is
# the intent.
MR_words %>%
  select(word) %>%
  distinct(word) %>%
  arrange(desc(word)) %>%
  top_n(20)
## Selecting by word
## # A tibble: 20 × 1
## word
## <chr>
## 1 zmaj
## 2 zarco
## 3 yrs
## 4 yo
## 5 yeh
## 6 ya
## 7 xico
## 8 world
## 9 won
## 10 witness
## 11 withdraws
## 12 wins
## 13 winning
## 14 win
## 15 wimbledon
## 16 whew
## 17 whatsoever
## 18 week
## 19 wearing
## 20 wawrinka
# Collapse each token's tweet timestamp to a calendar date, then peek at the
# first few values.
MR_words[["created"]] <- as.Date(MR_words[["created"]])
head(MR_words[["created"]])
## [1] "2016-11-11" "2016-11-11" "2016-11-11" "2016-11-11" "2016-11-11"
## [6] "2016-11-11"
# Count the token rows whose tweet is dated 2016-11-13.
MR_words %>%
  filter(created == "2016-11-13") %>%
  summarize(n = n())
## # A tibble: 1 × 1
## n
## <int>
## 1 7870
# Tag every row with its player so the tables stay distinguishable once
# they are stacked.
GM_platform[["player"]] <- "#Monfils"
MR_platform[["player"]] <- "#Raonic"
GM_words_sentiments[["player"]] <- "#Monfils"
MR_words_sentiments[["player"]] <- "#Raonic"
# Stack the two players' platform tables and their sentiment-word tables.
platform1 <- rbind(GM_platform, MR_platform)
words_sentiments1 <- rbind(GM_words_sentiments, MR_words_sentiments)
# Clients to compare between the two players.
pf <- c("Twitter Web Client", "Twitter for iPhone", "Twitter for Android", "Hootsuite", "Instagram")
pf_df <- filter(platform1, statusSource %in% pf)
# Side-by-side bars of each selected client's tweet share, one bar per
# player; x labels are rotated so the client names fit.
ggplot(pf_df, aes(x = statusSource, y = percent_of_tweets, fill = player)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Platform", y = "Percent of tweets") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Count sentiment-word matches per (player, sentiment). summarise() drops the
# last grouping level, so the mutate() below still runs per player and each
# player's frequencies sum to 1.
sent_df <- words_sentiments1 %>%
  group_by(player, sentiment) %>%
  summarise(n = n()) %>%
  mutate(frequency = n / sum(n))
# Side-by-side bars comparing the sentiment mix of the two players. The y
# value is a share of sentiment-word matches, not of tweets, and the axis
# label says so.
ggplot(sent_df, aes(x = sentiment, y = frequency, fill = player)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Share of sentiment-word matches") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))