ATP World Finals, Monfils vs. Raonic

# Number of tweets to request per search.
num_tweets <- 1000

# Search Twitter for "Gael_Monfils" and convert the returned list into a
# data frame with twListToDF().
GM <- searchTwitter("Gael_Monfils", n = num_tweets)
GM_df <- GM %>% twListToDF()

# Tally tweets per raw `statusSource` value and show the ten most common.
GM_df %>%
  count(statusSource) %>%
  arrange(desc(n)) %>%
  top_n(10)

## Selecting by n

## # A tibble: 10 × 2
##                                                                   statusSource
##                                                                          <chr>
## 1  <a href="http://twitter.com/download/android" rel="nofollow">Twitter for An
## 2  <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPh
## 3           <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
## 4  <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">Tweet
## 5                          <a href="http://ifttt.com" rel="nofollow">IFTTT</a>
## 6                          <a href="http://dlvr.it" rel="nofollow">dlvr.it</a>
## 7  <a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iP
## 8              <a href="http://www.hootsuite.com" rel="nofollow">Hootsuite</a>
## 9  <a href="http://www.twitter.com" rel="nofollow">Twitter for Windows Phone</
## 10       <a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>
## # ... with 1 more variables: n <int>

# Extract the platform name: keep only the characters after the first '>'
# and before the first '</a>' of each `statusSource` value.
GM_df$statusSource <- with(
  GM_df,
  substr(
    statusSource,
    regexpr('>', statusSource) + 1,
    regexpr('</a>', statusSource) - 1
  )
)

# Tweets per platform, with each platform's share of all fetched tweets,
# ordered from most to least used.
GM_platform <- GM_df %>%
  count(statusSource) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))

# Show the ten largest platforms (top_n() ranks by the last column).
top_n(GM_platform, 10)

## Selecting by percent_of_tweets

## # A tibble: 10 × 3
##                 statusSource     n percent_of_tweets
##                        <chr> <int>             <dbl>
## 1        Twitter for Android   324             0.324
## 2         Twitter for iPhone   251             0.251
## 3         Twitter Web Client   142             0.142
## 4                  TweetDeck    52             0.052
## 5                      IFTTT    44             0.044
## 6                    dlvr.it    26             0.026
## 7           Twitter for iPad    25             0.025
## 8                  Hootsuite    16             0.016
## 9  Twitter for Windows Phone    12             0.012
## 10                  Facebook     9             0.009

# Tweets per account, with each account's share of all fetched tweets;
# show the ten most active accounts (ties can yield more than ten rows).
GM_df %>%
  count(screenName) %>%
  mutate(percent_of_tweets_GM = n / sum(n)) %>%
  arrange(desc(n)) %>%
  top_n(10)

## Selecting by percent_of_tweets_GM

## # A tibble: 12 × 3
##         screenName     n percent_of_tweets_GM
##              <chr> <int>                <dbl>
## 1       eazeee2004    16                0.016
## 2  SkySportsTennis    12                0.012
## 3        ESPNtenis     8                0.008
## 4     racket_swing     8                0.008
## 5  AndrewT90024305     6                0.006
## 6    Milos_Raonic_     6                0.006
## 7         rosigcua     6                0.006
## 8       SadiQBiCHi     6                0.006
## 9    Chrisovglyn14     5                0.005
## 10    estellewenes     5                0.005
## 11          Guti99     5                0.005
## 12       SkySports     5                0.005

# Token delimiter: any character that is not a letter, digit, '#', '@' or
# apostrophe, plus any apostrophe not followed by one of those characters.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"

# One row per token of each tweet.
GM_words <- GM_df %>%
  # Drop tweets whose text begins with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and HTML-escaped ampersands before tokenizing.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Remove stop words, then keep tokens containing a lowercase letter.
  filter(!word %in% stop_words$word) %>%
  filter(str_detect(word, "[a-z]"))

# The twenty most frequent tokens.
GM_words %>%
  count(word) %>%
  arrange(desc(n)) %>%
  top_n(20)

## Selecting by n

## # A tibble: 21 × 2
##            word     n
##           <chr> <int>
## 1       monfils   880
## 2            rt   530
## 3         milos   431
## 4        raonic   426
## 5          gael   404
## 6         @gael   372
## 7    #atpfinals   347
## 8            en   236
## 9  @milosraonic   193
## 10           ga   150
## # ... with 11 more rows

I checked the ATP World Tour Twitter account and found very few updates about the ongoing match. Therefore, to make sure these tweets are current, I check their creation dates.

# Reduce each token's tweet timestamp to a calendar date, then preview the
# first few values.
GM_words <- GM_words %>% mutate(created = as.Date(created))
GM_words$created %>% head()

## [1] "2016-11-13" "2016-11-13" "2016-11-13" "2016-11-13" "2016-11-13"
## [6] "2016-11-13"

# Number of tokens that come from tweets created on 2016-11-13.
GM_words %>%
  filter(created == as.Date("2016-11-13")) %>%
  tally()

## # A tibble: 1 × 1
##       n
##   <int>
## 1 12226

# Load the NRC lexicon as a two-column (word, sentiment) table.
# get_sentiments() is used instead of filtering the bundled `sentiments`
# data set: newer tidytext releases ship `sentiments` without a `lexicon`
# column, so `filter(lexicon == "nrc")` fails there.
# NOTE(review): recent tidytext versions fetch NRC through the textdata
# package on first use — confirm it is installed where this is knitted.
nrc <- get_sentiments("nrc") %>%
  select(word, sentiment)
head(nrc)

## # A tibble: 6 × 2
##        word sentiment
##       <chr>     <chr>
## 1    abacus     trust
## 2   abandon      fear
## 3   abandon  negative
## 4   abandon   sadness
## 5 abandoned     anger
## 6 abandoned      fear

# Attach NRC sentiment labels to every token found in the lexicon; a token
# yields one row per sentiment it is tagged with.
GM_words_sentiments <- inner_join(GM_words, nrc, by = "word")

# Frequency of each (sentiment, word) pair, most frequent first.
GM_words_sentiments %>%
  group_by(sentiment, word) %>%
  tally() %>%
  arrange(desc(n))

## Source: local data frame [292 x 3]
## Groups: sentiment [10]
## 
##       sentiment  word     n
##           <chr> <chr> <int>
## 1  anticipation watch    67
## 2          fear watch    67
## 3  anticipation start    42
## 4         anger smash    41
## 5          fear smash    41
## 6      negative smash    41
## 7  anticipation  grin    33
## 8           joy  grin    33
## 9      positive  grin    33
## 10     surprise  grin    33
## # ... with 282 more rows

# IDs of tweets containing at least one token tagged "positive".
pos_tw_ids <- GM_words_sentiments %>%
  filter(sentiment == "positive") %>%
  distinct(id)

# Text of the first ten tweets with a positive token.
GM_df %>%
  inner_join(pos_tw_ids, by = "id") %>%
  select(text) %>%
  head(10)

##                                                                                                                                                                                    text
## 1                                          RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 2                                          RT @SkySportsEFL: BREAKING: Milos Raonic beats Gael Monf, 6-3, 6-4. Reaction on Sky Sports 3 HD now: https://t.co/gbt2L5mOJ5 https://t.co/o<U+0085>
## 3                                               Milos Raonic makes winning start to ATP World Tour Finals with straight-sets win over Gael Monfils at O2 Arena<U+0085> https://t.co/dDg1rqA1rN
## 4                                               Milos Raonic makes winning start to ATP World Tour Finals with straight-sets win over Gael Monfils at O2 Arena<U+0085> https://t.co/H7oxQbNn0R
## 5                                                                                                                                         Tennis - ATP - Master https://t.co/IU6nVxdNgM
## 6                                          RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 7                                          RT @SkySportsTennis: LIVE: @3gerardpique is enjoying Milos Raonic v Gael Monfils. Join us on Sky Sports 3. More here: https://t.co/KEoKgnki<U+0085>
## 8                                          RT @TennisTV: Video: do the @Gael_Monfils! The stop-start no-look with added cheeky grin! https://t.co/kRsZ9udpkC #ATPFinals https://t.co/m<U+0085>
## 9  RT @infosportplus: [Masters - Londres]  <ed><U+00A0><U+00BC><ed><U+00BE><U+00BE>\nPour le premier match de sa carrière en Masters, @Gael_Monfils s'incline face à @milosraonic, 6/3<U+0085>
## 10                                            Milos Raonic gets his World Tour Finals campaign off to a winning start with a 6-3 6-4 victory over Gael Monfils. https://t.co/n5p0ZMKTc2

# Distinct (tweet id, word) pairs where the word is tagged "negative".
neg_id_words <- GM_words_sentiments %>%
  filter(sentiment == "negative") %>%
  distinct(id, word)

# First ten tweets with a negative token, shown next to the matching word
# (a tweet appears once per distinct negative word it contains).
GM_df %>%
  inner_join(neg_id_words, by = "id") %>%
  select(text, word) %>%
  head(10)

##                                                                                                                                                                                   text
## 1                                                                         RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 2                                                                         RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 3                                                                 Milos Raonic shrugs off injury concerns to defeat Gael Monfils https://t.co/vpAbPKnij0 #wots https://t.co/ribMP6lBos
## 4                                                                 Milos Raonic shrugs off injury concerns to defeat Gael Monfils https://t.co/vpAbPKnij0 #wots https://t.co/ribMP6lBos
## 5  RT @ATPWorldTour: .@Gael_Monfils goes airborne <ed><U+00A0><U+00BD><ed><U+00B1><U+0080> for a smash against #Raonic at the #ATPFinals. Watch Hot Shot: https://t.co/fcpNrXnJWo htt<U+0085>
## 6  RT @ATPWorldTour: .@Gael_Monfils goes airborne <ed><U+00A0><U+00BD><ed><U+00B1><U+0080> for a smash against #Raonic at the #ATPFinals. Watch Hot Shot: https://t.co/fcpNrXnJWo htt<U+0085>
## 7                                                                         RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 8                                                                         RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 9                                                                         RT @Sportags: #ATPWorldTourFinals\n\n@Gael_Monfils al mejor estilo #NBA...\n\nCrack. https://t.co/Ecq7LoXgCV
## 10                                                              Milos Raonic shrugs off injury concerns to defeat Gael Monfils in the ATP World Tour Finals -<U+0085> https://t.co/d41mfLZtKD
##      word
## 1   crack
## 2   crack
## 3  injury
## 4  defeat
## 5   smash
## 6    shot
## 7   crack
## 8   crack
## 9   crack
## 10 injury

# Search Twitter for "milosraonic" and convert the returned list into a
# data frame with twListToDF().
MR <- searchTwitter("milosraonic", n = num_tweets)
MR_df <- MR %>% twListToDF()

# Extract the platform name: keep only the characters after the first '>'
# and before the first '</a>' of each `statusSource` value.
MR_df$statusSource <- with(
  MR_df,
  substr(
    statusSource,
    regexpr('>', statusSource) + 1,
    regexpr('</a>', statusSource) - 1
  )
)

# Tweets per platform, with each platform's share of all fetched tweets,
# ordered from most to least used.
MR_platform <- MR_df %>%
  count(statusSource) %>%
  mutate(percent_of_tweets = n / sum(n)) %>%
  arrange(desc(n))

# Show the ten largest platforms (top_n() ranks by the last column).
top_n(MR_platform, 10)

## Selecting by percent_of_tweets

## # A tibble: 10 × 3
##           statusSource     n percent_of_tweets
##                  <chr> <int>             <dbl>
## 1   Twitter for iPhone   280             0.280
## 2  Twitter for Android   277             0.277
## 3   Twitter Web Client   179             0.179
## 4            TweetDeck    49             0.049
## 5     Twitter for iPad    45             0.045
## 6               Google    41             0.041
## 7              dlvr.it    29             0.029
## 8                IFTTT    13             0.013
## 9  Twitter for Windows    10             0.010
## 10              twicca     9             0.009

# Extract the words and join them to the NRC sentiment words.
MR_words <- MR_df %>%
  # Drop tweets whose text begins with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and HTML-escaped ampersands before tokenizing.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Remove stop words, then keep tokens containing a lowercase letter.
  filter(!word %in% stop_words$word) %>%
  filter(str_detect(word, "[a-z]"))
MR_words_sentiments <- inner_join(MR_words, nrc, by = "word")

# Distinct tokens in reverse alphabetical order; top_n() then keeps the 20
# largest values of `word`.
# NOTE(review): unlike the Monfils word summary, this ranks tokens
# alphabetically rather than by frequency — confirm that is intended.
MR_words %>%
  select(word) %>%
  distinct() %>%
  arrange(desc(word)) %>%
  top_n(20)

## Selecting by word

## # A tibble: 20 × 1
##          word
##         <chr>
## 1        zmaj
## 2       zarco
## 3         yrs
## 4          yo
## 5         yeh
## 6          ya
## 7        xico
## 8       world
## 9         won
## 10    witness
## 11  withdraws
## 12       wins
## 13    winning
## 14        win
## 15  wimbledon
## 16       whew
## 17 whatsoever
## 18       week
## 19    wearing
## 20   wawrinka

# Reduce each token's tweet timestamp to a calendar date, then preview the
# first few values.
MR_words <- MR_words %>% mutate(created = as.Date(created))
MR_words$created %>% head()

## [1] "2016-11-11" "2016-11-11" "2016-11-11" "2016-11-11" "2016-11-11"
## [6] "2016-11-11"

# Number of tokens that come from tweets created on 2016-11-13.
MR_words %>%
  filter(created == as.Date("2016-11-13")) %>%
  tally()

## # A tibble: 1 × 1
##       n
##   <int>
## 1  7870

# Label each player's tables so rows stay identifiable once stacked.
GM_platform <- GM_platform %>% mutate(player = "#Monfils")
MR_platform <- MR_platform %>% mutate(player = "#Raonic")
GM_words_sentiments <- GM_words_sentiments %>% mutate(player = "#Monfils")
MR_words_sentiments <- MR_words_sentiments %>% mutate(player = "#Raonic")

# Stack the per-player tables row-wise.
platform1 <- rbind(GM_platform, MR_platform)
words_sentiments1 <- rbind(GM_words_sentiments, MR_words_sentiments)

# Platforms to compare between the two players.
pf <- c(
  "Twitter Web Client",
  "Twitter for iPhone",
  "Twitter for Android",
  "Hootsuite",
  "Instagram"
)
pf_df <- filter(platform1, statusSource %in% pf)

# Dodged bars: share of each player's tweets sent from each platform.
ggplot(pf_df, aes(x = statusSource, y = percent_of_tweets, fill = player)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Platform", y = "Percent of tweets") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Per player, count the sentiment-tagged word rows in each sentiment and
# convert the counts to that player's share of all such rows.
sent_df <- words_sentiments1 %>%
  group_by(player, sentiment) %>%
  summarise(n = n()) %>%
  # summarise() peels off the `sentiment` grouping, so sum(n) below is
  # computed within each player.
  mutate(frequency = n / sum(n)) %>%
  ungroup()

# Dodged bars comparing the sentiment mix of the two players. The y-axis
# shows shares of sentiment-tagged words (not tweets), so it is labelled
# accordingly.
ggplot(sent_df, aes(x = sentiment, y = frequency, fill = player)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Share of sentiment-tagged words") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ATP World Finals, Monfils vs. Raonic

Christine

November 13, 2016