load tweets and convert to data frame

# Request up to 3200 statuses from the @realDonaldTrump timeline.
trump_tweets <- userTimeline(user = "realDonaldTrump", n = 3200)

# Convert every status object to a data frame, row-bind the pieces,
# and wrap the result as a tbl.
trump_tweets_df <- trump_tweets %>%
  map_df(as.data.frame) %>%
  tbl_df()

extract tweet source from the data frame

# Keep the columns used downstream, pull the device name out of the
# statusSource string (the text between "Twitter for " and the next "<"),
# and retain only tweets sent from an iPhone or an Android device.
tweets <- trump_tweets_df %>%
  select(id, statusSource, text, created) %>%
  extract(
    col = statusSource,
    into = "source",
    regex = "Twitter for (.*?)<"
  ) %>%
  filter(source %in% c("iPhone", "Android"))

# Show the first ten rows.
kable(head(tweets, n = 10))
id source text created
797455295928791040 Android This will prove to be a great time in the lives of ALL Americans. We will unite and we will win, win, win! 2016-11-12 15:05:33
797098212599496704 iPhone Today we express our deepest gratitude to all those who have served in our armed forces. #ThankAVet https://t.co/wPk7QWpK8Z 2016-11-11 15:26:37
797069763801387008 Android Busy day planned in New York. Will soon be making some very important decisions on the people who will be running our government! 2016-11-11 13:33:35
797034721075228672 Android Love the fact that the small groups of protesters last night have passion for our great country. We will all come together and be proud! 2016-11-11 11:14:20
796900183955095552 Android Just had a very open and successful presidential election. Now professional protesters, incited by the media, are protesting. Very unfair! 2016-11-11 02:19:44
796897928048766976 Android A fantastic day in D.C. Met with President Obama for first time. Really good meeting, great chemistry. Melania liked Mrs. O a lot! 2016-11-11 02:10:46
796797436752707585 iPhone Happy 241st birthday to the U.S. Marine Corps! Thank you for your service!! https://t.co/Lz2dhrXzo4 2016-11-10 19:31:27
796315640307060738 Android Such a beautiful and important evening! The forgotten man and woman will never be forgotten again. We will all come together as never before 2016-11-09 11:36:58
796182637622816768 iPhone Watching the returns at 9:45pm.
#ElectionNight #MAGA< ed><U+00A0 > https://t.co/HfuJeRZbod 2016-11-09 02:48:27
796126077647196160 iPhone Still time to #VoteTrump!
#iVoted #ElectionNight https://t.co/UZtYAY1Ba6 2016-11-08 23:03:42

chart by time of day

# Share of each device's tweets posted in each hour of the day.
tweets %>%
  # Tweets per device and hour, with `created` converted to the "EST" zone.
  count(source, hour = hour(with_tz(created, "EST"))) %>%
  # Group explicitly: the percentage must be taken within each device.
  # Relying on count() to leave the result grouped by `source` breaks on
  # dplyr versions where count() returns an ungrouped result.
  group_by(source) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(hour, percent, color = source)) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Hour of the day (EST)", y = "% of tweets", color = "")

chart by picture/link

# Count tweets per device, split by whether the text contains a t.co link.
# Tweets whose text starts with a double quote are excluded first.
tweet_picture_counts <- tweets %>%
  filter(!str_detect(text, '^"')) %>%
  count(
    source,
    # Match "t.co" literally. As a regular expression the "." matches any
    # character, so text such as "great country" ("t co") was wrongly
    # classified as containing a picture/link.
    picture = ifelse(
      str_detect(text, fixed("t.co")),
      "Picture/link",
      "No picture/link"
    )
  )

# Side-by-side bars: number of tweets with / without a link for each device.
ggplot(tweet_picture_counts, aes(source, n, fill = picture)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "", y = "Number of tweets", fill = "")

Isolating Words

# Token separator: any character that is not a letter, digit, "#", "@" or
# apostrophe, OR an apostrophe that is not followed by one of those word
# characters. The second branch must be a negative lookahead `(?!...)`;
# the previous `('?!...)` was an ordinary group requiring a literal "!",
# which left leading/trailing apostrophes attached to tokens.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"

# One row per word:
#   * drop tweets whose text starts with a double quote,
#   * strip t.co links and "&amp;" entities,
#   * split the text on `reg`,
#   * remove stop words and tokens without any lowercase letter.
tweet_words <- tweets %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(!word %in% stop_words$word, str_detect(word, "[a-z]"))
kable(head(tweet_words))
id source created word
764045345332396032 Android 2016-08-12 10:26:20 ratings
764045345332396032 Android 2016-08-12 10:26:20 challenged
764045345332396032 Android 2016-08-12 10:26:20 @cnn
764045345332396032 Android 2016-08-12 10:26:20 reports
764045345332396032 Android 2016-08-12 10:26:20 call
764045345332396032 Android 2016-08-12 10:26:20 president

Most common words and likelihood of Android and iPhone

# For every word used at least 5 times overall, compute its smoothed usage
# rate on each device and the log2 ratio of the Android rate to the iPhone
# rate; sort so the most Android-leaning words come first.
android_iphone_ratios <- tweet_words %>%
  count(word, source) %>%
  # Group explicitly so the threshold below is evaluated per word. Relying
  # on count() to leave the data grouped by `word` makes sum(n) the grand
  # total on dplyr versions where count() returns an ungrouped result.
  group_by(word) %>%
  filter(sum(n) >= 5) %>%
  spread(source, n, fill = 0) %>%
  ungroup() %>%
  # Add-one smoothing, then normalise each device column to sum to 1.
  mutate_each(funs((. + 1) / sum(. + 1)), -word) %>%
  mutate(logratio = log2(Android / iPhone)) %>%
  arrange(desc(logratio))
# `digits` is an argument of kable(); passed to head() it was silently
# ignored and the table was printed unrounded.
kable(head(android_iphone_ratios), digits = 2)
word Android iPhone logratio
exciting 0.0183486 0.0009116 4.331163
wow 0.0214067 0.0018232 3.553556
paul 0.0183486 0.0018232 3.331164
ryan 0.0183486 0.0018232 3.331164
@cnn 0.0214067 0.0027347 2.968593
media 0.0214067 0.0036463 2.553556

Visualization of most common words

# Horizontal bar chart of the 15 largest-magnitude log ratios on each side
# of zero, with words ordered by log ratio.
android_iphone_ratios %>%
  group_by(logratio > 0) %>%
  top_n(15, abs(logratio)) %>%
  ungroup() %>%
  mutate(word = reorder(word, logratio)) %>%
  ggplot(mapping = aes(x = word, y = logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity") +
  # fill is FALSE for non-negative ratios (first label/colour) and TRUE for
  # negative ratios (second label/colour).
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  labs(y = "Android/ iPhone log ratio") +
  coord_flip()
## Warning: Stacking not well defined when ymin != 0

Sentiments

# Word/sentiment pairs from the rows of `sentiments` tagged with the "nrc"
# lexicon.
nrc <- sentiments %>%
  filter(lexicon == "nrc") %>%
  select(word, sentiment)

# Preview the first rows.
kable(head(nrc))
word sentiment
abacus trust
abandon fear
abandon negative
abandon sadness
abandoned anger
abandoned fear

Word Count

# Lookup table with one row per tweet id: the device it was sent from and
# the total number of rows (words) in tweet_words for that device.
sources <- distinct(
  tweet_words %>%
    group_by(source) %>%
    mutate(total_words = n()) %>%
    ungroup(),
  id, source, total_words
)

Sentiments

iPhone

# Number of iPhone words associated with each NRC sentiment.
by_source_sentiment_iPh <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  # Give every (sentiment, tweet) combination a row, using 0 when absent.
  complete(sentiment, id, fill = list(n = 0)) %>%
  # Re-attach each tweet's device and that device's total word count.
  inner_join(sources) %>%
  group_by(source, sentiment, total_words) %>%
  filter(source == "iPhone") %>%
  summarise(words = sum(n)) %>%
  ungroup()
## Joining, by = "id"
# Percentage of the device's words that carry the sentiment. The previous
# total_words / words was the inverse ratio (words per sentiment word), so
# rarer sentiments were shown with larger "percentages".
kable(
  head(by_source_sentiment_iPh) %>%
    mutate(percent_sentiment = 100 * words / total_words),
  digits = 2
)
source sentiment total_words words percent_sentiment
iPhone anger 2212 90 24.58
iPhone anticipation 2212 151 14.65
iPhone disgust 2212 42 52.67
iPhone fear 2212 113 19.58
iPhone joy 2212 99 22.34
iPhone negative 2212 150 14.75

Android

# Number of Android words associated with each NRC sentiment.
by_source_sentiment_An <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  # Give every (sentiment, tweet) combination a row, using 0 when absent.
  complete(sentiment, id, fill = list(n = 0)) %>%
  # Re-attach each tweet's device and that device's total word count.
  inner_join(sources) %>%
  group_by(source, sentiment, total_words) %>%
  filter(source == "Android") %>%
  summarise(words = sum(n)) %>%
  ungroup()
## Joining, by = "id"
# Percentage of the device's words that carry the sentiment. The previous
# total_words / words was the inverse ratio (words per sentiment word), so
# rarer sentiments were shown with larger "percentages".
kable(
  head(by_source_sentiment_An) %>%
    mutate(percent_sentiment = 100 * words / total_words),
  digits = 2
)
source sentiment total_words words percent_sentiment
Android anger 536 31 17.29
Android anticipation 536 41 13.07
Android disgust 536 22 24.36
Android fear 536 22 24.36
Android joy 536 28 19.14
Android negative 536 47 11.40

Join words to sentiments

# Sentiment word counts for both devices: one row per (source, sentiment)
# with the device's total word count alongside.
by_source_sentiment <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  # Fill in missing (sentiment, tweet) combinations with a count of 0.
  complete(sentiment, id, fill = list(n = 0)) %>%
  # Bring back each tweet's source and total_words from the lookup table.
  inner_join(sources) %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()
## Joining, by = "id"
kable(head(by_source_sentiment))
source sentiment total_words words
Android anger 536 31
Android anticipation 536 41
Android disgust 536 22
Android fear 536 22
Android joy 536 28
Android negative 536 47

Likelihood of a difference in word uses between phones

# For each sentiment, run poisson.test() on the per-device word counts
# against the per-device total word counts and tidy the result into one
# row per sentiment.
# NOTE(review): the direction of `estimate` depends on the row order of
# by_source_sentiment within each sentiment (Android first in the output
# shown for that table) -- confirm if the upstream ordering changes.
sentiment_differences <- by_source_sentiment %>%
  group_by(sentiment) %>%
  do(tidy(poisson.test(x = .$words, T = .$total_words)))

kable(sentiment_differences, digits = 2)
sentiment estimate statistic p.value parameter conf.low conf.high method alternative
anger 1.42 31 0.11 23.60 0.91 2.16 Comparison of Poisson rates two.sided
anticipation 1.12 41 0.52 37.45 0.77 1.59 Comparison of Poisson rates two.sided
disgust 2.16 22 0.01 12.48 1.23 3.70 Comparison of Poisson rates two.sided
fear 0.80 22 0.39 26.33 0.48 1.28 Comparison of Poisson rates two.sided
joy 1.17 28 0.50 24.77 0.74 1.79 Comparison of Poisson rates two.sided
negative 1.29 47 0.13 38.43 0.91 1.81 Comparison of Poisson rates two.sided
positive 1.16 65 0.30 57.74 0.87 1.54 Comparison of Poisson rates two.sided
sadness 1.25 24 0.32 20.09 0.76 2.00 Comparison of Poisson rates two.sided
surprise 1.27 21 0.35 17.36 0.74 2.11 Comparison of Poisson rates two.sided
trust 1.15 45 0.38 40.18 0.81 1.61 Comparison of Poisson rates two.sided

Visualization of word use likelihood

# Point estimate and confidence interval per sentiment, ordered by estimate.
sentiment_differences %>%
  ungroup() %>%
  mutate(sentiment = reorder(sentiment, estimate)) %>%
  # Subtract 1 so a rate ratio of 1 is plotted at 0 (shown as 0%).
  mutate_each(funs(. - 1), estimate, conf.low, conf.high) %>%
  ggplot(mapping = aes(x = estimate, y = sentiment)) +
  geom_point() +
  geom_errorbarh(mapping = aes(xmin = conf.low, xmax = conf.high)) +
  labs(x = "% increase in Android relative to iPhone", y = "Sentiment") +
  scale_x_continuous(labels = percent_format())

Most common words with their associated sentiments

# For every NRC sentiment other than "positive"/"negative", plot the 10
# words with the largest absolute log ratio, one facet per sentiment.
android_iphone_ratios %>%
  inner_join(nrc, by = "word") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  # Order both facets and words by descending log ratio.
  mutate(
    sentiment = reorder(sentiment, -logratio),
    word = reorder(word, -logratio)
  ) %>%
  group_by(sentiment) %>%
  top_n(10, abs(logratio)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = word, y = logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity") +
  facet_wrap(~sentiment, scales = "free", nrow = 2) +
  # fill is FALSE for non-negative ratios (first label/colour) and TRUE for
  # negative ratios (second label/colour).
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  labs(x = "", y = "Android / iPhone log ratio") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Stacking not well defined when ymin != 0