load tweets and convert to data frame

# Request up to 3200 statuses from the @realDonaldTrump timeline.
# NOTE(review): userTimeline() presumably comes from the twitteR package and
# needs an authenticated session; that setup is not shown in this file.
trump_tweets <- userTimeline("realDonaldTrump", n = 3200)

# Turn every status into a data frame, row-bind them, and wrap the result as a
# tibble.
trump_tweets_df <- trump_tweets %>%
  map_df(as.data.frame) %>%
  tbl_df()

extract tweet source from the data frame

# Keep the columns used downstream, replace statusSource with a `source` column
# holding the text captured between "Twitter for " and the next "<", and retain
# only the tweets whose source is "iPhone" or "Android".
tweets <- trump_tweets_df %>%
  select(id, statusSource, text, created) %>%
  extract(
    col = statusSource,
    into = "source",
    regex = "Twitter for (.*?)<"
  ) %>%
  filter(source %in% c("iPhone", "Android"))

# Preview the first ten rows as a table.
tweets %>%
  head(10) %>%
  kable()

id	source	text	created
797455295928791040	Android	This will prove to be a great time in the lives of ALL Americans. We will unite and we will win, win, win!	2016-11-12 15:05:33
797098212599496704	iPhone	Today we express our deepest gratitude to all those who have served in our armed forces. #ThankAVet https://t.co/wPk7QWpK8Z	2016-11-11 15:26:37
797069763801387008	Android	Busy day planned in New York. Will soon be making some very important decisions on the people who will be running our government!	2016-11-11 13:33:35
797034721075228672	Android	Love the fact that the small groups of protesters last night have passion for our great country. We will all come together and be proud!	2016-11-11 11:14:20
796900183955095552	Android	Just had a very open and successful presidential election. Now professional protesters, incited by the media, are protesting. Very unfair!	2016-11-11 02:19:44
796897928048766976	Android	A fantastic day in D.C. Met with President Obama for first time. Really good meeting, great chemistry. Melania liked Mrs. O a lot!	2016-11-11 02:10:46
796797436752707585	iPhone	Happy 241st birthday to the U.S. Marine Corps! Thank you for your service!! https://t.co/Lz2dhrXzo4	2016-11-10 19:31:27
796315640307060738	Android	Such a beautiful and important evening! The forgotten man and woman will never be forgotten again. We will all come together as never before	2016-11-09 11:36:58
796182637622816768	iPhone	Watching the returns at 9:45pm. #ElectionNight #MAGA<ed><U+00A0> https://t.co/HfuJeRZbod	2016-11-09 02:48:27
796126077647196160	iPhone	Still time to #VoteTrump! #iVoted #ElectionNight https://t.co/UZtYAY1Ba6	2016-11-08 23:03:42

chart by time of day

# Line chart of the share of each device's tweets posted in each hour of the
# day (EST).
#
# The explicit group_by(source) makes `percent` a within-device share. Relying
# on count() to leave its result grouped by `source` only works on dplyr
# versions where count() returns grouped output; where it returns ungrouped
# data, n / sum(n) would silently become a share of *all* tweets instead.
tweets %>%
  count(source, hour = hour(with_tz(created, "EST"))) %>%
  group_by(source) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(hour, percent, color = source)) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Hour of the day (EST)", y = "% of tweets", color = "")

chart by picture/link

# Count tweets per device, split by whether the text contains a t.co link.
# Tweets whose text starts with a double quote are excluded first.
#
# The dot in the pattern is escaped so that only a literal "t.co" matches.
# With the unescaped pattern "t.co", any "t" + any character + "co" matched
# (e.g. "great country"), so such tweets were miscounted as "Picture/link".
tweet_picture_counts <- tweets %>%
  filter(!str_detect(text, '^"')) %>%
  count(
    source,
    picture = ifelse(
      str_detect(text, "t\\.co"),
      "Picture/link",
      "No picture/link"
    )
  )

# Side-by-side bars: number of tweets per device, with and without a link.
ggplot(tweet_picture_counts, aes(source, n, fill = picture)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "", y = "Number of tweets", fill = "")

Isolating Words

# Token delimiter: any character that is not a letter, digit, '#', '@' or an
# apostrophe, OR an apostrophe that is not followed by one of those characters
# (so "don't" stays whole while a trailing quote mark is split off).
#
# The second alternative must be a negative lookahead, (?!...). It was
# previously written as ('?!...), which is an ordinary group that only matches
# an apostrophe followed by a literal "!" and a word character, so apostrophes
# at word ends were never treated as delimiters.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"

# One row per word: drop tweets starting with a double quote, strip t.co links
# and "&amp;" entities, split the text on `reg`, then remove stop words and
# tokens that contain no lowercase letter.
tweet_words <- tweets %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(!word %in% stop_words$word, str_detect(word, "[a-z]"))
kable(head(tweet_words))

id	source	created	word
764045345332396032	Android	2016-08-12 10:26:20	ratings
764045345332396032	Android	2016-08-12 10:26:20	challenged
764045345332396032	Android	2016-08-12 10:26:20	@cnn
764045345332396032	Android	2016-08-12 10:26:20	reports
764045345332396032	Android	2016-08-12 10:26:20	call
764045345332396032	Android	2016-08-12 10:26:20	president

Most common words and likelihood of Android and iPhone

# Per-word Android vs. iPhone usage, expressed as a log2 ratio.
#
# Steps:
#   1. count each word per device;
#   2. keep words used at least 5 times in total -- the group_by(word) is
#      explicit so that sum(n) is a per-word total regardless of whether
#      count() returns grouped or ungrouped output;
#   3. spread to one column per device (0 where a device never used the word);
#   4. convert each device column to a smoothed share, (n + 1) / sum(n + 1);
#   5. compute log2(Android / iPhone) and sort with the most Android-leaning
#      words first.
android_iphone_ratios <- tweet_words %>%
  count(word, source) %>%
  group_by(word) %>%
  filter(sum(n) >= 5) %>%
  ungroup() %>%
  spread(source, n, fill = 0) %>%
  mutate_each(funs((. + 1) / sum(. + 1)), -word) %>%
  mutate(logratio = log2(Android / iPhone)) %>%
  arrange(desc(logratio))

# `digits` belongs to kable(), not head(); passed to head() it was ignored and
# the table printed unrounded values.
kable(head(android_iphone_ratios), digits = 2)

word	Android	iPhone	logratio
exciting	0.0183486	0.0009116	4.331163
wow	0.0214067	0.0018232	3.553556
paul	0.0183486	0.0018232	3.331164
ryan	0.0183486	0.0018232	3.331164
@cnn	0.0214067	0.0027347	2.968593
media	0.0214067	0.0036463	2.553556

Visualization of most common words

# Horizontal bar chart of the 15 words with the largest absolute log ratio on
# each side of zero. Bars are ordered by log ratio and coloured by sign:
# logratio < 0 is FALSE for the first fill level ("Android", red) and TRUE for
# the second ("iPhone", light blue).
android_iphone_ratios %>%
  group_by(logratio > 0) %>%
  top_n(15, abs(logratio)) %>%
  ungroup() %>%
  mutate(word = reorder(word, logratio)) %>%
  ggplot(aes(word, logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(y = "Android/ iPhone log ratio") +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  )

## Warning: Stacking not well defined when ymin != 0

Sentiments

# Word-to-sentiment lookup restricted to the rows of `sentiments` whose
# lexicon is "nrc"; a word can appear once per sentiment it is tagged with.
nrc <- sentiments %>%
  filter(lexicon == "nrc") %>%
  select(word, sentiment)

# Preview the first rows of the lookup.
nrc %>%
  head() %>%
  kable()

word	sentiment
abacus	trust
abandon	fear
abandon	negative
abandon	sadness
abandoned	anger
abandoned	fear

Word Count

# One row per tweet id with its device and the total number of words recorded
# for that device in tweet_words.
sources <- tweet_words %>%
  group_by(source) %>%
  mutate(total_words = n()) %>%
  ungroup() %>%
  select(id, source, total_words) %>%
  distinct()

Sentiments

iPhone

# Number of sentiment-tagged words per sentiment for iPhone tweets.
# Words are matched to the nrc lookup and counted per (sentiment, tweet id);
# missing combinations are filled with 0, each tweet is joined to its device
# and that device's word total, and the counts are summed per sentiment.
by_source_sentiment_iPh <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0)) %>%
  inner_join(sources) %>%
  filter(source == "iPhone") %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()

## Joining, by = "id"

# Preview the iPhone sentiment counts with the share of iPhone words that
# carry each sentiment, as a percentage.
# The ratio was previously inverted (total_words / words), which reported the
# number of words per sentiment word (e.g. 2212 / 90 = 24.58) rather than a
# percentage of words.
by_source_sentiment_iPh %>%
  head() %>%
  mutate(percent_sentiment = 100 * words / total_words) %>%
  kable(digits = 2)

source	sentiment	total_words	words	percent_sentiment
iPhone	anger	2212	90	24.58
iPhone	anticipation	2212	151	14.65
iPhone	disgust	2212	42	52.67
iPhone	fear	2212	113	19.58
iPhone	joy	2212	99	22.34
iPhone	negative	2212	150	14.75

Android

# Number of sentiment-tagged words per sentiment for Android tweets.
# Words are matched to the nrc lookup and counted per (sentiment, tweet id);
# missing combinations are filled with 0, each tweet is joined to its device
# and that device's word total, and the counts are summed per sentiment.
by_source_sentiment_An <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0)) %>%
  inner_join(sources) %>%
  filter(source == "Android") %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()

## Joining, by = "id"

# Preview the Android sentiment counts with the share of Android words that
# carry each sentiment, as a percentage.
# The ratio was previously inverted (total_words / words), which reported the
# number of words per sentiment word (e.g. 536 / 31 = 17.29) rather than a
# percentage of words.
by_source_sentiment_An %>%
  head() %>%
  mutate(percent_sentiment = 100 * words / total_words) %>%
  kable(digits = 2)

source	sentiment	total_words	words	percent_sentiment
Android	anger	536	31	17.29
Android	anticipation	536	41	13.07
Android	disgust	536	22	24.36
Android	fear	536	22	24.36
Android	joy	536	28	19.14
Android	negative	536	47	11.40

Join words to sentiments

# Sentiment word counts for both devices: one row per (source, sentiment) with
# the device's total word count and the number of words tagged with that
# sentiment.
by_source_sentiment <- tweet_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0)) %>%
  inner_join(sources) %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()

## Joining, by = "id"

# Preview the first rows of the combined per-device sentiment counts.
by_source_sentiment %>%
  head() %>%
  kable()

source	sentiment	total_words	words
Android	anger	536	31
Android	anticipation	536	41
Android	disgust	536	22
Android	fear	536	22
Android	joy	536	28
Android	negative	536	47

Likelihood of a difference in word use between phones

# For every sentiment, compare the two devices' rates of sentiment words with
# a Poisson test: the event counts are `words` and the time bases are
# `total_words`. tidy() turns each test result into one row.
# NOTE(review): the estimate is the ratio of the first row's rate to the
# second's, so this relies on the Android row preceding the iPhone row within
# each sentiment -- confirm the row order of by_source_sentiment.
sentiment_differences <- by_source_sentiment %>%
  group_by(sentiment) %>%
  do(
    tidy(poisson.test(x = .$words, T = .$total_words))
  )

kable(sentiment_differences, digits = 2)

sentiment	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
anger	1.42	31	0.11	23.60	0.91	2.16	Comparison of Poisson rates	two.sided
anticipation	1.12	41	0.52	37.45	0.77	1.59	Comparison of Poisson rates	two.sided
disgust	2.16	22	0.01	12.48	1.23	3.70	Comparison of Poisson rates	two.sided
fear	0.80	22	0.39	26.33	0.48	1.28	Comparison of Poisson rates	two.sided
joy	1.17	28	0.50	24.77	0.74	1.79	Comparison of Poisson rates	two.sided
negative	1.29	47	0.13	38.43	0.91	1.81	Comparison of Poisson rates	two.sided
positive	1.16	65	0.30	57.74	0.87	1.54	Comparison of Poisson rates	two.sided
sadness	1.25	24	0.32	20.09	0.76	2.00	Comparison of Poisson rates	two.sided
surprise	1.27	21	0.35	17.36	0.74	2.11	Comparison of Poisson rates	two.sided
trust	1.15	45	0.38	40.18	0.81	1.61	Comparison of Poisson rates	two.sided

Visualization of word use likelihood

# Point-and-interval plot of the rate ratios. Sentiments are ordered by their
# estimate, then 1 is subtracted from the estimate and both confidence bounds
# so that the x axis reads as a relative increase (0 = no difference).
sentiment_differences %>%
  ungroup() %>%
  mutate(sentiment = reorder(sentiment, estimate)) %>%
  mutate_each(funs(. - 1), estimate, conf.low, conf.high) %>%
  ggplot(aes(estimate, sentiment)) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = percent_format()) +
  labs(
    x = "% increase in Android relative to iPhone",
    y = "Sentiment"
  )

Most common words with their associated sentiments

# Faceted bar chart: for each nrc sentiment other than "positive"/"negative",
# the 10 words with the largest absolute Android/iPhone log ratio. Sentiments
# and words are ordered by descending log ratio, and bars are coloured by the
# sign of the ratio (red = "Android", light blue = "iPhone").
android_iphone_ratios %>%
  inner_join(nrc, by = "word") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  mutate(
    sentiment = reorder(sentiment, -logratio),
    word = reorder(word, -logratio)
  ) %>%
  group_by(sentiment) %>%
  top_n(10, abs(logratio)) %>%
  ungroup() %>%
  ggplot(aes(word, logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity") +
  facet_wrap(~sentiment, scales = "free", nrow = 2) +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  labs(x = "", y = "Android / iPhone log ratio") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## Warning: Stacking not well defined when ymin != 0

angrytweets

Christine

November 10, 2016

Isolating Words

Most common words and likelihood of Android and iPhone

Sentiments