Sentiment Analysis of Trump's Tweets

Questions

What is the most popular tweet?
Is there a difference between Android and iPhone tweets?
What are the most likely words to come out of different phone brands?
What are the most popular topics?
Where are the user's friends located?

library(dplyr)
library(twitteR)
library(tidyr)
library(lubridate)
library(scales)
library(stringr)
library(ggplot2)
library(tm)
library(tidytext)
library(base64enc)
library(wordcloud)
library(reshape2)
library(gridExtra)
library(topicmodels)
library(maps)
library(geosphere)
library(ggmap)
library(leaflet)
library(ggdendro)
# Request English date/time names (LC_TIME) for this session.
# NOTE(review): "English" looks like a Windows-style locale name — confirm it
# resolves on other platforms.
Sys.setlocale("LC_TIME", "English")

## [1] "English_United States.1252"

## [1] "Using direct authentication"

First we’ll retrieve the content of Donald Trump’s timeline.

# Request up to 3200 statuses from the @realDonaldTrump timeline, then flatten
# the returned list of status objects into a data frame.
trump_tweets <- userTimeline(user = "realDonaldTrump", n = 3200)
trump_tweets_df <- twListToDF(twList = trump_tweets)

Here is the most popular (most retweeted) tweet.

# Show the text and timestamp of every tweet whose retweet count equals the
# maximum (ties are all returned).
trump_tweets_df[
  with(trump_tweets_df, retweetCount == max(retweetCount)),
  c("text", "created")
]

##                                   text             created
## 504 TODAY WE MAKE AMERICA GREAT AGAIN! 2016-11-08 11:43:14

# Derive the client name (e.g. "Twitter for Android") for each tweet.
# The status source appears to be an HTML anchor of the form
# <a ...>NAME</a>: the closing tag is stripped and the text after the first
# ">" is kept.
# vapply() pins the result to one string per tweet, so an unexpected shape
# fails loudly instead of silently producing a list.
sources <- vapply(trump_tweets, function(x) x$getStatusSource(), character(1))
sources <- gsub("</a>", "", sources, fixed = TRUE)
sources <- strsplit(sources, ">", fixed = TRUE)

# Fall back to the first piece when there is no ">" to split on.
sources <- vapply(
  sources,
  function(x) if (length(x) > 1) x[2] else x[1],
  character(1)
)

trump_tweets_df$statusSource <- sources

Here is the number of tweets from each device.

# Number of tweets per client. %in% never yields NA, so tweets with a missing
# source simply do not count.
with(trump_tweets_df, sum(statusSource %in% "Twitter for iPhone"))

## [1] 341

with(trump_tweets_df, sum(statusSource %in% "Twitter for Android"))

## [1] 406

with(trump_tweets_df, sum(statusSource %in% "Twitter Web Client"))

## [1] 46

Next is a time series plot of the retweet counts of Trump's tweets. Tweets that have been retweeted at least 100,000 times have their text shown.

# Prepare the retweet time series: one calendar date per tweet, a
# weekday/weekend flag, and line-wrapped labels for the most retweeted tweets.
trump_tweets_df_1 <- trump_tweets_df

# Keep the dates as Date rather than the POSIXlt that strptime() returns;
# list-based POSIXlt columns do not belong in a data frame.
trump_tweets_df_1$dates <- as.Date(
  format(trump_tweets_df_1$created, "%Y-%m-%d")
)

# "%u" is the ISO weekday number (1 = Monday ... 7 = Sunday), so the weekend
# test no longer depends on the session's LC_TIME locale.
trump_tweets_df_1$weekday <- ifelse(
  format(trump_tweets_df_1$dates, "%u") %in% c("6", "7"),
  "weekend",
  "weekday"
)

# Tweets with at least 100000 retweets get their text printed on the plots;
# insert line breaks after sentence punctuation so the labels wrap.
reTweets <- trump_tweets_df_1[trump_tweets_df_1$retweetCount >= 100000, ]
reTweets$text <- gsub(".", ".\n", reTweets$text, fixed = TRUE)
reTweets$text <- gsub("!", "!\n", reTweets$text, fixed = TRUE)
reTweets$text <- gsub(",", ",\n", reTweets$text, fixed = TRUE)
# Break only after the whole word "and"; without the word boundaries words
# such as "hand" or "thousands" were split in the middle.
reTweets$text <- gsub("\\band\\b", "and\n", reTweets$text)

# Retweet count over time. Tweets with at least 100000 retweets (the rows of
# reTweets) are marked with a point and labelled with their wrapped text.
ggplot(trump_tweets_df_1, aes(x = dates, y = retweetCount)) +
  geom_line() +
  geom_point(data = reTweets) +
  geom_text(data = reTweets, aes(label = text), size = 3) +
  labs(title = "Retweets by Date", x = "") +
  #coord_cartesian(ylim = c(0, 300000)) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

# Same retweet series, split into weekday and weekend panels with free scales.
ggplot(trump_tweets_df_1, aes(x = dates, y = retweetCount)) +
  geom_line() +
  facet_grid(. ~ weekday, scales = "free") +
  geom_point(data = reTweets) +
  geom_text(data = reTweets, aes(label = text), size = 3, vjust = 1) +
  labs(title = "Retweets weekday vs weekend", x = "") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Here are the times each device has been used, in GMT.

# Share of each client's tweets posted in each hour of the day (GMT).
# The grouping is made explicit so that `percent` is always computed within a
# client, whether or not count() leaves its result grouped.
temp <- trump_tweets_df %>%
  count(statusSource, hour = hour(with_tz(created, "GMT"))) %>%
  group_by(statusSource) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup()

# Only the three main clients are plotted.
main_clients <- c(
  "Twitter for Android", "Twitter for iPhone", "Twitter Web Client"
)

ggplot(
  temp[temp$statusSource %in% main_clients, ],
  aes(hour, percent, color = statusSource)
) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Hour of day (GMT)", y = "% of tweets") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Quoted versus non-quoted tweets by device, and tweets with pictures/links by device.

# Count tweets per client, split by whether the text starts with a double
# quote (a manually quoted tweet).
temp <- trump_tweets_df %>%
  count(statusSource,
        quoted = ifelse(str_detect(text, '^"'), "Quoted", "Not quoted"))

plot1 <- ggplot(
  temp[!(temp$statusSource %in% c("Periscope", "Twitter Ads")), ],
  aes(statusSource, n, fill = quoted)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  labs(x = "", y = "Number of tweets", fill = "") +
  ggtitle('Whether tweets start with a\nquotation mark (")') +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Among non-quoted tweets, count those carrying a t.co short link.
# The pattern anchors on the URL scheme and escapes the dot: the previous
# pattern "t.co" treated "." as a wildcard and therefore also matched plain
# text such as "great country".
tweet_picture_counts <- trump_tweets_df %>%
  filter(!str_detect(text, '^"')) %>%
  count(statusSource,
        picture = ifelse(str_detect(text, "https?://t\\.co/"),
                         "Picture/link", "No picture/link"))

plot2 <- ggplot(
  tweet_picture_counts[
    !(tweet_picture_counts$statusSource %in% c("Periscope", "Twitter Ads")),
  ],
  aes(statusSource, n, fill = picture)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  ggtitle('Whether tweets have a picture\nor link') +
  labs(x = "", y = "Number of tweets", fill = "") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Show both bar charts side by side.
grid.arrange(plot1, plot2, ncol = 2)

It turns out that tweets from the iPhone were about 40 times as likely to contain either a picture or a link.

# Shorten the two phone client labels so they can serve as column names.
tweet_picture_counts$statusSource <- ifelse(
  tweet_picture_counts$statusSource == "Twitter for Android",
  "Android", tweet_picture_counts$statusSource
)

tweet_picture_counts$statusSource <- ifelse(
  tweet_picture_counts$statusSource == "Twitter for iPhone",
  "iPhone", tweet_picture_counts$statusSource
)

# Per-phone share of tweets with and without a picture/link.
# Only the two phones are kept, and missing combinations count as 0 so that a
# phone lacking one category does not turn its whole column into NA.
spr <- tweet_picture_counts %>%
  ungroup() %>%
  filter(statusSource %in% c("Android", "iPhone")) %>%
  spread(statusSource, n, fill = 0) %>%
  mutate(
    Android = Android / sum(Android),
    iPhone = iPhone / sum(iPhone)
  )

# Pick the "Picture/link" row by label instead of by position, so the ratio
# does not depend on how the rows happen to be sorted.
has_link <- spr$picture == "Picture/link"
rr <- spr$iPhone[has_link] / spr$Android[has_link]
round(rr)

## [1] 40

What can we say about the difference in content between Android and iPhone tweets? Here are the most common words used by Trump.

# Stop words: the SMART list plus a few tokens that carry no meaning here.
myStopwords <- c(stopwords('SMART'), "use", "see", "used", "via", "amp")

# Clean the tweet text: drop URLs, drop manually quoted tweets (text starting
# with a double quote), strip punctuation and lower-case everything.
trump_tweets_df$text <- gsub("http[^[:space:]]*", "", trump_tweets_df$text)

trump_tweets_df <- trump_tweets_df %>%
  filter(!str_detect(text, '^"'))

trump_tweets_df$text <- gsub("[[:punct:]]", "", trump_tweets_df$text)
trump_tweets_df$text <- tolower(trump_tweets_df$text)

# One row per token, carrying the tweet id and client along.
# (The token list is no longer stored in a variable called `list`, which
# shadowed the base function.)
tokens <- strsplit(trump_tweets_df$text, " ")
tokens_per_tweet <- lengths(tokens)
words_all <- data.frame(id = rep(trump_tweets_df$id, tokens_per_tweet),
                        source = rep(trump_tweets_df$statusSource,
                                     tokens_per_tweet),
                        words = unlist(tokens))

# removeWords() blanks out stop words. The column is converted with
# as.character() first, so it works whether data.frame() produced a factor or
# a character column; the former droplevels() call was redundant for factors
# and has no method for character columns.
words_all$words <- removeWords(as.character(words_all$words), myStopwords)

# Drop blanked-out stop words and leftover junk tokens.
words_all_stop <- words_all[!(is.na(words_all$words)) & words_all$words!="" &
                                words_all$words!="\n" &  words_all$words!="'" &
                              words_all$words!="amp" &
                              words_all$words!="...", ]

# Order the factor levels by descending frequency so that table() below
# returns the most frequent words first.
words_all_stop[,"words"] <- factor(words_all_stop[,"words"],
                           levels=names(sort(table(words_all_stop[,"words"]),
                                             decreasing=TRUE)))

# The 20 most frequent words.
a <- data.frame(table(words_all_stop[,"words"]))
a <- head(a,20)

ggplot(a, aes(x=reorder(Var1, Freq), y=Freq, group = 1)) +
  geom_bar(stat='identity', position = "dodge", color = "black") +
  ylab("Occurrences")+
  xlab("")+
  coord_flip()+
  theme(axis.line = element_line(), axis.text=element_text(color='black'), axis.title = element_text(colour = 'black'), legend.text=element_text(), legend.title=element_text(), legend.key = element_rect(colour = "black"))

Now let’s consider which words are most common from the Android relative to the iPhone. We’ll use the simple measure of log odds ratio.

# Relabel the two phone clients. The column is converted to character first
# and matched by name: the previous `source == 5` compared against a factor's
# integer code, which only matches when `source` is a factor whose fifth level
# happens to be the iPhone client.
words_all_stop$source <- as.character(words_all_stop$source)

words_all_stop$source <- ifelse(
  words_all_stop$source == "Twitter for Android",
  "Android", words_all_stop$source
)

words_all_stop$source <- ifelse(
  words_all_stop$source == "Twitter for iPhone",
  "iPhone", words_all_stop$source
)

# Log2 odds ratio (Android / iPhone) for every word used at least 5 times
# across both phones. The per-word grouping is explicit so the frequency
# filter does not depend on count() leaving its result grouped. Counts get
# +1 smoothing before being turned into within-phone shares.
android_iphone_ratios <- words_all_stop[
  words_all_stop$source %in% c("Android", "iPhone"),
] %>%
  count(words, source) %>%
  group_by(words) %>%
  filter(sum(n) >= 5) %>%
  ungroup() %>%
  spread(source, n, fill = 0) %>%
  mutate(
    Android = (Android + 1) / sum(Android + 1),
    iPhone = (iPhone + 1) / sum(iPhone + 1)
  ) %>%
  mutate(logratio = log2(Android / iPhone)) %>%
  arrange(desc(logratio))

# Remove leftover junk tokens.
android_iphone_ratios <- android_iphone_ratios[
  !(is.na(android_iphone_ratios$words)) &
    android_iphone_ratios$words != "\n" &
    android_iphone_ratios$words != "'",
]

# The 15 most extreme words in each direction.
temp <- android_iphone_ratios %>%
  group_by(logratio > 0) %>%
  top_n(15, abs(logratio)) %>%
  ungroup() %>%
  mutate(words = reorder(words, logratio))

ggplot(temp, aes(words, logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_hline(yintercept = 0) +
  coord_flip() +
  ylab("Android / iPhone log ratio") +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

As we can see a lot of emotionally charged words were more common on Android.

Since we’ve observed a difference in sentiment between the Android and iPhone tweets, let’s try quantifying it. We’ll work with the NRC Word-Emotion Association lexicon, available from the tidytext package, which associates words with 10 sentiments: positive, negative, anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. To measure the sentiment of the Android and iPhone tweets, we can count the number of words in each category:

# Keep only the NRC rows of the `sentiments` table, reduced to the word and
# its associated sentiment.
# NOTE(review): this assumes `sentiments` has a `lexicon` column — confirm
# against the installed tidytext version.
nrc <- dplyr::select(
  filter(sentiments, lexicon == "nrc"),
  word, sentiment
)

head(nrc, n = 5)

## # A tibble: 5 × 2
##        word sentiment
##       <chr>     <chr>
## 1    abacus     trust
## 2   abandon      fear
## 3   abandon  negative
## 4   abandon   sadness
## 5 abandoned     anger

# Lookup table: one row per tweet id with its phone and the total number of
# (non-stop) words tweeted from that phone.
sources <- words_all_stop %>%
  filter(source %in% c("Android", "iPhone")) %>%
  group_by(source) %>%
  mutate(total_words = n()) %>%
  ungroup() %>%
  distinct(id, source, total_words)

# The lexicon join below needs a character column called "word"; the token
# column is the third one.
colnames(words_all_stop)[3] <- "word"
words_all_stop$word <- as.character(words_all_stop$word)

# Words per sentiment and phone. complete() adds explicit zero counts for
# sentiment/tweet combinations that never occur before the counts are summed.
by_source_sentiment <- words_all_stop %>%
  filter(source %in% c("Android", "iPhone")) %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0)) %>%
  inner_join(sources, by = "id") %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()

head(by_source_sentiment)

## # A tibble: 6 × 4
##    source    sentiment total_words words
##     <chr>        <chr>       <int> <dbl>
## 1 Android        anger        3839   217
## 2 Android anticipation        3839   253
## 3 Android      disgust        3839   152
## 4 Android         fear        3839   218
## 5 Android          joy        3839   177
## 6 Android     negative        3839   401

Here is a wordcloud of positive and negative words.

# Words from the two phones that the NRC lexicon tags as positive or negative.
wordcloud_temp <- inner_join(
  words_all_stop[words_all_stop$source %in% c("Android", "iPhone"), ],
  nrc,
  by = "word"
)

wordcloud_temp <- wordcloud_temp[
  wordcloud_temp$sentiment %in% c("negative", "positive"),
]

# Comparison cloud: a word-by-sentiment count matrix, one colour per polarity.
# (wordcloud_temp already holds only positive/negative rows.)
wordcloud_temp %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("#F8766D", "#00BFC4"),
    max.words = 100,
    title.size = 2
  )

# Signed contribution of each word used more than 5 times: negative words get
# a negative count so they plot below zero.
temp <- wordcloud_temp %>%
  count(word, sentiment, sort = TRUE) %>%
  filter(n > 5) %>%
  mutate(n = ifelse(sentiment == "negative", -n, n))

# Sort from most positive to most negative and tag each label with its
# polarity, so a word present under both sentiments gets two distinct levels.
temp <- temp[order(temp$n, decreasing = TRUE), ]
temp$word <- ifelse(
  temp$sentiment == "positive",
  paste(temp$word, "_pos"),
  paste(temp$word, "_neg")
)
temp$word <- factor(
  temp$word,
  levels = temp$word[order(temp$n, decreasing = TRUE)]
)

ggplot(temp, aes(word, n, fill = sentiment)) +
  geom_bar(stat = "identity", colour = "black") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_hline(yintercept = 0) +
  ylab("Contribution to sentiment") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

We can see that the overall sentiment seems to be more positive.

# Positive vs negative word occurrences, stacked by phone.
# geom_bar() counts the rows itself, so no precomputed count column is needed.
# The previous `pos_neg$count <- table(...)` tried to store a two-element
# table in a column of nrow(pos_neg) rows, which either fails or recycles
# meaningless values, and the column was never used.
pos_neg <- wordcloud_temp

ggplot(pos_neg, aes(x = sentiment)) +
  geom_bar(aes(fill = source), color = "black", width = 0.5) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text()
  )

Trump’s Android account uses more words related to “negative” sentiments than the iPhone account does. The positive emotions weren’t different to a statistically significant extent.

# For each sentiment, run poisson.test() on the per-phone word counts
# (`words`) against the per-phone totals (`total_words`) and flatten the test
# result to a data-frame row with tidy().
# NOTE(review): poisson.test() receives the rows in the order they appear in
# by_source_sentiment, so the direction of the estimated ratio depends on that
# ordering — confirm Android comes first.
# NOTE(review): tidy() is not defined in this file and no library(broom) call
# is visible — presumably one of the attached packages provides it; verify.
sentiment_differences <- by_source_sentiment %>%
  group_by(sentiment) %>%
  do(tidy(poisson.test(.$words, .$total_words)))

# Print the resulting table.
sentiment_differences

## Source: local data frame [10 x 9]
## Groups: sentiment [10]
## 
##       sentiment estimate statistic      p.value parameter  conf.low
##           <chr>    <dbl>     <dbl>        <dbl>     <dbl>     <dbl>
## 1         anger 2.184233       217 4.425426e-09  169.6136 1.6537396
## 2  anticipation 1.282871       253 1.955130e-02  230.5311 1.0358942
## 3       disgust 2.628409       152 8.152777e-09  114.0711 1.8383640
## 4          fear 1.750214       218 7.459656e-06  180.3637 1.3547586
## 5           joy 1.075386       177 5.888292e-01  172.0025 0.8434595
## 6      negative 2.018150       401 1.615522e-13  319.5185 1.6556466
## 7      positive 1.173084       454 3.956612e-02  427.0201 1.0051986
## 8       sadness 2.604558       224 2.248507e-12  168.4191 1.9434889
## 9      surprise 1.055960       119 7.703508e-01  116.4600 0.7852959
## 10        trust 1.136208       310 1.834019e-01  295.0320 0.9437246
## # ... with 3 more variables: conf.high <dbl>, method <fctr>,
## #   alternative <fctr>

# Turn the rate ratio and its confidence bounds into a relative increase
# (ratio - 1) and order the sentiments by effect size for plotting.
temp <- sentiment_differences %>%
  ungroup() %>%
  mutate(sentiment = reorder(sentiment, estimate)) %>%
  mutate(
    estimate = estimate - 1,
    conf.low = conf.low - 1,
    conf.high = conf.high - 1
  )

# Point estimate with horizontal confidence interval per sentiment; the dashed
# line marks "no difference".
ggplot(temp, aes(estimate, sentiment)) +
  geom_point() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = percent_format()) +
  labs(x = "% increase in Android relative to iPhone", y = "Sentiment") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Let’s consider the words with the largest changes within each category.

# The lexicon join needs a character column called "word"; it is the first
# column of the ratio table.
colnames(android_iphone_ratios)[1] <- "word"
android_iphone_ratios$word <- as.character(android_iphone_ratios$word)

# For every emotion (positive/negative excluded), keep the 10 words with the
# largest absolute log ratio.
temp <- android_iphone_ratios %>%
  inner_join(nrc, by = "word") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  mutate(
    sentiment = reorder(sentiment, -logratio),
    word = reorder(word, -logratio)
  ) %>%
  group_by(sentiment) %>%
  top_n(10, abs(logratio)) %>%
  ungroup()

# One panel per emotion; bar colour shows which phone the word leans towards.
ggplot(temp, aes(word, logratio, fill = logratio < 0)) +
  facet_wrap(~ sentiment, scales = "free", nrow = 2) +
  geom_bar(stat = "identity", colour = "black") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Android / iPhone log ratio") +
  geom_hline(yintercept = 0) +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Here are the most common stemmed words. They don’t differ much from the raw comparison we saw earlier.

# Start again from the unmodified tweets and build a tm corpus from the text.
trump_tweets_df <- twListToDF(trump_tweets)

myCorpus <- Corpus(VectorSource(trump_tweets_df$text))

# Text cleaners: drop URLs, then drop everything that is neither a letter nor
# whitespace.
removeURL <- function(x) {
  gsub("http[^[:space:]]*", "", x)
}
removeNumPunct <- function(x) {
  gsub("[^[:alpha:][:space:]]*", "", x)
}

myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))

# Remove stop words (SMART list plus a few extras) and collapse whitespace.
myStopwords <- c(stopwords('SMART'), "use", "see", "used", "via", "amp")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
myCorpus <- tm_map(myCorpus, stripWhitespace)

# Keep an unstemmed copy to serve as the dictionary for stem completion.
myCorpusCopy <- myCorpus

myCorpus <- tm_map(myCorpus, stemDocument)
#writeLines(strwrap(myCorpus[[1]]$content, 60))

# Complete the stems of one document against `dictionary`.
#   x          a document; coerced to character and split on single spaces
#   dictionary passed through to stemCompletion()
# Returns a PlainTextDocument holding the completed words joined by spaces.
stemCompletion2 <- function(x, dictionary) {
  stems <- unlist(strsplit(as.character(x), " "))
  stems <- stems[stems != ""]  # drop empty tokens left by repeated spaces
  completed <- stemCompletion(stems, dictionary = dictionary)
  rejoined <- paste(completed, collapse = " ")
  PlainTextDocument(stripWhitespace(rejoined))
}

# Complete every stemmed document and wrap the results in a corpus again.
myCorpus <- lapply(
  myCorpus,
  function(doc) stemCompletion2(doc, dictionary = myCorpusCopy)
)
myCorpus <- Corpus(VectorSource(myCorpus))
#writeLines(strwrap(myCorpus[[1]]$content, 60))

# Term-document matrix that keeps terms of any length (one character and up).
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))

freq.terms <- findFreqTerms(tdm, lowfreq = 20)

# Total count per term, restricted to terms occurring at least 20 times.
term.freq <- rowSums(as.matrix(tdm))
term.freq <- term.freq[term.freq >= 20]
df <- data.frame(term = names(term.freq), freq = term.freq)

df <- df[order(df$freq, decreasing = TRUE), ]

ggplot(df, aes(x = reorder(term, freq), y = freq)) +
  geom_bar(stat = 'identity', color = "black") +
  labs(x = "Terms", y = "Count", title = "Frequent words with stemming") +
  coord_flip() +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Next I’d like to see what words are associated with “clinton” and “america”.

# Terms whose correlation with "clinton" across tweets is at least 0.2.
findAssocs(tdm, terms = "clinton", corlimit = 0.2)

## $clinton
##        betray         fumed         labor  postelection     surrogate 
##          0.27          0.27          0.27          0.27          0.27 
##           tpp          camp draintheswamp         email          told 
##          0.27          0.26          0.23          0.23          0.23 
##         video      campaign 
##          0.23          0.21

# Terms whose correlation with "america" across tweets is at least 0.2.
findAssocs(tdm, terms = "america", corlimit = 0.2)

## $america
##  make great  safe 
##  0.56  0.39  0.25

If one is familiar with the themes surrounding the election, these associated words should not come as a surprise.
Below are the most common topics from Trump's Twitter account.

# Documents as rows, terms as columns.
dtm <- as.DocumentTermMatrix(tdm)

# Word total per document; documents left with no words are dropped before
# the model is fitted.
rowTotals <- rowSums(as.matrix(dtm))
dtm.new <- dtm[rowTotals > 0, ]

# Fit an 8-topic LDA model and label each topic with its first 7 terms.
# NOTE(review): no seed is set, so topics may differ between runs — confirm
# whether reproducibility matters here.
lda <- LDA(dtm.new, k = 8)
term <- terms(lda, 7)
term <- apply(term, MARGIN = 2, paste, collapse = ", ")
print(term)

##                                                Topic 1 
## "time, news, people, fake, draintheswamp, great, vote" 
##                                                Topic 2 
##  "great, state, make, support, trump, people, clinton" 
##                                                Topic 3 
##    "back, vote, trump, join, clinton, tonight, people" 
##                                                Topic 4 
##          "bad, watch, join, pm, america, job, florida" 
##                                                Topic 5 
##  "media, make, elect, trump, countries, tonight, vote" 
##                                                Topic 6 
##  "great, people, trump, state, bad, pm, draintheswamp" 
##                                                Topic 7 
##   "clinton, elect, pm, cnn, draintheswamp, back, news" 
##                                                Topic 8 
##         "great, job, pm, support, people, join, watch"

# Most likely topic for every document (tweet) the model was fitted on.
topics <- topics(lda)

# The model only saw documents with at least one word (rowTotals > 0), so the
# tweet dates must be taken from those same rows. Taking the first
# length(topics) dates instead shifts every date after a dropped document.
kept_docs <- which(rowTotals > 0)
stopifnot(length(kept_docs) == length(topics))

topics <- data.frame(
  date = as.Date(trump_tweets_df$created[kept_docs]),
  topic = topics
)

# Stacked density of tweets over time, coloured by each topic's term label.
ggplot(topics, aes(date, fill = term[topic])) +
  geom_density(position = "stack") +
  xlab("") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Below we can see the association of common words with each other.

# Drop very sparse terms (sparsity threshold 0.95), then cluster the remaining
# terms hierarchically (Ward's method) on their scaled document counts.
tdm2 <- removeSparseTerms(tdm, sparse = 0.95)
m2 <- as.matrix(tdm2)
distMatrix <- dist(scale(m2))
fit <- hclust(distMatrix, method = "ward.D2")
dhc <- as.dendrogram(fit)

# Extract segment and label coordinates so the tree can be drawn with ggplot2.
ddata <- dendro_data(dhc, type = "rectangle")

ggplot(segment(ddata)) +
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_text(
    data = ddata$labels,
    aes(x = x, y = y, label = label),
    size = 3, vjust = 0, angle = 45
  ) +
  labs(x = "", y = "", title = "Dendrogram of words") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )

Next I would like to consider Trump's friends on Twitter and their locations.

# Look up the user object for the account.
user <- getUser("realDonaldTrump")

# Print the first five columns of the user's data-frame representation.
user$toDataFrame()[1:5]

##                                      description statusesCount
## 1 45th President of the United States of America         34517
##   followersCount favoritesCount friendsCount
## 1       25261503             45           43

# Retrieve the user's friends via the user object's getFriends() method.
friends <- user$getFriends()

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  without_leading <- sub("^\\s+", "", x)
  sub("\\s+$", "", without_leading)
}

# Profile location of the user and of every friend.
userLocation <- location(user)

friendsLocation <- sapply(friends, location)

# Friends with an empty location string are dropped.
has_location <- friendsLocation != ""
friendsLocation <- friendsLocation[has_location]

head(friendsLocation, n = 3)

##                        471672239                         20733972 
##                 "Washington, DC" "Kenosha, WI and Washington, DC" 
##                        322293052 
##                         "Malibu"

Here we have the latitude and longitude of each friend.

# Replace "Washington, DC" with "DC" before geocoding, because only the part
# before the first comma is sent to the geocoder below. Done as one vectorised
# replacement instead of a loop of scalar ifelse() calls.
friendsLocation[friendsLocation %in% "Washington, DC"] <- "DC"

# One (lat, lon) row per friend; rows stay NA when geocoding gives no result.
friendsLL <- matrix(NA_real_, nrow = length(friendsLocation), ncol = 2)

# seq_along() makes the loop a no-op for an empty friend list, where
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(friendsLocation)) {
  place <- strsplit(trim(friendsLocation[[i]]), ",")[[1]][1]
  tmpLL <- geocode(place)
  if (any(!is.na(tmpLL$lat))) {
    friendsLL[i, ] <- c(tmpLL$lat, tmpLL$lon)
  }
}

friendsLL <- data.frame(friendsLL)
names(friendsLL) <- c("lat", "lon")
head(friendsLL, 3)

##        lat        lon
## 1 38.90719  -77.03687
## 2 42.58474  -87.82119
## 3 34.02592 -118.77976

Here is an interactive map of Trump, his friends and their locations.

# Display name and screen name of every friend; the vector names are used as
# the join key below.
friendsName <- sapply(friends, name)
friendsScreenname <- sapply(friends, screenName)

friendsName_df <- data.frame(friendsName)
friendsName_df$key <- row.names(friendsName_df)

friendsScreenname_df <- data.frame(friendsScreenname)
friendsScreenname_df$key <- row.names(friendsScreenname_df)

# Locations and coordinates share the same row order, so they are bound
# column-wise before joining on the key.
friendsLoc_df <- data.frame(friendsLocation)
friendsLoc_df$key <- row.names(friendsLoc_df)
friendsLoc_df <- cbind(friendsLoc_df, friendsLL)

# Inner joins: friends without a usable location drop out here.
friendsAll_df <- merge(friendsName_df, friendsLoc_df, by = "key")
friendsAll_df <- merge(friendsAll_df, friendsScreenname_df, by = "key")

# Geocode the user's own location from the part after the first comma.
# When the location has no comma, fall back to the whole string instead of
# passing NA to the geocoder.
user_location_parts <- strsplit(trim(location(user)), ",")[[1]]
user_place <- if (length(user_location_parts) >= 2) {
  user_location_parts[2]
} else {
  user_location_parts[1]
}
temp <- geocode(user_place)

# Put the user first, followed by the friends.
user_df <- data.frame(
  key = 1,
  friendsName = name(user),
  friendsLocation = location(user),
  lat = temp$lat,
  lon = temp$lon,
  friendsScreenname = screenName(user)
)
friendsAll_df <- rbind(user_df, friendsAll_df)

# The user is drawn in red, friends in blue.
friendsAll_df$colors <- ifelse(
  friendsAll_df$friendsScreenname == "realDonaldTrump",
  "red",
  "blue"
)


# HTML popup per marker: name, screen name and location on separate lines.
marker_popups <- paste(
  "<b>Name</b>",
  friendsAll_df$friendsName,
  "<b>Screen Name</b>",
  friendsAll_df$friendsScreenname,
  "<b>Location</b>",
  friendsAll_df$friendsLocation,
  sep = "<br/>"
)

# Interactive map on the default OpenStreetMap tiles; the user's own marker is
# drawn with a larger radius than the friends' markers.
leaflet(friendsAll_df) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~lon,
    lat = ~lat,
    popup = marker_popups,
    radius = ~ifelse(friendsName == "Donald J. Trump", 10, 5),
    color = ~colors,
    stroke = FALSE,
    fillOpacity = 0.5
  )

Sentiment Analysis of Trump's Tweets

JMFlin

Summary

Questions