In this exercise I will extract tweets and friends from the Twitter website and clean the text by removing punctuation, numbers, hyperlinks and stop words. After this I will analyze the sentiment, topics and words of the chosen person.
What is the most popular tweet?
Is there a difference between Android and iPhone tweets?
What are the most likely words to come out of different phone brands?
What are the most popular topics?
Where are the user's friends located?
library(dplyr)
library(twitteR)
library(tidyr)
library(lubridate)
library(scales)
library(stringr)
library(ggplot2)
library(tm)
library(tidytext)
library(base64enc)
library(wordcloud)
library(reshape2)
library(gridExtra)
library(topicmodels)
library(maps)
library(geosphere)
library(ggmap)
library(leaflet)
library(ggdendro)
# Force English day/month names: later code compares weekdays() output against
# "Saturday" and "Sunday". "English" is only a valid locale name on Windows, so
# fall back to the portable "C" locale (which also yields English names) when
# the operating system rejects it.
time_locale <- suppressWarnings(Sys.setlocale("LC_TIME", "English"))
if (!nzchar(time_locale)) {
  time_locale <- Sys.setlocale("LC_TIME", "C")
}
time_locale
## [1] "English_United States.1252"
## [1] "Using direct authentication"
First we'll retrieve the content of Donald Trump's timeline.
# Request up to 3200 statuses from the "realDonaldTrump" timeline, then convert
# the returned list of status objects into a data frame.
trump_tweets <- userTimeline(user = "realDonaldTrump", n = 3200)
trump_tweets_df <- twListToDF(twList = trump_tweets)
Here is the most popular (most retweeted) tweet.
# Show text and timestamp of the tweet(s) whose retweet count equals the maximum.
is_top_retweet <- trump_tweets_df$retweetCount == max(trump_tweets_df$retweetCount)
trump_tweets_df[is_top_retweet, c("text", "created")]
## text created
## 504 TODAY WE MAKE AMERICA GREAT AGAIN! 2016-11-08 11:43:14
# Derive a plain client name for every tweet from its status source string.
# The closing "</a>" is stripped and the remainder split on ">"; when the split
# yields more than one piece the second piece (the text after the opening tag)
# is the client name, otherwise the string is used as-is.
# vapply() pins the result to one string per tweet, so an empty or malformed
# input cannot silently turn the column into a list as sapply() would.
sources <- vapply(
  trump_tweets,
  function(status) status$getStatusSource(),
  character(1)
)
sources <- gsub("</a>", "", sources)
sources <- vapply(
  strsplit(sources, ">"),
  function(parts) if (length(parts) > 1) parts[2] else parts[1],
  character(1)
)
trump_tweets_df$statusSource <- sources
Here is the number of tweets from each device.
# Number of tweets sent from each of the three main clients.
sum(is.element(trump_tweets_df$statusSource, "Twitter for iPhone"))
## [1] 341
sum(is.element(trump_tweets_df$statusSource, "Twitter for Android"))
## [1] 406
sum(is.element(trump_tweets_df$statusSource, "Twitter Web Client"))
## [1] 46
Next is a time-series plot of the retweet counts of Trump's tweets. Those that have been retweeted at least 100,000 times have their text shown.
# Retweet counts over time, overall and split by weekday/weekend. Tweets with at
# least `retweet_label_min` retweets are marked and labelled with their text.
retweet_label_min <- 100000

trump_tweets_df_1 <- trump_tweets_df
# Truncate the timestamp to the calendar day. strptime() returns POSIXlt, a
# list-based class that data frames, dplyr and ggplot2 date scales handle
# poorly, so the column is stored as POSIXct instead.
trump_tweets_df_1$dates <- as.POSIXct(
  strptime(trump_tweets_df_1$created, format = "%Y-%m-%d")
)
# weekdays() is locale dependent; the comparison below expects English names.
day_name <- weekdays(trump_tweets_df_1$dates)
trump_tweets_df_1$weekday <- ifelse(
  day_name == "Saturday" | day_name == "Sunday",
  "weekend",
  "weekday"
)

reTweets <- trump_tweets_df_1[trump_tweets_df_1$retweetCount >= retweet_label_min, ]
# Insert line breaks after ".", "!", "," and after the whole word "and" so the
# labels wrap. The word-boundary anchors keep words such as "hand" or "Andrew"
# from being split.
reTweets$text <- gsub(".", ".\n", reTweets$text, fixed = TRUE)
reTweets$text <- gsub("!", "!\n", reTweets$text, fixed = TRUE)
reTweets$text <- gsub(",", ",\n", reTweets$text, fixed = TRUE)
reTweets$text <- gsub("\\band\\b", "and\n", reTweets$text)

# Theme shared by both plots in this section.
retweet_theme <- theme(
  axis.line = element_line(),
  axis.text = element_text(color = 'black'),
  axis.title = element_text(colour = 'black'),
  legend.text = element_text(),
  legend.title = element_text(),
  legend.key = element_rect(colour = "black")
)

ggplot(trump_tweets_df_1, aes(x = dates, y = retweetCount)) +
  geom_line() +
  geom_point(data = reTweets, aes(x = dates, y = retweetCount)) +
  geom_text(data = reTweets, aes(x = dates, y = retweetCount, label = text), size = 3) +
  ggtitle("Retweets by Date") +
  xlab("") +
  #coord_cartesian(ylim = c(0, 300000)) +
  retweet_theme

ggplot(trump_tweets_df_1, aes(x = dates, y = retweetCount)) +
  geom_line() +
  facet_grid(. ~ weekday, scales = "free") +
  geom_point(data = reTweets, aes(x = dates, y = retweetCount)) +
  geom_text(
    data = reTweets,
    aes(x = dates, y = retweetCount, label = text),
    size = 3,
    vjust = 1
  ) +
  xlab("") +
  ggtitle("Retweets weekday vs weekend") +
  retweet_theme
Here are the times each device has been used, in GMT.
# Share of each client's tweets sent in each hour of the day (GMT).
# The share is computed within each statusSource. The grouping is explicit
# because count() returns an ungrouped result in current dplyr, which would
# otherwise make `percent` a share of ALL tweets rather than of the device's.
temp <- trump_tweets_df %>%
  count(statusSource, hour = hour(with_tz(created, "GMT"))) %>%
  group_by(statusSource) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup()

main_sources <- c("Twitter for Android", "Twitter for iPhone", "Twitter Web Client")

ggplot(
  temp[temp$statusSource %in% main_sources, ],
  aes(hour, percent, color = statusSource)
) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Hour of day (GMT)", y = "% of tweets") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Quoted and not-quoted tweets by device, and tweets with pictures/links by device.
# Two side-by-side bar charts per client:
#   plot1 - whether a tweet starts with a quotation mark,
#   plot2 - whether a (non-quoted) tweet contains a t.co short link.
excluded_sources <- c("Periscope", "Twitter Ads")

# Theme shared by both bar charts (x labels rotated to fit client names).
source_bar_theme <- theme(
  axis.line = element_line(),
  axis.text = element_text(color = 'black'),
  axis.title = element_text(colour = 'black'),
  legend.text = element_text(),
  legend.title = element_text(),
  legend.key = element_rect(colour = "black"),
  axis.text.x = element_text(angle = 45, hjust = 1)
)

temp <- trump_tweets_df %>%
  count(
    statusSource,
    quoted = ifelse(str_detect(text, '^"'), "Quoted", "Not quoted")
  )

plot1 <- ggplot(
  temp[!(temp$statusSource %in% excluded_sources), ],
  aes(statusSource, n, fill = quoted)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  labs(x = "", y = "Number of tweets", fill = "") +
  ggtitle('Whether tweets start with a\nquotation mark (")') +
  source_bar_theme

# fixed() matches the literal text "t.co". As a regex, the unescaped "." would
# match any character, so ordinary text such as "it costs" was counted as a link.
tweet_picture_counts <- trump_tweets_df %>%
  filter(!str_detect(text, '^"')) %>%
  count(
    statusSource,
    picture = ifelse(
      str_detect(text, fixed("t.co")),
      "Picture/link",
      "No picture/link"
    )
  )

plot2 <- ggplot(
  tweet_picture_counts[!(tweet_picture_counts$statusSource %in% excluded_sources), ],
  aes(statusSource, n, fill = picture)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  ggtitle('Whether tweets have a picture\nor link') +
  labs(x = "", y = "Number of tweets", fill = "") +
  source_bar_theme

grid.arrange(plot1, plot2, ncol = 2)
It turns out that tweets from the iPhone were about 40 times as likely to contain either a picture or a link.
# How much more likely is an iPhone tweet to carry a picture/link than an
# Android tweet? Client names are shortened, counts are spread to one column
# per client, each client's counts are turned into within-client proportions,
# and the two "Picture/link" proportions are divided.
tweet_picture_counts <- ungroup(tweet_picture_counts)
tweet_picture_counts$statusSource <- ifelse(
  tweet_picture_counts$statusSource == "Twitter for Android",
  "Android",
  tweet_picture_counts$statusSource
)
tweet_picture_counts$statusSource <- ifelse(
  tweet_picture_counts$statusSource == "Twitter for iPhone",
  "iPhone",
  tweet_picture_counts$statusSource
)

# Plain mutate() replaces the deprecated mutate_each()/funs() pair. fill = 0
# keeps a missing source/category combination from turning the sums into NA.
spr <- tweet_picture_counts %>%
  spread(statusSource, n, fill = 0) %>%
  mutate(
    Android = Android / sum(Android),
    iPhone = iPhone / sum(iPhone)
  )

# Select the row by its label instead of relying on it being the second row.
has_picture <- spr$picture == "Picture/link"
rr <- spr$iPhone[has_picture] / spr$Android[has_picture]
round(rr)
## [1] 40
What can we say about the difference in the content from Android and iPhone? Here are the most common words used by Trump.
# Build a one-row-per-word table (tweet id, client, word) from the non-quoted
# tweets, drop stop words and leftovers, and plot the 20 most frequent words.
myStopwords <- c(stopwords('SMART'), "use", "see", "used", "via", "amp")

# Clean the tweet text: strip URLs, drop tweets that start with a quotation
# mark, remove punctuation and lower-case everything.
trump_tweets_df$text <- gsub("http[^[:space:]]*", "", trump_tweets_df$text)
trump_tweets_df <- trump_tweets_df %>%
  filter(!str_detect(text, '^"'))
trump_tweets_df$text <- gsub("[[:punct:]]", "", trump_tweets_df$text)
trump_tweets_df$text <- tolower(trump_tweets_df$text)

# Split every tweet on single spaces. (Named `tweet_words` rather than `list`
# so the base function list() is not shadowed.)
tweet_words <- strsplit(trump_tweets_df$text, " ")
words_per_tweet <- sapply(tweet_words, length)
words_all <- data.frame(
  id = rep(trump_tweets_df$id, words_per_tweet),
  source = rep(trump_tweets_df$statusSource, words_per_tweet),
  words = unlist(tweet_words)
)

# as.character() works whether data.frame() produced a factor (R < 4.0) or a
# character column (R >= 4.0); calling droplevels() on a character vector, as
# the code used to, fails on current R.
words_all$words <- removeWords(as.character(words_all$words), myStopwords)

# removeWords() blanks out stop words, so drop empty strings and other residue.
keep_word <- !(is.na(words_all$words)) & words_all$words != "" &
  words_all$words != "\n" & words_all$words != "'" &
  words_all$words != "amp" &
  words_all$words != "..."
words_all_stop <- words_all[keep_word, ]

# Order factor levels from most to least frequent word.
word_counts <- sort(table(words_all_stop[, "words"]), decreasing = TRUE)
words_all_stop[, "words"] <- factor(
  words_all_stop[, "words"],
  levels = names(word_counts)
)

a <- data.frame(table(words_all_stop[, "words"]))
a <- head(a, 20)

ggplot(a, aes(x = reorder(Var1, Freq), y = Freq, group = 1)) +
  geom_bar(stat = 'identity', position = "dodge", color = "black") +
  ylab("Occurrences") +
  xlab("") +
  coord_flip() +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Now let's consider which words are most common from the Android relative to the iPhone. We'll use the simple measure of log odds ratio.
# Log2 odds ratio of each word appearing in Android vs iPhone tweets, and a bar
# chart of the 15 most extreme words in each direction.

# Shorten the two client names. The column is converted to character first and
# matched by name; the previous code relied on ifelse() turning a factor into
# its integer codes and then compared against the magic code 5, which breaks as
# soon as the set of clients (or the column type) changes.
words_all_stop$source <- as.character(words_all_stop$source)
words_all_stop$source[words_all_stop$source %in% "Twitter for Android"] <- "Android"
words_all_stop$source[words_all_stop$source %in% "Twitter for iPhone"] <- "iPhone"

android_iphone_ratios <- words_all_stop[words_all_stop$source %in% c("Android", "iPhone"), ] %>%
  count(words, source) %>%
  # Keep words used at least 5 times across both clients. The grouping is
  # explicit because count() no longer leaves its result grouped by `words`.
  group_by(words) %>%
  filter(sum(n) >= 5) %>%
  ungroup() %>%
  spread(source, n, fill = 0) %>%
  # Add-one smoothed share of each client's words (replaces the deprecated
  # mutate_each()/funs() call applied to every column except `words`).
  mutate(
    Android = (Android + 1) / sum(Android + 1),
    iPhone = (iPhone + 1) / sum(iPhone + 1)
  ) %>%
  mutate(logratio = log2(Android / iPhone)) %>%
  arrange(desc(logratio))

android_iphone_ratios <- android_iphone_ratios[
  !(is.na(android_iphone_ratios$words)) &
    android_iphone_ratios$words != "\n" &
    android_iphone_ratios$words != "'",
]

# Top 15 words by absolute log ratio on each side of zero.
temp <- android_iphone_ratios %>%
  group_by(logratio > 0) %>%
  top_n(15, abs(logratio)) %>%
  ungroup() %>%
  mutate(words = reorder(words, logratio))

ggplot(temp, aes(words, logratio, fill = logratio < 0)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_hline(yintercept = 0) +
  coord_flip() +
  ylab("Android / iPhone log ratio") +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
As we can see, a lot of emotionally charged words were more common on Android.
Since we've observed a difference in sentiment between the Android and iPhone tweets, let's try quantifying it. We'll work with the NRC Word-Emotion Association lexicon, available from the tidytext package, which associates words with 10 sentiments: positive, negative, anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. To measure the sentiment of the Android and iPhone tweets, we can count the number of words in each category:
# NRC Word-Emotion Association lexicon as (word, sentiment) pairs.
# get_sentiments() is tidytext's accessor for a named lexicon. Filtering the
# `sentiments` data set on a `lexicon` column only works with old tidytext
# releases; newer ones no longer bundle all lexicons in that table.
# NOTE(review): recent tidytext versions fetch NRC through the textdata package
# and may ask for confirmation on first use - confirm for your setup.
nrc <- get_sentiments("nrc") %>%
  dplyr::select(word, sentiment)
head(nrc, 5)
## # A tibble: 5 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
# Count, per client (Android / iPhone), how many words fall into each NRC
# sentiment, alongside the client's total word count.
phone_sources <- c("Android", "iPhone")

# One row per (tweet id, client) carrying that client's total number of words.
sources <- words_all_stop %>%
  filter(source %in% phone_sources) %>%
  group_by(source) %>%
  mutate(total_words = n()) %>%
  ungroup() %>%
  distinct(id, source, total_words)

# The third column holds the word; rename it so it can be joined to the lexicon.
names(words_all_stop)[3] <- "word"
words_all_stop$word <- as.character(words_all_stop$word)

# Sentiment hits per tweet, with explicit zeros for sentiment/tweet pairs that
# never occur.
sentiment_by_tweet <- words_all_stop %>%
  filter(source %in% phone_sources) %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0))

# Attach each tweet's client and sum the hits per client and sentiment.
by_source_sentiment <- sentiment_by_tweet %>%
  inner_join(sources, by = "id") %>%
  group_by(source, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  ungroup()

head(by_source_sentiment)
## # A tibble: 6 × 4
## source sentiment total_words words
## <chr> <chr> <int> <dbl>
## 1 Android anger 3839 217
## 2 Android anticipation 3839 253
## 3 Android disgust 3839 152
## 4 Android fear 3839 218
## 5 Android joy 3839 177
## 6 Android negative 3839 401
Here is a wordcloud of positive and negative words.
# Positive vs negative words from Android/iPhone tweets: a comparison word
# cloud, followed by a bar chart of words occurring more than 5 times.
polarities <- c("negative", "positive")

wordcloud_temp <- words_all_stop %>%
  filter(source %in% c("Android", "iPhone")) %>%
  inner_join(nrc, by = "word") %>%
  filter(sentiment %in% polarities)

# Occurrences of every (word, polarity) pair, most frequent first.
polarity_counts <- count(wordcloud_temp, word, sentiment, sort = TRUE)

# Word-by-polarity count matrix drives the comparison cloud.
comparison.cloud(
  acast(polarity_counts, word ~ sentiment, value.var = "n", fill = 0),
  colors = c("#F8766D", "#00BFC4"),
  max.words = 100,
  title.size = 2
)

# Signed contribution: negative words count downwards.
temp <- polarity_counts %>%
  filter(n > 5) %>%
  mutate(n = ifelse(sentiment == "negative", -n, n))
temp <- temp[order(temp$n, decreasing = TRUE), ]

# Tag each word with its polarity so a word listed under both gets two bars.
polarity_tag <- ifelse(temp$sentiment == "positive", "_pos", "_neg")
temp$word <- paste(temp$word, polarity_tag)
temp$word <- factor(temp$word, levels = temp$word[order(temp$n, decreasing = TRUE)])

ggplot(temp, aes(word, n, fill = sentiment)) +
  geom_bar(stat = "identity", colour = "black") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_hline(yintercept = 0) +
  ylab("Contribution to sentiment") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
We can see that the overall sentiment seems to be more positive.
# Number of positive and negative words, stacked by client.
# geom_bar() counts the rows itself, so no precomputed count column is needed.
# The former `pos_neg$count <- table(pos_neg$sentiment)` assigned a 2-element
# table to a whole column, which fails whenever the row count is not a
# multiple of 2, and the column was never used.
pos_neg <- wordcloud_temp
ggplot(pos_neg, aes(x = sentiment)) +
  geom_bar(aes(fill = source), color = "black", width = 0.5) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text()
  )
Trump's Android account uses more words related to "negative" sentiments than the iPhone account does. The positive emotions weren't different to a statistically significant extent.
# For every sentiment, run a Poisson rate comparison on the per-client word
# counts (`words`) against the per-client totals (`total_words`) and collect
# the tidied test results, one row per sentiment.
sentiment_groups <- group_by(by_source_sentiment, sentiment)
sentiment_differences <- do(
  sentiment_groups,
  tidy(poisson.test(x = .$words, T = .$total_words))
)
sentiment_differences
## Source: local data frame [10 x 9]
## Groups: sentiment [10]
##
## sentiment estimate statistic p.value parameter conf.low
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 anger 2.184233 217 4.425426e-09 169.6136 1.6537396
## 2 anticipation 1.282871 253 1.955130e-02 230.5311 1.0358942
## 3 disgust 2.628409 152 8.152777e-09 114.0711 1.8383640
## 4 fear 1.750214 218 7.459656e-06 180.3637 1.3547586
## 5 joy 1.075386 177 5.888292e-01 172.0025 0.8434595
## 6 negative 2.018150 401 1.615522e-13 319.5185 1.6556466
## 7 positive 1.173084 454 3.956612e-02 427.0201 1.0051986
## 8 sadness 2.604558 224 2.248507e-12 168.4191 1.9434889
## 9 surprise 1.055960 119 7.703508e-01 116.4600 0.7852959
## 10 trust 1.136208 310 1.834019e-01 295.0320 0.9437246
## # ... with 3 more variables: conf.high <dbl>, method <fctr>,
## # alternative <fctr>
# Plot the estimated rate ratio per sentiment with its confidence interval,
# expressed as a relative increase (ratio - 1) of Android over iPhone.
# Plain mutate() replaces the deprecated mutate_each()/funs() pair.
temp <- sentiment_differences %>%
  ungroup() %>%
  mutate(sentiment = reorder(sentiment, estimate)) %>%
  mutate(
    estimate = estimate - 1,
    conf.low = conf.low - 1,
    conf.high = conf.high - 1
  )

ggplot(temp, aes(estimate, sentiment)) +
  geom_point() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = percent_format()) +
  labs(x = "% increase in Android relative to iPhone", y = "Sentiment") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Let's consider the words with the largest changes within each category.
# Within each NRC emotion (positive/negative excluded), show the 10 words with
# the largest absolute Android/iPhone log ratio.

# The first column holds the word; rename it so it can be joined to the lexicon.
names(android_iphone_ratios)[1] <- "word"
android_iphone_ratios$word <- as.character(android_iphone_ratios$word)

emotion_ratios <- android_iphone_ratios %>%
  inner_join(nrc, by = "word") %>%
  filter(!sentiment %in% c("positive", "negative"))

# Order facets and bars by descending log ratio before picking the top words.
temp <- emotion_ratios %>%
  mutate(
    sentiment = reorder(sentiment, -logratio),
    word = reorder(word, -logratio)
  ) %>%
  group_by(sentiment) %>%
  top_n(10, abs(logratio)) %>%
  ungroup()

ggplot(temp, aes(word, logratio, fill = logratio < 0)) +
  facet_wrap(~ sentiment, scales = "free", nrow = 2) +
  geom_bar(stat = "identity", colour = "black") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Android / iPhone log ratio") +
  geom_hline(yintercept = 0) +
  scale_fill_manual(
    name = "",
    labels = c("Android", "iPhone"),
    values = c("red", "lightblue")
  ) +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Here are the most common stemmed words. They don't differ much from the raw comparison we saw earlier.
# Build a cleaned, stemmed tm corpus from the tweets and plot the terms that
# occur at least 20 times.

# Rebuild the data frame from the raw status list so the corpus starts from the
# tweet text as returned by twListToDF().
trump_tweets_df <- twListToDF(trump_tweets)
myCorpus <- Corpus(VectorSource(trump_tweets_df$text))
# Remove anything starting with "http" up to the next whitespace.
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
# Remove every character that is neither alphabetic nor whitespace.
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
# SMART stop words plus a few corpus-specific extras.
myStopwords <- c(stopwords('SMART'), "use", "see", "used", "via", "amp")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
myCorpus <- tm_map(myCorpus, stripWhitespace)
# Keep the unstemmed corpus: it is the dictionary for stem completion below.
myCorpusCopy <- myCorpus
myCorpus <- tm_map(myCorpus, stemDocument)
#writeLines(strwrap(myCorpus[[1]]$content, 60))
# Complete the stems of one document against `dictionary`.
#   x          - a document; it is coerced with as.character() and split on
#                single spaces.
#   dictionary - passed straight to stemCompletion().
# Returns a PlainTextDocument holding the completed words joined by spaces.
stemCompletion2 <- function(x, dictionary) {
x <- unlist(strsplit(as.character(x), " "))
# Drop empty tokens produced by consecutive spaces.
x <- x[x != ""]
x <- stemCompletion(x, dictionary=dictionary)
x <- paste(x, sep="", collapse=" ")
PlainTextDocument(stripWhitespace(x))
}
# Apply the completion document by document, then wrap the resulting list back
# into a corpus.
myCorpus <- lapply(myCorpus, stemCompletion2, dictionary=myCorpusCopy)
myCorpus <- Corpus(VectorSource(myCorpus))
#writeLines(strwrap(myCorpus[[1]]$content, 60))
# wordLengths = c(1, Inf) keeps terms of every length, including 1- and
# 2-character ones.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
# NOTE(review): freq.terms is not referenced again in this section.
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
# Total occurrences of each term across all documents (dense conversion).
term.freq <- rowSums(as.matrix(tdm))
term.freq <- subset(term.freq, term.freq >= 20)
df <- data.frame(term = names(term.freq), freq = term.freq)
df <- df[order(df$freq, decreasing = TRUE),]
ggplot(df, aes(x=reorder(term, freq), y=freq)) +
geom_bar(stat='identity', color = "black") +
xlab("Terms") +
ylab("Count") +
ggtitle("Frequent words with stemming")+
coord_flip() +
theme(axis.line = element_line(), axis.text=element_text(color='black'), axis.title = element_text(colour = 'black'), legend.text=element_text(), legend.title=element_text(), legend.key = element_rect(colour = "black"))
Next I'd like to see what words are associated with "clinton" and "america".
# Terms whose correlation with the given term is at least 0.2.
findAssocs(tdm, terms = "clinton", corlimit = 0.2)
## $clinton
## betray fumed labor postelection surrogate
## 0.27 0.27 0.27 0.27 0.27
## tpp camp draintheswamp email told
## 0.27 0.26 0.23 0.23 0.23
## video campaign
## 0.23 0.21
findAssocs(tdm, terms = "america", corlimit = 0.2)
## $america
## make great safe
## 0.56 0.39 0.25
If one is familiar with the themes surrounding the election, these associated words should not come as a surprise.
Below are the most common topics from Trump's Twitter account.
# Fit an 8-topic LDA model on the non-empty documents and summarise every topic
# by its 7 leading terms.
dtm <- as.DocumentTermMatrix(tdm)

# Number of term occurrences per document; documents with none cannot be fitted.
rowTotals <- rowSums(as.matrix(dtm))
dtm.new <- dtm[rowTotals > 0, ]

lda <- LDA(dtm.new, k = 8)

# terms() returns one column per topic; collapse each column into one label.
term <- terms(lda, 7)
term <- apply(term, MARGIN = 2, paste, collapse = ", ")
print(term)
## Topic 1
## "time, news, people, fake, draintheswamp, great, vote"
## Topic 2
## "great, state, make, support, trump, people, clinton"
## Topic 3
## "back, vote, trump, join, clinton, tonight, people"
## Topic 4
## "bad, watch, join, pm, america, job, florida"
## Topic 5
## "media, make, elect, trump, countries, tonight, vote"
## Topic 6
## "great, people, trump, state, bad, pm, draintheswamp"
## Topic 7
## "clinton, elect, pm, cnn, draintheswamp, back, news"
## Topic 8
## "great, job, pm, support, people, join, watch"
# Stacked density of the most likely topic per tweet over time.

# Most likely topic for every document the model was fitted on.
doc_topics <- topics(lda)

# Documents without any terms were dropped before fitting (rowTotals > 0), so
# the dates must come from exactly those tweets. Taking the first
# length(topics) rows instead pairs topics with the wrong tweets as soon as a
# single empty document was removed.
kept_dates <- trump_tweets_df$created[rowTotals > 0]
stopifnot(length(kept_dates) == length(doc_topics))

topics <- data.frame(date = as.Date(kept_dates), topic = doc_topics)

ggplot(topics, aes(date, fill = term[topic])) +
  geom_density(position = "stack") +
  xlab("") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Below we can see the association of common words with each other.
# Hierarchical clustering (Ward's method) of the terms that remain after
# removing sparse terms, drawn as a dendrogram with ggplot2.
tdm2 <- removeSparseTerms(tdm, sparse = 0.95)
m2 <- as.matrix(tdm2)

# Distances between the scaled term rows.
distMatrix <- dist(scale(m2))
fit <- hclust(distMatrix, method = "ward.D2")

# Convert the tree into segment and label data frames for plotting.
dhc <- as.dendrogram(fit)
ddata <- dendro_data(dhc, type = "rectangle")

ggplot(segment(ddata)) +
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_text(
    data = ddata$labels,
    aes(x = x, y = y, label = label),
    size = 3,
    vjust = 0,
    angle = 45
  ) +
  xlab("") +
  ylab("") +
  ggtitle("Dendrogram of words") +
  theme(
    axis.line = element_line(),
    axis.text = element_text(color = 'black'),
    axis.title = element_text(colour = 'black'),
    legend.text = element_text(),
    legend.title = element_text(),
    legend.key = element_rect(colour = "black")
  )
Next I would like to consider Trump's friends on Twitter and their locations.
# Look up the account, show the first five columns of its profile data frame,
# then fetch the accounts it follows ("friends").
user <- getUser(user = "realDonaldTrump")
user_profile <- user$toDataFrame()
user_profile[1:5]
## description statusesCount
## 1 45th President of the United States of America 34517
## followersCount favoritesCount friendsCount
## 1 25261503 45 43
friends <- user$getFriends()
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Profile location of the user and of every friend; friends without a location
# (empty string) are dropped.
userLocation <- location(user)
friendsLocation <- sapply(friends, location)
has_location <- friendsLocation != ""
friendsLocation <- friendsLocation[has_location]
head(friendsLocation, n = 3)
## 471672239 20733972
## "Washington, DC" "Kenosha, WI and Washington, DC"
## 322293052
## "Malibu"
Here we have the latitude and longitude of each friend.
# Geocode every friend's location into a (lat, lon) data frame, one row per
# entry of friendsLocation; rows stay NA when geocoding returns no latitude.

# Normalise "Washington, DC" to "DC" in one vectorised step (names are kept).
friendsLocation[friendsLocation %in% "Washington, DC"] <- "DC"

# Numeric NA so both columns are numeric even if every lookup fails.
friendsLL <- matrix(NA_real_, nrow = length(friendsLocation), ncol = 2)

# seq_along() makes the loop a no-op for an empty vector, where
# 1:length(x) would iterate over c(1, 0) and fail.
for (i in seq_along(friendsLocation)) {
  # Only the part before the first comma is sent to the geocoder.
  place <- strsplit(trim(friendsLocation[[i]]), ",")[[1]][1]
  tmpLL <- geocode(place)
  if (any(!is.na(tmpLL$lat))) {
    friendsLL[i, ] <- c(tmpLL$lat, tmpLL$lon)
  }
}

friendsLL <- data.frame(friendsLL)
names(friendsLL) <- c("lat", "lon")
head(friendsLL, 3)
## lat lon
## 1 38.90719 -77.03687
## 2 42.58474 -87.82119
## 3 34.02592 -118.77976
Here is an interactive map of Trump, his friends and their locations.
# Interactive map of the user (red, larger marker) and the geocoded friends
# (blue), with name, screen name and location in each popup.

# Turn a named vector into a data frame with the names in a `key` column.
friendsName <- sapply(friends, name)
friendsScreenname <- sapply(friends, screenName)

friendsName_df <- data.frame(friendsName)
friendsName_df$key <- rownames(friendsName_df)

friendsScreenname_df <- data.frame(friendsScreenname)
friendsScreenname_df$key <- rownames(friendsScreenname_df)

friendsLoc_df <- data.frame(friendsLocation)
friendsLoc_df$key <- rownames(friendsLoc_df)
friendsLoc_df <- cbind(friendsLoc_df, friendsLL)

# Join name, location/coordinates and screen name on the shared key.
friendsAll_df <- Reduce(
  function(left, right) merge(left, right, by = "key"),
  list(friendsName_df, friendsLoc_df, friendsScreenname_df)
)

# The user's own row: geocode the part after the first comma of the location.
user_place <- strsplit(trim(location(user)), ",")[[1]][2]
temp <- geocode(user_place)
user_df <- data.frame(
  key = 1,
  friendsName = name(user),
  friendsLocation = location(user),
  lat = temp$lat,
  lon = temp$lon,
  friendsScreenname = screenName(user)
)

friendsAll_df <- rbind(user_df, friendsAll_df)
friendsAll_df$colors <- ifelse(
  friendsAll_df$friendsScreenname == "realDonaldTrump",
  "red",
  "blue"
)

# Popup HTML, one string per row.
popup_html <- paste(
  sep = "<br/>",
  "<b>Name</b>",
  friendsAll_df$friendsName,
  "<b>Screen Name</b>",
  friendsAll_df$friendsScreenname,
  "<b>Location</b>",
  friendsAll_df$friendsLocation
)

leaflet(friendsAll_df) %>%
  addTiles() %>% # Add default OpenStreetMap map tiles
  addCircleMarkers(
    lng = ~lon,
    lat = ~lat,
    popup = popup_html,
    radius = ~ifelse(friendsName == "Donald J. Trump", 10, 5),
    color = ~colors,
    stroke = FALSE,
    fillOpacity = 0.5
  )