library(twitteR)
library(wordcloud)
## Loading required package: RColorBrewer
library(tm)
## Loading required package: NLP
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
# Twitter API credentials are read from environment variables (set them in
# ~/.Renviron or in the shell) so that secrets are never stored in the script.
# NOTE(review): the keys that were previously hard-coded here have been
# exposed in source and should be revoked and regenerated.
read_twitter_credential <- function(name) {
  # Returns the value of environment variable `name`; stops with a clear
  # message when it is unset or empty instead of sending a blank credential.
  value <- Sys.getenv(name, unset = "")
  if (!nzchar(value)) {
    stop("Environment variable ", name, " is not set", call. = FALSE)
  }
  value
}

consumer_key <- read_twitter_credential("TWITTER_CONSUMER_KEY")
consumer_secret <- read_twitter_credential("TWITTER_CONSUMER_SECRET")
access_token <- read_twitter_credential("TWITTER_ACCESS_TOKEN")
access_secret <- read_twitter_credential("TWITTER_ACCESS_SECRET")

# Register the OAuth credentials with twitteR for the rest of the session.
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
## [1] "Using direct authentication"
# Look up the @HarrisburgU account (HU); everything below is derived from it.
user <- getUser("HarrisburgU")

# Friends = accounts that HU follows. Convert the list of user objects to a
# data frame and keep a copy on disk.
friends <- user$getFriends()
friends_df <- twListToDF(friends)
save(list = "friends_df", file = "hu_friends.RData")

# Followers = accounts that follow HU. Same conversion and on-disk copy.
followers <- user$getFollowers()
followers_df <- twListToDF(followers)
save(list = "followers_df", file = "hu_followers.RData")

# Distinct profile languages among the friends.
unique(friends_df[["lang"]])
## [1] "en"
# Every account HU follows has its profile language set to English ("en").
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compact column-by-column overview (name, type, first values) of friends_df.
dplyr::glimpse(friends_df)
## Observations: 152
## Variables: 17
## $ description <chr> "", "Philadelphia's @CodeforAmerica Brigade....
## $ statusesCount <dbl> 986, 6856, 1926, 81787, 1593, 205, 4330, 420...
## $ followersCount <dbl> 544, 4083, 66043, 900690, 1004, 446, 6096, 1...
## $ favoritesCount <dbl> 450, 6258, 1353, 2890, 983, 210, 18, 2467, 5...
## $ friendsCount <dbl> 322, 995, 76, 667, 829, 530, 2504, 3984, 139...
## $ url <chr> NA, "https://t.co/8gPJQLnWb2", "https://t.co...
## $ name <chr> "Cpt. Gabriel Olivera", "Code for Philly", "...
## $ created <dttm> 2015-04-08 22:15:14, 2013-01-04 18:32:46, 2...
## $ protected <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ verified <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALS...
## $ screenName <chr> "HPDDetectives", "CodeForPhilly", "PHL_Fusio...
## $ location <chr> "City of Harrisburg", "Philadelphia, PA", ""...
## $ lang <chr> "en", "en", "en", "en", "en", "en", "en", "e...
## $ id <chr> "3149497701", "1061105220", "907193396049182...
## $ listedCount <dbl> 9, 223, 434, 10930, 44, 4, 256, 444, 99, 91,...
## $ followRequestSent <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ profileImageUrl <chr> "http://pbs.twimg.com/profile_images/6814983...
# Distribution of how many accounts each of HU's friends follows.
plot(density(friends_df$friendsCount))

# Average number of statuses (tweets) per friend.
avg_statuses <- mean(friends_df$statusesCount)
avg_statuses
## [1] 20230.5

# Percentage of friends that have posted more statuses than the average.
# The threshold is the computed mean rather than a hand-copied rounded value,
# so it stays correct when the data is refreshed.
mean(friends_df$statusesCount > avg_statuses) * 100
## [1] 24.34211
# About 24.3% of HU's friends have posted more statuses than the average friend.

# Friend with the most followers. The maximum is looked up from the data; a
# hard-coded follower count goes stale as soon as the account gains or loses
# a follower and then matches nothing (character(0)).
max_followers <- max(friends_df$followersCount)
max_followers
## [1] 29619738
mostactive <- friends_df$name[friends_df$followersCount == max_followers]
mostactive
# NASA has the most followers

# Friend with the most statuses, found the same way.
max_statuses <- max(friends_df$statusesCount)
max_statuses
## [1] 210281
mosttweet <- friends_df$name[friends_df$statusesCount == max_statuses]
mosttweet
# WGAL has the most tweets

# Linear (Pearson) correlation between follower count and status count.
cor(friends_df$followersCount, friends_df$statusesCount)
## [1] 0.09621023
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:wordcloud':
##
## textplot
## The following object is masked from 'package:graphics':
##
## legend
# Pick the two count columns by name; positional indices (2, 3) silently
# select the wrong columns if the data frame layout ever changes.
my_data <- friends_df[, c("statusesCount", "followersCount")]

# Scatterplot matrix with histograms on the diagonal and the correlation
# coefficient in the upper panel.
chart.Correlation(my_data, histogram = TRUE, pch = 19)
# The linear correlation between statuses and followers is very weak
# (r is about 0.10), i.e. tweeting more is not associated with more followers.
# Word cloud of the friends' profile descriptions (uses the tm package).

# Normalise the encoding on the raw character vector before building the
# corpus; bytes that cannot be converted are kept as <xx> escapes.
descriptions <- iconv(enc2utf8(friends_df$description), sub = "byte")

# One document per profile description.
r_stats_text_corpus <- Corpus(VectorSource(descriptions))

# Base functions such as tolower must be wrapped in content_transformer() so
# the result is still a valid corpus regardless of the corpus class.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, content_transformer(tolower))
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# Drop common English stop words (tm's default stopwords() list).
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords, stopwords())

# Plot word frequencies; words that do not fit on the page are skipped with
# a warning.
wordcloud(r_stats_text_corpus)
## Warning in wordcloud(r_stats_text_corpus): patriotnews could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(r_stats_text_corpus): pennsylvanias could not be fit
## on page. It will not be plotted.
## Warning in wordcloud(r_stats_text_corpus): pennlive could not be fit on
## page. It will not be plotted.