library(twitteR)
library(wordcloud)
## Loading required package: RColorBrewer
library(tm)
## Loading required package: NLP
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
## 
##     id
# Twitter API credentials are read from environment variables so that secrets
# never live in source control. Define them in ~/.Renviron (or the shell):
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
# NOTE: any key or token that was ever hard-coded in this file must be treated
# as compromised and regenerated in the Twitter developer console.
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Sys.getenv() returns "" for unset variables; fail early with a clear message
# instead of sending empty credentials to the API.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = consumer_key,
  TWITTER_CONSUMER_SECRET = consumer_secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credential environment variable(s): ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

# Register the OAuth credentials with twitteR for the rest of the session.
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
## [1] "Using direct authentication"
# Look up the @HarrisburgU account ("HU") through the Twitter API.
user <- getUser(user = "HarrisburgU")

# Friends: the accounts HU follows. Flatten the returned list of user objects
# into a data frame and cache it on disk.
friends <- user$getFriends()
friends_df <- twListToDF(twList = friends)
save(list = "friends_df", file = "hu_friends.RData")

# Followers: the accounts that follow HU. Same treatment as above.
followers <- user$getFollowers()
followers_df <- twListToDF(twList = followers)
save(list = "followers_df", file = "hu_followers.RData")

1. What language do HU friends speak?

# Distinct values of the `lang` field across the accounts HU follows.
unique(friends_df[["lang"]])
## [1] "en"
# Every friend account reports English ("en") as its language.

2. Draw the distribution of friends

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Compact column-by-column overview (name, type, first values) of friends_df.
dplyr::glimpse(friends_df)
## Observations: 152
## Variables: 17
## $ description       <chr> "", "Philadelphia's @CodeforAmerica Brigade....
## $ statusesCount     <dbl> 986, 6856, 1926, 81787, 1593, 205, 4330, 420...
## $ followersCount    <dbl> 544, 4083, 66043, 900690, 1004, 446, 6096, 1...
## $ favoritesCount    <dbl> 450, 6258, 1353, 2890, 983, 210, 18, 2467, 5...
## $ friendsCount      <dbl> 322, 995, 76, 667, 829, 530, 2504, 3984, 139...
## $ url               <chr> NA, "https://t.co/8gPJQLnWb2", "https://t.co...
## $ name              <chr> "Cpt. Gabriel Olivera", "Code for Philly", "...
## $ created           <dttm> 2015-04-08 22:15:14, 2013-01-04 18:32:46, 2...
## $ protected         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ verified          <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALS...
## $ screenName        <chr> "HPDDetectives", "CodeForPhilly", "PHL_Fusio...
## $ location          <chr> "City of Harrisburg", "Philadelphia, PA", ""...
## $ lang              <chr> "en", "en", "en", "en", "en", "en", "en", "e...
## $ id                <chr> "3149497701", "1061105220", "907193396049182...
## $ listedCount       <dbl> 9, 223, 434, 10930, 44, 4, 256, 444, 99, 91,...
## $ followRequestSent <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ profileImageUrl   <chr> "http://pbs.twimg.com/profile_images/6814983...
# Kernel density estimate of `friendsCount` (how many accounts each HU friend
# follows), drawn with the default density plot method.
plot(x = density(friends_df$friendsCount))

3. How active are HU friends?

# Average number of tweets (statuses) posted per friend account.
avg_statuses <- mean(friends_df$statusesCount)
avg_statuses
## [1] 20230.5
# Share (in %) of friends who have tweeted more than that average. The
# threshold is taken from the computed mean rather than a hand-copied constant,
# so it stays correct when the data is refreshed; the mean of a logical vector
# is the proportion of TRUE values.
mean(friends_df$statusesCount > avg_statuses) * 100
## [1] 24.34211
# About 24.3% of HU friends have posted more statuses than the average friend.

4. Who are my followers with the biggest network, and who tweets the most?

# NOTE(review): the question above asks about followers, but this section
# queries friends_df (accounts HU follows) -- confirm which table is intended.

# Largest follower count among HU friends.
max(friends_df$followersCount)
## [1] 29619738
# Name(s) of the friend(s) holding that maximum. The comparison uses the
# computed maximum instead of a hard-coded number: live counts drift between
# API pulls, and a stale constant silently matched no rows (character(0)).
# Despite its name, `mostactive` holds the most-followed account(s).
mostactive <- friends_df$name[
  which(friends_df$followersCount == max(friends_df$followersCount, na.rm = TRUE))
]
mostactive
# NASA has the most followers

# Largest tweet (status) count among HU friends.
max(friends_df$statusesCount)
## [1] 210281
# Name(s) of the friend(s) with that tweet count, again matched against the
# computed maximum rather than a stale literal.
mosttweet <- friends_df$name[
  which(friends_df$statusesCount == max(friends_df$statusesCount, na.rm = TRUE))
]
mosttweet
# WGAL has the most tweets

5. Is there a correlation between number of followers and number of tweets?

# Pearson correlation between each friend's follower count and tweet count.
with(friends_df, cor(followersCount, statusesCount))
## [1] 0.09621023

OR

library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:wordcloud':
## 
##     textplot
## The following object is masked from 'package:graphics':
## 
##     legend
# Select the two variables of interest by name rather than by position, so the
# chart does not silently change if the column order of friends_df changes.
my_data <- friends_df[, c("statusesCount", "followersCount")]
# Scatter plot, per-variable histograms and the correlation coefficient in a
# single panel chart.
chart.Correlation(my_data, histogram = TRUE, pch = 19)

# The correlation is very weak (Pearson r is about 0.10, see cor() above):
# follower count and tweet count are close to linearly unrelated.

6. What are the most commonly used words in HU friends'/followers' descriptions?

(Tidytext package)

# Build a word cloud from the profile descriptions of the accounts HU follows.

# One corpus document per profile description.
r_stats_text_corpus <- Corpus(VectorSource(friends_df$description))

# Normalise the encoding; bytes that cannot be converted are substituted with
# their hex code (sub = "byte") instead of turning the whole string into NA.
# Custom functions are wrapped in content_transformer() so they are applied to
# document content in the way tm_map() expects for every corpus type.
r_stats_text_corpus <- tm_map(
  r_stats_text_corpus,
  content_transformer(function(x) iconv(enc2utf8(x), sub = "byte"))
)
# Lower-case everything so "Data" and "data" count as the same word.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, content_transformer(tolower))
# Strip punctuation characters.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# Drop stop words; extra arguments to tm_map() are forwarded to removeWords().
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords, stopwords())

# The original run reported a "transformation drops documents" warning on every
# tm_map() call. Verify explicitly that no description was actually lost.
stopifnot(length(r_stats_text_corpus) == length(friends_df$description))

# Word cloud of the cleaned descriptions (word size reflects frequency).
wordcloud(r_stats_text_corpus)
## Warning in wordcloud(r_stats_text_corpus): patriotnews could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(r_stats_text_corpus): pennsylvanias could not be fit
## on page. It will not be plotted.
## Warning in wordcloud(r_stats_text_corpus): pennlive could not be fit on
## page. It will not be plotted.