library(twitteR)
library(wordcloud)

## Loading required package: RColorBrewer

library(tm)

## Loading required package: NLP

library(plyr)

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:twitteR':
## 
##     id

# Twitter API credentials are read from environment variables (for example
# set in ~/.Renviron) so that secrets never live in the script itself.
# SECURITY: the keys and tokens that used to be hard-coded here were exposed
# in source; they should be revoked and regenerated before reuse.
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY")

consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")

access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")

access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Sys.getenv() returns "" for an unset variable; stop early with a clear
# message instead of letting the OAuth handshake fail obscurely.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = consumer_key,
  TWITTER_CONSUMER_SECRET = consumer_secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
if (!all(nzchar(twitter_credentials))) {
  stop(
    "Missing Twitter credential(s): ",
    paste(names(twitter_credentials)[!nzchar(twitter_credentials)],
          collapse = ", "),
    call. = FALSE
  )
}

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)

## [1] "Using direct authentication"

# Look up the "HarrisburgU" (HU) account, then download both sides of its
# network. Each list is flattened to a data frame with twListToDF() and
# written to an .RData file in the working directory.
user <- getUser("HarrisburgU")
friends <- user$getFriends() # who HU follows
friends_df <- twListToDF(friends)
save(friends_df, file = "hu_friends.RData")

# Same steps for the accounts that follow HU.
followers <- user$getFollowers() # HU followers
followers_df <- twListToDF(followers)
save(followers_df, file = "hu_followers.RData")

# 1. What language do HU friends speak?

# Distinct values of the `lang` field across the accounts HU follows
unique(friends_df[["lang"]])

## [1] "en"

# Every HU friend reports English ("en")

# 2. Draw the distribution of friends

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:twitteR':
## 
##     id, location

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Compact column-by-column preview (name, type, first values) of friends_df
dplyr::glimpse(friends_df)

## Observations: 152
## Variables: 17
## $ description       <chr> "", "Philadelphia's @CodeforAmerica Brigade....
## $ statusesCount     <dbl> 986, 6856, 1926, 81787, 1593, 205, 4330, 420...
## $ followersCount    <dbl> 544, 4083, 66043, 900690, 1004, 446, 6096, 1...
## $ favoritesCount    <dbl> 450, 6258, 1353, 2890, 983, 210, 18, 2467, 5...
## $ friendsCount      <dbl> 322, 995, 76, 667, 829, 530, 2504, 3984, 139...
## $ url               <chr> NA, "https://t.co/8gPJQLnWb2", "https://t.co...
## $ name              <chr> "Cpt. Gabriel Olivera", "Code for Philly", "...
## $ created           <dttm> 2015-04-08 22:15:14, 2013-01-04 18:32:46, 2...
## $ protected         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ verified          <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALS...
## $ screenName        <chr> "HPDDetectives", "CodeForPhilly", "PHL_Fusio...
## $ location          <chr> "City of Harrisburg", "Philadelphia, PA", ""...
## $ lang              <chr> "en", "en", "en", "en", "en", "en", "en", "e...
## $ id                <chr> "3149497701", "1061105220", "907193396049182...
## $ listedCount       <dbl> 9, 223, 434, 10930, 44, 4, 256, 444, 99, 91,...
## $ followRequestSent <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ profileImageUrl   <chr> "http://pbs.twimg.com/profile_images/6814983...

# Kernel density estimate of friendsCount (how many accounts each HU friend
# follows). The `friends_df$friendsCount` expression is kept verbatim because
# the default plot title is built from the call stored in the density object.
friends_count_density <- density(friends_df$friendsCount)
plot(friends_count_density)

# 3. How active are HU friends?

# Average number of statuses (tweets) posted per HU friend.
avg_statuses <- mean(friends_df$statusesCount)
avg_statuses

## [1] 20230.5

# Percentage of friends who have posted more than that average. The threshold
# comes from the data instead of a hand-typed, rounded copy of the mean, so
# the figure stays correct when the friend list is downloaded again.
mean(friends_df$statusesCount > avg_statuses) * 100

## [1] 24.34211

# About 24.3% of HU friends have posted more statuses than the average friend

# 4. Who are my followers with the biggest network, and who tweets the most?

# Largest follower count among the accounts HU follows.
max_followers <- max(friends_df$followersCount)
max_followers

## [1] 29619738

# Look the account up using the maximum computed above. A literal copied from
# an earlier download (29619169) no longer matched the live data, so the
# lookup silently returned character(0). which() keeps every tied account and
# skips NA comparisons.
mostactive <- friends_df$name[which(friends_df$followersCount == max_followers)]
mostactive

# NASA has the most followers
# NOTE(review): the recorded run printed character(0) for this step because of
# the stale literal; re-run to confirm the account name.

# Largest number of statuses (tweets) among the accounts HU follows.
max_statuses <- max(friends_df$statusesCount)
max_statuses

## [1] 210281

# Same data-driven lookup for the most prolific account (the old literal
# 210276 had drifted from the real maximum as well).
mosttweet <- friends_df$name[which(friends_df$statusesCount == max_statuses)]
mosttweet

# WGAL has the most tweets
# NOTE(review): also recorded as character(0) originally; re-run to confirm.

# 5. Is there a correlation between number of followers and number of tweets?

# Correlation coefficient (cor()'s default method) between each friend's
# follower count and the number of statuses they have posted
with(friends_df, cor(followersCount, statusesCount))

## [1] 0.09621023

# OR, visually, with PerformanceAnalytics::chart.Correlation():

library(PerformanceAnalytics)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:wordcloud':
## 
##     textplot

## The following object is masked from 'package:graphics':
## 
##     legend

# Select the two variables by name instead of by position (columns 2 and 3),
# so the chart still shows the intended data if twListToDF() ever returns its
# columns in a different order.
my_data <- friends_df[, c("statusesCount", "followersCount")]

# Scatterplot matrix with histograms on the diagonal and the correlation
# coefficient in the upper panel.
chart.Correlation(my_data, histogram = TRUE, pch = 19)

# The correlation is positive but very weak (r is about 0.096): follower count
# tells us almost nothing about how much an account tweets.

# 6. What are the most commonly used words in HU friends'/followers'
#    descriptions?

# (Tidytext package)

# Build a text corpus with one document per friend profile description.
r_stats_text_corpus <- Corpus(VectorSource(friends_df$description))

# Clean the text step by step. Functions that are not tm transformations
# (the iconv closure, tolower) are wrapped in content_transformer() so that
# tm_map() changes only each document's content and keeps the corpus
# structure intact, whichever corpus class Corpus() returns.
# 1. Force valid UTF-8, replacing unconvertible bytes with their hex codes.
r_stats_text_corpus <- tm_map(
  r_stats_text_corpus,
  content_transformer(function(x) iconv(enc2utf8(x), sub = "byte"))
)
# 2. Lower-case everything so "Data" and "data" count as the same word.
r_stats_text_corpus <- tm_map(
  r_stats_text_corpus,
  content_transformer(tolower)
)
# 3. Strip punctuation.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# 4. Drop stop words (stopwords() with its default word list).
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords, stopwords())

# NOTE(review): the recorded run warned "transformation drops documents" on
# every tm_map() step; check length(r_stats_text_corpus) against
# nrow(friends_df) after cleaning to confirm no description was lost.

# Draw the word cloud; more frequent words are drawn larger.
wordcloud(r_stats_text_corpus)

## Warning in wordcloud(r_stats_text_corpus): patriotnews could not be fit on
## page. It will not be plotted.

## Warning in wordcloud(r_stats_text_corpus): pennsylvanias could not be fit
## on page. It will not be plotted.

## Warning in wordcloud(r_stats_text_corpus): pennlive could not be fit on
## page. It will not be plotted.

# Lab #2

# Group 1

# June 23, 2018

# 1. What language do HU friends speak?

# 2. Draw the distribution of friends

# 3. How active are HU friends?

# 4. Who are my followers with the biggest network, and who tweets the most?

# 5. Is there a correlation between number of followers and number of tweets?

# OR

# 6. What are the most commonly used words in HU friends'/followers'
#    descriptions?