library("twitteR")
## Warning: package 'twitteR' was built under R version 3.4.4
library("wordcloud")
## Warning: package 'wordcloud' was built under R version 3.4.4
## Loading required package: RColorBrewer
library("tm")
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
library("plyr")
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
## 
##     id
# Twitter API credentials are read from environment variables (for example
# set in ~/.Renviron) instead of being hard-coded, so the secrets never end up
# in version control or in rendered output.
# NOTE(review): keys that were previously written literally in this file
# should be treated as compromised and regenerated.
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Sys.getenv() returns "" for unset variables; fail early with a clear message
# rather than letting the OAuth handshake fail later.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = consumer_key,
  TWITTER_CONSUMER_SECRET = consumer_secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
if (!all(nzchar(twitter_credentials))) {
  stop(
    "Missing Twitter credentials; set the environment variable(s): ",
    paste(names(twitter_credentials)[!nzchar(twitter_credentials)],
          collapse = ", "),
    call. = FALSE
  )
}

# Authenticate this session against the Twitter API.
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
## [1] "Using direct authentication"
  1. What language do HU friends speak?
# Look up the HarrisburgU account and fetch its friends list.
user <- getUser("HarrisburgU")
friends <- user$getFriends()

# Convert the list of user objects into a data frame and save a copy to disk.
friends_df <- twListToDF(friends)
save(list = "friends_df", file = "hu_friends.RData")

# Print the language code recorded for every friend.
friends_df[["lang"]]
##   [1] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [15] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [29] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [43] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [57] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [71] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [85] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
##  [99] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [113] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [127] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [141] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
  2. Draw the distribution of friends
# Histogram of the friendsCount column of friends_df (one value per row).
# The default plot title and x-axis label are derived from the argument
# expression, so the call is kept exactly as written.
hist(friends_df$friendsCount)

3. How active are HU friends?

# Average of the statusesCount column across all rows of friends_df.
mean(friends_df[["statusesCount"]])
## [1] 20221.69
  4. Who are my followers with the biggest network, and who tweets the most?
# Pair each friend's display name with its follower count. The column names
# are spelled out to match the ones data.frame() derives from the expressions
# friends_df$name and friends_df$followersCount.
followers <- data.frame(
  friends_df.name = friends_df$name,
  friends_df.followersCount = friends_df$followersCount
)

# Sort rows from most to fewest followers; only the follower count is used
# for the ranking.
followers_ranked <- followers[
  order(-followers[["friends_df.followersCount"]]),
]

# Print the full ranked table.
followers_ranked
##                                       friends_df.name
## 140                                              NASA
## 146                                          Bill Nye
## 128                               Scientific American
## 141                                     New Scientist
## 132                                   Science Channel
## 129                                      Science News
## 131                                     NASA HQ PHOTO
## 147                                     WIRED Science
## 136                                   Popular Science
## 113                       National Science Foundation
## 125                              Reuters Science News
## 4                                           VICE News
## 150                                    Science Friday
## 130                                   NASA Technology
## 127                                            Seeker
## 144                              The NPR Science Desk
## 126                                      ScienceDaily
## 62                               Data Science Central
## 91                                               WGAL
## 121                               Help a Reporter Out
## 145                                      PennLive.com
## 21                                         abc27 WHTM
## 124                                        WPMT FOX43
## 33                                  Governor Tom Wolf
## 3                                 Philadelphia Fusion
## 138                                       CBS 21 News
## 97                                   The Patriot-News
## 79                              LNP | LancasterOnline
## 20                                         Kumar Garg
## 31                                      Chuck Russell
## 133                                     Daniel Victor
## 55                                          WITF news
## 10                                   Chad Smeltz @ E3
## 94                               Anne Deeter Gallaher
## 27                                              pcntv
## 16                                       Megan Healey
## 8                                        ByJohnLMicek
## 87                                        Sara Bozich
## 29                                               CPBJ
## 26                                               WITF
## 143                                           TheBurg
## 12  PA Department of Community & Economic Development
## 11                                       Dennis Owens
## 7                                        Brad Bumsted
## 137                                    Dauphin County
## 23                                               HACC
## 83                                Vineyard at Hershey
## 47                                         Jan Murphy
## 77                                    Whitaker Center
## 61                                       Roxbury News
## 135                                   Midtown Scholar
## 123                                    Eric Papenfuse
## 43                                    Dave Marcheskie
## 118                                City of Harrisburg
## 50                                          Christian
## 114                                    Julia Hatmaker
## 2                                     Code for Philly
## 84                                Broad Street Market
## 115                                    Rep. Patty Kim
## 81                                 Brewery at Hershey
## 60                                                HYP
## 76                                     Lara Greenberg
## 134                                         Patty Kim
## 56                                         harrisburg
## 139                               Historic Harrisburg
## 120                                   Larry Portzline
## 66                                     Midtown Cinema
## 90                                         Dan Christ
## 68                                        Jason Scott
## 49                                   Charles Thompson
## 86                                AmyRichardsHarinath
## 22                                                SKI
## 48                                        Sue Gleiter
## 58                                    Chamber & CREDC
## 19                                         Ryan Group
## 42                  Dauphin County Parks & Recreation
## 92                                        Merlot Mike
## 9                                       Emily Previti
## 149                                    Eric Veronikis
## 102                                  Donald Gilliland
## 112                                      Sean Simmers
## 44                                 Friends of Midtown
## 100                                Stephanie Sadowski
## 52                                   Wallace McKelvey
## 98                                   Christine Vendel
## 34                                     Barbara Miller
## 116   Real_Dave_La_Torre <f0><U+009F><U+008D><U+00BF>
## 41                                    Brian Enterline
## 28                                Nell McCormack Abom
## 63                                      The Millworks
## 106                                        Mark Pynes
## 53                                    Wesley Robinson
## 148          Harrisburg Downtown Improvement District
## 151                                     Andy Petroski
## 30                                    Daniel Zampogna
## 32                               Harrisburg Area YMCA
## 89                                Harrisburg Symphony
## 110                                    Nick Malawskey
## 82                           Open Stage of Harrisburg
## 108                                      Ivey DeJesus
## 105                                       John Luciew
## 24                                     James Robinson
## 111                                  Julianne Mattera
## 40                                     Lucy M. Gnazzo
## 109                                       Dan Gleiter
## 101                                  Brian J. Ostella
## 57                                    Bike Harrisburg
## 64                                      Candy Woodall
## 14                                        NAC Esports
## 96                                   Art Assoc of Hbg
## 67                                               HMAC
## 45                               Capital Region Water
## 104                                        mikefeeley
## 122                                   Bigley & Blikle
## 103                                      David Wenner
## 15                                       John Quigley
## 71                                      LeighAnnUrban
## 80                                 YWCA of Harrisburg
## 5                                                 PAR
## 25                                              abc27
## 18                                       Colin Deppen
## 13                                      [CSL] Theresa
## 36                                            st@rtup
## 35                                           J.C. Lee
## 88                                         Rich Hauck
## 70                                        joelbYorkPA
## 85                                        Lisa Wardle
## 95                                        Cate Barron
## 107                                         Deb Kiner
## 46                               Alan Kennedy-Shaffer
## 65                                     Ashley Lucente
## 69                                     Charles Palmer
## 38                                         Adam Klein
## 54                                  Matthew S. Miller
## 51                                      Steve Marroni
## 119                               Marcus Lingenfelter
## 37                               GreenUrbanInitiative
## 1                                Cpt. Gabriel Olivera
## 59                                              TCRPC
## 93                                     Lara Greenberg
## 78                                 Vartan Group, Inc.
## 72                                     JEM Group, LLC
## 39                                               CAGA
## 142                               Nicole Borda Conway
## 6                                Tech Forward Podcast
## 99                                     Jeanette Krebs
## 73                                       Ralph Vartan
## 75                                   Marco Echevarria
## 74                                        Cafe Fresco
## 152                                  Harrisburg Univ.
## 117                                         counselor
## 17                                        Brian Myers
##     friends_df.followersCount
## 140                  29614066
## 146                   5838393
## 128                   3704680
## 141                   3430579
## 132                   2949046
## 129                   2838799
## 131                   2572979
## 147                   2100946
## 136                   1271986
## 113                   1099342
## 125                    957853
## 4                      900635
## 150                    784071
## 130                    659955
## 127                    410405
## 144                    256449
## 126                    226090
## 62                     185061
## 91                     125734
## 121                    120330
## 145                    110823
## 21                     102644
## 124                     80810
## 33                      77909
## 3                       65941
## 138                     61903
## 97                      59290
## 79                      49318
## 20                      41463
## 31                      32884
## 133                     23281
## 55                      20529
## 10                      12627
## 94                      12278
## 27                      11562
## 16                      10883
## 8                       10187
## 87                       9690
## 29                       9573
## 26                       7686
## 143                      7467
## 12                       6439
## 11                       6342
## 7                        6094
## 137                      5782
## 23                       5567
## 83                       5486
## 47                       4994
## 77                       4824
## 61                       4803
## 135                      4802
## 123                      4644
## 43                       4485
## 118                      4318
## 50                       4159
## 114                      4110
## 2                        4082
## 84                       3936
## 115                      3911
## 81                       3790
## 60                       3522
## 76                       3503
## 134                      3443
## 56                       3328
## 139                      3284
## 120                      3243
## 66                       2998
## 90                       2869
## 68                       2723
## 49                       2600
## 86                       2581
## 22                       2562
## 48                       2549
## 58                       2458
## 19                       2388
## 42                       2347
## 92                       2329
## 9                        2069
## 149                      2066
## 102                      2055
## 112                      2006
## 44                       1924
## 100                      1913
## 52                       1900
## 98                       1885
## 34                       1857
## 116                      1832
## 41                       1802
## 28                       1753
## 63                       1746
## 106                      1687
## 53                       1643
## 148                      1609
## 151                      1603
## 30                       1599
## 32                       1568
## 89                       1535
## 110                      1520
## 82                       1516
## 108                      1514
## 105                      1507
## 24                       1483
## 111                      1463
## 40                       1456
## 109                      1429
## 101                      1402
## 57                       1316
## 64                       1301
## 14                       1288
## 96                       1271
## 67                       1256
## 45                       1221
## 104                      1207
## 122                      1193
## 103                      1134
## 15                       1125
## 71                       1081
## 80                       1007
## 5                        1004
## 25                        991
## 18                        972
## 13                        914
## 36                        864
## 35                        859
## 88                        809
## 70                        796
## 85                        785
## 95                        769
## 107                       745
## 46                        744
## 65                        714
## 69                        677
## 38                        643
## 54                        636
## 51                        608
## 119                       568
## 37                        564
## 1                         545
## 59                        535
## 93                        531
## 78                        522
## 72                        478
## 39                        462
## 142                       462
## 6                         445
## 99                        440
## 73                        373
## 75                        254
## 74                        246
## 152                       216
## 117                       136
## 17                         73

5. Is there a correlation between the number of followers and the number of tweets?

# Scatter plot with followersCount on the x-axis and statusesCount on the
# y-axis, one point per row of friends_df. The default axis labels are taken
# from the argument expressions, so the call is kept exactly as written.
plot(friends_df$followersCount, friends_df$statusesCount)

  6. What are the most commonly used words in the descriptions of HU friends' followers?
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
# Load the stop_words data set used to filter the tokens below.
data("stop_words")

# Split the description column of friends_df into one token per row, stored
# in a new column called word.
tidy_descr <- unnest_tokens(friends_df, word, description)

# Drop every token listed in stop_words, then drop tokens that contain a dot
# or "http" (URL fragments).
new_descr <- anti_join(tidy_descr, stop_words, by = "word")
best_descr <- filter(new_descr, !grepl("\\.|http", word))
## Warning: package 'bindrcpp' was built under R version 3.4.4

# Count how many times each remaining word occurs.
wscount <- as.data.frame(table(best_descr$word))
colnames(wscount) <- c("word", "count")

# Keep the ten most frequent words. head() returns at most ten rows, whereas
# `[1:10, ]` would pad the result with NA rows when fewer than ten distinct
# words remain.
wscount_t10 <- head(
  wscount[order(wscount$count, decreasing = TRUE), ],
  n = 10
)

# Horizontal bar chart of the top words, ordered by frequency.
ggplot(wscount_t10, aes(x = reorder(word, count), y = count)) +
  geom_col(color = "blue") +
  theme_light() +
  coord_flip() +
  labs(x = "", y = "Commonly Used words in HU friends followers' description")