library("twitteR")
## Warning: package 'twitteR' was built under R version 3.4.4
library("wordcloud")
## Warning: package 'wordcloud' was built under R version 3.4.4
## Loading required package: RColorBrewer
library("tm")
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
library("plyr")
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
# Twitter API credentials are read from environment variables (for example
# set in ~/.Renviron) so that secrets never live in the script itself.
# NOTE(review): the keys that used to be hard-coded here must be treated as
# leaked -- revoke and regenerate them in the Twitter developer console.
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Sys.getenv() returns "" for unset variables; fail early with a clear
# message rather than letting the OAuth handshake fail obscurely.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = consumer_key,
  TWITTER_CONSUMER_SECRET = consumer_secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
if (!all(nzchar(twitter_credentials))) {
  stop(
    "Missing Twitter credentials; set environment variable(s): ",
    paste(names(twitter_credentials)[!nzchar(twitter_credentials)],
          collapse = ", "),
    call. = FALSE
  )
}

# Authenticate this session against the Twitter API.
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
## [1] "Using direct authentication"
# Look up the HarrisburgU account, retrieve its friends list, flatten that
# list into a data frame, cache the data frame on disk, and finally display
# the language code recorded for each friend.
user <- twitteR::getUser("HarrisburgU")
friends <- user$getFriends()
friends_df <- twitteR::twListToDF(friends)
save(list = "friends_df", file = "hu_friends.RData")
friends_df[["lang"]]
## [1] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [15] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [29] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [43] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [57] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [71] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [85] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [99] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [113] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [127] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
## [141] "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en" "en"
# Histogram of the friendsCount column across HU's friends
# (presumably the number of accounts each friend follows -- confirm against
# the twitteR user fields).
# The argument expression is kept verbatim: hist() builds its default title
# and x-axis label from it.
hist(friends_df$friendsCount)
# 3. How active are HU's friends?
# Average value of the statusesCount column over all HU friends.
mean(friends_df[["statusesCount"]])
## [1] 20221.69
# Pair each friend's name with its follower count. The constructor arguments
# are written as `friends_df$...` on purpose: data.frame() derives the column
# names (friends_df.name, friends_df.followersCount) from those expressions.
followers <- data.frame(friends_df$name, friends_df$followersCount)

# Rank from most to fewest followers; the second column holds the counts,
# and negating them turns order()'s ascending sort into a descending one.
followers_ranked <- followers[order(-followers[[2L]]), ]
followers_ranked
## friends_df.name
## 140 NASA
## 146 Bill Nye
## 128 Scientific American
## 141 New Scientist
## 132 Science Channel
## 129 Science News
## 131 NASA HQ PHOTO
## 147 WIRED Science
## 136 Popular Science
## 113 National Science Foundation
## 125 Reuters Science News
## 4 VICE News
## 150 Science Friday
## 130 NASA Technology
## 127 Seeker
## 144 The NPR Science Desk
## 126 ScienceDaily
## 62 Data Science Central
## 91 WGAL
## 121 Help a Reporter Out
## 145 PennLive.com
## 21 abc27 WHTM
## 124 WPMT FOX43
## 33 Governor Tom Wolf
## 3 Philadelphia Fusion
## 138 CBS 21 News
## 97 The Patriot-News
## 79 LNP | LancasterOnline
## 20 Kumar Garg
## 31 Chuck Russell
## 133 Daniel Victor
## 55 WITF news
## 10 Chad Smeltz @ E3
## 94 Anne Deeter Gallaher
## 27 pcntv
## 16 Megan Healey
## 8 ByJohnLMicek
## 87 Sara Bozich
## 29 CPBJ
## 26 WITF
## 143 TheBurg
## 12 PA Department of Community & Economic Development
## 11 Dennis Owens
## 7 Brad Bumsted
## 137 Dauphin County
## 23 HACC
## 83 Vineyard at Hershey
## 47 Jan Murphy
## 77 Whitaker Center
## 61 Roxbury News
## 135 Midtown Scholar
## 123 Eric Papenfuse
## 43 Dave Marcheskie
## 118 City of Harrisburg
## 50 Christian
## 114 Julia Hatmaker
## 2 Code for Philly
## 84 Broad Street Market
## 115 Rep. Patty Kim
## 81 Brewery at Hershey
## 60 HYP
## 76 Lara Greenberg
## 134 Patty Kim
## 56 harrisburg
## 139 Historic Harrisburg
## 120 Larry Portzline
## 66 Midtown Cinema
## 90 Dan Christ
## 68 Jason Scott
## 49 Charles Thompson
## 86 AmyRichardsHarinath
## 22 SKI
## 48 Sue Gleiter
## 58 Chamber & CREDC
## 19 Ryan Group
## 42 Dauphin County Parks & Recreation
## 92 Merlot Mike
## 9 Emily Previti
## 149 Eric Veronikis
## 102 Donald Gilliland
## 112 Sean Simmers
## 44 Friends of Midtown
## 100 Stephanie Sadowski
## 52 Wallace McKelvey
## 98 Christine Vendel
## 34 Barbara Miller
## 116 Real_Dave_La_Torre 🍿
## 41 Brian Enterline
## 28 Nell McCormack Abom
## 63 The Millworks
## 106 Mark Pynes
## 53 Wesley Robinson
## 148 Harrisburg Downtown Improvement District
## 151 Andy Petroski
## 30 Daniel Zampogna
## 32 Harrisburg Area YMCA
## 89 Harrisburg Symphony
## 110 Nick Malawskey
## 82 Open Stage of Harrisburg
## 108 Ivey DeJesus
## 105 John Luciew
## 24 James Robinson
## 111 Julianne Mattera
## 40 Lucy M. Gnazzo
## 109 Dan Gleiter
## 101 Brian J. Ostella
## 57 Bike Harrisburg
## 64 Candy Woodall
## 14 NAC Esports
## 96 Art Assoc of Hbg
## 67 HMAC
## 45 Capital Region Water
## 104 mikefeeley
## 122 Bigley & Blikle
## 103 David Wenner
## 15 John Quigley
## 71 LeighAnnUrban
## 80 YWCA of Harrisburg
## 5 PAR
## 25 abc27
## 18 Colin Deppen
## 13 [CSL] Theresa
## 36 st@rtup
## 35 J.C. Lee
## 88 Rich Hauck
## 70 joelbYorkPA
## 85 Lisa Wardle
## 95 Cate Barron
## 107 Deb Kiner
## 46 Alan Kennedy-Shaffer
## 65 Ashley Lucente
## 69 Charles Palmer
## 38 Adam Klein
## 54 Matthew S. Miller
## 51 Steve Marroni
## 119 Marcus Lingenfelter
## 37 GreenUrbanInitiative
## 1 Cpt. Gabriel Olivera
## 59 TCRPC
## 93 Lara Greenberg
## 78 Vartan Group, Inc.
## 72 JEM Group, LLC
## 39 CAGA
## 142 Nicole Borda Conway
## 6 Tech Forward Podcast
## 99 Jeanette Krebs
## 73 Ralph Vartan
## 75 Marco Echevarria
## 74 Cafe Fresco
## 152 Harrisburg Univ.
## 117 counselor
## 17 Brian Myers
## friends_df.followersCount
## 140 29614066
## 146 5838393
## 128 3704680
## 141 3430579
## 132 2949046
## 129 2838799
## 131 2572979
## 147 2100946
## 136 1271986
## 113 1099342
## 125 957853
## 4 900635
## 150 784071
## 130 659955
## 127 410405
## 144 256449
## 126 226090
## 62 185061
## 91 125734
## 121 120330
## 145 110823
## 21 102644
## 124 80810
## 33 77909
## 3 65941
## 138 61903
## 97 59290
## 79 49318
## 20 41463
## 31 32884
## 133 23281
## 55 20529
## 10 12627
## 94 12278
## 27 11562
## 16 10883
## 8 10187
## 87 9690
## 29 9573
## 26 7686
## 143 7467
## 12 6439
## 11 6342
## 7 6094
## 137 5782
## 23 5567
## 83 5486
## 47 4994
## 77 4824
## 61 4803
## 135 4802
## 123 4644
## 43 4485
## 118 4318
## 50 4159
## 114 4110
## 2 4082
## 84 3936
## 115 3911
## 81 3790
## 60 3522
## 76 3503
## 134 3443
## 56 3328
## 139 3284
## 120 3243
## 66 2998
## 90 2869
## 68 2723
## 49 2600
## 86 2581
## 22 2562
## 48 2549
## 58 2458
## 19 2388
## 42 2347
## 92 2329
## 9 2069
## 149 2066
## 102 2055
## 112 2006
## 44 1924
## 100 1913
## 52 1900
## 98 1885
## 34 1857
## 116 1832
## 41 1802
## 28 1753
## 63 1746
## 106 1687
## 53 1643
## 148 1609
## 151 1603
## 30 1599
## 32 1568
## 89 1535
## 110 1520
## 82 1516
## 108 1514
## 105 1507
## 24 1483
## 111 1463
## 40 1456
## 109 1429
## 101 1402
## 57 1316
## 64 1301
## 14 1288
## 96 1271
## 67 1256
## 45 1221
## 104 1207
## 122 1193
## 103 1134
## 15 1125
## 71 1081
## 80 1007
## 5 1004
## 25 991
## 18 972
## 13 914
## 36 864
## 35 859
## 88 809
## 70 796
## 85 785
## 95 769
## 107 745
## 46 744
## 65 714
## 69 677
## 38 643
## 54 636
## 51 608
## 119 568
## 37 564
## 1 545
## 59 535
## 93 531
## 78 522
## 72 478
## 39 462
## 142 462
## 6 445
## 99 440
## 73 373
## 75 254
## 74 246
## 152 216
## 117 136
## 17 73
# 5. Is there a correlation between number of followers and number of tweets?
# Scatter plot with followersCount on the x-axis and statusesCount on the
# y-axis, one point per HU friend. This is a visual check only; no
# correlation coefficient is computed here.
# The argument expressions are kept verbatim because plot() uses them as the
# default axis labels.
plot(friends_df$followersCount, friends_df$statusesCount)
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
# Load the stop-word lexicon, split every friend's profile description into
# one word per row, drop the stop words, and finally discard any token that
# contains a "." or the text "http".
data("stop_words")
tidy_descr <- friends_df %>% unnest_tokens(word, description)
new_descr <- tidy_descr %>% anti_join(stop_words, by = "word")
best_descr <- new_descr %>% filter(!grepl("\\.|http", word))
## Warning: package 'bindrcpp' was built under R version 3.4.4
# Tally how often each remaining word occurs in the friends' descriptions.
wscount <- as.data.frame(table(best_descr$word))
colnames(wscount) <- c("word", "count")

# Keep the ten most frequent words. head() returns only rows that exist,
# whereas indexing with [1:10, ] pads the result with all-NA rows whenever
# fewer than ten distinct words are available.
wscount_t10 <- head(wscount[order(wscount$count, decreasing = TRUE), ], 10)

# Horizontal bar chart; reorder() sorts the bars by count so that, after
# coord_flip(), the most frequent word ends up at the top.
ggplot(wscount_t10, aes(x = reorder(word, count), y = count)) +
  geom_col(color = "blue") +
  theme_light() +
  coord_flip() +
  labs(x = "", y = "Commonly used words in HU friends' descriptions")