if (!require("rtweet")) install.packages("rtweet", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)

library(rtweet)  # load the required libraries
library(plotly)
library(ggmap)
library(rworldmap)
library(wordcloud)
library(tm)

# 1. @JMSCHKU ----

# Profile of the @JMSCHKU account
jmsc_handle <- lookup_users("JMSCHKU")
jmsc_handle[["name"]] # display name
## [1] "JMSC"
jmsc_handle[["followers_count"]] # number of followers
## [1] 3391
jmsc_handle[["description"]] # profile description
## [1] "The Journalism and Media Studies Centre of The University of Hong Kong is Asia's premier journalism school."
jmsc_handle[["location"]] # profile location
## [1] "HKU, Hong Kong"
jmsc_handle[["statuses_count"]] # total number of posts
## [1] 5989
jmsc_handle[["account_created_at"]] # account creation time
## [1] "2009-11-12 10:02:33 UTC"
# Age of the account, measured in days up to the moment the script runs
duration <- difftime(
  Sys.time(),
  jmsc_handle[["account_created_at"]],
  units = "days"
)
duration
## Time difference of 3069.694 days
# Average number of posts per day over the lifetime of the account
jmsc_handle[["statuses_count"]] / as.numeric(duration)
## [1] 1.951009
# Fetch the user IDs of @JMSCHKU's followers (at most 5000 are requested)
jmsc_folls <- get_followers("JMSCHKU", n = 5000)
head(jmsc_folls, n = 6) # first 6 followers
## # A tibble: 6 x 1
##   user_id           
##   <chr>             
## 1 983139092522196992
## 2 983046779632418816
## 3 983028214040707072
## 4 958262990435794944
## 5 983001119499567104
## 6 982997640907341824
# Look up the full profile of every follower ID
jmsc_folls_data <- lookup_users(jmsc_folls[["user_id"]])
jmsc_folls_data[1, ][["screen_name"]] # screen name of the first follower
## [1] "EEopM91F0JnnwCq"
jmsc_folls_data[1, ][["location"]] # location of the first follower
## [1] ""
## Display the 20 most common locations of the followers
class(jmsc_folls_data$location) # Check the data class of location
## [1] "character"
# head() returns at most 20 entries. Indexing with [1:20] would instead pad
# the result with NA entries when there are fewer than 20 distinct locations.
head(sort(table(jmsc_folls_data$location), decreasing = TRUE), 20)
## 
##                                             Hong Kong 
##                       1121                        721 
##                    Beijing                   Shanghai 
##                         50                         29 
##               New York, NY Central & Western District 
##                         26                         23 
##                 Hong Kong                   Singapore 
##                         19                         18 
##                      China                     London 
##                         17                         17 
##           New Delhi, India              Paris, France 
##                         15                         15 
## People's Republic of China             Washington, DC 
##                         14                         14 
##            London, England             Beijing, China 
##                         13                         11 
##                         HK              United States 
##                         11                         11 
##                   New York           <U+9999><U+6E2F> 
##                         10                          9
# Counts of the (up to) 20 most common follower locations. head() is used
# instead of [1:20] so that no NA entries appear when fewer than 20 distinct
# locations exist; NA names would also break the != "" filter below.
jfd_loc_top20 <- head(
  sort(table(jmsc_folls_data$location), decreasing = TRUE),
  20
)
jfd_loc_top20 <- jfd_loc_top20[names(jfd_loc_top20) != ""] # Remove ""
# Geocode the locations and plot a map
jfd_loc <- geocode(names(jfd_loc_top20)) # Geocode the top locations

wmp <- getMap()
mapCountryData(
  wmp,
  colourPalette = rep("wheat", 7),
  oceanCol = "lightblue",
  addLegend = FALSE,
  mapTitle = "Locations of JMSC's Followers"
)
# Rescale the counts linearly to point sizes between 1 and 6. When every
# location has the same count the range is zero, so fall back to size 1
# instead of dividing by zero.
count_span <- diff(range(jfd_loc_top20))
p_size <- if (count_span > 0) {
  1 + 5 * (jfd_loc_top20 - min(jfd_loc_top20)) / count_span
} else {
  rep(1, length(jfd_loc_top20))
}
points(jfd_loc$lon, jfd_loc$lat, col = "red", cex = p_size, type = "p")

## More about the followers: how large are their own audiences?
class(jmsc_folls_data[["followers_count"]]) # data class of followers_count
## [1] "integer"
summary(jmsc_folls_data[["followers_count"]]) # quartiles, mean and extremes
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      15     107    4494     610 1942736
# The 10 followers that have the most followers of their own
jmsc_folls_data[
  order(jmsc_folls_data[["followers_count"]], decreasing = TRUE)[1:10],
  c("screen_name", "followers_count")
]
## # A tibble: 10 x 2
##    screen_name     followers_count
##    <chr>                     <int>
##  1 britishmuseum           1942736
##  2 Lara                    1783498
##  3 soledadobrien            830095
##  4 DrEricGrabowsky          707526
##  5 TayeDiggs                670277
##  6 BraveLad                 542922
##  7 TelNews1                 453481
##  8 indymediacn              333754
##  9 roseluqiu                298268
## 10 HerbertRSim              259351
# Bin the followers by their own follower counts and show the share of each
# bin as a pie chart.
# include.lowest = TRUE closes the first interval on the left ([0,10]).
# Without it cut() builds (0,10], so accounts with exactly 0 followers become
# NA and silently disappear from the table and the chart.
jmsc_followers_grp <- cut(
  jmsc_folls_data$followers_count,
  breaks = c(0, 10, 100, 500, 1000, 2500, max(jmsc_folls_data$followers_count)),
  include.lowest = TRUE
)
jmsc_followers_grp_table <- table(jmsc_followers_grp)

p <- plot_ly(
  labels = names(jmsc_followers_grp_table),
  values = jmsc_followers_grp_table,
  type = "pie",
  sort = FALSE, # keep the slices in bin order rather than by size
  textposition = "inside",
  textinfo = "label+percent"
)
# Hide the grid, zero line and tick labels of both axes
p <- layout(
  p,
  title = "JMSC's Followers",
  xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
  yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
)
p
# Interface language of the followers' accounts, most common first
sort(table(jmsc_folls_data[["account_lang"]]), decreasing = TRUE)
## 
##      en   zh-cn   zh-tw   en-gb   zh-CN      es      fr      ko      de 
##    2644     222     110      72      64      52      43      28      22 
##   zh-TW      ja zh-Hans   en-GB      id      it      pt      nl      ru 
##      17      16      16      12      12       9       7       6       6 
##      ar      ca      da      sv   zh-HK      th      tr      vi      bn 
##       5       4       4       4       4       3       3       2       1 
##      el      hi zh-Hant 
##       1       1       1
# Same tabulation after lower-casing the codes, which merges variants that
# differ only in case (e.g. zh-CN and zh-cn)
sort(
  table(tolower(jmsc_folls_data[["account_lang"]])),
  decreasing = TRUE
)
## 
##      en   zh-cn   zh-tw   en-gb      es      fr      ko      de      ja 
##    2644     286     127      84      52      43      28      22      16 
## zh-hans      id      it      pt      nl      ru      ar      ca      da 
##      16      12       9       7       6       6       5       4       4 
##      sv   zh-hk      th      tr      vi      bn      el      hi zh-hant 
##       4       4       3       3       2       1       1       1       1
# Description
# Build a cleaned tm corpus from `doc`.
#   doc: text handed to VectorSource() to create the corpus.
# The cleaning steps run in this order: lower-case the letters A-Z, remove
# punctuation, remove English stopwords. Returns the transformed corpus.
Preprocessing <- function(doc) {
  # Lower-case via an explicit A-Z -> a-z translation table
  lower_ascii <- function(txt) {
    chartr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", txt)
  }
  # Drop the words listed in tm's English stopword list
  drop_stopwords <- function(txt) {
    removeWords(txt, stopwords("english"))
  }

  cleaned <- Corpus(VectorSource(doc))
  for (step in list(lower_ascii, removePunctuation, drop_stopwords)) {
    cleaned <- tm_map(cleaned, step)
  }
  cleaned
}

# Keep only letters, digits and spaces in the followers' descriptions
jfd_des <- gsub("[^[:alnum:] ]", "", jmsc_folls_data[["description"]])
# Merge all descriptions into one space-separated string
jfd_des <- paste(jfd_des, collapse = " ")
jfd_des_tm <- Preprocessing(jfd_des)
# Word cloud of the terms that occur at least 30 times, most frequent first
wordcloud(
  jfd_des_tm,
  scale = c(4, 0.5),
  min.freq = 30,
  max.words = Inf,
  random.order = FALSE,
  colors = brewer.pal(8, "Accent")
)

# Followers count vs. Average posts per day

# Average posts per day of each follower: total posts divided by the age of
# the account in days (measured up to the moment the script runs)
jmsc_folls_data$avgppd <- jmsc_folls_data$statuses_count /
  as.numeric(
    difftime(Sys.time(), jmsc_folls_data$account_created_at, units = "days")
  )

# One scatter plot object, built once and reused for both axis scalings below
# (the original rebuilt the identical object a second time)
p <- plot_ly(
  data = jmsc_folls_data,
  x = ~followers_count,
  y = ~avgppd,
  text = ~screen_name,
  type = "scatter",
  mode = "markers"
)

# Output in Linear scale
layout(
  p,
  title = "Followers count vs. Average activities per day @JMSCHKU (in linear scale)",
  xaxis = list(title = "Followers Count"),
  yaxis = list(title = "Average posts per day")
)

# Output in log scale
# NOTE(review): accounts with 0 followers or 0 posts have no position on a
# log axis - confirm how plotly renders them.
layout(
  p,
  title = "Followers count vs. Average activities per day @JMSCHKU (both xy in log scale)",
  xaxis = list(title = "Followers Count", type = "log"),
  yaxis = list(title = "Average posts per day", type = "log")
)
# Download @JMSCHKU's timeline (3200 posts are requested)
jmsc_timeline <- get_timeline("JMSCHKU", n = 3200)

## Share of the posts that are retweets
mean(jmsc_timeline[["is_retweet"]])
## [1] 0.6612953
## Post with the highest retweet count (own tweets and retweets together)
jmsc_timeline[
  which.max(jmsc_timeline[["retweet_count"]]),
  c("text", "retweet_count", "is_retweet")
]
## # A tibble: 1 x 3
##   text                                            retweet_count is_retweet
##   <chr>                                                   <int> <lgl>     
## 1 RT @kreshjun: Camera shutter speed synced to h~         12293 T
## Post with the highest retweet count among the account's own tweets
jmsc_timeline_ori <- jmsc_timeline[!jmsc_timeline[["is_retweet"]], ]
jmsc_timeline_ori[
  which.max(jmsc_timeline_ori[["retweet_count"]]),
  c("text", "retweet_count", "is_retweet")
]
## # A tibble: 1 x 3
##   text                                            retweet_count is_retweet
##   <chr>                                                   <int> <lgl>     
## 1 It's Prof Chan's last day today. First present~            19 F
## The 3 accounts replied to most often
sort(table(jmsc_timeline[["reply_to_screen_name"]]), decreasing = TRUE)[1:3]
## 
##       JMSCHKU keithrichburg   Wasifshakil 
##            62             5             5
## The 3 accounts mentioned most often
sort(
  table(unlist(jmsc_timeline[["mentions_screen_name"]])),
  decreasing = TRUE
)[1:3]
## 
##       JMSCHKU keithrichburg        cmphku 
##           446           197           126
## Time trend
# Number of posts per calendar month ("YYYY-MM"), for all posts and for the
# account's own tweets only
jmsc_tt <- table(format(jmsc_timeline$created_at, "%Y-%m"))
jmsc_tt_ori <- table(format(jmsc_timeline_ori$created_at, "%Y-%m"))
p <- plot_ly(
  x = names(jmsc_tt),
  y = jmsc_tt,
  name = "Tweets + Retweets",
  type = "scatter",
  mode = "lines"
)
# Give the second trace its own x values. Without them it inherits the months
# of the first trace, and any month containing only retweets is missing from
# jmsc_tt_ori, so the counts would be paired with the wrong months (or the
# lengths would not match at all).
p <- add_trace(
  p,
  x = names(jmsc_tt_ori),
  y = jmsc_tt_ori,
  name = "Tweets only"
)
layout(
  p,
  title = "@JMSCHKU's Post History (latest 3200 posts)",
  xaxis = list(title = "Year-Month"),
  yaxis = list(title = "Number of Posts")
)

# 2. @KEITHRICHBURG ----

# Fetch the user IDs of @keithrichburg's followers (at most 5500 are requested)
keith_folls <- get_followers("keithrichburg", n = 5500)
# Obtain the followers' profiles
keith_folls_data <- lookup_users(keith_folls[["user_id"]])
# Heuristic bot filter for a table of Twitter user profiles.
#
# The following indicators provide the strongest signals to separate bots
# from humans:
# 1. absence of user description (whether the public Twitter profile looks
#    like the default one or it is customized);
# 2. absence of geographical metadata (humans often use smartphones and the
#    Twitter iPhone/Android App, which records as digital footprint the
#    physical location of the mobile device);
# 3. low number of followers, say less than 10;
# 4. account creation date, less than a year;
# 5. non-verified user.
#
# A row is kept when the account shows AT LEAST ONE human signal (enough
# followers, a location, a description, an old enough account, or a verified
# badge). It is dropped as a suspected bot only when it shows none of them.
#
# Arguments:
#   x                the data.frame returned from rtweet::lookup_users; the
#                    columns followers_count, location, description,
#                    account_created_at and verified are read.
#   followers_cutoff accounts with more followers than this count as human.
#   date_cutoff      accounts created before this date (anything as.Date()
#                    accepts) count as human.
# Returns: the rows of x that show at least one human signal.
remove_bot <- function(x, followers_cutoff, date_cutoff) {
  has_followers <- x$followers_count > followers_cutoff
  has_location <- nchar(x$location) != 0
  has_description <- nchar(x$description) != 0
  is_old_account <- as.Date(x$account_created_at) < as.Date(date_cutoff)
  is_verified <- x$verified

  looks_human <- has_followers | has_location | has_description |
    is_old_account | is_verified

  # A missing value in a signal column makes the combined mask NA when no
  # other signal is TRUE. Subsetting by an NA logical would inject an all-NA
  # row into the result, so such accounts are treated as showing no human
  # signal and are dropped.
  x[!is.na(looks_human) & looks_human, ]
}

# Apply the bot filter to @keithrichburg's followers
keith_folls_data_nobot <- remove_bot(
  x = keith_folls_data,
  followers_cutoff = 10,
  date_cutoff = "2017-04-09"
)

# Follower count before the filter
nrow(keith_folls_data)
## [1] 5183
# Follower count after the filter
nrow(keith_folls_data_nobot)
## [1] 5014
# Apply the same filter to @JMSCHKU's followers
jmsc_folls_data_nobot <- remove_bot(
  x = jmsc_folls_data,
  followers_cutoff = 10,
  date_cutoff = "2017-04-09"
)

# Follower count before the filter
nrow(jmsc_folls_data)
## [1] 3391
# Follower count after the filter
nrow(jmsc_folls_data_nobot)
## [1] 3280
# Percentage of followers flagged as bots (@keithrichburg)
round(
  100 * (nrow(keith_folls_data) - nrow(keith_folls_data_nobot)) /
    nrow(keith_folls_data),
  digits = 2
)
## [1] 3.26
# Percentage of followers flagged as bots (@JMSCHKU)
round(
  100 * (nrow(jmsc_folls_data) - nrow(jmsc_folls_data_nobot)) /
    nrow(jmsc_folls_data),
  digits = 2
)
## [1] 3.27
## Screen names of the @keithrichburg followers flagged as "Bots", i.e. those
## whose screen name does not appear among the rows that survived the filter
keith_folls_data[
  !(keith_folls_data[["screen_name"]] %in%
    keith_folls_data_nobot[["screen_name"]]),
][["screen_name"]]
##   [1] "jason67449857"   "bennykung51"     "ajcfnzbqvix1959"
##   [4] "bradleyleong28"  "tommy63823274"   "PcbjasonZhang"  
##   [7] "AFfe58"          "rOgfBWnW6SNfo7J" "10693_net"      
##  [10] "willing_hunter"  "EvemVon"         "Phoebe_729"     
##  [13] "wolf_12138"      "ppppppp42936934" "ggsgydhhy"      
##  [16] "Honma45859841"   "Jacksonshi4"     "Rita__yin"      
##  [19] "zyj1361"         "rach53978713"    "copwowo"        
##  [22] "BarclayMoses"    "ogegetfreaahen2" "RYU_2387"       
##  [25] "tanxj1"          "ALVIN53501090"   "K0HAF1DkJMuupGM"
##  [28] "fibryika87"      "Sally94841473"   "Puxun119937224" 
##  [31] "woshinibabahh"   "1fEN9hUbdHW4xGj" "chenyunwen98"   
##  [34] "AianLiu"         "iris114hk"       "cJE9GuNpkm5CBsD"
##  [37] "rhanhphong6409"  "quynhchaoia560"  "SP_Yeung"       
##  [40] "wx_mjh"          "dannymak48"      "pbvmgjhihv"     
##  [43] "vVQZf9ROepk9zJw" "805106862Snow"   "903914249Cherry"
##  [46] "Moore14252420"   "KoCheukLun"      "zahirud03311953"
##  [49] "mrferrero1"      "light9_Man"      "cyw012"         
##  [52] "lwwhiufung1"     "marthaproduct"   "mmbpfbzibe"     
##  [55] "jaypaul41950649" "franslo811"      "WongWaiCheong5" 
##  [58] "anYnRM3UkMvUEGo" "Huhang1995"      "JacqueMaonio"   
##  [61] "zhr2018331"      "DavidY72984261"  "HeyVlogDV"      
##  [64] "RebeccaChung18"  "Alex007Grant"    "KSR_neal"       
##  [67] "marques2987"     "BillyLai10"      "LEE02637871"    
##  [70] "yangchiking"     "herman_hok"      "bobochan01"     
##  [73] "dchen020"        "breeze842200979" "pengyuan8592121"
##  [76] "laoyuanqu"       "lyy45541064"     "samuelw73271848"
##  [79] "zzy50774888"     "chungch65495555" "miketso200528"  
##  [82] "Kevinzhang113"   "yezhixingxh"     "JudgerLee"      
##  [85] "zHPwWPEiiedksvl" "ju520jing"       "xiaofeng1898"   
##  [88] "qfish3"          "TANG_Louie_BJ"   "Jinkela63Nmb63" 
##  [91] "PargatSinghGil7" "SuihanshiTunfo"  "liuchaovsying"  
##  [94] "wuwei75272952"   "Shahid97591971"  "fzjXBnhLR2fIeAm"
##  [97] "cici07317316"    "Jerry60992076"   "Melondly"       
## [100] "khakaghy"        "Vivienn37402853" "Carmanc2018"    
## [103] "hahuekin"        "Gemingrm"        "ChanRaff"       
## [106] "shzu_wu"         "RoyiJiang"       "Hese87821976"   
## [109] "RanMinzhuwansiu" "guoan007"        "JeremyChiang711"
## [112] "J71S6v"          "oML69dbOONXZ6iG" "xiao_bo1999"    
## [115] "gLcDgwHcREyvF1l" "GzLtabcovWcr2Zf" "EasyMining_2018"
## [118] "qianz262"        "JackMjj007"      "CXLGkSW6GiXiv0C"
## [121] "Jifsy2"          "GIFTOFLIFE1412"  "Lucio39917218"  
## [124] "Zowip2"          "KBz2lQB2eQf1RYq" "johnnywang1108" 
## [127] "eobeans"         "Jasmine91726794" "hao30624300"    
## [130] "louis5881"       "Mogehair"        "GWaiGeorge1"    
## [133] "kwTP9LqjWQ694sB" "lixiaoming88"    "LucyZeng4602"   
## [136] "cathy63149106"   "ng_laikwan"      "johntit39768620"
## [139] "pmZm5aEutttmJAE" "saifai1211"      "michaelw2001_76"
## [142] "chen_everywill"  "Liyinggang6"     "ERICGONG13"     
## [145] "soochikeung3"    "zwcode"          "klh11649452500" 
## [148] "Yau00608694"     "onchakchui1"     "skyuan953"      
## [151] "76RrPTXmO1WQDmM" "Wilson1983NG"    "jason_zx08"     
## [154] "BenjaminYan9"    "yvialLvVDnd3ThD" "liuyanjunzoe"   
## [157] "Pauline64849978" "thanuka66"       "fordhambaldie"  
## [160] "mediazoomer"     "Fyngirz"         "Joris23492276"  
## [163] "adam_addieba"    "Ice_anywhere"    "ChhimSithar"    
## [166] "maria_juanias"   "JoyRead7"        "francesisgreat" 
## [169] "Geraldi29921239"