###### Analyze Twitter Network ####################################

##Install new libraries
#install.packages("networkD3") #for interactive network visualization
#install.packages("igraph") #for network analysis

#Load required libraries
library(twitteR)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
## 
## Attaching package: 'qdapRegex'
## The following objects are masked from 'package:dplyr':
## 
##     escape, explain
## Loading required package: qdapTools
## 
## Attaching package: 'qdapTools'
## The following object is masked from 'package:dplyr':
## 
##     id
## The following object is masked from 'package:twitteR':
## 
##     id
## Loading required package: RColorBrewer
## 
## Attaching package: 'qdap'
## The following object is masked from 'package:dplyr':
## 
##     %>%
## The following object is masked from 'package:base':
## 
##     Filter
library(networkD3)
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:qdap':
## 
##     %>%, diversity
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(stringr) 
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:qdap':
## 
##     %>%
##Collect tweets through Twitter API

# Twitter API authorization.
# The four OAuth values are read from environment variables (for example set
# in ~/.Renviron) so that no secret is stored in this script.
# SECURITY: the keys that were previously hard-coded here have been exposed in
# source control and should be revoked and regenerated.
# Authorization is only required for live collection with searchTwitter();
# when the variables are not set the step is skipped so that the pre-saved
# CSV workflow below still runs.
local({
  credential_vars <- c(
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_SECRET"
  )
  credentials <- Sys.getenv(credential_vars, unset = "", names = TRUE)

  if (all(nzchar(credentials))) {
    setup_twitter_oauth(
      consumer_key = credentials[["TWITTER_CONSUMER_KEY"]],
      consumer_secret = credentials[["TWITTER_CONSUMER_SECRET"]],
      access_token = credentials[["TWITTER_ACCESS_TOKEN"]],
      access_secret = credentials[["TWITTER_ACCESS_SECRET"]]
    )
  } else {
    message(
      "Twitter credentials not found in environment variables (",
      paste(credential_vars[!nzchar(credentials)], collapse = ", "),
      "); skipping API authorization."
    )
  }
})
## [1] "Using direct authentication"
#collect recent 5000 tweets
#alltweets <- twListToDF(searchTwitter("#basicincome", n=5000, lang=NULL,since=NULL, until=NULL,locale=NULL, geocode=NULL, sinceID=NULL, maxID=NULL,resultType=NULL, retryOnRateLimit=120))
#save tweets as .csv
# write.csv(alltweets, "basicincome_tweets.csv")
## Or use pre-saved data
alltweets <- read.csv("basicincome_tweets1030.csv")


## Extract network information from retweets.
# When user A retweets user B, the edge list built below records B (the
# original author) as `sender` and A (the retweeter) as `receiver`, so each
# link runs from B to A, i.e. in the direction the tweet travelled.
# Network analysis and visualization is computationally intensive, so only the
# first 500 tweets are used for this demo.

# head() keeps at most 500 rows; unlike alltweets[1:500, ] it does not pad the
# data with all-NA rows when the file holds fewer than 500 tweets.
alltweets <- head(alltweets, 500)

# Split the tweets by their isRetweet flag: sp[["TRUE"]] holds the retweets
# (used for the retweet network), sp[["FALSE"]] the remaining tweets.
sp <- split(alltweets, alltweets$isRetweet)

# Create an edge list for the retweet network.
# A retweet's text starts with "RT @<author>:", so the author's handle is the
# text from character 5 up to (not including) the first colon.
# sender   = the original author taken from the tweet text
# receiver = the account that posted the retweet (screenName)
rt <- mutate(sp[["TRUE"]], sender = substr(text, 5, regexpr(":", text) - 1))
el <- data.frame(
  sender = tolower(rt$sender),
  receiver = tolower(rt$screenName)
)
# Collapse duplicate sender/receiver pairs; n is the number of retweets per
# pair and is used later as the edge weight.
el <- count(el, sender, receiver)
head(el, 5) # show the first 5 edges in the edge list (fewer if el is shorter)
## Source: local data frame [5 x 3]
## Groups: sender [1]
## 
##            sender        receiver     n
##            <fctr>          <fctr> <int>
## 1 all4basicincome     blanketcrap     1
## 2 all4basicincome mathotahcuksikl     1
## 3 all4basicincome     nwpofficial     1
## 4 all4basicincome   off_tha_rails     1
## 5 all4basicincome        olmar555     2
# Based on the edge list, create a directed retweet network. The first two
# columns of el (sender, receiver) define the edges; the remaining column n is
# stored as an edge attribute.
rt_graph <- graph_from_data_frame(d = el, directed = TRUE)

# Visualize the retweet network with igraph's default layout and styling.
# (A layout was previously computed here but never passed to plot(); the
# styled plots further down compute their own layout.)
plot(rt_graph)

# Styled version of the retweet network plot.
# layout_with_fr() is the current name of the Fruchterman-Reingold layout
# (layout.fruchterman.reingold is the deprecated alias).
glay <- layout_with_fr(rt_graph)
# Dark background and narrow margins; par() settings stay in effect for later
# plots on this device.
par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = degree(rt_graph, mode = "in"), # sized by in-degree centrality
  vertex.label = NA, # hide vertex labels
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = edge_attr(rt_graph)$n / 10, # sized by edge weight
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)
title("Retweet Network", cex.main = 1, col.main = "gray95")

# The network visualization above does not show vertex labels.
# This version adds label settings to make the graph more informative.
# layout_with_fr() is the current name of the Fruchterman-Reingold layout
# (layout.fruchterman.reingold is the deprecated alias).
glay <- layout_with_fr(rt_graph)
# Dark background and narrow margins; par() settings stay in effect for later
# plots on this device.
par(bg = "gray15", mar = c(1, 1, 1, 1))
# NOTE(review): label size is in-degree / 300, which is very small for the
# in-degrees seen in this sample and 0 for vertices nobody links to - confirm
# the divisor suits the intended output size.
plot(rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = degree(rt_graph, mode = "in"), # sized by in-degree centrality
  vertex.label.family = "sans",
  vertex.shape = "circle", # can also try "square", "rectangle", etc. More in igraph manual
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = degree(rt_graph, mode = "in") / 300, # sized by in-degree centrality
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = edge_attr(rt_graph)$n / 10, # sized by edge weight
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)
title("Retweet Network", cex.main = 1, col.main = "gray95")

## Interactive (D3) rendering of the retweet network
# Group vertices into communities with the walktrap algorithm, then hand the
# graph plus each vertex's community membership to networkD3.
wc <- cluster_walktrap(rt_graph)
members <- membership(wc)
d3_rt <- igraph_to_networkD3(rt_graph, group = members)

# Force-directed network widget; nodes are coloured by their community group.
forceNetwork(
  Links = d3_rt$links,
  Nodes = d3_rt$nodes,
  Source = "source",
  Target = "target",
  NodeID = "name",
  Group = "group"
)
# Calculate some network-level statistics for the retweet network
ecount(rt_graph) # the number of edges
## [1] 286
vcount(rt_graph) # the number of vertices
## [1] 270
# List the first 50 edges; the index is capped at the edge count so the call
# also works on graphs with fewer than 50 edges.
E(rt_graph)[seq_len(min(50, ecount(rt_graph)))]
## + 50/286 edges from 5480a3f (vertex names):
##  [1] all4basicincome->blanketcrap     all4basicincome->mathotahcuksikl
##  [3] all4basicincome->nwpofficial     all4basicincome->off_tha_rails  
##  [5] all4basicincome->olmar555        allentien      ->ganjasmokercb  
##  [7] allentien      ->kossmannatalie  aurianneor     ->mariegirardchop
##  [9] baseincomequote->3f0ld           baseincomequote->andrejd1offical
## [11] baseincomequote->basicincome_usa baseincomequote->blackswanburst 
## [13] baseincomequote->dynamican_eea   baseincomequote->forastieri     
## [15] baseincomequote->geogmi          baseincomequote->glaucon_       
## [17] baseincomequote->humanistasrbu   baseincomequote->jchapman1729   
## [19] baseincomequote->jenincanada     baseincomequote->julie4peace    
## + ... omitted several edges
# List the first 50 vertex ids; the index is capped at the vertex count so the
# call also works on graphs with fewer than 50 vertices.
V(rt_graph)[seq_len(min(50, vcount(rt_graph)))]
## + 50/270 vertices, named, from 5480a3f:
##  [1] all4basicincome allentien       aurianneor      baseincomequote
##  [5] basicincomeact  basicincomeimg  basicincomepdx  basicincwales  
##  [9] cartokurtis     casebasicincome chagallsierra   ed4socialchange
## [13] gertrude_o      humanvsmachine  jonsm99         leahkwatson    
## [17] leblanc_isa     lindsayontario  luskcenter      mentornet_csih 
## [21] mitsawokett_uk  nearestfork     newclearistbau  oldridge_s     
## [25] planetecova     psychologydoc   rbasicincome    rentabasicattac
## [29] scottsantens    sdlk2           stepupbg1       sthomeh        
## [33] stokedproject   tpickard1cogeco trebor155       ubi_bug        
## [37] ubisticker      uiproj          women4ubi       worldbasicincom
## + ... omitted several vertices
# Calculate density: the proportion of present edges out of all possible edges
# in the network. rt_graph is directed, so edge_density() counts each ordered
# pair of vertices as a possible edge.
edge_density(rt_graph, loops = FALSE)
## [1] 0.003937767
# The same directed density computed by hand: edges / (n * (n - 1)).
ecount(rt_graph) / (vcount(rt_graph) * (vcount(rt_graph) - 1))
## [1] 0.003937767
# Calculate reciprocity: the proportion of reciprocated ties (for a directed network).
reciprocity(rt_graph)
## [1] 0
# Calculate in-degree centralization of the whole network.
centr_degree(rt_graph, mode = "in", loops = TRUE, normalized = TRUE)$centralization
## [1] 0.03323695
# Calculate local transitivity: for each vertex, the probability that its
# neighbors are connected to each other. One value per vertex is returned;
# NaN appears where the ratio is undefined (presumably vertices with fewer
# than two neighbors).
transitivity(rt_graph, type = "local")
##   [1] 0.000000000 0.000000000         NaN 0.000000000 0.000000000
##   [6] 0.000000000         NaN 0.000000000 0.000000000 0.000000000
##  [11] 0.000000000 0.054945055 0.166666667 0.000000000 0.000000000
##  [16] 0.000000000 0.200000000 0.000000000         NaN 0.500000000
##  [21] 0.000000000 0.000000000 0.000000000         NaN         NaN
##  [26]         NaN 0.000000000 0.000000000 0.000617284         NaN
##  [31]         NaN 0.000000000         NaN         NaN 0.200000000
##  [36]         NaN 0.000000000 0.000000000 0.000000000 0.000000000
##  [41] 1.000000000         NaN         NaN 0.022222222         NaN
##  [46] 0.047619048         NaN         NaN         NaN         NaN
##  [51]         NaN 0.000000000         NaN         NaN 0.000000000
##  [56]         NaN 0.000000000 0.000000000         NaN         NaN
##  [61]         NaN         NaN         NaN         NaN         NaN
##  [66]         NaN         NaN         NaN         NaN         NaN
##  [71]         NaN         NaN 0.000000000         NaN         NaN
##  [76]         NaN         NaN 0.000000000         NaN         NaN
##  [81]         NaN         NaN         NaN         NaN 0.000000000
##  [86]         NaN         NaN         NaN         NaN         NaN
##  [91]         NaN         NaN         NaN         NaN         NaN
##  [96] 0.000000000 0.000000000         NaN         NaN         NaN
## [101]         NaN         NaN 0.000000000         NaN         NaN
## [106]         NaN         NaN         NaN         NaN         NaN
## [111]         NaN         NaN         NaN         NaN         NaN
## [116]         NaN 0.000000000         NaN         NaN 0.000000000
## [121]         NaN         NaN         NaN         NaN         NaN
## [126]         NaN 0.500000000 1.000000000         NaN         NaN
## [131]         NaN         NaN         NaN         NaN         NaN
## [136]         NaN         NaN 0.000000000         NaN         NaN
## [141]         NaN         NaN         NaN         NaN         NaN
## [146]         NaN 0.000000000         NaN         NaN         NaN
## [151]         NaN         NaN         NaN         NaN         NaN
## [156]         NaN         NaN         NaN         NaN         NaN
## [161]         NaN         NaN         NaN 0.000000000         NaN
## [166]         NaN         NaN 0.000000000         NaN         NaN
## [171]         NaN         NaN         NaN         NaN         NaN
## [176]         NaN         NaN         NaN         NaN         NaN
## [181]         NaN         NaN         NaN         NaN         NaN
## [186]         NaN         NaN         NaN         NaN         NaN
## [191]         NaN         NaN         NaN         NaN         NaN
## [196]         NaN         NaN         NaN         NaN         NaN
## [201]         NaN         NaN         NaN         NaN         NaN
## [206]         NaN         NaN         NaN         NaN         NaN
## [211]         NaN         NaN         NaN         NaN         NaN
## [216]         NaN         NaN         NaN         NaN         NaN
## [221]         NaN         NaN         NaN         NaN         NaN
## [226]         NaN         NaN         NaN         NaN         NaN
## [231]         NaN         NaN         NaN         NaN         NaN
## [236]         NaN         NaN 0.000000000         NaN         NaN
## [241]         NaN         NaN         NaN         NaN         NaN
## [246]         NaN         NaN 0.000000000         NaN         NaN
## [251]         NaN         NaN         NaN         NaN         NaN
## [256]         NaN         NaN         NaN         NaN         NaN
## [261]         NaN         NaN         NaN         NaN         NaN
## [266]         NaN         NaN         NaN         NaN         NaN
# Calculate the diameter: the length of the longest shortest path between two
# vertices. Edge directions and edge weights are ignored here.
diameter(rt_graph, directed = FALSE, weights = NA)
## [1] 8
# Some vertex-level statistics (based on the retweet network)
# Calculate in-degree centrality: the number of incoming edges per vertex,
# sorted from highest to lowest.
indegree <- sort(degree(rt_graph, mode = "in"), decreasing = TRUE)
# Show the top 20 vertices by in-degree; head() returns fewer entries instead
# of NA padding when the graph has under 20 vertices.
head(indegree, 20)
##     nwpofficial        olmar555     kmplamondon basicincome_usa 
##              10               7               5               4 
##        glaucon_   humanistasrbu     livable4all     leblanc_isa 
##               4               4               4               3 
##   rhymingmisfit         sthomeh        yoshidrp      forastieri 
##               3               2               2               2 
##     unicorn_999      joseartusi         wrtwrds        mattison 
##               2               2               2               2 
##          rafios        sumwin10         lmychan   basicincomela 
##               2               2               2               2
# Calculate out-degree: the number of outgoing edges per vertex, sorted from
# highest to lowest.
outdegree <- sort(degree(rt_graph, mode = "out"), decreasing = TRUE)
# Show the top 20 vertices by out-degree; head() returns fewer entries instead
# of NA padding when the graph has under 20 vertices.
head(outdegree, 20)
##    scottsantens     cartokurtis baseincomequote  basicincomeimg 
##              80              27              25              16 
## casebasicincome       women4ubi ed4socialchange  humanvsmachine 
##              15              15              14              11 
##      ubisticker  lindsayontario    rbasicincome          uiproj 
##               9               6               6               6 
## all4basicincome       trebor155      gertrude_o   basicincwales 
##               5               5               4               3 
##     leblanc_isa  mentornet_csih     nearestfork       allentien 
##               3               3               3               2
# Calculate betweenness centrality, ignoring edge direction and edge weights,
# sorted from highest to lowest.
bt <- sort(betweenness(rt_graph, directed = FALSE, weights = NA), decreasing = TRUE)
# Show the top 20 vertices by betweenness centrality; head() returns fewer
# entries instead of NA padding when the graph has under 20 vertices.
head(bt, 20)
##    scottsantens     cartokurtis     nwpofficial baseincomequote 
##      14912.0721       6798.8667       6055.5471       4945.4363 
##        olmar555  basicincomeimg       women4ubi   humanistasrbu 
##       3842.8600       2943.9709       2740.0002       2548.6929 
## casebasicincome          rafios      ubisticker  humanvsmachine 
##       2478.1441       2280.8286       1943.3291       1909.2817 
##   rhymingmisfit        glaucon_     livable4all  lindsayontario 
##       1711.0219       1167.9364       1135.5728       1065.0000 
##          uiproj    rbasicincome        mattison   basicincomela 
##        872.7903        854.5000        775.8251        672.4423
# Calculate closeness centrality: measures how many steps are required to
# reach every other vertex from a given vertex. Edges are followed in both
# directions (mode = "all") and edge weights are ignored.
cc <- sort(closeness(rt_graph, mode = "all", weights = NA), decreasing = TRUE)
# Show the top 20 vertices by closeness centrality; head() returns fewer
# entries instead of NA padding when the graph has under 20 vertices.
head(cc, 20)
##    scottsantens     nwpofficial        olmar555       trebor155 
##    6.751283e-05    6.747638e-05    6.734007e-05    6.705109e-05 
##     cartokurtis   humanistasrbu baseincomequote     livable4all 
##    6.701964e-05    6.700168e-05    6.697475e-05    6.692097e-05 
##        mattison   rhymingmisfit  basicincomeimg casebasicincome 
##    6.690306e-05    6.687621e-05    6.681366e-05    6.670669e-05 
##        sumwin10   basicincomela all4basicincome     basicincome 
##    6.666222e-05    6.663557e-05    6.659120e-05    6.659120e-05 
##    susandellet4          xiobus      ubisticker   chagallsierra 
##    6.659120e-05    6.659120e-05    6.656017e-05    6.655574e-05
# Calculate eigenvector centrality: connectivity with highly connected neighbors
ec <- eigen_centrality(rt_graph, directed = TRUE, weights = NA)
## Warning in eigen_centrality(rt_graph, directed = T, weights = NA): At
## centrality.c:344 :graph is directed and acyclic; eigenvector centralities
## will be zeros
# NOTE(review): per the warning above, the directed retweet graph is acyclic
# and every score is 0; consider directed = FALSE if a ranking is needed.
# Sort in decreasing order so the highest scores come first (the previous
# ascending sort listed the lowest-scoring vertices instead of the top ones).
head(sort(ec$vector, decreasing = TRUE), 20) # show the top vertices by eigenvector centrality
## all4basicincome       allentien      aurianneor baseincomequote 
##               0               0               0               0 
##  basicincomeact  basicincomeimg  basicincomepdx   basicincwales 
##               0               0               0               0 
##     cartokurtis casebasicincome   chagallsierra ed4socialchange 
##               0               0               0               0 
##      gertrude_o  humanvsmachine         jonsm99     leahkwatson 
##               0               0               0               0 
##     leblanc_isa  lindsayontario      luskcenter  mentornet_csih 
##               0               0               0               0
## Find hubs and authorities. Hubs: lots of outgoing edges; Authorities: lots of incoming edges.
# Edge weights are ignored for both scores.
hs <- hub_score(rt_graph, weights = NA)$vector
# `as` is also the name of a function in the methods package; R still finds
# that function when it is called, and the variable name is kept because later
# code refers to it.
as <- authority_score(rt_graph, weights = NA)$vector
# Show the top 20 vertices by hub score; head() returns fewer entries instead
# of NA padding when the graph has under 20 vertices.
head(sort(hs, decreasing = TRUE), 20)
##    scottsantens     cartokurtis baseincomequote  basicincomeimg 
##     1.000000000     0.085682976     0.067757198     0.058193910 
## casebasicincome all4basicincome       trebor155          uiproj 
##     0.051378324     0.034405546     0.034405546     0.030275900 
##  humanvsmachine    rbasicincome     leahkwatson  lindsayontario 
##     0.030231610     0.026552623     0.026468392     0.018133594 
##           sdlk2       stepupbg1      ubisticker      luskcenter 
##     0.017018670     0.017018670     0.016709696     0.013846140 
##  basicincomeact tpickard1cogeco   basicincwales  basicincomepdx 
##     0.002688224     0.002118344     0.001608117     0.001601543
# Show the top 20 vertices by authority score; head() returns fewer entries
# instead of NA padding when the graph has under 20 vertices.
head(sort(as, decreasing = TRUE), 20)
##   nwpofficial      olmar555 humanistasrbu   livable4all      mattison 
##     1.0000000     0.9470566     0.8353955     0.8135853     0.7844562 
## rhymingmisfit      sumwin10        xiobus basicincomela   basicincome 
##     0.7717430     0.7596695     0.7444220     0.7443900     0.7417318 
##  susandellet4     in_camden chagallsierra       _ip_qi_   adventureur 
##     0.7417318     0.7416709     0.7225463     0.7225463     0.7225463 
##    allman1100      andlibsh angela_n_hunt  angelabocage       ant7236 
##     0.7225463     0.7225463     0.7225463     0.7225463     0.7225463
# Find clusters: list the first 10 cliques (sets of vertices that are all
# directly connected to one another). Edge directions are ignored, as the
# igraph warning below reports.
head(cliques(rt_graph), 10)
## Warning in cliques(rt_graph): At igraph_cliquer.c:56 :Edge directions are
## ignored for clique calculations
## [[1]]
## + 1/270 vertex, named, from 5480a3f:
## [1] scottsantens
## 
## [[2]]
## + 1/270 vertex, named, from 5480a3f:
## [1] cartokurtis
## 
## [[3]]
## + 1/270 vertex, named, from 5480a3f:
## [1] baseincomequote
## 
## [[4]]
## + 1/270 vertex, named, from 5480a3f:
## [1] basicincomeimg
## 
## [[5]]
## + 1/270 vertex, named, from 5480a3f:
## [1] women4ubi
## 
## [[6]]
## + 1/270 vertex, named, from 5480a3f:
## [1] casebasicincome
## 
## [[7]]
## + 1/270 vertex, named, from 5480a3f:
## [1] ed4socialchange
## 
## [[8]]
## + 1/270 vertex, named, from 5480a3f:
## [1] humanvsmachine
## 
## [[9]]
## + 1/270 vertex, named, from 5480a3f:
## [1] ubisticker
## 
## [[10]]
## + 1/270 vertex, named, from 5480a3f:
## [1] uiproj
# Number of vertices in every clique. vapply() always yields an integer
# vector, whereas sapply() would return an empty list when no clique exists.
vapply(cliques(rt_graph), length, integer(1))
## Warning in cliques(rt_graph): At igraph_cliquer.c:56 :Edge directions are
## ignored for clique calculations
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 2 2 1 2 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
##  [71] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2
## [106] 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [141] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [176] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [211] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [246] 2 1 2 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [281] 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [316] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 2 2 1 2 1 2 1 2 1 2 1
## [351] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2
## [386] 2 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1
## [421] 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [456] 1 2 1 2 1 2 1 2 1 2 1 2 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 2 1 2 2 2 2 1
## [491] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 2 1 2 1 2 1 2 1 2 2 1 2 1 2 3
## [526] 2 1 2 3 2 2 2 2 2 2 1 2 3 2 2 2 2 2 2 2 2 2 1 2 3 2 3 3 2 2 2 2 1 2 3
## [561] 4 3 2 3 2 2
# largest_cliques() returns a list with one element per largest clique.
# For each of them, list at most 20 of its member vertices. (Indexing the list
# itself with [1:20] selected cliques, not vertices, and filled the missing
# positions with NULL.)
lapply(
  largest_cliques(rt_graph),
  function(clique) clique[seq_len(min(20, length(clique)))]
)
## Warning in largest_cliques(rt_graph): At cliques.c:1087 :directionality of
## edges is ignored for directed graphs
## [[1]]
## + 4/270 vertices, named, from 5480a3f:
## [1] kmplamondon     ed4socialchange mentornet_csih  yoshidrp       
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
# Community detection based on edge betweenness (Newman-Girvan)
ceb <- cluster_edge_betweenness(rt_graph)
# Number of communities found
length(ceb)
## [1] 77
# Modularity score of the detected community structure
modularity(ceb)
## [1] 0.367573