###### Analyze Twitter Network ####################################
##Install new libraries
#install.packages("networkD3") #for interactive network visualization
#install.packages("igraph") #for network analysis
#Load required libraries
library(twitteR)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
##
## Attaching package: 'qdapRegex'
## The following objects are masked from 'package:dplyr':
##
## escape, explain
## Loading required package: qdapTools
##
## Attaching package: 'qdapTools'
## The following object is masked from 'package:dplyr':
##
## id
## The following object is masked from 'package:twitteR':
##
## id
## Loading required package: RColorBrewer
##
## Attaching package: 'qdap'
## The following object is masked from 'package:dplyr':
##
## %>%
## The following object is masked from 'package:base':
##
## Filter
library(networkD3)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:qdap':
##
## %>%, diversity
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:qdap':
##
## %>%
##Collect tweets through Twitter API
#Twitter API Authorization
# Read the Twitter API credentials from environment variables (for example set
# in ~/.Renviron) rather than hard-coding secrets in the script.
# NOTE(review): the keys that used to be written here were committed in plain
# text and should be treated as compromised (revoke and regenerate them).
twitter_creds <- Sys.getenv(c(
  "TWITTER_CONSUMER_KEY",
  "TWITTER_CONSUMER_SECRET",
  "TWITTER_ACCESS_TOKEN",
  "TWITTER_ACCESS_SECRET"
))
if (all(nzchar(twitter_creds))) {
  setup_twitter_oauth(
    twitter_creds[["TWITTER_CONSUMER_KEY"]],
    twitter_creds[["TWITTER_CONSUMER_SECRET"]],
    twitter_creds[["TWITTER_ACCESS_TOKEN"]],
    twitter_creds[["TWITTER_ACCESS_SECRET"]]
  )
} else {
  # Authentication is only needed for live collection; the pre-saved CSV
  # used below can still be analysed without it.
  warning(
    "Twitter credentials not set (",
    paste(names(twitter_creds)[!nzchar(twitter_creds)], collapse = ", "),
    "); skipping API authentication.",
    call. = FALSE
  )
}
## [1] "Using direct authentication"
#collect recent 5000 tweets
#alltweets <- twListToDF(searchTwitter("#basicincome", n=5000, lang=NULL,since=NULL, until=NULL,locale=NULL, geocode=NULL, sinceID=NULL, maxID=NULL,resultType=NULL, retryOnRateLimit=120))
#save tweets as .csv
# write.csv(alltweets, "basicincome_tweets.csv")
## Or use pre-saved data
alltweets <- read.csv("basicincome_tweets1030.csv")
## Extract network information from retweets. If A retweets B, the edge built
## below runs from B (the original author, "sender") to A (the retweeting
## account, "receiver"), i.e. in the direction the tweet travels.
# Network analysis and visualization is computationally intensive. We will use only the first 500 tweets for demo.
# head() keeps at most 500 rows; indexing with 1:500 would append all-NA rows
# whenever the file holds fewer than 500 tweets.
alltweets <- head(alltweets, 500)
#split the data into two sets; one for retweet network and the other for mention network.
#create an edge-list for retweet network
# Partition the tweets by retweet status; split() names the pieces after the
# values of isRetweet, so the retweets are in the "TRUE" element.
sp <- split(alltweets, alltweets$isRetweet)
# The sender is taken from the tweet text: the characters from position 5 up
# to just before the first ':' (the handle in a leading "RT @handle:").
rt <- mutate(sp[["TRUE"]], sender = substr(text, 5, regexpr(":", text) - 1))
# One row per retweet: sender = handle parsed from the text,
# receiver = account that posted the retweet; both lower-cased.
el <- data.frame(
  sender = tolower(rt$sender),
  receiver = tolower(rt$screenName)
)
# Collapse duplicate pairs; the count column `n` is the edge weight.
el <- count(el, sender, receiver)
# head() shows at most 5 edges and does not pad with NA rows when fewer exist.
head(el, 5) # show the first 5 edges in the edgelist
## Source: local data frame [5 x 3]
## Groups: sender [1]
##
## sender receiver n
## <fctr> <fctr> <int>
## 1 all4basicincome blanketcrap 1
## 2 all4basicincome mathotahcuksikl 1
## 3 all4basicincome nwpofficial 1
## 4 all4basicincome off_tha_rails 1
## 5 all4basicincome olmar555 2
#Based on the edge-list, create a retweet network.
# Build a directed graph from the edge list (sender -> receiver); the count
# column `n` is carried along as an edge attribute.
rt_graph <- graph_from_data_frame(d = el, directed = TRUE)
# Visualize the retweet network.
# layout_with_fr() is the current name of the Fruchterman-Reingold layout;
# the computed coordinates are passed to plot() so they are actually used.
glay <- layout_with_fr(rt_graph)
plot(rt_graph, layout = glay)

# Edit the network
# Restyled retweet network on a dark background, without vertex labels.
# layout_with_fr() is the current name of the Fruchterman-Reingold layout.
glay <- layout_with_fr(rt_graph)
# par() returns the previous settings so they can be restored after plotting.
old_par <- par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = degree(rt_graph, mode = "in"), # sized by in-degree centrality
  vertex.label = NA,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = edge_attr(rt_graph)$n / 10, # sized by edge weight
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)
title("Retweet Network", cex.main = 1, col.main = "gray95")
par(old_par)

#The above network visualization does not show vertex label.
#Let’s add a few lines to make the graph more informative.
# Same dark-background plot, now with vertex labels scaled by in-degree.
# layout_with_fr() is the current name of the Fruchterman-Reingold layout.
glay <- layout_with_fr(rt_graph)
# In-degree drives both vertex size and label size, so compute it once.
in_deg <- degree(rt_graph, mode = "in")
# par() returns the previous settings so they can be restored after plotting.
old_par <- par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = in_deg, # sized by in-degree centrality
  vertex.label.family = "sans",
  vertex.shape = "circle", # can also try "square", "rectangle", etc. More in igraph manual
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  # sized by in-degree centrality
  # NOTE(review): the divisor 300 is a fixed scale factor; with the small
  # in-degrees of this 500-tweet sample the labels come out tiny. Confirm
  # the intended scale.
  vertex.label.cex = in_deg / 300,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = edge_attr(rt_graph)$n / 10, # sized by edge weight
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)
title("Retweet Network", cex.main = 1, col.main = "gray95")
par(old_par)

## Create an interactive visualization for the retweet network
# Group vertices into communities with the walktrap algorithm and pull out
# the community id of every vertex.
wc <- cluster_walktrap(rt_graph)
members <- membership(wc)
# Convert the igraph object into the links/nodes pair consumed by networkD3,
# using community membership as the node group.
d3_rt <- igraph_to_networkD3(rt_graph, group = members)
forceNetwork(
  Links = d3_rt[["links"]],
  Nodes = d3_rt[["nodes"]],
  Source = "source",
  Target = "target",
  NodeID = "name",
  Group = "group"
)
# Calculate some network-level statistics for the retweet network
# Size of the network: edge count first, then vertex count.
ecount(rt_graph) # the number of edges
## [1] 286
vcount(rt_graph) # the number of vertices
## [1] 270
# Print the first 50 entries of the edge sequence.
E(rt_graph)[seq_len(50)] # list the first 50 edges
## + 50/286 edges from 5480a3f (vertex names):
## [1] all4basicincome->blanketcrap all4basicincome->mathotahcuksikl
## [3] all4basicincome->nwpofficial all4basicincome->off_tha_rails
## [5] all4basicincome->olmar555 allentien ->ganjasmokercb
## [7] allentien ->kossmannatalie aurianneor ->mariegirardchop
## [9] baseincomequote->3f0ld baseincomequote->andrejd1offical
## [11] baseincomequote->basicincome_usa baseincomequote->blackswanburst
## [13] baseincomequote->dynamican_eea baseincomequote->forastieri
## [15] baseincomequote->geogmi baseincomequote->glaucon_
## [17] baseincomequote->humanistasrbu baseincomequote->jchapman1729
## [19] baseincomequote->jenincanada baseincomequote->julie4peace
## + ... omitted several edges
# Print the first 50 entries of the vertex sequence.
V(rt_graph)[seq_len(50)] # list the first 50 vertex ids
## + 50/270 vertices, named, from 5480a3f:
## [1] all4basicincome allentien aurianneor baseincomequote
## [5] basicincomeact basicincomeimg basicincomepdx basicincwales
## [9] cartokurtis casebasicincome chagallsierra ed4socialchange
## [13] gertrude_o humanvsmachine jonsm99 leahkwatson
## [17] leblanc_isa lindsayontario luskcenter mentornet_csih
## [21] mitsawokett_uk nearestfork newclearistbau oldridge_s
## [25] planetecova psychologydoc rbasicincome rentabasicattac
## [29] scottsantens sdlk2 stepupbg1 sthomeh
## [33] stokedproject tpickard1cogeco trebor155 ubi_bug
## [37] ubisticker uiproj women4ubi worldbasicincom
## + ... omitted several vertices
#Calculate density:The proportion of present edges from all possible edges in the network.
# edge_density() already applies the directed formula to this directed graph,
# which is why it agrees with the manual computation that follows.
edge_density(rt_graph, loops = FALSE) # density as computed by igraph
## [1] 0.003937767
# Manual check for a directed network without loops: m / (n * (n - 1))
ecount(rt_graph) / (vcount(rt_graph) * (vcount(rt_graph) - 1))
## [1] 0.003937767
#Calculate reciprocity:The proportion of reciprocated ties (for a directed network).
# Share of ties that are reciprocated.
reciprocity(rt_graph)
## [1] 0
# Calculate centralization (in-degree based, loops counted, normalized)
centr_degree(
  rt_graph,
  mode = "in",
  loops = TRUE,
  normalized = TRUE
)[["centralization"]]
## [1] 0.03323695
# Calculate transitivity: the probability that the neighbors of a vertex are connected.
# type = "local" returns one value per vertex.
transitivity(rt_graph, type = "local")
## [1] 0.000000000 0.000000000 NaN 0.000000000 0.000000000
## [6] 0.000000000 NaN 0.000000000 0.000000000 0.000000000
## [11] 0.000000000 0.054945055 0.166666667 0.000000000 0.000000000
## [16] 0.000000000 0.200000000 0.000000000 NaN 0.500000000
## [21] 0.000000000 0.000000000 0.000000000 NaN NaN
## [26] NaN 0.000000000 0.000000000 0.000617284 NaN
## [31] NaN 0.000000000 NaN NaN 0.200000000
## [36] NaN 0.000000000 0.000000000 0.000000000 0.000000000
## [41] 1.000000000 NaN NaN 0.022222222 NaN
## [46] 0.047619048 NaN NaN NaN NaN
## [51] NaN 0.000000000 NaN NaN 0.000000000
## [56] NaN 0.000000000 0.000000000 NaN NaN
## [61] NaN NaN NaN NaN NaN
## [66] NaN NaN NaN NaN NaN
## [71] NaN NaN 0.000000000 NaN NaN
## [76] NaN NaN 0.000000000 NaN NaN
## [81] NaN NaN NaN NaN 0.000000000
## [86] NaN NaN NaN NaN NaN
## [91] NaN NaN NaN NaN NaN
## [96] 0.000000000 0.000000000 NaN NaN NaN
## [101] NaN NaN 0.000000000 NaN NaN
## [106] NaN NaN NaN NaN NaN
## [111] NaN NaN NaN NaN NaN
## [116] NaN 0.000000000 NaN NaN 0.000000000
## [121] NaN NaN NaN NaN NaN
## [126] NaN 0.500000000 1.000000000 NaN NaN
## [131] NaN NaN NaN NaN NaN
## [136] NaN NaN 0.000000000 NaN NaN
## [141] NaN NaN NaN NaN NaN
## [146] NaN 0.000000000 NaN NaN NaN
## [151] NaN NaN NaN NaN NaN
## [156] NaN NaN NaN NaN NaN
## [161] NaN NaN NaN 0.000000000 NaN
## [166] NaN NaN 0.000000000 NaN NaN
## [171] NaN NaN NaN NaN NaN
## [176] NaN NaN NaN NaN NaN
## [181] NaN NaN NaN NaN NaN
## [186] NaN NaN NaN NaN NaN
## [191] NaN NaN NaN NaN NaN
## [196] NaN NaN NaN NaN NaN
## [201] NaN NaN NaN NaN NaN
## [206] NaN NaN NaN NaN NaN
## [211] NaN NaN NaN NaN NaN
## [216] NaN NaN NaN NaN NaN
## [221] NaN NaN NaN NaN NaN
## [226] NaN NaN NaN NaN NaN
## [231] NaN NaN NaN NaN NaN
## [236] NaN NaN 0.000000000 NaN NaN
## [241] NaN NaN NaN NaN NaN
## [246] NaN NaN 0.000000000 NaN NaN
## [251] NaN NaN NaN NaN NaN
## [256] NaN NaN NaN NaN NaN
## [261] NaN NaN NaN NaN NaN
## [266] NaN NaN NaN NaN NaN
#Calculate the length of the longest path between two vertices in the network
# Edge directions and weights are ignored for the diameter.
diameter(rt_graph, directed = FALSE, weights = NA)
## [1] 8
# Some vertex-level statistics (based on the retweet network)
# Calculate in-degree centrality
indegree <- sort(degree(rt_graph, mode = "in"), decreasing = TRUE)
# head() returns at most 20 entries and never pads with NA on small graphs.
head(indegree, 20) # show the top vertices by in-degree
## nwpofficial olmar555 kmplamondon basicincome_usa
## 10 7 5 4
## glaucon_ humanistasrbu livable4all leblanc_isa
## 4 4 4 3
## rhymingmisfit sthomeh yoshidrp forastieri
## 3 2 2 2
## unicorn_999 joseartusi wrtwrds mattison
## 2 2 2 2
## rafios sumwin10 lmychan basicincomela
## 2 2 2 2
#Calculate out-degree
# Out-degree of every vertex, largest first.
outdegree <- sort(degree(rt_graph, mode = "out"), decreasing = TRUE)
# head() returns at most 20 entries and never pads with NA on small graphs.
head(outdegree, 20) # show the top vertices by out-degree
## scottsantens cartokurtis baseincomequote basicincomeimg
## 80 27 25 16
## casebasicincome women4ubi ed4socialchange humanvsmachine
## 15 15 14 11
## ubisticker lindsayontario rbasicincome uiproj
## 9 6 6 6
## all4basicincome trebor155 gertrude_o basicincwales
## 5 5 4 3
## leblanc_isa mentornet_csih nearestfork allentien
## 3 3 3 2
#Calculate betweenness centrality
# Betweenness of every vertex, largest first; edge directions and weights
# are ignored.
bt <- sort(
  betweenness(rt_graph, directed = FALSE, weights = NA),
  decreasing = TRUE
)
# head() returns at most 20 entries and never pads with NA on small graphs.
head(bt, 20) # show the top vertices by betweenness centrality
## scottsantens cartokurtis nwpofficial baseincomequote
## 14912.0721 6798.8667 6055.5471 4945.4363
## olmar555 basicincomeimg women4ubi humanistasrbu
## 3842.8600 2943.9709 2740.0002 2548.6929
## casebasicincome rafios ubisticker humanvsmachine
## 2478.1441 2280.8286 1943.3291 1909.2817
## rhymingmisfit glaucon_ livable4all lindsayontario
## 1711.0219 1167.9364 1135.5728 1065.0000
## uiproj rbasicincome mattison basicincomela
## 872.7903 854.5000 775.8251 672.4423
# Calculate closeness centrality: measures how many steps are required to
# reach every other vertex from a given vertex. mode = "all" ignores edge
# direction; weights are not used.
cc <- sort(closeness(rt_graph, mode = "all", weights = NA), decreasing = TRUE)
# head() returns at most 20 entries and never pads with NA on small graphs.
head(cc, 20) # show the top vertices by closeness centrality
## scottsantens nwpofficial olmar555 trebor155
## 6.751283e-05 6.747638e-05 6.734007e-05 6.705109e-05
## cartokurtis humanistasrbu baseincomequote livable4all
## 6.701964e-05 6.700168e-05 6.697475e-05 6.692097e-05
## mattison rhymingmisfit basicincomeimg casebasicincome
## 6.690306e-05 6.687621e-05 6.681366e-05 6.670669e-05
## sumwin10 basicincomela all4basicincome basicincome
## 6.666222e-05 6.663557e-05 6.659120e-05 6.659120e-05
## susandellet4 xiobus ubisticker chagallsierra
## 6.659120e-05 6.659120e-05 6.656017e-05 6.655574e-05
# Calculate eigenvector centrality: connectivity with highly connected neighbors
# NOTE(review): igraph warns (recorded below) that this graph is directed and
# acyclic, so every score is zero; consider directed = FALSE if a usable
# ranking is needed.
ec <- eigen_centrality(rt_graph, directed = TRUE, weights = NA)
## Warning in eigen_centrality(rt_graph, directed = T, weights = NA): At
## centrality.c:344 :graph is directed and acyclic; eigenvector centralities
## will be zeros
# Sort in decreasing order so the highest scores come first; head() returns
# at most 20 entries and never pads with NA on small graphs.
head(sort(ec$vector, decreasing = TRUE), 20) # show the top vertices by eigenvector centrality
## all4basicincome allentien aurianneor baseincomequote
## 0 0 0 0
## basicincomeact basicincomeimg basicincomepdx basicincwales
## 0 0 0 0
## cartokurtis casebasicincome chagallsierra ed4socialchange
## 0 0 0 0
## gertrude_o humanvsmachine jonsm99 leahkwatson
## 0 0 0 0
## leblanc_isa lindsayontario luskcenter mentornet_csih
## 0 0 0 0
##Find hubs and authorities. Hubs: lots outgoing edges; Authorities: lots incoming edges.
# Hub and authority scores per vertex, computed without edge weights.
hs <- hub_score(rt_graph, weights = NA)$vector
# NOTE(review): `as` shares its name with a base R function; the name is
# kept here because it may be used further down the script.
as <- authority_score(rt_graph, weights = NA)$vector
# head() returns at most 20 entries and never pads with NA on small graphs.
head(sort(hs, decreasing = TRUE), 20) # show the top 20 vertices by hub score
## scottsantens cartokurtis baseincomequote basicincomeimg
## 1.000000000 0.085682976 0.067757198 0.058193910
## casebasicincome all4basicincome trebor155 uiproj
## 0.051378324 0.034405546 0.034405546 0.030275900
## humanvsmachine rbasicincome leahkwatson lindsayontario
## 0.030231610 0.026552623 0.026468392 0.018133594
## sdlk2 stepupbg1 ubisticker luskcenter
## 0.017018670 0.017018670 0.016709696 0.013846140
## basicincomeact tpickard1cogeco basicincwales basicincomepdx
## 0.002688224 0.002118344 0.001608117 0.001601543
# head() returns at most 20 entries and never pads with NA on small graphs.
head(sort(as, decreasing = TRUE), 20) # show the top 20 vertices by authority score
## nwpofficial olmar555 humanistasrbu livable4all mattison
## 1.0000000 0.9470566 0.8353955 0.8135853 0.7844562
## rhymingmisfit sumwin10 xiobus basicincomela basicincome
## 0.7717430 0.7596695 0.7444220 0.7443900 0.7417318
## susandellet4 in_camden chagallsierra _ip_qi_ adventureur
## 0.7417318 0.7416709 0.7225463 0.7225463 0.7225463
## allman1100 andlibsh angela_n_hunt angelabocage ant7236
## 0.7225463 0.7225463 0.7225463 0.7225463 0.7225463
#Find clusters
# Show the first 10 cliques; head() does not pad the list with NULL entries
# when fewer than 10 cliques exist.
head(cliques(rt_graph), 10)
## Warning in cliques(rt_graph): At igraph_cliquer.c:56 :Edge directions are
## ignored for clique calculations
## [[1]]
## + 1/270 vertex, named, from 5480a3f:
## [1] scottsantens
##
## [[2]]
## + 1/270 vertex, named, from 5480a3f:
## [1] cartokurtis
##
## [[3]]
## + 1/270 vertex, named, from 5480a3f:
## [1] baseincomequote
##
## [[4]]
## + 1/270 vertex, named, from 5480a3f:
## [1] basicincomeimg
##
## [[5]]
## + 1/270 vertex, named, from 5480a3f:
## [1] women4ubi
##
## [[6]]
## + 1/270 vertex, named, from 5480a3f:
## [1] casebasicincome
##
## [[7]]
## + 1/270 vertex, named, from 5480a3f:
## [1] ed4socialchange
##
## [[8]]
## + 1/270 vertex, named, from 5480a3f:
## [1] humanvsmachine
##
## [[9]]
## + 1/270 vertex, named, from 5480a3f:
## [1] ubisticker
##
## [[10]]
## + 1/270 vertex, named, from 5480a3f:
## [1] uiproj
# Size of every clique; vapply() always returns an integer vector, even when
# the list of cliques is empty.
vapply(cliques(rt_graph), length, integer(1))
## Warning in cliques(rt_graph): At igraph_cliquer.c:56 :Edge directions are
## ignored for clique calculations
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 2 2 1 2 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [71] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2
## [106] 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [141] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [176] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [211] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [246] 2 1 2 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [281] 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1
## [316] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 2 2 1 2 1 2 1 2 1 2 1
## [351] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2
## [386] 2 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1
## [421] 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2
## [456] 1 2 1 2 1 2 1 2 1 2 1 2 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 2 1 2 2 2 2 1
## [491] 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 2 1 2 1 2 1 2 1 2 2 1 2 1 2 3
## [526] 2 1 2 3 2 2 2 2 2 2 1 2 3 2 2 2 2 2 2 2 2 2 1 2 3 2 3 3 2 2 2 2 1 2 3
## [561] 4 3 2 3 2 2
# largest_cliques() returns a list with one element per largest clique, so
# subsetting that list by position selects cliques, not vertices. Print the
# list as-is to see every largest clique with its member vertices.
largest_cliques(rt_graph)
## Warning in largest_cliques(rt_graph): At cliques.c:1087 :directionality of
## edges is ignored for directed graphs
## [[1]]
## + 4/270 vertices, named, from 5480a3f:
## [1] kmplamondon ed4socialchange mentornet_csih yoshidrp
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL
##
## [[16]]
## NULL
##
## [[17]]
## NULL
##
## [[18]]
## NULL
##
## [[19]]
## NULL
##
## [[20]]
## NULL
# Community detection based on edge betweenness (Newman-Girvan)
ceb <- cluster_edge_betweenness(rt_graph)
# Number of communities found.
length(ceb)
## [1] 77
# Modularity score of that partition.
modularity(ceb)
## [1] 0.367573