The dataset used here consists of ‘Edges’ (or ‘friends lists’) from Facebook. This data was collected from survey participants using this Facebook app. The dataset includes node features (profiles), circles, and ego networks.
This data has been anonymized by replacing the Facebook-internal ids for each user with a new value. Each ego-network we have circles, edges, egofeat, feat, featnames file. As we are going to cluster people only by their friendship, so we just focus in the edges file.
network <- read.csv("3980.edges", header = FALSE, sep = " ", col.names = c("Source", "Target"))
str(network)
## 'data.frame': 292 obs. of 2 variables:
## $ Source: int 4038 4032 4019 4023 4018 4023 4021 4013 4023 4027 ...
## $ Target: int 4014 4027 4026 4003 3997 4031 3998 4004 4030 4032 ...
head(network)
## Source Target
## 1 4038 4014
## 2 4032 4027
## 3 4019 4026
## 4 4023 4003
## 5 4018 3997
## 6 4023 4031
g.network <- graph.data.frame(network, directed = T)
g.network <- connect(g.network, 100, mode = c("all", "out", "in", "total"))
table(degree(g.network))
##
## 2 6 44 45 46 47 48 49 50 51 52 53 54 56 60 61
## 4 4 6 4 5 1 4 4 3 6 2 4 1 2 1 1
plot.igraph(g.network, layout = layout.fruchterman.reingold,
vertex.label = NA,
vertex.label.cex = 1,
vertex.size = 4, edge.curved = TRUE,
main = "Graph of Full dataset",
edge.arrow.size = 0)
wc.base <- walktrap.community(g.network, steps = 8,
modularity = TRUE)
wc.base
## IGRAPH clustering walktrap, groups: 4, mod: 0.029
## + groups:
## $`1`
## [1] "4029" "4001"
##
## $`2`
## [1] "4012" "3987"
##
## $`3`
## [1] "4025" "4016" "3990" "4007"
##
## $`4`
## + ... omitted several groups/vertices
str(wc.base)
## List of 4
## $ merges : chr [1:2] "4029" "4001"
## $ modularity: chr [1:2] "4012" "3987"
## $ membership: chr [1:4] "4025" "4016" "3990" "4007"
## $ names : chr [1:44] "4038" "4032" "4019" "4023" ...
## - attr(*, "class")= chr "communities"
dend.g.network <- as.dendrogram(wc.base)
plot(dend.g.network, main = "Dendrogram on Full dataset")
plot(wc.base, g.network, edge.arrow.size = 0.25, vertex.label = NA, main = "Walktrap Community on Full dataset")
bad.network<-V(g.network)[degree(g.network)<=50]
g.network<-delete.vertices(g.network, bad.network)
V(g.network)$name <- c("Ann", "Julie", "Peter", "John", "Rose", "Mary", "Ada", "Eva", "Adam",
"Alan", "Loic", "Amy", "Mara", "Milo", "Belle", "Brett", "Claire")
###Number of edges per vertex (relationships per people)
table(degree(g.network))
##
## 19 20 21 22 23 25 26 27 28
## 1 1 2 4 4 2 1 1 1
## The degree of the network reduced as we removed most of the vertices
###Inspect the data
str(g.network)
## IGRAPH DN-- 17 196 --
## + attr: name (v/c)
## + edges (vertex names):
## Ann -> Julie, Peter, John, Rose, Mary, Ada, Eva, Adam, Alan, Loic,
## Amy, Mara, Milo, Belle, Brett, Claire
## Julie -> Ann, Peter, John, Rose, Mary, Ada, Eva, Adam, Alan, Loic,
## Amy, Mara, Milo, Belle, Brett, Claire
## Peter -> John, Rose, Mary, Ada, Eva, Adam, Alan, Loic, Amy, Mara,
## Milo, Belle, Brett, Claire
## John -> Julie, Peter, Rose, Mary, Ada, Eva, Adam, Alan, Loic, Amy,
## Mara, Milo, Belle, Brett, Claire
## Rose -> Ann, Julie, Mary, Ada, Eva, Adam, Alan, Loic, Amy, Mara,
## Milo, Belle, Brett, Claire
## Mary -> Julie, Ada, Eva, Adam, Alan, Loic, Amy, Mara, Milo, Belle,
## Brett, Claire
## Ada -> Ann, Julie, Rose, Mary, Eva, Adam, Alan, Loic, Amy, Mara,
## Milo, Belle, Brett, Claire
## Eva -> Julie, Peter, John, Adam, Alan, Loic, Amy, Mara, Milo,
## Belle, Brett, Claire
## Adam -> Peter, Alan, Loic, Amy, Mara, Milo, Belle, Brett, Claire
## Alan -> Julie, John, Eva, Loic, Amy, Mara, Milo, Belle, Brett,
## Claire
## Loic -> Ann, Julie, Peter, John, Mary, Eva, Adam, Amy, Mara, Milo,
## Belle, Brett, Claire
## Amy -> Julie, Peter, John, Eva, Alan, Mara, Milo, Belle, Brett,
## Claire
## Mara -> Julie, John, Eva, Alan, Amy, Milo, Belle, Brett, Claire
## Milo -> Julie, Peter, Ada, Eva, Adam, Alan, Loic, Amy, Mara,
## Belle, Brett, Claire
## Belle -> Peter, Mary, Adam, Milo, Brett, Claire
## Brett -> Peter, Eva, Adam, Loic, Milo, Belle, Claire
## Claire -> Peter, Mary, Eva, Adam, Milo, Belle, Brett
###print the list of vertices (people)
V(g.network)
## + 17/17 vertices, named:
## [1] Ann Julie Peter John Rose Mary Ada Eva Adam Alan
## [11] Loic Amy Mara Milo Belle Brett Claire
###print the list of edges (relationships)
E(g.network)
## + 196/196 edges (vertex names):
## [1] Ann ->Loic Julie ->Rose Peter ->John Julie ->Milo
## [5] John ->Eva Ada ->Mary Eva ->Brett Alan ->John
## [9] Loic ->Eva Amy ->Alan Mara ->Eva Eva ->Peter
## [13] Julie ->Ada Julie ->Amy Milo ->Loic Mara ->Milo
## [17] Mary ->Belle Julie ->Loic Belle ->Claire Milo ->Adam
## [21] Rose ->Ann Belle ->Peter Brett ->Milo Alan ->Milo
## [25] John ->Peter Alan ->Mara Eva ->Julie Julie ->Mara
## [29] Loic ->Brett Mara ->Julie Alan ->Amy Adam ->Brett
## [33] Amy ->Milo Amy ->Peter Milo ->Amy Claire->Adam
## [37] Belle ->Milo John ->Loic Mara ->John Belle ->Mary
## + ... omitted several edges
###print the list of neighbor for vertices 5
neighbors(g.network, 5) # neighbours for Rose
## + 14/17 vertices, named:
## [1] Ann Julie Mary Ada Eva Adam Alan Loic Amy Mara
## [11] Milo Belle Brett Claire
###find the connectivity
are.connected(g.network, 5, 10) # Rose and Alan are connected!!
## [1] TRUE
plot.igraph(g.network, layout = layout.fruchterman.reingold,
vertex.label=V(g.network)$name,
vertex.size = degree(g.network), edge.curved = TRUE,
main = "Fruchterman Reingold Layout for Vertices > 50 Edges",
edge.arrow.size = 0.25)
plot.igraph(g.network, layout = layout_in_circle,
vertex.label=V(g.network)$name,
vertex.size = degree(g.network), edge.curved = TRUE,
main = "In_Circle Layout",
edge.arrow.size = 0.25)
#### To find in degrees
deg.in <- degree(g.network, mode = "in")
deg.in
## Ann Julie Peter John Rose Mary Ada Eva Adam Alan
## 4 11 11 8 5 9 7 14 13 12
## Loic Amy Mara Milo Belle Brett Claire
## 12 13 13 16 16 16 16
#### To find out degrees
deg.out <- degree(g.network, mode = "out")
deg.out
## Ann Julie Peter John Rose Mary Ada Eva Adam Alan
## 16 16 14 15 14 12 14 12 9 10
## Loic Amy Mara Milo Belle Brett Claire
## 13 10 9 12 6 7 7
####Compute shortest paths in and out between each pairs of nodes
sp.in <- shortest.paths(g.network, mode='in')
sp.out <- shortest.paths(g.network, mode = 'out')
sp.in
## Ann Julie Peter John Rose Mary Ada Eva Adam Alan Loic Amy Mara Milo
## Ann 0 1 2 2 1 2 1 2 2 2 1 2 2 2
## Julie 1 0 2 1 1 1 1 1 2 1 1 1 1 1
## Peter 1 1 0 1 2 2 2 1 1 2 1 1 2 1
## John 1 1 1 0 2 2 2 1 2 1 1 1 1 2
## Rose 1 1 1 1 0 2 1 2 2 2 2 2 2 2
## Mary 1 1 1 1 1 0 1 2 2 2 1 2 2 2
## Ada 1 1 1 1 1 1 0 2 2 2 2 2 2 1
## Eva 1 1 1 1 1 1 1 0 2 1 1 1 1 1
## Adam 1 1 1 1 1 1 1 1 0 2 1 2 2 1
## Alan 1 1 1 1 1 1 1 1 1 0 2 1 1 1
## Loic 1 1 1 1 1 1 1 1 1 1 0 2 2 1
## Amy 1 1 1 1 1 1 1 1 1 1 1 0 1 1
## Mara 1 1 1 1 1 1 1 1 1 1 1 1 0 1
## Milo 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## Belle 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Brett 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Claire 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Belle Brett Claire
## Ann 3 2 3
## Julie 2 2 2
## Peter 1 1 1
## John 2 2 2
## Rose 2 2 2
## Mary 1 2 1
## Ada 2 2 2
## Eva 2 1 1
## Adam 1 1 1
## Alan 2 2 2
## Loic 2 1 2
## Amy 2 2 2
## Mara 2 2 2
## Milo 1 1 1
## Belle 0 1 1
## Brett 1 0 1
## Claire 1 1 0
sp.out
## Ann Julie Peter John Rose Mary Ada Eva Adam Alan Loic Amy Mara Milo
## Ann 0 1 1 1 1 1 1 1 1 1 1 1 1 1
## Julie 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## Peter 2 2 0 1 1 1 1 1 1 1 1 1 1 1
## John 2 1 1 0 1 1 1 1 1 1 1 1 1 1
## Rose 1 1 2 2 0 1 1 1 1 1 1 1 1 1
## Mary 2 1 2 2 2 0 1 1 1 1 1 1 1 1
## Ada 1 1 2 2 1 1 0 1 1 1 1 1 1 1
## Eva 2 1 1 1 2 2 2 0 1 1 1 1 1 1
## Adam 2 2 1 2 2 2 2 2 0 1 1 1 1 1
## Alan 2 1 2 1 2 2 2 1 2 0 1 1 1 1
## Loic 1 1 1 1 2 1 2 1 1 2 0 1 1 1
## Amy 2 1 1 1 2 2 2 1 2 1 2 0 1 1
## Mara 2 1 2 1 2 2 2 1 2 1 2 1 0 1
## Milo 2 1 1 2 2 2 1 1 1 1 1 1 1 0
## Belle 3 2 1 2 2 1 2 2 1 2 2 2 2 1
## Brett 2 2 1 2 2 2 2 1 1 2 1 2 2 1
## Claire 3 2 1 2 2 1 2 1 1 2 2 2 2 1
## Belle Brett Claire
## Ann 1 1 1
## Julie 1 1 1
## Peter 1 1 1
## John 1 1 1
## Rose 1 1 1
## Mary 1 1 1
## Ada 1 1 1
## Eva 1 1 1
## Adam 1 1 1
## Alan 1 1 1
## Loic 1 1 1
## Amy 1 1 1
## Mara 1 1 1
## Milo 1 1 1
## Belle 0 1 1
## Brett 1 0 1
## Claire 1 1 0
####Find out mean and standard deviation in the shortest paths
mean.sp.in <- mean(sp.in)
mean.sp.out <- mean(sp.out)
sd.sp.in <- sd(sp.in)
sd.sp.out <- sd(sp.out)
mean.sp.in
## [1] 1.211073
mean.sp.out
## [1] 1.211073
sd.sp.in
## [1] 0.5468488
sd.sp.out
## [1] 0.5468488
#### The mean and standard deviation are same for both 'from' and 'to' as they simply transpose each other
deg.met <- degree(g.network)
bet.met <- betweenness(g.network)
clo.met <- closeness(g.network)
eig.met <- evcent(g.network)$vector
cor.met <- graph.coreness(g.network)
metrices <- data.frame(deg.met, bet.met, clo.met, eig.met, cor.met)
metrices
## deg.met bet.met clo.met eig.met cor.met
## Ann 20 1.0023810 0.06250000 0.7174610 19
## Julie 27 11.3407620 0.06250000 0.9537646 19
## Peter 25 14.3558775 0.05555556 0.9020874 19
## John 23 5.2051587 0.05882353 0.8349699 19
## Rose 19 0.8511655 0.05555556 0.6818503 19
## Mary 21 4.2868298 0.05000000 0.7562108 19
## Ada 21 2.8534965 0.05555556 0.7512839 19
## Eva 26 5.1153541 0.05000000 0.9403772 19
## Adam 22 2.9456099 0.04347826 0.7987306 19
## Alan 22 1.5297619 0.04545455 0.8032302 19
## Loic 25 9.0335442 0.05263158 0.9007166 19
## Amy 23 1.8674603 0.04545455 0.8388952 19
## Mara 22 1.3202381 0.04347826 0.8032302 19
## Milo 28 9.0114330 0.05000000 1.0000000 19
## Belle 22 2.4234127 0.03703704 0.7930174 19
## Brett 23 2.1091020 0.04000000 0.8359094 19
## Claire 23 2.7484127 0.03846154 0.8301962 19
plot(metrices)
#To find out density
den.g.network = graph.density(g.network)
den.g.network
## [1] 0.7205882
#To find out Reciprocity of a directed graph
recp.g.network = reciprocity(g.network)
recp.g.network
## [1] 0.6122449
#Transivity measures the probability that the adjacent vertices of a vertex are connected. This is sometimes also called the clustering coefficient.
trans.g.network = transitivity(g.network)
trans.g.network
## [1] 1
#Triad.Census counts the different subgraphs of three vertices in a graph
triad.g.network = triad.census(g.network)
triad.g.network
## [1] 0 0 0 0 0 0 0 0 108 0 0 140 105 82 162 83
un.g.network <- as.undirected(g.network)
E(un.g.network)
## + 136/136 edges (vertex names):
## [1] Ann --Julie Ann --Peter Julie--Peter Ann --John Julie--John
## [6] Peter--John Ann --Rose Julie--Rose Peter--Rose John --Rose
## [11] Ann --Mary Julie--Mary Peter--Mary John --Mary Rose --Mary
## [16] Ann --Ada Julie--Ada Peter--Ada John --Ada Rose --Ada
## [21] Mary --Ada Ann --Eva Julie--Eva Peter--Eva John --Eva
## [26] Rose --Eva Mary --Eva Ada --Eva Ann --Adam Julie--Adam
## [31] Peter--Adam John --Adam Rose --Adam Mary --Adam Ada --Adam
## [36] Eva --Adam Ann --Alan Julie--Alan Peter--Alan John --Alan
## [41] Rose --Alan Mary --Alan Ada --Alan Eva --Alan Adam --Alan
## [46] Ann --Loic Julie--Loic Peter--Loic John --Loic Rose --Loic
## + ... omitted several edges
fg.g.network <- fastgreedy.community(un.g.network)
str(fg.g.network)
## List of 1
## $ merges : chr [1:17] "Ann" "Julie" "Peter" "John" ...
## - attr(*, "class")= chr "communities"
plot(as.dendrogram(fg.g.network), main = "Dendrogram for Fast Greedy community Findings")
plot(fg.g.network, g.network, edge.arrow.size = 0.25, main = "Fast Greedy community")
edge.g.network <- edge.betweenness.community(g.network)
str(edge.g.network)
## List of 1
## $ removed.edges : chr [1:17] "Ann" "Julie" "Peter" "John" ...
## - attr(*, "class")= chr "communities"
plot(as.dendrogram(edge.g.network), main = "Dendrogram for Edge Betweenness Method")
plot(edge.g.network, g.network, edge.arrow.size = 0.25, main = "Edge Betweenness Community")
cl.g.network <- leading.eigenvector.community(un.g.network)
str(cl.g.network)
## List of 1
## $ merges : chr [1:17] "Ann" "Julie" "Peter" "John" ...
## - attr(*, "class")= chr "communities"
plot(cl.g.network, g.network, edge.arrow.size = 0.25, main = "Leading Eigenvector Community")
wc.g.network <- walktrap.community(g.network, steps = 5,
modularity = TRUE)
wc.g.network
## IGRAPH clustering walktrap, groups: 3, mod: 0.05
## + groups:
## $`1`
## [1] "Peter" "Mary" "Adam" "Loic" "Milo" "Belle" "Brett"
## [8] "Claire"
##
## $`2`
## [1] "Julie" "John" "Eva" "Alan" "Amy" "Mara"
##
## $`3`
## [1] "Ann" "Rose" "Ada"
##
str(wc.g.network)
## List of 3
## $ merges : chr [1:8] "Peter" "Mary" "Adam" "Loic" ...
## $ modularity: chr [1:6] "Julie" "John" "Eva" "Alan" ...
## $ membership: chr [1:3] "Ann" "Rose" "Ada"
## - attr(*, "class")= chr "communities"
dend.g.network <- as.dendrogram(wc.g.network)
plot(dend.g.network, main = "Dendrogram for Walktrap Community")
plot(wc.g.network, g.network, edge.arrow.size = 0.25, main = "Walktrap Community")
In the above all community detection method ‘Walktrap Community’ has identified the clusters (groups) among the People in the Facebook.
Social Network Analysis (SNA)
Social Network Analysis (SNA) is the process of investigating social structures through the use of network and graph theories. It characterizes networked structures in terms of nodes (individual actors, people, or things within the network) and the ties or edges (relationships or interactions) that connect them.