# Install (when missing) and attach every package this walkthrough relies on.
packages <- c("sf", "tidyverse", "igraph", "knitr", "threejs")
for (p in packages) {
  # requireNamespace() only checks availability without attaching, so the
  # library() call below is the single place where a package is attached and
  # where a failed installation surfaces as an error.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
A network graph depicts the interconnections between individuals.
There are several packages for working with network data in R, but in this course we use the igraph package.
The function V() returns all vertices and the function E() returns all edges. To find the total number of vertices or edges in a network, use gorder() and gsize(), respectively.
library(igraph)
# Friendship edge list: each row pairs two people who are friends.
# NOTE(review): the 14th entry of name1 is " Rene" (leading space), which is a
# different string from "Rene" in name2 -- confirm whether that is intended.
friends <- data.frame(
  name1 = c(
    "Jessie", "Jessie", "Sidney", "Sidney", "Karl",
    "Sidney", "Britt", "Shayne", "Sidney", "Sidney",
    "Jessie", "Donnie", "Sidney", " Rene", "Shayne"
  ),
  name2 = c(
    "Sidney", "Britt", "Britt", "Donnie", "Berry",
    "Rene", "Rene", "Sidney", "Elisha", "Whitney",
    "Whitney", "Odell", "Odell", "Whitney", "Donnie"
  )
)
# Preview the first six rows.
head(friends)
## name1 name2
## 1 Jessie Sidney
## 2 Jessie Britt
## 3 Sidney Britt
## 4 Sidney Donnie
## 5 Karl Berry
## 6 Sidney Rene
# Build an undirected igraph object from the two-column edge list; every
# distinct name in the matrix becomes one vertex.
friends.mat <- as.matrix(friends)
# graph_from_edgelist() is the current name of the deprecated graph.edgelist().
g <- graph_from_edgelist(friends.mat, directed = FALSE)
plot(g)
Each vertex has a name attribute, which can be found by inspecting the edge-list matrix or by calling V(g); gorder(g) returns the number of vertices.
Vertex attributes may be categorical or numerical. For example, it might be gender or age or population of city or country that vertex is in
Edges can carry attributes too. An edge attribute might describe the type of relationship between the vertices, for example whether it is romantic or platonic in a friendship network. The weight of an edge is visualised by adjusting the thickness of the edge: the thicker the edge, the higher the weight.
# Per-vertex attributes (gender, age) and a per-edge attribute (hours).
genders <- c("M", "F", "F", "M", "M", "M", "F", "M", "M", "F", "M", "F", "M", "F", "M")
ages <- c(18, 19, 21, 20, 22, 18, 23, 21, 22, 20, 20, 22, 21, 18, 19)
hours <- c(1, 3, 2, 3, 5, 1, 3, 5, 2, 1, 3, 5, 3, 2, 6)
# genders and ages list 15 values, but g has fewer vertices than that (12 in
# the recorded output). Current igraph releases reject a value vector whose
# length is neither 1 nor the vertex count, so keep exactly the leading
# gorder(g) values -- the same values the recorded output shows.
# NOTE(review): confirm the intended person-to-attribute mapping.
g <- set_vertex_attr(g, "gender", value = head(genders, gorder(g)))
g <- set_vertex_attr(g, "age", value = head(ages, gorder(g)))
# hours has one value per edge (15 edges).
g <- set_edge_attr(g, "hours", value = hours)
vertex_attr(g)
## $name
## [1] "Jessie" "Sidney" "Britt" "Donnie" "Karl" "Berry" "Rene"
## [8] "Shayne" "Elisha" "Whitney" "Odell" " Rene"
##
## $gender
## [1] "M" "F" "F" "M" "M" "M" "F" "M" "M" "F" "M" "F"
##
## $age
## [1] 18 19 21 20 22 18 23 21 22 20 20 22
# Double brackets on a vertex sequence list the selected vertices together
# with their attributes (name, gender, age); here the first five vertices.
V(g)[[1:5]]
## + 5/12 vertices, named, from dd58d9d:
## name gender age
## 1 Jessie M 18
## 2 Sidney F 19
## 3 Britt F 21
## 4 Donnie M 20
## 5 Karl M 22
We can select a subset of the edges with E(graph)[[condition]] when we want to impose a condition on the edges.
# List every edge that touches Sidney, with its attributes; .inc() is the
# dotted spelling of the inc() indexing helper.
E(g)[[.inc("Sidney")]]
## + 8/15 edges from dd58d9d (vertex names):
## tail head tid hid hours
## 1 Jessie Sidney 1 2 1
## 3 Sidney Britt 2 3 2
## 4 Sidney Donnie 2 4 3
## 6 Sidney Rene 2 7 1
## 8 Sidney Shayne 2 8 5
## 9 Sidney Elisha 2 9 2
## 10 Sidney Whitney 2 10 1
## 13 Sidney Odell 2 11 3
# Highlight Sidney in red and leave every other vertex white.
V(g)$color <- ifelse(V(g)$name == "Sidney", "red", "white")
plot(g, vertex.label.color = "black")

# Recolour by gender: blue for "M", red otherwise.
V(g)$color <- ifelse(V(g)$gender == "M", "blue", "red")
plot(g, vertex.label.color = "black")

# Draw the same graph once per layout algorithm, in this order.
invisible(lapply(
  list(layout_in_circle, layout_with_fr, layout_as_tree, layout_nicely),
  function(layout_fn) {
    plot(g, vertex.label.color = "black", layout = layout_fn(g))
  }
))

# Final plot: edge thickness follows the "hours" edge attribute.
plot(
  g,
  vertex.label.color = "black",
  edge.color = "black",
  edge.width = E(g)$hours,
  layout = layout_nicely(g)
)
Directed network - network with edges that have directionality with arrow from one vertex to another vertex ( example would be email exchange)
Undirected network - network with no arrows, indicate a relationship of some kind exist
You can check whether a network is directed / undirected by doing this:
# is_directed() / is_weighted() are the current names of the deprecated
# is.directed() / is.weighted().
is_directed(g)
## [1] FALSE
# Reports FALSE because the edge attribute on g is called "hours", not "weight".
is_weighted(g)
## [1] FALSE
# Adjacency lookup between two named vertices (1 = an edge joins them).
g["Sidney", "Jessie"]
## [1] 1
# Every edge that touches Jessie.
incident(g, "Jessie", mode = "all")
## + 3/15 edges from dd58d9d (vertex names):
## [1] Jessie--Sidney Jessie--Britt Jessie--Whitney
# Every vertex directly connected to Sidney.
neighbors(g, "Sidney", mode = "all")
## + 8/12 vertices, named, from dd58d9d:
## [1] Jessie Britt Donnie Rene Shayne Elisha Whitney Odell
# Friends that Sidney and Jessie have in common.
x <- neighbors(g, "Sidney", mode = "all")
y <- neighbors(g, "Jessie", mode = "all")
intersection(x, y)
## + 2/12 vertices, named, from dd58d9d:
## [1] Britt Whitney
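Usually we are interested in the diameter of the network, i.e. the length of the longest shortest path between any two vertices. farthest_vertices(g) returns the pair of vertices at that distance together with the distance itself.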
# Pair of vertices that are furthest apart, plus the distance between them.
farthest_vertices(graph = g)
## $vertices
## + 2/12 vertices, named, from dd58d9d:
## [1] Britt Rene
##
## $distance
## [1] 3
# Average shortest-path length, ignoring edge direction.
mean_distance(graph = g, directed = FALSE)
## [1] 1.804348
We use get_diameter(g) to get the exact sequence of connection of the longest path.
# Vertex sequence along one longest shortest path.
get_diameter(graph = g)
## + 4/12 vertices, named, from dd58d9d:
## [1] Britt Jessie Whitney Rene
# Everyone reachable from Sidney within two steps (Sidney included).
ego(g, order = 2, nodes = "Sidney", mode = "all")
## [[1]]
## + 10/12 vertices, named, from dd58d9d:
## [1] Sidney Jessie Britt Donnie Rene Shayne Elisha Whitney Odell
## [10] Rene
For directed networks, we can measure how influential a vertex is by calculating its in-degree, its out-degree, and its total degree, which is the sum of the in-degree and the out-degree. Because g is undirected, all three are the same here.
# Out-degree of every vertex; g is undirected, so this matches the total
# degree printed further down.
degree(g, mode = "out")
## Jessie Sidney Britt Donnie Karl Berry Rene Shayne Elisha Whitney
## 3 8 3 3 1 1 2 2 1 3
## Odell Rene
## 2 1
# Histogram of the degree distribution.
a <- degree(g, mode = "out")
hist(a, breaks = 20)
# Total degree of every vertex.
degree(g, mode = "all")
## Jessie Sidney Britt Donnie Karl Berry Rene Shayne Elisha Whitney
## 3 8 3 3 1 1 2 2 1 3
## Odell Rene
## 2 1
# Betweenness centrality, treating edges as undirected.
betweenness(g, directed = FALSE)
## Jessie Sidney Britt Donnie Karl Berry Rene Shayne Elisha Whitney
## 1.0 27.0 0.5 0.5 0.0 0.0 0.0 0.0 0.0 8.0
## Odell Rene
## 0.0 0.0
Eigenvector centrality measures how well connected a vertex is: a vertex has a high value when it is connected to many other vertices that themselves have high values.
# Keep only the per-vertex centrality scores from the result list.
eigen_centrality(g)[["vector"]]
## Jessie Sidney Britt Donnie Karl Berry
## 5.571141e-01 1.000000e+00 5.489049e-01 5.068430e-01 1.675836e-17 4.627842e-18
## Rene Shayne Elisha Whitney Odell Rene
## 4.282373e-01 4.166081e-01 2.764775e-01 4.661385e-01 4.166081e-01 1.288768e-01
Density is a measure of the overall structure of the network, the proportion of the edges that actually do exist out of those that can potentially exist
# Share of possible edges that are actually present in g.
edge_density(g)
## [1] 0.2272727
# Random G(n, p) graph with as many vertices as g and an edge probability equal
# to g's density. sample_gnp() replaces the deprecated
# erdos.renyi.game(type = "gnp").
random <- sample_gnp(n = gorder(g), p = edge_density(g))
# Draw both graphs side by side, then restore the previous graphics settings
# so later plots are not forced into a 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
plot(g)
plot(random)
par(old_par)
Assortativity - preferential attachment of vertices to other vertices that are similar in numerical/categorical attributes eg. Shy students tend to associate with other shy students -> high assortativity
# Degree assortativity; assortativity_degree() is the current name of the
# deprecated assortativity.degree().
assortativity_degree(g, directed = FALSE)
## [1] -0.3235294
# Encode gender as numeric factor codes so it can be passed to assortativity().
values <- as.numeric(factor(vertex_attr(g, "gender")))
assortativity(g, values)
## [1] 0.04977376
The reciprocity of a directed network is the proportion of edges that are reciprocated, i.e. edges from A to B for which an edge from B to A also exists.
# The original line was a placeholder rather than valid R. Reciprocity needs a
# directed graph and g is undirected, so use a small directed example in which
# A <-> B is mutual and B -> C is not (2 of its 3 edges are reciprocated).
directed_example <- graph_from_edgelist(
  rbind(c("A", "B"), c("B", "A"), c("B", "C")),
  directed = TRUE
)
reciprocity(directed_example)
Community - unique set of vertices, connection within the unique set is more dense
# Community detection by greedy modularity optimisation; cluster_fast_greedy()
# is the current name of the deprecated fastgreedy.community().
cluster_fast_greedy(g)
## IGRAPH clustering fast greedy, groups: 4, mod: 0.3
## + groups:
## $`1`
## [1] "Jessie" "Whitney" " Rene"
##
## $`2`
## [1] "Sidney" "Donnie" "Shayne" "Elisha" "Odell"
##
## $`3`
## [1] "Britt" "Rene"
##
## $`4`
## + ... omitted several groups/vertices
# Community detection by edge betweenness; cluster_edge_betweenness() is the
# current name of the deprecated edge.betweenness.community().
cluster_edge_betweenness(g)
## IGRAPH clustering edge betweenness, groups: 5, mod: 0.26
## + groups:
## $`1`
## [1] "Jessie" "Whitney" " Rene"
##
## $`2`
## [1] "Sidney" "Donnie" "Shayne" "Odell"
##
## $`3`
## [1] "Britt" "Rene"
##
## $`4`
## + ... omitted several groups/vertices
# Compute both partitions with the current (non-deprecated) function names.
fast <- cluster_fast_greedy(g)
edge <- cluster_edge_betweenness(g)
# Plot the two partitions side by side, then restore the previous graphics
# settings so later plots are not forced into a 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
plot(fast, g, main = "Fast Greedy")
plot(edge, g, main = "Edge Betweenness")  # title typo "Betweeness" corrected
par(old_par)
Fully interactive network, created with javascript
# Interactive rendering of the graph as it currently stands.
graphjs(g)
# Use each vertex's gender as its label and give all vertices one colour.
g <- set_vertex_attr(g, "label", value = vertex_attr(g, "gender"))
g <- set_vertex_attr(g, "color", value = "mistyrose")
graphjs(g, vertex.size = 1)
# Colour vertices by fast-greedy community membership; cluster_fast_greedy()
# is the current name of the deprecated fastgreedy.community().
x <- cluster_fast_greedy(g)
i <- membership(x)
# NOTE(review): the palette has four colours, matching the four groups in the
# recorded output; a partition with more groups would index past its end.
g <- set_vertex_attr(g, "color", value = c("yellow", "cyan", "red", "pink")[i])
graphjs(g)