library("igraph")
Un gráfico de redes representa interconexiones entre individuos. La presencia o ausencia de cada interconexión puede indicar si existe alguna relación entre cada par de individuos
La visualización de la red está basada en datos subyacentes que pueden ser representados en dos estructuras.
Matrices de adyacencia (adjacency matrix): Tenemos una matriz que relaciona los nodos en filas y columnas, donde se marca el valor de 1 si existe relación entre individuos y 0 en caso contrario (no hay relación entre individuos). De estos resultados podemos derivar matemáticamente muchos hallazgos sobre la red, como la importancia e influencia de nodos clave en la estructura.
Edgelist: es una matriz de dos columnas, en la que cada fila representa una conexión entre dos nodos individuales. Esta es la forma más común de recolectar datos para la construcción de una red.
# Load the friendship edge list: one row per connection (columns name1, name2).
friends <- read.csv("friends.csv")
head(friends)
## name1 name2
## 1 Jessie Sidney
## 2 Jessie Britt
## 3 Sidney Britt
## 4 Sidney Donnie
## 5 Karl Berry
## 6 Sidney Rene
# igraph builds a graph from an edge list supplied as a two-column matrix,
# so the data frame is converted first.
friends_mat <- as.matrix(friends)
# graph_from_edgelist() is the current name of the deprecated graph.edgelist().
g <- graph_from_edgelist(friends_mat, directed = FALSE)
V(g) # Vertices (nodes) of the network
## + 16/16 vertices, named, from 628f14a:
## [1] Jessie Sidney Britt Donnie Karl Berry Rene Shayne Elisha
## [10] Whitney Odell Lacy Eugene Jude Rickie Tommy
# Edge sequence: every connection in the network
E(g)
## + 27/27 edges from 628f14a (vertex names):
## [1] Jessie --Sidney Jessie --Britt Sidney --Britt Sidney --Donnie
## [5] Karl --Berry Sidney --Rene Britt --Rene Sidney --Shayne
## [9] Sidney --Elisha Sidney --Whitney Jessie --Whitney Donnie --Odell
## [13] Sidney --Odell Rene --Whitney Donnie --Shayne Jessie --Lacy
## [17] Rene --Lacy Elisha --Eugene Eugene --Jude Berry --Odell
## [21] Odell --Rickie Karl --Odell Britt --Lacy Elisha --Jude
## [25] Whitney--Lacy Britt --Whitney Karl --Tommy
# Draw the network with igraph's default settings
plot(g)
# Number of edges (ecount() is igraph's alias of gsize())
ecount(g)
## [1] 27
# Number of vertices (vcount() is igraph's alias of gorder())
vcount(g)
## [1] 16
Dependiendo de la situación que se evalúe (redes sociales, vuelos entre ciudades, cartas entre personas), estaremos hablando de atributos distintos de los nodos, que son los individuos que conforman la red y tienen múltiples atributos como edad, género, país, etc.
Las conexiones también están sujetas a la situación, pero un atributo de la conexión bastante común es el peso (en las visualizaciones, entre más gruesa es la línea entre dos nodos, más importante es o más peso tiene dicha conexión). Por ejemplo, en una red de amigos, el peso podría estar determinado por cuántas veces se llaman los amigos a la semana, o en una red de ciudades, por cuántos vuelos al mes hay entre dos ciudades.
Para agregar dichos atributos a una red que ya existe:
# Build the attribute vectors: one gender and one age per vertex, in V(g) order
genders <- c(
  "M", "F", "F", "M", "M", "M", "F", "M",
  "M", "F", "M", "F", "M", "F", "M", "M"
)
ages <- 20:35
# Attach both vectors to the graph as vertex attributes 'gender' and 'age'
V(g)$gender <- genders
V(g)$age <- ages
# List every vertex attribute stored on the graph
vertex_attr(g)
## $name
## [1] "Jessie" "Sidney" "Britt" "Donnie" "Karl" "Berry" "Rene"
## [8] "Shayne" "Elisha" "Whitney" "Odell" "Lacy" "Eugene" "Jude"
## [15] "Rickie" "Tommy"
##
## $gender
## [1] "M" "F" "F" "M" "M" "M" "F" "M" "M" "F" "M" "F" "M" "F" "M" "M"
##
## $age
## [1] 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
# Show the first five vertices together with their attributes
V(g)[[1:5]]
## + 5/16 vertices, named, from 628f14a:
## name gender age
## 1 Jessie M 20
## 2 Sidney F 21
## 3 Britt F 22
## 4 Donnie M 23
## 5 Karl M 24
# Simulate the weekly hours each pair spends together.
# One value is drawn per edge (gsize(g)); the previous fixed length of 16 did
# not match the number of edges, so values were recycled with a warning.
# Negative draws are clamped to 0 because negative hours are meaningless.
hours <- pmax(round(rnorm(gsize(g), mean = 2.5, sd = 2)), 0)
# Create new edge attribute called 'hours'
g <- set_edge_attr(g, "hours", value = hours)
# View edge attributes of graph object
edge_attr(g)
## $hours
## [1] 4 0 2 2 4 2 1 2 3 3 6 0 4 1 3 3 4 0 2 2 4 2 1 2 3 3 6
# Edges incident to the vertex "Britt"
E(g)[[inc("Britt")]]
## + 5/27 edges from 628f14a (vertex names):
## tail head tid hid hours
## 2 Jessie Britt 1 3 0
## 3 Sidney Britt 2 3 2
## 7 Britt Rene 3 7 1
## 23 Britt Lacy 3 12 1
## 26 Britt Whitney 3 10 3
# Pairs of friends that spend at least 4 hours together per week
E(g)[[hours >= 4]]
## + 7/27 edges from 628f14a (vertex names):
## tail head tid hid hours
## 1 Jessie Sidney 1 2 4
## 5 Karl Berry 5 6 4
## 11 Jessie Whitney 1 10 6
## 13 Sidney Odell 2 11 4
## 17 Rene Lacy 7 12 4
## 21 Odell Rickie 11 15 4
## 27 Karl Tommy 5 16 6
Adicionalmente podemos crear las redes desde el dataframe directamente
# Read the edge table and the vertex table of the second friendship network
friends1_edges <- read.csv("friends1_edges.csv")
friends1_nodes <- read.csv("friends1_nodes.csv")
head(friends1_nodes)
## name gender
## 1 Joe M
## 2 Erin F
## 3 Kelley F
## 4 Ronald M
## 5 Michael M
## 6 Valentine F
head(friends1_edges)
## name1 name2 hours
## 1 Joe Ronald 1
## 2 Joe Michael 3
## 3 Joe Troy 2
## 4 Erin Kelley 3
## 5 Kelley Valentine 5
## 6 Ronald Troy 1
# Build the igraph object straight from the two data frames; the 'hours' and
# 'gender' columns are queried below as edge and vertex attributes.
g1 <- graph_from_data_frame(
  friends1_edges,
  directed = FALSE,
  vertices = friends1_nodes
)
# Edges with at least 5 hours
E(g1)[[hours >= 5]]
## + 4/25 edges from 62db6da (vertex names):
## tail head tid hid hours
## 5 Kelley Valentine 3 6 5
## 8 Ronald Jasmine 4 8 5
## 12 Valentine Perry 6 15 5
## 15 Jasmine Juan 8 9 6
# Colour vertices by gender: orange for "F", dodgerblue for everything else
gender_colors <- ifelse(V(g1)$gender == "F", "orange", "dodgerblue")
g1 <- set_vertex_attr(g1, "color", value = gender_colors)
# Draw the graph with black vertex labels
plot(g1, vertex.label.color = "black")
Una de las principales características que debe tener una visualización de redes es que se puedan identificar rápidamente sus hallazgos principales, por lo que elegir el tipo de visualización es clave. Estos son los factores más tenidos en cuenta para construir una red:
Estos factores pueden ser usados por separado o conjuntos en una sola visualización. Sin importar nuestra elección la gráfica debe ser capaz de entregar información clave que deseamos comunicar.
Otros consejos que por lo general las librerías como igraph
solucionan bien:
# Plot the graph object g in a circle layout
circle_coords <- layout_in_circle(g)
plot(g, layout = circle_coords, vertex.label.color = "black")
# Plot the graph object g in a Fruchterman-Reingold layout
fr_coords <- layout_with_fr(g)
plot(g, layout = fr_coords, vertex.label.color = "black")
# Plot the graph object g in a tree layout
m <- layout_as_tree(g)
plot(g, layout = m, vertex.label.color = "black")
# Plot the graph object g using the layout igraph picks for it
m1 <- layout_nicely(g)
plot(g, layout = m1, vertex.label.color = "black")
library(igraph)
# Edge weights: the hours each pair spends together
w1 <- E(g)$hours
# Compute a layout, then draw the edges with a width equal to their weight
m1 <- layout_nicely(g)
plot(
  g,
  layout = m1,
  edge.width = w1,
  edge.color = "black",
  vertex.label.color = "black"
)
# Drop every g1 edge with fewer than 2 hours; the filtered graph replaces g
weak_edges <- E(g1)[hours < 2]
g <- delete_edges(g1, weak_edges)
# Plot the filtered graph, again scaling edge width by hours
w2 <- E(g)$hours
m2 <- layout_nicely(g)
plot(
  g,
  layout = m2,
  edge.width = w2,
  edge.color = "black",
  vertex.label.color = "black"
)
# Edges that remain incident to "Jasmine" after filtering
E(g)[[inc("Jasmine")]]
## + 3/17 edges from 637f481 (vertex names):
## tail head tid hid hours
## 6 Ronald Jasmine 4 8 5
## 10 Troy Jasmine 7 8 3
## 12 Jasmine Juan 8 9 6
Las redes pueden ser direccionadas o no direccionadas. En una red direccionada importa el sentido del flujo (desde dónde se envía una carta, desde dónde sale un vuelo, quién llama a quién), es decir, hacia dónde se dirige la información. Hasta el momento se ha asumido que la dirección no es tenida en cuenta.
Conexiones no direccionadas simplemente indican que hay una conexión entre dos nodos (y tal vez el tiempo entre ellas).
La representación gráfica de las redes direccionadas se hace con flechas.
# Check whether the network is directed and whether it is weighted.
# is_directed() / is_weighted() are the current names of the deprecated
# is.directed() / is.weighted().
# Per the igraph docs, is_weighted() looks for an edge attribute named
# 'weight'; other attributes such as 'hours' do not count.
is_directed(g)
## [1] FALSE
is_weighted(g)
## [1] FALSE
La dirección también permite determinar la influencia o importancia de un nodo en la red, evaluando sus entradas y salidas, mientras que con las redes no direccionadas solo podíamos evaluar la importancia por el número de conexiones.
# Load the measles data and build a directed graph from it
measles <- read.csv("measles.csv")
g <- graph_from_data_frame(d = measles, directed = TRUE)
# Tabulate the head (receiving end) of every edge, i.e. how many incoming
# edges each vertex has
table(head_of(graph = g, es = E(g)))
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18 19 20 21
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 61 62 63
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 184 185 186 187
## 1 1 1 1
library(igraph)
# Basic plot of the directed network: labels only (vertex.size = 0), small arrows
plot(
  g,
  layout = layout_nicely(g),
  vertex.size = 0,
  vertex.label.color = "black",
  edge.color = "gray77",
  edge.arrow.size = 0.1
)
# Is there an edge going from vertex 184 to vertex 178?
g["184", "178"]
## [1] 1
# And in the opposite direction, from vertex 178 to vertex 184?
g["178", "184"]
## [1] 0
# Every edge touching vertex 184, regardless of direction
incident(g, "184", mode = "all")
## + 6/184 edges from 63d0264 (vertex names):
## [1] 184->45 184->182 184->181 184->178 184->183 184->177
# Only the edges leaving vertex 184
incident(g, "184", mode = "out")
## + 6/184 edges from 63d0264 (vertex names):
## [1] 184->45 184->182 184->181 184->178 184->183 184->177
Podemos analizar los vecinos de cada nodo
# Vertices adjacent to vertex 184, ignoring edge direction
neighbors(g, "184", mode = "all")
## + 6/187 vertices, named, from 63d0264:
## [1] 45 182 181 178 183 177
Aunque dos nodos podrían no tener conexión directa entre ellos, sí podrían tener un vecino en común que los conecte.
# Neighbours of vertices 183 and 177 (any direction)
X <- neighbors(g, "183", mode = "all")
y <- neighbors(g, "177", mode = "all")
# Vertices present in both neighbourhoods, i.e. the shared neighbours
intersection(X, y)
## + 1/187 vertex, named, from 63d0264:
## [1] 184
Una buena medida para ver qué tan bien conectada está la red es medir la longitud de los caminos que existen entre cada par de nodos de la red. Si un nodo es alcanzable directamente desde su vecino, entonces la longitud (distancia geodésica) es 1.
# Pair of vertices separated by the longest shortest path, and that distance
farthest_vertices(g)
## $vertices
## + 2/187 vertices, named, from 63d0264:
## [1] 184 162
##
## $distance
## [1] 5
# Vertex sequence along a longest shortest path (the diameter)
get_diameter(g)
## + 6/187 vertices, named, from 63d0264:
## [1] 184 178 42 7 123 162
# Vertices reachable from 183 within 2 outgoing steps (183 itself included)
ego(g, order = 2, nodes = "183", mode = "out")
## [[1]]
## + 2/187 vertices, named, from 63d0264:
## [1] 183 94
Hay varias formas de determinar la importancia de los nodos en la red:
Podemos considerar un nodo que tiene muchas conexiones, es decir, que tiene un alto grado para ser importante
También podemos considerar nodos que podrían tener mayor influencia en la red si ellos están conectados a otros nodos que están altamente interconectados en la red. Estos pueden ser considerados como nodos que tienen alta centralidad del vector propio (eigenvector centrality).
Hay otros como closeness centrality y pagerank centrality
# Degree of every vertex, counting incoming and outgoing edges together
degree(g, mode = "all")
## 45 172 180 42 182 12 181 175 22 10 31 34 17 93 178 184 8 56 58 186
## 31 3 8 4 5 5 9 3 3 5 4 4 8 5 2 6 2 4 4 2
## 11 19 64 179 54 74 5 78 39 82 44 1 47 183 97 7 21 37 106 18
## 5 3 4 3 2 3 2 4 3 5 2 2 4 2 2 8 3 4 3 4
## 16 116 188 14 79 4 6 145 148 153 73 156 68 123 102 110 98 169 174 173
## 3 4 2 3 2 2 2 4 3 7 2 3 2 3 3 2 2 2 1 1
## 146 177 2 3 9 13 15 20 23 24 25 26 27 28 29 30 32 33 35 36
## 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 38 40 41 43 46 48 49 50 51 52 53 55 57 59 60 61 62 63 65 66
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 67 69 70 71 72 75 76 77 80 81 83 84 85 86 87 88 89 90 91 92
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 94 95 96 99 100 101 103 104 105 107 108 109 111 112 113 114 115 117 118 119
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 120 121 122 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 142 143 144 147 149 150 151 152 154 155 157 158 159 160 161 162 163 164 165 166
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 167 168 170 171 176 185 187
## 1 1 1 1 1 1 1
# Normalized betweenness of every vertex, following edge direction.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
betweenness(g, directed = TRUE, normalized = TRUE)
## 45 172 180 42 182 12
## 2.586457e-03 8.718396e-05 5.812264e-04 8.137169e-04 3.196745e-04 2.324906e-04
## 181 175 22 10 31 34
## 9.880849e-04 2.034292e-04 1.162453e-04 2.324906e-04 1.743679e-04 1.743679e-04
## 17 93 178 184 8 56
## 4.649811e-04 2.324906e-04 4.359198e-04 0.000000e+00 5.812264e-05 1.743679e-04
## 58 186 11 19 64 179
## 1.743679e-04 5.812264e-05 2.324906e-04 1.162453e-04 2.324906e-04 2.324906e-04
## 54 74 5 78 39 82
## 8.718396e-05 1.162453e-04 5.812264e-05 1.743679e-04 1.162453e-04 2.324906e-04
## 44 1 47 183 97 7
## 5.812264e-05 5.812264e-05 1.743679e-04 2.906132e-05 5.812264e-05 7.846556e-04
## 21 37 106 18 16 116
## 2.324906e-04 1.743679e-04 1.743679e-04 2.324906e-04 1.162453e-04 1.743679e-04
## 188 14 79 4 6 145
## 5.812264e-05 2.324906e-04 8.718396e-05 8.718396e-05 8.718396e-05 1.743679e-04
## 148 153 73 156 68 123
## 1.743679e-04 4.068585e-04 8.718396e-05 1.162453e-04 8.718396e-05 2.324906e-04
## 102 110 98 169 174 173
## 1.743679e-04 8.718396e-05 8.718396e-05 8.718396e-05 0.000000e+00 0.000000e+00
## 146 177 2 3 9 13
## 5.812264e-05 4.649811e-04 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 15 20 23 24 25 26
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 27 28 29 30 32 33
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 35 36 38 40 41 43
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 46 48 49 50 51 52
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 53 55 57 59 60 61
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 62 63 65 66 67 69
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 70 71 72 75 76 77
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 80 81 83 84 85 86
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 87 88 89 90 91 92
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 94 95 96 99 100 101
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 103 104 105 107 108 109
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 111 112 113 114 115 117
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 118 119 120 121 122 124
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 125 126 127 128 129 130
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 131 132 133 134 135 136
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 137 138 139 140 142 143
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 144 147 149 150 151 152
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 154 155 157 158 159 160
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 161 162 163 164 165 166
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 167 168 170 171 176 185
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 187
## 0.000000e+00
library(igraph)
# Out-degree of every vertex
g.outd <- degree(g, mode = "out")
# Frequency table of the out-degree values
table(g.outd)
## g.outd
## 0 1 2 3 4 6 7 8 30
## 125 21 16 12 6 2 3 1 1
# Histogram of the out-degrees
hist(g.outd, breaks = 30)
# Vertex with the largest out-degree
which.max(g.outd)
## 45
## 1
# Betweenness of every vertex, following edge direction
g.b <- betweenness(g, directed = TRUE)
# Histogram of the betweenness scores
hist(g.b, breaks = 80)
# Network plot in which vertex size grows with the square root of betweenness
plot(
  g,
  layout = layout_nicely(g),
  vertex.size = sqrt(g.b) + 1,
  vertex.label = NA,
  edge.color = "black",
  edge.arrow.size = 0.05
)
One issue with the measles dataset is that there are three individuals for whom no information is known about who infected them. One of these individuals (vertex 184) appears ultimately responsible for spreading the disease to many other individuals even though they did not directly infect too many individuals. However, because vertex 184 has no incoming edge in the network they appear to have low betweenness. One way to explore the importance of this vertex is by visualizing the geodesic distances of connections going out from this individual. In this exercise you shall create a plot of these distances from this patient zero.
# Ego graph of vertex 184: every vertex within diameter(g) steps of it,
# ignoring edge direction
g184 <- make_ego_graph(g, order = diameter(g), nodes = "184", mode = "all")[[1]]
# Geodesic distance from vertex 184 to every vertex of the ego graph
dists <- distances(g184, v = "184")
# Six colours, looked up below by distance + 1 (so distances 0 to 5 are covered)
colors <- c("black", "red", "orange", "blue", "dodgerblue", "cyan")
# Colour each vertex of g184 according to its distance from vertex 184
V(g184)$color <- colors[dists + 1]
# Plot the ego graph, labelling each vertex with its distance from patient zero
plot(
  g184,
  main = "Geodesic Distances from Patient Zero",
  vertex.size = 7,
  vertex.label = dists,
  vertex.label.cex = 0.6,
  vertex.label.color = "white",
  edge.color = "black",
  edge.arrow.size = 0.05
)
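Densidad: es una medida de qué tan interconectada está la red. Es la proporción de conexiones existentes respecto a todas las conexiones posibles entre los \(n\) nodos:
\[ densidad = \frac{conexiones}{\binom{n}{2}} = \frac{conexiones}{\frac{n!}{2!\,(n-2)!}} = \frac{2 \cdot conexiones}{n(n-1)} \]
* Longitud media del camino: es el promedio de las longitudes de los caminos más cortos entre todos los pares de nodos de la red. Entre más pequeño el valor, más interconectada está la red.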
# Read the Forrest Gump edge list
gump <- read.csv("gump.csv")
library(igraph)
# First rows of the data set
head(gump)
## V1 V2
## 1 ABBIE HOFFMAN JENNY
## 2 ABBIE HOFFMAN POLICEMAN
## 3 ANCHORMAN FORREST
## 4 ANCHORMAN LT DAN
## 5 ANCHORMAN MARGO
## 6 ANCHORMAN MRS GUMP
# Build an undirected network from the edge list
g <- graph_from_data_frame(d = gump, directed = FALSE)
# Eigenvector centrality of every vertex; the vertex with the highest score
# is the key node
g.ec <- eigen_centrality(g)
which.max(g.ec[["vector"]])
## FORREST
## 36
# Plot the Forrest Gump network, sizing each vertex by its eigenvector centrality
plot(
  g,
  main = "Forrest Gump Network",
  vertex.size = g.ec[["vector"]] * 25,
  vertex.label.cex = 0.6,
  vertex.label.color = "black",
  edge.color = "gray88"
)
library(igraph)
# Density of the graph
gd <- edge_density(g)
# Diameter of the graph, treating edges as undirected
diameter(g, directed = FALSE)
## [1] 4
# Average path length of the graph; the parentheses print the assigned value
(g.apl <- mean_distance(g, directed = FALSE))
## [1] 1.994967
Esto es útil para identificar si la red es inusual con respecto a una red aleatoria con la misma cantidad de nodos y la misma densidad.
# Generate 1000 random graphs with the same number of vertices as g and an
# edge probability equal to g's density.
# sample_gnp() is the current name of the deprecated
# erdos.renyi.game(type = "gnp"); order and density are computed once
# instead of on every iteration.
n_vertices <- gorder(g)
p_edge <- edge_density(g)
gl <- lapply(
  seq_len(1000),
  function(i) sample_gnp(n = n_vertices, p = p_edge)
)
# Average path length of each random graph (vapply guarantees a numeric vector)
gl.apls <- vapply(gl, mean_distance, numeric(1), directed = FALSE)
# Observed average path length of g, used as the reference value below
observed_apl <- mean_distance(g, directed = FALSE)
# Plot the distribution of average path lengths, marking the observed value
hist(gl.apls, xlim = c(1.5, 6))
abline(v = observed_apl, col = "red", lty = 3, lwd = 2)
# Proportion of random graphs with an average path length lower than observed
mean(gl.apls < observed_apl)
## [1] 0
Triángulos o tríadas: por cada grupo de 3 nodos existen tres conexiones potenciales. Si los 3 nodos están conectados entre sí, la tríada está cerrada.
Cliques: en un clique cada nodo está conectado con todos los demás nodos del grupo. Si esto ocurre con todos los nodos de la red, la red entera es un clique. Por ejemplo, las tríadas cerradas en una red son cliques.
library(igraph)
# All triangles in the network, reshaped so that each column holds the three
# vertices of one triangle
tri_vertices <- triangles(g)
matrix(tri_vertices, nrow = 3)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 1 1 1 1 2 4 4 6 6 6 6 7 7 8
## [3,] 83 38 39 66 68 57 24 27 75 40 45 8 69 69
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 11 11 11 12 12 13 14 14 14 14 14 14
## [3,] 12 13 70 70 13 70 4 19 24 71 65 57
## [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 14 14 14 15 15 17 17 18 18 19 19 21
## [3,] 62 63 64 21 72 22 42 5 28 71 63 72
## [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 22 24 26 26 26 26 26 26 27 27 27 28
## [3,] 42 57 73 52 47 48 49 50 75 45 40 5
## [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 28 30 30 30 34 38 38 38 39 39 40 40
## [3,] 90 84 61 51 88 83 66 39 83 66 75 45
## [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 41 41 41 41 41 41 41 41 41 41 41 41
## [3,] 1 3 6 7 8 11 12 13 26 27 30 32
## [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 41 41 41 41 41 41 41 41 41 41 41 41
## [3,] 33 86 37 38 39 40 43 44 45 47 48 49
## [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
## [1,] 36 36 36 36 36 36 36 36 36 36 36 36
## [2,] 41 41 41 41 41 41 41 41 41 41 41 41
## [3,] 50 51 52 53 54 56 58 61 66 69 70 73
## [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 41 41 41 41 41 41 43 43 43 44
## [3,] 74 75 79 82 83 84 82 54 53 2
## [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 44 44 44 44 44 44 44 44 44 44
## [3,] 3 9 14 17 19 22 82 71 42 43
## [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 44 44 44 44 44 45 47 47 47 47
## [3,] 53 62 63 64 65 75 73 52 50 48
## [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 47 48 48 48 48 49 49 49 50 50
## [3,] 49 73 52 50 49 73 52 50 73 52
## [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 51 51 52 53 54 54 56 58 59 60
## [3,] 84 61 73 82 87 56 89 79 92 2
## [,149] [,150] [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 60 60 60 60 60 60 61 62 62 62
## [3,] 20 23 25 31 81 43 84 71 19 35
## [,159] [,160] [,161] [,162] [,163] [,164] [,165] [,166] [,167] [,168]
## [1,] 36 36 36 36 36 36 36 36 36 36
## [2,] 62 63 64 64 64 64 64 64 65 65
## [3,] 63 71 3 71 19 63 62 46 4 71
## [,169] [,170] [,171] [,172] [,173] [,174] [,175] [,176] [,177] [,178]
## [1,] 36 36 36 36 36 36 36 41 41 41
## [2,] 65 65 65 65 65 65 66 1 1 1
## [3,] 19 24 64 63 57 62 83 83 38 39
## [,179] [,180] [,181] [,182] [,183] [,184] [,185] [,186] [,187] [,188]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 1 6 6 6 6 7 7 8 11 11
## [3,] 66 27 75 40 45 8 69 69 12 13
## [,189] [,190] [,191] [,192] [,193] [,194] [,195] [,196] [,197] [,198]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 11 12 12 13 26 26 26 26 26 26
## [3,] 70 70 13 70 73 52 47 48 49 50
## [,199] [,200] [,201] [,202] [,203] [,204] [,205] [,206] [,207] [,208]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 27 27 27 30 30 30 38 38 38 39
## [3,] 75 45 40 84 61 51 83 66 39 83
## [,209] [,210] [,211] [,212] [,213] [,214] [,215] [,216] [,217] [,218]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 39 40 40 43 43 43 44 44 44 44
## [3,] 66 75 45 82 54 53 3 82 43 53
## [,219] [,220] [,221] [,222] [,223] [,224] [,225] [,226] [,227] [,228]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 45 47 47 47 47 47 48 48 48 48
## [3,] 75 73 52 50 48 49 73 52 50 49
## [,229] [,230] [,231] [,232] [,233] [,234] [,235] [,236] [,237] [,238]
## [1,] 41 41 41 41 41 41 41 41 41 41
## [2,] 49 49 49 50 50 51 51 52 53 54
## [3,] 73 52 50 73 52 84 61 73 82 56
## [,239] [,240] [,241] [,242] [,243] [,244] [,245] [,246] [,247] [,248]
## [1,] 41 41 41 41 44 44 44 44 44 44
## [2,] 58 58 61 66 2 14 14 14 14 14
## [3,] 10 79 84 83 67 19 71 65 62 63
## [,249] [,250] [,251] [,252] [,253] [,254] [,255] [,256] [,257] [,258]
## [1,] 44 44 44 44 44 44 44 44 44 44
## [2,] 14 17 17 19 19 22 43 43 53 62
## [3,] 64 22 42 71 63 42 82 53 82 71
## [,259] [,260] [,261] [,262] [,263] [,264] [,265] [,266] [,267] [,268]
## [1,] 44 44 44 44 44 44 44 44 44 44
## [2,] 62 62 63 64 64 64 64 64 65 65
## [3,] 19 63 71 3 71 19 63 62 71 19
## [,269] [,270] [,271] [,272] [,273] [,274] [,275] [,276] [,277] [,278]
## [1,] 44 44 44 14 14 14 14 14 14 14
## [2,] 65 65 65 4 4 19 19 24 65 65
## [3,] 64 63 62 57 24 71 63 57 4 71
## [,279] [,280] [,281] [,282] [,283] [,284] [,285] [,286] [,287] [,288]
## [1,] 14 14 14 14 14 14 14 14 14 14
## [2,] 65 65 65 65 65 65 62 62 62 63
## [3,] 19 24 64 63 57 62 71 19 63 71
## [,289] [,290] [,291] [,292] [,293] [,294] [,295] [,296] [,297] [,298]
## [1,] 14 14 14 14 65 65 65 65 65 65
## [2,] 64 64 64 64 4 4 19 19 24 64
## [3,] 71 19 63 62 57 24 71 63 57 71
## [,299] [,300] [,301] [,302] [,303] [,304] [,305] [,306] [,307] [,308]
## [1,] 65 65 65 65 65 65 65 64 64 64
## [2,] 64 64 64 63 62 62 62 19 19 63
## [3,] 19 63 62 71 71 19 63 71 63 71
## [,309] [,310] [,311] [,312] [,313] [,314] [,315] [,316] [,317] [,318]
## [1,] 64 64 64 62 62 62 19 26 26 26
## [2,] 62 62 62 19 19 63 63 52 47 47
## [3,] 71 19 63 71 63 71 71 73 73 52
## [,319] [,320] [,321] [,322] [,323] [,324] [,325] [,326] [,327] [,328]
## [1,] 26 26 26 26 26 26 26 26 26 26
## [2,] 47 47 47 48 48 48 48 49 49 49
## [3,] 50 48 49 73 52 50 49 73 52 50
## [,329] [,330] [,331] [,332] [,333] [,334] [,335] [,336] [,337] [,338]
## [1,] 26 26 47 47 47 47 47 47 47 47
## [2,] 50 50 52 50 50 48 48 48 48 49
## [3,] 73 52 73 73 52 73 52 50 49 73
## [,339] [,340] [,341] [,342] [,343] [,344] [,345] [,346] [,347] [,348]
## [1,] 47 47 48 48 48 48 48 48 49 49
## [2,] 49 49 52 50 50 49 49 49 52 50
## [3,] 52 50 73 73 52 73 52 50 73 73
## [,349] [,350] [,351] [,352] [,353] [,354] [,355] [,356] [,357] [,358]
## [1,] 49 50 43 1 1 1 1 1 1 6
## [2,] 50 52 53 38 38 38 39 39 66 27
## [3,] 52 73 82 83 66 39 83 66 83 75
## [,359] [,360] [,361] [,362] [,363] [,364] [,365] [,366] [,367] [,368]
## [1,] 6 6 6 6 6 27 27 27 38 38
## [2,] 27 27 40 40 45 45 40 40 66 39
## [3,] 45 40 75 45 75 75 75 45 83 83
## [,369] [,370] [,371] [,372] [,373] [,374] [,375] [,376] [,377] [,378]
## [1,] 38 39 40 4 11 11 11 12 30 30
## [2,] 39 66 45 24 12 12 13 13 61 51
## [3,] 66 83 75 57 70 13 70 70 84 84
## [,379] [,380] [,381] [,382] [,383] [,384]
## [1,] 30 51 7 17 18 15
## [2,] 51 61 8 22 28 21
## [3,] 61 84 69 42 5 72
# Number of triangles that include the vertex "BUBBA"
count_triangles(g, vids = "BUBBA")
## [1] 37
# Global transitivity of the network (stored, then printed)
g.tr <- transitivity(g)
g.tr
## [1] 0.1918082
# Local transitivity of the vertex "BUBBA"
transitivity(g, vids = "BUBBA", type = "local")
## [1] 0.6727273
# Transitivity of each random graph in gl, flattened to a numeric vector
gl.tr <- lapply(gl, transitivity)
gl.trs <- unlist(gl.tr)
# Summary statistics of the random-graph transitivity scores
summary(gl.trs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.02630 0.05439 0.06172 0.06150 0.06904 0.09280
# Share of random graphs whose transitivity exceeds that of the Forrest Gump network
sum(gl.trs > g.tr) / length(gl.trs)
## [1] 0
# Largest cliques in the network
largest_cliques(g)
## [[1]]
## + 9/94 vertices, named, from 660b272:
## [1] FORREST STRONGARM BUBBA DALLAS LT DAN MAN SGT SIMS
## [8] SOLDIER SONG
##
## [[2]]
## + 9/94 vertices, named, from 660b272:
## [1] FORREST JENNY EMCEE MAN # MAN #1 MAN #2 MAN #3 MAN #5 MEN
# All maximal cliques of the network, stored in 'clq'
clq <- max_cliques(g)
# Frequency table of maximal-clique sizes
table(lengths(clq))
##
## 2 3 4 5 6 7 9
## 12 24 7 2 4 2 2
library(igraph)
# Assign largest cliques output to object 'lc'
lc <- largest_cliques(g)
# Build one subgraph per largest clique, keeping only that clique's vertices.
# induced_subgraph() replaces the deprecated subgraph(), which raised a
# deprecation warning. No conversion to undirected is needed: g was built
# with directed = FALSE, so its induced subgraphs are undirected too.
gs1 <- induced_subgraph(g, lc[[1]])
gs2 <- induced_subgraph(g, lc[[2]])
# Plot the two largest cliques side by side. The previous graphics settings
# are saved and restored so later plots are not forced into a 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
plot(
  gs1,
  vertex.label.color = "black",
  vertex.label.cex = 0.9,
  vertex.size = 0,
  edge.color = "gray28",
  main = "Largest Clique 1",
  layout = layout_in_circle(gs1)
)
plot(
  gs2,
  vertex.label.color = "black",
  vertex.label.cex = 0.9,
  vertex.size = 0,
  edge.color = "gray28",
  main = "Largest Clique 2",
  layout = layout_in_circle(gs2)
)
par(old_par)
En R, podemos crear la red con valores numéricos o con categorías como factores.
0 indica que no hay patrón preferencial por algún grupo. +1 indica que los individuos solo se juntan con individuos similares y -1 indica que los individuos evitan de forma activa a sus similares.
Assortativity degree: valores negativos indican que los individuos altamente conectados no se conectan de forma preferencial con otros individuos altamente conectados.
Reciprocidad: en una red direccionada, la reciprocidad es igual a la proporción de conexiones que son simétricas, esto es, la proporción de las conexiones de salida que también tienen una conexión de entrada.
# Plot the network
plot(g1)
# Convert the gender attribute into a numeric value (one code per factor level)
values <- as.numeric(factor(V(g1)$gender))
# Calculate the assortativity of the network based on gender
assortativity(g1, values)
## [1] 0.1319444
# Calculate the degree assortativity of the network.
# assortativity_degree() is the current name of the deprecated
# assortativity.degree().
assortativity_degree(g1, directed = FALSE)
## [1] 0.4615385
# Calculate the observed assortativity
observed.assortativity <- assortativity(g1, values)
# Assortativity after shuffling the gender codes across vertices, repeated
# 1000 times; vapply returns a numeric vector directly.
results <- vapply(
  seq_len(1000),
  function(i) assortativity(g1, sample(values)),
  numeric(1)
)
# Plot the distribution of the shuffled assortativity values and mark the
# observed value with a red vertical line
hist(results)
abline(v = observed.assortativity, col = "red", lty = 3, lwd = 2)
# Plot the network held in g with thin arrows.
# NOTE(review): the original comment called this the chimp grooming network,
# but no such data set is loaded in the visible source; g appears to still be
# the undirected Forrest Gump graph at this point. TODO: confirm and load the
# intended directed data before this step.
plot(
  g,
  edge.arrow.width = 0.5,
  edge.arrow.size = 0.3,
  edge.color = "black"
)
# Reciprocity of the graph
reciprocity(g)
## [1] 1
# Perform fast-greedy community detection on the network graph.
# cluster_fast_greedy() is the current name of the deprecated
# fastgreedy.community(); `<-` is used for assignment.
kc <- cluster_fast_greedy(g)
# Determine sizes of each community
sizes(kc)
## Community sizes
## 1 2 3 4 5 6 7
## 21 21 32 7 5 5 3
# Determine which individuals belong to which community
membership(kc)
## ABBIE HOFFMAN ANCHORMAN ANNOUNCER
## 6 1 1
## ANOTHER DAY ASSISTANT COACH BERT
## 1 3 5
## BILLY BLACK WOMAN BOB HOPE
## 2 2 1
## BOY BOY #1 BOY #2
## 2 2 2
## BOY #3 BUBBA BUS STOP - PRESENT - DAY
## 2 1 3
## CAB DRIVER CARLA CHET HUNTLEY
## 2 1 3
## DALLAS DEAN DICK CAVETT
## 1 3 3
## DICK CLARK DOCTOR DRILL SERGEANT
## 1 3 1
## ELVIS EMCEE ERNIE
## 3 4 5
## FOOTBALL COACH AGING HIPPIE BLACK PANTHER
## 3 3 2
## BUS DRIVER DJ DRIVER
## 3 2 2
## EARL ELDERLY WOMAN FORREST
## 3 1 3
## GIRL HILARY ISABEL
## 2 6 6
## FORREST JR JENNY LENORE
## 5 2 1
## LOUISE LT DAN LITTLE BOY
## 7 1 5
## MALE NURSE MAN # MAN #1
## 1 4 4
## MAN #2 MAN #3 MASAI
## 4 4 2
## MAN #5 MINISTER NEWSCASTER
## 4 7 3
## GOVERNOR WALLACE NEWSMAN NIGHT
## 3 3 1
## OLDER BOY #1 OFFICER MRS GUMP
## 2 3 3
## RUBEN MAN SGT SIMS
## 2 1 1
## SOLDIER SONG POLICEMAN
## 1 1 6
## MARGO PRESIDENT JOHNSON WHITE WOMAN
## 1 1 2
## SLOW MOTION STRONGARM JOHN LENNON
## 2 1 3
## MEN JENNY'S DAD LYNN MARIE
## 4 2 5
## MRS BLUE NURSE OLD SHRIMPER
## 3 3 3
## OLDER BOY #2 PRESIDENT KENNEDY PRINCIPAL
## 2 3 3
## SUSAN VET WESLEY
## 7 6 2
## WILD-EYED MAN YOUNG HIPPIE YOUNG MAN
## 3 2 3
## KATZENBACH REPORTER OLDER BOY
## 3 3 3
## PATRONS PRESIDENT NIXON REVEREND
## 3 3 3
## SECURITY GUARD
## 3
# Plot the fast-greedy community structure of the network
plot(kc, g)
# Perform edge-betweenness community detection on the network graph.
# cluster_edge_betweenness() is the current name of the deprecated
# edge.betweenness.community(). The variable keeps its name 'gc' for
# compatibility, although it shadows base::gc when used as a value.
gc <- cluster_edge_betweenness(g)
# Determine sizes of each community
sizes(gc)
## Community sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 5 3 1 10 5 14 3 5 3 4 3 1 6 1 4 1 1 2 1 1 3 1 2 2 2 1
## 27 28 29 30 31 32 33 34 35
## 1 1 1 1 1 1 1 1 1
# Plot the fast-greedy and edge-betweenness communities side by side, then
# restore the previous graphics settings
old_par <- par(mfrow = c(1, 2))
plot(kc, g)
plot(gc, g)
par(old_par)
library(igraph)
library(threejs)
###
# Give every vertex the colour 'dodgerblue'
V(g)$color <- "dodgerblue"
# Draw the graph with threejs using a vertex size of 1
graphjs(g, vertex.size = 1)
#### Eigenvector centralities
# Eigenvector centrality of every vertex as a plain numeric vector
ec <- as.numeric(eigen_centrality(g)$vector)
# Vertex sizes: five times the square root of the centrality
v <- sqrt(ec) * 5
# Draw the graph with threejs, sizing vertices by 'v'
graphjs(g, vertex.size = v)
#### Clusters
# Community membership of every vertex from the fast-greedy detection
i <- membership(kc)
# Check the number of different communities
sizes(kc)
## Community sizes
## 1 2 3 4 5 6 7
## 21 21 32 7 5 5 3
# Build one colour per community. The previous three-colour palette left the
# vertices of communities 4 and above with an NA colour.
# substr() keeps the "#RRGGBB" part in case rainbow() appends an alpha channel.
community_colors <- substr(rainbow(max(i)), 1, 7)
# Add a color attribute to each vertex based on its community membership
g <- set_vertex_attr(g, "color", value = community_colors[as.integer(i)])
# Plot the graph using threejs
graphjs(g)