Install packages

Install igraph and some additional packages to process and plot the network data.

# ,echo=FALSE, include=FALSE
# install.packages("statnet")
# Install pacman only when it is not already available. requireNamespace()
# tests for the package without attaching it; require() is meant for loading
# and signals a warning when the package is missing.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Packages used in this notebook, handed to pacman in one call.
pacman::p_load(
  "ape",
  "d3r",
  "ergm",
  "ggplot2",
  "jsonlite",
  "tidyverse",
  "igraph",
  "lubridate",
  "purrr",
  "RColorBrewer",
  "sand",
  "sqldf",
  "wrapr"
)

Load packages

Load igraph and some additional packages to process and plot the network data.

# ,echo=FALSE, include=FALSE
library("ape")
library("d3r")
library("ergm")
library("ggplot2")
library("jsonlite")
library("dplyr")
library("igraph")
library("lubridate")
library("purrr")
library("RColorBrewer")
library("readr")
library("sand")
library("sqldf")
library("wrapr")

Network Analysis

Lord of the Rings

Read Data

Ontology

Read CSV data from the morethanbooks Lord of the Rings Networks repository into a tibble.
ontology.csv contains the basic metadata about each entity (i.e. proper names used to reference characters, places, or groups) together with its identifier (e.g. the identifier for Aragorn is “arag”).

# Download the tab-separated ontology table, wrap it in a tibble, and give the
# six columns short, consistent names.
ontology <- "https://raw.githubusercontent.com/morethanbooks/projects/master/LotR/ontologies/ontology.csv" %>%
  url() %>%
  read.csv(sep = "\t") %>%
  tibble() %>%
  stats::setNames(c("id", "type", "label", "freqsum", "subtype", "gender"))
head(ontology)
## # A tibble: 6 × 6
##   id    type  label    freqsum subtype gender  
##   <chr> <chr> <chr>      <int> <chr>   <chr>   
## 1 andu  pla   Anduin       109 pla     ""      
## 2 arag  per   Aragorn     1069 men     "male"  
## 3 arat  per   Arathorn      36 men     "male"  
## 4 arwe  per   Arwen         51 elves   "female"
## 5 bage  pla   Bag End       77 pla     ""      
## 6 bali  per   Balin         30 dwarf   "male"

Books 1, 2, 3 Combined

Read CSV data from the morethanbooks Lord of the Rings Networks repository into a tibble.

networks-id-3books.csv contains an edges table with the number of times two entities are mentioned in the same paragraph across all three books of the series.

In this project, the nodes represent entities (i.e. proper names used to reference characters, places, or groups), and two of them are connected by an edge if in any paragraph there are references to these two entities.

Across the three books, Frodo and Sam are referenced in the same paragraph most frequently (533 paragraphs), and Frodo and Gandalf are referenced together in the second-largest number of paragraphs (181 paragraphs).

# Download the combined three-book edge list, keep the four columns used
# below under lower-case names, store weights as doubles, and label every row
# with the volume it describes.
books123 <- "https://raw.githubusercontent.com/morethanbooks/projects/master/LotR/tables/networks-id-3books.csv" %>%
  url() %>%
  read.csv(sep = ",") %>%
  tibble() %>%
  dplyr::select(source = IdSource, target = IdTarget, weight = Weight, volume = Type) %>%
  dplyr::mutate(weight = as.double(weight), volume = "Books 123")
head(books123)
## # A tibble: 6 × 4
##   source target weight volume   
##   <chr>  <chr>   <dbl> <chr>    
## 1 frod   sams      533 Books 123
## 2 frod   ganda     181 Books 123
## 3 merr   pipp      162 Books 123
## 4 arag   frod      146 Books 123
## 5 frod   goll      127 Books 123
## 6 bilb   frod      126 Books 123

Book 1

Read CSV data from the morethanbooks Lord of the Rings Networks repository into a tibble.

networks-id-volume1.csv contains an edges table with the number of times two entities are mentioned in the same paragraph in The Fellowship of the Ring, published on July 29, 1954.

# Download the volume 1 edge list, standardize the column names and types, and
# attach the volume label, title, and publication date to every row.
book1 <- "https://raw.githubusercontent.com/morethanbooks/projects/master/LotR/tables/networks-id-volume1.csv" %>%
  url() %>%
  read.csv(sep = ",") %>%
  tibble() %>%
  dplyr::select(source = IdSource, target = IdTarget, weight = Weight, volume = Type) %>%
  dplyr::mutate(
    weight = as.double(weight),
    volume = "Book 1",
    title = "The Fellowship of the Ring",
    publication_date = lubridate::ymd("1954-07-29")
  )
head(book1)
## # A tibble: 6 × 6
##   source target weight volume title                      publication_date
##   <chr>  <chr>   <dbl> <chr>  <chr>                      <date>          
## 1 frod   sams      171 Book 1 The Fellowship of the Ring 1954-07-29      
## 2 frod   ganda     129 Book 1 The Fellowship of the Ring 1954-07-29      
## 3 arag   frod      105 Book 1 The Fellowship of the Ring 1954-07-29      
## 4 bilb   frod       96 Book 1 The Fellowship of the Ring 1954-07-29      
## 5 frod   pipp       80 Book 1 The Fellowship of the Ring 1954-07-29      
## 6 pipp   sams       72 Book 1 The Fellowship of the Ring 1954-07-29

Book 2

Read CSV data from the morethanbooks Lord of the Rings Networks repository into a tibble.

networks-id-volume2.csv contains an edges table with the number of times two entities are mentioned in the same paragraph in The Two Towers, published on November 11, 1954.

# Download the volume 2 edge list, standardize the column names and types, and
# attach the volume label, title, and publication date to every row.
book2 <- "https://raw.githubusercontent.com/morethanbooks/projects/master/LotR/tables/networks-id-volume2.csv" %>%
  url() %>%
  read.csv(sep = ",") %>%
  tibble() %>%
  dplyr::select(source = IdSource, target = IdTarget, weight = Weight, volume = Type) %>%
  dplyr::mutate(
    weight = as.double(weight),
    volume = "Book 2",
    title = "The Two Towers",
    publication_date = lubridate::ymd("1954-11-11")
  )
head(book2)
## # A tibble: 6 × 6
##   source target weight volume title          publication_date
##   <chr>  <chr>   <dbl> <chr>  <chr>          <date>          
## 1 frod   sams      158 Book 2 The Two Towers 1954-11-11      
## 2 goll   sams      101 Book 2 The Two Towers 1954-11-11      
## 3 frod   goll       99 Book 2 The Two Towers 1954-11-11      
## 4 merr   pipp       51 Book 2 The Two Towers 1954-11-11      
## 5 gimli  lego       50 Book 2 The Two Towers 1954-11-11      
## 6 ganda  saru       49 Book 2 The Two Towers 1954-11-11

Book 3

Read CSV data from the morethanbooks Lord of the Rings Networks repository into a tibble.

networks-id-volume3.csv contains an edges table with the number of times two entities are mentioned in the same paragraph in The Return of the King, published on October 20, 1955.

# Download the volume 3 edge list, standardize the column names and types, and
# attach the volume label, title, and publication date to every row.
# This file is read through its `Source`/`Target` columns, unlike the
# `IdSource`/`IdTarget` columns used for the other volumes.
book3 <- "https://raw.githubusercontent.com/morethanbooks/projects/master/LotR/tables/networks-id-volume3.csv" %>%
  url() %>%
  read.csv(sep = ",") %>%
  tibble() %>%
  dplyr::select(source = Source, target = Target, weight = Weight, volume = Type) %>%
  dplyr::mutate(
    weight = as.double(weight),
    volume = "Book 3",
    title = "The Return of the King",
    publication_date = lubridate::ymd("1955-10-20")
  )
head(book3)
## # A tibble: 6 × 6
##   source target weight volume title                  publication_date
##   <chr>  <chr>   <dbl> <chr>  <chr>                  <date>          
## 1 frod   sams      193 Book 3 The Return of the King 1955-10-20      
## 2 ganda  pipp       79 Book 3 The Return of the King 1955-10-20      
## 3 merr   pipp       45 Book 3 The Return of the King 1955-10-20      
## 4 arag   ganda      34 Book 3 The Return of the King 1955-10-20      
## 5 dene   ganda      34 Book 3 The Return of the King 1955-10-20      
## 6 dene   pipp       34 Book 3 The Return of the King 1955-10-20

Create a DataFrame from the books123 edgelist for an undirected graph

We can use sqldf to create an R data frame that combines the edges data from books123 and the metadata about the entities from ontology. The result is a data frame with all of the information we have about the paragraph references to pairs of entities across all three books.

We have undirected data, i.e. a particular person can appear as the source or the target and we’re not interested in the distinction (direction), so we can make it easier to get the ego network (the network that is “visible” to a certain person) by duplicating each row with its inverse. This way, we can group by a single column to get the ego network.

For simplicity in this early cell, and because this processing isn’t actually needed for igraph, we don’t perform this processing in this cell; however, we do perform it in the next cell to illustrate the difference between these methods.

Note that in the data frame row filtering shown in this cell we have to search for Frodo both as a source or as a target to select all of the rows (edges) associated with Frodo’s ego network. In the next cell, we will see that conducting the processing in the SQL query will make it easier for us to identify character ego networks.

# Build one row per edge of books123, enriched with entity metadata.
# The ontology table is joined twice: as `sour` on the edge's source id and as
# `dest` on its target id, so each row carries id, label, type, subtype, and
# gender for both endpoints plus the edge weight and volume label.
# Both joins are inner joins, so an edge whose source or target id has no
# match in ontology does not appear in the result.
g_df <- sqldf::sqldf("
    SELECT 
      sour.id AS source_id, sour.label as source_name, sour.type AS source_type, sour.subtype AS source_subtype, sour.gender AS source_gender,
      dest.id AS target_id, dest.label AS target_name, dest.type AS target_type, dest.subtype AS target_subtype, dest.gender AS target_gender,
      conn.weight, conn.volume
    FROM 
      books123 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
)

# Person-to-person edges with Frodo at either end (each edge is stored in one
# orientation only, so both columns are checked); first ten rows after
# sorting by source id, then by descending weight.
head(
  dplyr::arrange(
    dplyr::filter(
      g_df,
      source_type == "per",
      target_type == "per",
      source_name == "Frodo" | target_name == "Frodo"
    ),
    source_id,
    dplyr::desc(weight)
  ),
  n = 10
)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       arag     Aragorn         per            men          male      frod
## 2       arat    Arathorn         per            men          male      frod
## 3       arwe       Arwen         per          elves        female      frod
## 4       bali       Balin         per          dwarf          male      frod
## 5       bere    Beregond         per            men          male      frod
## 6       bilb       Bilbo         per         hobbit          male      frod
## 7       bill        Bill         per         animal          male      frod
## 8       boro     Boromir         per            men          male      frod
## 9       cele    Celeborn         per          elves          male      frod
## 10      dene    Denethor         per            men          male      frod
##    target_name target_type target_subtype target_gender weight    volume
## 1        Frodo         per         hobbit          male    146 Books 123
## 2        Frodo         per         hobbit          male      2 Books 123
## 3        Frodo         per         hobbit          male      6 Books 123
## 4        Frodo         per         hobbit          male      4 Books 123
## 5        Frodo         per         hobbit          male      1 Books 123
## 6        Frodo         per         hobbit          male    126 Books 123
## 7        Frodo         per         hobbit          male      9 Books 123
## 8        Frodo         per         hobbit          male     68 Books 123
## 9        Frodo         per         hobbit          male      1 Books 123
## 10       Frodo         per         hobbit          male      7 Books 123
# Dimensions (rows, columns) of Frodo's person-to-person edge table; the row
# count is the number of edges between Frodo and other people across the
# three books: 38
dim(
  dplyr::filter(
    g_df,
    source_type == "per",
    target_type == "per",
    source_name == "Frodo" | target_name == "Frodo"
  )
)
## [1] 38 12

Create a DataFrame with bidirectional edgelists for an undirected graph

We have undirected data, so to make it easier to work with we’re going to duplicate each row with its inverse. This way, we can group by a single column to get the ego network (the network that is “visible” to a certain person).

In contrast to the previous cell, we perform this processing step in the SQL query here to illustrate the difference between these methods.

Note that in the data frame row filtering shown in this cell we have to search for Frodo only in the source column to select all of the rows (edges) associated with Frodo’s ego network.

Again, this processing isn’t actually needed for igraph, we’re doing it for manual manipulation of the graph.

# Build a mirrored edge table: every edge of books123 appears once as
# (source, target) and once as (target, source), each row enriched with both
# endpoints' ontology metadata.
# The first SELECT keeps the stored orientation; the second swaps the roles of
# `sour` and `dest` in the output columns. UNION (rather than UNION ALL) also
# removes rows that are exact duplicates across the two halves.
# Both joins are inner joins, so edges whose ids are missing from ontology are
# left out.
network_df <- sqldf::sqldf("
    SELECT 
      sour.id AS source_id, sour.label as source_name, sour.type AS source_type, sour.subtype AS source_subtype, sour.gender AS source_gender,
      dest.id AS target_id, dest.label AS target_name, dest.type AS target_type, dest.subtype AS target_subtype, dest.gender AS target_gender,
      conn.weight, conn.volume
    FROM 
      books123 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id

    UNION 

    SELECT 
      dest.id AS source_id, dest.label as source_name, dest.type AS source_type, dest.subtype AS source_subtype, dest.gender AS source_gender,
      sour.id AS target_id, sour.label AS target_name, sour.type AS target_type, sour.subtype AS target_subtype, sour.gender AS target_gender,
      conn.weight, conn.volume
    FROM 
      books123 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
)

# Frodo's person-to-person edges. Because network_df stores every edge in both
# orientations, filtering on the source column alone is sufficient. First ten
# rows after sorting by source id, then by descending weight.
head(
  dplyr::arrange(
    dplyr::filter(
      network_df,
      source_type == "per",
      target_type == "per",
      source_name == "Frodo"
    ),
    source_id,
    dplyr::desc(weight)
  ),
  n = 10
)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       frod       Frodo         per         hobbit          male      sams
## 2       frod       Frodo         per         hobbit          male     ganda
## 3       frod       Frodo         per         hobbit          male      arag
## 4       frod       Frodo         per         hobbit          male      goll
## 5       frod       Frodo         per         hobbit          male      bilb
## 6       frod       Frodo         per         hobbit          male      pipp
## 7       frod       Frodo         per         hobbit          male      merr
## 8       frod       Frodo         per         hobbit          male      boro
## 9       frod       Frodo         per         hobbit          male      fara
## 10      frod       Frodo         per         hobbit          male      elro
##    target_name target_type target_subtype target_gender weight    volume
## 1          Sam         per         hobbit          male    533 Books 123
## 2      Gandalf         per          ainur          male    181 Books 123
## 3      Aragorn         per            men          male    146 Books 123
## 4       Gollum         per         hobbit          male    127 Books 123
## 5        Bilbo         per         hobbit          male    126 Books 123
## 6       Pippin         per         hobbit          male    115 Books 123
## 7        Merry         per         hobbit          male     86 Books 123
## 8      Boromir         per            men          male     68 Books 123
## 9      Faramir         per            men          male     55 Books 123
## 10      Elrond         per          elves          male     39 Books 123
# Dimensions (rows, columns) of Frodo's person-to-person edge table; the row
# count is the number of edges between Frodo and other people across the
# three books: 38
dim(
  dplyr::filter(
    network_df,
    source_type == "per",
    target_type == "per",
    source_name == "Frodo"
  )
)
## [1] 38 12

Filter for pairs of entities that were mentioned in the same paragraph at least twice and then sort by weight and source_id

# Keep edges whose pair shares at least two paragraphs (weight >= 2), order
# them by ascending weight and then by source id, and show the first ten.
head(
  dplyr::arrange(
    dplyr::filter(network_df, weight >= 2),
    weight,
    source_id
  ),
  n = 10
)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       andu      Anduin         pla            pla                    dwar
## 2       andu      Anduin         pla            pla                    elro
## 3       andu      Anduin         pla            pla                    fara
## 4       andu      Anduin         pla            pla                    goll
## 5       andu      Anduin         pla            pla                    orth
## 6       arag     Aragorn         per            men          male      osgi
## 7       arat    Arathorn         per            men          male      arwe
## 8       arat    Arathorn         per            men          male      frod
## 9       arat    Arathorn         per            men          male     gimli
## 10      arat    Arathorn         per            men          male      mord
##    target_name target_type target_subtype target_gender weight    volume
## 1      Dwarves         gro          dwarf                    2 Books 123
## 2       Elrond         per          elves          male      2 Books 123
## 3      Faramir         per            men          male      2 Books 123
## 4       Gollum         per         hobbit          male      2 Books 123
## 5      Orthanc         pla            pla                    2 Books 123
## 6    Osgiliath         pla            pla                    2 Books 123
## 7        Arwen         per          elves        female      2 Books 123
## 8        Frodo         per         hobbit          male      2 Books 123
## 9        Gimli         per          dwarf          male      2 Books 123
## 10      Mordor         pla            pla                    2 Books 123

Check that the same number of source and target nodes were created

# Number of distinct ids on the source side and on the target side of the
# mirrored edge table; the two counts should match.
c(
  dplyr::n_distinct(network_df$source_id),
  dplyr::n_distinct(network_df$target_id)
)
## [1] 73 73

Count the number of edge rows whose source node is a group, person, place, or thing

Here, we are interested in the people, so when we create our network graph we will only include the per nodes and edges, i.e. the ones associated with people.

# Each row of network_df is one (mirrored) edge, so this tallies edge rows by
# the type of their source node; it does not count distinct nodes.
network_df %>% group_by(source_type) %>% count()
## # A tibble: 4 × 2
## # Groups:   source_type [4]
##   source_type     n
##   <chr>       <int>
## 1 gro           317
## 2 per          1614
## 3 pla           896
## 4 thin           61

Create a network graph of people with edge weights greater than 20

igraph has many functions for reading and writing graphs and converting to and from other data formats. We can create a network graph G from an R data frame using igraph’s graph_from_data_frame function.

A graph G = (V, E) is a mathematical structure with a set V of elements, called vertices or nodes, and a set E of pairs of nodes, called edges or links. A network can be undirected or directed. In addition, a network can be unweighted or weighted.

To convert the edgelist data in the data frame into a graph G using graph_from_data_frame, the data frame must contain an edge list representation of a graph, i.e. two columns of node names and zero or more columns of edge attributes. Each row will be processed as one edge instance.

d is a data frame containing a symbolic edge list in the first two columns. Additional columns are considered as edge attributes. Since version 0.7 this argument is coerced to a data frame with as.data.frame.

directed is a logical scalar, whether or not to create a directed graph.

vertices is a data frame with vertex metadata, or NULL. Since version 0.7 this argument is coerced to a data frame with as.data.frame, if not NULL.

Given our interest in characters, we restrict the graph to references about people. In addition, we further restrict our attention to pairs of people who are referenced together more than 20 times across all the books to reduce the number of edges such that, for readability, the edge count is not more than four times the node count [Melancon 06].

# Edge list for the graph: person-to-person pairs that co-occur in more than
# 20 paragraphs, one row per pair in the orientation stored in books123.
# graph_from_data_frame() reads the first two columns (`from`, `to`) as the
# endpoints and the remaining column (`weight`) as an edge attribute.
my_edges <- sqldf::sqldf("
    SELECT 
      sour.label as source_name, sour.type as source_type, 
      dest.label AS target_name, dest.type AS target_type, 
      conn.weight
    FROM 
      books123 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
) %>%
  dplyr::filter(source_type == "per", target_type == "per", weight > 20) %>%
  dplyr::select(from = source_name, to = target_name, weight)

# Vertex table: one row per person who appears in at least one retained edge.
# It is taken from the mirrored table so that people who only ever occur on
# the target side of books123 are included as well. The first column (`name`)
# identifies the vertex; the other columns become vertex attributes.
my_nodes <- network_df %>%
  dplyr::filter(source_type == "per", target_type == "per", weight > 20) %>%
  dplyr::select(
    name = source_name,
    type = source_type,
    subtype = source_subtype,
    gender = source_gender,
    volume
  ) %>%
  dplyr::distinct()

# Undirected graph built from the edge list and the vertex table
G <- graph_from_data_frame(my_edges, directed = FALSE, vertices = my_nodes)

#print_all(G)
G
## IGRAPH 670f9cf UNWB 31 79 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), weight (e/n)
## + edges from 670f9cf (vertex names):
##  [1] Frodo  --Sam     Frodo  --Gandalf Merry  --Pippin  Aragorn--Frodo  
##  [5] Frodo  --Gollum  Bilbo  --Frodo   Gandalf--Pippin  Aragorn--Gandalf
##  [9] Gollum --Sam     Frodo  --Pippin  Gimli  --Legolas Pippin --Sam    
## [13] Frodo  --Merry   Aragorn--Legolas Aragorn--Sam     Aragorn--Gimli  
## [17] Gandalf--Saruman Boromir--Frodo   Aragorn--Merry   Aragorn--Pippin 
## [21] Aragorn--Boromir Merry  --Sam     Aragorn--Elrond  Gandalf--Théoden
## [25] Faramir--Frodo   Bilbo  --Gandalf Gandalf--Gimli   Gandalf--Sam    
## + ... omitted several edges

Plot the nodes of the LotR network

# Draw only the vertices: edges get no colour, so they are not visible.
# The seed fixes the random layout so the figure is reproducible.
set.seed(10)
igraph_options(
  vertex.size = 6,
  vertex.color = "blue",
  vertex.frame.color = "blue",
  vertex.label = NA, # V(G)$name
  edge.color = NA
)
par(mfrow = c(1, 1))
plot(G, layout = layout_randomly) # layout_nicely
title("LotR network (no edges!)")

Plot the nodes and edges of the LotR network

# Edge widths proportional to co-occurrence weight; the heaviest edge gets
# width 4.
e.size <- 4 * E(G)$weight / max(E(G)$weight, na.rm = TRUE)

# Same seed as the vertex-only plot, so the vertices land in the same places.
set.seed(10)
igraph_options(
  vertex.size = 6,
  vertex.color = "blue",
  vertex.frame.color = "blue",
  vertex.label = NA, # V(G)$name
  edge.width = e.size,
  edge.color = "blue"
)
par(mfrow = c(1, 1))
plot(G, layout = layout_randomly) # layout_nicely
title("LotR network")

Number of nodes

Sometimes the number of vertices (also called nodes) in a graph is called the order of the graph. We can get the number of nodes in a graph with gorder(G).

# Count the number of nodes (the order of the graph)
gorder(G)
## [1] 31
# vcount() reports the same node count as gorder()
vcount(G)
## [1] 31

List of nodes

# List all of the nodes as a vertex sequence (printed by vertex name)
V(G)
## + 31/31 vertices, named, from 670f9cf:
##  [1] Aragorn   Arathorn  Arwen     Beregond  Bilbo     Bill      Boromir  
##  [8] Celeborn  Denethor  Elendil   Elrond    Éomer     Éowyn     Faramir  
## [15] Frodo     Galadriel Gandalf   Gimli     Glóin     Gollum    Isildur  
## [22] Legolas   Merry     Pippin    Sam       Saruman   Sauron    Shadowfax
## [29] Théoden   Bombadil  Treebeard

List neighbors of a node

# List neighbors of node "Aragorn", i.e. the vertices that share an edge with it
neighbors(G, "Aragorn")
## + 17/31 vertices, named, from 670f9cf:
##  [1] Arathorn Arwen    Boromir  Elendil  Elrond   Éomer    Faramir  Frodo   
##  [9] Gandalf  Gimli    Legolas  Merry    Pippin   Sam      Saruman  Sauron  
## [17] Théoden

Determine the degree of each node in the graph

# Degree (number of incident edges) of every vertex, named by vertex
G_degree <- degree(G)

# Alphabetical alternative:
# G_degree[sort(names(G_degree))]

# Show the degrees from highest to lowest
sort(G_degree, decreasing = TRUE)
##   Gandalf   Aragorn     Frodo    Pippin       Sam     Merry   Boromir   Faramir 
##        18        17        14        11        10         9         7         7 
##     Gimli   Legolas   Saruman Treebeard     Bilbo  Denethor    Elrond     Éomer 
##         7         6         5         5         4         4         4         4 
##    Sauron   Théoden    Gollum     Arwen   Elendil     Éowyn  Arathorn  Beregond 
##         4         4         3         2         2         2         1         1 
##      Bill  Celeborn Galadriel     Glóin   Isildur Shadowfax  Bombadil 
##         1         1         1         1         1         1         1
# Degree of every vertex, named by vertex
G_degree <- degree(G)

# Alphabetical alternative:
# G_degree[sort(names(G_degree))]

# Degrees from highest to lowest, still as a named vector
G_degree_distribution <- sort(G_degree, decreasing = TRUE)

# Reshape into a two-column data frame (Character, Degree), highest first.
# Alternatively: tibble::enframe(G_degree_distribution) # returns columns `name` and `value`
G_degree_distribution <- data.frame(
  Character = factor(names(G_degree_distribution)),
  Degree = G_degree_distribution,
  row.names = NULL
) %>%
  arrange(desc(Degree))

# Bar chart of degree per character; bars are ordered by descending degree
# and shaded by degree. The y axis is fixed to the range 0-20.
ggplot(
  data = G_degree_distribution,
  mapping = aes(x = reorder(Character, -Degree), y = Degree)
) +
  geom_col(aes(fill = Degree)) +
  scale_color_distiller(
    palette = "Blues",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 20, 2.5), limits = c(0, 20)) +
  # ylim(0, 20) +
  labs(title = "Degree Distribution of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Determine the average degree of the graph

The average degree of a network is

\[\begin{align} \langle k \rangle = \frac{\sum_{i=1}^{N} k_i}{N} \end{align}\]

# Mean of the per-vertex degrees, rounded to two decimals
round(mean(degree(G), na.rm = TRUE), 2)
## [1] 5.1
# Cross-check: in an undirected network every edge contributes to two
# degrees, so the average degree equals 2L / N
round(2 * ecount(G) / vcount(G), 2)
## [1] 5.1

Number of edges

Sometimes the number of edges (also called links) in a graph is called the size of the graph. We can get the number of edges in a graph with ecount(G).

# Count the number of edges (the size of the graph)
ecount(G)
## [1] 79

List of edges

# List all of the edges as an edge sequence (printed as pairs of vertex names)
E(G)
## + 79/79 edges from 670f9cf (vertex names):
##  [1] Frodo   --Sam       Frodo   --Gandalf   Merry   --Pippin   
##  [4] Aragorn --Frodo     Frodo   --Gollum    Bilbo   --Frodo    
##  [7] Gandalf --Pippin    Aragorn --Gandalf   Gollum  --Sam      
## [10] Frodo   --Pippin    Gimli   --Legolas   Pippin  --Sam      
## [13] Frodo   --Merry     Aragorn --Legolas   Aragorn --Sam      
## [16] Aragorn --Gimli     Gandalf --Saruman   Boromir --Frodo    
## [19] Aragorn --Merry     Aragorn --Pippin    Aragorn --Boromir  
## [22] Merry   --Sam       Aragorn --Elrond    Gandalf --Théoden  
## [25] Faramir --Frodo     Bilbo   --Gandalf   Gandalf --Gimli    
## [28] Gandalf --Sam       Aragorn --Éomer     Gandalf --Legolas  
## + ... omitted several edges

Determine the density of the graph

The density of a network is the fraction of possible links that actually exist, which is the same as the fraction of pairs of nodes that are actually connected. The average degree of a network is related (directly proportional) to its density: the density is the ratio between the average degree and the maximum possible degree, N - 1.

The higher the density, the more likely it is that the network is connected (i.e. that you can reach any node from any other node by following a path along links and intermediate nodes).

The density of a network with N nodes and L links is

\[\begin{align} d = \frac{L}{L_{max}} \end{align}\]

In an undirected network this equals

\[\begin{align} d = \frac{L}{L_{max}} = \frac{2L}{N(N-1)} \end{align}\]

In a directed network this equals

\[\begin{align} d = \frac{L}{L_{max}} = \frac{L}{N(N-1)} \end{align}\]

# Density of the graph as reported by igraph, rounded to two decimals
round(edge_density(G), 2)
## [1] 0.17
# Cross-check with the closed form for an undirected network:
#   d = 2L / (N * (N - 1))
# Equivalent spelling: 2 * ecount(G) / vcount(G) / (vcount(G) - 1)
round(2 * ecount(G) / (vcount(G) * (vcount(G) - 1)), 2)
## [1] 0.17

Determine the length of the shortest path between two specific nodes

# Length (number of edges) of the shortest path from Frodo to Arwen.
# `weights = NA` makes igraph ignore the `weight` edge attribute. Those
# weights are co-occurrence counts, and igraph would otherwise treat them as
# edge costs, favouring routes through weakly connected characters and
# possibly returning a path with more hops than necessary.
# distances() returns a 1 x 1 matrix here; [1, 1] extracts the plain number.
# Unlike counting the vertices of a returned path, this yields Inf (not -1)
# when no path exists.
distances(G, v = "Frodo", to = "Arwen", weights = NA)[1, 1]
## [1] 2

Specify the shortest path between two specific nodes

# Vertex sequence of a shortest path from Frodo to Arwen.
# NOTE(review): the co-occurrence counts are passed as `weights`, and igraph
# interprets weights as edge costs, so this is the path with the smallest
# total co-occurrence count, which need not be the path with the fewest
# hops. Confirm this is intended; pass `weights = NA` for a hop-based path.
shortest_paths(G, "Frodo", "Arwen", weights = E(G, directed = FALSE)$weight)$vpath[[1]]
## + 3/31 vertices, named, from 670f9cf:
## [1] Frodo  Elrond Arwen

Determine whether the network is connected

A node j is reachable from another node i if there is a walk from i to j. A graph G is connected if every node is reachable from every other node.

# TRUE only if every node is reachable from every other node
is_connected(G)
## [1] FALSE

Determine the number and size of the connected components

A connected component of a graph G is a maximal connected subgraph of G: within it you can reach any node from any other node by following a path along links and intermediate nodes, and no further node of G can be added without losing that property.

# Connected components of G: `membership` gives the component id of every
# vertex, `csize` the size of each component, and `no` the number of
# components. components() is the current name of the deprecated clusters()
# and returns the same structure.
components(G)
## $membership
##   Aragorn  Arathorn     Arwen  Beregond     Bilbo      Bill   Boromir  Celeborn 
##         1         1         1         1         1         1         1         2 
##  Denethor   Elendil    Elrond     Éomer     Éowyn   Faramir     Frodo Galadriel 
##         1         1         1         1         1         1         1         2 
##   Gandalf     Gimli     Glóin    Gollum   Isildur   Legolas     Merry    Pippin 
##         1         1         1         1         1         1         1         1 
##       Sam   Saruman    Sauron Shadowfax   Théoden  Bombadil Treebeard 
##         1         1         1         1         1         1         1 
## 
## $csize
## [1] 29  2
## 
## $no
## [1] 2

Determine the average shortest path length of connected components in the graph

A common measure of the distance between nodes in a network is the minimum number of edges needed to traverse from one node to the other. This minimum path is called the shortest path, and its length is called the shortest path length.

We can then define the average shortest path length for the entire network by averaging the shortest path lengths across all pairs of nodes.

#round(mean_distance(G), 2)

# Average shortest path length (in hops) within each connected component.
# - components() replaces the deprecated clusters() and is called only once.
# - Every component is covered, instead of hard-coding ids 1 and 2.
# - Hop distances are computed with distances(weights = NA) and averaged over
#   the distinct vertex pairs. Recent igraph versions let mean_distance() pick
#   up the `weight` edge attribute by default, which would turn this into an
#   average of summed co-occurrence counts rather than of edge counts.
G_components <- components(G)
vapply(
  seq_len(G_components$no),
  function(component_id) {
    component <- induced_subgraph(G, G_components$membership == component_id)
    hops <- distances(component, weights = NA)
    # The matrix is symmetric; the upper triangle holds each pair once
    round(mean(hops[upper.tri(hops)]), 2)
  },
  numeric(1)
)
## [1] 2.13 1.00

Determine the diameter of the connected components in the graph

The diameter of a network is the maximum shortest path length across all pairs of nodes. That is, the length of the longest shortest path in the network.

# diameter(G, weights = NA)

# Diameter of each connected component, listed in component-id order.
# weights = NA is passed so the diameter is measured in hops rather than in
# units of the `weight` edge attribute. The components are found once and then
# iterated over, so nothing here depends on G having exactly two components.
G_components <- components(G)
vapply(
  seq_len(G_components$no),
  function(k) {
    component_k <- induced_subgraph(G, G_components$membership == k)
    diameter(component_k, weights = NA)
  },
  numeric(1)
)
## [1] 4 1

Determine the average clustering coefficient of the graph

The local clustering of each node in a graph G is the fraction of triangles that actually exist over all possible triangles in its neighborhood. The average clustering coefficient of a graph G is the mean of local clusterings.

# Average clustering coefficient of G (type = "average"), rounded to 3 digits.
# vids and weights are left at their NULL defaults.
G %>%
  transitivity(type = "average", isolates = "zero") %>%
  round(3)
## [1] 0.44
# Global transitivity of G (type = "global"), rounded to 3 digits. Compared
# with the average clustering coefficient, this measure gives more weight to
# high-degree nodes. vids and weights are left at their NULL defaults.
G %>%
  transitivity(type = "global", isolates = "zero") %>%
  round(3)
## [1] 0.453

Determine the assortativity of the network based on the correlation between degrees of neighbor nodes, as well as a categorical attribute

Nodes (People) in a social network may have a variety of attributes, e.g. age, gender, location, interests, etc. For example, in our LotR network, we have the attributes gender and subtype for each person.

In social networks, it’s often the case that people who are connected to one another tend to share similar attributes. For example, in our LotR network, it may be the case that people who are connected to one another tend to be of the same gender or subtype. This property is called assortativity.

Assortativity based on degree is called degree assortativity, which occurs when high-degree nodes tend to be connected to other high-degree nodes and low-degree nodes tend to be connected to other low-degree nodes. In addition to degree, assortativity can also be based on categorical or numeric attributes.

# Degree assortativity: correlation between the degrees of nodes that share
# an edge, with G treated as undirected.
G %>%
  assortativity_degree(directed = FALSE) %>%
  round(2)
## [1] -0.26
# Assortativity by the categorical vertex attribute `subtype`.
# The subtype labels are recoded as numeric category codes first.
v_types <- V(G)$subtype %>%
  factor() %>%
  as.numeric()
G %>%
  assortativity_nominal(types = v_types, directed = FALSE) %>%
  round(2)
## [1] 0.16

Add gender and subtype attributes and colors to the nodes

Determine the unique genders in the dataset

# Sorted, de-duplicated genders of the rows whose source entity is a person
genders <- network_df %>%
  dplyr::filter(source_type == "per") %>%
  dplyr::distinct(source_gender) %>%
  dplyr::arrange(source_gender) %>%
  dplyr::pull(source_gender)

genders
## [1] "female" "male"
# Eight colors from the ColorBrewer "Set1" palette
color_palette <- RColorBrewer::brewer.pal(n = 8, name = "Set1")
color_palette
## [1] "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3" "#FF7F00" "#FFFF33" "#A65628"
## [8] "#F781BF"

Associate the unique genders with colors

# Named character vector used as a lookup table: gender -> hex color.
# Background on re-mapping values with named vectors:
# https://www.r-bloggers.com/2018/03/r-tip-use-named-vectors-to-re-map-values/
# tableau: orange, blue
gender_colormap <- c(
  female = "#e49444",
  male   = "#5778a4"
)
gender_colormap
##    female      male 
## "#e49444" "#5778a4"

Determine the unique subtypes in the dataset

# Sorted, de-duplicated subtypes of the rows whose source entity is a person
subtypes <- network_df %>%
  dplyr::filter(source_type == "per") %>%
  dplyr::distinct(source_subtype) %>%
  dplyr::arrange(source_subtype) %>%
  dplyr::pull(source_subtype)

subtypes
## [1] "ainur"  "animal" "dwarf"  "elves"  "ents"   "hobbit" "men"    "orcs"

Associate the unique subtypes with colors

# Named character vector used as a lookup table: subtype -> hex color.
# tableau: blue, orange, green, red, purple, brown, pink, cyan
subtype_colormap <- c(
  ainur  = "#5778a4",
  animal = "#e49444",
  dwarf  = "#6a9f58",
  elves  = "#d1615d",
  ents   = "#a87c9f",
  hobbit = "#967662",
  men    = "#f1a2a9",
  orcs   = "#17becf"
)
subtype_colormap
##     ainur    animal     dwarf     elves      ents    hobbit       men      orcs 
## "#5778a4" "#e49444" "#6a9f58" "#d1615d" "#a87c9f" "#967662" "#f1a2a9" "#17becf"

Create a data frame that associates each node (person) with four new attributes for that person

# One row per source character among the person-to-person rows with
# weight > 20, carrying the character's subtype and gender plus the colors
# looked up for them in subtype_colormap and gender_colormap.
attrs <- network_df %>%
  dplyr::filter(source_type == "per", target_type == "per", weight > 20) %>%
  dplyr::select(source_name, source_subtype, source_gender) %>%
  dplyr::distinct() %>%
  dplyr::mutate(
    subtype_color = subtype_colormap[source_subtype],
    gender_color  = gender_colormap[source_gender]
  )

attrs
##    source_name source_subtype source_gender subtype_color gender_color
## 1      Aragorn            men          male       #f1a2a9      #5778a4
## 2     Arathorn            men          male       #f1a2a9      #5778a4
## 3        Arwen          elves        female       #d1615d      #e49444
## 4     Beregond            men          male       #f1a2a9      #5778a4
## 5        Bilbo         hobbit          male       #967662      #5778a4
## 6         Bill         animal          male       #e49444      #5778a4
## 7      Boromir            men          male       #f1a2a9      #5778a4
## 8     Celeborn          elves          male       #d1615d      #5778a4
## 9     Denethor            men          male       #f1a2a9      #5778a4
## 10     Elendil            men          male       #f1a2a9      #5778a4
## 11      Elrond          elves          male       #d1615d      #5778a4
## 12       Éomer            men          male       #f1a2a9      #5778a4
## 13       Éowyn            men        female       #f1a2a9      #e49444
## 14     Faramir            men          male       #f1a2a9      #5778a4
## 15       Frodo         hobbit          male       #967662      #5778a4
## 16   Galadriel          elves        female       #d1615d      #e49444
## 17     Gandalf          ainur          male       #5778a4      #5778a4
## 18       Gimli          dwarf          male       #6a9f58      #5778a4
## 19       Glóin          dwarf          male       #6a9f58      #5778a4
## 20      Gollum         hobbit          male       #967662      #5778a4
## 21     Isildur            men          male       #f1a2a9      #5778a4
## 22     Legolas          elves          male       #d1615d      #5778a4
## 23       Merry         hobbit          male       #967662      #5778a4
## 24      Pippin         hobbit          male       #967662      #5778a4
## 25         Sam         hobbit          male       #967662      #5778a4
## 26     Saruman          ainur          male       #5778a4      #5778a4
## 27      Sauron          ainur          male       #5778a4      #5778a4
## 28   Shadowfax         animal          male       #e49444      #5778a4
## 29     Théoden            men          male       #f1a2a9      #5778a4
## 30    Bombadil          ainur          male       #5778a4      #5778a4
## 31   Treebeard           ents          male       #a87c9f      #5778a4

Assign the attributes to the nodes in the network

# Align the rows of `attrs` with the vertices of G by character name instead
# of relying on both being in the same order, and stop if any vertex has no
# matching row (a positional assignment would silently mis-color vertices).
attr_rows <- match(V(G)$name, attrs$source_name)
stopifnot(!anyNA(attr_rows))

# unname() drops the lookup-table names so plain character vectors are stored
V(G)$subtype_color <- unname(attrs$subtype_color[attr_rows])
V(G)$gender_color  <- unname(attrs$gender_color[attr_rows])

Check that the attributes were added to the nodes

# V(G)

# List every vertex attribute of G with its values.
# vertex_attr() is the current name of the deprecated vertex.attributes().
vertex_attr(G)
## $name
##  [1] "Aragorn"   "Arathorn"  "Arwen"     "Beregond"  "Bilbo"     "Bill"     
##  [7] "Boromir"   "Celeborn"  "Denethor"  "Elendil"   "Elrond"    "Éomer"    
## [13] "Éowyn"     "Faramir"   "Frodo"     "Galadriel" "Gandalf"   "Gimli"    
## [19] "Glóin"     "Gollum"    "Isildur"   "Legolas"   "Merry"     "Pippin"   
## [25] "Sam"       "Saruman"   "Sauron"    "Shadowfax" "Théoden"   "Bombadil" 
## [31] "Treebeard"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [13] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [25] "per" "per" "per" "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "men"    "elves"  "men"    "hobbit" "animal" "men"    "elves" 
##  [9] "men"    "men"    "elves"  "men"    "men"    "men"    "hobbit" "elves" 
## [17] "ainur"  "dwarf"  "dwarf"  "hobbit" "men"    "elves"  "hobbit" "hobbit"
## [25] "hobbit" "ainur"  "ainur"  "animal" "men"    "ainur"  "ents"  
## 
## $gender
##  [1] "male"   "male"   "female" "male"   "male"   "male"   "male"   "male"  
##  [9] "male"   "male"   "male"   "male"   "female" "male"   "male"   "female"
## [17] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
## [25] "male"   "male"   "male"   "male"   "male"   "male"   "male"  
## 
## $volume
##  [1] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
##  [7] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [13] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [19] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [25] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [31] "Books 123"
## 
## $subtype_color
##  [1] "#f1a2a9" "#f1a2a9" "#d1615d" "#f1a2a9" "#967662" "#e49444" "#f1a2a9"
##  [8] "#d1615d" "#f1a2a9" "#f1a2a9" "#d1615d" "#f1a2a9" "#f1a2a9" "#f1a2a9"
## [15] "#967662" "#d1615d" "#5778a4" "#6a9f58" "#6a9f58" "#967662" "#f1a2a9"
## [22] "#d1615d" "#967662" "#967662" "#967662" "#5778a4" "#5778a4" "#e49444"
## [29] "#f1a2a9" "#5778a4" "#a87c9f"
## 
## $gender_color
##  [1] "#5778a4" "#5778a4" "#e49444" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
##  [8] "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#e49444" "#5778a4"
## [15] "#5778a4" "#e49444" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
## [22] "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
## [29] "#5778a4" "#5778a4" "#5778a4"

Check that the undirected edges have weights

# E(G)

# List every edge attribute of G with its values.
# edge_attr() is the current name of the deprecated edge.attributes().
edge_attr(G)
## $weight
##  [1] 533 181 162 146 127 126 124 123 118 115 109 101  86  84  75  73  72  68  65
## [20]  64  63  63  59  57  55  54  54  51  50  45  44  42  41  40  39  39  36  36
## [39]  36  35  34  34  33  32  32  31  30  29  29  28  28  28  28  27  27  27  27
## [58]  27  26  26  26  26  26  26  25  25  25  24  24  24  24  24  23  22  22  22
## [77]  21  21  21

Create a new network graph that only consists of the largest connected component

Determine the largest connected component and extract it as its own graph

Create a new network graph that only consists of the largest connected component

# Pick the component with the most vertices rather than assuming that the
# largest component is the one labelled 1.
G_components <- components(G)
largest_cc <- (G_components$membership == which.max(G_components$csize))

# Subgraph of G induced by the vertices of that component
conn_comp <- induced_subgraph(G, largest_cc)
conn_comp
## IGRAPH 65d568c UNWB 29 78 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), subtype_color (v/c), gender_color (v/c), weight (e/n)
## + edges from 65d568c (vertex names):
##  [1] Frodo  --Sam     Frodo  --Gandalf Merry  --Pippin  Aragorn--Frodo  
##  [5] Frodo  --Gollum  Bilbo  --Frodo   Gandalf--Pippin  Aragorn--Gandalf
##  [9] Gollum --Sam     Frodo  --Pippin  Gimli  --Legolas Pippin --Sam    
## [13] Frodo  --Merry   Aragorn--Legolas Aragorn--Sam     Aragorn--Gimli  
## [17] Gandalf--Saruman Boromir--Frodo   Aragorn--Merry   Aragorn--Pippin 
## [21] Aragorn--Boromir Merry  --Sam     Aragorn--Elrond  Gandalf--Théoden
## [25] Faramir--Frodo   Bilbo  --Gandalf Gandalf--Gimli   Gandalf--Sam    
## + ... omitted several edges

Confirm that the new network graph (conn_comp) is indeed connected

# Is every node of conn_comp reachable from every other node?
igraph::is_connected(conn_comp)
## [1] TRUE

Analyze characteristics of the largest connected component of the LotR network

Number of nodes

Sometimes the number of vertices (also called nodes) in a graph is called the order of the graph. We can get the number of nodes in a graph with gorder(G).

# Order of the graph, i.e. its number of nodes
igraph::gorder(conn_comp)
## [1] 29
# Same node count, obtained with vcount()
igraph::vcount(conn_comp)
## [1] 29

List of nodes

# Vertex sequence holding every node of the component
igraph::V(conn_comp)
## + 29/29 vertices, named, from 65d568c:
##  [1] Aragorn   Arathorn  Arwen     Beregond  Bilbo     Bill      Boromir  
##  [8] Denethor  Elendil   Elrond    Éomer     Éowyn     Faramir   Frodo    
## [15] Gandalf   Gimli     Glóin     Gollum    Isildur   Legolas   Merry    
## [22] Pippin    Sam       Saruman   Sauron    Shadowfax Théoden   Bombadil 
## [29] Treebeard

List neighbors of a node

# Nodes that share an edge with "Aragorn"
igraph::neighbors(conn_comp, v = "Aragorn")
## + 17/29 vertices, named, from 65d568c:
##  [1] Arathorn Arwen    Boromir  Elendil  Elrond   Éomer    Faramir  Frodo   
##  [9] Gandalf  Gimli    Legolas  Merry    Pippin   Sam      Saruman  Sauron  
## [17] Théoden

Determine the degree of each node in the graph

# Named vector: number of edges attached to each node of the component
cc_degree <- igraph::degree(conn_comp)

# Alphabetical listing instead:
# cc_degree[sort(names(cc_degree))]

# Highest degree first
sort(cc_degree, decreasing = TRUE)
##   Gandalf   Aragorn     Frodo    Pippin       Sam     Merry   Boromir   Faramir 
##        18        17        14        11        10         9         7         7 
##     Gimli   Legolas   Saruman Treebeard     Bilbo  Denethor    Elrond     Éomer 
##         7         6         5         5         4         4         4         4 
##    Sauron   Théoden    Gollum     Arwen   Elendil     Éowyn  Arathorn  Beregond 
##         4         4         3         2         2         2         1         1 
##      Bill     Glóin   Isildur Shadowfax  Bombadil 
##         1         1         1         1         1
# Named vector: number of edges attached to each node of the component
cc_degree <- igraph::degree(conn_comp)

# Alphabetical listing instead:
# cc_degree[sort(names(cc_degree))]

# Highest degree first
cc_degree_distribution <- sort(cc_degree, decreasing = TRUE)

# Two-column data frame (Character, Degree) used for plotting.
# Alternatively: tibble::enframe(cc_degree_distribution) # returns columns `name` and `value`
cc_degree_distribution <- data.frame(
  Character = factor(names(cc_degree_distribution)),
  Degree = cc_degree_distribution,
  row.names = NULL
) %>%
  arrange(desc(Degree))

# Bar chart with one bar per character, bars ordered from highest to lowest
# degree and shaded by degree.
ggplot(
  data = cc_degree_distribution,
  mapping = aes(x = reorder(Character, -Degree), y = Degree, fill = Degree)
) +
  geom_col() +
  scale_fill_distiller(palette = "Blues", direction = 1) +
  scale_y_continuous(breaks = seq(0, 20, 2.5), limits = c(0, 20)) +
  # ylim(0, 20) +
  labs(title = "Degree Distribution of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Determine the average degree of the graph

The average degree of a network is

\[\begin{align} \langle k \rangle = \frac{\sum_{i=1}^{N} k_i}{N} \end{align}\]

# Mean of the node degrees, rounded to 2 digits
conn_comp %>%
  degree() %>%
  mean(na.rm = TRUE) %>%
  round(2)
## [1] 5.38
# Cross-check: in an undirected network the average degree equals 2L / N,
# with L the number of edges and N the number of nodes.
round(2 * ecount(conn_comp) / vcount(conn_comp), 2)
## [1] 5.38

Number of edges

Sometimes the number of edges (also called links) in a graph is called the size of the graph. We can get the number of edges in a graph with ecount(G).

# Size of the graph, i.e. its number of edges
igraph::ecount(conn_comp)
## [1] 78

List of edges

# Edge sequence holding every edge of the component
igraph::E(conn_comp)
## + 78/78 edges from 65d568c (vertex names):
##  [1] Frodo   --Sam       Frodo   --Gandalf   Merry   --Pippin   
##  [4] Aragorn --Frodo     Frodo   --Gollum    Bilbo   --Frodo    
##  [7] Gandalf --Pippin    Aragorn --Gandalf   Gollum  --Sam      
## [10] Frodo   --Pippin    Gimli   --Legolas   Pippin  --Sam      
## [13] Frodo   --Merry     Aragorn --Legolas   Aragorn --Sam      
## [16] Aragorn --Gimli     Gandalf --Saruman   Boromir --Frodo    
## [19] Aragorn --Merry     Aragorn --Pippin    Aragorn --Boromir  
## [22] Merry   --Sam       Aragorn --Elrond    Gandalf --Théoden  
## [25] Faramir --Frodo     Bilbo   --Gandalf   Gandalf --Gimli    
## [28] Gandalf --Sam       Aragorn --Éomer     Gandalf --Legolas  
## + ... omitted several edges

Determine the density of the largest connected component

The density of a network is the fraction of possible links that actually exist, which is the same as the fraction of pairs of nodes that are actually connected. The average degree of a network is related (directly proportional) to its density: the density is the ratio between the average degree and the maximum possible degree, N − 1.

The higher the density, the more likely it is that the network is connected (i.e. that you can reach any node from any other node by following a path along links and intermediate nodes).

The density of a network with N nodes and L links is

\[\begin{align} d = \frac{L}{L_{max}} \end{align}\]

In an undirected network this equals

\[\begin{align} d = \frac{L}{L_{max}} = \frac{2L}{N(N-1)} \end{align}\]

In a directed network this equals

\[\begin{align} d = \frac{L}{L_{max}} = \frac{L}{N(N-1)} \end{align}\]

# Density of the component, rounded to 2 digits
conn_comp %>%
  edge_density() %>%
  round(2)
## [1] 0.19
# Cross-check: the density of an undirected network equals 2L / (N * (N - 1)),
# with L the number of edges and N the number of nodes.
round(
  2 * ecount(conn_comp) / (vcount(conn_comp) * (vcount(conn_comp) - 1)),
  2
)
## [1] 0.19

Determine the length of the shortest path between two specific nodes

# Number of edges (hops) on a shortest path from Frodo to Arwen: the path has
# one more vertex than it has edges, hence the "- 1".
# weights = NA makes the search ignore the `weight` edge attribute, so the
# path minimises the number of hops. A search that treats `weight` as a cost
# can return a path with more hops than the minimum.
length(shortest_paths(conn_comp, "Frodo", "Arwen", weights = NA)$vpath[[1]]) - 1
## [1] 2

Specify the shortest path between two specific nodes

# Vertices on the Frodo -> Arwen path returned by shortest_paths() when the
# `weight` edge attribute is supplied as the edge weights.
# NOTE(review): the diameter computed in this analysis passes weights = NA
# (hop counts); confirm that weighting this path by `weight` is intended.
frodo_to_arwen <- shortest_paths(
  conn_comp,
  from = "Frodo",
  to = "Arwen",
  weights = E(conn_comp, directed = FALSE)$weight
)
frodo_to_arwen$vpath[[1]]
## + 3/29 vertices, named, from 65d568c:
## [1] Frodo  Elrond Arwen

Determine whether the network is connected

A node j is reachable from another node i if there is a walk from i to j. A graph G is connected if every node is reachable from every other node.

# Is every node of conn_comp reachable from every other node?
igraph::is_connected(conn_comp)
## [1] TRUE

Determine the number and size of the connected components

A connected component of a graph G is a maximal connected subgraph of G: any node in it can be reached from any other node in it by following a path along links and intermediate nodes, and no further node of G can be added to it without losing that property.

# Component membership of every vertex ($membership), the size of each
# component ($csize) and the number of components ($no).
# components() is the current name of this function; clusters() is the
# deprecated alias for it.
components(conn_comp)
## $membership
##   Aragorn  Arathorn     Arwen  Beregond     Bilbo      Bill   Boromir  Denethor 
##         1         1         1         1         1         1         1         1 
##   Elendil    Elrond     Éomer     Éowyn   Faramir     Frodo   Gandalf     Gimli 
##         1         1         1         1         1         1         1         1 
##     Glóin    Gollum   Isildur   Legolas     Merry    Pippin       Sam   Saruman 
##         1         1         1         1         1         1         1         1 
##    Sauron Shadowfax   Théoden  Bombadil Treebeard 
##         1         1         1         1         1 
## 
## $csize
## [1] 29
## 
## $no
## [1] 1

Determine the average shortest path length of the largest connected component

A common measure of the distance between nodes in a network is the minimum number of edges needed to traverse from one node to the other. This minimum path is called the shortest path, and its length is called the shortest path length.

We can then define the average shortest path length for the entire network by averaging the shortest path lengths across all pairs of nodes.

# Average shortest path length of the component, rounded to 2 digits
conn_comp %>%
  mean_distance() %>%
  round(2)
## [1] 2.13

Determine the diameter of the largest connected component

The diameter of a network is the maximum shortest path length across all pairs of nodes. That is, the length of the longest shortest path in the network.

# Longest shortest path in the component; weights = NA is passed so the
# `weight` edge attribute is not used.
igraph::diameter(conn_comp, weights = NA)
## [1] 4

Determine the eccentricity of the nodes in the largest connected component

The eccentricity of a node v is the maximum distance from v to all other nodes in G.

# Eccentricity of every node of the component
igraph::eccentricity(conn_comp)
##   Aragorn  Arathorn     Arwen  Beregond     Bilbo      Bill   Boromir  Denethor 
##         2         3         3         4         4         4         3         4 
##   Elendil    Elrond     Éomer     Éowyn   Faramir     Frodo   Gandalf     Gimli 
##         3         3         3         4         3         3         3         3 
##     Glóin    Gollum   Isildur   Legolas     Merry    Pippin       Sam   Saruman 
##         4         4         4         3         3         3         3         3 
##    Sauron Shadowfax   Théoden  Bombadil Treebeard 
##         3         4         3         4         4

Determine the radius of the largest connected component

The radius is the minimum eccentricity.

# Radius of the component (the minimum eccentricity)
igraph::radius(conn_comp)
## [1] 2

Determine the nodes in the center of the largest connected component

The center is the set of nodes with eccentricity equal to radius.

# Logical mask: TRUE for the nodes whose eccentricity equals the radius
in_center <- eccentricity(conn_comp) %>%
  magrittr::equals(radius(conn_comp))

# Eccentricities of the center nodes only
eccentricity(conn_comp)[in_center]
## Aragorn 
##       2

Determine the nodes in the periphery of the largest connected component

The periphery is the set of nodes with eccentricity equal to the diameter.

# Logical mask: TRUE for the nodes whose eccentricity equals the diameter
# (diameter computed with weights = NA)
in_periphery <- eccentricity(conn_comp) %>%
  magrittr::equals(diameter(conn_comp, weights = NA))

# Eccentricities of the periphery nodes only
eccentricity(conn_comp)[in_periphery]
##  Beregond     Bilbo      Bill  Denethor     Éowyn     Glóin    Gollum   Isildur 
##         4         4         4         4         4         4         4         4 
## Shadowfax  Bombadil Treebeard 
##         4         4         4

Determine the average clustering coefficient of the graph

The local clustering of each node in a graph G is the fraction of triangles that actually exist over all possible triangles in its neighborhood. The average clustering coefficient of a graph G is the mean of local clusterings.

# Average clustering coefficient of the component (type = "average"), rounded
# to 3 digits. vids and weights are left at their NULL defaults.
conn_comp %>%
  transitivity(type = "average", isolates = "zero") %>%
  round(3)
## [1] 0.471
# Global transitivity of the component (type = "global"), rounded to 3 digits.
# Compared with the average clustering coefficient, this measure gives more
# weight to high-degree nodes. vids and weights are left at their NULL defaults.
conn_comp %>%
  transitivity(type = "global", isolates = "zero") %>%
  round(3)
## [1] 0.453

Determine the assortativity of the connected component based on the correlation between degrees of neighbor nodes, as well as a categorical attribute

Nodes (People) in a social network may have a variety of attributes, e.g. age, gender, location, interests, etc. For example, in our LotR network, we have the attributes gender and subtype for each person.

In social networks, it’s often the case that people who are connected to one another tend to share similar attributes. For example, in our LotR network, it may be the case that people who are connected to one another tend to be of the same gender or subtype. This property is called assortativity.

Assortativity based on degree is called degree assortativity, which occurs when high-degree nodes tend to be connected to other high-degree nodes and low-degree nodes tend to be connected to other low-degree nodes. In addition to degree, assortativity can also be based on categorical or numeric attributes.

# Degree assortativity: correlation between the degrees of nodes that share
# an edge, with the component treated as undirected.
conn_comp %>%
  assortativity_degree(directed = FALSE) %>%
  round(2)
## [1] -0.3
# Assortativity by the categorical vertex attribute `subtype`.
# The subtype labels are recoded as numeric category codes first.
v_types <- V(conn_comp)$subtype %>%
  factor() %>%
  as.numeric()
conn_comp %>%
  assortativity_nominal(types = v_types, directed = FALSE) %>%
  round(2)
## [1] 0.14

Analyze the nodes of the largest connected component of the LotR network

With social networks, people are often interested in how “important” a particular person is in the network. There are many measures of “importance”.

For instance, in basketball, we might use the number of NBA championships, MVP awards, triple-doubles, points per game, three-pointers, rebounds, assists, etc. to assess “importance”.

Similar to these sports statistics, there are many measures of “importance” in social networks:

The point isn’t that one of these measures is more important than the others, but rather that each one:

As Matthew Jackson notes in The Human Network, “Our lives would be simpler if measuring something could always be boiled down to a single statistic. But part of what makes our lives so interesting is that such unidimensional rankings are generally impossible for many of the things that are most important to understand: lists of rankings end up being both controversial and intriguing”.

Degree centrality

In social networks, the number of connections (relationships) a person has is called the person’s “degree”. The associated measure of how central the person is within the network is called the person’s “degree centrality”. In A First Course in Network Science, Menczer, Fortunato, and Davis refer to such high-degree nodes as hubs.

People with higher “degree centrality” have a disproportionate presence and influence in the network (e.g. through the phenomenon known as the “friendship paradox” – that most people have fewer friends than their friends have, on average).

In igraph, you can set the normalized argument equal to TRUE to normalize the degree. If TRUE then the result is divided by n-1, where n is the number of vertices in the graph.

# Store the normalized degree as a vertex attribute
V(conn_comp)$degree_centrality <- degree(conn_comp, normalized = TRUE)

# Table of characters ranked by degree centrality, rounded to 4 digits.
# across(where(...)) replaces the superseded mutate_if(), and vertex_attr()
# replaces the deprecated vertex.attributes().
as_tibble(vertex_attr(conn_comp)[c("name", "degree_centrality")]) %>%
  mutate(across(where(is.numeric), function(x) round(x, 4))) %>%
  arrange(desc(degree_centrality))
## # A tibble: 29 × 2
##    name    degree_centrality
##    <chr>               <dbl>
##  1 Gandalf             0.643
##  2 Aragorn             0.607
##  3 Frodo               0.5  
##  4 Pippin              0.393
##  5 Sam                 0.357
##  6 Merry               0.321
##  7 Boromir             0.25 
##  8 Faramir             0.25 
##  9 Gimli               0.25 
## 10 Legolas             0.214
## # … with 19 more rows
# Named vector of normalized degrees, one entry per character
cc_degree_centrality <- degree(conn_comp, normalized = TRUE)

# Arrange nodes alphabetically
# cc_degree_centrality[sort(names(cc_degree_centrality))]

# Two-column data frame (Character, Centrality) in descending order of degree
# centrality. It is built straight from the named vector: the separately
# sorted copy that used to be assigned to this variable first was overwritten
# here without ever being read, so that assignment has been removed.
# Alternatively: tibble::enframe(cc_degree_centrality) # returns columns `name` and `value`
cc_degree_centralities <- data.frame(
  Character = factor(names(cc_degree_centrality)),
  Centrality = cc_degree_centrality,
  row.names = NULL
) %>%
  arrange(desc(Centrality))

# Bar chart with one bar per character, bars ordered from highest to lowest
# centrality and shaded by centrality.
ggplot(
  data = cc_degree_centralities,
  mapping = aes(x = reorder(Character, -Centrality), y = Centrality)
) +
  geom_col(aes(fill = Centrality)) +
  scale_color_distiller(
    palette = "Greens",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1)) +
  # ylim(0, 1) +
  labs(title = "Degree Centralities of LotR Characters") +
  xlab("Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Eigenvector centrality

Eigenvector centrality is based on the notions of “status” or “prestige” or “rank”. It’s based on the idea that a node is more central if its neighbors are more central. The definitions of this measure are inherently implicit and can be conveyed as eigenvector solutions of linear systems of equations.

One such definition by Bonacich, in which the centrality of a node is proportional to the sum of the centralities of its neighbors, is:

\[\begin{align} \lambda C_i^e(g) = \sum_j g_{ij} C_j^e(g) \end{align}\]

where \(C^e(g)\) denotes the eigenvector centrality associated with a network g and \(\lambda\) is a proportionality factor.

When the network is undirected and connected, the largest eigenvalue will be simple and its eigenvector will have entries that are all nonzero and share the same sign. Convention is to report the absolute values of these entries, which will automatically lie between 0 and 1.

# Store the eigenvector centrality as a vertex attribute. The graph is treated
# as undirected, the `weight` edge attribute is not used (weights = NA) and
# scale = FALSE is passed.
# To use the edge weights instead: weights = E(conn_comp, directed = FALSE)$weight
V(conn_comp)$eigenvector_centrality <- eigen_centrality(
  conn_comp,
  directed = FALSE,
  weights = NA,
  scale = FALSE
)$vector # [["vector"]]

# Table of characters ranked by eigenvector centrality, rounded to 4 digits.
# across(where(...)) replaces the superseded mutate_if(), and vertex_attr()
# replaces the deprecated vertex.attributes().
as_tibble(vertex_attr(conn_comp)[c("name", "eigenvector_centrality")]) %>%
  mutate(across(where(is.numeric), function(x) round(x, 4))) %>%
  arrange(desc(eigenvector_centrality))
## # A tibble: 29 × 2
##    name    eigenvector_centrality
##    <chr>                    <dbl>
##  1 Gandalf                  0.400
##  2 Aragorn                  0.369
##  3 Frodo                    0.343
##  4 Pippin                   0.301
##  5 Sam                      0.272
##  6 Merry                    0.261
##  7 Boromir                  0.234
##  8 Faramir                  0.234
##  9 Legolas                  0.214
## 10 Gimli                    0.200
## # … with 19 more rows
# Store the eigenvector centrality as a vertex attribute (undirected,
# weights = NA, scale = FALSE)
V(conn_comp)$eigenvector_centrality <- eigen_centrality(
  conn_comp,
  directed = FALSE,
  weights = NA,
  scale = FALSE
)[["vector"]]

# Alphabetical listing instead:
# as_tibble(vertex.attributes(conn_comp)[c("name", "eigenvector_centrality")]) %>%
#   arrange(name)

# Tibble with columns Character and Centrality, highest centrality first
cc_eigenvector_centralities <- vertex.attributes(conn_comp)[
  c("name", "eigenvector_centrality")
] %>%
  as_tibble() %>%
  dplyr::select(Character = name, Centrality = eigenvector_centrality) %>%
  arrange(desc(Centrality))

# Bar chart with one bar per character, bars ordered from highest to lowest
# centrality and shaded by centrality.
ggplot(
  data = cc_eigenvector_centralities,
  mapping = aes(
    x = reorder(Character, -Centrality),
    y = Centrality,
    fill = Centrality
  )
) +
  geom_col() +
  scale_fill_distiller(palette = "Blues", direction = 1) +
  scale_y_continuous(breaks = seq(0, 0.4, 0.05)) +
  # ylim(0, 1) +
  labs(title = "Eigenvector Centralities of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

PageRank

PageRank is an algorithm to compute a centrality measure that attempts to capture the “status” or “prestige” or “rank” of each node by imagining people carrying out modified random walk processes (random surfing plus jumping) for a long time and measuring the fraction of times they visit each node. Nodes with many paths leading to them are visited more often by random surfers, and therefore have high PageRank.

PageRank can be computed with an iterative approach called the power method (although there are newer / alternative methods). With the power method, the PageRank of node i at time t is the sum of two terms that convey the two ways one can arrive at node i:

\[\begin{align} R_t(i) = \frac{\alpha}{N} + (1 - \alpha) \sum_{j \in pred(i)} \frac{R_{t-1}(j)}{k_{out}(j)} \end{align}\]

The first term conveys teleportation to node i, which is one of N possible targets of a jump. The second term conveys how someone can travel along one of the edges to node i during the random walk.

The definition is recursive: the PageRank of a node depends on that of its neighbors. For \(\alpha > 0\), PageRank is guaranteed to converge, even in very large networks.

# Store the PageRank score as a vertex attribute. The graph is treated as
# undirected, the `weight` edge attribute is not used (weights = NA), the
# damping factor is 0.9 and the "prpack" algorithm is requested.
# To use the edge weights instead: weights = E(conn_comp, directed = FALSE)$weight
V(conn_comp)$pagerank <- page_rank(
  conn_comp,
  algo = "prpack",
  directed = FALSE,
  damping = 0.9,
  weights = NA
)$vector # [["vector"]]

# Table of characters ranked by PageRank, rounded to 4 digits.
# across(where(...)) replaces the superseded mutate_if(), and vertex_attr()
# replaces the deprecated vertex.attributes().
as_tibble(vertex_attr(conn_comp)[c("name", "pagerank")]) %>%
  mutate(across(where(is.numeric), function(x) round(x, 4))) %>%
  arrange(desc(pagerank))
## # A tibble: 29 × 2
##    name    pagerank
##    <chr>      <dbl>
##  1 Gandalf   0.107 
##  2 Aragorn   0.106 
##  3 Frodo     0.0843
##  4 Pippin    0.0662
##  5 Sam       0.0614
##  6 Merry     0.0543
##  7 Gimli     0.0446
##  8 Boromir   0.0418
##  9 Faramir   0.0418
## 10 Legolas   0.0364
## # … with 19 more rows
# Store the unweighted PageRank scores as the `pagerank_centrality` vertex
# attribute (`weights = NA`: edge weights are not used).
V(conn_comp)$pagerank_centrality <- page_rank(
  conn_comp,
  algo = "prpack",
  directed = FALSE,
  damping = 0.9,
  weights = NA
)[["vector"]]

# To list the nodes alphabetically instead:
# as_tibble(vertex.attributes(conn_comp)[c("name", "pagerank_centrality")]) %>%
#   arrange(name)

# Characters ordered from highest to lowest PageRank
cc_pagerank_centralities <-
  vertex.attributes(conn_comp)[c("name", "pagerank_centrality")] %>%
  as_tibble() %>%
  rename(Character = name, Centrality = pagerank_centrality) %>%
  arrange(desc(Centrality))

# Bar chart: one bar per character, tallest first, shaded by score
ggplot(
  cc_pagerank_centralities,
  aes(x = reorder(Character, -Centrality), y = Centrality)
) +
  geom_col(aes(fill = Centrality)) +
  scale_color_distiller(
    palette = "Greys",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 0.15, 0.02)) +
  # ylim(0, 1) +
  labs(title = "PageRank Centralities of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Closeness centrality

The closeness centrality of a node is the inverse of the sum of the distances from the node to all the other nodes.

\[\begin{align} closeness_i = \frac{1}{\sum_{j \ne i} l_{ij}} \end{align}\]

Alternatively, by multiplying this definition by N - 1, we discount the network size and make the measure comparable across networks.

\[\begin{align} \bar{closeness_i} = \frac{N - 1}{\sum_{j \ne i} l_{ij}} = \frac{1}{\frac{\sum_{j \ne i} l_{ij}}{(N - 1)}} \end{align}\]

Since the term in the denominator is the average distance from a node to the other nodes, this definition conveys that we can also interpret the node’s closeness centrality as the inverse of its average distance.

# Normalized closeness centrality of every node in the connected component.
# `weights = NA` tells igraph not to use the edge weights; to use them, pass
# `weights = E(conn_comp)$weight` instead.
V(conn_comp)$closeness_centrality <- closeness(
  conn_comp,
  weights = NA,
  normalized = TRUE
)

# Characters ranked by closeness, rounded to four decimals.
# `across(where(...))` replaces the superseded `mutate_if()`.
as_tibble(vertex.attributes(conn_comp)[c("name", "closeness_centrality")]) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  arrange(desc(closeness_centrality))
## # A tibble: 29 × 2
##    name    closeness_centrality
##    <chr>                  <dbl>
##  1 Aragorn                0.718
##  2 Gandalf                0.718
##  3 Frodo                  0.651
##  4 Pippin                 0.596
##  5 Merry                  0.583
##  6 Sam                    0.583
##  7 Boromir                0.538
##  8 Faramir                0.538
##  9 Gimli                  0.538
## 10 Legolas                0.538
## # … with 19 more rows
# (Re)compute the normalized, unweighted closeness centrality so this chunk
# does not depend on the table chunk having been run.
V(conn_comp)$closeness_centrality <- closeness(
  conn_comp,
  weights = NA,
  normalized = TRUE
)

# To list the nodes alphabetically instead:
# as_tibble(vertex.attributes(conn_comp)[c("name", "closeness_centrality")]) %>%
#   arrange(name)

# Characters ordered from highest to lowest closeness centrality
cc_closeness_centralities <-
  vertex.attributes(conn_comp)[c("name", "closeness_centrality")] %>%
  as_tibble() %>%
  rename(Character = name, Centrality = closeness_centrality) %>%
  arrange(desc(Centrality))

# Bar chart: one bar per character, tallest first, shaded by score
ggplot(
  cc_closeness_centralities,
  aes(x = reorder(Character, -Centrality), y = Centrality)
) +
  geom_col(aes(fill = Centrality)) +
  scale_color_distiller(
    palette = "Purples",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 0.8, 0.1), limits = c(0, 0.8)) +
  # ylim(0, 0.8) +
  labs(title = "Closeness Centralities of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Betweenness centrality

Imagine diffusion processes happening in networks, e.g. information being shared in social networks, products being transported through manufacturing networks, data being transported through communication networks. Suppose these diffusion processes travel from node to node along shortest paths. In this situation, a node is more central (has higher betweenness centrality) when more and more of these diffusion processes (shortest paths) pass through the node.

Specifically, a node’s betweenness centrality is:

\[\begin{align} betweenness_i = \sum_{h \ne j \ne i} \frac{\sigma_{hj}(i)}{\sigma_{hj}} \end{align}\]

where the sum is over all pairs of nodes h and j, not equal to i or each other. If no shortest path between h and j goes through i, the contribution of (h, j) to the betweenness of i is 0. If all shortest paths between h and j go through i, the contribution is 1.

Since the potential contributions come from all pairs of nodes, the betweenness grows with the network size. To normalize \(betweenness_i\) and make the measure comparable across networks, we divide it by \({N - 1 \choose 2}\).

# Normalized betweenness centrality of every node in the connected component.
# `weights = NA` tells igraph not to use the edge weights; to use them, pass
# `weights = E(conn_comp)$weight` instead.
V(conn_comp)$betweenness_centrality <- betweenness(
  conn_comp,
  directed = FALSE,
  weights = NA,
  normalized = TRUE
)

# Characters ranked by betweenness, rounded to four decimals.
# `across(where(...))` replaces the superseded `mutate_if()`.
as_tibble(vertex.attributes(conn_comp)[c("name", "betweenness_centrality")]) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  arrange(desc(betweenness_centrality))
## # A tibble: 29 × 2
##    name    betweenness_centrality
##    <chr>                    <dbl>
##  1 Aragorn                 0.345 
##  2 Gandalf                 0.241 
##  3 Frodo                   0.155 
##  4 Sam                     0.103 
##  5 Pippin                  0.0965
##  6 Gimli                   0.0763
##  7 Elendil                 0.0714
##  8 Merry                   0.0628
##  9 Éomer                   0.0184
## 10 Elrond                  0.0082
## # … with 19 more rows
# (Re)compute the normalized, unweighted betweenness centrality so this chunk
# does not depend on the table chunk having been run.
V(conn_comp)$betweenness_centrality <- betweenness(
  conn_comp,
  directed = FALSE,
  weights = NA,
  normalized = TRUE
)

# To list the nodes alphabetically instead:
# as_tibble(vertex.attributes(conn_comp)[c("name", "betweenness_centrality")]) %>%
#   arrange(name)

# Characters ordered from highest to lowest betweenness centrality
cc_betweenness_centralities <-
  vertex.attributes(conn_comp)[c("name", "betweenness_centrality")] %>%
  as_tibble() %>%
  rename(Character = name, Centrality = betweenness_centrality) %>%
  arrange(desc(Centrality))

# Bar chart: one bar per character, tallest first, shaded by score
ggplot(
  cc_betweenness_centralities,
  aes(x = reorder(Character, -Centrality), y = Centrality)
) +
  geom_col(aes(fill = Centrality)) +
  scale_color_distiller(
    palette = "Oranges",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 0.35, 0.05), limits = c(0, 0.35)) +
  # ylim(0, 1) +
  labs(title = "Betweenness Centralities of LotR Characters", x = "Character") +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Clustering coefficient (Transitivity)

The clustering coefficient (transitivity) of a node is the fraction of pairs of the node’s neighbors that are connected to each other. In a social network we can rephrase this definition as – the clustering coefficient for a person is the fraction of pairs of the person’s connections that are connected to each other. The connectivity among neighbors of nodes is an important feature of local structure because it indicates the extent to which the nodes are tightly knit, or clustered.

\[\begin{align} clustering_i = \frac{\tau(i)}{\tau_{max}(i)} = \frac{2\tau(i)}{k_i(k_i - 1)} \end{align}\]

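where \(\tau(i)\) is the number of triangles involving i and \(k_i\) is the degree of i. Nodes with degree k < 2 are excluded when calculating the average clustering coefficient. (In the code below, isolates = "zero" instead assigns such nodes a clustering coefficient of 0.)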

By averaging the clustering coefficient across the nodes, we can calculate a clustering coefficient for the entire network. A low clustering coefficient (near zero) indicates the network has few triangles, whereas a high clustering coefficient indicates it has many triangles. Social networks tend to have high clustering coefficients.

# Local clustering coefficient (transitivity) of every node.
# `vids = NULL` computes it for all vertices; `isolates = "zero"` reports 0
# (rather than NaN) for vertices with fewer than two neighbours.
# To try a weighted variant, see `weights = E(conn_comp)$weight`.
V(conn_comp)$clustering_coefficient <- transitivity(
  conn_comp,
  type = "local",
  vids = NULL, # V(conn_comp),
  weights = NULL,
  isolates = "zero"
)

# Characters ranked by clustering coefficient, rounded to four decimals.
# `across(where(...))` replaces the superseded `mutate_if()`.
as_tibble(vertex.attributes(conn_comp)[c("name", "clustering_coefficient")]) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  arrange(desc(clustering_coefficient))
## # A tibble: 29 × 2
##    name     clustering_coefficient
##    <chr>                     <dbl>
##  1 Arwen                     1    
##  2 Denethor                  1    
##  3 Gollum                    1    
##  4 Legolas                   0.933
##  5 Boromir                   0.857
##  6 Faramir                   0.857
##  7 Bilbo                     0.833
##  8 Sauron                    0.833
##  9 Théoden                   0.833
## 10 Elrond                    0.667
## # … with 19 more rows
# (Re)compute the local clustering coefficient so this chunk does not depend
# on the table chunk having been run. `isolates = "zero"` reports 0 (rather
# than NaN) for vertices with fewer than two neighbours.
# To try a weighted variant, see `weights = E(conn_comp)$weight`.
V(conn_comp)$clustering_coefficient <- transitivity(
  conn_comp,
  type = "local",
  vids = NULL,
  weights = NULL,
  isolates = "zero"
)

# To list the nodes alphabetically instead:
# as_tibble(vertex.attributes(conn_comp)[c("name", "clustering_coefficient")]) %>%
#   arrange(name)

# Characters ordered from highest to lowest clustering coefficient.
# The value column keeps the name `Centrality` used by the sibling
# `cc_*_centralities` tables, although the clustering coefficient is not a
# centrality measure; the plot labels below use the correct term.
cc_clustering_coefficients <-
  as_tibble(vertex.attributes(conn_comp)[c("name", "clustering_coefficient")]) %>%
  rename(Character = name, Centrality = clustering_coefficient) %>%
  arrange(desc(Centrality))

# Bar chart: one bar per character, tallest first, shaded by coefficient
ggplot(
  data = cc_clustering_coefficients,
  aes(x = reorder(Character, -Centrality), y = Centrality)
) +
  geom_col(aes(fill = Centrality)) +
  scale_color_distiller(
    palette = "Reds",
    direction = 1,
    aesthetics = c("colour", "fill")
  ) +
  scale_y_continuous(breaks = seq(0, 1.0, 0.2), limits = c(0, 1.0)) +
  # ylim(0, 1.2) +
  labs(
    title = "Clustering Coefficients of LotR Characters",
    x = "Character",
    y = "Clustering coefficient"
  ) +
  theme_light() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

Remove attributes, if necessary

# List every vertex attribute currently stored on the connected component
vertex_attr(conn_comp)
## $name
##  [1] "Aragorn"   "Arathorn"  "Arwen"     "Beregond"  "Bilbo"     "Bill"     
##  [7] "Boromir"   "Denethor"  "Elendil"   "Elrond"    "Éomer"     "Éowyn"    
## [13] "Faramir"   "Frodo"     "Gandalf"   "Gimli"     "Glóin"     "Gollum"   
## [19] "Isildur"   "Legolas"   "Merry"     "Pippin"    "Sam"       "Saruman"  
## [25] "Sauron"    "Shadowfax" "Théoden"   "Bombadil"  "Treebeard"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [13] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [25] "per" "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "men"    "elves"  "men"    "hobbit" "animal" "men"    "men"   
##  [9] "men"    "elves"  "men"    "men"    "men"    "hobbit" "ainur"  "dwarf" 
## [17] "dwarf"  "hobbit" "men"    "elves"  "hobbit" "hobbit" "hobbit" "ainur" 
## [25] "ainur"  "animal" "men"    "ainur"  "ents"  
## 
## $gender
##  [1] "male"   "male"   "female" "male"   "male"   "male"   "male"   "male"  
##  [9] "male"   "male"   "male"   "female" "male"   "male"   "male"   "male"  
## [17] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
## [25] "male"   "male"   "male"   "male"   "male"  
## 
## $volume
##  [1] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
##  [7] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [13] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [19] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## [25] "Books 123" "Books 123" "Books 123" "Books 123" "Books 123"
## 
## $subtype_color
##  [1] "#f1a2a9" "#f1a2a9" "#d1615d" "#f1a2a9" "#967662" "#e49444" "#f1a2a9"
##  [8] "#f1a2a9" "#f1a2a9" "#d1615d" "#f1a2a9" "#f1a2a9" "#f1a2a9" "#967662"
## [15] "#5778a4" "#6a9f58" "#6a9f58" "#967662" "#f1a2a9" "#d1615d" "#967662"
## [22] "#967662" "#967662" "#5778a4" "#5778a4" "#e49444" "#f1a2a9" "#5778a4"
## [29] "#a87c9f"
## 
## $gender_color
##  [1] "#5778a4" "#5778a4" "#e49444" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
##  [8] "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#e49444" "#5778a4" "#5778a4"
## [15] "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
## [22] "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4" "#5778a4"
## [29] "#5778a4"
## 
## $degree_centrality
##  [1] 0.60714286 0.03571429 0.07142857 0.03571429 0.14285714 0.03571429
##  [7] 0.25000000 0.14285714 0.07142857 0.14285714 0.14285714 0.07142857
## [13] 0.25000000 0.50000000 0.64285714 0.25000000 0.03571429 0.10714286
## [19] 0.03571429 0.21428571 0.32142857 0.39285714 0.35714286 0.17857143
## [25] 0.14285714 0.03571429 0.14285714 0.03571429 0.17857143
## 
## $eigenvector_centrality
##  [1] 0.368560552 0.042028641 0.057235334 0.034365889 0.125421977 0.030996562
##  [7] 0.234095530 0.133409500 0.042582377 0.133351610 0.105562137 0.041807968
## [13] 0.234095530 0.343251321 0.400349229 0.200451731 0.022858425 0.084441517
## [19] 0.004855863 0.213819354 0.261063274 0.301363798 0.271817262 0.133865003
## [25] 0.142090041 0.045653649 0.114985259 0.039142514 0.147913436
## 
## $pagerank
##  [1] 0.106087723 0.009064685 0.015053442 0.008863824 0.025809283 0.008971643
##  [7] 0.041768216 0.024962005 0.020450644 0.026616700 0.027054331 0.014966407
## [13] 0.041768216 0.084313955 0.107155663 0.044596240 0.009182078 0.020198914
## [19] 0.012651066 0.036422907 0.054309063 0.066190027 0.061370745 0.031661721
## [25] 0.025541760 0.008806059 0.026208802 0.008868459 0.031085425
## 
## $pagerank_centrality
##  [1] 0.106087723 0.009064685 0.015053442 0.008863824 0.025809283 0.008971643
##  [7] 0.041768216 0.024962005 0.020450644 0.026616700 0.027054331 0.014966407
## [13] 0.041768216 0.084313955 0.107155663 0.044596240 0.009182078 0.020198914
## [19] 0.012651066 0.036422907 0.054309063 0.066190027 0.061370745 0.031661721
## [25] 0.025541760 0.008806059 0.026208802 0.008868459 0.031085425
## 
## $closeness_centrality
##  [1] 0.7179487 0.4242424 0.4307692 0.3783784 0.4666667 0.3733333 0.5384615
##  [8] 0.4516129 0.4375000 0.4912281 0.4827586 0.3888889 0.5384615 0.6511628
## [15] 0.7179487 0.5384615 0.3544304 0.4179104 0.3076923 0.5384615 0.5833333
## [22] 0.5957447 0.5833333 0.4827586 0.4912281 0.4242424 0.4827586 0.4000000
## [29] 0.4745763
## 
## $betweenness_centrality
##  [1] 0.3448979592 0.0000000000 0.0000000000 0.0000000000 0.0038874192
##  [6] 0.0000000000 0.0064079953 0.0000000000 0.0714285714 0.0082010582
## [11] 0.0183988410 0.0008818342 0.0064079953 0.1553266986 0.2414367599
## [16] 0.0763007055 0.0000000000 0.0000000000 0.0000000000 0.0017636684
## [21] 0.0628180902 0.0964936592 0.1034087092 0.0060846561 0.0022045855
## [26] 0.0000000000 0.0013227513 0.0000000000 0.0066137566
## 
## $clustering_coefficient
##  [1] 0.2794118 0.0000000 1.0000000 0.0000000 0.8333333 0.0000000 0.8571429
##  [8] 1.0000000 0.0000000 0.6666667 0.5000000 0.0000000 0.8571429 0.3846154
## [15] 0.3006536 0.5714286 0.0000000 1.0000000 0.0000000 0.9333333 0.5833333
## [22] 0.5090909 0.5111111 0.6000000 0.8333333 0.0000000 0.8333333 0.0000000
## [29] 0.6000000
# Remove specific attributes
#vertex_attr(conn_comp)["transitivity"] <- NULL

Plot the nodes and weighted edges of the largest connected LotR network

(including the names of the characters)

# Edge widths: rescale the weights so that the heaviest edge gets width 4
e.size <- E(conn_comp)$weight
e.size <- 4 * e.size / max(e.size, na.rm = TRUE)

# Node labels: character names
v.label <- V(conn_comp)$name

# Set igraph's plotting defaults; these options stay in effect for later
# plots until they are overwritten.
igraph_options(
  vertex.size = 5,
  vertex.color = "blue",
  vertex.frame.color = "blue",
  vertex.label = v.label,
  vertex.label.dist = 1.1,
  vertex.label.cex = 0.75,
  edge.width = e.size,
  edge.color = "blue"
)

par(mfrow = c(1, 1))
set.seed(10) # seed the RNG so the computed layout is reproducible
plot(conn_comp, layout = layout_nicely)
title(main = "LotR network")

Plot the nodes and edges of the LotR network (color the nodes by gender)

# Edge widths: rescale the weights so that the heaviest edge gets width 4
e.size <- E(conn_comp)$weight
e.size <- 4 * e.size / max(e.size, na.rm = TRUE)

# Node labels (character names) and node colors (one color per gender)
v.label <- V(conn_comp)$name
v.color <- V(conn_comp)$gender_color

# Set igraph's plotting defaults; these options stay in effect for later
# plots until they are overwritten.
igraph_options(
  vertex.size = 5,
  vertex.color = v.color,
  vertex.frame.color = v.color,
  vertex.label = v.label,
  vertex.label.dist = 1.1,
  vertex.label.cex = 0.75,
  edge.width = e.size,
  edge.color = "grey"
)

par(mfrow = c(1, 1))
set.seed(10) # seed the RNG so the computed layout is reproducible
plot(conn_comp, layout = layout_nicely)
title(main = "LotR network")

Plot the nodes and edges of the LotR network (color the nodes by subtype)

# Edge widths: rescale the weights so that the heaviest edge gets width 4
e.size <- E(conn_comp)$weight
e.size <- 4 * e.size / max(e.size, na.rm = TRUE)

# Node labels (character names) and node colors (one color per subtype)
v.label <- V(conn_comp)$name
v.color <- V(conn_comp)$subtype_color

# Set igraph's plotting defaults; these options stay in effect for later
# plots until they are overwritten.
igraph_options(
  vertex.size = 5,
  vertex.color = v.color,
  vertex.frame.color = v.color,
  vertex.label = v.label,
  vertex.label.dist = 1.1,
  vertex.label.cex = 0.75,
  edge.width = e.size,
  edge.color = "grey"
)

par(mfrow = c(1, 1))
set.seed(10) # seed the RNG so the computed layout is reproducible
plot(conn_comp, layout = layout_nicely)
title(main = "LotR network")

Create a network graph for LotR Book 1

# Build a symmetric, fully annotated connection table for Book 1.
#
# The query joins the `book1` connection table to `ontology` twice -- once on
# `conn.source` and once on `conn.target` -- so that every row carries the id,
# label, type, subtype and gender of both endpoints together with the
# connection's weight, volume, title and publication date.
#
# The second SELECT returns the same rows with the source and target roles
# swapped, so each connection appears in both directions. `UNION` (rather than
# `UNION ALL`) drops any rows that end up exactly duplicated.
book1_df <- sqldf::sqldf("
    SELECT 
      sour.id AS source_id, sour.label as source_name, sour.type AS source_type, sour.subtype AS source_subtype, sour.gender AS source_gender,
      dest.id AS target_id, dest.label AS target_name, dest.type AS target_type, dest.subtype AS target_subtype, dest.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book1 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id

    UNION 

    SELECT 
      dest.id AS source_id, dest.label as source_name, dest.type AS source_type, dest.subtype AS source_subtype, dest.gender AS source_gender,
      sour.id AS target_id, sour.label AS target_name, sour.type AS target_type, sour.subtype AS target_subtype, sour.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book1 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
)

# Preview the ten heaviest Book 1 connections (weight shown as a double and
# the publication date as text)
book1_df %>%
  dplyr::mutate(
    weight = as.double(weight),
    publication_date = as.character(publication_date)
  ) %>%
  dplyr::arrange(dplyr::desc(weight)) %>%
  head(n = 10)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       frod       Frodo         per         hobbit          male      sams
## 2       sams         Sam         per         hobbit          male      frod
## 3       frod       Frodo         per         hobbit          male     ganda
## 4      ganda     Gandalf         per          ainur          male      frod
## 5       arag     Aragorn         per            men          male      frod
## 6       frod       Frodo         per         hobbit          male      arag
## 7       bilb       Bilbo         per         hobbit          male      frod
## 8       frod       Frodo         per         hobbit          male      bilb
## 9       frod       Frodo         per         hobbit          male      pipp
## 10      pipp      Pippin         per         hobbit          male      frod
##    target_name target_type target_subtype target_gender weight volume
## 1          Sam         per         hobbit          male    171 Book 1
## 2        Frodo         per         hobbit          male    171 Book 1
## 3      Gandalf         per          ainur          male    129 Book 1
## 4        Frodo         per         hobbit          male    129 Book 1
## 5        Frodo         per         hobbit          male    105 Book 1
## 6      Aragorn         per            men          male    105 Book 1
## 7        Frodo         per         hobbit          male     96 Book 1
## 8        Bilbo         per         hobbit          male     96 Book 1
## 9       Pippin         per         hobbit          male     80 Book 1
## 10       Frodo         per         hobbit          male     80 Book 1
##                         title publication_date
## 1  The Fellowship of the Ring       1954-07-29
## 2  The Fellowship of the Ring       1954-07-29
## 3  The Fellowship of the Ring       1954-07-29
## 4  The Fellowship of the Ring       1954-07-29
## 5  The Fellowship of the Ring       1954-07-29
## 6  The Fellowship of the Ring       1954-07-29
## 7  The Fellowship of the Ring       1954-07-29
## 8  The Fellowship of the Ring       1954-07-29
## 9  The Fellowship of the Ring       1954-07-29
## 10 The Fellowship of the Ring       1954-07-29
# Edge list for the Book 1 graph: one row per connection (single direction),
# restricted to person-to-person connections with weight above 20 and reduced
# to the `from` / `to` / `weight` columns.
book1_edges <- sqldf::sqldf("
    SELECT 
      sour.label as source_name, sour.type as source_type, 
      dest.label AS target_name, dest.type AS target_type, 
      conn.weight
    FROM 
      book1 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
) %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(from = source_name, to = target_name, weight)

# Node table for the Book 1 graph: every distinct character that is the
# source of a person-to-person connection with weight above 20, together with
# its metadata. `book1_df` lists each connection in both directions, so the
# source side alone covers both endpoints.
book1_nodes <- book1_df %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(
    name = source_name,
    type = source_type,
    subtype = source_subtype,
    gender = source_gender,
    volume,
    title,
    publication_date
  ) %>%
  dplyr::mutate(publication_date = as.character(publication_date)) %>%
  dplyr::distinct()

# Undirected graph with weighted edges and the node metadata as attributes
G_book1 <- graph_from_data_frame(
  d = book1_edges,
  directed = FALSE,
  vertices = book1_nodes
)

# print_all(G_book1)
G_book1
## IGRAPH 577ee4e UNWB 12 24 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from 577ee4e (vertex names):
##  [1] Frodo  --Sam      Frodo  --Gandalf  Aragorn--Frodo    Bilbo  --Frodo   
##  [5] Frodo  --Pippin   Pippin --Sam      Frodo  --Merry    Merry  --Pippin  
##  [9] Aragorn--Sam      Bilbo  --Gandalf  Merry  --Sam      Boromir--Frodo   
## [13] Aragorn--Gandalf  Aragorn--Boromir  Gandalf--Sam      Aragorn--Merry   
## [17] Elrond --Frodo    Elrond --Gandalf  Aragorn--Elrond   Aragorn--Pippin  
## [21] Gimli  --Legolas  Bilbo  --Sam      Frodo  --Legolas  Frodo  --Bombadil

Check that the attributes were added to the nodes

# List every vertex attribute stored on the Book 1 graph
vertex_attr(G_book1)
## $name
##  [1] "Aragorn"  "Bilbo"    "Boromir"  "Elrond"   "Frodo"    "Gandalf" 
##  [7] "Gimli"    "Legolas"  "Merry"    "Pippin"   "Sam"      "Bombadil"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "hobbit" "men"    "elves"  "hobbit" "ainur"  "dwarf"  "elves" 
##  [9] "hobbit" "hobbit" "hobbit" "ainur" 
## 
## $gender
##  [1] "male" "male" "male" "male" "male" "male" "male" "male" "male" "male"
## [11] "male" "male"
## 
## $volume
##  [1] "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1"
##  [9] "Book 1" "Book 1" "Book 1" "Book 1"
## 
## $title
##  [1] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [3] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [5] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [7] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [9] "The Fellowship of the Ring" "The Fellowship of the Ring"
## [11] "The Fellowship of the Ring" "The Fellowship of the Ring"
## 
## $publication_date
##  [1] "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29"
##  [6] "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29"
## [11] "1954-07-29" "1954-07-29"

Check that the undirected edges have weights

# Show the edge sequence of the Book 1 graph
print(E(G_book1))
## + 24/24 edges from 577ee4e (vertex names):
##  [1] Frodo  --Sam      Frodo  --Gandalf  Aragorn--Frodo    Bilbo  --Frodo   
##  [5] Frodo  --Pippin   Pippin --Sam      Frodo  --Merry    Merry  --Pippin  
##  [9] Aragorn--Sam      Bilbo  --Gandalf  Merry  --Sam      Boromir--Frodo   
## [13] Aragorn--Gandalf  Aragorn--Boromir  Gandalf--Sam      Aragorn--Merry   
## [17] Elrond --Frodo    Elrond --Gandalf  Aragorn--Elrond   Aragorn--Pippin  
## [21] Gimli  --Legolas  Bilbo  --Sam      Frodo  --Legolas  Frodo  --Bombadil
# List every edge attribute (here: the weights) stored on the Book 1 graph
edge_attr(G_book1)
## $weight
##  [1] 171 129 105  96  80  72  64  57  50  44  43  42  41  40  31  30  27  27  22
## [20]  22  22  21  21  21

Identify the largest connected component

# Select the largest connected component of the Book 1 graph.
# Component ids are arbitrary labels, so component 1 is not guaranteed to be
# the largest; pick the id with the greatest vertex count instead.
# `igraph::components()` replaces the deprecated `clusters()`.
book1_components <- igraph::components(G_book1)

# Logical mask over the vertices: TRUE for members of the largest component
book1_largest_cc <-
  book1_components$membership == which.max(book1_components$csize)

G_book1_cc <- induced_subgraph(G_book1, book1_largest_cc)
G_book1_cc
## IGRAPH 44dd484 UNWB 12 24 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from 44dd484 (vertex names):
##  [1] Frodo  --Sam      Frodo  --Gandalf  Aragorn--Frodo    Bilbo  --Frodo   
##  [5] Frodo  --Pippin   Pippin --Sam      Frodo  --Merry    Merry  --Pippin  
##  [9] Aragorn--Sam      Bilbo  --Gandalf  Merry  --Sam      Boromir--Frodo   
## [13] Aragorn--Gandalf  Aragorn--Boromir  Gandalf--Sam      Aragorn--Merry   
## [17] Elrond --Frodo    Elrond --Gandalf  Aragorn--Elrond   Aragorn--Pippin  
## [21] Gimli  --Legolas  Bilbo  --Sam      Frodo  --Legolas  Frodo  --Bombadil

Add centrality measures to nodes in the largest connected component

# Attach six node-level measures to the largest Book 1 component. All are
# computed on the unweighted graph (`weights = NA`); to use the edge weights,
# pass `weights = E(G_book1_cc)$weight` instead. The attributes are added in
# this order, which is also the order in which they are listed afterwards.

# Degree, normalized
V(G_book1_cc)$degree_centrality <- degree(G_book1_cc, normalized = TRUE)

# Local clustering coefficient; `isolates = "zero"` reports 0 (rather than
# NaN) for vertices with fewer than two neighbours
V(G_book1_cc)$clustering_centrality <- transitivity(
  G_book1_cc,
  type = "local",
  vids = NULL, # V(G_book1_cc),
  weights = NA,
  isolates = "zero"
)

# Closeness, normalized
V(G_book1_cc)$closeness_centrality <- closeness(
  G_book1_cc,
  weights = NA,
  normalized = TRUE
)

# Betweenness, normalized
V(G_book1_cc)$betweenness_centrality <- betweenness(
  G_book1_cc,
  directed = FALSE,
  weights = NA,
  normalized = TRUE
)

# Eigenvector centrality, unscaled
V(G_book1_cc)$eigenvector_centrality <- eigen_centrality(
  G_book1_cc,
  directed = FALSE,
  weights = NA,
  scale = FALSE
)$vector

# PageRank with damping factor 0.9
V(G_book1_cc)$pagerank_centrality <- page_rank(
  G_book1_cc,
  algo = "prpack",
  directed = FALSE,
  damping = 0.9,
  weights = NA
)$vector

# Confirm that the new attributes are present
vertex_attr(G_book1_cc)
## $name
##  [1] "Aragorn"  "Bilbo"    "Boromir"  "Elrond"   "Frodo"    "Gandalf" 
##  [7] "Gimli"    "Legolas"  "Merry"    "Pippin"   "Sam"      "Bombadil"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "hobbit" "men"    "elves"  "hobbit" "ainur"  "dwarf"  "elves" 
##  [9] "hobbit" "hobbit" "hobbit" "ainur" 
## 
## $gender
##  [1] "male" "male" "male" "male" "male" "male" "male" "male" "male" "male"
## [11] "male" "male"
## 
## $volume
##  [1] "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1" "Book 1"
##  [9] "Book 1" "Book 1" "Book 1" "Book 1"
## 
## $title
##  [1] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [3] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [5] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [7] "The Fellowship of the Ring" "The Fellowship of the Ring"
##  [9] "The Fellowship of the Ring" "The Fellowship of the Ring"
## [11] "The Fellowship of the Ring" "The Fellowship of the Ring"
## 
## $publication_date
##  [1] "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29"
##  [6] "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29" "1954-07-29"
## [11] "1954-07-29" "1954-07-29"
## 
## $degree_centrality
##  [1] 0.63636364 0.27272727 0.18181818 0.27272727 0.90909091 0.45454545
##  [7] 0.09090909 0.18181818 0.36363636 0.36363636 0.54545455 0.09090909
## 
## $clustering_centrality
##  [1] 0.5238095 1.0000000 1.0000000 1.0000000 0.2888889 0.7000000 0.0000000
##  [8] 0.0000000 1.0000000 1.0000000 0.6666667 0.0000000
## 
## $closeness_centrality
##  [1] 0.6875000 0.5500000 0.5238095 0.5500000 0.9166667 0.6111111 0.3666667
##  [8] 0.5500000 0.5789474 0.5789474 0.6470588 0.5000000
## 
## $betweenness_centrality
##  [1] 0.08181818 0.00000000 0.00000000 0.00000000 0.60606061 0.02121212
##  [7] 0.00000000 0.18181818 0.00000000 0.00000000 0.03636364 0.00000000
## 
## $eigenvector_centrality
##  [1] 0.41692398 0.22725633 0.17004595 0.23252534 0.48260060 0.33050919
##  [7] 0.01788539 0.09461177 0.30037499 0.30037499 0.38905148 0.09123072
## 
## $pagerank_centrality
##  [1] 0.13610289 0.06187892 0.04416263 0.06193728 0.20367056 0.09874807
##  [7] 0.03417141 0.05741796 0.07948802 0.07948802 0.11627056 0.02666368

Convert G_book1_cc (the largest connected component of G_book1) into JSON

# Serialize the largest Book 1 component (nodes with their attributes, links
# with their weights) to JSON via d3r
G_book1_json <- G_book1_cc %>%
  as.undirected() %>%
  d3r::d3_igraph()

G_book1_json
## {"nodes":[{"name":"Aragorn","type":"per","subtype":"men","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.6364,"clustering_centrality":0.5238,"closeness_centrality":0.6875,"betweenness_centrality":0.0818,"eigenvector_centrality":0.4169,"pagerank_centrality":0.1361,"id":"Aragorn","_row":"Aragorn"},{"name":"Bilbo","type":"per","subtype":"hobbit","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.2727,"clustering_centrality":1,"closeness_centrality":0.55,"betweenness_centrality":0,"eigenvector_centrality":0.2273,"pagerank_centrality":0.0619,"id":"Bilbo","_row":"Bilbo"},{"name":"Boromir","type":"per","subtype":"men","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.1818,"clustering_centrality":1,"closeness_centrality":0.5238,"betweenness_centrality":0,"eigenvector_centrality":0.17,"pagerank_centrality":0.0442,"id":"Boromir","_row":"Boromir"},{"name":"Elrond","type":"per","subtype":"elves","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.2727,"clustering_centrality":1,"closeness_centrality":0.55,"betweenness_centrality":0,"eigenvector_centrality":0.2325,"pagerank_centrality":0.0619,"id":"Elrond","_row":"Elrond"},{"name":"Frodo","type":"per","subtype":"hobbit","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.9091,"clustering_centrality":0.2889,"closeness_centrality":0.9167,"betweenness_centrality":0.6061,"eigenvector_centrality":0.4826,"pagerank_centrality":0.2037,"id":"Frodo","_row":"Frodo"},{"name":"Gandalf","type":"per","subtype":"ainur","gender":"male","volume":"Book 1","title":"The Fellowship of the 
Ring","publication_date":"1954-07-29","degree_centrality":0.4545,"clustering_centrality":0.7,"closeness_centrality":0.6111,"betweenness_centrality":0.0212,"eigenvector_centrality":0.3305,"pagerank_centrality":0.0987,"id":"Gandalf","_row":"Gandalf"},{"name":"Gimli","type":"per","subtype":"dwarf","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.0909,"clustering_centrality":0,"closeness_centrality":0.3667,"betweenness_centrality":0,"eigenvector_centrality":0.0179,"pagerank_centrality":0.0342,"id":"Gimli","_row":"Gimli"},{"name":"Legolas","type":"per","subtype":"elves","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.1818,"clustering_centrality":0,"closeness_centrality":0.55,"betweenness_centrality":0.1818,"eigenvector_centrality":0.0946,"pagerank_centrality":0.0574,"id":"Legolas","_row":"Legolas"},{"name":"Merry","type":"per","subtype":"hobbit","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.3636,"clustering_centrality":1,"closeness_centrality":0.5789,"betweenness_centrality":0,"eigenvector_centrality":0.3004,"pagerank_centrality":0.0795,"id":"Merry","_row":"Merry"},{"name":"Pippin","type":"per","subtype":"hobbit","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.3636,"clustering_centrality":1,"closeness_centrality":0.5789,"betweenness_centrality":0,"eigenvector_centrality":0.3004,"pagerank_centrality":0.0795,"id":"Pippin","_row":"Pippin"},{"name":"Sam","type":"per","subtype":"hobbit","gender":"male","volume":"Book 1","title":"The Fellowship of the 
Ring","publication_date":"1954-07-29","degree_centrality":0.5455,"clustering_centrality":0.6667,"closeness_centrality":0.6471,"betweenness_centrality":0.0364,"eigenvector_centrality":0.3891,"pagerank_centrality":0.1163,"id":"Sam","_row":"Sam"},{"name":"Bombadil","type":"per","subtype":"ainur","gender":"male","volume":"Book 1","title":"The Fellowship of the Ring","publication_date":"1954-07-29","degree_centrality":0.0909,"clustering_centrality":0,"closeness_centrality":0.5,"betweenness_centrality":0,"eigenvector_centrality":0.0912,"pagerank_centrality":0.0267,"id":"Bombadil","_row":"Bombadil"}],"links":[{"source":"Frodo","target":"Sam","weight":171},{"source":"Frodo","target":"Gandalf","weight":129},{"source":"Aragorn","target":"Frodo","weight":105},{"source":"Bilbo","target":"Frodo","weight":96},{"source":"Frodo","target":"Pippin","weight":80},{"source":"Pippin","target":"Sam","weight":72},{"source":"Frodo","target":"Merry","weight":64},{"source":"Merry","target":"Pippin","weight":57},{"source":"Aragorn","target":"Sam","weight":50},{"source":"Bilbo","target":"Gandalf","weight":44},{"source":"Merry","target":"Sam","weight":43},{"source":"Boromir","target":"Frodo","weight":42},{"source":"Aragorn","target":"Gandalf","weight":41},{"source":"Aragorn","target":"Boromir","weight":40},{"source":"Gandalf","target":"Sam","weight":31},{"source":"Aragorn","target":"Merry","weight":30},{"source":"Elrond","target":"Frodo","weight":27},{"source":"Elrond","target":"Gandalf","weight":27},{"source":"Aragorn","target":"Elrond","weight":22},{"source":"Aragorn","target":"Pippin","weight":22},{"source":"Gimli","target":"Legolas","weight":22},{"source":"Bilbo","target":"Sam","weight":21},{"source":"Frodo","target":"Legolas","weight":21},{"source":"Frodo","target":"Bombadil","weight":21}],"attributes":{}}
# jsonlite::minify(G_book1_json)
# jsonlite::prettify(G_book1_json, indent = 4)

Write G_book1 to JSON

# Save the Book 1 network JSON to disk. The output directory is created first
# (silently, if it already exists) so the write does not fail with
# "cannot open file" when "data/" is missing, e.g. on a fresh checkout.
book1_json_path <- "data/LotR_Book1_from_R.json"
dir.create(dirname(book1_json_path), showWarnings = FALSE, recursive = TRUE)
write(G_book1_json, file = book1_json_path)

Create a network graph for LotR Book 2

# Build the Book 2 connection table with entity metadata attached.
# The query joins the `book2` edge table to `ontology` twice: once on
# conn.source (alias `sour`) and once on conn.target (alias `dest`), so each
# row carries id/label/type/subtype/gender for both endpoints plus the edge
# weight and the volume/title/publication_date columns of `book2`.
# The second SELECT repeats the join with the source_* and target_* columns
# swapped; UNION therefore yields every connection once per direction
# (and drops exact duplicate rows).
# NOTE(review): `book2` is expected to be a data frame created earlier in the
# file with source/target/weight/volume/title/publication_date columns.
book2_df <- sqldf::sqldf("
    SELECT 
      sour.id AS source_id, sour.label as source_name, sour.type AS source_type, sour.subtype AS source_subtype, sour.gender AS source_gender,
      dest.id AS target_id, dest.label AS target_name, dest.type AS target_type, dest.subtype AS target_subtype, dest.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book2 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id

    UNION 

    SELECT 
      dest.id AS source_id, dest.label as source_name, dest.type AS source_type, dest.subtype AS source_subtype, dest.gender AS source_gender,
      sour.id AS target_id, sour.label AS target_name, sour.type AS target_type, sour.subtype AS target_subtype, sour.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book2 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
)

# Preview the ten heaviest Book 2 connections. The weight is shown as a double
# and the publication date as text; book2_df itself is left unmodified.
book2_df %>%
  dplyr::mutate(
    weight = as.double(weight),
    publication_date = as.character(publication_date)
  ) %>%
  dplyr::arrange(dplyr::desc(weight)) %>%
  utils::head(n = 10)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       frod       Frodo         per         hobbit          male      sams
## 2       sams         Sam         per         hobbit          male      frod
## 3       goll      Gollum         per         hobbit          male      sams
## 4       sams         Sam         per         hobbit          male      goll
## 5       frod       Frodo         per         hobbit          male      goll
## 6       goll      Gollum         per         hobbit          male      frod
## 7       merr       Merry         per         hobbit          male      pipp
## 8       pipp      Pippin         per         hobbit          male      merr
## 9      gimli       Gimli         per          dwarf          male      lego
## 10      lego     Legolas         per          elves          male     gimli
##    target_name target_type target_subtype target_gender weight volume
## 1          Sam         per         hobbit          male    158 Book 2
## 2        Frodo         per         hobbit          male    158 Book 2
## 3          Sam         per         hobbit          male    101 Book 2
## 4       Gollum         per         hobbit          male    101 Book 2
## 5       Gollum         per         hobbit          male     99 Book 2
## 6        Frodo         per         hobbit          male     99 Book 2
## 7       Pippin         per         hobbit          male     51 Book 2
## 8        Merry         per         hobbit          male     51 Book 2
## 9      Legolas         per          elves          male     50 Book 2
## 10       Gimli         per          dwarf          male     50 Book 2
##             title publication_date
## 1  The Two Towers       1954-11-11
## 2  The Two Towers       1954-11-11
## 3  The Two Towers       1954-11-11
## 4  The Two Towers       1954-11-11
## 5  The Two Towers       1954-11-11
## 6  The Two Towers       1954-11-11
## 7  The Two Towers       1954-11-11
## 8  The Two Towers       1954-11-11
## 9  The Two Towers       1954-11-11
## 10 The Two Towers       1954-11-11
# Edge list for the Book 2 graph: one row per connection in `book2`, labelled
# with the ontology names of both endpoints. Only person-to-person ("per")
# connections with weight above 20 are kept, and the columns are named
# from/to/weight as graph_from_data_frame() expects.
book2_edges <- sqldf::sqldf("
    SELECT 
      sour.label as source_name, sour.type as source_type, 
      dest.label AS target_name, dest.type AS target_type, 
      conn.weight
    FROM 
      book2 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
) %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(from = source_name, to = target_name, weight)

# Vertex table for the Book 2 graph: the distinct source-side entities of the
# person-to-person connections with weight above 20. Because book2_df holds
# every connection in both directions, this covers both endpoints of each edge.
book2_nodes <- book2_df %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(
    name = source_name,
    type = source_type,
    subtype = source_subtype,
    gender = source_gender,
    volume,
    title,
    publication_date
  ) %>%
  dplyr::mutate(publication_date = as.character(publication_date)) %>%
  dplyr::distinct()

# Assemble the undirected Book 2 character network from the edge list and the
# vertex metadata table, then print its summary.
G_book2 <- igraph::graph_from_data_frame(
  d = book2_edges,
  directed = FALSE,
  vertices = book2_nodes
)

# print_all(G_book2)
G_book2
## IGRAPH bb1f645 UNWB 16 21 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from bb1f645 (vertex names):
##  [1] Frodo  --Sam       Gollum --Sam       Frodo  --Gollum    Merry  --Pippin   
##  [5] Gimli  --Legolas   Gandalf--Saruman   Gandalf--Théoden   Faramir--Frodo    
##  [9] Aragorn--Gandalf   Aragorn--Legolas   Aragorn--Gimli     Gandalf--Treebeard
## [13] Merry  --Treebeard Faramir--Sam       Gandalf--Legolas   Gandalf--Pippin   
## [17] Pippin --Treebeard Gandalf--Gimli     Aragorn--Éomer     Boromir--Frodo    
## [21] Gandalf--Shadowfax

Check that the attributes were added to the nodes

# List every vertex attribute of the Book 2 graph (called without an attribute
# name, vertex_attr() returns all of them as a named list).
igraph::vertex_attr(G_book2)
## $name
##  [1] "Aragorn"   "Boromir"   "Éomer"     "Faramir"   "Frodo"     "Gandalf"  
##  [7] "Gimli"     "Gollum"    "Legolas"   "Merry"     "Pippin"    "Sam"      
## [13] "Saruman"   "Shadowfax" "Théoden"   "Treebeard"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [13] "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "men"    "men"    "men"    "hobbit" "ainur"  "dwarf"  "hobbit"
##  [9] "elves"  "hobbit" "hobbit" "hobbit" "ainur"  "animal" "men"    "ents"  
## 
## $gender
##  [1] "male" "male" "male" "male" "male" "male" "male" "male" "male" "male"
## [11] "male" "male" "male" "male" "male" "male"
## 
## $volume
##  [1] "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2"
##  [9] "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2"
## 
## $title
##  [1] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
##  [5] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
##  [9] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
## [13] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
## 
## $publication_date
##  [1] "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11"
##  [6] "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11"
## [11] "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11"
## [16] "1954-11-11"

Check that the undirected edges have weights

# Show the edge sequence of the Book 2 graph.
igraph::E(G_book2)
## + 21/21 edges from bb1f645 (vertex names):
##  [1] Frodo  --Sam       Gollum --Sam       Frodo  --Gollum    Merry  --Pippin   
##  [5] Gimli  --Legolas   Gandalf--Saruman   Gandalf--Théoden   Faramir--Frodo    
##  [9] Aragorn--Gandalf   Aragorn--Legolas   Aragorn--Gimli     Gandalf--Treebeard
## [13] Merry  --Treebeard Faramir--Sam       Gandalf--Legolas   Gandalf--Pippin   
## [17] Pippin --Treebeard Gandalf--Gimli     Aragorn--Éomer     Boromir--Frodo    
## [21] Gandalf--Shadowfax
# List every edge attribute of the Book 2 graph (here: the weights).
igraph::edge_attr(G_book2)
## $weight
##  [1] 158 101  99  51  50  49  46  41  38  34  28  26  25  24  23  23  23  22  21
## [20]  21  21

Identify the largest connected component

# Find the connected components of the Book 2 graph and keep the one with the
# most vertices. Comparing the membership against the hard-coded label 1 is
# only correct when the first-labelled component happens to be the largest, so
# the label is looked up from the component sizes instead (which.max() picks
# the first component on ties).
book2_components <- igraph::components(G_book2)
book2_largest_cc <- book2_components$membership == which.max(book2_components$csize)

# Logical vertex mask -> subgraph containing only the largest component.
G_book2_cc <- induced_subgraph(G_book2, book2_largest_cc)
G_book2_cc
## IGRAPH 1059ae9 UNWB 11 15 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from 1059ae9 (vertex names):
##  [1] Merry  --Pippin    Gimli  --Legolas   Gandalf--Saruman   Gandalf--Théoden  
##  [5] Aragorn--Gandalf   Aragorn--Legolas   Aragorn--Gimli     Gandalf--Treebeard
##  [9] Merry  --Treebeard Gandalf--Legolas   Gandalf--Pippin    Pippin --Treebeard
## [13] Gandalf--Gimli     Aragorn--Éomer     Gandalf--Shadowfax

Add centrality measures to nodes in the largest connected component

# Attach centrality scores to every vertex of the Book 2 component subgraph.
# Each measure is computed on the undirected graph; weights = NA is passed
# wherever a weights argument exists so the `weight` edge attribute is not
# used (per the igraph documentation).

# Normalised degree.
vertex_attr(G_book2_cc, "degree_centrality") <-
  igraph::degree(G_book2_cc, normalized = TRUE)

# Local clustering coefficient for all vertices (vids = NULL); vertices for
# which it is undefined are reported as zero.
vertex_attr(G_book2_cc, "clustering_centrality") <-
  igraph::transitivity(
    G_book2_cc,
    type = "local",
    vids = NULL,
    weights = NA,
    isolates = "zero"
  )

# Normalised closeness.
vertex_attr(G_book2_cc, "closeness_centrality") <-
  igraph::closeness(G_book2_cc, weights = NA, normalized = TRUE)

# Normalised betweenness.
vertex_attr(G_book2_cc, "betweenness_centrality") <-
  igraph::betweenness(
    G_book2_cc,
    directed = FALSE,
    weights = NA,
    normalized = TRUE
  )

# Eigenvector centrality (only the score vector of the result is stored).
vertex_attr(G_book2_cc, "eigenvector_centrality") <-
  igraph::eigen_centrality(
    G_book2_cc,
    directed = FALSE,
    weights = NA,
    scale = FALSE
  )$vector

# PageRank with damping factor 0.9, computed with the PRPACK solver.
vertex_attr(G_book2_cc, "pagerank_centrality") <-
  igraph::page_rank(
    G_book2_cc,
    algo = "prpack",
    directed = FALSE,
    damping = 0.9,
    weights = NA
  )$vector

# Show all vertex attributes, including the newly added scores.
igraph::vertex_attr(G_book2_cc)
## $name
##  [1] "Aragorn"   "Éomer"     "Gandalf"   "Gimli"     "Legolas"   "Merry"    
##  [7] "Pippin"    "Saruman"   "Shadowfax" "Théoden"   "Treebeard"
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## 
## $subtype
##  [1] "men"    "men"    "ainur"  "dwarf"  "elves"  "hobbit" "hobbit" "ainur" 
##  [9] "animal" "men"    "ents"  
## 
## $gender
##  [1] "male" "male" "male" "male" "male" "male" "male" "male" "male" "male"
## [11] "male"
## 
## $volume
##  [1] "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2" "Book 2"
##  [9] "Book 2" "Book 2" "Book 2"
## 
## $title
##  [1] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
##  [5] "The Two Towers" "The Two Towers" "The Two Towers" "The Two Towers"
##  [9] "The Two Towers" "The Two Towers" "The Two Towers"
## 
## $publication_date
##  [1] "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11"
##  [6] "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11" "1954-11-11"
## [11] "1954-11-11"
## 
## $degree_centrality
##  [1] 0.4 0.1 0.8 0.3 0.3 0.2 0.3 0.1 0.1 0.1 0.3
## 
## $clustering_centrality
##  [1] 0.5000000 0.0000000 0.1428571 1.0000000 1.0000000 1.0000000 0.6666667
##  [8] 0.0000000 0.0000000 0.0000000 0.6666667
## 
## $closeness_centrality
##  [1] 0.5882353 0.3846154 0.8333333 0.5555556 0.5555556 0.3846154 0.5555556
##  [8] 0.4761905 0.4761905 0.4761905 0.5555556
## 
## $betweenness_centrality
##  [1] 0.20000000 0.00000000 0.80000000 0.00000000 0.00000000 0.00000000
##  [7] 0.08888889 0.00000000 0.00000000 0.00000000 0.08888889
## 
## $eigenvector_centrality
##  [1] 0.3832493 0.1044969 0.5793725 0.3608614 0.3608614 0.1488724 0.2729997
##  [8] 0.1579719 0.1579719 0.1579719 0.2729997
## 
## $pagerank_centrality
##  [1] 0.13076738 0.03851357 0.25847412 0.09655987 0.09655987 0.06805857
##  [7] 0.09827943 0.03816925 0.03816925 0.03816925 0.09827943

Convert G_book2_cc (the largest connected component of G_book2) into JSON

# Serialise the Book 2 component subgraph into the nodes/links JSON produced
# by d3r::d3_igraph(), then echo it.
G_book2_json <- G_book2_cc %>%
  as.undirected() %>%
  d3r::d3_igraph()
G_book2_json
## {"nodes":[{"name":"Aragorn","type":"per","subtype":"men","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.4,"clustering_centrality":0.5,"closeness_centrality":0.5882,"betweenness_centrality":0.2,"eigenvector_centrality":0.3832,"pagerank_centrality":0.1308,"id":"Aragorn","_row":"Aragorn"},{"name":"Éomer","type":"per","subtype":"men","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.1,"clustering_centrality":0,"closeness_centrality":0.3846,"betweenness_centrality":0,"eigenvector_centrality":0.1045,"pagerank_centrality":0.0385,"id":"Éomer","_row":"Éomer"},{"name":"Gandalf","type":"per","subtype":"ainur","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.8,"clustering_centrality":0.1429,"closeness_centrality":0.8333,"betweenness_centrality":0.8,"eigenvector_centrality":0.5794,"pagerank_centrality":0.2585,"id":"Gandalf","_row":"Gandalf"},{"name":"Gimli","type":"per","subtype":"dwarf","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.3,"clustering_centrality":1,"closeness_centrality":0.5556,"betweenness_centrality":0,"eigenvector_centrality":0.3609,"pagerank_centrality":0.0966,"id":"Gimli","_row":"Gimli"},{"name":"Legolas","type":"per","subtype":"elves","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.3,"clustering_centrality":1,"closeness_centrality":0.5556,"betweenness_centrality":0,"eigenvector_centrality":0.3609,"pagerank_centrality":0.0966,"id":"Legolas","_row":"Legolas"},{"name":"Merry","type":"per","subtype":"hobbit","gender":"male","volume":"Book 2","title":"The Two 
Towers","publication_date":"1954-11-11","degree_centrality":0.2,"clustering_centrality":1,"closeness_centrality":0.3846,"betweenness_centrality":0,"eigenvector_centrality":0.1489,"pagerank_centrality":0.0681,"id":"Merry","_row":"Merry"},{"name":"Pippin","type":"per","subtype":"hobbit","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.3,"clustering_centrality":0.6667,"closeness_centrality":0.5556,"betweenness_centrality":0.0889,"eigenvector_centrality":0.273,"pagerank_centrality":0.0983,"id":"Pippin","_row":"Pippin"},{"name":"Saruman","type":"per","subtype":"ainur","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.1,"clustering_centrality":0,"closeness_centrality":0.4762,"betweenness_centrality":0,"eigenvector_centrality":0.158,"pagerank_centrality":0.0382,"id":"Saruman","_row":"Saruman"},{"name":"Shadowfax","type":"per","subtype":"animal","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.1,"clustering_centrality":0,"closeness_centrality":0.4762,"betweenness_centrality":0,"eigenvector_centrality":0.158,"pagerank_centrality":0.0382,"id":"Shadowfax","_row":"Shadowfax"},{"name":"Théoden","type":"per","subtype":"men","gender":"male","volume":"Book 2","title":"The Two Towers","publication_date":"1954-11-11","degree_centrality":0.1,"clustering_centrality":0,"closeness_centrality":0.4762,"betweenness_centrality":0,"eigenvector_centrality":0.158,"pagerank_centrality":0.0382,"id":"Théoden","_row":"Théoden"},{"name":"Treebeard","type":"per","subtype":"ents","gender":"male","volume":"Book 2","title":"The Two 
Towers","publication_date":"1954-11-11","degree_centrality":0.3,"clustering_centrality":0.6667,"closeness_centrality":0.5556,"betweenness_centrality":0.0889,"eigenvector_centrality":0.273,"pagerank_centrality":0.0983,"id":"Treebeard","_row":"Treebeard"}],"links":[{"source":"Merry","target":"Pippin","weight":51},{"source":"Gimli","target":"Legolas","weight":50},{"source":"Gandalf","target":"Saruman","weight":49},{"source":"Gandalf","target":"Théoden","weight":46},{"source":"Aragorn","target":"Gandalf","weight":38},{"source":"Aragorn","target":"Legolas","weight":34},{"source":"Aragorn","target":"Gimli","weight":28},{"source":"Gandalf","target":"Treebeard","weight":26},{"source":"Merry","target":"Treebeard","weight":25},{"source":"Gandalf","target":"Legolas","weight":23},{"source":"Gandalf","target":"Pippin","weight":23},{"source":"Pippin","target":"Treebeard","weight":23},{"source":"Gandalf","target":"Gimli","weight":22},{"source":"Aragorn","target":"Éomer","weight":21},{"source":"Gandalf","target":"Shadowfax","weight":21}],"attributes":{}}
# jsonlite::minify(G_book2_json)
# jsonlite::prettify(G_book2_json, indent = 4)

Write G_book2 to JSON

# Save the Book 2 network JSON to disk. The output directory is created first
# (silently, if it already exists) so the write does not fail with
# "cannot open file" when "data/" is missing, e.g. on a fresh checkout.
book2_json_path <- "data/LotR_Book2_from_R.json"
dir.create(dirname(book2_json_path), showWarnings = FALSE, recursive = TRUE)
write(G_book2_json, file = book2_json_path)

Create a network graph for LotR Book 3

# Build the Book 3 connection table with entity metadata attached.
# The query joins the `book3` edge table to `ontology` twice: once on
# conn.source (alias `sour`) and once on conn.target (alias `dest`), so each
# row carries id/label/type/subtype/gender for both endpoints plus the edge
# weight and the volume/title/publication_date columns of `book3`.
# The second SELECT repeats the join with the source_* and target_* columns
# swapped; UNION therefore yields every connection once per direction
# (and drops exact duplicate rows).
# NOTE(review): `book3` is expected to be a data frame created earlier in the
# file with source/target/weight/volume/title/publication_date columns.
book3_df <- sqldf::sqldf("
    SELECT 
      sour.id AS source_id, sour.label as source_name, sour.type AS source_type, sour.subtype AS source_subtype, sour.gender AS source_gender,
      dest.id AS target_id, dest.label AS target_name, dest.type AS target_type, dest.subtype AS target_subtype, dest.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book3 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id

    UNION 

    SELECT 
      dest.id AS source_id, dest.label as source_name, dest.type AS source_type, dest.subtype AS source_subtype, dest.gender AS source_gender,
      sour.id AS target_id, sour.label AS target_name, sour.type AS target_type, sour.subtype AS target_subtype, sour.gender AS target_gender,
      conn.weight, conn.volume, conn.title, conn.publication_date
    FROM 
      book3 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
)

# Preview the ten heaviest Book 3 connections. The weight is shown as a double
# and the publication date as text; book3_df itself is left unmodified.
book3_df %>%
  dplyr::mutate(
    weight = as.double(weight),
    publication_date = as.character(publication_date)
  ) %>%
  dplyr::arrange(dplyr::desc(weight)) %>%
  utils::head(n = 10)
##    source_id source_name source_type source_subtype source_gender target_id
## 1       frod       Frodo         per         hobbit          male      sams
## 2       sams         Sam         per         hobbit          male      frod
## 3      ganda     Gandalf         per          ainur          male      pipp
## 4       pipp      Pippin         per         hobbit          male     ganda
## 5       merr       Merry         per         hobbit          male      pipp
## 6       pipp      Pippin         per         hobbit          male      merr
## 7       arag     Aragorn         per            men          male     ganda
## 8       dene    Denethor         per            men          male     ganda
## 9       dene    Denethor         per            men          male      pipp
## 10     ganda     Gandalf         per          ainur          male      arag
##    target_name target_type target_subtype target_gender weight volume
## 1          Sam         per         hobbit          male    193 Book 3
## 2        Frodo         per         hobbit          male    193 Book 3
## 3       Pippin         per         hobbit          male     79 Book 3
## 4      Gandalf         per          ainur          male     79 Book 3
## 5       Pippin         per         hobbit          male     45 Book 3
## 6        Merry         per         hobbit          male     45 Book 3
## 7      Gandalf         per          ainur          male     34 Book 3
## 8      Gandalf         per          ainur          male     34 Book 3
## 9       Pippin         per         hobbit          male     34 Book 3
## 10     Aragorn         per            men          male     34 Book 3
##                     title publication_date
## 1  The Return of the King       1955-10-20
## 2  The Return of the King       1955-10-20
## 3  The Return of the King       1955-10-20
## 4  The Return of the King       1955-10-20
## 5  The Return of the King       1955-10-20
## 6  The Return of the King       1955-10-20
## 7  The Return of the King       1955-10-20
## 8  The Return of the King       1955-10-20
## 9  The Return of the King       1955-10-20
## 10 The Return of the King       1955-10-20
# Edge list for the Book 3 graph: one row per connection in `book3`, labelled
# with the ontology names of both endpoints. Only person-to-person ("per")
# connections with weight above 20 are kept, and the columns are named
# from/to/weight as graph_from_data_frame() expects.
book3_edges <- sqldf::sqldf("
    SELECT 
      sour.label as source_name, sour.type as source_type, 
      dest.label AS target_name, dest.type AS target_type, 
      conn.weight
    FROM 
      book3 conn 
      JOIN ontology sour
      ON
        conn.source = sour.id
      JOIN ontology dest
      ON  
        conn.target = dest.id"
) %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(from = source_name, to = target_name, weight)

# Vertex table for the Book 3 graph: the distinct source-side entities of the
# person-to-person connections with weight above 20. Because book3_df holds
# every connection in both directions, this covers both endpoints of each edge.
book3_nodes <- book3_df %>%
  dplyr::filter(source_type == "per" & target_type == "per" & weight > 20) %>%
  dplyr::select(
    name = source_name,
    type = source_type,
    subtype = source_subtype,
    gender = source_gender,
    volume,
    title,
    publication_date
  ) %>%
  dplyr::mutate(publication_date = as.character(publication_date)) %>%
  dplyr::distinct()

# Assemble the undirected Book 3 character network from the edge list and the
# vertex metadata table, then print its summary.
G_book3 <- igraph::graph_from_data_frame(
  d = book3_edges,
  directed = FALSE,
  vertices = book3_nodes
)

# print_all(G_book3)
G_book3
## IGRAPH b035c85 UNWB 13 19 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from b035c85 (vertex names):
##  [1] Frodo   --Sam     Gandalf --Pippin  Merry   --Pippin  Aragorn --Gandalf
##  [5] Denethor--Gandalf Denethor--Pippin  Gimli   --Legolas Aragorn --Gimli  
##  [9] Beregond--Pippin  Frodo   --Pippin  Aragorn --Legolas Aragorn --Éomer  
## [13] Faramir --Gandalf Faramir --Pippin  Frodo   --Gandalf Aragorn --Merry  
## [17] Denethor--Faramir Éowyn   --Merry   Pippin  --Sam

Check that the attributes were added to the nodes

# List every vertex attribute of the Book 3 graph (called without an attribute
# name, vertex_attr() returns all of them as a named list).
igraph::vertex_attr(G_book3)
## $name
##  [1] "Aragorn"  "Beregond" "Denethor" "Éomer"    "Éowyn"    "Faramir" 
##  [7] "Frodo"    "Gandalf"  "Gimli"    "Legolas"  "Merry"    "Pippin"  
## [13] "Sam"     
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [13] "per"
## 
## $subtype
##  [1] "men"    "men"    "men"    "men"    "men"    "men"    "hobbit" "ainur" 
##  [9] "dwarf"  "elves"  "hobbit" "hobbit" "hobbit"
## 
## $gender
##  [1] "male"   "male"   "male"   "male"   "female" "male"   "male"   "male"  
##  [9] "male"   "male"   "male"   "male"   "male"  
## 
## $volume
##  [1] "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3"
##  [9] "Book 3" "Book 3" "Book 3" "Book 3" "Book 3"
## 
## $title
##  [1] "The Return of the King" "The Return of the King" "The Return of the King"
##  [4] "The Return of the King" "The Return of the King" "The Return of the King"
##  [7] "The Return of the King" "The Return of the King" "The Return of the King"
## [10] "The Return of the King" "The Return of the King" "The Return of the King"
## [13] "The Return of the King"
## 
## $publication_date
##  [1] "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20"
##  [6] "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20"
## [11] "1955-10-20" "1955-10-20" "1955-10-20"

Check that the undirected edges have weights

# Show the edge sequence of the Book 3 graph.
igraph::E(G_book3)
## + 19/19 edges from b035c85 (vertex names):
##  [1] Frodo   --Sam     Gandalf --Pippin  Merry   --Pippin  Aragorn --Gandalf
##  [5] Denethor--Gandalf Denethor--Pippin  Gimli   --Legolas Aragorn --Gimli  
##  [9] Beregond--Pippin  Frodo   --Pippin  Aragorn --Legolas Aragorn --Éomer  
## [13] Faramir --Gandalf Faramir --Pippin  Frodo   --Gandalf Aragorn --Merry  
## [17] Denethor--Faramir Éowyn   --Merry   Pippin  --Sam
# List every edge attribute of the Book 3 graph (here: the weights).
igraph::edge_attr(G_book3)
## $weight
##  [1] 193  79  45  34  34  34  32  28  27  27  26  24  24  24  24  21  21  21  21

Identify the largest connected component

# Find the connected components of the Book 3 graph and keep the one with the
# most vertices. Comparing the membership against the hard-coded label 1 is
# only correct when the first-labelled component happens to be the largest, so
# the label is looked up from the component sizes instead (which.max() picks
# the first component on ties).
book3_components <- igraph::components(G_book3)
book3_largest_cc <- book3_components$membership == which.max(book3_components$csize)

# Logical vertex mask -> subgraph containing only the largest component.
G_book3_cc <- induced_subgraph(G_book3, book3_largest_cc)
G_book3_cc
## IGRAPH 5f72b48 UNWB 13 19 -- 
## + attr: name (v/c), type (v/c), subtype (v/c), gender (v/c), volume
## | (v/c), title (v/c), publication_date (v/c), weight (e/n)
## + edges from 5f72b48 (vertex names):
##  [1] Frodo   --Sam     Gandalf --Pippin  Merry   --Pippin  Aragorn --Gandalf
##  [5] Denethor--Gandalf Denethor--Pippin  Gimli   --Legolas Aragorn --Gimli  
##  [9] Beregond--Pippin  Frodo   --Pippin  Aragorn --Legolas Aragorn --Éomer  
## [13] Faramir --Gandalf Faramir --Pippin  Frodo   --Gandalf Aragorn --Merry  
## [17] Denethor--Faramir Éowyn   --Merry   Pippin  --Sam

Add centrality measures to nodes in the largest connected component

# Attach centrality scores to every vertex of the Book 3 component subgraph.
# Each measure is computed on the undirected graph; weights = NA is passed
# wherever a weights argument exists so the `weight` edge attribute is not
# used (per the igraph documentation).

# Normalised degree.
vertex_attr(G_book3_cc, "degree_centrality") <-
  igraph::degree(G_book3_cc, normalized = TRUE)

# Local clustering coefficient for all vertices (vids = NULL); vertices for
# which it is undefined are reported as zero.
vertex_attr(G_book3_cc, "clustering_centrality") <-
  igraph::transitivity(
    G_book3_cc,
    type = "local",
    vids = NULL,
    weights = NA,
    isolates = "zero"
  )

# Normalised closeness.
vertex_attr(G_book3_cc, "closeness_centrality") <-
  igraph::closeness(G_book3_cc, weights = NA, normalized = TRUE)

# Normalised betweenness.
vertex_attr(G_book3_cc, "betweenness_centrality") <-
  igraph::betweenness(
    G_book3_cc,
    directed = FALSE,
    weights = NA,
    normalized = TRUE
  )

# Eigenvector centrality (only the score vector of the result is stored).
vertex_attr(G_book3_cc, "eigenvector_centrality") <-
  igraph::eigen_centrality(
    G_book3_cc,
    directed = FALSE,
    weights = NA,
    scale = FALSE
  )$vector

# PageRank with damping factor 0.9, computed with the PRPACK solver.
vertex_attr(G_book3_cc, "pagerank_centrality") <-
  igraph::page_rank(
    G_book3_cc,
    algo = "prpack",
    directed = FALSE,
    damping = 0.9,
    weights = NA
  )$vector

# Show all vertex attributes, including the newly added scores.
igraph::vertex_attr(G_book3_cc)
## $name
##  [1] "Aragorn"  "Beregond" "Denethor" "Éomer"    "Éowyn"    "Faramir" 
##  [7] "Frodo"    "Gandalf"  "Gimli"    "Legolas"  "Merry"    "Pippin"  
## [13] "Sam"     
## 
## $type
##  [1] "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per" "per"
## [13] "per"
## 
## $subtype
##  [1] "men"    "men"    "men"    "men"    "men"    "men"    "hobbit" "ainur" 
##  [9] "dwarf"  "elves"  "hobbit" "hobbit" "hobbit"
## 
## $gender
##  [1] "male"   "male"   "male"   "male"   "female" "male"   "male"   "male"  
##  [9] "male"   "male"   "male"   "male"   "male"  
## 
## $volume
##  [1] "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3" "Book 3"
##  [9] "Book 3" "Book 3" "Book 3" "Book 3" "Book 3"
## 
## $title
##  [1] "The Return of the King" "The Return of the King" "The Return of the King"
##  [4] "The Return of the King" "The Return of the King" "The Return of the King"
##  [7] "The Return of the King" "The Return of the King" "The Return of the King"
## [10] "The Return of the King" "The Return of the King" "The Return of the King"
## [13] "The Return of the King"
## 
## $publication_date
##  [1] "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20"
##  [6] "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20" "1955-10-20"
## [11] "1955-10-20" "1955-10-20" "1955-10-20"
## 
## $degree_centrality
##  [1] 0.41666667 0.08333333 0.25000000 0.08333333 0.08333333 0.25000000
##  [7] 0.25000000 0.41666667 0.16666667 0.16666667 0.25000000 0.58333333
## [13] 0.16666667
## 
## $clustering_centrality
##  [1] 0.1000000 0.0000000 1.0000000 0.0000000 0.0000000 1.0000000 0.6666667
##  [8] 0.4000000 1.0000000 1.0000000 0.0000000 0.2380952 1.0000000
## 
## $closeness_centrality
##  [1] 0.5714286 0.3870968 0.4800000 0.3750000 0.3750000 0.4800000 0.4800000
##  [8] 0.6000000 0.3870968 0.3870968 0.5714286 0.6000000 0.4000000
## 
## $betweenness_centrality
##  [1] 0.45454545 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##  [7] 0.02777778 0.29797980 0.00000000 0.00000000 0.24747475 0.39646465
## [13] 0.00000000
## 
## $eigenvector_centrality
##  [1] 0.23082225 0.13631009 0.34419409 0.05996801 0.05468441 0.34419409
##  [7] 0.31118983 0.45597013 0.08101613 0.08101613 0.21048519 0.52466977
## [13] 0.21715772
## 
## $pagerank_centrality
##  [1] 0.13646966 0.02976743 0.07365198 0.03225685 0.03286561 0.07365198
##  [7] 0.07508870 0.12104976 0.05864881 0.05864881 0.08391102 0.17169536
## [13] 0.05229404

Convert G_book3 into JSON

# Serialise the Book 3 graph for D3: drop edge direction from G_book3_cc, then
# hand the undirected graph to d3r for conversion to JSON.
G_book3_json <- G_book3_cc %>%
  as.undirected() %>%
  d3r::d3_igraph()
# Auto-print the result so the "nodes" and "links" arrays can be inspected.
G_book3_json
## {"nodes":[{"name":"Aragorn","type":"per","subtype":"men","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.4167,"clustering_centrality":0.1,"closeness_centrality":0.5714,"betweenness_centrality":0.4545,"eigenvector_centrality":0.2308,"pagerank_centrality":0.1365,"id":"Aragorn","_row":"Aragorn"},{"name":"Beregond","type":"per","subtype":"men","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.0833,"clustering_centrality":0,"closeness_centrality":0.3871,"betweenness_centrality":0,"eigenvector_centrality":0.1363,"pagerank_centrality":0.0298,"id":"Beregond","_row":"Beregond"},{"name":"Denethor","type":"per","subtype":"men","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.25,"clustering_centrality":1,"closeness_centrality":0.48,"betweenness_centrality":0,"eigenvector_centrality":0.3442,"pagerank_centrality":0.0737,"id":"Denethor","_row":"Denethor"},{"name":"Éomer","type":"per","subtype":"men","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.0833,"clustering_centrality":0,"closeness_centrality":0.375,"betweenness_centrality":0,"eigenvector_centrality":0.06,"pagerank_centrality":0.0323,"id":"Éomer","_row":"Éomer"},{"name":"Éowyn","type":"per","subtype":"men","gender":"female","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.0833,"clustering_centrality":0,"closeness_centrality":0.375,"betweenness_centrality":0,"eigenvector_centrality":0.0547,"pagerank_centrality":0.0329,"id":"Éowyn","_row":"Éowyn"},{"name":"Faramir","type":"per","subtype":"men","gender":"male","volume":"Book 3","title":"The Return of the 
King","publication_date":"1955-10-20","degree_centrality":0.25,"clustering_centrality":1,"closeness_centrality":0.48,"betweenness_centrality":0,"eigenvector_centrality":0.3442,"pagerank_centrality":0.0737,"id":"Faramir","_row":"Faramir"},{"name":"Frodo","type":"per","subtype":"hobbit","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.25,"clustering_centrality":0.6667,"closeness_centrality":0.48,"betweenness_centrality":0.0278,"eigenvector_centrality":0.3112,"pagerank_centrality":0.0751,"id":"Frodo","_row":"Frodo"},{"name":"Gandalf","type":"per","subtype":"ainur","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.4167,"clustering_centrality":0.4,"closeness_centrality":0.6,"betweenness_centrality":0.298,"eigenvector_centrality":0.456,"pagerank_centrality":0.121,"id":"Gandalf","_row":"Gandalf"},{"name":"Gimli","type":"per","subtype":"dwarf","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.1667,"clustering_centrality":1,"closeness_centrality":0.3871,"betweenness_centrality":0,"eigenvector_centrality":0.081,"pagerank_centrality":0.0586,"id":"Gimli","_row":"Gimli"},{"name":"Legolas","type":"per","subtype":"elves","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.1667,"clustering_centrality":1,"closeness_centrality":0.3871,"betweenness_centrality":0,"eigenvector_centrality":0.081,"pagerank_centrality":0.0586,"id":"Legolas","_row":"Legolas"},{"name":"Merry","type":"per","subtype":"hobbit","gender":"male","volume":"Book 3","title":"The Return of the 
King","publication_date":"1955-10-20","degree_centrality":0.25,"clustering_centrality":0,"closeness_centrality":0.5714,"betweenness_centrality":0.2475,"eigenvector_centrality":0.2105,"pagerank_centrality":0.0839,"id":"Merry","_row":"Merry"},{"name":"Pippin","type":"per","subtype":"hobbit","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.5833,"clustering_centrality":0.2381,"closeness_centrality":0.6,"betweenness_centrality":0.3965,"eigenvector_centrality":0.5247,"pagerank_centrality":0.1717,"id":"Pippin","_row":"Pippin"},{"name":"Sam","type":"per","subtype":"hobbit","gender":"male","volume":"Book 3","title":"The Return of the King","publication_date":"1955-10-20","degree_centrality":0.1667,"clustering_centrality":1,"closeness_centrality":0.4,"betweenness_centrality":0,"eigenvector_centrality":0.2172,"pagerank_centrality":0.0523,"id":"Sam","_row":"Sam"}],"links":[{"source":"Frodo","target":"Sam","weight":193},{"source":"Gandalf","target":"Pippin","weight":79},{"source":"Merry","target":"Pippin","weight":45},{"source":"Aragorn","target":"Gandalf","weight":34},{"source":"Denethor","target":"Gandalf","weight":34},{"source":"Denethor","target":"Pippin","weight":34},{"source":"Gimli","target":"Legolas","weight":32},{"source":"Aragorn","target":"Gimli","weight":28},{"source":"Beregond","target":"Pippin","weight":27},{"source":"Frodo","target":"Pippin","weight":27},{"source":"Aragorn","target":"Legolas","weight":26},{"source":"Aragorn","target":"Éomer","weight":24},{"source":"Faramir","target":"Gandalf","weight":24},{"source":"Faramir","target":"Pippin","weight":24},{"source":"Frodo","target":"Gandalf","weight":24},{"source":"Aragorn","target":"Merry","weight":21},{"source":"Denethor","target":"Faramir","weight":21},{"source":"Éowyn","target":"Merry","weight":21},{"source":"Pippin","target":"Sam","weight":21}],"attributes":{}}
# jsonlite::minify(G_book3_json)
# jsonlite::prettify(G_book3_json, indent = 4)

Write G_book3 to JSON

# Export the JSON string to disk. The output directory is created first,
# because write() cannot open a file inside a directory that does not exist;
# dir.create() is a silent no-op when "data" is already present.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write(G_book3_json, file = "data/LotR_Book3_from_R.json")

References

  1. Filippo Menczer, Santo Fortunato, and Clayton Davis. A First Course in Network Science. Cambridge University Press, 2020.

  2. Eric Kolaczyk and Gabor Csardi. Statistical Analysis of Network Data with R, 2nd Edition. Springer, 2020.

  3. Mark Newman. Networks, 2nd Edition. Oxford University Press, 2018.

  4. Matthew Jackson. Social and Economic Networks. Princeton University Press, 2008.

  5. Matthew Jackson. The Human Network: How Your Social Position Determines Your Power, Beliefs, and Behaviors. Vintage Books, 2020.

  6. David Easley and Jon Kleinberg. Networks, Crowds, and Markets: Reasoning about a Highly Connected World. Cambridge University Press, 2010.