Network visualisations

Jason Ola

2023-01-24

Import the libraries

library(tidyverse)
library(tidygraph)
library(readxl)
library(ggraph)

Load data

Let’s first load our edges and nodes data with readxl. You can find the Excel file here.

# Read the "edges" and "nodes" sheets of the same workbook into two tibbles.
edges <- read_excel(path = "data/data.xlsx", sheet = "edges")
nodes <- read_excel(path = "data/data.xlsx", sheet = "nodes")

Now we combine that data into a graph format

# Combine the node and edge tables into a single tidygraph object.
# The network is described as undirected (see the caption of the final plot),
# so state it explicitly: tbl_graph() builds a directed graph by default.
graph <- tbl_graph(
  nodes = nodes,
  edges = edges,
  directed = FALSE
)

Plots

Let’s plot the graph architecture

# Bare view of the network structure: igraph "kk" layout, plain edges and
# points, no labels yet.
ggraph(graph, layout = "igraph", algorithm = "kk") +
  geom_edge_fan() +
  geom_node_point() +
  theme_graph()

We now add labels to our graph. We use repel so that more names remain readable. Before plotting, we mutate a new column, first_name, by extracting everything that comes before the first space in the name.

# Label every node with the person's first name.
graph %N>%
  # first_name = leading run of non-space characters in `name`
  mutate(first_name = str_trim(str_extract(name, "[^ ]+"))) %>%
  # same layout spelling as the other plots in this document
  ggraph(layout = "igraph", algorithm = "kk") +
  geom_edge_fan() +
  # repel = TRUE moves overlapping labels apart so more names stay visible,
  # as the accompanying text describes
  geom_node_label(aes(label = first_name), repel = TRUE) +
  theme_graph()

Next, let’s see who has the most edges. We pivot the edge table longer so that each edge endpoint becomes its own row, count the number of edges per id, and keep the four ids with the highest counts.

# Ids of the nodes with the most edges.
# Each edge contributes one row per endpoint after pivoting, so counting rows
# per id gives the number of edges touching that id. Ties at the cut-off are
# kept, so more than four ids can be returned.
top4 <- graph %>%
  activate(edges) %>%
  as_tibble() %>%
  pivot_longer(cols = c(from, to), values_to = "id") %>%
  count(id, name = "n") %>%
  slice_max(order_by = n, n = 4, with_ties = TRUE) %>%
  pull(id)

Then we create a highlight column, in both the edge and the node tables, to flag our top-4 ids.

# Flag (1/0) every edge that touches a top-4 id and every node that is one.
# Nodes are left as the active table at the end of the pipeline.
# NOTE(review): `top4` holds edge endpoints (from/to) and is compared with the
# node column `id` -- presumably `id` matches the endpoint numbering; confirm.
graph_hl <- graph %>%
  activate(edges) %>%
  mutate(highlight = as.numeric(from %in% top4 | to %in% top4)) %>%
  activate(nodes) %>%
  mutate(highlight = as.numeric(id %in% top4))

Finally we plot the results

# Emphasise the edges of the top-4 nodes: edge transparency follows the
# highlight flag, while all points are drawn faintly.
ggraph(graph_hl, layout = "igraph", algorithm = "kk") +
  geom_edge_fan(
    mapping = aes(alpha = highlight),
    show.legend = FALSE
  ) +
  geom_node_point(alpha = 0.1) +
  theme_graph()

# One table holding both the name and the edge count of each top node, so the
# two strings built below list people and counts in the same order by
# construction. Previously the names and the counts came from two separate
# pipelines and only lined up if the node table was already sorted by id.
top4_summary <- graph %E>%
  as_tibble() %>%
  # one row per edge endpoint, then number of edges per id
  pivot_longer(c(from, to), values_to = "id") %>%
  count(id) %>%
  slice_max(n, n = 4) %>%
  # attach the matching node name to each count
  inner_join(
    graph %N>%
      as_tibble() %>%
      select(id, name),
    by = "id"
  ) %>%
  arrange(id)

# "A, B, C and D" style strings for the names and their edge counts
top4names <- top4_summary %>%
  pull(name) %>%
  glue::glue_collapse(", ", last = " and ")
top4counts <- top4_summary %>%
  pull(n) %>%
  glue::glue_collapse(", ", last = " and ")

The 4 names with the most connections are Rain Lemmi, Nyambura Fischer, Woo-Jin Tosetti and Silvestre Molina, with 16, 10, 12 and 17 connections respectively.

Let’s now add our names and other point shapes

# Same highlight plot, now with star-shaped points and name labels for the
# highlighted nodes only.
graph_hl %>%
  ggraph(layout = "igraph", algorithm = "kk") +
  geom_edge_fan(aes(alpha = highlight),
                show.legend = FALSE) +
  geom_node_point(alpha = 0.2,
                  shape = "*",
                  size = 7) +
  # if_else() needs a real vector for `false`: NULL is rejected by
  # dplyr >= 1.1.0, so non-highlighted nodes get a typed missing label.
  geom_node_label(aes(label = if_else(highlight == 1, name, NA_character_)),
                  repel = TRUE) +
  theme_graph()

Now let’s color the edges and nodes by these 4 central figures. To do so, we need to mutate both the node and the edge tables so that each has a color column holding the desired color.

# Give every node and edge a literal colour name: one colour per central
# figure, gray for everything else.
# Both tables are activated explicitly, so this step no longer depends on
# which table an earlier statement happened to leave active. Edges are left
# active at the end, as before.
graph_hl <- graph_hl %>%
  activate(nodes) %>%
  mutate(color = case_when(id == 34 ~ "darkblue",
                           id == 1 ~ "darkred",
                           id == 33 ~ "darkgreen",
                           id == 3 ~ "purple",
                           TRUE ~ "gray")) %>%
  activate(edges) %>%
  # case_when() takes the first matching branch, so an edge joining two
  # central figures gets the colour of the one listed first.
  mutate(color = case_when(from == 34 | to == 34 ~ "darkblue",
                           from == 1 | to == 1 ~ "darkred",
                           from == 33 | to == 33 ~ "darkgreen",
                           from == 3 | to == 3 ~ "purple",
                           TRUE ~ "gray"))

We color the nodes and edges in the plot with scale color identity

# Draw nodes and edges in the colours stored in their `color` columns.
graph_hl %>%
  ggraph(layout = "igraph", algorithm = "kk") +
  geom_edge_fan(aes(color = color),
                show.legend = FALSE) +
  geom_node_point(shape = "*",
                  size = 7,
                  mapping = aes(color = color)) +
  # if_else() needs a real vector for `false`: NULL is rejected by
  # dplyr >= 1.1.0, so non-highlighted nodes get a typed missing label.
  geom_node_label(aes(label = if_else(highlight == 1, name, NA_character_)),
                  repel = TRUE) +
  # Identity scales use the column values as colours directly. The former
  # `breaks = graph_hl$color` argument was dropped: `$` on a graph object
  # reads graph-level attributes, not the node/edge `color` columns.
  scale_edge_color_identity() +
  scale_color_identity() +
  theme_graph()

Now we can polish our graph

# Final figure: thin coloured edges faded by the highlight flag, small
# semi-transparent points, compact labels for the highlighted nodes, and a
# caption.
graph_hl %>%
  ggraph(layout = "igraph", algorithm = "kk") +
  geom_edge_fan(aes(color = color,
                    alpha = highlight),
                width = 0.3,
                show.legend = FALSE) +
  geom_node_point(shape = 16,
                  size = 2,
                  alpha = 0.5,
                  mapping = aes(color = color)) +
  # if_else() needs a real vector for `false`: NULL is rejected by
  # dplyr >= 1.1.0, so non-highlighted nodes get a typed missing label.
  geom_node_label(aes(label = if_else(highlight == 1, name, NA_character_)),
                  repel = TRUE,
                  size = 3,
                  label.padding = 0.15,
                  label.size = 0.1) +
  # Identity scales use the column values as colours directly. The former
  # `breaks = graph_hl$color` argument was dropped: `$` on a graph object
  # reads graph-level attributes, not the node/edge `color` columns.
  scale_edge_color_identity() +
  scale_color_identity() +
  theme_graph() +
  labs(caption = "Visualisation of an undirected network of social ties in a course classroom")