1.0 Things to learn

  • create graph object data frames, manipulate them using appropriate functions of dplyr, lubridate, and tidygraph,
  • build network graph visualisation using appropriate functions of ggraph,
  • compute network metrics using tidygraph,
  • build advanced graph visualisation by incorporating the network metrics, and
  • build interactive network visualisation using visNetwork package.

2.0 Load Packages

# Packages used throughout this exercise.
packages <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)

# Install any package that is missing, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability; attaching is left to
  # library() below, which stops with an error if the package is still
  # unavailable after the install attempt.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Make sure all required packages are installed and can be loaded

# Try to attach each package in turn; the printed list holds one TRUE/FALSE
# per package, in the same order as `p`.
p <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)
lapply(p, function(pkg) require(pkg, character.only = TRUE))
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE

3.0 Data Wrangling

3.1 Load Data

# NOTE(review): the working directory is a machine-specific absolute path;
# the relative "data/..." file paths used for reading depend on it.
setwd("D:/Materials of Study/Visual analytics/Hands-on_Ex11")
# Read the node list.
GAStech_nodes <- read_csv(file.path("data", "GAStech_email_node.csv"))
## Parsed with column specification:
## cols(
##   id = col_double(),
##   label = col_character(),
##   Department = col_character(),
##   Title = col_character()
## )
# Read the edge list (one row per email record).
GAStech_edges <- read_csv(file.path("data", "GAStech_email_edge-v2.csv"))
## Parsed with column specification:
## cols(
##   source = col_double(),
##   target = col_double(),
##   SentDate = col_character(),
##   SentTime = col_time(format = ""),
##   Subject = col_character(),
##   MainSubject = col_character(),
##   sourceLabel = col_character(),
##   targetLabel = col_character()
## )

Check the metadata

# Print a compact, column-wise overview of the edge table.
dplyr::glimpse(GAStech_edges)
## Observations: 9,063
## Variables: 8
## $ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26, 26,...
## $ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27, 28,...
## $ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6/1/20...
## $ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00, 08:5...
## $ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicProcess...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "Work r...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Kanon.H...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hideki....

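Warning: SentDate is read as character data rather than a date type (unlike SentTime, which is parsed as a time), so it has to be converted before any date-based operation.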

3.2 Wrangling time

Both dmy() and wday() are functions of lubridate package

# Parse the day/month/year text into Date values, replacing the column.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
# Derive the full weekday name (ordered factor) from the parsed date.
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)

Check whether the transformation succeeded.

# Re-inspect the edge table after the date wrangling.
dplyr::glimpse(GAStech_edges)
## Observations: 9,063
## Variables: 9
## $ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26, 26,...
## $ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27, 28,...
## $ SentDate    <date> 2014-01-06, 2014-01-06, 2014-01-06, 2014-01-06, 2014-0...
## $ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00, 08:5...
## $ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicProcess...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "Work r...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Kanon.H...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hideki....
## $ Weekday     <ord> Monday, Monday, Monday, Monday, Monday, Monday, Monday,...

3.3 Wrangling attributes

The functions filter(), group_by(), summarise(), and ungroup() are from the dplyr package.

# Count work-related emails per source/target/weekday combination, then keep
# only combinations between two different nodes that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
##    source target Weekday   Weight
##     <dbl>  <dbl> <ord>      <int>
##  1      1      2 Monday         4
##  2      1      2 Tuesday        3
##  3      1      2 Wednesday      5
##  4      1      2 Friday         8
##  5      1      3 Monday         4
##  6      1      3 Tuesday        3
##  7      1      3 Wednesday      5
##  8      1      3 Friday         8
##  9      1      4 Monday         4
## 10      1      4 Tuesday        3
## # ... with 1,446 more rows

3.4 Creating network objects using tidygraph

  • tbl_graph() creates a network object from nodes and edges data
  • as_tbl_graph() converts network data and objects to a tbl_graph network.
# Build a directed tbl_graph from the node table and the aggregated edges,
# then print it.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
##      id label               Department     Title                                
##   <dbl> <chr>               <chr>          <chr>                                
## 1     1 Mat.Bramar          Administration Assistant to CEO                     
## 2     2 Anda.Ribera         Administration Assistant to CFO                     
## 3     3 Rachel.Pantanal     Administration Assistant to CIO                     
## 4     4 Linda.Lagos         Administration Assistant to COO                     
## 5     5 Ruscella.Mies.Haber Administration Assistant to Engineering Group Manag~
## 6     6 Carla.Forluniau     Administration Assistant to IT Group Manager        
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
##    from    to Weekday   Weight
##   <int> <int> <ord>      <int>
## 1     1     2 Monday         4
## 2     1     2 Tuesday        3
## 3     1     2 Wednesday      5
## # ... with 1,453 more rows

4.0 Plotting Network Data with ggraph package

As in all network graphs, there are three main aspects to a ggraph network graph: nodes, edges and layouts.

4.1 Plotting a basic network graph

# Basic plot: the edge layer is added before the node layer, so the node
# points are drawn over the edges.
ggraph(graph = GAStech_graph) +
  geom_edge_link() +
  geom_node_point()
## Using `stress` as default layout

4.2 Changing the default network graph theme

The ggraph theme can be set for a series of plots with the set_graph_style() command run before the graphs are plotted or by using theme_graph() in the individual plots.

# Fixed colours are set as layer parameters rather than inside aes(): a
# constant placed in aes() is treated as data to be mapped through a colour
# scale (and gets a legend) instead of being used as the colour itself.
g <- ggraph(GAStech_graph) +
  geom_edge_link(edge_colour = "grey50") +
  geom_node_point(colour = "grey40")
## Using `stress` as default layout
# Dark background with white text for the whole plot.
g + theme_graph(background = "grey10", text_colour = "white")

4.3 Working with ggraph layouts

Some examples:

  • The default layout is nicely.

# Random 20 x 20 matrix of 0/1 values (1 drawn with probability 0.2), used
# as the adjacency matrix of an undirected demo graph without self-loops.
data <- matrix(
  sample(0:1, 400, replace = TRUE, prob = c(0.8, 0.2)),
  nrow = 20
)
network <- graph_from_adjacency_matrix(
  data,
  mode = "undirected",
  diag = FALSE
)

# Draw four igraph layouts in a 2 x 2 grid. par() returns the previous
# settings, which are restored once the four plots are drawn.
old_par <- par(mfrow = c(2, 2), mar = c(1, 1, 1, 1))
# The layout_*() names replace the deprecated dotted names (layout.sphere,
# layout.circle, layout.random, layout.fruchterman.reingold).
plot(network, layout = layout_on_sphere, main = "sphere")
plot(network, layout = layout_in_circle, main = "circle")
plot(network, layout = layout_randomly, main = "random")
plot(network, layout = layout_with_fr, main = "fruchterman.reingold")
par(old_par)

# Same graph, drawn with the "circle" layout and default aesthetics.
g <- ggraph(graph = GAStech_graph, layout = "circle") +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes())

g + theme_graph()

4.4 Modifying network nodes

geom_node_point is equivalent in functionality to geom_point of ggplot2. It allows for simple plotting of nodes in different shapes, colours and sizes. (Also the position with x and y mapped)

# Colour is mapped to Department inside aes(); the fixed point size is a
# plain layer parameter so it is applied literally and no size legend is
# generated for the constant.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = Department), size = 3)
g + theme_graph()

4.5 Modifying Edges

geom_edge_link draws edges; here the edge width is mapped to Weight and the transparency is set with alpha.

# Edge width follows the Weight column (scaled to the 0.1-5 range) and edges
# are semi-transparent; nodes are coloured by Department at a fixed size.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 3)

g + theme_graph()

5.0 Creating facet graphs

There are three functions in ggraph to implement faceting:

  • facet_nodes(), whereby edges are only drawn in a panel if both terminal nodes are present there,
  • facet_edges(), whereby nodes are always drawn in all panels even if the node data contains an attribute named the same as the one used for the edge faceting, and
  • facet_graph(), for faceting on two variables simultaneously.

5.1 Working with facet_edges()

A frameless facet graph

# Apply the ggraph default style to the plots that follow.
set_graph_style()

# One panel per Weekday value of the edges; nodes are coloured by Department.
g <- ggraph(graph = GAStech_graph) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes(colour = Department))
## Using `stress` as default layout
g + facet_edges(facets = ~Weekday)

A framed facet graph

# Apply the ggraph default style to the plots that follow.
set_graph_style()

# Same edge facets as the frameless version, with the legend moved below
# the panels.
g <- ggraph(graph = GAStech_graph) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes(colour = Department)) +
  theme(legend.position = "bottom")
## Using `stress` as default layout
# th_foreground() adds the grey foreground and a border around each panel.
g +
  facet_edges(facets = ~Weekday) +
  th_foreground(foreground = "grey80", border = TRUE)

5.2 Working with facet_nodes()

# Apply the ggraph default style to the plots that follow.
set_graph_style()

# One panel per Department value of the nodes, with bordered panels.
g <- ggraph(graph = GAStech_graph) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes(colour = Department))
## Using `stress` as default layout
g +
  facet_nodes(facets = ~Department) +
  th_foreground(foreground = "grey80", border = TRUE)

6.0 Network Metrics Analysis

6.1 Computing centrality indices

From ggraph ver 2.0.0 onwards, all centrality measures can be calculated within ggraph and do not need mutate

# Add betweenness centrality as a node column, then size each node by it
# and colour it by Department.
g <- ggraph(
  mutate(GAStech_graph, betweenness_centrality = centrality_betweenness()),
  layout = "nicely"
) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(
    mapping = aes(colour = Department, size = betweenness_centrality)
  )

g + theme_graph()

7.0 Building Interactive Network Graph with visNetwork

visNetwork is an R package for network visualization, using the vis.js JavaScript library.

7.1 Building a basic interactive network graph

visNetwork() function uses a nodes list and edges list to create an interactive graph. The nodes list must include an “id” column, and the edge list must have “from” and “to” columns.

7.1.1 Data preparation

# Rebuild the aggregated edge list with the `from`/`to` columns visNetwork
# expects: look up the node id for the sender and receiver labels, count
# work-related emails per from/to pair, and keep pairs of different nodes
# that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c(sourceLabel = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c(targetLabel = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

7.1.2 Plotting the first interactive network graph

# Interactive graph built from the node table and the from/to edge table.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated)

7.2 Working with layout

# Same interactive graph, with node positions taken from the igraph
# "layout_with_fr" layout.
visIgraphLayout(
  visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated),
  layout = "layout_with_fr"
)

7.3 Working with visual attributes

7.3.1 Nodes

The code chunk below renames the Department field to group, so that the group variable holds each node's department.

# Rename the Department column to `group` in the node table.
GAStech_nodes <- rename(GAStech_nodes, group = Department)

The code chunk below shades the nodes by assigning unique colour to each category in the group field.

# No explicit colour option is set here.
# NOTE(review): any per-group shading presumably comes from the `group`
# column of GAStech_nodes; confirm against the visNetwork docs.
visIgraphLayout(
  visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated),
  layout = "layout_with_fr"
)

7.3.2 Edges

# Draw edges as smooth clockwise curves with an arrow head at the `to` end.
edge_smoothing <- list(enabled = TRUE, type = "curvedCW")
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(arrows = "to", smooth = edge_smoothing)

7.4 Interactivity

# Interactive options: highlight a selected node's neighbours, add a
# node-id selection control, and turn physics stabilization off.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = TRUE,
    nodesIdSelection = TRUE
  ) %>%
  visPhysics(stabilization = FALSE)