Lim Pei Xuan

14 November 2019

Setting Up

Installing and Launching R Packages

# Packages needed by this analysis. Any that are not yet installed are
# installed first, then every package is attached.
packages <- c(
  "devtools", "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "RColorBrewer"
)

for (p in packages) {
  # requireNamespace() only checks availability; the library() call below is
  # the single place a package is attached, and it errors if the package is
  # still unavailable after the install attempt.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

# Install visNetwork from its GitHub repository. The call is namespaced so it
# does not depend on devtools having been attached beforehand.
devtools::install_github("datastorm-open/visNetwork")

# Attach the packages used in the rest of the document. library() raises an
# error for a missing package, whereas require() would only return FALSE.
p <- c("igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")
for (pkg in p) {
  library(pkg, character.only = TRUE)
}

Data Wrangling

Importing network data from files

# Load the node and edge tables from CSV, then preview the edge columns.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")
GAStech_edges %>% glimpse()
## Observations: 9,063
## Variables: 8
## $ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26, …
## $ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27, …
## $ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6/1…
## $ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00, 0…
## $ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicProc…
## $ MainSubject <chr> "Work related", "Work related", "Work related", "Wor…
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Kano…
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hide…

Wrangling Time

# Parse SentDate as day-month-year dates, then derive the unabbreviated
# weekday label from the parsed date.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )

Wrangling Attributes

# Count work-related emails per source/target/Weekday combination. Rows whose
# source equals their target are dropped, and only combinations that occur
# more than once are kept. The result is an ungrouped table.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  count(source, target, Weekday, name = "Weight") %>%
  filter(Weight > 1)

# Recompute the Weekday column (unabbreviated weekday labels) from SentDate.
GAStech_edges <- GAStech_edges %>%
  mutate(Weekday = wday(SentDate, label = TRUE, abbr = FALSE))

Task 1: Static Organization Chart

Original Code

# Build a directed graph from the node table and the aggregated edge table.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Add both centrality measures as node columns, then draw the network with
# point colour mapped to closeness and point size mapped to betweenness.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  )

g + theme_graph()

Improved Code with ggraph2.0

Some Key Improvements:

  • No longer need to create tbl_graph as input to ggraph
  • tidygraph algorithms are available as inputs to aesthetic mappings
# ggraph 2.0 variant: the aggregated edge table is handed to ggraph() directly
# and the centrality functions are evaluated inside aes().
g2 <- GAStech_edges_aggregated %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(
      colour = centrality_closeness(),
      size = centrality_betweenness()
    )
  )

g2 + theme_graph()

Aspects for Improvement

| Problem | Proposed Solution |
|---|---|
| Cannot see the strength of the relation between nodes | Use alpha to represent the weight of edges |
| Unable to identify the nodes with the highest centrality | Provide labels for the nodes with the highest centrality |
| Nodes overlap | Change the layout to ensure all nodes can be seen |
| No representation of department | Include department with color |

Proposed Sketch

Alternative Design

Join Dataframes

# Attach node attributes to every aggregated edge by matching the edge's
# source to the node id; all edges are kept (all.x = TRUE). Then preview.
alt_data <- GAStech_edges_aggregated %>%
  merge(GAStech_nodes, by.x = "source", by.y = "id", all.x = TRUE)
alt_data %>% glimpse()
## Observations: 1,456
## Variables: 7
## $ source     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ target     <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6,…
## $ Weekday    <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday, …
## $ Weight     <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3,…
## $ label      <chr> "Mat.Bramar", "Mat.Bramar", "Mat.Bramar", "Mat.Bramar…
## $ Department <chr> "Administration", "Administration", "Administration",…
## $ Title      <chr> "Assistant to CEO", "Assistant to CEO", "Assistant to…

Prepare Graph

# Build the directed graph from the joined edge table and add the two
# centrality measures as node columns in a single step.
alt_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = alt_data,
  directed = TRUE
) %>%
  mutate(
    `Betweenness Centrality` = centrality_betweenness(),
    `Closeness Centrality` = centrality_closeness()
  )

Plot Graph

# Circular linear layout: edge transparency encodes Weight; node transparency,
# size and colour encode closeness, betweenness and Department respectively.
alt_g <- alt_graph %>%
  ggraph(layout = "linear", circular = TRUE) +
  geom_edge_arc(aes(alpha = Weight), width = 0.5) +
  geom_node_point(
    aes(
      alpha = `Closeness Centrality`,
      size = `Betweenness Centrality`,
      color = Department
    )
  )

# Label only the nodes whose closeness or betweenness exceeds the thresholds;
# all other nodes get an NA label.
alt_g +
  theme_graph() +
  coord_fixed() +
  geom_node_label(
    aes(
      label = ifelse(
        `Closeness Centrality` > 0.015 | `Betweenness Centrality` > 400,
        label,
        NA
      )
    ),
    repel = TRUE,
    alpha = 0.7
  )

Task 2: Interactive Organization Graph

Data Preparation

# Keep work-related emails, look up the node id for sourceLabel (as `from`)
# and for targetLabel (as `to`), then count emails per from/to pair. Pairs
# where from equals to are dropped, and only pairs that occur more than once
# are kept. The result is an ungrouped table.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(from != to) %>%
  count(from, to, name = "weight") %>%
  filter(weight > 1)

# Expose Department under the column name `group` on the node table.
GAStech_nodes <- rename(GAStech_nodes, group = Department)

Show only selected labels

When a node is selected via the dropdown or by clicking on it, all directly connected nodes and the corresponding edges are highlighted, and the rest are greyed out. Only the highlighted nodes will have labels.

# Interactive network with a Fruchterman-Reingold igraph layout, a node-id
# dropdown, and highlighting of a selected node's degree-1 neighbourhood.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(font = list(size = 30)) %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      degree = 1,
      labelOnly = FALSE
    ),
    nodesIdSelection = TRUE
  )

Aspects for Improvement

| Problem | Proposed Solution |
|---|---|
| Cannot see the connectivity of an entire department | Include a dropdown selector for department |
| Cannot see the strength or direction of the relation between nodes | Include direction and weight in the edges |
| Nodes are coloured by department, but the departments are not identified | Include a legend for departments |
| Job titles are not reflected | Include the job title in a tooltip on hover |
| Labels outside the nodes overlap with each other and with the edges, making them hard to read | Change the node shape to a box and include the label within it |

Proposed Sketch

Alternative Design

Prepare Data

Rename columns appropriately to make use of visNetwork’s capabilities

# Rename the edge weight to `value` and the node Title to `title` for use by
# the visNetwork plot.
GAStech_edges_alt <- rename(GAStech_edges_aggregated, value = weight)

GAStech_nodes <- rename(GAStech_nodes, title = Title)

Plot Graph

# Alternative interactive design: box-shaped nodes, arrows pointing to the
# target, selection by node id or by `group`, nearest-neighbour highlighting,
# and a legend on the right titled "Department".
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_alt) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(
    font = list(size = 30),
    shape = "box",
    title = "Title"
  ) %>%
  visEdges(arrows = "to") %>%
  visOptions(
    nodesIdSelection = TRUE,
    selectedBy = list(variable = "group", highlight = TRUE),
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      labelOnly = FALSE
    )
  ) %>%
  visLegend(width = 0.2, position = "right", main = "Department")