Author: Goh Jia Xian
Date: 22-Nov-2019

Setting Up the Environment

Preparing R Packages for Both Tasks

# Packages required by both tasks.
packages <- c("igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")

# Install any package that is not yet available, then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and fails loudly if the installation did not succeed.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Importing Network Data

# Node table (one row per employee) and edge table (one row per email).
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

Processing the Data

# Wrangling time: parse SentDate (day-month-year text) into a Date and derive
# the full weekday name as an ordered factor.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )

# Wrangling attributes: count work-related emails per sender/recipient pair
# and weekday.
GAStech_edges_aggregated <- GAStech_edges %>%
  # Keep work emails only and discard self-addressed messages up front.
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  ungroup() %>%
  # Retain pairs that exchanged more than one email on a given weekday.
  filter(Weight > 1)

Task 1

Task 1 Part 1: Improve the original code

Qns: Improve the code chunk used to create the organisation network graph by using the latest functions provided in ggraph 2.0.

Before: Section 6.1 of Hands-on Exercise 10
# Baseline version: build the directed graph explicitly from the node table
# and the aggregated edge list.
GAStech_graph <- tbl_graph(nodes = GAStech_nodes, edges = GAStech_edges_aggregated, directed = TRUE)

# Store both centrality measures as node columns first, then map them to the
# colour (closeness) and size (betweenness) of the node points.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") + 
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = closeness_centrality, 
                      size=betweenness_centrality))

# Draw the plot with the graph theme applied.
g + theme_graph()

After: Improvement Made

Changes made:
1. There is no need to create a tbl_graph object explicitly, because ggraph 2.0 is built on tidygraph internally: the input is automatically converted into a tbl_graph object.
2. The mutate() calls are not necessary, as the centrality functions can be called directly inside the ‘colour’ and ‘size’ aesthetics.

# The edge list is handed to ggraph() directly, and the centrality helpers
# are evaluated inside aes() instead of being stored as node columns first.
GAStech_edges_aggregated %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(
      colour = centrality_closeness(),
      size = centrality_betweenness()
    )
  ) +
  theme_graph()

Task 1 Part 2: Three aspects of improvement

Qns: Identify three aspects of the graph visualisation in Section 6.1 that can be improved.

Based on the graph plotted in part 1, it is difficult to retrieve any meaningful insights due to poor design in the following aspects:

1. Network

  Problem: The graph in general looks disorganised and unnecessarily complicated, which makes it difficult
  for readers to study the links between the nodes.
  
  Solution: Use a new layout to display the chart to prevent the edges from crossing and overlapping.

2. Nodes

  Problem: It is not possible to identify what each node represents, as there are no labels indicating either
  name or group. Also, some of the nodes cannot be seen because they share the same colour as the edges.
  
  Solution: Use labels to show nodes with high Betweenness Centrality and Closeness Centrality. 
  The nodes should be coloured according to its department.

3. Edges

Problem: Unable to derive the frequency of emails sent in the network as all edges used the same weight

Solution: Set the weight of the edges according to the frequency of emails sent between nodes. 

Task 1 Part 3: Alternative design

Qns: Provide the sketch of your alternative design.

Task 1 Part 4: Plot Alternate Design

Qns: Using appropriate ggraph functions, plot the alternative design

Preparing Graph

# Build the directed graph from the node table and the aggregated edge list.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print the graph with its edges listed from heaviest to lightest.
arrange(activate(GAStech_graph, edges), desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
##    from    to Weekday Weight
##   <int> <int> <ord>    <int>
## 1    40    41 Tuesday     23
## 2    40    43 Tuesday     19
## 3    41    43 Tuesday     15
## 4    41    40 Tuesday     14
## 5    42    41 Tuesday     13
## 6    42    40 Tuesday     12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
##      id label           Department     Title           
##   <dbl> <chr>           <chr>          <chr>           
## 1     1 Mat.Bramar      Administration Assistant to CEO
## 2     2 Anda.Ribera     Administration Assistant to CFO
## 3     3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows

Plotting the Graph

# Add the node measures used by the plot: raw betweenness centrality, and
# closeness centrality banded into two classes at the 0.015 cut-off.
GAStech_graph <- GAStech_graph %>%
  mutate(
    BetweennessCentrality = centrality_betweenness(),
    ClosenessCentrality = ifelse(
      centrality_closeness() >= 0.015,
      "High (> 0.015)",
      "Low (< 0.015)"
    )
  )

# Plotting graph: linear layout with arcs. Edge width encodes email volume;
# node colour/fill encode department, size encodes betweenness centrality
# and shape encodes the closeness band.
GAStech_graph %>%
  ggraph(layout = "linear") +
  geom_edge_arc(aes(width = Weight), alpha = 0.15, strength = 0.5) +
  scale_edge_width(range = c(0.3, 5)) +
  geom_node_point(
    aes(
      colour = Department,
      size = BetweennessCentrality,
      shape = ClosenessCentrality,
      fill = Department
    )
  ) +
  scale_shape_manual(values = c(23, 21)) +
  # Label only nodes with high betweenness or high closeness; all other
  # nodes receive an NA label.
  geom_node_label(
    aes(
      label = ifelse(
        BetweennessCentrality > 300 | ClosenessCentrality == "High (> 0.015)",
        label,
        NA
      )
    ),
    repel = TRUE,
    alpha = 0.5,
    size = 4
  ) +
  theme_graph()

Task 2 Part 1: Improve the design of the graph

Data preparation for Interactive Graph

# Rebuild the aggregated edge list with `from`/`to` node ids and a `weight`
# column. This overwrites the per-weekday table of the same name.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  # Look up the sender's node id, then the recipient's, by label.
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  # Drop self-addressed emails before counting.
  filter(from != to) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  ungroup() %>%
  # Keep only pairs that exchanged more than one email.
  filter(weight > 1)

# Expose the department under the column name `group`, which the visNetwork
# calls below refer to (e.g. selectedBy = "group").
GAStech_nodes <- rename(GAStech_nodes, group = Department)

Task 2 Part 1: Improve the design of the graph

Incorporating the following interactivity:

1. When a name is selected from the drop-down list, the corresponding node will not only be highlighted but also will be labelled. Furthermore, all the linked nodes of the selected node will also be labelled too.

2. When a node of the interactive graph is selected, the node will not only be highlighted but also will be labelled. Furthermore, all the linked nodes of the selected node will be labelled as well.

Before: Section 7.4 of Hands-on Exercise 10

# Baseline version: interactive network positioned with igraph's
# Fruchterman-Reingold layout ("layout_with_fr"). Nearest-neighbour
# highlighting is enabled with labelOnly = TRUE, and nodesIdSelection adds
# a selector for picking a node by id.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(highlightNearest = list(enabled = TRUE, 
                                     labelOnly=TRUE), 
             nodesIdSelection=TRUE)

After: Interactive Graph Showing Highlighted Labels only

# Same layout and node selector as the baseline call, but with labelOnly
# switched off in the nearest-neighbour highlighting options.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE
  )

Task 2 Part 2: Identify three aspects of the graph visualisation in Section 7.4 that can be improved.

Based on the graph plotted in part 1, the following aspects should be improved :

1. Network: The graph in general looks disorganised and unnecessarily complicated, which makes it difficult for readers to study the links between the nodes.

2. Nodes: It is not possible to identify what each node represents, as there are no labels indicating either name or group. Also, some of the nodes cannot be seen because they share the same colour as the edges.

3. Edges Unable to derive the frequency of emails sent in the network as all edges used the same weight

Task 2 Part 4: Using appropriate visNetwork functions, plot the alternative design.

# Tidy the node table for display:
#  - label: show "First Last" instead of "First.Last". Every dot is replaced
#    (not only the first one), so names with more than one dot are fully
#    cleaned; other punctuation such as hyphens is left untouched.
#  - title: the job title prefixed with "Title: " (single space); visNetwork
#    shows the `title` column as the node's hover tooltip.
GAStech_nodes <- GAStech_nodes %>%
  mutate(label = str_replace_all(label, fixed("."), " ")) %>%
  rename(title = Title) %>%
  mutate(title = paste0("Title: ", title))

# Add the email count as a text `label` column on the edge table.
GAStech_edges_aggregated <- mutate(
  GAStech_edges_aggregated,
  label = paste(weight)
)
  
# Final interactive network: directed edges, selection by node id or by
# group, styled hover tooltips, large node labels and a department legend.
visNetwork(
  GAStech_nodes,
  GAStech_edges_aggregated,
  main = "GASTech Email's Network Graph"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(selectionWidth = 7, arrows = "to") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE,
    selectedBy = "group",
    width = "100%",
    height = "100%"
  ) %>%
  visInteraction(
    tooltipDelay = 0,
    tooltipStay = 60,
    tooltipStyle = "position: fixed;visibility:hidden;padding: 1px;font-size:12px;background-color: white;"
  ) %>%
  visNodes(font = list(size = 30), shape = "ellipse") %>%
  visLegend(main = "Department", position = "right", width = 0.15)