1. Installation of required package

The following code checks whether each of the listed packages is installed. A package that is not yet installed is installed first; a package that is already installed is skipped. Each package is then loaded.

# Packages required by the analysis and visualisations in this document.
packages <- c('igraph', 'tidygraph', 'ggraph', 'visNetwork', 'lubridate', 'tidyverse', 'ggrepel')

# For each package: try to load it; if that fails, install it first.
# The final library() call attaches the package either way and raises an
# error if the installation did not succeed.
for (p in packages) {
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
## Loading required package: igraph
## Warning: package 'igraph' was built under R version 3.5.3
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## Loading required package: tidygraph
## Warning: package 'tidygraph' was built under R version 3.5.3
## 
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:stats':
## 
##     filter
## Loading required package: ggraph
## Warning: package 'ggraph' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: visNetwork
## Warning: package 'visNetwork' was built under R version 3.5.3
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 3.5.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:igraph':
## 
##     %--%
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.3.3
## v tidyr   1.0.0     v dplyr   0.8.3
## v readr   1.3.1     v stringr 1.4.0
## v tibble  2.1.3     v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::%--%()        masks igraph::%--%()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::as_data_frame()   masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose()         masks igraph::compose()
## x tidyr::crossing()        masks igraph::crossing()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks tidygraph::filter(), stats::filter()
## x dplyr::groups()          masks tidygraph::groups(), igraph::groups()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x purrr::simplify()        masks igraph::simplify()
## x lubridate::union()       masks igraph::union(), base::union()
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 3.5.3

2. Data Wrangling

2.1 Reading csv files needed

In this step, we read the .csv files and assign them to the respective variables, GAStech_nodes and GAStech_edges.

# Read the node table from CSV; the column types are guessed by read_csv()
# (see the parse message printed below).
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   label = col_character(),
##   Department = col_character(),
##   Title = col_character()
## )
# Read the e-mail edge table from CSV. SentDate is parsed as character here
# (see the parse message printed below) and is converted to Date later.
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")
## Parsed with column specification:
## cols(
##   source = col_double(),
##   target = col_double(),
##   SentDate = col_character(),
##   SentTime = col_time(format = ""),
##   Subject = col_character(),
##   MainSubject = col_character(),
##   sourceLabel = col_character(),
##   targetLabel = col_character()
## )

2.2 Transform into Date type and get the Weekday

# Convert the day-month-year strings to Date values, then derive the
# unabbreviated weekday label from the converted dates.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)

2.3 Aggregate the Edges

GAStech_edges is aggregated by source, target and Weekday. The source and target columns give the direction of each edge when the graph is plotted, Weekday shows the day on which the interaction took place, and Weight records how often that interaction occurred.

# Count work-related e-mails per (source, target, Weekday) combination.
# E-mails sent to oneself are excluded, and only combinations that occur
# more than once are kept.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  ungroup() %>%
  filter(Weight > 1)

# Print the aggregated edge list.
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
##    source target Weekday   Weight
##     <dbl>  <dbl> <ord>      <int>
##  1      1      2 Monday         4
##  2      1      2 Tuesday        3
##  3      1      2 Wednesday      5
##  4      1      2 Friday         8
##  5      1      3 Monday         4
##  6      1      3 Tuesday        3
##  7      1      3 Wednesday      5
##  8      1      3 Friday         8
##  9      1      4 Monday         4
## 10      1      4 Tuesday        3
## # ... with 1,446 more rows

2.4 Build network object

Network object will be built by using the tbl_graph. tbl_graph will be created from nodes and edges data.

# Combine the node table and the aggregated edge list into one directed
# tidygraph object, then print it to inspect the node and edge data.
GAStech_graph <- tbl_graph(nodes = GAStech_nodes, edges = GAStech_edges_aggregated, directed = TRUE)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
##      id label               Department     Title                                
##   <dbl> <chr>               <chr>          <chr>                                
## 1     1 Mat.Bramar          Administration Assistant to CEO                     
## 2     2 Anda.Ribera         Administration Assistant to CFO                     
## 3     3 Rachel.Pantanal     Administration Assistant to CIO                     
## 4     4 Linda.Lagos         Administration Assistant to COO                     
## 5     5 Ruscella.Mies.Haber Administration Assistant to Engineering Group Manag~
## 6     6 Carla.Forluniau     Administration Assistant to IT Group Manager        
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
##    from    to Weekday   Weight
##   <int> <int> <ord>      <int>
## 1     1     2 Monday         4
## 2     1     2 Tuesday        3
## 3     1     2 Wednesday      5
## # ... with 1,453 more rows

Here the edge data is activated and arranged by Weight in descending order.

# Make the edge table the active one and sort it by Weight, largest first.
# The result is only printed; it is not assigned, so GAStech_graph itself
# is left unchanged.
GAStech_graph %>%
  activate(edges) %>%
  arrange(desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
##    from    to Weekday Weight
##   <int> <int> <ord>    <int>
## 1    40    41 Tuesday     23
## 2    40    43 Tuesday     19
## 3    41    43 Tuesday     15
## 4    41    40 Tuesday     14
## 5    42    41 Tuesday     13
## 6    42    40 Tuesday     12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
##      id label           Department     Title           
##   <dbl> <chr>           <chr>          <chr>           
## 1     1 Mat.Bramar      Administration Assistant to CEO
## 2     2 Anda.Ribera     Administration Assistant to CFO
## 3     3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows

3. Task 1: Static Organisation Graph

3.1 Initial Graph

This is the initial graph that needs to be improved.

# Baseline plot: node colour encodes closeness centrality and node size
# encodes betweenness centrality; edges are drawn with default settings.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  )

g +
  theme_graph() +
  labs(title = "Centrality Indices (Initial Graph)")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

3.2 Aspects of the Initial Graph that can be improved

After viewing the initial graph, there are three aspects of the graph visualization that can be improved.

  • The edge links (geom_edge_link) connecting the nodes all have the same width and are too thick.
    • An edge link is supposed to show the relationship / interaction between two nodes (employees), and its width can represent how often they interact with each other. However, because every link is drawn with the same width and thickness, the links overlap. Hence, it is hard to spot the relationships among nodes and hard to tell how frequently each pair of nodes interacts.
  • The nodes cannot be identified because they are not labelled.
    • In the initial graph, there are 4 nodes located at the centre that have high closeness_centrality and betweenness_centrality, but without proper labelling we are unable to identify them. The initial exploration of the alternative design places labels on these 4 central nodes so that we can tell which nodes (employees) interact with the most people. The final alternative creates facets by weekday and department, which can be used to find the most significant node (employee) in each department and to observe the network pattern of each department on each weekday.
  • The node colour (geom_node_point) does not clearly differentiate values of closeness_centrality, and the blue nodes are hard to read against the black edge links.
    • In the initial graph, differences in closeness_centrality are represented only by different shades of blue, which are very hard to differentiate. On top of that, the lower the closeness_centrality, the darker the shade of blue, which looks similar to the black edge links. This can cause confusion and makes the nodes hard to spot. Hence, we should use two colours to represent the high and low values of closeness_centrality.

3.3 Sketch of Alternative Design

Sketch of Task 1

Sketch of Task 1

3.4 Exploration of alternative graph

# Exploration plot: edge width follows Weight, edges are semi-transparent,
# and node colour runs from dark red (low closeness) to teal (high closeness).
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(width = Weight), colour = "black", alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  ) +
  scale_color_gradient(low = "#850101", high = "#0c818e")

# Label only nodes whose closeness centrality exceeds 0.015. All other nodes
# get an empty label and have their label anchor moved to (0, 0).
g +
  theme_graph() +
  geom_label_repel(
    aes(
      x = ifelse(closeness_centrality > 0.015, x, 0),
      y = ifelse(closeness_centrality > 0.015, y, 0),
      label = ifelse(closeness_centrality > 0.015, label, "")
    ),
    fontface = 'bold',
    color = 'black',
    box.padding = 0.80,
    point.padding = 0.5,
    na.rm = TRUE
  ) +
  labs(title = "Centrality Indices (Exploration of Graph)")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

From this initial exploration of the graph, we can identify the nodes (employees) that have high closeness_centrality and betweenness_centrality. However, there is still room for improvement, as we cannot learn anything further about a node, such as its Department. Hence, another alternative design is to facet the graph by Department and Weekday.

3.5 Graph of Alternative Design

# Final static design: same centrality encoding as the exploration plot
# (colour = closeness, size = betweenness), with semi-transparent nodes and
# edges so overlapping elements remain visible.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(width = Weight), colour = "black", alpha = 0.4) +
  # The width mapping is declared on geom_edge_link() above; the scale only
  # sets the output range. An aes() passed here would be taken as an unnamed
  # scale argument rather than as a mapping.
  scale_edge_width(range = c(0.1, 7)) +
  geom_node_point(aes(colour = closeness_centrality, size = betweenness_centrality), alpha = 0.5) +
  scale_color_gradient(low = "#850101", high = "#0c818e")

# Facet by Weekday ~ Department and wrap long facet labels at 10 characters.
# Only nodes with closeness centrality above 0.015 get a text label; all
# other nodes get an empty label anchored at (0, 0).
g + facet_graph(Weekday ~ Department, labeller = label_wrap_gen(width = 10)) +
  geom_text_repel(aes(x = ifelse(closeness_centrality > 0.015, x, 0),
                      y = ifelse(closeness_centrality > 0.015, y, 0),
                      label = ifelse(closeness_centrality > 0.015, label, "")),
                  fontface = 'bold', color = 'black',
                  size = 3,
                  box.padding = 0.80, point.padding = 0.6,
                  segment.colour = "yellow",
                  na.rm = TRUE) +
  theme(strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"),
        strip.text.y = element_text(size = 12, color = "black", face = "bold.italic")) +
  labs(title = "Static Organization Graph Makeover by Weekday & Department")

4. Task 2: Interactive Organization Graph

4.1 Data preparation

# Build the visNetwork edge list: keep work-related e-mails, look up the
# sender and receiver ids by label, and count e-mails per (from, to) pair.
# Self-addressed mail and pairs with a single e-mail are dropped.
GAStech_edges_aggregated_visNetwork <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(from != to) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  ungroup() %>%
  filter(weight > 1)

# Node table for visNetwork: a copy of GAStech_nodes with the Department
# column renamed to `group`.
GAStech_nodes_visNetwork <- rename(GAStech_nodes, group = Department)

4.2 Initial Graph

# Baseline interactive graph: no title, node positions computed with the
# "layout_with_fr" igraph layout, neighbour highlighting on selection, and a
# drop-down for selecting a node by id.
visNetwork(GAStech_nodes_visNetwork, GAStech_edges_aggregated_visNetwork,
           main = "") %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)

4.3 Aspects of the Initial Graph that can be improved

After viewing the initial graph, there are three aspects of the graph visualization that can be improved.

  • There is no visible label to identify each node (employee), and no hover information is provided about each node.
    • In the initial graph, we cannot identify a node simply by looking at the graph; we have to zoom in to find its label. The nodes therefore need to be made clearer so that they can be identified without zooming in. In addition, hovering over a node shows no extra information, which prevents us from understanding more about what the graph represents. Hence, node labels and hover information with the relevant details should be added for a clearer display.
  • There is no indicator showing what the colours represent.
    • We can observe that each colour represents a group (Department). However, just by looking at the graph, we cannot tell which colour belongs to which department, which also prevents the user from learning more from the graph.
  • When a node is selected, only the unrelated nodes are greyed out.
    • When a node is selected, only the unrelated nodes are greyed out, while the edge links keep their colour, which is confusing aesthetically. It also does not help the user to see the relationships of the selected node clearly. Hence, I think that not only the unrelated nodes but also the unrelated edge links (lines) should be greyed out.

4.4 Sketch of Alternative Design

Sketch of Task 2

Sketch of Task 2

4.5 Customize the hover info and colour of the nodes

This section sets custom colours for the nodes and for the legend. When a second drop-down (select by Department) is provided and a department is selected, the highlighted nodes are shown in a colour different from the one in the legend. Hence, custom colours are set so that the node colours are consistent with the legend colours.

# Departments in order of first appearance, each paired with a fixed colour.
uniqueLabel <- unique(GAStech_nodes_visNetwork$group)
department_colours <- c("#C33C23", "#77DF79", "#799FCB", "#FFB447", "#B29DD9", "#FCE2C2")
# Fail early with a clear message if there are more departments than colours.
stopifnot(length(uniqueLabel) <= length(department_colours))
# stringsAsFactors = FALSE keeps both columns as plain character vectors on
# every R version, so the colours stay in the order written above instead of
# being re-ordered as factor levels.
legend_node <- data.frame(label = uniqueLabel,
                          color = department_colours[seq_along(uniqueLabel)],
                          stringsAsFactors = FALSE)

# Hover text (HTML) shown for each node: name, department and job title.
GAStech_nodes_visNetwork$title <- paste0(GAStech_nodes_visNetwork$label, "<br> Department: ",
                                         GAStech_nodes_visNetwork$group,
                                         "<br> Title: ", GAStech_nodes_visNetwork$Title)
# Look up each node's colour from its department.
GAStech_nodes_visNetwork$color <- legend_node$color[
  match(GAStech_nodes_visNetwork$group, legend_node$label)
]

# Legend entries: one row per department with the colour its nodes use. Both
# unique() calls return values in order of first appearance, so they line up.
uniquecolor <- unique(GAStech_nodes_visNetwork$color)
legend_nodes <- data.frame(label = unique(GAStech_nodes_visNetwork$group),
                           color = uniquecolor,
                           stringsAsFactors = FALSE)

4.6 Graph of Alternative Design (Interactive Network Graph)

# Final interactive graph with a title, boxed node labels, two selection
# drop-downs (by employee and by department) and a colour legend.
visNetwork(
  GAStech_nodes_visNetwork,
  GAStech_edges_aggregated_visNetwork,
  main = "Interactive Network Graph Makeover"
) %>%
  # Optimization: igraph layout, straight edges, no stabilization.
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(smooth = FALSE) %>%
  visPhysics(stabilization = FALSE) %>%
  # Customization: nodes can be dragged and the view zoomed, but the view
  # itself cannot be dragged.
  visInteraction(dragNodes = TRUE, dragView = FALSE, zoomView = TRUE) %>%
  visNodes(
    labelHighlightBold = TRUE,
    font = list(size = 30),
    shape = "box"
  ) %>%
           #scaling = list(label=list(Threshold=30, maxVisible=60))) %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      hover = FALSE,
      algorithm = "hierarchical"
    ),
    nodesIdSelection = list(enabled = TRUE, main = "Employee"),
    selectedBy = list(variable = "group", main = "Department")
  ) %>%
  # Legend built from the explicit legend_nodes table rather than from groups.
  visLegend(
    width = 0.2,
    position = "right",
    zoom = FALSE,
    addNodes = legend_nodes,
    useGroups = FALSE
  )