Wei Ming
23 November 2019

Preparation

1. Installing and Launching R Packages

# Packages used throughout this document for network wrangling and plotting.
packages <- c("igraph", "tidygraph", "ggraph", "visNetwork", "lubridate",
              "tidyverse")

# Install any package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability; it does not attach anything.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() stops with an error if the package still cannot be loaded.
  library(p, character.only = TRUE)
}
# Confirm that every package can be loaded; the result holds one logical
# value per package.
p <- c("igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")
lapply(p, function(pkg) require(pkg, character.only = TRUE))
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE

2. Data Wrangling

2.1 Importing Network Data

# Read the node and edge CSV files, then preview the columns of the edge table.
GAStech_nodes <- read_csv(file.path("data", "GAStech_email_node.csv"))
GAStech_edges <- read_csv(file.path("data", "GAStech_email_edge-v2.csv"))
GAStech_edges %>% glimpse()
## Observations: 9,063
## Variables: 8
## $ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26...
## $ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27...
## $ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6...
## $ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00,...
## $ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicPr...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "W...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Ka...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hi...

2.2 Wrangling Time

# Parse the day/month/year text dates, then derive the full weekday name.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)

2.3 Wrangling Attributes

# Count work-related emails per sender/recipient/weekday, ignoring emails sent
# to oneself and keeping only combinations that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  ungroup() %>%
  filter(Weight > 1)
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
##    source target Weekday   Weight
##     <dbl>  <dbl> <ord>      <int>
##  1      1      2 Monday         4
##  2      1      2 Tuesday        3
##  3      1      2 Wednesday      5
##  4      1      2 Friday         8
##  5      1      3 Monday         4
##  6      1      3 Tuesday        3
##  7      1      3 Wednesday      5
##  8      1      3 Friday         8
##  9      1      4 Monday         4
## 10      1      4 Tuesday        3
## # ... with 1,446 more rows

2.4 Creating Network Objects

# Combine the node table and the aggregated edges into a directed graph object.
GAStech_graph <- GAStech_nodes %>%
  tbl_graph(edges = GAStech_edges_aggregated, directed = TRUE)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
##      id label              Department    Title                             
##   <dbl> <chr>              <chr>         <chr>                             
## 1     1 Mat.Bramar         Administrati~ Assistant to CEO                  
## 2     2 Anda.Ribera        Administrati~ Assistant to CFO                  
## 3     3 Rachel.Pantanal    Administrati~ Assistant to CIO                  
## 4     4 Linda.Lagos        Administrati~ Assistant to COO                  
## 5     5 Ruscella.Mies.Hab~ Administrati~ Assistant to Engineering Group Ma~
## 6     6 Carla.Forluniau    Administrati~ Assistant to IT Group Manager     
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
##    from    to Weekday   Weight
##   <int> <int> <ord>      <int>
## 1     1     2 Monday         4
## 2     1     2 Tuesday        3
## 3     1     2 Wednesday      5
## # ... with 1,453 more rows

Task 1: Static Organisation Graph

Original Code

# Baseline plot: store both centrality measures as node columns, then draw the
# network with the "nicely" layout.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") + 
  geom_edge_link(aes()) +
  # Node colour follows closeness centrality; node size follows betweenness.
  geom_node_point(aes(colour = closeness_centrality, size=betweenness_centrality))

# Print the plot with the graph theme applied.
g + theme_graph()

1. Improve the code chunk used to create the organisation network graph by using the latest functions provided in ggraph 2.0.

Improvements:
1. Original data frame can be directly passed in as an input. A network object (tbl_graph) is not necessarily needed.
2. ‘centrality_closeness()’ and ‘centrality_betweenness()’ functions can be used in aesthetic mappings.

# Pass the edge data frame straight to ggraph() and compute both centrality
# measures inside the aesthetic mapping instead of storing them first.
graph1 <- GAStech_edges_aggregated %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(colour = centrality_closeness(), size = centrality_betweenness())
  )

graph1 + theme_graph()

2. Identify three aspects of the graph visualisation in Section 6.1 that can be improved.

  1. The nodes and edges are hard to distinguish because of the colours used and the dense, overlapping edges.
  2. The weight of the edges is not reflected in this graph.
  3. There are no labels to help readers tell the different nodes apart.
  4. There is no title for this graph.

3. Provide the sketch of your alternative design.

4. Using appropriate ggraph functions, plot the alternative design.

  1. The edges and nodes are easier to differentiate with rearranged layout.
  2. Different colors represent different departments.
  3. The frequency (weight) is shown by the transparency of the edges.
  4. The betweenness centrality and closeness centrality are represented by the size and transparency of the nodes.
  5. The nodes are labeled with names.
  6. A title is added for the new design.
# Store both centrality measures as node columns so they can be mapped to
# aesthetics by name.
GAStech_graph <- GAStech_graph %>%
  mutate(
    Betweenness_Centrality = centrality_betweenness(),
    Closeness_Centrality = centrality_closeness()
  )

# Layers are drawn in the order they are added, so the edges go first and the
# node points and labels are drawn on top of them rather than underneath.
graph2 <- ggraph(GAStech_graph, layout = "circle") +
  # Edge transparency encodes the email count (Weight).
  geom_edge_link(aes(alpha = Weight), width = 0.3) +
  scale_edge_alpha(range = c(0.2, 0.8)) +
  # Colour = department, size = betweenness, transparency = closeness.
  geom_node_point(
    aes(
      colour = Department,
      size = Betweenness_Centrality,
      alpha = Closeness_Centrality
    )
  ) +
  geom_node_text(
    aes(label = label),
    repel = TRUE,
    size = 2,
    alpha = 0.8,
    color = "brown"
  ) +
  ggtitle("Task1: Email Network Visualization")

graph2 + theme_graph()

Task 2: Interactive Organisation Graph

1. Improve the design of the graph

Data Preparation
# Rebuild the edge list with `from`/`to` id columns by looking up each sender
# and recipient label in the node table, then count work-related emails per
# pair and keep only pairs with more than one email.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related", from != to) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  ungroup() %>%
  filter(weight > 1)

# Expose the department under the column name `group`.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
Plot the Graph
# Draw the interactive network from the node and edge tables.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  # Position the nodes with igraph's "layout_with_fr" layout.
  visIgraphLayout(layout = "layout_with_fr") %>%
  # NOTE(review): a vector of ids is passed as the `label` option here while the
  # node table also has a `label` column - confirm which one is displayed.
  visNodes(label=GAStech_nodes$id, shape = "circle") %>%
  # Enable nearest-node highlighting and node selection by id.
  visOptions(highlightNearest = list(enabled = TRUE, labelOnly = FALSE), nodesIdSelection = TRUE)

2. Identify three aspects of the graph visualisation in Section 7.4 that can be improved.

  1. The frequency and direction of the connections are not reflected in the previous design.
  2. There is no title or legend to clarify the graph.
  3. The job title is not shown in the previous design.
  4. The label is outside the node in the original design, so it may be hard to read or may overlap other labels.

3. Provide the sketch of your alternative design.

4. Using appropriate visNetwork functions, plot the alternative design.

  1. Direction and frequency are shown by arrows and the weight of the edges in the new design.
  2. Labels are placed inside the node shapes, making them easier to read.
  3. A legend and a title are added to the graph.
  4. A tooltip with the job title is shown when the mouse hovers over a node.
# Expose the job title under the column name `title`, which visNetwork uses
# for the hover tooltip of each node.
GAStech_nodes <- GAStech_nodes %>%
  rename(title = Title)

# visNetwork scales edge width from a `value` column rather than `weight`, so
# the email count is copied into `value` to make the frequency visible.
visNetwork(
  GAStech_nodes,
  mutate(GAStech_edges_aggregated, value = weight),
  main = "Task2: Email Network Visualization"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(label = GAStech_nodes$id, shape = "box", title = "Title") %>%
  # Arrow heads show the direction of each email relationship.
  visEdges(arrows = "to") %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      labelOnly = FALSE
    ),
    nodesIdSelection = TRUE
  ) %>%
  visLegend(main = "Department", width = 0.2, position = "right", zoom = FALSE)