library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Packages required for the network analysis and visualisations below.
packages <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "ggrepel"
)

# Attach each package, installing it first when it is not yet available.
for (p in packages) {
  # require() returns FALSE (instead of raising an error) when the package
  # cannot be loaded, which is what makes it usable as an install check.
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  # library() raises an error if the package is still unavailable after the
  # install attempt, so a failed installation does not go unnoticed.
  library(p, character.only = TRUE)
}
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## Loading required package: tidygraph
## 
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:stats':
## 
##     filter
## Loading required package: ggraph
## Loading required package: ggplot2
## Loading required package: visNetwork
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:igraph':
## 
##     %--%
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyverse
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::%--%()        masks igraph::%--%()
## x lubridate::as.difftime() masks base::as.difftime()
## x tibble::as_data_frame()  masks igraph::as_data_frame(), dplyr::as_data_frame()
## x purrr::compose()         masks igraph::compose()
## x tidyr::crossing()        masks igraph::crossing()
## x lubridate::date()        masks base::date()
## x tidygraph::filter()      masks dplyr::filter(), stats::filter()
## x tidygraph::groups()      masks igraph::groups(), dplyr::groups()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x purrr::simplify()        masks igraph::simplify()
## x lubridate::union()       masks igraph::union(), base::union()
## Loading required package: ggrepel
# Same package list as above; require() returns TRUE/FALSE per package, so the
# resulting list shows which packages were attached successfully.
p <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "ggrepel"
)
lapply(X = p, FUN = require, character.only = TRUE)
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE
## 
## [[7]]
## [1] TRUE
# Node table: one row per node with its id, label, Department and Title.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   label = col_character(),
##   Department = col_character(),
##   Title = col_character()
## )
# Edge table: one row per email, with numeric source/target ids, send date and
# time, subject fields, and the source/target labels.
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")
## Parsed with column specification:
## cols(
##   source = col_double(),
##   target = col_double(),
##   SentDate = col_character(),
##   SentTime = col_time(format = ""),
##   Subject = col_character(),
##   MainSubject = col_character(),
##   sourceLabel = col_character(),
##   targetLabel = col_character()
## )
# SentDate is read as text; parse it as day-month-year and derive the
# unabbreviated weekday name from the parsed date.
GAStech_edges$SentDate <- dmy(GAStech_edges$SentDate)
GAStech_edges$Weekday <- wday(GAStech_edges$SentDate, label = TRUE, abbr = FALSE)

# Count work-related emails per (source, target, Weekday) and keep only the
# combinations between two different nodes that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()

# Print the aggregated edge list.
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
##    source target Weekday   Weight
##     <dbl>  <dbl> <ord>      <int>
##  1      1      2 Monday         4
##  2      1      2 Tuesday        3
##  3      1      2 Wednesday      5
##  4      1      2 Friday         8
##  5      1      3 Monday         4
##  6      1      3 Tuesday        3
##  7      1      3 Wednesday      5
##  8      1      3 Friday         8
##  9      1      4 Monday         4
## 10      1      4 Tuesday        3
## # ... with 1,446 more rows
# Combine the node table and the aggregated edge list into a directed graph.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print a summary of the graph (node and edge data).
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
##      id label              Department    Title                             
##   <dbl> <chr>              <chr>         <chr>                             
## 1     1 Mat.Bramar         Administrati~ Assistant to CEO                  
## 2     2 Anda.Ribera        Administrati~ Assistant to CFO                  
## 3     3 Rachel.Pantanal    Administrati~ Assistant to CIO                  
## 4     4 Linda.Lagos        Administrati~ Assistant to COO                  
## 5     5 Ruscella.Mies.Hab~ Administrati~ Assistant to Engineering Group Ma~
## 6     6 Carla.Forluniau    Administrati~ Assistant to IT Group Manager     
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
##    from    to Weekday   Weight
##   <int> <int> <ord>      <int>
## 1     1     2 Monday         4
## 2     1     2 Tuesday        3
## 3     1     2 Wednesday      5
## # ... with 1,453 more rows

Task 1

Original Graph

# Baseline plot: node colour encodes closeness centrality and node size
# encodes betweenness centrality; edges are drawn with default settings.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  )

g + theme_graph()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Areas for improvement

  1. The colour scale chosen for “closeness_centrality”, a continuous variable, is not well suited to this graph. Nodes at the lower end of the range (below 0.010) are hard to see because their colour is too close to that of the black edges they overlap, making it difficult to distinguish these nodes from the edges.

  2. It is rather difficult to distinguish one node from another because the edges are relatively thick.

  3. Because no faceting is used, the visualisation offers limited insight: it reflects the data as a whole, without separating it by categories such as Weekday or Department.

Sketch of alternative Design

Alternative Design - Recommendation 1 & 2 Implemented

# Alternative design (recommendations 1 and 2): thin, translucent edges whose
# width follows Weight, and a purple-to-pink gradient for closeness centrality
# so that low-valued nodes no longer blend into the edges.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes(width = Weight), alpha = 0.1, show.legend = FALSE) +
  # The width mapping is declared in geom_edge_link(); the scale only needs
  # its output range (scales take settings, not an aes() mapping).
  scale_edge_width(range = c(0.1, 7)) +
  # alpha is a constant, so it is set outside aes(); inside aes() it would be
  # treated as a mapped variable and produce a meaningless "alpha" legend.
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality),
    alpha = 0.3
  ) +
  scale_color_gradient(low = "purple", high = "light pink")

# Label only the nodes whose closeness centrality exceeds 0.015. Every node
# keeps its real position and the others get an empty label, so the visible
# labels are repelled from all plotted points rather than from (0, 0).
g + theme_graph() + labs(title = "Centrality Indices") +
  geom_text_repel(
    aes(
      x = x,
      y = y,
      label = ifelse(closeness_centrality > 0.015, label, "")
    ),
    fontface = "bold", color = "black",
    size = 3,
    box.padding = 1, point.padding = 0.8,
    segment.colour = "white",
    na.rm = TRUE
  ) +
  theme(
    strip.text.x = element_text(size = 8, color = "black", face = "bold"),
    strip.text.y = element_text(size = 8, color = "black", face = "bold")
  )
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Alternative Design - Recommendation 3 Implemented

# Alternative design (recommendation 3): the same encodings as the previous
# alternative, with wider and more opaque edges, prepared for faceting.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes(width = Weight), alpha = 0.5, show.legend = FALSE) +
  # The width mapping is declared in geom_edge_link(); the scale only needs
  # its output range (scales take settings, not an aes() mapping).
  scale_edge_width(range = c(0.1, 10)) +
  # alpha is a constant, so it is set outside aes(); inside aes() it would be
  # treated as a mapped variable and produce a meaningless "alpha" legend.
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality),
    alpha = 0.5
  ) +
  scale_color_gradient(low = "purple", high = "light pink")

# Facet by Weekday (rows) and Department (columns), wrapping long strip
# labels. Only nodes with closeness centrality above 0.015 are labelled; all
# nodes keep their real positions so labels are repelled from actual points.
g +
  facet_graph(Weekday ~ Department, labeller = label_wrap_gen(width = 10)) +
  theme_graph() +
  labs(title = "Centrality Indices") +
  geom_text_repel(
    aes(
      x = x,
      y = y,
      label = ifelse(closeness_centrality > 0.015, label, "")
    ),
    color = "black", size = 1, box.padding = 1,
    point.padding = 0.8, segment.colour = "white", na.rm = TRUE
  ) +
  theme(
    strip.text.x = element_text(size = 5, color = "black"),
    strip.text.y = element_text(size = 5, color = "black")
  )
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Task 2

Original Graph

# Rebuild the edge list for visNetwork: look up the node id for each email's
# source and target label, naming the ids `from` and `to`, then count
# work-related emails per (from, to) pair. Only pairs of two different nodes
# with more than one email are kept.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

# Expose Department under the column name `group` for visNetwork.
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department)

# Baseline interactive network using the Fruchterman-Reingold igraph layout.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr")

Areas for improvement

  1. There’s no legend to show how the nodes are coloured according to their respective departments. The title is missing as well.

  2. The edges do not show any direction, so some information (which node is the source and which is the target) is not presented to the viewer.

  3. There is no label on each node unless the visualisation is zoomed in.

Sketch of alternative Design

Alternative Design

# Improved interactive network: adds a title, arrowheads pointing at the `to`
# node, larger node fonts, nearest-neighbour highlighting, a node selection
# drop-down and a legend titled "Department".
visNetwork(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  main = "Improved Interactive Network Graph"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(arrows = "to") %>%
  visNodes(font = list(size = 40), color = "black") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, hover = FALSE),
    nodesIdSelection = list(
      enabled = TRUE,
      main = "Select by Name",
      values = unique(GAStech_nodes$id)
    )
  ) %>%
  visLegend(main = "Department")