1.0 Overview
This data visualization makeover involves 2 tasks.
Task 1: Static Organisation Graph
Task 2: Interactive Organisation Graph
2.0 Installing and Launching R Packages
2.1 Installing packages
# Install any package that is not yet available, then attach every package
# used in this document.
packages <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "ggrepel", "deldir"
)
for (p in packages) {
  # require() returns FALSE instead of raising an error when the package is
  # missing, which is what triggers the installation.
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Loading required package: tidygraph
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
## Loading required package: ggraph
## Loading required package: ggplot2
## Loading required package: visNetwork
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:igraph':
##
## %--%
## The following object is masked from 'package:base':
##
## date
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 1.0.0 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::%--%() masks igraph::%--%()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose() masks igraph::compose()
## x tidyr::crossing() masks igraph::crossing()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks tidygraph::filter(), stats::filter()
## x dplyr::groups() masks tidygraph::groups(), igraph::groups()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::simplify() masks igraph::simplify()
## x lubridate::union() masks igraph::union(), base::union()
## Loading required package: ggrepel
## Loading required package: deldir
## deldir 0.1-23
2.2 Launch packages if packages are already installed
# Attach the packages without installing anything; the printed list holds the
# logical value returned by require() for each package, in the same order.
p <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "ggrepel"
)
lapply(p, function(pkg) require(pkg, character.only = TRUE))
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
##
## [[7]]
## [1] TRUE
3.0 Data Wrangling
3.1 Importing network data from files
# Node table with the columns id, label, Department and Title.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
## Parsed with column specification:
## cols(
## id = col_double(),
## label = col_character(),
## Department = col_character(),
## Title = col_character()
## )
# Edge table: one row per email, with source/target ids, send date and time,
# subject fields and the sender/recipient labels.
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")
## Parsed with column specification:
## cols(
## source = col_double(),
## target = col_double(),
## SentDate = col_character(),
## SentTime = col_time(format = ""),
## Subject = col_character(),
## MainSubject = col_character(),
## sourceLabel = col_character(),
## targetLabel = col_character()
## )
3.2 Wrangling time
# Parse the day-month-year strings into dates, then derive the unabbreviated
# weekday name of every email from the parsed date.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)
3.3 Wrangling attributes
# Count work-related emails per source/target/weekday combination, ignoring
# emails sent to oneself and keeping only combinations seen more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  ungroup() %>%
  filter(Weight > 1)

# Print the aggregated edge table.
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
## source target Weekday Weight
## <dbl> <dbl> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## 4 1 2 Friday 8
## 5 1 3 Monday 4
## 6 1 3 Tuesday 3
## 7 1 3 Wednesday 5
## 8 1 3 Friday 8
## 9 1 4 Monday 4
## 10 1 4 Tuesday 3
## # ... with 1,446 more rows
3.4 Creating network objects using tidygraph
# Combine the node table and the aggregated edge table into a directed
# tidygraph object, then print its summary.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administrati~ Assistant to CEO
## 2 2 Anda.Ribera Administrati~ Assistant to CFO
## 3 3 Rachel.Pantanal Administrati~ Assistant to CIO
## 4 4 Linda.Lagos Administrati~ Assistant to COO
## 5 5 Ruscella.Mies.Hab~ Administrati~ Assistant to Engineering Group Ma~
## 6 6 Carla.Forluniau Administrati~ Assistant to IT Group Manager
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## # ... with 1,453 more rows
# Show the edges ordered from the largest to the smallest Weight. The result
# is only printed; GAStech_graph itself is not reassigned.
GAStech_graph %>%
  activate(edges) %>%
  arrange(desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 40 41 Tuesday 23
## 2 40 43 Tuesday 19
## 3 41 43 Tuesday 15
## 4 41 40 Tuesday 14
## 5 42 41 Tuesday 13
## 6 42 40 Tuesday 12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows
4.0 Task 1: Static Organisation Graph
4.1 Initial Graph (before improvements)
# Baseline plot: node colour encodes closeness centrality, node size encodes
# betweenness centrality, and edges are drawn as plain links.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  )

g + theme_graph() + labs(title = "Centrality Indices")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

4.1 Initial Graph (Improved code)
# Same baseline plot, but the centrality measures are computed directly inside
# aes() instead of being stored as node columns first.
# The plot is built from GAStech_graph (nodes and edges) rather than from the
# bare edge table, so the node attributes stay attached to the plotted graph
# and no data-frame-to-graph coercion is needed.
gg2 <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(colour = centrality_closeness(), size = centrality_betweenness())
  )

gg2 + theme_graph()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

4.2 Alternative Graph Design
After viewing the initial graph, there are three aspects of the graph that can be improved.
1. There are no labels on the nodes in the visualization. Hence:
Solution: Add text labels (employee names) to identify the nodes that have high closeness centrality and betweenness centrality.
Solution: Incorporate a Voronoi diagram into the visualization to identify the department of every node/employee. Each department is represented by a unique colour, which can be identified using the legend.
2. The edge links that connect the nodes all have the same weight, so they are not representative of the relationships between the nodes.
Solution: Apply weights to the edge links. When the thickness of an edge link reflects its weight, we can easily visualize the relationship and interaction between the nodes: the higher the weight, the higher the frequency of interaction between the nodes.
3. Using different shades of blue makes it hard to visualize the closeness centrality of every node. Furthermore, in the initial graph a darker shade of blue represents a lower closeness centrality and vice versa – this could cause confusion, as the norm is for darker shades to represent higher values.
Solution: Change the colour gradient because using blue makes it hard to visualize the nodes on the static organisation graph.
Sketch of alternative graph design

# Store both centrality measures as node columns for the alternative design.
newGraph <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  )

# Alternative static design. No layout is passed, so ggraph picks its default.
ggraph(newGraph) +
  # Voronoi cells colour the area around each node by Department.
  geom_node_voronoi(aes(fill = Department), max.radius = 0.1, alpha = 0.5) +
  # Edge width encodes the aggregated email count (Weight).
  geom_edge_link(aes(width = Weight), colour = "black", alpha = 0.1) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  ) +
  scale_color_gradient(low = "#0E097E", high = "#EDF241") +
  scale_edge_width(range = c(0.2, 5)) +
  # Only nodes with closeness centrality above 0.015 get a visible name; all
  # other nodes receive an empty label positioned at the origin.
  geom_label_repel(
    aes(
      x = ifelse(closeness_centrality > 0.015, x, 0),
      y = ifelse(closeness_centrality > 0.015, y, 0),
      label = ifelse(closeness_centrality > 0.015, label, "")
    ),
    fontface = "bold",
    color = "black",
    size = 3,
    box.padding = 0.80,
    point.padding = 0.5,
    segment.colour = "white",
    na.rm = TRUE
  ) +
  guides(fill = guide_legend(ncol = 1), size = guide_legend(ncol = 2)) +
  theme(legend.position = "right") +
  # Title spelling corrected so it matches the baseline plot's title.
  labs(title = "Centrality Indices")
## Using `stress` as default layout

5.0 Task 2: Interactive Organisation Graph
5.1 Data Preparation
# Data preparation for the interactive graphs.
# Edges: look up the sender and recipient ids through their labels, then count
# work-related emails per from/to pair, dropping self-addressed emails and
# pairs that occur only once.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

# Nodes: same table with Department exposed under the name `group`.
GAStech_nodes_vn <- rename(GAStech_nodes, group = Department)
5.2 Initial Graph (before improvement)
# First interactive version: igraph-computed layout, neighbour highlighting
# and a drop-down to select a node by id.
visNetwork(
  nodes = GAStech_nodes_vn,
  edges = GAStech_edges_aggregated,
  main = "Interactive Network Graph"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)
5.3 Improved Design
# Improved interactive version: full-width widget, larger node fonts and
# highlighting restricted to direct neighbours (degree = 1).
visNetwork(
  nodes = GAStech_nodes_vn,
  edges = GAStech_edges_aggregated,
  width = "100%",
  main = "Interactive Network Graph (Improved Design)"
) %>%
  # Node coordinates are computed by igraph in this step, so the seed has to
  # be supplied here for the layout to be the same on every run.
  visIgraphLayout(layout = "layout_with_fr", randomSeed = 100) %>%
  visNodes(font = list(size = 50)) %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, labelOnly = FALSE),
    nodesIdSelection = list(enabled = TRUE)
  ) %>%
  # Kept from the original code; this seeds the widget's own layout option.
  visLayout(randomSeed = 100)
# Build an HTML snippet per node (name, department, title) and store it in
# the `title` column of the node table.
GAStech_nodes_vn[["title"]] <- with(
  GAStech_nodes_vn,
  paste0(
    "Name: ", label,
    "<br> Department: ", group,
    "<br> Title: ", Title
  )
)
5.4 Alternative Graph Design
1. Looking at the visualization, we cannot tell what each colour represents; we only know that colours stand for departments. This could lead users to misinterpret the visualization, as they may make different assumptions about what the different colours represent.
Solution: Add a legend on the right of the visualization to indicate the representation of each colour. For example, blue represents Administration. This improves the clarity of the visualization for users.
2. Even though there is a label on each node, the labels are hard to read because they overlap one another.
Solution: Add a box shape to the labels on the visualization. This makes the label of the node visually clearer and readable for users.
3. The details on the labels are too brief. They only tell us the employee’s name and the department that he/she is in.
Solution: Provide users with more information on the employee. For example, his/her title in the department. This can be done using a hover function as it is a convenient and easy way for users to gain more insights from the visualisation and users do not have to click on any node on the visualisation.
4. There are many employees in the company, and it is hard to remember their names. This may affect the user experience when using the employee name filter to analyse the visualization.
Solution: Add another drop-down list that allows the user to filter the results by the department. Filtering by department makes it much easier for users to find the employee that they want to analyse/gather insights from. When a department is selected, users can select the employee that they want to analyse.
Sketch of alternative graph design

# Alternative interactive design: directed straight edges, box-shaped nodes,
# hover highlighting, selectors for employee and department, editing tools
# and a department legend on the right.
visNetwork(
  nodes = GAStech_nodes_vn,
  edges = GAStech_edges_aggregated,
  width = "100%",
  main = "Interactive Network Graph (Alternative Design)"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(arrows = "to", smooth = FALSE) %>%
  visPhysics(stabilization = FALSE) %>%
  visInteraction(dragNodes = TRUE, dragView = TRUE, zoomView = TRUE) %>%
  visNodes(shape = "box", labelHighlightBold = TRUE, font = list(size = 50)) %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      hover = TRUE,
      algorithm = "hierarchical"
    ),
    nodesIdSelection = list(enabled = TRUE, main = "Employees"),
    selectedBy = list(variable = "group", main = "Department"),
    manipulation = TRUE
  ) %>%
  visLegend(
    position = "right",
    zoom = FALSE,
    main = list(
      text = "Department",
      # Six-digit hex colour; the previous five-digit value was not valid CSS.
      style = "color:#000000;font-size:12px;text-align:center;"
    )
  )