Environment Setup & Data Loading
# Packages needed for the network wrangling and plotting in this document.
packages <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)

# Install any package that is not available yet, then attach it.
for (p in packages) {
  # requireNamespace() only probes for availability; unlike require() it does
  # not attach the package or warn when the package is missing.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Node table, read as-is from disk.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")

# Edge table: parse SentDate as day-month-year and derive the full
# (unabbreviated) weekday name from the parsed date.
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv") %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Count "Work related" rows per source / target / weekday combination.
# Rows whose source equals their target are removed, and only combinations
# that occur more than once are kept.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(Weight > 1) %>%
  ungroup()
# Directed graph built from the node table and the aggregated edge table.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print the graph with its edges sorted by descending Weight.
# The sorted graph is only displayed; GAStech_graph itself is not modified.
GAStech_graph %>%
  activate(edges) %>%
  arrange(desc(Weight))
Task 1
The Given Plot
# Given plot: store both centrality measures as node columns, then map
# closeness to point colour and betweenness to point size.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  )

g + theme_graph()

Plot with Improved Code
# Same plot as the given one, but with the centrality measures computed
# directly inside aes() instead of being stored as node columns first.
# The layout is built from GAStech_graph rather than the raw edge table, so
# the node table supplied to tbl_graph() is kept and the node set matches
# the given plot.
g2 <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(colour = centrality_closeness(), size = centrality_betweenness())
  )

g2 + theme_graph()

Three Aspects for Improvement
1. Nodes with low closeness centrality have a very dark color, which makes them hard to distinguish from the edges.
2. At the given plot size, the edges overlap each other, which detracts from the aesthetics.
3. The graph lacks labels that give more information about the key nodes with high influencing power (high closeness centrality or betweenness centrality).
New Design Plotted
1. Edges are colored in grey
2. Transparency for edges is set to 0.4
3. Nodes with high influencing power are labeled; repel is set to TRUE to prevent the labels from overlapping.
# Redesigned plot: grey, semi-transparent edges, plus repelled labels on the
# nodes whose closeness exceeds 0.015 or whose betweenness exceeds 400.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(color = 'grey', alpha = 0.4) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  ) +
  geom_node_label(
    aes(
      # Nodes below both thresholds get NA and therefore no label.
      label = ifelse(
        closeness_centrality > 0.015 | betweenness_centrality > 400,
        label,
        NA
      )
    ),
    repel = TRUE
  )

g + theme_graph()

Task 2
# Rebuild the edge table in the from / to / weight shape passed to visNetwork.
# Node ids are looked up by label for both ends of every "Work related" row;
# rows whose two ends resolve to the same id are dropped, and only from / to
# pairs that occur more than once are kept.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(from != to) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(weight > 1) %>%
  ungroup()

# Expose Department under the column name `group`.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
The Given Plot
# Given interactive plot: "layout_with_fr" igraph layout, nearest-neighbour
# highlighting, and a node-id selection control.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = TRUE,
    nodesIdSelection = TRUE
  )
Plot with Improved Interactivity
# Same network drawn with box-shaped nodes; highlightNearest is passed as an
# explicit option list (enabled, labelOnly = FALSE).
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visNodes(shape = "box") %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE
  )
Three Aspects for Improvement
1. The labels are hard to read when the underlying network edges are too crowded
2. Without a legend, it is hard to tell which colour represents which department.
3. The colour palette differentiates nodes of different departments, but it carries no meaning for the edges, which confuses the user.
New Design Plotted
1. Labels are inside the nodes
2. Edges are coloured in grey
3. Legends created for colour grouping
# Final design: box-shaped nodes, grey edges, and a legend added with
# visLegend().
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visNodes(shape = "box") %>%
  visEdges(color = 'grey') %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE
  ) %>%
  visLegend()