Author: Goh Jia Xian
Date: 22-Nov-2019
# Packages needed by this analysis. Any package that is not yet installed is
# installed first; every package is then attached with library().
packages <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)
for (p in packages) {
  # requireNamespace() only checks availability; unlike require() it does not
  # attach the package, so attaching is left to the single library() call.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Load the node list and the edge list of the email network.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Wrangling time: parse SentDate with dmy() and derive the full (unabbreviated)
# weekday name of each email as an extra column.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Wrangling attributes: count the work-related emails for every
# (source, target, Weekday) combination, then keep only combinations with more
# than one email and drop rows where source and target are the same.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()
Qns: Improve the code chunk used to create the organisation network graph by using the latest functions provided in ggraph 2.0.
# Before: Section 6.1 of Hands-on Exercise 10
# Baseline version: the tbl_graph is built explicitly and both centrality
# measures are added as node columns before plotting.
GAStech_graph <- tbl_graph(nodes = GAStech_nodes,
                           edges = GAStech_edges_aggregated,
                           directed = TRUE)
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = closeness_centrality,
                      size = betweenness_centrality))
g + theme_graph()
Changes made:
1. There is no need to create a tbl_graph object first: ggraph 2.0 is built on tidygraph internally, so the input is automatically converted into a tbl_graph object.
2. The mutate() calls are not necessary, as the centrality functions can be called directly inside the ‘colour’ and ‘size’ aesthetics.
# Improved version: the aggregated edge data frame is handed straight to
# ggraph(), and the centrality functions are called inside aes().
ggraph(GAStech_edges_aggregated, layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(
      colour = centrality_closeness(),
      size = centrality_betweenness()
    )
  ) +
  theme_graph()
Qns: Identify three aspects of the graph visualisation in Section 6.1 that can be improved.
Based on the graph plotted in part 1, it is difficult to retrieve any meaningful insights due to poor design in the following aspects:
1. Network
Problem: The graph in general looks disorganised and unnecessarily complicated, which makes it difficult
for readers to study the links between the nodes.
Solution: Use a new layout to display the chart to prevent the edges from crossing and overlapping.
2. Nodes
Problem: It is not possible to identify what each node represents, as there are no labels indicating either name
or group. Also, some of the nodes cannot be seen because they share the same colour as their edges.
Solution: Use labels to show nodes with high Betweenness Centrality and Closeness Centrality.
The nodes should be coloured according to their department.
3. Edges
Problem: Unable to derive the frequency of emails sent in the network as all edges used the same weight
Solution: Set the weight of the edges according to the frequency of emails sent between nodes.
Qns: Provide the sketch of your alternative design.
Qns: Using appropriate ggraph functions, plot the alternative design
# Build the directed graph from the node list and the aggregated edge list.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print the graph with its edge table sorted by descending Weight.
# The sorted graph is only displayed; it is not assigned back.
GAStech_graph %>%
  activate(edges) %>%
  arrange(desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 40 41 Tuesday 23
## 2 40 43 Tuesday 19
## 3 41 43 Tuesday 15
## 4 41 40 Tuesday 14
## 5 42 41 Tuesday 13
## 6 42 40 Tuesday 12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows
#
# Add two node attributes: the betweenness score, and a two-level label
# derived from the closeness score (the raw closeness value is not kept).
# NOTE(review): the test is `>= 0.015` while the label reads "> 0.015" -
# confirm which wording is intended before changing either one.
GAStech_graph <- GAStech_graph %>%
  mutate(
    BetweennessCentrality = centrality_betweenness(),
    ClosenessCentrality = ifelse(
      centrality_closeness() >= 0.015,
      "High (> 0.015)",
      "Low (< 0.015)"
    )
  )
# Plotting graph: linear layout with arcs as edges.
#   - edge width encodes Weight
#   - node colour/fill encode Department, node size encodes betweenness,
#     node shape encodes the closeness label
#   - a node is labelled only when its betweenness exceeds 300 or its
#     closeness label is the "High" level
ggraph(GAStech_graph, layout = "linear") +
  geom_edge_arc(
    mapping = aes(width = Weight),
    alpha = 0.15,
    strength = 0.5
  ) +
  scale_edge_width(range = c(0.3, 5)) +
  geom_node_point(
    mapping = aes(
      colour = Department,
      size = BetweennessCentrality,
      shape = ClosenessCentrality,
      fill = Department
    )
  ) +
  scale_shape_manual(values = c(23, 21)) +
  geom_node_label(
    mapping = aes(
      label = ifelse(
        BetweennessCentrality > 300 | ClosenessCentrality == "High (> 0.015)",
        label,
        NA
      )
    ),
    repel = TRUE,
    alpha = 0.5,
    size = 4
  ) +
  theme_graph()
# Rebuild the aggregated edge list for the interactive graphs.
# The node table is joined twice, once on sourceLabel and once on targetLabel,
# and the matched node id is renamed to `from` and `to` respectively.
# Work-related emails are then counted per (from, to) pair; pairs with a
# single email and rows where from equals to are dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

# Expose Department under the column name `group`.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
Incorporating the following interactivity:
1. When a name is selected from the drop-down list, the corresponding node will be both highlighted and labelled. All nodes linked to the selected node will be labelled as well.
2. When a node of the interactive graph is selected, the node will be both highlighted and labelled. All nodes linked to the selected node will be labelled as well.
# Interactive graph, variant 1: nearest-neighbour highlighting is enabled with
# labelOnly = TRUE, and a node-id selection list is added.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = TRUE),
    nodesIdSelection = TRUE
  )
# Interactive graph, variant 2: same layout and selection list, but
# nearest-neighbour highlighting uses labelOnly = FALSE.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE
  )
Based on the graph plotted in part 1, the following aspects should be improved :
1. Network: The graph in general looks disorganised and unnecessarily complicated, which makes it difficult for readers to study the links between the nodes.
2. Nodes: It is not possible to identify what each node represents, as there are no labels indicating either name or group. Also, some of the nodes cannot be seen because they share the same colour as their edges.
3. Edges Unable to derive the frequency of emails sent in the network as all edges used the same weight
# Node display text: replace the first punctuation character in each label
# with a space, and turn Title into a `title` column prefixed with "Title: ".
GAStech_nodes <- GAStech_nodes %>%
  rename(title = Title) %>%
  mutate(
    label = str_replace(label, "[[:punct:]]", " "),
    title = paste("Title: ", title)
  )

# Edge display text: the email count as a character label.
GAStech_edges_aggregated <- GAStech_edges_aggregated %>%
  mutate(label = as.character(weight))
# Final interactive graph: titled network with directed edges, selection by
# node id or by `group`, custom tooltip settings, enlarged ellipse nodes and
# a legend on the right.
visNetwork(
  GAStech_nodes,
  GAStech_edges_aggregated,
  main = "GASTech Email's Network Graph"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(selectionWidth = 7, arrows = "to") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE,
    selectedBy = "group",
    width = "100%",
    height = "100%"
  ) %>%
  visInteraction(
    tooltipDelay = 0,
    tooltipStay = 60,
    tooltipStyle = "position: fixed;visibility:hidden;padding: 1px;font-size:12px;background-color: white;"
  ) %>%
  visNodes(font = list(size = 30), shape = "ellipse") %>%
  visLegend(main = "Department", position = "right", width = 0.15)