Ong Qian Ling
19/11/2019
# Packages this analysis depends on.
packages <- c("tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")

# Install each package only if it is not available yet, then attach it.
# requireNamespace() probes for the package without attaching it, so the
# single library() call below is the only place where attaching happens and
# it fails loudly if the installation did not succeed.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Import Data
# Read the node table and the edge table from the data/ folder.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Wrangling Time
# Parse SentDate with dmy() (day-month-year order), then derive the
# unabbreviated weekday label from the parsed date.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Wrangling Attribute
# Keep work-related rows that are not self-addressed, count the rows for each
# (source, target, Weekday) combination, and retain only combinations that
# occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  ungroup() %>%
  filter(Weight > 1)
# Create the network object using tidygraph: a directed graph built from the
# node table and the aggregated edge table.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Show the graph with its edge table active and sorted by descending Weight.
arrange(activate(GAStech_graph, edges), desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 40 41 Tuesday 23
## 2 40 43 Tuesday 19
## 3 41 43 Tuesday 15
## 4 41 40 Tuesday 14
## 5 42 41 Tuesday 13
## 6 42 40 Tuesday 12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows
With ggraph 2.0, you can use the qgraph() function, which works as a quick-plot wrapper for networks. Instead of having to create the new variables betweenness_centrality (using the tidygraph function centrality_betweenness) and closeness_centrality (using the tidygraph function centrality_closeness), the tidygraph functions can be used directly as inputs for the node colour and node size.
# Quick plot of the network: the centrality functions are passed straight in,
# closeness for node colour and betweenness for node size.
GAStech_graph %>%
  qgraph(
    node_colour = centrality_closeness(),
    node_size = centrality_betweenness()
  )
#### 1.2 Area of improvement
Problem 1: The colors used for the nodes and the edge lines are very similar, which reduces the visibility of the nodes.
Solution 1: Use different colors for the nodes and the edge lines to increase the visibility of the nodes.
Problem 2: The layout of the network diagram is too clustered, which made the visualization of the graph unreadable.
Solution 2: Recreate the network diagram with other edge types to make it more comprehensible.
Problem 3: No labeling of each node in the diagram.
Solution 3: Include labels to indicate what each node represents.
# Add both centrality measures as node columns so they can be mapped to
# aesthetics in the plot below.
graph <- as_tbl_graph(GAStech_graph) %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  )

# Light grey edges keep the coloured nodes visible; node colour encodes
# closeness centrality and node size encodes betweenness centrality.
ggraph(graph, layout = "nicely") +
  geom_edge_link(edge_colour = "gray80") +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  ) +
  scale_color_gradient(low = "yellow", high = "red") +
  # The label size is a fixed value, so it is set outside aes(). Inside aes()
  # the constant would be mapped through the same size scale as
  # betweenness_centrality and would add text to the size legend.
  geom_node_text(aes(label = label), size = 3, repel = TRUE)
The improved version of the code below includes two additional interactive features, which are:
# Import Data
# Re-read both tables so this section starts from the raw files.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Wrangling Time
# Parse SentDate with dmy() (day-month-year order).
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
# Derive the unabbreviated weekday label from the parsed date.
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)
# Data Preparation
# Build the from/to/weight edge table for visNetwork:
#   1. keep only work-related rows;
#   2. look up the node id for the source label (`from`) and for the target
#      label (`to`);
#   3. count rows per (from, to) pair;
#   4. drop self-loops and pairs that occur only once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(
    select(GAStech_nodes, label, from = id),
    by = c("sourceLabel" = "label")
  ) %>%
  left_join(
    select(GAStech_nodes, label, to = id),
    by = c("targetLabel" = "label")
  ) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()
# Rename the Department field to group.
GAStech_nodes <- rename(GAStech_nodes, group = Department)

# Assign colour to each category in the group field, lay the network out with
# igraph's "layout_with_fr", and enable interactive highlighting (on hover,
# one step away, labels only) plus a node-id selection control.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      degree = 1,
      labelOnly = TRUE,
      hover = TRUE
    ),
    nodesIdSelection = TRUE
  )
#### 2.2 Area of improvement
Problem 1: The label of each individual node is too small to be read.
Solution 1: Increase the font size of the labels. Alternatively, we can include a tooltip that appears when hovering over a node.
Problem 2: There is no legend included to provide information about the color nodes in the diagram.
Solution 2: Include a legend at the side to denote which color each node represents.
Problem 3: The diagram does not show the direction of the links between the nodes.
Solution 3: Include arrows between the nodes to indicate the direction of each link.
# Import Data
# Start again from the raw node and edge files.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Wrangling Time
# Parse SentDate with dmy() (day-month-year order) and add the unabbreviated
# weekday label computed from the parsed date.
GAStech_edges <- GAStech_edges %>%
  mutate(SentDate = dmy(SentDate)) %>%
  mutate(Weekday = wday(SentDate, label = TRUE, abbr = FALSE))
# Data Preparation
# Restrict to work-related rows, attach the node id of the source label as
# `from` and of the target label as `to`, count rows per (from, to) pair, and
# keep only pairs between two different nodes that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()
# Rename the Department field to group, then build the HTML tooltip: one
# paragraph each for the name, job title and department, stored in the
# lower-case `title` column (distinct from the existing `Title` column).
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department) %>%
  mutate(
    title = paste0(
      "<p>Name: ", label,
      "</p> <p>Job Title: ", Title,
      "</p> <p>Department: ", group, "</p>"
    )
  )
# Visualization Network
# Interactive network with larger black labels, a "Department" legend on the
# left, arrows pointing at the `to` node of each edge, and the
# "layout_with_fr" igraph layout.
visNetwork(GAStech_nodes, GAStech_edges_aggregated, width = "100%") %>%
  visNodes(font = list(size = 40, color = "black")) %>%
  visLegend(position = "left", main = "Department") %>%
  visEdges(arrows = "to") %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    # Highlight a node's direct neighbours (degree 1) on hover, labels only.
    highlightNearest = list(
      enabled = TRUE,
      degree = 1,
      labelOnly = TRUE,
      hover = TRUE
    ),
    # No trailing comma after the last argument: it would pass a stray empty
    # argument to visOptions().
    nodesIdSelection = TRUE
  )