Lim Pei Xuan
14 November 2019
# Packages required by this analysis.
packages <- c(
  "devtools", "igraph", "tidygraph", "ggraph", "visNetwork",
  "lubridate", "tidyverse", "RColorBrewer"
)

# Install every package that is not yet available, then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the installation did not succeed.
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# Install visNetwork from its GitHub repository (this runs on every execution
# of the script).
install_github("datastorm-open/visNetwork")

# Attach the packages used in the rest of the analysis. library() stops with
# an error when a package is missing, whereas require() would only return
# FALSE and let the script fail later with a less helpful message.
# invisible() keeps the list returned by lapply() out of the console.
p <- c("igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")
invisible(lapply(p, library, character.only = TRUE))
# Read the two CSV files from the data/ folder into GAStech_nodes and
# GAStech_edges.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Print a compact, column-by-column overview of the edge table.
glimpse(GAStech_edges)
## Observations: 9,063
## Variables: 8
## $ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26, …
## $ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27, …
## $ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6/1…
## $ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00, 0…
## $ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicProc…
## $ MainSubject <chr> "Work related", "Work related", "Work related", "Wor…
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Kano…
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hide…
# Parse the SentDate strings as day-month-year dates.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])

# Derive the full (unabbreviated) weekday name of every sent date.
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)
# Count the "Work related" emails for every source/target/weekday combination.
# Self-addressed rows (source == target) and combinations that occur only
# once are dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target) %>%
  filter(Weight > 1) %>%
  ungroup()

# Weekday has already been derived from SentDate before the aggregation, so
# it is not recomputed here.

# Build a directed graph from the node table and the aggregated edges.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)
# Store betweenness and closeness centrality as node columns, then draw the
# network: node colour is mapped to closeness, node size to betweenness.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(mapping = aes()) +
  geom_node_point(
    mapping = aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  )

# Display the plot with the graph theme applied.
g + theme_graph()
# Variant in which both centrality measures are evaluated directly inside
# aes() instead of being stored as node columns first.
# NOTE(review): ggraph() is handed the aggregated edge data frame here rather
# than GAStech_graph -- confirm that this is intended.
g2 <- GAStech_edges_aggregated %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    mapping = aes(
      colour = centrality_closeness(),
      size = centrality_betweenness()
    )
  )

# Display the plot with the graph theme applied.
g2 + theme_graph()
| Problem | Proposed Solution |
|---|---|
| Cannot see the strength of the relationship between nodes | Use alpha (transparency) to represent the weight of edges |
| Unable to identify the nodes with highest centrality | Provide labels for nodes with highest centrality |
| Overlapping of nodes | Change the layout to ensure all nodes can be seen |
| No representation of department | Include department with color |
# Attach the node-table columns of each edge's source node to the aggregated
# edges (source matched against id); all.x = TRUE keeps every edge row.
alt_data <- merge(
  GAStech_edges_aggregated,
  GAStech_nodes,
  by.x = "source",
  by.y = "id",
  all.x = TRUE
)

# Print a compact, column-by-column overview of the merged table.
glimpse(alt_data)
## Observations: 1,456
## Variables: 7
## $ source <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ target <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6,…
## $ Weekday <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday, …
## $ Weight <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3,…
## $ label <chr> "Mat.Bramar", "Mat.Bramar", "Mat.Bramar", "Mat.Bramar…
## $ Department <chr> "Administration", "Administration", "Administration",…
## $ Title <chr> "Assistant to CEO", "Assistant to CEO", "Assistant to…
# Build the directed graph from the node table and the merged edge table, and
# store both centrality measures as node columns in one pipeline.
alt_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = alt_data,
  directed = TRUE
) %>%
  mutate(
    `Betweenness Centrality` = centrality_betweenness(),
    `Closeness Centrality` = centrality_closeness()
  )
# Circular linear layout: edge transparency is mapped to Weight, node
# transparency to closeness, node size to betweenness, node colour to
# Department.
alt_g <- alt_graph %>%
  ggraph(layout = "linear", circular = TRUE) +
  geom_edge_arc(mapping = aes(alpha = Weight), width = 0.5) +
  geom_node_point(
    mapping = aes(
      alpha = `Closeness Centrality`,
      size = `Betweenness Centrality`,
      color = Department
    )
  )

# Display the plot; only nodes above either centrality threshold get a label
# (all other nodes receive NA as their label).
alt_g +
  theme_graph() +
  coord_fixed() +
  geom_node_label(
    mapping = aes(
      label = ifelse(
        `Closeness Centrality` > 0.015 | `Betweenness Centrality` > 400,
        label,
        NA
      )
    ),
    repel = TRUE,
    alpha = 0.7
  )
# Rebuild the aggregated edge list with from/to columns: look up the node id
# of the source and target labels, then count the "Work related" emails per
# from/to pair. Self-addressed pairs and pairs that occur only once are
# dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()
# Rename the Department column to group in the node table.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
When a node is selected via the dropdown or by clicking on it, all directly connected nodes and the corresponding edges are highlighted, and the others are greyed out. Only the highlighted nodes will have labels.
# Interactive network: Fruchterman-Reingold layout via igraph, enlarged node
# labels, nearest-neighbour highlighting and a node-id dropdown selector.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(font = list(size = 30)) %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      degree = 1,
      labelOnly = FALSE
    ),
    nodesIdSelection = TRUE
  )
| Problem | Proposed Solution |
|---|---|
| Cannot see the connectivity of an entire department | Include a dropdown selector for department |
| Cannot see the strength or direction of the relationships represented by the edges | Include direction and weight in the edges |
| Nodes are coloured by department, but not identified | Include Legend for departments |
| Job Titles not reflected | Include Job Title in tooltip on hover |
| Labels outside the nodes overlap with each other and with the edges, which makes them hard to read | Change the node shape to a box and place the label inside it |
Rename columns appropriately to make use of visNetwork’s capabilities
# Copy of the aggregated edges with the weight column renamed to value.
GAStech_edges_alt <- rename(GAStech_edges_aggregated, value = weight)

# Rename the Title column of the node table to lower-case title.
GAStech_nodes <- rename(GAStech_nodes, title = Title)
# Improved interactive network: box-shaped nodes, arrows pointing at the
# target, selection by node id or by group, and a legend on the right.
# NOTE(review): title = "Title" passes a literal string to visNodes(), while
# the node table already has its own title column -- confirm it is needed.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_alt) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(
    font = list(size = 30),
    shape = "box",
    title = "Title"
  ) %>%
  visEdges(arrows = "to") %>%
  visOptions(
    nodesIdSelection = TRUE,
    selectedBy = list(variable = "group", highlight = TRUE),
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      labelOnly = FALSE
    )
  ) %>%
  visLegend(width = 0.2, position = "right", main = "Department")