# Packages used by this analysis, attached in this order (later packages mask
# same-named functions from earlier ones).
packages <- c(
  "igraph", "tidygraph", "ggraph", "ggrepel",
  "visNetwork", "lubridate", "tidyverse"
)

# Install any package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability; attaching is left to the
  # library() call below so a failed install stops with a clear error.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Load the node (employee) and edge (email) tables.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Parse SentDate as a day-month-year date, then derive the full weekday name
# as an ordered factor.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)
# Aggregate work-related emails into weighted edges: one row per
# (source, target, Weekday) with Weight = number of emails sent.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  # Remove emails sent to self, and keep only edges with more than one email.
  filter(source != target, Weight > 1) %>%
  ungroup()
# Combine the node table and the aggregated edge table into a directed
# tidygraph object, then print it for inspection.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administrati~ Assistant to CEO
## 2 2 Anda.Ribera Administrati~ Assistant to CFO
## 3 3 Rachel.Pantanal Administrati~ Assistant to CIO
## 4 4 Linda.Lagos Administrati~ Assistant to COO
## 5 5 Ruscella.Mies.Hab~ Administrati~ Assistant to Engineering Group Ma~
## 6 6 Carla.Forluniau Administrati~ Assistant to IT Group Manager
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## # ... with 1,453 more rows
# Show the edge table sorted from the heaviest to the lightest edge.
arrange(activate(GAStech_graph, edges), desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 40 41 Tuesday 23
## 2 40 43 Tuesday 19
## 3 41 43 Tuesday 15
## 4 41 40 Tuesday 14
## 5 42 41 Tuesday 13
## 6 42 40 Tuesday 12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows
# Baseline network plot: node colour encodes closeness centrality and node
# size encodes betweenness centrality; edges are drawn as plain links.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(
      colour = closeness_centrality,
      size = betweenness_centrality
    )
  )

g + theme_graph()
# Improved network plot: edge width and edge density are driven by Weight,
# edges are made semi-transparent, and the graph is faceted by Weekday.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") +
  geom_edge_density(aes(fill = Weight)) +
  geom_edge_link(aes(width = Weight), alpha = 0.2) +
  geom_node_point(
    aes(color = closeness_centrality, size = betweenness_centrality)
  ) +
  # closeness_centrality is continuous, so use ggplot2's continuous viridis
  # scale. scale_color_viridis() belongs to the viridis package, which this
  # script never attaches.
  scale_colour_viridis_c()

# One panel per weekday, showing only that day's edges.
g + theme_graph() + facet_edges(~Weekday)
Analysing the relationship between pairs of vertices is difficult because of the opacity of the graph’s edges.
The dimension ‘Weekday’ could be used to facet the graph, showing the interactions between the subjects on each weekday.
All edges are present on the graph. Visualizing the edges by their assigned weight would help the user pick out the important edges, and so identify the relationships that matter most.
# Rebuild the edge list for visNetwork: look up the sender and recipient ids
# from their labels, name them `from` / `to`, and count work-related emails
# per (from, to) pair.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c(sourceLabel = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c(targetLabel = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  # Drop self-addressed emails and pairs with only a single email.
  filter(from != to, weight > 1) %>%
  ungroup()
# Rename Department to `group`, the node column visNetwork uses for grouping.
GAStech_nodes <- rename(GAStech_nodes, group = Department)

# Basic interactive network: Fruchterman-Reingold layout, neighbour
# highlighting on selection, and a drop-down to select a node by id.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = TRUE,
    nodesIdSelection = TRUE
  )
Vertices do not show their in-degree or out-degree relationships with other vertices.
Colored edges do not add value to the analysis of the graph. They are a distraction when viewing the network with one vertex highlighted.
Names shown under the shape of the vertex are difficult to read, and it is hard to tell whether any edges overlap the text.
# Calculate out-degree values for each node and store them in the `value`
# column of the node table.
# graph_from_data_frame() replaces the deprecated graph.data.frame().
graph <- graph_from_data_frame(GAStech_edges_aggregated, directed = TRUE)
degree_value <- degree(graph, mode = "out")

# Vertex names are the ids taken from the edge list, so align by id.
GAStech_nodes$value <- degree_value[match(GAStech_nodes$id, names(degree_value))]

# Nodes absent from the filtered edge list have no outgoing edges; give them
# an out-degree of 0 instead of NA.
GAStech_nodes$value[is.na(GAStech_nodes$value)] <- 0
# Final interactive graph: box-shaped nodes, directed arrows, and a highlight
# colour for the edges of the selected node.
visNetwork(
  GAStech_nodes,
  GAStech_edges_aggregated,
  main = "Interactive Graph - Outward interaction"
) %>%
  # Optimization: precomputed igraph layout, straight edges, no stabilization.
  visIgraphLayout(layout = "layout_with_fr") %>%
  visEdges(smooth = FALSE) %>%
  visPhysics(stabilization = FALSE) %>%
  # Customization
  visInteraction(dragNodes = TRUE, dragView = TRUE, zoomView = TRUE) %>%
  visNodes(
    shape = "box",
    shadow = list(enabled = TRUE, size = 10)
  ) %>%
  visEdges(
    arrows = "to",
    shadow = FALSE,
    color = list(highlight = "#C62F4B")
  ) %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      hover = FALSE,
      algorithm = "hierarchical"
    ),
    # Node "1" is pre-selected; the drop-down lists every node id.
    nodesIdSelection = list(
      enabled = TRUE,
      selected = "1",
      values = unique(GAStech_nodes$id)
    )
  )