Wei Ming
23 November 2019
# Packages needed for the network analysis and its visualisations.
packages <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse"
)

# Install any package that is not yet available, then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and raises an error if the package still cannot be loaded.
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# Attach every package named in `p`. lapply() collects the value returned by
# each require() call into a list, which is printed below.
p <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)
lapply(X = p, FUN = require, character.only = TRUE)
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
# Load the node table and the edge table from their CSV files.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Inspect the columns and column types of the edge table.
glimpse(GAStech_edges)
## Observations: 9,063
## Variables: 8
## $ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26...
## $ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27...
## $ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6...
## $ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00,...
## $ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicPr...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "W...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Ka...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hi...
# Parse the day/month/year strings in SentDate into dates, then derive the
# unabbreviated weekday label of each send date from the parsed value.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Build the weighted edge list: keep work-related e-mails that were not sent
# to oneself, count them per (source, target, Weekday) combination, and keep
# only the combinations that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related", source != target) %>%
  count(source, target, Weekday, name = "Weight") %>%
  filter(Weight > 1)

GAStech_edges_aggregated
## # A tibble: 1,456 x 4
## source target Weekday Weight
## <dbl> <dbl> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## 4 1 2 Friday 8
## 5 1 3 Monday 4
## 6 1 3 Tuesday 3
## 7 1 3 Wednesday 5
## 8 1 3 Friday 8
## 9 1 4 Monday 4
## 10 1 4 Tuesday 3
## # ... with 1,446 more rows
# Combine the node table and the aggregated edge table into a directed
# tbl_graph object, then print it.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administrati~ Assistant to CEO
## 2 2 Anda.Ribera Administrati~ Assistant to CFO
## 3 3 Rachel.Pantanal Administrati~ Assistant to CIO
## 4 4 Linda.Lagos Administrati~ Assistant to COO
## 5 5 Ruscella.Mies.Hab~ Administrati~ Assistant to Engineering Group Ma~
## 6 6 Carla.Forluniau Administrati~ Assistant to IT Group Manager
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## # ... with 1,453 more rows
# Original design: store both centrality measures as node columns first, then
# map closeness to point colour and betweenness to point size.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  )

g + theme_graph()
Improvements:
1. The original edge data frame can be passed directly to ggraph() as input; a separate network object (tbl_graph) is not strictly required.
2. The ‘centrality_closeness()’ and ‘centrality_betweenness()’ functions can be called directly inside aesthetic mappings, so no separate mutate() step is needed.
# Improved design: hand the aggregated edge table straight to ggraph() and
# compute both centrality measures inside the aesthetic mapping.
graph1 <- GAStech_edges_aggregated %>%
  ggraph(layout = "nicely") +
  geom_edge_link() +
  geom_node_point(
    aes(
      colour = centrality_closeness(),
      size = centrality_betweenness()
    )
  )

graph1 + theme_graph()
# Add betweenness and closeness centrality as columns on the graph's nodes.
GAStech_graph <- GAStech_graph %>%
  mutate(
    Betweenness_Centrality = centrality_betweenness(),
    Closeness_Centrality = centrality_closeness()
  )
# Task 1 plot on a circular layout. Node colour encodes department, node size
# encodes betweenness, node transparency encodes closeness, and edge
# transparency encodes the e-mail count (Weight).
graph2 <- GAStech_graph %>%
  ggraph(layout = "circle") +
  geom_node_point(
    aes(
      colour = Department,
      size = Betweenness_Centrality,
      alpha = Closeness_Centrality
    )
  ) +
  geom_edge_link(aes(alpha = Weight), width = 0.3) +
  scale_edge_alpha(range = c(0.2, 0.8)) +
  geom_node_text(
    aes(label = label),
    repel = TRUE,
    size = 2,
    alpha = 0.8,
    color = "brown"
  ) +
  ggtitle("Task1: Email Network Visualization")

graph2 + theme_graph()
# Rebuild the edge list with `from`, `to` and `weight` columns: look up the
# node id of the sender and of the recipient by label, drop self-addressed
# e-mails, count work-related e-mails per (from, to) pair, and keep the pairs
# that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(from != to) %>%
  count(from, to, name = "weight") %>%
  filter(weight > 1)
# Rename Department to `group` (presumably so that visNetwork treats the
# department as each node's group; confirm against the visNetwork docs).
GAStech_nodes <- rename(GAStech_nodes, group = Department)

# Interactive network with a Fruchterman-Reingold layout computed by igraph,
# circular nodes, neighbour highlighting and a node-id selection list.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(label = GAStech_nodes$id, shape = "circle") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE),
    nodesIdSelection = TRUE
  )
# Rename Title to `title` (presumably the column visNetwork reads for node
# tooltips; confirm against the visNetwork docs).
GAStech_nodes <- rename(GAStech_nodes, title = Title)

# Task 2 widget: box-shaped nodes, arrows pointing at the recipient,
# hierarchical neighbour highlighting, a node-id selection list and a
# department legend on the right.
visNetwork(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  main = "Task2: Email Network Visualization"
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visNodes(label = GAStech_nodes$id, shape = "box", title = "Title") %>%
  visEdges(arrows = "to") %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      algorithm = "hierarchical",
      labelOnly = FALSE
    ),
    nodesIdSelection = TRUE
  ) %>%
  visLegend(
    main = "Department",
    width = 0.2,
    position = "right",
    zoom = FALSE
  )