# Set up: install any package that is missing, then attach every package.
packages <- c(
  "igraph", "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse"
)
for (p in packages) {
  # requireNamespace() only checks availability; it does not attach the
  # package, so library() below is the single place where attaching happens.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() stops with an error if the package is still unavailable.
  library(p, character.only = TRUE)
}
# Attach each package by name; the printed list holds one logical per
# package telling whether require() managed to load it.
p <- c(
  "igraph",
  "tidygraph",
  "ggraph",
  "visNetwork",
  "lubridate",
  "tidyverse"
)
lapply(p, function(pkg) require(pkg, character.only = TRUE))
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
# Load the node table and the edge (email) table from the data folder,
# then show a compact column-by-column overview of the edge table.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")
GAStech_edges %>%
  glimpse()
## Observations: 9,063
## Variables: 8
## $ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26...
## $ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27...
## $ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6...
## $ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00,...
## $ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicPr...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "W...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Ka...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hi...
# Parse SentDate (text in day/month/year order) into a Date, then derive
# the full weekday name from the parsed date. mutate() evaluates its
# arguments in order, so Weekday sees the already-parsed SentDate.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Count work-related emails per (source, target, weekday) combination.
# Self-addressed emails and combinations seen only once are dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()
print(GAStech_edges_aggregated)
## # A tibble: 1,456 x 4
## source target Weekday Weight
## <dbl> <dbl> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## 4 1 2 Friday 8
## 5 1 3 Monday 4
## 6 1 3 Tuesday 3
## 7 1 3 Wednesday 5
## 8 1 3 Friday 8
## 9 1 4 Monday 4
## 10 1 4 Tuesday 3
## # ... with 1,446 more rows
# Build a directed graph from the node table and the aggregated edges.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Original design: attach both centrality measures to the nodes, then map
# closeness to point colour and betweenness to point size.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  )
g + theme_graph()
.
# Same plot without the pipe: store both centrality measures on the graph
# itself so later code can reuse them, then draw the graph.
GAStech_graph <- mutate(
  GAStech_graph,
  betweenness_centrality = centrality_betweenness(),
  closeness_centrality = centrality_closeness()
)
ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  ) +
  theme_graph()
Number 1 - The network diagram is extremely clustered; we are unable to derive value from looking at the graph, and some of the nodes are hidden from view in this layout.
Number 2 - The nodes lack labelling, so we cannot tell which node belongs to which person.
Number 3 - The edges are not weighted.
Alternative Sketch
# Alternative design: "kk" layout with grey edges. Point colour shows the
# department, point size the betweenness centrality and point transparency
# the closeness centrality; every node carries a repelled text label.
alternate_g <- GAStech_graph %>%
  ggraph(layout = "kk") +
  geom_edge_link(colour = "gray") +
  geom_node_point(
    aes(
      colour = Department,
      size = centrality_betweenness(),
      alpha = centrality_closeness()
    )
  ) +
  geom_node_text(aes(label = label), repel = TRUE)
print(alternate_g)
# Reload the raw tables so this section starts from unmodified data.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Build the edge list with `from`/`to` id columns: keep work-related
# emails, look up the sender and recipient ids by matching their labels
# against the node table, then count emails per (from, to) pair.
# Self-addressed emails and pairs seen only once are dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()
# Expose the Department column under the name `group`.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
# Interactive network: one colour per category in the group field, a
# Fruchterman-Reingold layout computed through igraph, neighbour
# highlighting on selection, and a drop-down to pick a node by id.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = TRUE,
    nodesIdSelection = TRUE
  )
# Original interactive design: box-shaped nodes with a larger font and
# neighbour highlighting on both click and hover.
# Node text comes from the `label` column of the node table, so no global
# `label` is passed to visNodes(); that option expects a string, not a logical.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, labelOnly = FALSE, hover = TRUE),
    nodesIdSelection = TRUE
  ) %>%
  visNodes(font = list(size = 35), shape = "box")
Though there are labels, they are very small and require the user to zoom in to view them properly.
There is no legend that shows which node belongs to which department.
The direction and weight of the edges are not shown in the graph.
Alternative Design
# Tooltip shown for each node: name, job title and department as HTML.
GAStech_nodes$title <- paste0(
  "<p>Name: ", GAStech_nodes$label,
  "</p> <p>Job Title: ", GAStech_nodes$Title,
  "</p> <p>Department: ", GAStech_nodes$group, "</p>"
)

# Alternative interactive design: arrows show the email direction, nodes
# can be selected by department or by employee, and a legend is added.
# Node text comes from the `label` column of the node table, so no global
# `label` is passed to visNodes(); that option expects a string, not a logical.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
    selectedBy = list(variable = "group", main = "Department"),
    nodesIdSelection = list(main = "Employee Name")
  ) %>%
  visEdges(arrows = "to") %>%
  visNodes(font = list(size = 40), shape = "box") %>%
  visLegend(zoom = FALSE)
https://cran.r-project.org/web/packages/visNetwork/vignettes/Introduction-to-visNetwork.html https://www.data-imaginist.com/2017/ggraph-introduction-nodes/ https://www.data-imaginist.com/2017/ggraph-introduction-layouts/ https://www.data-imaginist.com/2017/ggraph-introduction-edges/