Retrieving relevant files and data preparation
# Packages required by the analysis and plots in this document.
packages <- c(
  "tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse", "plotly",
  "qgraph"
)

# Install any package that is not available yet, then attach it.
# requireNamespace() only tests availability; library() does the attaching
# and fails loudly if the package still cannot be loaded.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Confirm that the core packages can be attached. The printed list holds one
# logical value per package, in the order given here.
p <- c("tidygraph", "ggraph", "visNetwork", "lubridate", "tidyverse")
lapply(p, function(pkg) require(pkg, character.only = TRUE))
[[1]]
[1] TRUE
[[2]]
[1] TRUE
[[3]]
[1] TRUE
[[4]]
[1] TRUE
[[5]]
[1] TRUE
# Read the node CSV file into GAStech_nodes.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
Parsed with column specification:
cols(
id = col_double(),
label = col_character(),
Department = col_character(),
Title = col_character()
)
# Read the edge CSV file into GAStech_edges.
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")
Parsed with column specification:
cols(
source = col_double(),
target = col_double(),
SentDate = col_character(),
SentTime = col_time(format = ""),
Subject = col_character(),
MainSubject = col_character(),
sourceLabel = col_character(),
targetLabel = col_character()
)
# Parse the day-month-year text in SentDate into dates, then derive the
# unabbreviated weekday label for each row.
GAStech_edges$SentDate <- dmy(GAStech_edges$SentDate)
GAStech_edges$Weekday <- wday(
  GAStech_edges$SentDate,
  label = TRUE,
  abbr = FALSE
)

# Count "Work related" rows per (source, target, Weekday) and keep only
# combinations between distinct nodes that occur more than once.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()

# Assemble the directed graph from the node table and the aggregated edges.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)
# Print the graph with its edge table active, sorted by descending Weight.
arrange(activate(GAStech_graph, edges), desc(Weight))
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Edge Data: 1,456 x 4 (active)
from to Weekday Weight
<int> <int> <ord> <int>
1 40 41 Tuesday 23
2 40 43 Tuesday 19
3 41 43 Tuesday 15
4 41 40 Tuesday 14
5 42 41 Tuesday 13
6 42 40 Tuesday 12
# ... with 1,450 more rows
#
# Node Data: 54 x 4
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
# ... with 51 more rows
# Print the graph again with the edge table active, heaviest edges first.
arrange(activate(GAStech_graph, edges), desc(Weight))
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Edge Data: 1,456 x 4 (active)
from to Weekday Weight
<int> <int> <ord> <int>
1 40 41 Tuesday 23
2 40 43 Tuesday 19
3 41 43 Tuesday 15
4 41 40 Tuesday 14
5 42 41 Tuesday 13
6 42 40 Tuesday 12
# ... with 1,450 more rows
#
# Node Data: 54 x 4
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
# ... with 51 more rows
Task 1: Static Organization Graph
Original Design
# Apply the ggraph graph style before drawing.
set_graph_style()

# Baseline plot: node colour is mapped to closeness centrality and node size
# to betweenness centrality, with straight edges and the "nicely" layout.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  )
g + theme_graph()
Three aspects of the graph to be improved:
1. The edges overlap the nodes, making it hard to see the connections between nodes.
2. The colour gradient used for the nodes is too dark, so the node colours are hard to tell apart.
3. The edge colour is too dark to give a clear view of the graph.
Improved sketch
Improved plot
# Improved plot: lighter grey edges and a dark-to-light blue gradient for
# closeness centrality; node size is mapped to betweenness centrality.
# The centrality measures are computed directly inside aes().
# The stray trailing comma after `layout = "nicely"` has been removed so that
# no empty argument is forwarded through ggraph()'s `...`.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(edge_colour = "gray60") +
  geom_node_point(
    aes(colour = centrality_closeness(), size = centrality_betweenness())
  ) +
  scale_colour_gradient(low = "#00008B", high = "#63B8FF")
g + theme_graph()
Alternative sketch
# Alternative plot: same node encoding and blue gradient as the improved
# plot, but edges are drawn with geom_edge_bend() in a light grey.
g <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  ) %>%
  ggraph(layout = "nicely") +
  geom_edge_bend(aes(), edge_colour = "gray69") +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  ) +
  scale_colour_gradient(low = "#00008B", high = "#63B8FF")
g + theme_graph()
Task 2: Interactive Organization Graph
# Look up the node id for each row's source and target label, then count
# "Work related" rows per (from, to) pair, keeping pairs of distinct nodes
# that occur more than once. Only id and label are taken from the node
# table because the remaining node columns are not used by the summary.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(
    select(GAStech_nodes, from = id, label),
    by = c("sourceLabel" = "label")
  ) %>%
  left_join(
    select(GAStech_nodes, to = id, label),
    by = c("targetLabel" = "label")
  ) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

# Rename Department to `group` in the node table.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
Plot Alternative sketch
Original Plot
# Baseline interactive network: igraph "layout_with_fr" layout, with
# nearest-node highlighting and a node-id selection menu enabled.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)
Three aspects of the graph to be improved:
1. All labels are displayed at once, so the labels overlap one another.
2. The labels overlap the network links, making the labels difficult to read.
3. It does not show the connections between nodes.
Improved plot
# Draw every node with the "circle" shape.
GAStech_nodes$shape <- "circle"

# Improved interactive network: navigation buttons, arrowheads on the edges,
# and first-degree neighbour highlighting that also triggers on hover.
# The stray trailing comma after `nodesIdSelection = TRUE` has been removed
# so that visOptions() no longer receives an empty argument.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visInteraction(navigationButtons = TRUE) %>%
  # NOTE(review): arrowheads are placed at the `from` end of each edge;
  # confirm this is the intended direction for the email flow.
  visEdges(arrows = "from") %>%
  visOptions(
    highlightNearest = list(
      enabled = TRUE,
      degree = 1,
      labelOnly = FALSE,
      hover = TRUE
    ),
    nodesIdSelection = TRUE
  )