18 November 2019
packages = c('igraph', 'tidygraph', 'ggraph', 'ggrepel', 'visNetwork', 'lubridate', 'tidyverse')
for(p in packages){library
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Loading required package: tidygraph
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
## Loading required package: ggraph
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: visNetwork
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:igraph':
##
## %--%
## The following object is masked from 'package:base':
##
## date
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.3
## v tidyr 1.0.0 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::%--%() masks igraph::%--%()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose() masks igraph::compose()
## x tidyr::crossing() masks igraph::crossing()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks tidygraph::filter(), stats::filter()
## x dplyr::groups() masks tidygraph::groups(), igraph::groups()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::simplify() masks igraph::simplify()
## x lubridate::union() masks igraph::union(), base::union()
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
## Parsed with column specification:
## cols(
## id = col_double(),
## label = col_character(),
## Department = col_character(),
## Title = col_character()
## )
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")
## Parsed with column specification:
## cols(
## source = col_double(),
## target = col_double(),
## SentDate = col_character(),
## SentTime = col_time(format = ""),
## Subject = col_character(),
## MainSubject = col_character(),
## sourceLabel = col_character(),
## targetLabel = col_character()
## )
GAStech_edges$SentDate = dmy(GAStech_edges$SentDate)
GAStech_edges$Weekday = wday(GAStech_edges$SentDate, label = TRUE, abbr = FALSE)
GAStech_edges_aggregated <- GAStech_edges %>%
filter(MainSubject == "Work related") %>%
group_by(source, target, Weekday) %>%
summarise(Weight = n()) %>%
filter(source!=target) %>%
filter(Weight > 1) %>%
ungroup()
GAStech_edges_aggregated
## # A tibble: 1,456 x 4
## source target Weekday Weight
## <dbl> <dbl> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## 4 1 2 Friday 8
## 5 1 3 Monday 4
## 6 1 3 Tuesday 3
## 7 1 3 Wednesday 5
## 8 1 3 Friday 8
## 9 1 4 Monday 4
## 10 1 4 Tuesday 3
## # ... with 1,446 more rows
GAStech_graph <- tbl_graph(nodes = GAStech_nodes, edges = GAStech_edges_aggregated, directed = TRUE)
GAStech_graph
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 54 x 4 (active)
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## 4 4 Linda.Lagos Administration Assistant to COO
## 5 5 Ruscella.Mies.Haber Administration Assistant to Engineering Group Manag~
## 6 6 Carla.Forluniau Administration Assistant to IT Group Manager
## # ... with 48 more rows
## #
## # Edge Data: 1,456 x 4
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 1 2 Monday 4
## 2 1 2 Tuesday 3
## 3 1 2 Wednesday 5
## # ... with 1,453 more rows
GAStech_graph %>%
activate(edges) %>%
arrange(desc(Weight))
## # A tbl_graph: 54 nodes and 1456 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,456 x 4 (active)
## from to Weekday Weight
## <int> <int> <ord> <int>
## 1 40 41 Tuesday 23
## 2 40 43 Tuesday 19
## 3 41 43 Tuesday 15
## 4 41 40 Tuesday 14
## 5 42 41 Tuesday 13
## 6 42 40 Tuesday 12
## # ... with 1,450 more rows
## #
## # Node Data: 54 x 4
## id label Department Title
## <dbl> <chr> <chr> <chr>
## 1 1 Mat.Bramar Administration Assistant to CEO
## 2 2 Anda.Ribera Administration Assistant to CFO
## 3 3 Rachel.Pantanal Administration Assistant to CIO
## # ... with 51 more rows
The original network graph from section 6.1 of Hands-on-Exercise 10 is as follow:
# computing centrality indices
g <- GAStech_graph %>%
mutate(betweenness_centrality = centrality_betweenness()) %>%
mutate(closeness_centrality = centrality_closeness()) %>%
ggraph(layout = "nicely") +
geom_edge_link(aes()) +
geom_node_point(aes(colour = closeness_centrality, size=betweenness_centrality))
g + theme_graph()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
The graph above shows the network of a particular employee based on the closeness centrality and betweenness centrality. However, the aesthetic and clarity of the graph is still not satisfactory. It is still leveraging on ggraph 1.0.
Negative remarks on clarity:
- I am not sure which employees are of higher clonessness centrality or betweenness centrality.
- With all the edges similar to each other, I am not sure which area has a higher weightage than the other. This means who are the employees that are more connected with each other.
- Just from looking the graph at a glance, I cannot derive much information. For example, when are the time they interacts? From which departments are they from?
Negative remarks on aesthetics:
- The nodes are overlapping of each other. Hence, making certain nodes not obvious
- Not all nodes can be seen due to the colour complement with the black edges.
I have drawn out the desired design to improve the aesthetics and clarity of the graph. In this sketch design, there are 2 graphs. The first graph will focus to improve on the aesthetics of the current design while the second graph will focus to improve the clarity.
In the the first graph, here is the improvement made:
1. The opacity of the node is reduced
2. Add in label for employees whose closeness centrality is very high
3. Add in the colour gradient. The higher the closeness centrality, the node will be indicated by red while lower closeness centrality will be indicated by yellow
4. Assign width = Weight in the aesthetic of the edge to show how much interactions done among the employees.
g <- GAStech_graph %>%
mutate(betweenness_centrality = centrality_betweenness()) %>%
mutate(closeness_centrality = centrality_closeness()) %>%
ggraph(layout = "nicely") +
geom_edge_link(aes(width = Weight),colour= "black", alpha=0.1, show.legend = FALSE) +
geom_node_point(aes(colour = closeness_centrality, size=betweenness_centrality),alpha=0.7) +
scale_color_gradient(low = "yellow", high = "red")+
geom_node_label(aes(filter=closeness_centrality > 0.015, label= label),
family = "serif",repel = TRUE, label.size = 0.25)
g + theme_graph()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
Although the first graph has improved the overall aesthetics, it still does not tell much about the graph. Hence, I make the second graph to gain more information about the graph. In the second graph, we faceted by department and weekdays. Hence, we are able to see when is the most interaction occurs. Which departments has the most interaction with each other. Here is my findings from the second graph:
1. Based on the graph below, I can see that Hideki Cocinaro from security department as the highest interactions on tuesday.
2. People from Executive department do not interact with each other.
3. The interactions happeining at Administration and Information Technology department does not vary much throughout the week.
g1 <- GAStech_graph %>%
mutate(betweenness_centrality = centrality_betweenness()) %>%
mutate(closeness_centrality = centrality_closeness()) %>%
ggraph(layout = "fr") +
geom_edge_link(aes(width = Weight),colour= "black", alpha=0.6, show.legend = FALSE) +
#scale_edge_width(range = c(0.1, 5))+
geom_node_point(aes(colour = closeness_centrality, size=betweenness_centrality),alpha=0.5) +
scale_color_gradient(low = "yellow", high = "red") +
geom_label_repel(aes(x=ifelse(closeness_centrality > 0.015, x, 0),
y=ifelse(closeness_centrality > 0.015, y, 0),
label=ifelse(closeness_centrality > 0.015, label,"")),
fontface = 'bold', color = 'black',
size = 3,
box.padding = 0.80, point.padding = 0.5,
segment.colour = "white",
na.rm=TRUE)
#scale_edge_width(range = c(0.1, 5))
g1 + facet_graph(Weekday~Department,labeller = label_wrap_gen(width=10))
# data prep
GAStech_edges_aggregated <- GAStech_edges %>%
left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
rename(from = id) %>%
left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
rename(to = id) %>%
filter(MainSubject == "Work related") %>%
group_by(from, to) %>%
summarise(weight = n()) %>%
filter(from!=to) %>%
filter(weight > 1) %>%
ungroup()
GAStech_nodes <- GAStech_nodes %>%
rename(group = Department)
The original network graph from section 7.4 of Hands-on-Exercise 10 is as follow:
#interactivity
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
visIgraphLayout(layout = "layout_with_fr") %>%
visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)
The graph above adding an interactivity element. User can choose the name of the employee. Then, the connections related to that employees are highlighted. Other than choosing the drop down list, user can also see the highlight by clicking on the node immediately. However, there are few problems with this graph: For the following sketch, i have improved the aesthetics and the clarity from the previous graph.
In the improved graph, here are the changes made:
1. Adding of legend to clearly show in which department does a particular employee belong to. The departments are also differentiated by colour.
2. Instead of a circle, I change the shape of the node to box. This way, we can see the labels clearly and without overlapping with the edge link.
3. I have also added tooltip to add more interactivity when user hover on the node. The tooltip shows the name of the employees clearly in case the nodes are overlapping with each other.
4. I added the arrow in the edge showing the direction of the link.
5. I have added title to show clearly what this graph is about and also change the description of the dropdown list.
GAStech_nodes$title <- paste0(GAStech_nodes$label)
visNetwork(GAStech_nodes, GAStech_edges_aggregated,main = "Task 2: Interactive Network Graph") %>%
visIgraphLayout(layout = "layout_with_fr") %>%
visEdges(arrows = "to", smooth = FALSE) %>%
visPhysics(stabilization = FALSE) %>%
visInteraction(dragNodes = TRUE, dragView = TRUE, zoomView = FALSE) %>%
visNodes(shape="box", labelHighlightBold = TRUE, font = list(size=30)) %>%
visOptions(highlightNearest = list(enabled = T,degree = list(from = 1, to = 1), hover = F,algorithm = "hierarchical"), nodesIdSelection = list(enabled = TRUE,main = "Employees' name" ))%>%
visLegend(zoom = FALSE)%>%
addFontAwesome()