Setting up and Installation of R Packages

# Set up: install any packages that are missing, then attach all of them.
packages <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)

for (p in packages) {
  # requireNamespace() only tests availability, so the package is attached
  # exactly once, by library() below.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() stops with an error if the package still cannot be loaded,
  # instead of silently returning FALSE the way require() does.
  library(p, character.only = TRUE)
}

Data Preparation

# Attach every package used in the analysis; the printed list holds one
# logical per package (TRUE when it was attached successfully).
p <- c(
  "igraph", "tidygraph", "ggraph",
  "visNetwork", "lubridate", "tidyverse"
)
lapply(X = p, FUN = require, character.only = TRUE)
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE

Data Wrangling

# Import the node and edge tables of the GAStech email network.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Inspect the column types of the edge table.
GAStech_edges %>% glimpse()
## Observations: 9,063
## Variables: 8
## $ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26...
## $ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27...
## $ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6...
## $ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00,...
## $ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicPr...
## $ MainSubject <chr> "Work related", "Work related", "Work related", "W...
## $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Ka...
## $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hi...
# Parse SentDate with lubridate's day-month-year parser, then derive the full
# weekday name from the parsed date.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )

# Count work-related emails per sender/receiver/weekday, then keep only
# non-self pairs that exchanged more than one email.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(source != target, Weight > 1) %>%
  ungroup()

GAStech_edges_aggregated
## # A tibble: 1,456 x 4
##    source target Weekday   Weight
##     <dbl>  <dbl> <ord>      <int>
##  1      1      2 Monday         4
##  2      1      2 Tuesday        3
##  3      1      2 Wednesday      5
##  4      1      2 Friday         8
##  5      1      3 Monday         4
##  6      1      3 Tuesday        3
##  7      1      3 Wednesday      5
##  8      1      3 Friday         8
##  9      1      4 Monday         4
## 10      1      4 Tuesday        3
## # ... with 1,446 more rows

Original Code

# Baseline chart, reproduced as given; the critique and redesign in this
# document are measured against it.
# Build a directed tidygraph object from the node table and the aggregated
# edge table.
GAStech_graph <- tbl_graph(nodes = GAStech_nodes, edges = GAStech_edges_aggregated, directed = TRUE)

# Add betweenness and closeness centrality as node columns, lay the graph out
# with the "nicely" layout, and map closeness to node colour and betweenness
# to node size. Edges are drawn with default aesthetics.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  mutate(closeness_centrality = centrality_closeness()) %>%
  ggraph(layout = "nicely") + 
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = closeness_centrality, size=betweenness_centrality))

# Print the plot with ggraph's graph theme applied.
g + theme_graph()

Task1: Static Organisation Graph

Improve the code chunk used to create the organisation network graph by using the latest functions provided in ggraph 2.0.

# Store both centrality measures as node columns in a single step.
GAStech_graph <- GAStech_graph %>%
  mutate(
    betweenness_centrality = centrality_betweenness(),
    closeness_centrality = centrality_closeness()
  )

# Draw the network: closeness drives node colour, betweenness drives node size.
ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(
    aes(colour = closeness_centrality, size = betweenness_centrality)
  ) +
  theme_graph()

Identify three aspects of the graph visualization in Section 6.1 that can be improved.

Number 1 - The network diagram is extremely clustered; we are unable to derive value from looking at the graph, and some of the nodes are hidden from view in this layout.

Number 2 - The nodes lack labels, so we cannot tell which node belongs to which person.

Number 3 - The edges are not weighted.

Provide the sketch of your alternative design

Alternative Sketch

Alternative Sketch

Using appropriate ggraph functions, plot the alternative design.

# Alternative design
# Addresses the three points raised above:
#   1. the "kk" layout is used instead of "nicely" to spread the nodes out;
#   2. every node is labelled (repelled so the labels do not overlap);
#   3. edge width encodes the `Weight` column of the aggregated edge table.
alternate_g <- ggraph(GAStech_graph, layout = "kk") +
  geom_edge_link(aes(width = Weight), colour = "gray") +
  # Keep the widest edges thin enough that they do not hide the nodes.
  scale_edge_width(range = c(0.1, 3)) +
  geom_node_point(
    aes(
      colour = Department,
      size = centrality_betweenness(),
      alpha = centrality_closeness()
    )
  ) +
  geom_node_text(aes(label = label), repel = TRUE) +
  # Drop the default axes and grey panel, which carry no meaning for a graph.
  theme_graph()

alternate_g

Visualization Two

ORIGINAL GRAPH

# Re-import the raw node and edge tables for the visNetwork example.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Look up the node id of each sender and receiver by label (stored as
# `from` / `to`), then count work-related emails per pair and keep only
# non-self pairs with more than one email.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()

# Expose the Department column under the name `group`.
GAStech_nodes <- rename(GAStech_nodes, group = Department)

# Draw the interactive network. No colours are set here; visNetwork derives
# node colours from the `group` column of the node table. Node positions come
# from igraph's "layout_with_fr" layout.
visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  # Highlight a node's neighbours when it is selected and add a drop-down
  # for selecting a node by id.
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)

Improved Design

  # Improved design: neighbours are highlighted on hover as well as on click,
  # and nodes are drawn as boxes with a larger font so labels are readable.
  visNetwork(GAStech_nodes, GAStech_edges_aggregated) %>%
    visIgraphLayout(layout = "layout_with_fr") %>%
    visOptions(
      highlightNearest = list(enabled = TRUE, labelOnly = FALSE, hover = TRUE),
      nodesIdSelection = TRUE
    ) %>%
    # Node text comes from the `label` column of the node table. The global
    # `label` option expects a string, not a logical, so it is not set here.
    visNodes(font = list(size = 35), shape = "box")

Identify three aspects of the graph visualization in Section 7.4 that can be improved.

  1. Though there are labels, they are very small and require the user to zoom in to view them properly.

  2. There is no legend that shows which node belongs to which department.

  3. The direction and weight of edges are not shown in the graph.

Provide the sketch of your alternative design

Alternative Design

Alternative Design

Using appropriate visNetwork functions, plot the alternative design.

# HTML tooltip for each node: name, job title and department.
GAStech_nodes$title <- paste0(
  "<p>Name: ", GAStech_nodes$label,
  "</p> <p>Job Title: ", GAStech_nodes$Title,
  "</p> <p>Department: ", GAStech_nodes$group, "</p>"
)

# Alternative design, addressing the three points raised above:
#   1. larger label font inside box-shaped nodes;
#   2. a legend for the department groups;
#   3. arrows for edge direction, and edge width scaled by the email count.
# visNetwork scales edge width from a column named `value`, so the `weight`
# count is copied under that name for this plot only.
visNetwork(
  GAStech_nodes,
  mutate(GAStech_edges_aggregated, value = weight)
) %>%
  visIgraphLayout(layout = "layout_with_fr") %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
    selectedBy = list(variable = "group", main = "Department"),
    nodesIdSelection = list(main = "Employee Name")
  ) %>%
  visEdges(arrows = "to") %>%
  # Node text comes from the `label` column of the node table. The global
  # `label` option expects a string, not a logical, so it is not set here.
  visNodes(font = list(size = 40), shape = "box") %>%
  visLegend(zoom = FALSE)

references

https://cran.r-project.org/web/packages/visNetwork/vignettes/Introduction-to-visNetwork.html https://www.data-imaginist.com/2017/ggraph-introduction-nodes/ https://www.data-imaginist.com/2017/ggraph-introduction-layouts/ https://www.data-imaginist.com/2017/ggraph-introduction-edges/