Social Network Analysis - Lab Relations

For this exercise, we were requested to make a network graph that would reflect relations of individuals in the lab.

I looked at relations from a collaboration perspective.

library(igraph)
library(wesanderson)
library(networkD3)

loading data:

df<-read.csv("data.csv", stringsAsFactors = F)

Using Base Plot Method

First, a data set was generated. The first two columns are the individual pairs, and the “weights” column reflect the degree of collaboration among those individuals.

relations<-df
relations$color<-"white"

actors<-data.frame(name=unique(c(df$from, df$to)))
actors$color<-c(wes_palettes$GrandBudapest2, wes_palettes$GrandBudapest1, wes_palettes$Moonrise1[1]) 

isDark <- function(colr) { (sum( col2rgb(colr) * c(299,587,114))/1000 < 150) } # function to tell if a colour is dark or light

for (i in 1:length(actors$color)){
  actors$fontCol[i]<-ifelse(isDark(actors$color[i]), "white", "black")
}

print(relations)

##     from       to weight color
## 1   Evan    Megan      5 white
## 2   Evan    Xinyi      2 white
## 3   Evan     Mona      4 white
## 4   Evan Michelle      4 white
## 5   Evan    Shane      3 white
## 6   Evan    April      1 white
## 7   Evan     Daev      3 white
## 8   Evan    Laura      3 white
## 9  Laura     Mona      1 white
## 10 Laura     Daev      1 white
## 11 Xinyi    Megan      2 white
## 12 Xinyi     Mona      2 white
## 13 Xinyi    April      1 white
## 14 Megan    April      2 white
## 15 Megan     Mona      1 white
## 16 April     Mona      1 white

As for the individual person within the lab, I’ve assigned each person a unique colour.

print(actors)

##       name   color fontCol
## 1     Evan #E6A0C4   black
## 2    Laura #C6CDF7   black
## 3    Xinyi #D8A499   black
## 4    Megan #7294D4   white
## 5    April #F1BB7B   black
## 6     Mona #FD6467   white
## 7 Michelle #5B1A18   white
## 8    Shane #D67236   white
## 9     Daev #F3DF6C   black

When generating an igraph object, it seems that attributes such as ‘colour’ and ‘weight’ are automatically generated without me having to create/assign them, which was pretty neat.

g <- graph_from_data_frame(relations, directed=F, vertices=actors)
print(g, e=TRUE, v=TRUE)

## IGRAPH 0a1cbb5 UNW- 9 16 -- 
## + attr: name (v/c), color (v/c), fontCol (v/c), weight (e/n), color
## | (e/c)
## + edges from 0a1cbb5 (vertex names):
##  [1] Evan --Megan    Evan --Xinyi    Evan --Mona     Evan --Michelle
##  [5] Evan --Shane    Evan --April    Evan --Daev     Evan --Laura   
##  [9] Laura--Mona     Laura--Daev     Xinyi--Megan    Xinyi--Mona    
## [13] Xinyi--April    Megan--April    Megan--Mona     April--Mona

par(bg="black")
plot(g, edge.width = E(g)$weight, 
         vertex.frame.color = NULL,
     vertex.label.color=V(g)$fontCol,
     vertex.size=50,
     vertex.label.font=2,)

Using ‘networkD3’ Package

Here is a quick interactive graph made using the ‘simpleNetwork’ function in the ‘networkD3’ package.

(You can zoom in on the graph using your mouse and drag the nodes around.)

simpleNetwork(relations, zoom=T, nodeColour = "lightseagreen")

You can also directly use an igraph object and convert it into a (prettier, interactive) network graph using this package.

For the ‘forceNetwork’ function to work, it requires a grouping variable. Here, a random walk function was used to try and identify clusters or communities within the network. You can use other ways to assign group membership (e.g, PI, grad student, RAs, etc)

wc <- cluster_walktrap(g) 
members <- membership(wc)

g2 <- igraph_to_networkD3(g, group = members)

network<-forceNetwork(Links = g2$links, Nodes = g2$nodes,
            Source = "source", Target = "target",
            NodeID = "name",
            Group = "group" , opacity = 0.6, 
            colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);"), 
            zoom=T)

network

# saveNetwork(network, "homework.html", selfcontained = TRUE)

Here’s a fancier code someone else made and I’ve just coopted this for this exercise.

The size of the nodes reflect their betweenness centrality; here, the different colour groups are generated based on dice similarity.

In future sessions, I will hopefully learn more about what these terms mean!

# Create a graph. Use simplyfy to ensure that there are no duplicated edges or self loops
edgeList<-relations[c(1:3)]
names(edgeList)<-c("SourceName", "TargetName", "Weight")
gD <- igraph::simplify(igraph::graph.data.frame(edgeList, directed=FALSE))

# Create a node list object (actually a data frame object) that will contain information about nodes
nodeList <- data.frame(ID = c(0:(igraph::vcount(gD) - 1)), # because networkD3 library requires IDs to start at 0
                       nName = igraph::V(gD)$name)

# Map node names from the edge list to node IDs
getNodeID <- function(x){
  which(x == igraph::V(gD)$name) - 1 # to ensure that IDs start at 0
}
# And add them to the edge list
edgeList <- plyr::ddply(edgeList, .variables = c("SourceName", "TargetName", "Weight"), 
                        function (x) data.frame(SourceID = getNodeID(x$SourceName), 
                                                TargetID = getNodeID(x$TargetName)))

############################################################################################
# Calculate some node properties and node similarities that will be used to illustrate 
# different plotting abilities and add them to the edge and node lists

# Calculate degree for all nodes
nodeList <- cbind(nodeList, nodeDegree=igraph::degree(gD, v = igraph::V(gD), mode = "all"))

# Calculate betweenness for all nodes
betAll <- igraph::betweenness(gD, v = igraph::V(gD), directed = FALSE) / (((igraph::vcount(gD) - 1) * (igraph::vcount(gD)-2)) / 2)
betAll.norm <- (betAll - min(betAll))/(max(betAll) - min(betAll))
nodeList <- cbind(nodeList, nodeBetweenness=100*betAll.norm) # We are scaling the value by multiplying it by 100 for visualization purposes only (to create larger nodes)
rm(betAll, betAll.norm)

#Calculate Dice similarities between all pairs of nodes
dsAll <- igraph::similarity.dice(gD, vids = igraph::V(gD), mode = "all")

F1 <- function(x) {data.frame(diceSim = dsAll[x$SourceID +1, x$TargetID + 1])}
edgeList <- plyr::ddply(edgeList, .variables=c("SourceName", "TargetName", "Weight", "SourceID", "TargetID"), 
                           function(x) data.frame(F1(x)))

rm(dsAll, F1, getNodeID, gD)

############################################################################################
# We will also create a set of colors for each edge, based on their dice similarity values
# We'll interpolate edge colors based on the using the "colorRampPalette" function, that 
# returns a function corresponding to a collor palete of "bias" number of elements (in our case, that
# will be a total number of edges, i.e., number of rows in the edgeList data frame)
F2 <- colorRampPalette(c("#FFFF00", "#FF0000"), bias = nrow(edgeList), space = "rgb", interpolate = "linear")
colCodes <- F2(length(unique(edgeList$diceSim)))
edges_col <- sapply(edgeList$diceSim, function(x) colCodes[which(sort(unique(edgeList$diceSim)) == x)])

rm(colCodes, F2)
############################################################################################
# Let's create a network

D3_network_LM <- networkD3::forceNetwork(Links = edgeList, # data frame that contains info about edges
                        Nodes = nodeList, # data frame that contains info about nodes
                        Source = "SourceID", # ID of source node 
                        Target = "TargetID", # ID of target node
                        Value = "Weight", # value from the edge list (data frame) that will be used to value/weight relationship amongst nodes
                        NodeID = "nName", # value from the node list (data frame) that contains node description we want to use (e.g., node name)
                        Nodesize = "nodeBetweenness",  # value from the node list (data frame) that contains value we want to use for a node size
                        Group = "nodeDegree",  # value from the node list (data frame) that contains value we want to use for node color
                        height = 500, # Size of the plot (vertical)
                        width = 1000,  # Size of the plot (horizontal)
                        fontSize = 20, # Font size
                        linkDistance = networkD3::JS("function(d) { return 10*d.value; }"), # Function to determine distance between any two nodes, uses variables already defined in forceNetwork function (not variables from a data frame)
                        linkWidth = networkD3::JS("function(d) { return d.value/5; }"),# Function to determine link/edge thickness, uses variables already defined in forceNetwork function (not variables from a data frame)
                        opacity = 0.85, # opacity
                        zoom = TRUE, # ability to zoom when click on the node
                        opacityNoHover = 0.1, # opacity of labels when static
                        linkColour = edges_col) # edge colors

# Plot network
D3_network_LM

Social Network Analysis - Lab Relations

Mona Zhu

March 24, 2020

Using Base Plot Method

Using ‘networkD3’ Package