Social network analysis using R

Social network analysis is a method for analyzing the connections among individuals, groups, or institutions.

It allows us to examine how actors (e.g., consumers) or institutions (e.g., companies) are interrelated.

The advantage of social network analysis is that, unlike many other methods, it focuses on interaction (rather than on individual behavior).

A network analysis might assess the network for changes in structure deriving from technological disruptions.

This tutorial starts with a simple example and draws on another tutorial by Katherine Ognyanova (2016) that takes a more in-depth look at some of the features of igraph.

# I. Edge lists: make_graph() (as_edgelist() recovers the edge list) ----
#
# Builds a small undirected graph from a flat vector of vertex ids, prints a
# one-line summary and plots it. Numeric vertex ids start at 1.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): nothing
# below depends on an empty environment, and wiping it would silently delete
# objects of whoever sources this script.

# install.packages("igraph")  # run once if igraph is not installed
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
##     decompose, spectrum
## The following object is masked from 'package:base':
##
##     union

# Each consecutive pair of values is one edge: 1-2, 1-3, 2-3, 3-4.
g_el <- make_graph(c(1, 2, 1, 3, 2, 3, 3, 4), directed = FALSE)
summary(g_el)
## IGRAPH 88f08ae U--- 4 4 --
## (the 7-character graph id after "IGRAPH" differs from run to run)
plot(g_el)

# II. Adjacency matrices: graph_from_adjacency_matrix() and
#     as_adjacency_matrix() ----
#
# Builds an undirected graph from a 6 x 6 adjacency matrix, plots it, and
# converts the graph back to an adjacency matrix.
#
# The workspace is deliberately NOT cleared with rm(list = ls()); this section
# only uses the objects it creates itself.
library(igraph) # load package igraph

# Symmetric 0/1 matrix: entry [i, j] is 1 when vertices i and j share an edge.
adjm_u <- matrix(
  c(
    0, 1, 0, 0, 1, 0,
    1, 0, 1, 0, 1, 0,
    0, 1, 0, 1, 0, 0,
    0, 0, 1, 0, 1, 1,
    1, 1, 0, 1, 0, 0,
    0, 0, 0, 1, 0, 0
  ), # the data elements
  nrow = 6, # number of rows
  ncol = 6, # number of columns
  byrow = TRUE # fill matrix by rows
)
g_adj_u <- graph_from_adjacency_matrix(adjm_u, mode = "undirected")
plot(g_adj_u)
as_adjacency_matrix(g_adj_u) # get the adjacency matrix back from the graph

## 6 x 6 sparse Matrix of class "dgCMatrix"
##                 
## [1,] . 1 . . 1 .
## [2,] 1 . 1 . 1 .
## [3,] . 1 . 1 . .
## [4,] . . 1 . 1 1
## [5,] 1 1 . 1 . .
## [6,] . . . 1 . .
# III. Graphs from a data frame: graph_from_data_frame() ----
#
# First create a data frame with one row per edge: the first two columns name
# the edge's endpoints, and the extra `weight` column is carried over as an
# edge attribute (see E(g)$weight below).
node1 <- c("Her", "You", "Him")
node2 <- c("Him", "Her", "You")
weight <- c(10, -2, 3)
df <- data.frame(node1, node2, weight)
# Use graph_from_data_frame() to create a graph
g <- graph_from_data_frame(df, directed = FALSE)
V(g)$name # node names
## [1] "Her" "You" "Him"
E(g)$weight # edge weights
## [1] 10 -2  3
plot(g)

# Rank CRAN packages by PageRank over the package dependency graph.
#
# Steps: download the package database from a dated CRAN snapshot, build the
# dependency graph with miniCRAN, run PageRank on it with igraph, and show the
# 25 highest-ranked packages.

# install.packages("miniCRAN")  # run once if miniCRAN is not installed
library(miniCRAN)
library(igraph)
library(magrittr)


# Download matrix of available packages at specific date ------------------

# NOTE(review): MRAN snapshot hosting has been retired, so this URL is
# unlikely to resolve any more. It is kept unchanged because the printed
# results below belong to the 2014-11-01 snapshot; to re-run the analysis,
# point this at a CRAN-like repository that is still online (results will
# differ from the output shown).
MRAN <- "http://mran.revolutionanalytics.com/snapshot/2014-11-01/"

# filters = NULL keeps every row of the package index (no filtering by
# R version, OS type, duplicates, ...).
pdb <- MRAN %>%
  contrib.url(type = "source") %>%
  available.packages(type = "source", filters = NULL)


# Use miniCRAN to build a graph of package dependencies -------------------

# Note that this step takes a while, expect ~15-30 seconds

# Overwrites any earlier `g` with the dependency graph of all packages in pdb.
g <- pdb[, "Package"] %>%
  makeDepGraph(
    availPkgs = pdb,
    suggests = FALSE,
    enhances = TRUE,
    includeBasePkgs = FALSE
  )


# Use the page_rank algorithm in igraph -----------------------------------

# page_rank() returns a list; its "vector" element holds one score per
# vertex. The scores are sorted and reshaped into a one-column matrix whose
# row names are the package names.
pr <- g %>%
  page_rank(directed = FALSE) %>%
  use_series("vector") %>%
  sort(decreasing = TRUE) %>%
  as.matrix() %>%
  set_colnames("page.rank")


# Display results ---------------------------------------------------------

head(pr, 25)
##                 page.rank
## MASS          0.020973243
## Rcpp          0.016623273
## Matrix        0.010039482
## lattice       0.009630356
## mvtnorm       0.008782968
## survival      0.008346058
## ggplot2       0.007270482
## plyr          0.006715150
## XML           0.004692814
## igraph        0.004668075
## RCurl         0.004289085
## sp            0.004265781
## coda          0.004154109
## nlme          0.004057037
## boot          0.003813675
## stringr       0.003793370
## rgl           0.003388690
## rJava         0.003273038
## reshape2      0.003262706
## RcppArmadillo 0.003236968
## ape           0.003132962
## zoo           0.003102256
## Hmisc         0.002698988
## numDeriv      0.002657455
## mgcv          0.002598307
# Plot the dependency graph of the 25 top-ranked packages ------------------

# Fix the RNG seed before plotting so the figure can be reproduced.
set.seed(42)
# Innermost to outermost: take the first 25 rows of `pr`, keep their row
# names (the package names), build their dependency graph against `pdb`,
# and plot it.
plot(
  makeDepGraph(rownames(head(pr, 25)), pdb),
  main = "Top packages by page rank",
  cex = 0.5
)

# Graphs with named vertices: make_graph() with character edge lists ----

# install.packages("igraph")  # run once if igraph is not installed
library(igraph)

# When the edge list has vertex names, the number of nodes is not needed.
# Each consecutive pair of names is one edge: John-Jim, Jim-Jill, Jill-John.
g3 <- make_graph(c("John", "Jim", "Jim", "Jill", "Jill", "John"))

plot(g3)

# In named graphs we can specify isolates by providing a list of their names.
# This edge list also lists Jim-Jack twice (a multi-edge) and John-John
# (a self-loop).
g4 <- make_graph(
  c("John", "Jim", "Jim", "Jack", "Jim", "Jack", "John", "John"),
  isolates = c("Jesse", "Janis", "Jennifer", "Justin")
)

plot(
  g4,
  edge.arrow.size = 0.5,
  vertex.color = "gold",
  vertex.size = 15,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = 0.8,
  vertex.label.dist = 2,
  edge.curved = 0.2
)

# Small graphs written out symbolically with graph_from_literal() ----
# In the formula notation `-` links two vertices with an undirected edge,
# `:` groups several vertices into a set, and a bare name (j) is a vertex
# with no edges.

gl <- graph_from_literal(
  a-b-c-d-e-f,
  a-g-h-b,
  h-e:f:i,
  j
)
plot(gl)

# Same graph as `gl` with one extra edge, e-b.
g5 <- graph_from_literal(
  a-b-c-d-e-f,
  e-b,
  a-g-h-b,
  h-e:f:i,
  j
)
plot(g5)

# `g5` is reused here: the assignment below replaces the graph built above.
# NOTE(review): "Farther" is presumably a typo for "Father"; it is left as
# written because it is a vertex name that appears in the plot.
g5 <- graph_from_literal(
  Jimmy-Mother-Farther-Wife,
  Jimmy-B:S,
  Jimmy-A,
  A-B,
  Jimmy-G-E,
  Jimmy-E
)
plot(g5)