1 Introduction to Social Network Analysis in R

1.1 October 4th, 2022

Objective: Today we will learn how to use R to analyze social network data. By the end of the lab, participants should be able to import data into R, manipulate the data to the desired form, analyze various network measures, and graph the networks.

Data: Today, we will be using a Grey’s Anatomy hookup network in order to introduce social network analysis in R. Grey’s Anatomy is a popular medical drama television series on ABC. It is ABC’s longest-running scripted primetime series.

Source: Today’s lab is a modification of several other social network analysis labs in R that are publicly available online. I am thankful to the following sources for their SNA code in R, for making the Grey’s Anatomy hookup network publicly available, and for inspiring the use of these data in social network analysis workshops in R. In particular, I am thankful to Alex Leavitt and Josh Clark as well as Gary Weissman. https://github.com/alexleavitt/SNAinRworkshop https://gweissman.github.io/post/grey-s-anatomy-network-of-sexual-relations/ https://finnstats.com/index.php/2021/04/22/social-network-analysis-in-r/

1.2 Steps in Today’s Lab

  1. Set up R, including installing and loading network packages
  2. Converting txt files to network objects
  3. Examining network measures
  4. Graphing Networks

2 Setup

library("knitr")
# Chunk-level defaults: show the code of every chunk and hold its output until
# the whole chunk has run.
knitr::opts_chunk$set(echo = TRUE, results = "hold")
# `root.dir` is a package-level knitr option rather than a chunk option, so it
# only takes effect when set through opts_knit (it is ignored by opts_chunk).
knitr::opts_knit$set(root.dir = "C:/Users/Julie/Dropbox/Sociology/Current Projects/Diane Work/SNA Course- Teaching R")
# Attach package `x`, installing it first when it cannot be found.
#
# Args:
#   x: Name of the package, as a single character string.
#
# Returns:
#   Invisibly, TRUE when the package is attached at the end of the call and
#   FALSE when it still cannot be loaded after the installation attempt (in
#   which case require() has already emitted its usual warning).
installAndLoad <- function(x) {
  stopifnot(is.character(x), length(x) == 1L)

  loaded <- suppressWarnings(
    require(x, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE)
  )

  if (!loaded) {
    # message() displays the notice; a bare paste() inside a function only
    # builds a string that is then thrown away.
    message("Didn't find ", x, ". Installing it.")
    install.packages(x)
    loaded <- require(x, character.only = TRUE, warn.conflicts = FALSE)
  }

  invisible(loaded)
}
# if(interactive() &&
#    installAndLoad("rstudioapi") && rstudioapi::isAvailable() &&
#    as.integer(substr(rstudioapi::versionInfo()$version, 1,1)) >= 1)
#         setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

2.1 Loading Packages

# Attach the plotting and network packages used in this lab, installing any
# that are missing. igraph only needs to be requested once.
# Attaching sna also attaches 'network', which masks several igraph functions
# (see the startup messages that follow), so later code calls igraph functions
# as igraph::fun() where names collide.
installAndLoad("ggplot2")
installAndLoad("igraph")
installAndLoad("ggraph")
installAndLoad("sna")
## 
## Attaching package: 'statnet.common'
## The following objects are masked from 'package:base':
## 
##     attr, order
## 
## 'network' 1.17.1 (2021-06-12), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information
## 
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## sna: Tools for Social Network Analysis
## Version 2.6 created on 2020-10-5.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
# Attach the remaining Statnet packages. The startup messages that follow show
# statnet bringing in ergm, networkDynamic, tergm and ergm.count.
installAndLoad("network")
installAndLoad("statnet")
## 
## 'ergm' 4.2.2 (2022-06-01), part of the Statnet Project
## * 'news(package="ergm")' for changes since last version
## * 'citation("ergm")' for citation information
## * 'https://statnet.org' for help, support, and other information
## 'ergm' 4 is a major update that introduces some backwards-incompatible
## changes. Please type 'news(package="ergm")' for a list of major
## changes.
## 
## Attaching package: 'ergm'
## The following object is masked from 'package:statnet.common':
## 
##     snctrl
## 
## 'networkDynamic' 0.11.2 (2022-05-04), part of the Statnet Project
## * 'news(package="networkDynamic")' for changes since last version
## * 'citation("networkDynamic")' for citation information
## * 'https://statnet.org' for help, support, and other information
## Registered S3 method overwritten by 'tergm':
##   method                   from
##   simulate_formula.network ergm
## 
## 'tergm' 4.1.0 (2022-06-22), part of the Statnet Project
## * 'news(package="tergm")' for changes since last version
## * 'citation("tergm")' for citation information
## * 'https://statnet.org' for help, support, and other information
## 
## Attaching package: 'tergm'
## The following object is masked from 'package:ergm':
## 
##     snctrl
## The following object is masked from 'package:statnet.common':
## 
##     snctrl
## 
## 'ergm.count' 4.1.1 (2022-05-24), part of the Statnet Project
## * 'news(package="ergm.count")' for changes since last version
## * 'citation("ergm.count")' for citation information
## * 'https://statnet.org' for help, support, and other information
## 
## 'statnet' 2019.6 (2019-06-13), part of the Statnet Project
## * 'news(package="statnet")' for changes since last version
## * 'citation("statnet")' for citation information
## * 'https://statnet.org' for help, support, and other information
## unable to reach CRAN
# Attach the tidyverse last. As the conflict report that follows shows, dplyr,
# purrr and tidyr then mask several igraph and stats functions.
installAndLoad("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose()       masks igraph::compose()
## x tidyr::crossing()      masks igraph::crossing()
## x dplyr::filter()        masks stats::filter()
## x dplyr::groups()        masks igraph::groups()
## x dplyr::lag()           masks stats::lag()
## x purrr::simplify()      masks igraph::simplify()
# Fix the random seed so that any random steps later in the lab are repeatable.
set.seed(10051990)
# Point R at the folder that holds the lab files; edit this path for your own
# machine before running.
setwd("C:/Users/Julie/Dropbox/Sociology/Current Projects/Diane Work/SNA Course- Teaching R")
# Print the working directory to confirm the change took effect.
getwd()

# Note: in R the direction of your slash matters (in Stata, by contrast, it doesn't). Inside an R string a single backslash starts an escape sequence, so a Windows file path that you copy and paste (which contains backslashes) will not work as-is. Either change every backslash to a forward slash ("C:/Users/...") or double each backslash ("C:\\Users\\...").


## This is the more typical way to install and load packages ##
# install.packages("ggplot2") # for first-time downloading, then:
# library(ggplot2) # run every time, to "load" the package into the R session
## [1] "C:/Users/Julie/Dropbox/Sociology/Current Projects/Diane Work/SNA Course- Teaching R"

3 General Help in R

Writing “?” before a function name opens the help page for that function, on the bottom right-hand side of the interface. For example: ?library(), ?readRDS()

You can also access the help page by highlighting the function in your code and pressing the F1 key.
If the package of the function is loaded, the help page for the function will open.
If the package of the function is not loaded, in the help window it will say, “Help for topic [function_name] is not in any loaded package but can be found in the following packages:” and then it will list the package(s) where the function can be found. In other words, if this happens, you haven’t loaded the package for the function that you’re trying to use. More on types of data in R can be found here: https://r-lang.com/r-data-types-vector-list-matrix-array-and-data-frame/

4 Load your Data

# Read the node attribute table; its `name` column labels the rows and columns
# of the adjacency matrix built below.
data <- read.csv(file = "grey_nodes.csv", header = TRUE)

## Remove objects in your environment with the function rm([object_name])
# rm(data)


# Read in Matrix .txt File
## Keep the attribute table under the name used in the rest of the lab; its
## columns are still reached with `$`, as the lines below do
data_vector <- as.vector(data)

## pull the names attribute from the data
data_names <- data_vector$name
## This accesses the variable "name" in the dataframe "data"

## The adjacency matrix has one row and one column per node, so take its size
## from the attribute table instead of hard-coding 44.
n_nodes <- length(data_names)
adjacency_values <- scan("grey_adjacency.txt", n = n_nodes * n_nodes)

## Stop early if the file does not hold a full n x n matrix; otherwise matrix()
## would recycle the values it was given to fill the gap.
stopifnot(length(adjacency_values) == n_nodes * n_nodes)

## The file is read row by row, hence byrow = TRUE.
data_matrix <- matrix(adjacency_values, n_nodes, n_nodes, byrow = TRUE, dimnames = list(data_names, data_names))



# Build a directed igraph object from the named adjacency matrix, then take a
# first look at it with the default plot method.
data_network <- igraph::graph_from_adjacency_matrix(
  adjmatrix = data_matrix,
  mode = "directed"
)
plot(data_network)

# see "graph_from_" for full list of functions for creating graph objects


# Assign Attributes to Vertices
# set_vertex_attr() returns a modified copy of the graph, so its result must be
# assigned back to data_network. Calling it without an assignment only prints
# that copy and leaves data_network itself unchanged.
data_network <- igraph::set_vertex_attr(data_network, "sex", value = data_vector$sex)
data_network <- igraph::set_vertex_attr(data_network, "race", value = data_vector$race)
data_network <- igraph::set_vertex_attr(data_network, "birthyear", value = data_vector$birthyear)
data_network <- igraph::set_vertex_attr(data_network, "position", value = data_vector$position)
data_network <- igraph::set_vertex_attr(data_network, "season", value = data_vector$season)
data_network <- igraph::set_vertex_attr(data_network, "sign", value = data_vector$sign)


# Alternative Method of assigning attributes to Vertices
# V(graph)$attribute <- values stores the attribute directly on data_network.
# Values are matched to vertices by position; the vertex order follows the rows
# of the attribute table because the matrix dimnames came from its `name` column.
V(data_network)$sex <- data_vector$sex
V(data_network)$race <- data_vector$race
V(data_network)$birthyear <- data_vector$birthyear
V(data_network)$position <- data_vector$position
V(data_network)$season <- data_vector$season
V(data_network)$sign <- data_vector$sign

## The same can be done with edges: E(data_network)$attribute_name <- ...
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), sex (v/c)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), race (v/c)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), birthyear (v/n)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), position (v/c)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), season (v/n)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), sign (v/c)
## + edges from 2f2111a (vertex names):
##  [1] addison  ->derek      addison  ->karev      addison  ->sloan     
##  [4] adele    ->chief      altman   ->burton     altman   ->owen      
##  [7] altman   ->perkins    altman   ->sloan      amelia   ->sloan     
## [10] arizona  ->torres     ava      ->karev      avery    ->kepner    
## [13] avery    ->lexi       bailey   ->ben        bailey   ->lloyd     
## [16] bailey   ->tucker     ben      ->bailey     burton   ->altman    
## [19] catherine->chief      chief    ->adele      chief    ->catherine 
## [22] chief    ->ellis grey colin    ->yang       denny    ->izzie     
## + ... omitted several edges

5 Network Statistics

# Whole-network statistics. Each call prints its result; nothing here modifies
# data_network.
summary(data_network) # Get an overall summary

gsize(data_network) # How many edges are present?
gorder(data_network) # How many nodes are present?
distance_table(data_network, directed = TRUE) # geodesics: number of shortest paths of each length ($res) and number of unconnected pairs ($unconnected)
edge_density(data_network, loops = TRUE) # graph density; with loops = TRUE the denominator is 44 * 44, i.e. self-ties count as possible ties. NOTE(review): confirm this is intended for a hookup network
transitivity(data_network, type = c("average"), isolates = c("zero")) # transitivity: average of the local clustering coefficients, with isolates counted as zero
reciprocity(data_network, ignore.loops = TRUE) # reciprocity; reciprocity of 1 makes sense for a dating network, since every tie is mutual
igraph::components(data_network) # components: membership of each node, component sizes, and number of components
centr_degree(data_network) # degree centralization
centr_betw(data_network) # betweenness centralization
centr_eigen(data_network) # eigenvector centralization

# Keep only the per-node degree scores ($res) from the centralization result
centr_degree_res <- centr_degree(data_network)$res
view(centr_degree_res) # inspect the per-node scores

# Actor Centrality Measures
# degree() counts incoming and outgoing ties together by default. Because the
# network's reciprocity is 1, every relationship is stored as two directed
# edges, so each value is twice that actor's number of partners.
igraph::degree(data_network) # What is each actor's degree?
mean(igraph::degree(data_network)) # What is the average degree in the whole network?

# Alternative Coding to the above computation of the average degree
data_network %>% 
  igraph::degree() %>% # This is the magrittr pipe, heavily used in the Tidyverse
  mean()


hist(igraph::degree(data_network), right = FALSE, xlab = "Degree", main = "Grey's Anatomy") # Plot the degree distribution in a histogram

deg <- igraph::degree(data_network) # Create a separate value object in R which contains the degree of each node
V(data_network)$degree <- deg # Store each actor's degree as a vertex attribute, alongside sex, race, etc.
# `$` on the graph itself sets a graph-level attribute, not a per-vertex one.
# This copy is kept because the plotting code later reads data_network$degree.
data_network$degree <- deg
deg # See what is stored in the value object you just created

saveRDS(data_network, file = "data_network_v1.0.RDS") # Save your R object
## IGRAPH 2f2111a DN-- 44 92 -- 
## + attr: name (v/c), sex (v/c), race (v/c), birthyear (v/n), position
## | (v/c), season (v/n), sign (v/c)
## [1] 92
## [1] 44
## $res
## [1]  92 204 246 186 132  66  36  16
## 
## $unconnected
## [1] 914
## 
## [1] 0.04752066
## [1] 0
## [1] 1
## $membership
##      addison        adele       altman       amelia      arizona          ava 
##            1            2            1            1            1            1 
##        avery       bailey          ben       burton    catherine        chief 
##            1            3            3            1            2            2 
##        colin        denny        derek   ellis grey         finn         grey 
##            1            1            1            2            1            1 
##         hahn         hank        izzie        karev       kepner         lexi 
##            1            1            1            1            1            1 
##        lloyd         lucy        megan       mostow mrs. seabury        nancy 
##            3            1            4            4            1            1 
##       olivia     o'malley         owen      perkins       pierce      preston 
##            1            1            1            1            4            1 
##         reed        sloan        steve   susan grey  thatch grey       torres 
##            1            1            1            2            2            1 
##       tucker         yang 
##            3            1 
## 
## $csize
## [1] 31  6  4  3
## 
## $no
## [1] 4
## 
## $res
##  [1]  6  2  8  2  2  2  4  6  2  2  2  6  2  2  4  4  2  8  2  2  8 18  4  6  2
## [26]  2  4  2  2  2  4  8  4  2  2  2  2 14  2  2  4 10  2  6
## 
## $centralization
## [1] 0.1644132
## 
## $theoretical_max
## [1] 3698
## 
## $res
##  [1] 145.13333   0.00000 306.00000   0.00000   0.00000   0.00000   6.50000
##  [8]   6.00000   0.00000   0.00000   0.00000  14.00000   0.00000   0.00000
## [15]  50.70000  12.00000   0.00000 122.40000   0.00000   0.00000 128.50476
## [22] 350.38095  14.59048 103.74286   0.00000   0.00000   2.00000   0.00000
## [29]   0.00000   0.00000  14.50476 148.70000 162.00000   0.00000   0.00000
## [36]   0.00000   0.00000 465.71905   0.00000   0.00000   8.00000 265.12381
## [43]   0.00000 114.00000
## 
## $centralization
## [1] 0.2324505
## 
## $theoretical_max
## [1] 77658
## 
## $vector
##  [1] 5.024923e-01 8.525957e-17 2.326524e-01 1.800718e-01 1.831075e-01
##  [6] 2.702897e-01 2.281153e-01 8.147959e-17 7.664961e-17 6.288357e-02
## [11] 6.677966e-17 8.063959e-17 5.883552e-03 1.261075e-01 1.928699e-01
## [16] 1.335593e-16 5.705135e-02 2.110748e-01 1.831075e-01 1.261075e-01
## [21] 4.665641e-01 1.000000e+00 3.319470e-01 5.120188e-01 6.719966e-17
## [26] 2.702897e-01 6.184469e-17 3.548982e-17 2.702897e-01 1.800718e-01
## [31] 3.983930e-01 4.739479e-01 6.876712e-02 6.288357e-02 5.491472e-17
## [36] 5.883552e-03 1.800718e-01 6.662177e-01 5.705135e-02 5.018975e-17
## [41] 9.953950e-17 6.774490e-01 6.572967e-17 2.176757e-02
## 
## $value
## [1] 7.399467
## 
## $options
## $options$bmat
## [1] "I"
## 
## $options$n
## [1] 44
## 
## $options$which
## [1] "LA"
## 
## $options$nev
## [1] 1
## 
## $options$tol
## [1] 0
## 
## $options$ncv
## [1] 0
## 
## $options$ldv
## [1] 0
## 
## $options$ishift
## [1] 1
## 
## $options$maxiter
## [1] 1000
## 
## $options$nb
## [1] 1
## 
## $options$mode
## [1] 1
## 
## $options$start
## [1] 1
## 
## $options$sigma
## [1] 0
## 
## $options$sigmai
## [1] 0
## 
## $options$info
## [1] 0
## 
## $options$iter
## [1] 2
## 
## $options$nconv
## [1] 1
## 
## $options$numop
## [1] 30
## 
## $options$numopb
## [1] 0
## 
## $options$numreo
## [1] 20
## 
## 
## $centralization
## [1] 0.8522517
## 
## $theoretical_max
## [1] 42
## 
##      addison        adele       altman       amelia      arizona          ava 
##            6            2            8            2            2            2 
##        avery       bailey          ben       burton    catherine        chief 
##            4            6            2            2            2            6 
##        colin        denny        derek   ellis grey         finn         grey 
##            2            2            4            4            2            8 
##         hahn         hank        izzie        karev       kepner         lexi 
##            2            2            8           18            4            6 
##        lloyd         lucy        megan       mostow mrs. seabury        nancy 
##            2            2            4            2            2            2 
##       olivia     o'malley         owen      perkins       pierce      preston 
##            4            8            4            2            2            2 
##         reed        sloan        steve   susan grey  thatch grey       torres 
##            2           14            2            2            4           10 
##       tucker         yang 
##            2            6 
## [1] 4.181818
## [1] 4.181818
##      addison        adele       altman       amelia      arizona          ava 
##            6            2            8            2            2            2 
##        avery       bailey          ben       burton    catherine        chief 
##            4            6            2            2            2            6 
##        colin        denny        derek   ellis grey         finn         grey 
##            2            2            4            4            2            8 
##         hahn         hank        izzie        karev       kepner         lexi 
##            2            2            8           18            4            6 
##        lloyd         lucy        megan       mostow mrs. seabury        nancy 
##            2            2            4            2            2            2 
##       olivia     o'malley         owen      perkins       pierce      preston 
##            4            8            4            2            2            2 
##         reed        sloan        steve   susan grey  thatch grey       torres 
##            2           14            2            2            4           10 
##       tucker         yang 
##            2            6

6 Network Graphs

# Reload the graph object saved at the end of the previous section
data_network <- readRDS(file = "data_network_v1.0.RDS")


# Quick look with the default plot method
plot(data_network)

# Graph 1: "fr" layout with plain edges, a point per node, and each node
# labelled with its name
ggraph(graph = data_network, layout = "fr") +
  geom_edge_link(mapping = aes()) +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name)) +
  theme_void()

# Graph 2
# Arrowhead specification reused by the directed plots: a closed head with a
# 30-degree angle, drawn at the receiving end of each edge
arrow_1 <- grid::arrow(
  angle = 30,
  length = unit(.08, "inches"),
  ends = "last",
  type = "closed"
)

# Same layout as Graph 1, now with arrowheads that stop just short of the node,
# light-blue nodes, larger labels that are repelled from one another, and a title
graph_2 <- ggraph(graph = data_network, layout = "fr") +
  geom_edge_link(
    mapping = aes(),
    arrow = arrow_1,
    end_cap = circle(.05, 'inches'),
    show.legend = TRUE
  ) +
  geom_node_point(size = 3, color = "lightblue") +
  geom_node_text(mapping = aes(label = name), size = 5, repel = TRUE) +
  labs(title = "Graph 2") +
  theme_void()

graph_2

# png(filename = "C:/Users/Julie/Dropbox/Sociology/Current Projects/Diane Work/SNA Course- Teaching R/Graph 1 v1.0.png", 500, 500)
# graph_2
# dev.off()


# Graph 3, nodes sized by degree
# Identical to Graph 2 except that each node's point size is taken from the
# degree values stored on the graph as data_network$degree
graph_3 <- ggraph(graph = data_network, layout = "fr") +
  geom_edge_link(
    mapping = aes(),
    arrow = arrow_1,
    end_cap = circle(.05, 'inches'),
    show.legend = TRUE
  ) +
  geom_node_point(size = data_network$degree, color = "lightblue") +
  geom_node_text(mapping = aes(label = name), size = 5, repel = TRUE) +
  labs(title = "Graph 3: Nodes sized by Degree") +
  theme_void()

graph_3

# Alternative Plotting: the same idea with igraph's own plot method
plot(data_network,
     # Degree is computed here so the plot does not depend on an attribute
     # having been stored on the graph beforehand.
     vertex.size = igraph::degree(data_network),
     vertex.color = adjustcolor("lightblue"),
     # Vertex names live on V(graph); data_network$name would look up a
     # graph-level attribute instead.
     vertex.label = V(data_network)$name,
     # igraph's plot method does not accept a grid arrow object; arrowheads are
     # scaled with edge.arrow.size (1 is the default, adjust to taste).
     edge.arrow.size = 0.5,
     layout = layout_with_fr(data_network))

# Plotting Communities
# Detect communities with the edge-betweenness method, then draw the graph
# together with the detected communities.
data_network_betw <- cluster_edge_betweenness(data_network)

plot(data_network_betw, data_network)