W9 Network Analysis Notebook

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

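# Install the necessary packages (uncomment and run once)
#install.packages("mclust")
#install.packages("plyr")
#install.packages("stringr")
#install.packages("igraph")
#install.packages("stringi")
#install.packages("magrittr")
#install.packages("dplyr")
#install.packages("sna")
#install.packages("RColorBrewer")
#install.packages("readr")
#install.packages("plotly")
# Bioconductor packages. On current R use BiocManager; the legacy biocLite
# installer further below only works with old Bioconductor releases.
#install.packages("BiocManager")
#BiocManager::install(c("RBGL", "graph"))
#source("http://bioconductor.org/biocLite.R")
#biocLite("RBGL")
#biocLite("graph")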

library(mclust)

## Package 'mclust' version 5.3

## Type 'citation("mclust")' for citing this R package in publications.

library("RColorBrewer")       
library(sna)

## Loading required package: statnet.common

## 
## Attaching package: 'statnet.common'

## The following object is masked from 'package:base':
## 
##     order

## Loading required package: network

## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.

## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.

library(graph)

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following object is masked from 'package:statnet.common':
## 
##     order

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min

## 
## Attaching package: 'graph'

## The following object is masked from 'package:sna':
## 
##     degree

library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:graph':
## 
##     degree, edges, intersection, union

## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, union

## The following objects are masked from 'package:sna':
## 
##     betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census

## The following objects are masked from 'package:network':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges,
##     delete.vertices, get.edge.attribute, get.edges,
##     get.vertex.attribute, is.bipartite, is.directed,
##     list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(readr)
library("plyr")

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:graph':
## 
##     join

## The following object is masked from 'package:network':
## 
##     is.discrete

library("stringr")

## 
## Attaching package: 'stringr'

## The following object is masked from 'package:graph':
## 
##     boundary

library("stringi")
library("magrittr")
library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union

## The following object is masked from 'package:graph':
## 
##     union

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise

## The following object is masked from 'package:igraph':
## 
##     groups

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

#********************************************
#         Social Network Analysis
#********************************************

# Load the retweet edge list (columns: X1, from, to, weight)
retweeterPoster <- readr::read_csv(file = "trumpLinks.csv")

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   from = col_character(),
##   to = col_character(),
##   weight = col_integer()
## )

# Build a directed adjacency matrix from the from/to edge list, passing the
# weight column as edge weights, then wrap it in a graphNEL object
m <- ftM2adjM(
  ft = as.matrix(retweeterPoster[, c("from", "to")]),
  W = retweeterPoster$weight,
  edgemode = "directed"
)
g1 <- as(m, "graphNEL")

# The initial graph is too large to plot, so it is pruned first

# Prune graph
# Compute the weakly connected components and tabulate their sizes: the
# original graph has too many small clusters, which need to be excluded
clust <- m %>%
  igraph::graph_from_adjacency_matrix() %>%
  igraph::components(mode = "weak")
table(clust$csize)

## 
##   2   3   4   5   6   7   8   9  10  11  12  13  18  21  22  24  25  31 
## 288  65  28  16  11   4   6   3   2   2   1   2   2   1   1   1   1   1 
##  33  42  56  64  69 891 
##   1   1   2   1   1   1

# Keep only the largest cluster (891 nodes in the size table): take the
# component ids with more than 800 members, then the row indices of `m`
# whose names belong to one of those components
large_clusters <- which(clust$csize > 800)
selected_nodes <- which(
  rownames(m) %in%
    names(clust$membership)[clust$membership %in% large_clusters]
)
# Sub-matrix of the selected nodes, in both graph representations
m2 <- m[selected_nodes, selected_nodes]
g2 <- as(m2, "graphNEL")
gfrom2 <- igraph::graph_from_adjacency_matrix(m2)

# Centrality measurements of the pruned graph.
# igraph (attached after sna) masks betweenness() and degree(), so the sna
# versions are called with an explicit `sna::` prefix. Calling require(sna)
# again on an already attached package would not undo that masking, so it
# is not used here.
# The column is named `nodes.g2.` explicitly because later code refers to
# it by that name.
central <- data.frame(nodes.g2. = nodes(g2))
central$betweenness <- sna::betweenness(m2)
central$degree <- sna::degree(m2)

# Rank nodes by degree, highest first, and show the top ten
sortlist <- central[order(-central$degree), ]
head(sortlist, 10)

#**************************
# Plot Pruned Graph
# The largest cluster still has too many nodes, so we only label
# the most important ones based on centrality scores
#***************************

# Cluster the log-scaled betweenness values to get groups of nodes

# Number of groups for colors
N <- 9
# Colors for nodes
pal <- brewer.pal(N, "Oranges")

central %<>%
  # Log-scale betweenness; log(0) is -Inf, which is replaced by 1
  mutate(size = log(betweenness),
         size = ifelse(size == -Inf, 1, size)) %>%
  # Assign each node to one of N model-based clusters and pick its color.
  # NOTE(review): the thresholds below treat the Mclust class labels as
  # ordered by size (higher label = more central) -- TODO confirm.
  mutate(group = Mclust(size, G = N)$classification,
         color = pal[group]) %>%
  # Remove labels for small nodes (groups below 7)
  mutate(label = ifelse(group < 7, "", as.character(nodes.g2.))) %>%
  # Update node sizes: 5 for the top group, 1 for groups 1-2, 2 otherwise
  mutate(size = ifelse(group == N, 5, ifelse(group < 3, 1, 2)))

# Arrange vertices in the column order of m2. match() does the lookup in a
# single pass instead of scanning `central` once per column name.
indx <- match(colnames(m2), central$nodes.g2.)
# Every column of m2 must have a row in `central`; fail loudly otherwise
stopifnot(!anyNA(indx))
central <- central[indx, ]



# Plot the pruned graph with vertex color, size, and labels taken from the
# centrality table (the plot is drawn only; nothing is written to disk here).
# The seed is set first, presumably so the layout is repeatable between runs.
set.seed(1)
sna::gplot(
  m2,
  gmode = "graph",
  main = "Pruned Graph by Centrality",
  cex.main = 1,
  vertex.col = central$color,
  vertex.cex = central$size,
  vertex.enclose = FALSE,
  edge.col = "#CCCCCC",
  label = central$label,
  label.cex = 0.5
)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

W9 Network Analysis Notebook

Richard Shang