#########################################################################
# Lab 8: ERGM LAB
#########################################################################
# Lab goals:
# 1) To create ERGM models
# 2) To compare ERGM models
# 3) Consider ERGM performance complications
##########################################################################
#--------------------------------------------------------------------#
# 2016.05: updated by Daniel McFarland
#--------------------------------------------------------------------#
# NOTE: if you have trouble because some packages are not installed,
# see lab 1 for instructions on how to install all necessary packages.
# Outline
#1) ERGM creation
#2) MCMC diagnostics
#3) ERGM model simulation
#4) Model comparison
#5) ERGM performance improvements
# load the "ergm" library
#install.packages("GGally")
#install.packages("ggplot2")
library(ergm)
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0.1 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## ergm: version 3.9.4, created on 2018-08-15
## Copyright (c) 2018, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the
## bd() constriant which distorted the sampled distribution somewhat.
## In addition, Sampson's Monks datasets had mislabeled vertices. See
## the NEWS and the documentation for more details.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(GGally)
## Warning: package 'GGally' was built under R version 3.5.3
# If you have been working with igraph in the same session, you may
# want to detach that packages now that we use ergm.
# Having both ergm and igraph attached is known to result into
# problems. Detach igraph as follows:
# detach(package:igraph)
## Load the data:
data(studentnets.ergm173, package = "NetData")
# The IDs in the data correspond to IDs in the complete dataset.
# To execute the ERGM, R requires continuous integer IDs: [1:n],
# where n is the total number of nodes in the ERGM. So, create
# node IDs acceptable to R and map these to the edges.
# Create 22 unique and sequenced IDs.
id <- seq(1,22,1)
# Join these IDs to the nodes data (cbind = column bind), and
# reassign this object to 'nodes'.
nodes<-cbind(id, nodes)
# Check the new nodes data to see what we've got. Notice that we
# now have integer-increasing IDs as a vector in our data frame.
nodes
## id std_id gnd grd rce per_cap_inc
## 1 1 104456 2 10 4 4342
## 2 2 113211 2 10 1 13452
## 3 3 114144 1 10 4 13799
## 4 4 114992 1 10 4 13138
## 5 5 118466 1 10 2 8387
## 6 6 118680 2 10 4 9392
## 7 7 122713 2 10 4 12471
## 8 8 122714 1 10 1 10391
## 9 9 122723 1 10 4 17524
## 10 10 125522 1 10 4 12145
## 11 11 126101 2 10 1 8622
## 12 12 126784 2 10 3 17524
## 13 13 128033 2 10 4 11651
## 14 14 128041 1 10 4 10116
## 15 15 132942 2 10 4 12452
## 16 16 134494 1 10 4 5255
## 17 17 138966 2 10 3 7427
## 18 18 139441 2 10 3 11933
## 19 19 139596 2 10 4 8509
## 20 20 140270 1 10 4 12145
## 21 21 140271 2 10 4 9121
## 22 22 140442 1 10 3 7949
# Merge the new IDs from nodes with the ego_id and alter_id values
# in edges. Between merge steps, rename variables to maintain
# consistency. Note that you should check each data step using
# earlier syntax. Note that R requires the same ordering for node
# and edge-level data by ego_id. The following sequence preserves
# the edgelist ordering, rendering it consistent with the
# node ordering.
edges2<-merge(nodes[,1:2], edges, by.x = "std_id", by.y="alter_id")
edges2
## std_id id ego_id sem1_friend sem2_friend sem1_wtd_dicht_seat
## 1 104456 1 132942 0 1 1
## 2 104456 1 114992 1 1 0
## 3 104456 1 126784 0 1 0
## 4 104456 1 104456 0 1 0
## 5 104456 1 139596 1 1 0
## 6 113211 2 132942 1 1 0
## 7 113211 2 126784 0 1 0
## 8 113211 2 126101 0 1 1
## 9 114144 3 118680 1 1 0
## 10 114144 3 139596 0 1 0
## 11 114144 3 114144 0 1 0
## 12 114992 4 114992 0 1 0
## 13 114992 4 104456 0 1 0
## 14 118466 5 118466 0 1 0
## 15 118466 5 140442 0 1 0
## 16 118466 5 128041 0 1 0
## 17 118466 5 139441 0 1 1
## 18 118466 5 134494 0 1 1
## 19 118466 5 126784 0 1 0
## 20 118680 6 114144 0 1 0
## 21 118680 6 118680 0 1 0
## 22 118680 6 104456 0 1 0
## 23 118680 6 126784 0 1 0
## 24 122713 7 113211 1 1 1
## 25 122713 7 126784 0 1 0
## 26 122713 7 132942 0 1 0
## 27 122713 7 122713 0 1 0
## 28 122713 7 122723 1 1 0
## 29 122713 7 114992 0 1 1
## 30 122713 7 139596 0 1 0
## 31 122713 7 104456 0 1 0
## 32 122714 8 134494 0 1 0
## 33 122714 8 126101 0 1 0
## 34 122714 8 122714 0 1 0
## 35 122714 8 128033 0 1 0
## 36 122723 9 126784 1 1 1
## 37 122723 9 125522 0 1 0
## 38 122723 9 126101 1 1 0
## 39 122723 9 140442 0 1 0
## 40 122723 9 122713 1 1 0
## 41 122723 9 132942 1 1 1
## 42 122723 9 134494 1 1 1
## 43 122723 9 139596 0 1 0
## 44 122723 9 104456 0 1 1
## 45 122723 9 128033 0 1 0
## 46 122723 9 122723 0 1 0
## 47 125522 10 126784 0 1 0
## 48 125522 10 140270 1 1 0
## 49 125522 10 125522 0 1 0
## 50 125522 10 139596 0 1 1
## 51 125522 10 122723 0 1 0
## 52 125522 10 128033 0 1 0
## 53 125522 10 104456 1 1 0
## 54 126101 11 140442 0 1 0
## 55 126101 11 114144 0 1 1
## 56 126101 11 140270 0 1 0
## 57 126101 11 128041 0 1 0
## 58 126101 11 122723 0 1 0
## 59 126101 11 113211 0 1 1
## 60 126101 11 114992 0 1 0
## 61 126101 11 126101 0 1 0
## 62 126101 11 134494 0 1 0
## 63 126101 11 104456 0 1 0
## 64 126101 11 132942 0 1 0
## 65 126101 11 122714 0 1 0
## 66 126101 11 139441 0 1 0
## 67 126784 12 140271 1 1 0
## 68 126784 12 118680 1 1 0
## 69 126784 12 134494 0 1 0
## 70 126784 12 118466 0 1 0
## 71 126784 12 104456 0 1 0
## 72 126784 12 132942 1 1 1
## 73 128033 13 134494 0 1 0
## 74 128033 13 125522 0 1 0
## 75 128033 13 140271 1 1 0
## 76 128033 13 126784 0 1 0
## 77 128033 13 122714 1 1 0
## 78 128033 13 128033 0 1 0
## 79 128041 14 134494 0 1 0
## 80 128041 14 118466 0 1 0
## 81 132942 15 134494 1 1 1
## 82 132942 15 126784 1 1 1
## 83 132942 15 139596 0 1 0
## 84 132942 15 113211 1 1 0
## 85 132942 15 126101 1 1 0
## 86 132942 15 140442 0 1 0
## 87 132942 15 139441 0 1 0
## 88 132942 15 122723 1 1 1
## 89 132942 15 132942 0 1 0
## 90 132942 15 140271 1 1 0
## 91 132942 15 104456 0 1 1
## 92 134494 16 140270 1 1 0
## 93 134494 16 134494 0 1 0
## 94 134494 16 122714 1 1 0
## 95 134494 16 139596 1 1 0
## 96 134494 16 118466 1 1 1
## 97 134494 16 128041 0 1 0
## 98 134494 16 126784 0 1 0
## 99 134494 16 122723 1 1 1
## 100 134494 16 126101 0 1 0
## 101 134494 16 139441 0 1 1
## 102 138966 17 140442 1 1 1
## 103 138966 17 134494 0 1 0
## 104 138966 17 122723 1 1 0
## 105 138966 17 104456 0 1 1
## 106 138966 17 126101 1 1 0
## 107 138966 17 128041 0 1 0
## 108 138966 17 139441 1 1 0
## 109 138966 17 138966 0 1 0
## 110 138966 17 126784 0 1 0
## 111 138966 17 132942 0 1 1
## 112 138966 17 125522 0 1 0
## 113 139441 18 140442 1 1 0
## 114 139441 18 104456 0 1 1
## 115 139441 18 134494 1 1 1
## 116 139441 18 132942 1 1 0
## 117 139441 18 139441 0 1 0
## 118 139441 18 126101 1 1 0
## 119 139441 18 140270 0 1 0
## 120 139441 18 118466 1 1 1
## 121 139596 19 140442 0 1 1
## 122 139596 19 128041 0 1 0
## 123 139596 19 104456 1 1 0
## 124 139596 19 122723 0 1 0
## 125 139596 19 125522 1 1 1
## 126 139596 19 126784 0 1 0
## 127 139596 19 114992 1 1 0
## 128 139596 19 132942 1 1 0
## 129 139596 19 140270 0 1 0
## 130 139596 19 114144 0 1 0
## 131 139596 19 139596 0 1 0
## 132 140270 20 140271 0 1 0
## 133 140270 20 134494 1 1 0
## 134 140270 20 125522 1 1 0
## 135 140270 20 140270 0 1 0
## 136 140271 21 140270 0 1 0
## 137 140271 21 128033 0 1 0
## 138 140271 21 126784 0 1 0
## 139 140271 21 114992 0 1 0
## 140 140271 21 132942 0 1 1
## 141 140442 22 138966 1 1 1
## 142 140442 22 139441 1 1 0
## 143 140442 22 104456 1 1 0
## 144 140442 22 140442 0 1 0
# Note that we lose some observations here. This is because the
# alter_id values do not exist in the node list. Search will
# indicate that these IDs are also not in the set of ego_id values.
names(edges2)[1]<-"alter_id"
# Just assigning new names to first column.
names(edges2)[2]<-"alter_R_id"
edges3<- merge(nodes[,1:2], edges2, by.x = "std_id", by.y="ego_id")
# Shows that we merged new alter id that reflects
# integer id which R requires.
names(edges3)[1]<-"ego_id"
names(edges3)[2]<-"ego_R_id"
edges3
## ego_id ego_R_id alter_id alter_R_id sem1_friend sem2_friend
## 1 104456 1 125522 10 1 1
## 2 104456 1 126784 12 0 1
## 3 104456 1 139596 19 1 1
## 4 104456 1 104456 1 0 1
## 5 104456 1 122713 7 0 1
## 6 104456 1 126101 11 0 1
## 7 104456 1 132942 15 0 1
## 8 104456 1 139441 18 0 1
## 9 104456 1 118680 6 0 1
## 10 104456 1 122723 9 0 1
## 11 104456 1 138966 17 0 1
## 12 104456 1 114992 4 0 1
## 13 104456 1 140442 22 1 1
## 14 113211 2 126101 11 0 1
## 15 113211 2 122713 7 1 1
## 16 113211 2 132942 15 1 1
## 17 114144 3 126101 11 0 1
## 18 114144 3 118680 6 0 1
## 19 114144 3 139596 19 0 1
## 20 114144 3 114144 3 0 1
## 21 114992 4 114992 4 0 1
## 22 114992 4 104456 1 1 1
## 23 114992 4 122713 7 0 1
## 24 114992 4 126101 11 0 1
## 25 114992 4 139596 19 1 1
## 26 114992 4 140271 21 0 1
## 27 118466 5 118466 5 0 1
## 28 118466 5 128041 14 0 1
## 29 118466 5 139441 18 1 1
## 30 118466 5 126784 12 0 1
## 31 118466 5 134494 16 1 1
## 32 118680 6 114144 3 1 1
## 33 118680 6 118680 6 0 1
## 34 118680 6 126784 12 1 1
## 35 122713 7 122723 9 1 1
## 36 122713 7 122713 7 0 1
## 37 122714 8 122714 8 0 1
## 38 122714 8 126101 11 0 1
## 39 122714 8 128033 13 1 1
## 40 122714 8 134494 16 1 1
## 41 122723 9 126101 11 0 1
## 42 122723 9 125522 10 0 1
## 43 122723 9 134494 16 1 1
## 44 122723 9 132942 15 1 1
## 45 122723 9 122723 9 0 1
## 46 122723 9 138966 17 1 1
## 47 122723 9 139596 19 0 1
## 48 122723 9 122713 7 1 1
## 49 125522 10 125522 10 0 1
## 50 125522 10 139596 19 1 1
## 51 125522 10 128033 13 0 1
## 52 125522 10 122723 9 0 1
## 53 125522 10 138966 17 0 1
## 54 125522 10 140270 20 1 1
## 55 126101 11 126101 11 0 1
## 56 126101 11 122714 8 0 1
## 57 126101 11 139441 18 1 1
## 58 126101 11 134494 16 0 1
## 59 126101 11 132942 15 1 1
## 60 126101 11 113211 2 0 1
## 61 126101 11 122723 9 1 1
## 62 126101 11 138966 17 1 1
## 63 126784 12 104456 1 0 1
## 64 126784 12 128033 13 0 1
## 65 126784 12 122713 7 0 1
## 66 126784 12 122723 9 1 1
## 67 126784 12 125522 10 0 1
## 68 126784 12 139596 19 0 1
## 69 126784 12 138966 17 0 1
## 70 126784 12 118680 6 0 1
## 71 126784 12 134494 16 0 1
## 72 126784 12 113211 2 0 1
## 73 126784 12 118466 5 0 1
## 74 126784 12 132942 15 1 1
## 75 126784 12 140271 21 0 1
## 76 128033 13 140271 21 0 1
## 77 128033 13 128033 13 0 1
## 78 128033 13 125522 10 0 1
## 79 128033 13 122723 9 0 1
## 80 128033 13 122714 8 0 1
## 81 128041 14 138966 17 0 1
## 82 128041 14 118466 5 0 1
## 83 128041 14 126101 11 0 1
## 84 128041 14 139596 19 0 1
## 85 128041 14 134494 16 0 1
## 86 132942 15 139596 19 1 1
## 87 132942 15 104456 1 0 1
## 88 132942 15 132942 15 0 1
## 89 132942 15 126101 11 0 1
## 90 132942 15 122723 9 1 1
## 91 132942 15 113211 2 1 1
## 92 132942 15 139441 18 1 1
## 93 132942 15 140271 21 0 1
## 94 132942 15 126784 12 1 1
## 95 132942 15 122713 7 0 1
## 96 132942 15 138966 17 0 1
## 97 134494 16 126784 12 0 1
## 98 134494 16 134494 16 0 1
## 99 134494 16 132942 15 1 1
## 100 134494 16 122723 9 1 1
## 101 134494 16 126101 11 0 1
## 102 134494 16 139441 18 1 1
## 103 134494 16 128033 13 0 1
## 104 134494 16 128041 14 0 1
## 105 134494 16 138966 17 0 1
## 106 134494 16 140270 20 1 1
## 107 134494 16 122714 8 0 1
## 108 134494 16 118466 5 0 1
## 109 138966 17 140442 22 1 1
## 110 138966 17 138966 17 0 1
## 111 139441 18 134494 16 0 1
## 112 139441 18 126101 11 0 1
## 113 139441 18 140442 22 1 1
## 114 139441 18 139441 18 0 1
## 115 139441 18 138966 17 1 1
## 116 139441 18 132942 15 0 1
## 117 139441 18 118466 5 0 1
## 118 139596 19 122713 7 0 1
## 119 139596 19 114144 3 0 1
## 120 139596 19 125522 10 0 1
## 121 139596 19 139596 19 0 1
## 122 139596 19 104456 1 1 1
## 123 139596 19 132942 15 0 1
## 124 139596 19 134494 16 1 1
## 125 139596 19 122723 9 0 1
## 126 140270 20 139441 18 0 1
## 127 140270 20 134494 16 1 1
## 128 140270 20 140270 20 0 1
## 129 140270 20 139596 19 0 1
## 130 140270 20 126101 11 0 1
## 131 140270 20 125522 10 1 1
## 132 140270 20 140271 21 0 1
## 133 140271 21 140270 20 0 1
## 134 140271 21 126784 12 1 1
## 135 140271 21 132942 15 1 1
## 136 140271 21 128033 13 1 1
## 137 140442 22 139441 18 1 1
## 138 140442 22 126101 11 0 1
## 139 140442 22 118466 5 0 1
## 140 140442 22 122723 9 0 1
## 141 140442 22 139596 19 0 1
## 142 140442 22 138966 17 1 1
## 143 140442 22 132942 15 0 1
## 144 140442 22 140442 22 0 1
## sem1_wtd_dicht_seat
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 1
## 8 1
## 9 0
## 10 1
## 11 1
## 12 0
## 13 0
## 14 1
## 15 1
## 16 0
## 17 1
## 18 0
## 19 0
## 20 0
## 21 0
## 22 0
## 23 1
## 24 0
## 25 0
## 26 0
## 27 0
## 28 0
## 29 1
## 30 0
## 31 1
## 32 0
## 33 0
## 34 0
## 35 0
## 36 0
## 37 0
## 38 0
## 39 0
## 40 0
## 41 0
## 42 0
## 43 1
## 44 1
## 45 0
## 46 0
## 47 0
## 48 0
## 49 0
## 50 1
## 51 0
## 52 0
## 53 0
## 54 0
## 55 0
## 56 0
## 57 0
## 58 0
## 59 0
## 60 1
## 61 0
## 62 0
## 63 0
## 64 0
## 65 0
## 66 1
## 67 0
## 68 0
## 69 0
## 70 0
## 71 0
## 72 0
## 73 0
## 74 1
## 75 0
## 76 0
## 77 0
## 78 0
## 79 0
## 80 0
## 81 0
## 82 0
## 83 0
## 84 0
## 85 0
## 86 0
## 87 1
## 88 0
## 89 0
## 90 1
## 91 0
## 92 0
## 93 1
## 94 1
## 95 0
## 96 1
## 97 0
## 98 0
## 99 1
## 100 1
## 101 0
## 102 1
## 103 0
## 104 0
## 105 0
## 106 0
## 107 0
## 108 1
## 109 1
## 110 0
## 111 1
## 112 0
## 113 0
## 114 0
## 115 0
## 116 0
## 117 1
## 118 0
## 119 0
## 120 1
## 121 0
## 122 0
## 123 0
## 124 0
## 125 0
## 126 0
## 127 0
## 128 0
## 129 0
## 130 0
## 131 0
## 132 0
## 133 0
## 134 0
## 135 0
## 136 0
## 137 0
## 138 0
## 139 0
## 140 0
## 141 1
## 142 1
## 143 0
## 144 0
# The edges3 dataset now contains integer-increasing IDs sorted by
# ego_R_id. For our work, we will use the ego_R_id and alter_R_id
# values, but we retain the std_id values for reference.
# Specify the network we'll call net - where dyads
# are the unit of analysis...
net<-network(edges3[,c("ego_R_id", "alter_R_id")])
# Assign edge-level attributes - dyad attributes.
set.edge.attribute(net, "ego_R_id", edges[,2])
set.edge.attribute(net, "alter_R_id", edges[,4])
# Assign node-level attributes to actors in "net".
net %v% "gender" <- nodes[,3]
net %v% "grade" <- nodes[,4]
net %v% "race" <- nodes[,5]
net %v% "pci" <- nodes[,6]
# Review some summary information regarding the network to make
# sure we have #specified things correctly.
net
## Network attributes:
## vertices = 22
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges= 144
## missing edges= 0
## non-missing edges= 144
##
## Vertex attribute names:
## gender grade pci race vertex.names
##
## Edge attribute names:
## alter_R_id ego_R_id
summary(net)
## Network attributes:
## vertices = 22
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 144
## missing edges = 0
## non-missing edges = 144
## density = 0.3116883
##
## Vertex attributes:
##
## gender:
## integer valued attribute
## 22 values
##
## grade:
## integer valued attribute
## 22 values
##
## pci:
## integer valued attribute
## 22 values
##
## race:
## integer valued attribute
## 22 values
## vertex.names:
## character valued attribute
## 22 valid vertex names
##
## Edge attributes:
##
## alter_R_id:
## integer valued attribute
## 144values
##
## ego_R_id:
## integer valued attribute
## 144values
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 1 10
## [2,] 1 12
## [3,] 1 19
## [4,] 1 1
## [5,] 1 7
## [6,] 1 11
## [7,] 1 15
## [8,] 1 18
## [9,] 1 6
## [10,] 1 9
## [11,] 1 17
## [12,] 1 4
## [13,] 1 22
## [14,] 2 11
## [15,] 2 7
## [16,] 2 15
## [17,] 3 11
## [18,] 3 6
## [19,] 3 19
## [20,] 3 3
## [21,] 4 4
## [22,] 4 1
## [23,] 4 7
## [24,] 4 11
## [25,] 4 19
## [26,] 4 21
## [27,] 5 5
## [28,] 5 14
## [29,] 5 18
## [30,] 5 12
## [31,] 5 16
## [32,] 6 3
## [33,] 6 6
## [34,] 6 12
## [35,] 7 9
## [36,] 7 7
## [37,] 8 8
## [38,] 8 11
## [39,] 8 13
## [40,] 8 16
## [41,] 9 11
## [42,] 9 10
## [43,] 9 16
## [44,] 9 15
## [45,] 9 9
## [46,] 9 17
## [47,] 9 19
## [48,] 9 7
## [49,] 10 10
## [50,] 10 19
## [51,] 10 13
## [52,] 10 9
## [53,] 10 17
## [54,] 10 20
## [55,] 11 11
## [56,] 11 8
## [57,] 11 18
## [58,] 11 16
## [59,] 11 15
## [60,] 11 2
## [61,] 11 9
## [62,] 11 17
## [63,] 12 1
## [64,] 12 13
## [65,] 12 7
## [66,] 12 9
## [67,] 12 10
## [68,] 12 19
## [69,] 12 17
## [70,] 12 6
## [71,] 12 16
## [72,] 12 2
## [73,] 12 5
## [74,] 12 15
## [75,] 12 21
## [76,] 13 21
## [77,] 13 13
## [78,] 13 10
## [79,] 13 9
## [80,] 13 8
## [81,] 14 17
## [82,] 14 5
## [83,] 14 11
## [84,] 14 19
## [85,] 14 16
## [86,] 15 19
## [87,] 15 1
## [88,] 15 15
## [89,] 15 11
## [90,] 15 9
## [91,] 15 2
## [92,] 15 18
## [93,] 15 21
## [94,] 15 12
## [95,] 15 7
## [96,] 15 17
## [97,] 16 12
## [98,] 16 16
## [99,] 16 15
## [100,] 16 9
## [101,] 16 11
## [102,] 16 18
## [103,] 16 13
## [104,] 16 14
## [105,] 16 17
## [106,] 16 20
## [107,] 16 8
## [108,] 16 5
## [109,] 17 22
## [110,] 17 17
## [111,] 18 16
## [112,] 18 11
## [113,] 18 22
## [114,] 18 18
## [115,] 18 17
## [116,] 18 15
## [117,] 18 5
## [118,] 19 7
## [119,] 19 3
## [120,] 19 10
## [121,] 19 19
## [122,] 19 1
## [123,] 19 15
## [124,] 19 16
## [125,] 19 9
## [126,] 20 18
## [127,] 20 16
## [128,] 20 20
## [129,] 20 19
## [130,] 20 11
## [131,] 20 10
## [132,] 20 21
## [133,] 21 20
## [134,] 21 12
## [135,] 21 15
## [136,] 21 13
## [137,] 22 18
## [138,] 22 11
## [139,] 22 5
## [140,] 22 9
## [141,] 22 19
## [142,] 22 17
## [143,] 22 15
## [144,] 22 22
# Let's take a look at the network.
pdf("8.1_lab8_network.pdf")
net %v% "gender" = ifelse(net %v% "gender" == 2,
"girl", "boy")
ggnet2(net,
mode = "kamadakawai",
color = "gender",
palette = c("boy" = "steelblue", "girl" = "hotpink"),
color.legend = "",
size = 4,
arrow.size = 5,
arrow.gap = 0.025) +
ggtitle("Friendship ties between boys and girls in class 173")
## Warning: Removed 18 rows containing missing values (geom_segment).
dev.off()
## png
## 2
#?ggnet2
# Hm - maybe they differ by per_cap_inc?
# So let's make those actors larger...
net %v% "per_cap_inc"= nodes$per_cap_inc / 1000
ggnet2(net,
color = "gender",
size = "per_cap_inc",
palette = c("boy" = "steelblue", "girl" = "hotpink"),
color.legend = "",
arrow.size = 5,
arrow.gap = 0.025) +
guides(size = FALSE) +
ggtitle("Friendship ties between boys and girls in class 173")
## Warning: Removed 18 rows containing missing values (geom_segment).

# What do we see? What hypotheses might be pose to explain this network?
# Let's execute a model in which we attempt to explain semester 2
# friendship selections exclusively with node-level
# characteristics. First, look at the ergm command.
#?ergm
# Next, let's run it.
#DIEGO: NODEMATCH DE GENDER AND RACE TESTS FOR HOMOPHILY
m1<-ergm(net ~ edges + mutual + nodematch("gender") + absdiff
("pci") + nodefactor("gender"), control=control.ergm(MCMC.burnin=15000, MCMC.samplesize =30000), verbose=TRUE)
## Evaluating network in model.
## Initializing Metropolis-Hastings proposal(s): ergm:MH_TNT
## Initializing model.
## Using initial method 'MPLE'.
## Fitting initial model.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## MPLE covariate matrix has 265 rows.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Density guard set to 10000 from an initial count of 144 edges.
##
## Iteration 1 of at most 20 with parameter:
## edges mutual nodematch.gender
## -2.5577674723 2.3845292034 -0.0499505350
## absdiff.pci nodefactor.gender.girl
## 0.0001133459 0.2474452853
## Starting unconstrained MCMC...
## Back from unconstrained MCMC.
## Average estimating function values:
## edges mutual nodematch.gender
## 0.20686667 0.08513333 0.21790000
## absdiff.pci nodefactor.gender.girl
## -540.92373333 -0.05523333
## Starting MCMLE Optimization...
## Optimizing with step length 1.
## Using lognormal metric (see control.ergm function).
## Using log-normal approx (no optim)
## The log-likelihood improved by 0.00261.
## Step length converged once. Increasing MCMC sample size.
##
## Iteration 2 of at most 20 with parameter:
## edges mutual nodematch.gender
## -2.5694090843 2.3812636691 -0.0561430343
## absdiff.pci nodefactor.gender.girl
## 0.0001148512 0.2541487431
## Starting unconstrained MCMC...
## Back from unconstrained MCMC.
## Average estimating function values:
## edges mutual nodematch.gender
## -0.05279167 -0.04511667 -0.13554167
## absdiff.pci nodefactor.gender.girl
## 210.96760000 -0.10411667
## Starting MCMLE Optimization...
## Optimizing with step length 1.
## Using lognormal metric (see control.ergm function).
## Using log-normal approx (no optim)
## Starting MCMC s.e. computation.
## The log-likelihood improved by 0.0003484.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m1)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net ~ edges + mutual + nodematch("gender") + absdiff("pci") +
## nodefactor("gender")
##
## Iterations: 2 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -2.570e+00 3.275e-02 0 -78.475 < 1e-04 ***
## mutual 2.385e+00 1.358e-02 0 175.651 < 1e-04 ***
## nodematch.gender -5.260e-02 2.487e-02 0 -2.115 0.034430 *
## absdiff.pci 1.144e-04 2.642e-05 0 4.329 < 1e-04 ***
## nodefactor.gender.girl 2.540e-01 6.694e-02 0 3.794 0.000148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: 465.0 on 457 degrees of freedom
##
## AIC: 475 BIC: 495.7 (Smaller is better.)
# We could start and interpret the coefficients
# but there are two things you need to check before you start to interpret
# the results of the model.
# 1) model convergence
# 2) model fit
# Let's examine model convergence first.
# The ERGM runs by an MCMC process with multiple starts, and this
# helps you see if the model is converging. When you specify verbose = TRUE
# you can inspect if coefficients change dramatically.
# It is ok if the values fluctuate from positive and negative,
# but you should be suspicious of the model convergence when the
# coefficients are extreme. What is extreme? Probably when coefficients
# are larger than 5.
# Also examine the log likelihood between iterations. It should increase
# with each iteration, but not too much. If the
# log likelihood increases extremely, it means that the model is
# running towards a completely full or completely empty network,
# also known as a degenerate network. Especially take a look at
# the log likelihood between the final iterations that is stored under
# yourmodelsname$loglikelihood. If the returned value is larger than
# 5, you are likely to have a degenerate model.
m1$loglikelihood
## [,1]
## [1,] 0.000348377
# Very low value, so the model is not likely to be degenerate...
# We can also inspect the MCMC process by running the mcmc diagnostics
# the MCMC process
pdf("8.2_lab8_mcmc_m1.pdf")
mcmc.diagnostics(m1)
## Sample statistics summary:
##
## Iterations = 15000:122893976
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 120000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges 0.05279 11.334 0.03272 0.04209
## mutual 0.04512 5.623 0.01623 0.02142
## nodematch.gender 0.13554 7.889 0.02277 0.02979
## absdiff.pci -210.96760 59259.693 171.06800 238.28651
## nodefactor.gender.girl 0.10412 15.282 0.04412 0.05840
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -23 -8 0.0 8 22
## mutual -11 -4 0.0 4 11
## nodematch.gender -16 -5 0.0 6 15
## absdiff.pci -116316 -40197 -19.5 39965 115688
## nodefactor.gender.girl -30 -10 0.0 10 30
##
##
## Sample statistics cross-correlations:
## edges mutual nodematch.gender absdiff.pci
## edges 1.0000000 0.8613563 0.6950541 0.8335522
## mutual 0.8613563 1.0000000 0.6042217 0.7610892
## nodematch.gender 0.6950541 0.6042217 1.0000000 0.5926701
## absdiff.pci 0.8335522 0.7610892 0.5926701 1.0000000
## nodefactor.gender.girl 0.8658873 0.7639196 0.6940166 0.7048794
## nodefactor.gender.girl
## edges 0.8658873
## mutual 0.7639196
## nodematch.gender 0.6940166
## absdiff.pci 0.7048794
## nodefactor.gender.girl 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges mutual nodematch.gender absdiff.pci
## Lag 0 1.0000000000 1.0000000000 1.000000000 1.000000000
## Lag 1024 0.2346051691 0.2655171074 0.242736191 0.301384976
## Lag 2048 0.0671294337 0.0756461880 0.074783332 0.105078223
## Lag 3072 0.0212634999 0.0245277170 0.025916268 0.040156237
## Lag 4096 0.0106730240 0.0103804262 0.010252427 0.017623618
## Lag 5120 0.0007707817 0.0004045159 0.004252654 0.004588308
## nodefactor.gender.girl
## Lag 0 1.000000000
## Lag 1024 0.253126877
## Lag 2048 0.075828821
## Lag 3072 0.026444345
## Lag 4096 0.012844251
## Lag 5120 0.004686558
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges mutual nodematch.gender
## 0.34522 -0.04666 1.73272
## absdiff.pci nodefactor.gender.girl
## -0.52859 0.44334
##
## Individual P-values (lower = worse):
## edges mutual nodematch.gender
## 0.72993208 0.96278296 0.08314552
## absdiff.pci nodefactor.gender.girl
## 0.59708904 0.65752333
## Joint P-value (lower = worse): 0.694991 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
dev.off()
## png
## 2
# You will see several plots and output. The plots to the right
# should look approximately normal. The output tells us three
# things of interest:
# 1) The accuracy of the model (r)
# 2) If we used a sufficiently large burn-in
# 3) If we used a sufficiently large sample in the simulation
# In our case the samples might be too small. This doesn't mean
# the results of the ERGM results are wrong, but we should take
# care in specifying the sample size.
# Besides checking convergence, it is also good to check the model
# goodness of fit. In addition to the standard
# GOF statistics, we can use the simulation features of the
# program to see if our models match reality. Since the models
# are effectively proposals about what is driving the observed
# network, we can back predict from the model to produce a set
# of random networks that are draws from the distribution of
# networks implied by the model. We can then compare the
# predicted model to the observed model for features not built
# into the model. So, for example, if the only features
# generating the global network in reality are mixing by grade and
# race, then we should get matching levels of transitivity,
# geodesic distances and so forth with the predicted model. The
# tools for doing this are (a) to simulate from the model and (b)
# to use the built in GOF functions.
# We will simulate 50 networks based on the estimates of m1.
# You have to make sure that your interval is high enough so that the
# simulated networks have sufficient time and space to 'move'.
gof <-gof(m1~distance+espartners+dspartners+odegree+idegree+triadcensus,
burnin = 1e+5,
control = control.gof.ergm(nsim=50),
interval = 1e+5)
pdf("8.3_lab8_gof.pdf")
plot(gof)
dev.off()
## png
## 2
# The black line in the plots is your observed network and the boxplots
# refer to the values of your simulated networks.
# Let's take the indegree plot as an example.
# The y axis is the proportion of nodes, the x axis is the indegree.
# For example, 20% of the nodes in the observed networks
# have an indegree of 10, whereas only 5% of the nodes in the
# simulated networks have an indegree of 10 (on average).
# Is this difference significant?
# We can check by calling a summary
summary(gof)
## Warning: 'summary.gof' is deprecated.
## See help("Deprecated")
##
## Goodness-of-fit for minimum geodesic distance
##
## obs min mean max MC p-value
## 1 126 93 125.82 159 1.00
## 2 234 201 260.80 289 0.16
## 3 93 19 69.14 127 0.24
## 4 9 0 3.58 38 0.24
## 5 0 0 0.16 4 1.00
## Inf 0 0 2.50 41 1.00
##
## Goodness-of-fit for edgewise shared partner
##
## obs min mean max MC p-value
## esp0 18 8 25.62 48 0.36
## esp1 35 26 39.88 55 0.56
## esp2 27 10 31.74 50 0.52
## esp3 18 2 17.56 38 1.00
## esp4 12 0 7.64 25 0.28
## esp5 8 0 2.50 12 0.16
## esp6 5 0 0.66 6 0.12
## esp7 3 0 0.20 2 0.00
## esp8 0 0 0.02 1 1.00
##
## Goodness-of-fit for dyadwise shared partner
##
## obs min mean max MC p-value
## dsp0 120 31 101.00 216 0.52
## dsp1 154 97 150.80 182 0.92
## dsp2 99 57 115.70 152 0.28
## dsp3 43 21 60.72 104 0.40
## dsp4 22 4 23.68 62 1.00
## dsp5 16 0 7.40 36 0.16
## dsp6 5 0 2.04 10 0.20
## dsp7 3 0 0.48 5 0.08
## dsp8 0 0 0.18 3 1.00
##
## Goodness-of-fit for out-degree
##
## obs min mean max MC p-value
## 0 0 0 0.06 1 1.00
## 1 2 0 0.34 3 0.12
## 2 1 0 1.12 5 1.00
## 3 3 0 2.18 8 0.80
## 4 3 0 3.16 8 1.00
## 5 3 0 4.12 9 0.76
## 6 2 0 3.68 9 0.52
## 7 4 0 2.84 7 0.68
## 8 0 0 1.60 5 0.44
## 9 0 0 1.36 6 0.48
## 10 1 0 0.78 3 1.00
## 11 1 0 0.60 2 1.00
## 12 1 0 0.08 1 0.16
## 13 1 0 0.02 1 0.04
## 14 0 0 0.06 1 1.00
##
## Goodness-of-fit for in-degree
##
## obs min mean max MC p-value
## 0 0 0 0.04 1 1.00
## 1 1 0 0.32 2 0.56
## 2 2 0 1.34 4 0.68
## 3 5 0 2.00 4 0.00
## 4 1 0 3.28 7 0.36
## 5 3 0 3.82 8 0.80
## 6 2 1 3.68 9 0.44
## 7 2 0 2.90 7 0.80
## 8 0 0 1.96 4 0.24
## 9 1 0 1.26 4 1.00
## 10 4 0 0.70 3 0.00
## 11 0 0 0.36 3 1.00
## 12 1 0 0.24 2 0.40
## 13 0 0 0.06 1 1.00
## 14 0 0 0.02 1 1.00
## 15 0 0 0.02 1 1.00
##
## Goodness-of-fit for triad census
##
## obs min mean max MC p-value
## 003 433 288 402.18 565 0.56
## 012 311 236 350.34 483 0.48
## 102 313 221 311.54 395 0.88
## 021D 45 8 26.48 42 0.00
## 021U 47 11 25.50 45 0.00
## 021C 15 28 51.00 87 0.00
## 111D 88 59 97.68 137 0.60
## 111U 91 58 96.80 131 0.84
## 030T 14 1 8.48 22 0.32
## 030C 0 0 2.44 8 0.24
## 201 87 41 97.44 181 0.88
## 120D 17 2 7.50 16 0.00
## 120U 25 1 6.92 14 0.00
## 120C 10 4 15.56 29 0.44
## 210 30 9 29.70 65 1.00
## 300 14 0 10.44 29 0.48
##
## Goodness-of-fit for model statistics
##
## obs min mean max MC p-value
## edges 126 93 125.82 159 1.00
## mutual 41 26 41.08 60 1.00
## nodematch.gender 62 44 62.62 84 0.96
## absdiff.pci 607481 436417 604508.60 797122 0.88
## nodefactor.gender.girl 152 111 151.40 191 1.00
# The observed network has 4 nodes with an indegree of 10.
# See "Goodness-of-fit for in-degree", then row 10 with obs = 4,
# min=0, mean=.66, max=3, and p-value=0.00. This says that theh
# simulated networks show on average of 0.66 nodes with an
# indegree of 10. The p-value is zero, meaning that the difference
# is significant. You can also see a poor fit for nodes with
# indegree of 3. Looking to other goodness of fit relations,
# we see a avriety of poor fitting cases.
# If your model turns out to be degenerate and to have a bad fit,
# there are a few things you can do.
# 1) You can increase the burnin and MCMC sample size.
# One way to determine if they have been sufficiently
# high is by comparing the coefficients and loglikelihoods between
# models with lower and higher values for the burnin and sample size.
# If they are very different, you may want to add even higher values
# for the burnin and MCMC sample size and compare the coefficients
# and loglikelihood again. Once they become similar, you've reached
# a sufficient burnin and mcmc sample size.
# The burnin should not be too big relative to the MCMC sample size.
# Remember that the burnin refers to how many iterations are discarded
# before we assume the distribution is getting closer to the target
# distribution (that is, the observed network). Usually, the burnin
# is half the size of the MCMC sample size
# 2) You can add more iterations.
# 3) Geometricaly weighted terms were designed to reduce degeneracy
# For example, try using GWESP for higher order balancing mechanisms
# 4) Add meaningful attributes.
# The better your model resembles the mechanisms that drive
# a network, the less likely it turns degenerate and th better the fit
# 5) Constrain the network.
# Were respondents only able to nominate 5 friends? Then set the
# the outdegree to five by using constraints=~bd(maxout=5).
# For now, we will keep the current model and look at the results
# We can create a new object that is the summary score info, but here
# we'll just send it to the screen.
# Let's assess the model
summary(m1)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net ~ edges + mutual + nodematch("gender") + absdiff("pci") +
## nodefactor("gender")
##
## Iterations: 2 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -2.570e+00 3.275e-02 0 -78.475 < 1e-04 ***
## mutual 2.385e+00 1.358e-02 0 175.651 < 1e-04 ***
## nodematch.gender -5.260e-02 2.487e-02 0 -2.115 0.034430 *
## absdiff.pci 1.144e-04 2.642e-05 0 4.329 < 1e-04 ***
## nodefactor.gender.girl 2.540e-01 6.694e-02 0 3.794 0.000148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: 465.0 on 457 degrees of freedom
##
## AIC: 475 BIC: 495.7 (Smaller is better.)
# show the exp() for the ERGM coefficients
lapply(m1[1],exp)
## $coef
## edges mutual nodematch.gender
## 0.07654096 10.85691223 0.94875998
## absdiff.pci nodefactor.gender.girl
## 1.00011438 1.28914823
# The first section gives the model (formula) estimated in the
# ERGM. Here we said that the network was a function of edges,
# mutual ties and matching with respect gender and pci.
# Another way to think about this, is that we're generating
# random networks that match the observed network with respect
# to the number of edges, the number of mutual dyads, the number
# of ties within/between gender and similarity/dissimilarity
# in socioeconomic background.
# The second section tells how many iterations were done. The
# MCMC sample size is the number of random networks generated in
# the production of the estimate.
# The next section gives the coefficients, their SEs and pValues.
# These are on a log-odds scale, so we interpret them like logit
# coefficients.
# An edges value of -2.3e+00 means that the odds of seeing a tie on
# any dyad are exp(-2.3) ~= 0.1 odds, which could be thought of as
# density net of the other factors in the model. If you only
# have 'edges' in the model, then exp(b1) should be very close to
# the observed density. Edges are equivalent to a model intercept
# -- while possible, I can't imagine why one would estimate a
# model without an edges parameter.
# A mutual value of 2.4 means that reciprocity is more common
# than expected by chance (positive and significant), and here we
# see that exp(2.4) ~= 11, so it's much more likely than chance
# -- we are 11 times as likely to see a tie from ij if ji than if
# j did not nominate i.
# We are exp(1.8e-02) ~= 1.018 times more likely to nominate within
# gender than across gender. It is not significant however
# The absdiff coefficient is small but positive and significant
# (exp(1.1e-04) ~= 1.0001) and reflects that with increasing
# difference in income, the likelihood of nominating increases as
# well.
# The final section refers to overall model fit and MCMC
# diagnostic statistics (AIC, BIC).
## PANEL ERGMS
# Let's now create a couple of additional networks so that we can
# add earlier friendships and seating proximity to our model.
# In that manner we do a sort of panel design, or longitudinal model.
# Assign an edge-level attribute of 'net' to capture the network
# of seating we create a proximity network via seating location...
set.edge.attribute(net, "seat_net", edges3[,7])
# Assign an additional edge-level attribute of 'net' to capture sem1
# friendships.
set.edge.attribute(net, "friend1", edges3[,5])
# get edgelist for sem 1 friendships and seating
edgelist_sem1_friend <- edges3[edges3$sem1_friend == 1, c(2,4)]
edgelist_sem1_seat <- edges3[edges3$sem1_wtd_dicht_seat == 1, c(2,4)]
# Turn them into network objects
net_sem1_friend <- network(edgelist_sem1_friend, matrix.type = "edgelist")
net_sem1_seat <- network(edgelist_sem1_seat, matrix.type = "edgelist")
# One can also create variables to represent sem1 mutuality and
# transitivity. Create a new network based on the sem1 friendships.
# Use the network commands to convert this to a matrix.
test<-edges["sem1_friend">=1,]
test2<-merge(nodes[,1:2], test, by.x = "std_id", by.y="alter_id")
names(test2)[1]<-"alter_id"
names(test2)[2]<-"alter_R_id"
test3<- merge(nodes[,1:2], test2, by.x = "std_id", by.y="ego_id")
names(test3)[1]<-"ego_id"
names(test3)[2]<-"ego_R_id"
net1<-network(test3[,c("ego_R_id", "alter_R_id")])
A<-as.matrix(net1)
B<-t(as.matrix(net1)) #B = A transpose
mut_mat <- A + B
lag_mut<-as.network(mut_mat) # relies on dichotomous
# interpretation of edges
# Calculate sem1 transitivity using A matrix from above.
# This is highly colienar with our response variable and will
# cause the ERGM to fail. For a different network, you would use
# the code below to calculate semester 1 transitvity:
# sqA<-A%*%A #matrix multiplication
# sem2_trans<-sqA*A #element-wise multiplication
# sem2_trans_net <- as.network(sem2_trans)
# Create another model that uses the sem1 mutuality
m2<-ergm(net ~ edges + mutual + nodefactor("gender") + nodematch("gender") +
nodematch("race") + edgecov(lag_mut),
control= control.ergm(MCMC.burnin= 20000, MCMC.samplesize = 70000,
seed = 25, MCMLE.maxit=6))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 6:
## Optimizing with step length 1.
## The log-likelihood improved by 0.002909.
## Step length converged once. Increasing MCMC sample size.
## Iteration 2 of at most 6:
## Optimizing with step length 1.
## Warning in ergm.MCMCse.lognormal(theta = theta, init = init, statsmatrix =
## statsmatrix0, : Approximate Hessian matrix is singular. Standard errors due
## to MCMC approximation of the likelihood cannot be evaluated. This is likely
## due to insufficient MCMC sample size or highly correlated model terms.
## The log-likelihood improved by < 0.0001.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m2)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net ~ edges + mutual + nodefactor("gender") + nodematch("gender") +
## nodematch("race") + edgecov(lag_mut)
##
## Iterations: 2 out of 6
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -21.9480 NA NA NA NA
## mutual -19.4169 NA NA NA NA
## nodefactor.gender.girl -0.7457 NA NA NA NA
## nodematch.gender 1.0194 NA NA NA NA
## nodematch.race 0.7577 NA NA NA NA
## edgecov.lag_mut 42.0912 NA NA NA NA
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: 171.1 on 456 degrees of freedom
##
## AIC: 183.1 BIC: 207.9 (Smaller is better.)
# doesn't work! Works fine if we leave out lag_mut...
pdf("8.4_lab8_mcmc_m2.pdf")
mcmc.diagnostics(m2)
## Sample statistics summary:
##
## Iterations = 20000:286738976
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 280000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges 0.019725 4.403 0.008320 0.009266
## mutual 0.019725 4.403 0.008320 0.009266
## nodefactor.gender.girl 0.031650 6.282 0.011872 0.013065
## nodematch.gender 0.007796 2.965 0.005603 0.006394
## nodematch.race 0.009536 3.050 0.005764 0.006617
## edgecov.lag_mut 0.019725 4.403 0.008320 0.009266
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -9 -3 0 3 9
## mutual -9 -3 0 3 9
## nodefactor.gender.girl -12 -4 0 4 12
## nodematch.gender -6 -2 0 2 6
## nodematch.race -6 -2 0 2 6
## edgecov.lag_mut -9 -3 0 3 9
##
##
## Sample statistics cross-correlations:
## edges mutual nodefactor.gender.girl
## edges 1.0000000 1.0000000 0.9051490
## mutual 1.0000000 1.0000000 0.9051490
## nodefactor.gender.girl 0.9051490 0.9051490 1.0000000
## nodematch.gender 0.6723087 0.6723087 0.7753487
## nodematch.race 0.6918729 0.6918729 0.6061060
## edgecov.lag_mut 1.0000000 1.0000000 0.9051490
## nodematch.gender nodematch.race edgecov.lag_mut
## edges 0.6723087 0.6918729 1.0000000
## mutual 0.6723087 0.6918729 1.0000000
## nodefactor.gender.girl 0.7753487 0.6061060 0.9051490
## nodematch.gender 1.0000000 0.3912823 0.6723087
## nodematch.race 0.3912823 1.0000000 0.6918729
## edgecov.lag_mut 0.6723087 0.6918729 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges mutual nodefactor.gender.girl nodematch.gender
## Lag 0 1.000000000 1.000000000 1.0000000000 1.0000000000
## Lag 1024 0.100837247 0.100837247 0.0928025669 0.1246778707
## Lag 2048 0.016594051 0.016594051 0.0113111939 0.0221797925
## Lag 3072 0.003791211 0.003791211 0.0018334594 0.0040539671
## Lag 4096 0.003551495 0.003551495 0.0006375131 0.0004603381
## Lag 5120 0.002562395 0.002562395 0.0024079514 0.0037032776
## nodematch.race edgecov.lag_mut
## Lag 0 1.0000000000 1.000000000
## Lag 1024 0.1370172354 0.100837247
## Lag 2048 0.0206105570 0.016594051
## Lag 3072 0.0043951091 0.003791211
## Lag 4096 0.0006347185 0.003551495
## Lag 5120 0.0029517615 0.002562395
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges mutual nodefactor.gender.girl
## 1.1204 1.1204 1.0678
## nodematch.gender nodematch.race edgecov.lag_mut
## 0.8769 0.7118 1.1204
##
## Individual P-values (lower = worse):
## edges mutual nodefactor.gender.girl
## 0.2625521 0.2625521 0.2855959
## nodematch.gender nodematch.race edgecov.lag_mut
## 0.3805163 0.4765988 0.2625521
## Joint P-value (lower = worse): 0.970385 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
dev.off()
## png
## 2
# We might get a warning here. This means that R was unable to
# compute standard errors for all predictors. This could be due
# to a number of causes for the purpose of this example we ignore
# the warning and move on, but in your work you will want to check
# your data for potential problems
# There are also a number of advanced options for running ERGM
# models designed to (a) allow one to specify structural
# parameters of interest, (b) evaluate the convergence of the
# MCMC, and (c) test for degenerate models (models that look
# like they fit, but that actually predict an odd portion of the
# graph sample space).
# Ergm is slow, but modern computers can help a lot.
# An ergm model tries to compute the same general result multiple
# times we can use many threads to harness the power of multicore
# processors we do this with the parallel arguement in ergm.
# If you are not using a multicore processor this will slow down
# your analysis. For most new computers you should use parallel=4.
# let's run the model 4 again with four threads.
m2_fast<-ergm(net ~ edges + mutual + nodematch("gender") +
nodematch("race") + edgecov(net_sem1_friend) + edgecov(net_sem1_seat)
+ edgecov(lag_mut),
edgeverbose=FALSE,
control= control.ergm(MCMC.burnin =20000, MCMC.samplesize =70000,
seed = 25, MCMLE.maxit=6, parallel = 4))
## Observed statistic(s) edgecov.net_sem1_friend and edgecov.net_sem1_seat are at their greatest attainable values. Their coefficients will be fixed at +Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 6:
## Optimizing with step length 1.
## The log-likelihood improved by 0.08119.
## Step length converged once. Increasing MCMC sample size.
## Iteration 2 of at most 6:
## Optimizing with step length 1.
## Warning in ergm.MCMCse.lognormal(theta = theta, init = init, statsmatrix =
## statsmatrix0, : Approximate Hessian matrix is singular. Standard errors due
## to MCMC approximation of the likelihood cannot be evaluated. This is likely
## due to insufficient MCMC sample size or highly correlated model terms.
## The log-likelihood improved by < 0.0001.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m2_fast)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net ~ edges + mutual + nodematch("gender") + nodematch("race") +
## edgecov(net_sem1_friend) + edgecov(net_sem1_seat) + edgecov(lag_mut)
##
## Iterations: 2 out of 6
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -22.3499 NA NA NA NA
## mutual -20.0167 NA NA NA NA
## nodematch.gender 0.3930 NA NA NA NA
## nodematch.race 0.4103 NA NA NA NA
## edgecov.net_sem1_friend Inf 0.0000 0 Inf <1e-04 ***
## edgecov.net_sem1_seat Inf 0.0000 0 Inf <1e-04 ***
## edgecov.lag_mut 41.7887 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: NaN on 455 degrees of freedom
##
## AIC: NaN BIC: NaN (Smaller is better.)
##
## Warning: The following terms have infinite coefficient estimates:
## edgecov.net_sem1_friend edgecov.net_sem1_seat
# sigh... doesn't work either. Means we need to trouble shoot. It may be too
# much to include the prior networks and lagged mutuals. Works fine when we
# omit lagged vars.
# LAB QUESTIONS:
# 1. On your own, using these variables and the 'summary(<model
# name>)' command, explore the model that you believe to be the best
# one. Explain its strengths relative to the other models and the
# logic that suggests this answer to you.
# 2. Why don't we use the node-level variable 'grade' for any of
# the models? Using the syntax above as a guide, include 'grade'
# in a variant of m1, m2, and report the results from R.
# 3. Describe each of the command terms: edgecov, mutual,
# edges, nodematch, and absdiff.
# NOTE: the command '
help("ergm-terms")
## starting httpd help server ... done
# will be very useful here.