Initial Data Input
library(RCurl)
## Loading required package: bitops
el1997 <- read.csv(text = getURL("https://docs.google.com/spreadsheet/pub?key=0AhVqDdZgThPldGtubjJ2WGFBOU1qNGxVZjJYM3o5OEE&output=csv"),
stringsAsFactors = F)
el1998 <- read.csv(text = getURL("https://docs.google.com/spreadsheet/pub?key=0AhVqDdZgThPldF80RW55OUl3RlFneUNPVTNHYjNnMGc&output=csv"),
stringsAsFactors = F)
el1999 <- read.csv(text = getURL("https://docs.google.com/spreadsheet/pub?key=0AhVqDdZgThPldFFzRXZud0ZLYTZ3OFFZMzYyOHBfZXc&output=csv"),
stringsAsFactors = F)
el2000 <- read.csv(text = getURL("https://docs.google.com/spreadsheet/pub?key=0AhVqDdZgThPldEtfOGY2UUhmbGdqSkxNekFFVmtHMmc&output=csv"),
stringsAsFactors = F)
vtxDat <- read.csv(text = getURL("https://docs.google.com/spreadsheet/pub?key=0AhVqDdZgThPldC1ESDhfMExsWmZuQ0NHcVZzWVdsMmc&output=csv"),
stringsAsFactors = F)
### Build the network objects
require(network)
## Loading required package: network
## network: Classes for Relational Data
## Version 1.8.1 created on 2013-09-11.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of Washington
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
## Number of vertices
nv <- nrow(vtxDat)
## Initialize Networks
net1998 <- network.initialize(nv)
net1999 <- network.initialize(nv)
## Define vertex names
network.vertex.names(x = net1998) <- vtxDat$Abb
network.vertex.names(x = net1999) <- vtxDat$Abb
## define edges
net1999[as.matrix(el1999)] <- 1
net1998[as.matrix(el1998)] <- 1
# standardize
stdGDP <- (vtxDat$GDP1999 - mean(vtxDat$GDP1999))/sd(vtxDat$GDP1999)
# define as vertex attribute
set.vertex.attribute(x = net1999 # network
, attrname = "GDP" # name of attribute
, val = stdGDP) # vertex attribute values
## lagged network models influence of i->j at t-1 on i->j at t using [,]
## after network object extracts adjacency matrix
lNet <- net1998[, ]
## Lagged sender and receiver attributes sender attribute models the effect
## of out-going ties at t-1
lSend <- apply(X = lNet, MARGIN = 1, FUN = sum)
set.vertex.attribute(x = net1999, attrname = "lSend", val = lSend)
# same for in-coming ties
lRecv <- apply(X = lNet, MARGIN = 2, FUN = sum)
set.vertex.attribute(x = net1999, attrname = "lRecv", val = lRecv)
## Estimate ERGM
require(ergm)
## Loading required package: ergm
## Loading required package: statnet.common
##
## ergm: version 3.1.1, created on 2013-11-07
## Copyright (c) 2013, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
##
## NOTE: If you use custom ERGM terms based on 'ergm.userterms'
## version prior to 3.1, you will need to perform a one-time update
## of the package boilerplate files (the files that you did not write
## or modify) from 'ergm.userterms' 3.1 or later. See
## help('eut-upgrade') for instructions.
##
## NOTE: Dynamic network modeling functionality (STERGMs) has been
## moved to a new package, 'tergm'.
est1 <- ergm(net1999~
edges # density
+edgecov(lNet) # autocorrelation
+nodeocov("lSend") # persistent out-degree
+nodeicov("lRecv") # persistent in-degree
+nodeicov("GDP") # targeting wealthy states
+isolates, # unusual number of isolates
control=control.ergm( # simulation tuning
MCMC.samplesize=200000, # networks drawn
MCMC.burnin=50000, # initial throw-away
seed=10)) # assure replicability
## Iteration 1 of at most 20:
## Convergence test P-value: 0e+00
## The log-likelihood improved by 0.1865
## Iteration 2 of at most 20:
## Convergence test P-value: 5e-57
## The log-likelihood improved by 0.02412
## Iteration 3 of at most 20:
## Convergence test P-value: 7.1e-14
## The log-likelihood improved by 0.00641
## Iteration 4 of at most 20:
## Convergence test P-value: 2.6e-04
## The log-likelihood improved by 0.002679
## Iteration 5 of at most 20:
## Convergence test P-value: 2.3e-01
## The log-likelihood improved by 0.0007261
## Iteration 6 of at most 20:
## Convergence test P-value: 9.5e-01
## Convergence detected. Stopping.
## The log-likelihood improved by < 0.0001
##
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
require(latticeExtra)
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
## Loading required package: lattice
mcmc.diagnostics(est1)
## Sample statistics summary:
##
## Iterations = 50000:20049900
## Thinning interval = 100
## Number of chains = 1
## Sample size per chain = 2e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges 0.1381 11.60 0.02594 0.1908
## edgecov.lNet 0.0287 1.68 0.00376 0.0587
## nodeocov.lSend 0.0292 13.00 0.02908 0.3073
## nodeicov.lRecv 0.0855 4.72 0.01055 0.1338
## nodeicov.GDP -0.0437 6.46 0.01444 0.0965
## isolates -0.1184 9.92 0.02219 0.1455
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -18.0 -8.00 -1.000 7.00 27
## edgecov.lNet -3.0 -1.00 0.000 1.00 4
## nodeocov.lSend -25.0 -9.00 -1.000 8.00 27
## nodeicov.lRecv -7.0 -3.00 0.000 3.00 11
## nodeicov.GDP -11.6 -4.42 -0.518 3.87 14
## isolates -21.0 -6.00 1.000 7.00 17
##
##
## Are sample statistics significantly different from observed?
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## diff. 0.1381 0.02869 0.02920 0.08547 -0.04367
## test stat. 0.7238 0.48864 0.09502 0.63871 -0.45252
## P-val. 0.4692 0.62510 0.92430 0.52301 0.65089
## isolates Overall (Chi^2)
## diff. -0.1184 NA
## test stat. -0.8135 1.5749
## P-val. 0.4160 0.9544
##
## Sample statistics cross-correlations:
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv
## edges 1.0000 0.5167 0.6898 0.6570
## edgecov.lNet 0.5167 1.0000 0.6884 0.7821
## nodeocov.lSend 0.6898 0.6884 1.0000 0.6269
## nodeicov.lRecv 0.6570 0.7821 0.6269 1.0000
## nodeicov.GDP 0.1758 0.2524 0.2176 0.2330
## isolates -0.9626 -0.4662 -0.6192 -0.5968
## nodeicov.GDP isolates
## edges 0.1758 -0.9626
## edgecov.lNet 0.2524 -0.4662
## nodeocov.lSend 0.2176 -0.6192
## nodeicov.lRecv 0.2330 -0.5968
## nodeicov.GDP 1.0000 -0.1510
## isolates -0.1510 1.0000
##
## Sample statistics auto-correlation:
## Chain 1
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## Lag 0 1.0000 1.0000 1.0000 1.0000 1.0000
## Lag 100 0.8797 0.9902 0.9336 0.8823 0.6910
## Lag 200 0.7953 0.9809 0.8819 0.8223 0.5510
## Lag 300 0.7281 0.9718 0.8384 0.7829 0.4671
## Lag 400 0.6719 0.9630 0.8008 0.7532 0.4113
## Lag 500 0.6243 0.9544 0.7676 0.7299 0.3676
## isolates
## Lag 0 1.0000
## Lag 100 0.7916
## Lag 200 0.6875
## Lag 300 0.6166
## Lag 400 0.5610
## Lag 500 0.5176
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## 1.1559 0.8163 1.5188 0.8485 1.0291
## isolates
## -1.0817
##
## P-values (lower = worse):
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## 0.2477 0.4143 0.1288 0.3962 0.3034
## isolates
## 0.2794
# looking for black lines through boxplots
gf1 <- gof(est1)
par(mfrow = c(2, 2))
plot(gf1)
## Warning: no non-missing arguments to max; returning -Inf
summary(est1)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net1999 ~ edges + edgecov(lNet) + nodeocov("lSend") + nodeicov("lRecv") +
## nodeicov("GDP") + isolates
##
## Iterations: 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % p-value
## edges -4.5731 0.3710 0 <1e-04 ***
## edgecov.lNet 3.4361 1.0845 0 0.0015 **
## nodeocov.lSend 0.3550 0.1287 0 0.0058 **
## nodeicov.lRecv 0.0807 0.3924 0 0.8370
## nodeicov.GDP 0.0307 0.1608 0 0.8486
## isolates 2.0755 0.3864 0 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 28547 on 20592 degrees of freedom
## Residual Deviance: 335 on 20586 degrees of freedom
##
## AIC: 347 BIC: 394 (Smaller is better.)
### Build all the network objects Initialize Networks
net1997 <- network.initialize(nv)
net1998 <- network.initialize(nv)
net1999 <- network.initialize(nv)
net2000 <- network.initialize(nv)
## Define vertex names
network.vertex.names(x = net1997) <- vtxDat$Abb
network.vertex.names(x = net1998) <- vtxDat$Abb
network.vertex.names(x = net1999) <- vtxDat$Abb
network.vertex.names(x = net2000) <- vtxDat$Abb
## define edges
net1997[as.matrix(el1997)] <- 1
net1998[as.matrix(el1998)] <- 1
net1999[as.matrix(el1999)] <- 1
net2000[as.matrix(el2000)] <- 1
## define *standardized* GDP attribute
set.vertex.attribute(x = net1997, attrname = "GDP", val = (vtxDat$GDP1997 -
mean(vtxDat$GDP1997))/sd(vtxDat$GDP1997))
set.vertex.attribute(x = net1998, attrname = "GDP", val = (vtxDat$GDP1998 -
mean(vtxDat$GDP1998))/sd(vtxDat$GDP1998))
set.vertex.attribute(x = net1999, attrname = "GDP", val = (vtxDat$GDP1999 -
mean(vtxDat$GDP1999))/sd(vtxDat$GDP1999))
set.vertex.attribute(x = net2000, attrname = "GDP", val = (vtxDat$GDP2000 -
mean(vtxDat$GDP2000))/sd(vtxDat$GDP2000))
## define year attribute
set.vertex.attribute(x = net1997, attrname = "year", val = rep(1997, nv))
set.vertex.attribute(x = net1998, attrname = "year", val = rep(1998, nv))
set.vertex.attribute(x = net1999, attrname = "year", val = rep(1999, nv))
set.vertex.attribute(x = net2000, attrname = "year", val = rep(2000, nv))
## Initialize network
require(Matrix) # useful block-diagonal construction
## Loading required package: Matrix
## Combine networks, block-by-block
net98_99 <- bdiag(net1998[, ], net1999[, ]) # constructs block-diagonal
# sparse matrix
net98_99 <- network(as.matrix(net98_99)) # convert to matrix object
## Set existing vertex attributes year extract year attributes
yr98 <- get.vertex.attribute(x = net1998 # network
, attrname = "year") # attribute to extract
yr99 <- get.vertex.attribute(net1999, attrname = "year")
# combine year attributes
yrCombined <- c(yr98, yr99)
# define year attribute in network
set.vertex.attribute(net98_99, "year", yrCombined)
# GDP
set.vertex.attribute(net98_99, "GDP", c(get.vertex.attribute(net1998, "GDP"),
get.vertex.attribute(net1999, "GDP")))
## Combine lagged networks, block-by-block
lNet <- as.matrix(bdiag(net1997[, ], net1998[, ]))
## Lagged sender and receiver attributes sender attribute models the effect
## of out-going ties at t-1
lSend <- apply(X = lNet, MARGIN = 1, FUN = sum)
set.vertex.attribute(x = net98_99, attrname = "lSend", val = lSend)
# same for in-coming ties
lRecv <- apply(X = lNet, MARGIN = 2, FUN = sum)
set.vertex.attribute(x = net98_99, attrname = "lRecv", val = lRecv)
## Estimate parameters
# Note the 'constraints = ~ blockdiag("year")'
est2 <- ergm(net98_99~
edges
+edgecov(lNet)
+nodeocov("lSend")
+nodeicov("lRecv")
+nodeicov("GDP")
+isolates,
constraints = ~ blockdiag("year"), ## NOTE CONSTRAINT!!!
control=control.ergm(
MCMC.samplesize=200000,
MCMC.burnin=50000,
seed=10))
## Iteration 1 of at most 20:
## Convergence test P-value: 0e+00
## The log-likelihood improved by 0.2378
## Iteration 2 of at most 20:
## Convergence test P-value: 3.1e-90
## The log-likelihood improved by 0.05006
## Iteration 3 of at most 20:
## Convergence test P-value: 1.5e-13
## The log-likelihood improved by 0.007731
## Iteration 4 of at most 20:
## Convergence test P-value: 2.5e-04
## The log-likelihood improved by 0.002439
## Iteration 5 of at most 20:
## Convergence test P-value: 3.4e-03
## The log-likelihood improved by 0.001914
## Iteration 6 of at most 20:
## Convergence test P-value: 1.9e-01
## The log-likelihood improved by 0.0007596
## Iteration 7 of at most 20:
## Convergence test P-value: 1.9e-01
## The log-likelihood improved by 0.0004092
## Iteration 8 of at most 20:
## Convergence test P-value: 5.1e-01
## Convergence detected. Stopping.
## The log-likelihood improved by 0.0003985
##
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
mcmc.diagnostics(est2)
## Sample statistics summary:
##
## Iterations = 50000:20049900
## Thinning interval = 100
## Number of chains = 1
## Sample size per chain = 2e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges 0.3107 15.23 0.03406 0.3132
## edgecov.lNet -0.0441 1.89 0.00422 0.0868
## nodeocov.lSend -0.2386 15.11 0.03378 0.4410
## nodeicov.lRecv -0.0386 4.64 0.01038 0.1611
## nodeicov.GDP -0.1077 8.47 0.01894 0.1244
## isolates -0.2277 13.30 0.02975 0.2333
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -24.0 -11.00 -2.000 9.00 36.0
## edgecov.lNet -3.0 -1.00 0.000 1.00 4.0
## nodeocov.lSend -28.0 -11.00 -1.000 9.00 32.0
## nodeicov.lRecv -8.0 -3.00 -1.000 3.00 10.0
## nodeicov.GDP -14.8 -5.97 -0.806 5.03 18.4
## isolates -29.0 -9.00 1.000 9.00 23.0
##
##
## Are sample statistics significantly different from observed?
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## diff. 0.3107 -0.04408 -0.2386 -0.03855 -0.1077
## test stat. 0.9920 -0.50755 -0.5411 -0.23925 -0.8657
## P-val. 0.3212 0.61177 0.5884 0.81091 0.3867
## isolates Overall (Chi^2)
## diff. -0.2277 NA
## test stat. -0.9763 5.2604
## P-val. 0.3289 0.5109
##
## Sample statistics cross-correlations:
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv
## edges 1.0000 0.4938 0.6894 0.5901
## edgecov.lNet 0.4938 1.0000 0.6448 0.8122
## nodeocov.lSend 0.6894 0.6448 1.0000 0.5902
## nodeicov.lRecv 0.5901 0.8122 0.5902 1.0000
## nodeicov.GDP 0.2764 0.2747 0.2612 0.3002
## isolates -0.9575 -0.4441 -0.5965 -0.5355
## nodeicov.GDP isolates
## edges 0.2764 -0.9575
## edgecov.lNet 0.2747 -0.4441
## nodeocov.lSend 0.2612 -0.5965
## nodeicov.lRecv 0.3002 -0.5355
## nodeicov.GDP 1.0000 -0.2472
## isolates -0.2472 1.0000
##
## Sample statistics auto-correlation:
## Chain 1
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## Lag 0 1.0000 1.0000 1.0000 1.0000 1.0000
## Lag 100 0.9277 0.9923 0.9687 0.9087 0.7894
## Lag 200 0.8730 0.9851 0.9427 0.8590 0.6692
## Lag 300 0.8278 0.9782 0.9198 0.8252 0.5889
## Lag 400 0.7884 0.9715 0.8991 0.7997 0.5286
## Lag 500 0.7533 0.9651 0.8795 0.7789 0.4831
## isolates
## Lag 0 1.0000
## Lag 100 0.8544
## Lag 200 0.7687
## Lag 300 0.7077
## Lag 400 0.6594
## Lag 500 0.6201
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## -0.7134 -0.6529 -0.6274 -1.1106 0.9441
## isolates
## 0.7561
##
## P-values (lower = worse):
## edges edgecov.lNet nodeocov.lSend nodeicov.lRecv nodeicov.GDP
## 0.4756 0.5138 0.5304 0.2667 0.3451
## isolates
## 0.4496
gf2 <- gof(est2)
par(mfrow = c(2, 2))
plot(gf2)
summary(est2)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: net98_99 ~ edges + edgecov(lNet) + nodeocov("lSend") + nodeicov("lRecv") +
## nodeicov("GDP") + isolates
##
## Iterations: 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % p-value
## edges -4.4781 0.2732 0 < 1e-04 ***
## edgecov.lNet 3.8866 0.9942 0 < 1e-04 ***
## nodeocov.lSend 0.3955 0.1096 0 0.00031 ***
## nodeicov.lRecv -0.2398 0.4024 0 0.55127
## nodeicov.GDP 0.0799 0.1253 0 0.52409
## isolates 2.1704 0.2761 0 < 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 57093 on 41184 degrees of freedom
## Residual Deviance: 544 on 41178 degrees of freedom
##
## AIC: 556 BIC: 607 (Smaller is better.)
## lagged network models influence of i->j at t-1 on i->j at t
lNet <- net1999[, ]
### copy over attribute data from net2000
net0 <- network.copy(net2000)
# replace edges
net0[, ] <- lNet
## Lagged sender and receiver attributes sender attribute models the effect
## of out-going ties at t-1
lSend <- apply(X = lNet, MARGIN = 1, FUN = sum)
set.vertex.attribute(x = net0, attrname = "lSend", val = lSend)
# same for in-coming ties
lRecv <- apply(X = lNet, MARGIN = 2, FUN = sum)
set.vertex.attribute(x = net0, attrname = "lRecv", val = lRecv)
set.seed(12345)
sims <- simulate(net0~ # network at which to start
edges # ERGM formula
+edgecov(lNet)
+nodeocov("lSend")
+nodeicov("lRecv")
+nodeicov("GDP")
+isolates,
coef=coef(est2), # Coefficients to use in sim
nsim=8, # Number of networks to draw
control=control.simulate.formula(MCMC.burnin=50000, MCMC.interval=20000))
set.seed(12345)
par(mfrow = c(3, 3), mar = c(0.01, 0.01, 0.01, 0.01))
plot(sims[[1]])
plot(sims[[2]])
plot(sims[[3]])
plot(sims[[4]])
plot(net2000, vertex.col = "orange", edge.col = "blue", bg = "grey85")
plot(sims[[5]])
plot(sims[[6]])
plot(sims[[7]])
plot(sims[[8]])