Class ERGM 2018

Data

To access your data, go to the network data link on the website and look under Practicum 10 for ClassData2018.zip. This is a zip file with all six of the networks you provided. It is late in the semester, so have preloaded the networks with attributes to save time and effort on your part.

Before you load the data, start statnet.

library(statnet)

Load Data

load("Aquaint_0.rda")
load("Aquaint_1.rda")
load("Buddy_0.rda")
load("Buddy_1.rda")
load("Friend_0.rda")
load("Friend_1.rda")
load("Group_Work.rda")
load("Immersive_ties.rda")
load("Past_class.rda")

Loading attributes

The networks above were pre-loaded with attributes for your convenience. The attributes are:

gender
Yrs_to_grad
major
concentration
age
Lang_of_Study
Cyber_Club
META_Lab
WIIS_Club
QAAAM
ODRC
German_Club
Residence
From_Abroad
Grad_Plan
Get_it

To look at any of these attributes, use the get.vertex.attribute() command.

get.vertex.attribute(Friend_1, "age")

##  [1] 23 31 23 26 23 31 24 26 27 27 26 23 31 26 26 24 27 24 29 27 28 24

Useful searches in “Help” window

ergm.terms
?traid.classify

Setting Up a Model

Structural Elements

# Set your computer's random 
#   seed so that you can compare
#   your results to those below.
set.seed(8675309)

# Start with a structural model
Model1 <- ergm(Friend_1 ~ edges + gwdsp +  gwesp,
               control=control.ergm(MCMC.burnin=1024*75, MCMLE.maxit=40))

## Starting maximum pseudolikelihood estimation (MPLE):

## Evaluating the predictor and response matrix.

## Maximizing the pseudolikelihood.

## Finished MPLE.

## Starting Monte Carlo maximum likelihood estimation (MCMLE):

## Iteration 1 of at most 40:

## Optimizing with step length 0.646704043794084.

## The log-likelihood improved by 2.797.

## Iteration 2 of at most 40:

## Optimizing with step length 0.947115154323218.

## The log-likelihood improved by 1.496.

## Iteration 3 of at most 40:

## Optimizing with step length 1.

## The log-likelihood improved by 0.8604.

## Step length converged once. Increasing MCMC sample size.

## Iteration 4 of at most 40:

## Optimizing with step length 1.

## The log-likelihood improved by 0.3604.

## Step length converged twice. Stopping.

## Finished MCMLE.

## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC.  To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.

summary(Model1)

## 
## ==========================
## Summary of model fit
## ==========================
## 
## Formula:   Friend_1 ~ edges + gwdsp + gwesp
## 
## Iterations:  4 out of 40 
## 
## Monte Carlo MLE Results:
##             Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges        -2.0943     0.3449      0  -6.072  < 1e-04 ***
## gwdsp        -0.4533     0.1227      0  -3.694 0.000221 ***
## gwdsp.decay   5.0038   120.1254      0   0.042 0.966774    
## gwesp         1.9221     0.2717      0   7.075  < 1e-04 ***
## gwesp.decay  -0.2694     0.3886      0  -0.693 0.488084    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 640.5  on 462  degrees of freedom
##  Residual Deviance: 254.4  on 457  degrees of freedom
##  
## AIC: 264.4    BIC: 285    (Smaller is better.)

Next, you can check the model’s convergence.

# save(Model1, file="Model1.Rdata")  # Save this model if you like.

# Then run diagnostics
mcmc.diagnostics(Model1)

## Sample statistics summary:
## 
## Iterations = 76800:4270080
## Thinning interval = 1024 
## Number of chains = 1 
## Sample size per chain = 4096 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean       SD  Naive SE Time-series SE
## edges       -0.17090  6.77358 0.1058372      0.1162981
## gwdsp       -3.13248 20.53800 0.3209062      0.3515938
## gwdsp.decay  0.01585  0.01812 0.0002831      0.0003025
## gwesp       -0.42152  7.62986 0.1192165      0.1534759
## gwesp.decay -2.48042  4.63916 0.0724869      0.0724869
## 
## 2. Quantiles for each variable:
## 
##                   2.5%        25%      50%      75%    97.5%
## edges       -14.000000  -5.000000  0.00000  4.00000 13.00000
## gwdsp       -45.932904 -16.953024 -1.95977 11.97651 32.99329
## gwdsp.decay  -0.009128   0.003043  0.01217  0.02434  0.06069
## gwesp       -15.763047  -5.690762 -0.07229  5.00000 13.69076
## gwesp.decay -13.542392  -5.032921 -2.51646  0.00000  2.51646
## 
## 
## Sample statistics cross-correlations:
##                  edges      gwdsp gwdsp.decay      gwesp gwesp.decay
## edges        1.0000000  0.8686563  -0.6915941  0.8501711   0.4898986
## gwdsp        0.8686563  1.0000000  -0.7840898  0.8453842   0.4799050
## gwdsp.decay -0.6915941 -0.7840898   1.0000000 -0.6941996  -0.6624814
## gwesp        0.8501711  0.8453842  -0.6941996  1.0000000   0.4197970
## gwesp.decay  0.4898986  0.4799050  -0.6624814  0.4197970   1.0000000
## 
## Sample statistics auto-correlation:
## Chain 1 
##                edges      gwdsp gwdsp.decay      gwesp  gwesp.decay
## Lag 0    1.000000000 1.00000000 1.000000000 1.00000000  1.000000000
## Lag 1024 0.093855094 0.09935735 0.066283456 0.22136555  0.015712578
## Lag 2048 0.027177179 0.02695869 0.020004724 0.09168300 -0.005191169
## Lag 3072 0.028945887 0.01695877 0.004847959 0.04932846  0.010608019
## Lag 4096 0.018470430 0.01993745 0.013261862 0.03888066  0.006827068
## Lag 5120 0.001602337 0.01154688 0.008395237 0.01622361  0.010603823
## 
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##       edges       gwdsp gwdsp.decay       gwesp gwesp.decay 
##    -0.44272    -0.03998     0.05142    -0.04380     0.92964 
## 
## Individual P-values (lower = worse):
##       edges       gwdsp gwdsp.decay       gwesp gwesp.decay 
##   0.6579696   0.9681077   0.9589908   0.9650659   0.3525564 
## Joint P-value (lower = worse):  0.6548264 .

## Warning in formals(fun): argument is not a function

## 
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).

summary(Model1)

## 
## ==========================
## Summary of model fit
## ==========================
## 
## Formula:   Friend_1 ~ edges + gwdsp + gwesp
## 
## Iterations:  4 out of 40 
## 
## Monte Carlo MLE Results:
##             Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges        -2.0943     0.3449      0  -6.072  < 1e-04 ***
## gwdsp        -0.4533     0.1227      0  -3.694 0.000221 ***
## gwdsp.decay   5.0038   120.1254      0   0.042 0.966774    
## gwesp         1.9221     0.2717      0   7.075  < 1e-04 ***
## gwesp.decay  -0.2694     0.3886      0  -0.693 0.488084    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 640.5  on 462  degrees of freedom
##  Residual Deviance: 254.4  on 457  degrees of freedom
##  
## AIC: 264.4    BIC: 285    (Smaller is better.)

Next, you can check the model’s “Goodness of Fit.”

mod.fit <- gof(Model1, GOF = ~ distance + espartners + idegree + odegree)
summary(mod.fit)
par(mfrow=c(2,2))
plot(mod.fit, main="GOF - Model 1")
par(mfrow=c(1,1))

Adding Attributes to the Model

The model was written this way so that you could follow the comments, and so that you could “turn off” some or all of the elements of the model using the “#” mark.

Model2 <- ergm(Friend_1 ~ edges + gwdsp +  gwesp
               # Start with dyadic ties
                 + dyadcov(Group_Work)
                 + dyadcov(Immersive_ties) 
                 + dyadcov(Past_class)
               # Next, try descriptives
                 + absdiff("age")
                 + absdiff("Yrs_to_grad")
               # For gender, and other factors, you can look at it as homophily (nodematch)
               #       or heterophily (nodemix). But only use one at a time.
               # I set this up for gender, major, and concentraition.
                 # + nodefactor("gender") # Use "gender" only once
                 + nodematch("gender")
                 # + nodematch("major")
                 + nodefactor("major") # Use major only once
                 + nodematch("concentraition")
                 # + nodefactor("concentraition") # Use concentraition only once
               # Next, test whether clubs create homophily.
                 + nodematch("Lang_of_Study")
                 + nodematch("Cyber_Club")
                 + nodematch("META_Lab")
                 + nodematch("WIIS_Club")
                 + nodematch("QAAAM")
                 + nodematch("ODRC")
                 + nodematch("German_Club")               
               # Last, see if any of these are "attractive" traits.
                 + nodefactor("Residence")
                 + nodefactor("From_Abroad")
                 + nodefactor("Grad_Plan")
                 + nodefactor("Get_it")
                 )
summary(Model2)
# save(Model2, file="Model2.Rdata")
# Then run diagnostics
mcmc.diagnostics(Model2)
summary(Model2) 
mod.fit <- gof(Model2, GOF = ~ distance + espartners + degree + triadcensus, nsim=10000) # , control=control.ergm(force.main=TRUE))
# mod.fit <- gof(Model2, GOF = ~ distance + dspartners + degree + triadcensus, nsim=10000) # Test re. dyads reveals dyad probs.
summary(mod.fit)
par(mfrow=c(2,2))
plot(mod.fit, main="GOF - Model 1")
par(mfrow=c(1,1))
summary(Model2)

For more terms that you may use in an ERGM, check out any one of the terms to bring up the help viewer. ?absdiff