• rm(list=ls())
  • getwd()

0.1 Step 1 :

  • Load the the given data file :
library(sna)
library(ergm)
library(sand)
library(igraph)
library(network)
(sort(mydata))
##  [1] "alc.att" "alc.beh" "fem"     "fri"     "hob"     "res.act" "res.pas"
##  [8] "sch.att" "sch.beh" "spo.att" "spo.beh" "str.abs" "tob.att" "tob.beh"
  • Explore the given dataset such as its class, dataframe, and format

  • Notes

    • V = August/September 2003
    • W = November/December 2003
    • X = February/March 2004
    • Y = May/June 2004
    • NA = non-response

0.2 Step 2 :

  • data cleansing the adjacency matrix: fri_V

  • “fri” : adjacency matrix

    • 0 = no friendship reported
    • 1 = friendship reported
    • NA = person did not respond
    • 10 = structurally absent student
  • “fem” : gender

    • male(0) = yellow in color
    • female(1) = green in color
  • Gender Clustering: “the socio-demographic variable is not ethnicity, but gender”

1 “fri_V” : adjacency matrix at Time V

fri_mat = matrix(fri)
fri_V = fri_mat[,1][[1]]
omitted_V_nodes = c(-23, -24,-25) 
fri_V_final  = fri_V[omitted_V_nodes , omitted_V_nodes ] 
igraph_fV_directed = graph.adjacency(as.matrix(fri_V_final),mode = "directed",weighted = NULL)
mynet_V = igraph_fV_directed
mynet_V$gender = fem[omitted_V_nodes]
plot(mynet_V , vertex.color=c( "yellow", "green")[ 1+ (mynet_V$gender==1)], vertex.size=15, edge.arrow.size=0.5,  vertex.label.dist=0.2)
title("Yellow(Male)  at Time V")

Introduction:

This assignment suggests to use to the ERGM Models Hunter et al. (2008) to model highly nonlinear and interdependent friendship network structure with 1) a network structural index of “network density”, 2) homophily effect of Gender, 3) reciprocity effect and 4) transitivity effect for friendship with the given dataset and listed below:

  • The ERGM Models: mutuality(= reciprocity )

      + Model_01 :  friendship's Network = mutual + density  + Gender 
    
      + Model_02 :  friendship's Network = mutual + density + transitive 
    
      + Model_03 :  friendship's Network = mutual + density + Gender + gwesp(triad closure)
library(network)

fem_V = fem[omitted_V_nodes]
fri_V_network = network(fri_V_final,directed=T)
fri_V_network %v% "sexFem" = fem_V

ERGM (Hunter et al. (2008)) Model_01 : friendship’s Network = mutual + density + Gender

library(ergm)

model_01 = formula( fri_V_network~ mutual + density + nodematch("sexFem"))
summary(model_01)
##           mutual          density nodematch.sexFem 
##       16.0000000        0.1233766       53.0000000
#results_01 = ergm(model_01)
summary(results_01)
## Call:
## ergm(formula = model_01)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                    Estimate Std. Error MCMC % z value Pr(>|z|)    
## mutual               2.3889     0.4742      0   5.038   <1e-04 ***
## density          -1932.7951   205.0643      0  -9.425   <1e-04 ***
## nodematch.sexFem     2.1720     0.4747      0   4.576   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 640.5  on 462  degrees of freedom
##  Residual Deviance: 268.1  on 459  degrees of freedom
##  
## AIC: 274.1  BIC: 286.5  (Smaller is better. MC Std. Err. = 0.395)
mcmc.diagnostics(results_01,vars.per.page=4)
## Sample statistics summary:
## 
## Iterations = 14336:262144
## Thinning interval = 1024 
## Number of chains = 1 
## Sample size per chain = 243 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                       Mean      SD Naive SE Time-series SE
## mutual           -0.411523 3.84452 0.246626       0.280831
## density          -0.001888 0.01732 0.001111       0.001294
## nodematch.sexFem -1.082305 7.77800 0.498959       0.578341
## 
## 2. Quantiles for each variable:
## 
##                       2.5%      25%       50%     75%    97.5%
## mutual            -7.95000 -3.00000 -1.000000 2.00000  7.00000
## density           -0.03669 -0.01407 -0.002165 0.01082  0.03019
## nodematch.sexFem -16.95000 -6.00000 -1.000000 4.00000 14.95000
## 
## 
## Are sample statistics significantly different from observed?
##                mutual      density nodematch.sexFem Overall (Chi^2)
## diff.      -0.4115226 -0.001888372      -1.08230453              NA
## test stat. -1.4653746 -1.459083610      -1.87139397      6.43175776
## P-val.      0.1428188  0.144542097       0.06129049      0.09757736
## 
## Sample statistics cross-correlations:
##                     mutual   density nodematch.sexFem
## mutual           1.0000000 0.8167622        0.8318669
## density          0.8167622 1.0000000        0.9585554
## nodematch.sexFem 0.8318669 0.9585554        1.0000000
## 
## Sample statistics auto-correlation:
## Chain 1 
##                mutual      density nodematch.sexFem
## Lag 0     1.000000000  1.000000000      1.000000000
## Lag 1024  0.127117065  0.149507600      0.144550114
## Lag 2048  0.004220899 -0.012577533     -0.047749608
## Lag 3072 -0.017205366  0.006942584     -0.007301803
## Lag 4096 -0.046394173 -0.140118071     -0.095016307
## Lag 5120 -0.069267789 -0.108349274     -0.042751848
## 
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##           mutual          density nodematch.sexFem 
##            1.344            2.538            2.956 
## 
## Individual P-values (lower = worse):
##           mutual          density nodematch.sexFem 
##      0.178803040      0.011152326      0.003121538 
## Joint P-value (lower = worse):  0.06961864 .

## 
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
par(mfrow=c(1,3))
fit_01 = gof(results_01 ~ distance + espartners )
summary(fit_01)
##                Length Class   Mode   
## network.size      1   -none-  numeric
## GOF               2   formula call   
## pval.model       15   -none-  numeric
## summary.model    15   -none-  numeric
## pobs.model        3   -none-  numeric
## psim.model      300   -none-  numeric
## bds.model         6   -none-  numeric
## obs.model         3   -none-  numeric
## sim.model       300   -none-  numeric
## pval.dist       110   -none-  numeric
## summary.dist    110   -none-  numeric
## pobs.dist        22   -none-  numeric
## psim.dist      2200   -none-  numeric
## bds.dist         44   -none-  numeric
## obs.dist         22   -none-  numeric
## sim.dist       2200   -none-  numeric
## pval.espart     105   -none-  numeric
## summary.espart  105   -none-  numeric
## pobs.espart      21   -none-  numeric
## psim.espart    2100   -none-  numeric
## bds.espart       42   -none-  numeric
## obs.espart       21   -none-  numeric
## sim.espart     2100   -none-  numeric
plot(fit_01)

Results: ERGM Mode_01

  • There is no reason to accept the null hypothesis at 1% significant level for both cases.

  • It concludes that all coefficients (mutual, density, gender) are statistically signifcant different from zero at 1% level.

ERGM Model_02 : friendship’s Network = mutual + density + transitive

library(ergm)

model_02 = formula( fri_V_network~ mutual + density + transitive )
summary(model_02)
##     mutual    density transitive 
## 16.0000000  0.1233766 56.0000000
#results_02 = ergm(model_02)

Results: Model_02

  • Model_02 has problem of “Unconstrained MCMC sampling”.

  • Computation process is crashed and stopped.

  • The reason is mainly due to the interdependence of transitive triad with other main effects(mutuality and density)

ERGM Model_03 : friendship’s Network = reciporality + density + Gender + gwesp(triad Closure)

library(ergm)

model_03 = formula( fri_V_network~ mutual + density + nodematch("sexFem") + gwesp(.5,fixed=T) )
     
summary(model_03)
##           mutual          density nodematch.sexFem  gwesp.fixed.0.5 
##       16.0000000        0.1233766       53.0000000       44.8791232
#results_03 = ergm(model_03)
summary(results_03)
## Call:
## ergm(formula = model_03)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                    Estimate Std. Error MCMC % z value Pr(>|z|)    
## mutual               2.1229     0.5224      0   4.064  < 1e-04 ***
## density          -1949.5618   224.7396      0  -8.675  < 1e-04 ***
## nodematch.sexFem     1.6269     0.5360      0   3.035  0.00240 ** 
## gwesp.fixed.0.5      0.4648     0.1538      0   3.022  0.00251 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 640.5  on 462  degrees of freedom
##  Residual Deviance: 258.1  on 458  degrees of freedom
##  
## AIC: 266.1  BIC: 282.6  (Smaller is better. MC Std. Err. = 0.2022)
mcmc.diagnostics(results_03,vars.per.page=6)
## Sample statistics summary:
## 
## Iterations = 163840:3174400
## Thinning interval = 4096 
## Number of chains = 1 
## Sample size per chain = 736 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                      Mean      SD Naive SE Time-series SE
## mutual           1.042120  5.4884 0.202303       0.216022
## density          0.004194  0.0279 0.001029       0.001108
## nodematch.sexFem 2.169837 12.5987 0.464393       0.498922
## gwesp.fixed.0.5  3.861931 21.8071 0.803819       0.885809
## 
## 2. Quantiles for each variable:
## 
##                       2.5%       25%      50%      75%    97.5%
## mutual           -10.00000  -3.00000 1.000000  5.00000 11.00000
## density           -0.04978  -0.01515 0.004329  0.02381  0.05411
## nodematch.sexFem -22.00000  -7.00000 2.000000 11.00000 26.00000
## gwesp.fixed.0.5  -35.20902 -12.51831 3.252896 18.94685 49.26575
## 
## 
## Are sample statistics significantly different from observed?
##                  mutual      density nodematch.sexFem gwesp.fixed.0.5
## diff.      1.0421195652 0.0041937229     2.169837e+00    3.861931e+00
## test stat. 4.8241380822 3.7835957967     4.349050e+00    4.359781e+00
## P-val.     0.0000014061 0.0001545788     1.367284e-05    1.301929e-05
##            Overall (Chi^2)
## diff.                   NA
## test stat.    3.716036e+01
## P-val.        2.871370e-07
## 
## Sample statistics cross-correlations:
##                     mutual   density nodematch.sexFem gwesp.fixed.0.5
## mutual           1.0000000 0.9268320        0.9292926       0.9165619
## density          0.9268320 1.0000000        0.9867965       0.9413082
## nodematch.sexFem 0.9292926 0.9867965        1.0000000       0.9499376
## gwesp.fixed.0.5  0.9165619 0.9413082        0.9499376       1.0000000
## 
## Sample statistics auto-correlation:
## Chain 1 
##                 mutual      density nodematch.sexFem gwesp.fixed.0.5
## Lag 0      1.000000000  1.000000000     1.000000e+00     1.000000000
## Lag 4096   0.064839380  0.073967120     7.091922e-02     0.096147956
## Lag 8192  -0.008951965  0.005858649     9.746928e-04     0.004436311
## Lag 12288  0.007837038 -0.015206655    -9.458723e-05    -0.001382298
## Lag 16384  0.050687828  0.043655730     4.110856e-02     0.023805544
## Lag 20480 -0.050350589 -0.051514369    -5.754417e-02    -0.044932096
## 
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##           mutual          density nodematch.sexFem  gwesp.fixed.0.5 
##           -1.858           -2.017           -1.928           -1.808 
## 
## Individual P-values (lower = worse):
##           mutual          density nodematch.sexFem  gwesp.fixed.0.5 
##       0.06311304       0.04372666       0.05389010       0.07059113 
## Joint P-value (lower = worse):  0.5130099 .

## 
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
par(mfrow=c(1,3))
fit_03 = gof(results_03 ~ distance + espartners )
summary(fit_03)
##                Length Class   Mode   
## network.size      1   -none-  numeric
## GOF               2   formula call   
## pval.model       20   -none-  numeric
## summary.model    20   -none-  numeric
## pobs.model        4   -none-  numeric
## psim.model      400   -none-  numeric
## bds.model         8   -none-  numeric
## obs.model         4   -none-  numeric
## sim.model       400   -none-  numeric
## pval.dist       110   -none-  numeric
## summary.dist    110   -none-  numeric
## pobs.dist        22   -none-  numeric
## psim.dist      2200   -none-  numeric
## bds.dist         44   -none-  numeric
## obs.dist         22   -none-  numeric
## sim.dist       2200   -none-  numeric
## pval.espart     105   -none-  numeric
## summary.espart  105   -none-  numeric
## pobs.espart      21   -none-  numeric
## psim.espart    2100   -none-  numeric
## bds.espart       42   -none-  numeric
## obs.espart       21   -none-  numeric
## sim.espart     2100   -none-  numeric
plot(fit_03)

Results: ERGM Mode_03

  • There is no reason to accept the null hypothesis at 1% significant level for both cases.

  • It concludes that all coefficients (mutual, density, gender and triad closure measured with gwesp ) are statistically significant different from zero at 1% level.

Netlogit Model Permutation Test(Butts (2008)) : Model_04

# same gender square matrix:
fem_final = fem[omitted_V_nodes]
fem_final
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 
##  1  1  1  1  1  0  1  0  1  1  0  1  1  1  0  1  1  0  0  0  0  1
same_gender = 1*outer(fem_final ,fem_final ,"==")
diag(same_gender) = 0  # remove diagonal effect  and set it to zero
same_gender
##    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1  0 1 1 1 1 0 1 0 1  1  0  1  1  1  0  1  1  0  0  0  0  1
## 2  1 0 1 1 1 0 1 0 1  1  0  1  1  1  0  1  1  0  0  0  0  1
## 3  1 1 0 1 1 0 1 0 1  1  0  1  1  1  0  1  1  0  0  0  0  1
## 4  1 1 1 0 1 0 1 0 1  1  0  1  1  1  0  1  1  0  0  0  0  1
##  [ reached getOption("max.print") -- omitted 18 rows ]
# reciprocity (or mutuality )
reciprocity_effect = t(fri_V_final)
diag(reciprocity_effect) = 0  # remove diagonal effect  and set it to zero
reciprocity_effect
##    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1  0 0 0 1 0 0 1 0 0  1  0  0  1  0  0  0  0  0  0  0  0  0
## 2  0 0 1 0 0 0 0 0 1  0  0  0  0  1  0  0  0  0  0  0  0  1
## 3  0 1 0 0 0 0 0 0 1  1  0  0  0  1  0  0  0  0  0  0  0  0
## 4  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [ reached getOption("max.print") -- omitted 18 rows ]
# transitivity
transitivity_effect = (fri_V_final) %*% (fri_V_final)
diag(transitivity_effect) = 0  # remove diagonal effect  and set it to zero
transitivity_effect
##    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  1  0  0  0  0  0  0
## 2  0 0 2 0 3 0 0 1 4  0  0  2  0  1  0  0  0  1  0  0  0  2
## 3  0 1 0 0 2 0 0 0 1  0  0  2  0  1  0  0  0  0  0  0  0  2
## 4  1 0 0 0 0 0 0 0 0  0  0  0  1  0  0  2  0  0  0  0  0  0
##  [ reached getOption("max.print") -- omitted 18 rows ]
y_04  = fri_V_final
x_04 =  list( same_gender, reciprocity_effect  )
 
set.seed(1913)
(Model_04_qap = netlogit(y_04, x_04,    nullhyp="qapspp",reps=1000))
## 
## Network Logit Model
## 
## Coefficients:
##             Estimate  Exp(b)      Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.151538  0.01574019 0.254   0.746   0.254    
## x1           2.127549  8.39426525 1.000   0.000   0.000    
## x2           2.398821 11.01018984 1.000   0.000   0.000    
## 
## Goodness of Fit Statistics:
## 
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 243.4879 on 459 degrees of freedom
## Chi-Squared test of fit improvement:
##   396.9801 on 3 degrees of freedom, p-value 0 
## AIC: 249.4879    BIC: 261.8946 
## Pseudo-R^2 Measures:
##  (Dn-Dr)/(Dn-Dr+dfn): 0.4621529 
##  (Dn-Dr)/Dn: 0.6198282

Netlogit Model Permutation Test : Model_05

y_05  = fri_V_final
x_05 =  list( same_gender, transitivity_effect)

set.seed(1913)
(Model_05_qap = netlogit(y_05, x_05,    nullhyp="qapspp",reps=1000))
## 
## Network Logit Model
## 
## Coefficients:
##             Estimate  Exp(b)     Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.195615 0.01506148 0.29    0.71    0.290    
## x1           2.252083 9.50752361 1.00    0.00    0.001    
## x2           1.187089 3.27752597 1.00    0.00    0.000    
## 
## Goodness of Fit Statistics:
## 
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 256.2435 on 459 degrees of freedom
## Chi-Squared test of fit improvement:
##   384.2244 on 3 degrees of freedom, p-value 0 
## AIC: 262.2435    BIC: 274.6502 
## Pseudo-R^2 Measures:
##  (Dn-Dr)/(Dn-Dr+dfn): 0.4540456 
##  (Dn-Dr)/Dn: 0.599912

Netlogit Model Permutation Test : Model_06

y_06  = fri_V_final
x_06 =  list(same_gender, reciprocity_effect,  transitivity_effect)

set.seed(1913);
(Model_06_qap = netlogit(y_06, x_06,    nullhyp="qapspp",reps=1000));
## 
## Network Logit Model
## 
## Coefficients:
##             Estimate  Exp(b)     Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.280998 0.01382886 0.193   0.807   0.193    
## x1           1.659706 5.25776345 1.000   0.000   0.001    
## x2           2.275291 9.73074816 1.000   0.000   0.000    
## x3           1.066596 2.90547231 1.000   0.000   0.000    
## 
## Goodness of Fit Statistics:
## 
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 218.9737 on 458 degrees of freedom
## Chi-Squared test of fit improvement:
##   421.4943 on 4 degrees of freedom, p-value 0 
## AIC: 226.9737    BIC: 243.516 
## Pseudo-R^2 Measures:
##  (Dn-Dr)/(Dn-Dr+dfn): 0.4770764 
##  (Dn-Dr)/Dn: 0.6581036

References

Butts, Carter T. 2008. “Social Network Analysis with Sna.” Journal of Statistical Software; Vol 1, Issue 6 (2008). https://doi.org/10.18637/jss.v024.i06.
Hunter, David R., Mark S. Handcock, Carter T. Butts, Steven M. Goodreau, and Martina Morris. 2008. “Ergm: A Package to Fit, Simulate and Diagnose Exponential-Family Models for Networks.” Journal of Statistical Software; Vol 1, Issue 3 (2008). https://doi.org/10.18637/jss.v024.i03.