library(sna)
library(ergm)
library(sand)
library(igraph)
library(network)(sort(mydata))## [1] "alc.att" "alc.beh" "fem" "fri" "hob" "res.act" "res.pas"
## [8] "sch.att" "sch.beh" "spo.att" "spo.beh" "str.abs" "tob.att" "tob.beh"
Explore the given dataset such as its class, dataframe, and format
Notes
data cleansing the adjacency matrix: fri_V
“fri” : adjacency matrix
“fem” : gender
Gender Clustering: “the socio-demographic variable is not ethnicity, but gender”
fri_mat = matrix(fri)
fri_V = fri_mat[,1][[1]]
omitted_V_nodes = c(-23, -24,-25)
fri_V_final = fri_V[omitted_V_nodes , omitted_V_nodes ] igraph_fV_directed = graph.adjacency(as.matrix(fri_V_final),mode = "directed",weighted = NULL)
mynet_V = igraph_fV_directed
mynet_V$gender = fem[omitted_V_nodes]
plot(mynet_V , vertex.color=c( "yellow", "green")[ 1+ (mynet_V$gender==1)], vertex.size=15, edge.arrow.size=0.5, vertex.label.dist=0.2)
title("Yellow(Male) at Time V")Introduction:
This assignment suggests to use to the ERGM Models Hunter et al. (2008) to model highly nonlinear and interdependent friendship network structure with 1) a network structural index of “network density”, 2) homophily effect of Gender, 3) reciprocity effect and 4) transitivity effect for friendship with the given dataset and listed below:
The ERGM Models: mutuality(= reciprocity )
+ Model_01 : friendship's Network = mutual + density + Gender
+ Model_02 : friendship's Network = mutual + density + transitive
+ Model_03 : friendship's Network = mutual + density + Gender + gwesp(triad closure)library(network)
fem_V = fem[omitted_V_nodes]
fri_V_network = network(fri_V_final,directed=T)
fri_V_network %v% "sexFem" = fem_VERGM (Hunter et al. (2008)) Model_01 : friendship’s Network = mutual + density + Gender
library(ergm)
model_01 = formula( fri_V_network~ mutual + density + nodematch("sexFem"))
summary(model_01)## mutual density nodematch.sexFem
## 16.0000000 0.1233766 53.0000000
#results_01 = ergm(model_01)
summary(results_01)## Call:
## ergm(formula = model_01)
##
## Monte Carlo Maximum Likelihood Results:
##
## Estimate Std. Error MCMC % z value Pr(>|z|)
## mutual 2.3889 0.4742 0 5.038 <1e-04 ***
## density -1932.7951 205.0643 0 -9.425 <1e-04 ***
## nodematch.sexFem 2.1720 0.4747 0 4.576 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: 268.1 on 459 degrees of freedom
##
## AIC: 274.1 BIC: 286.5 (Smaller is better. MC Std. Err. = 0.395)
mcmc.diagnostics(results_01,vars.per.page=4)## Sample statistics summary:
##
## Iterations = 14336:262144
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 243
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mutual -0.411523 3.84452 0.246626 0.280831
## density -0.001888 0.01732 0.001111 0.001294
## nodematch.sexFem -1.082305 7.77800 0.498959 0.578341
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mutual -7.95000 -3.00000 -1.000000 2.00000 7.00000
## density -0.03669 -0.01407 -0.002165 0.01082 0.03019
## nodematch.sexFem -16.95000 -6.00000 -1.000000 4.00000 14.95000
##
##
## Are sample statistics significantly different from observed?
## mutual density nodematch.sexFem Overall (Chi^2)
## diff. -0.4115226 -0.001888372 -1.08230453 NA
## test stat. -1.4653746 -1.459083610 -1.87139397 6.43175776
## P-val. 0.1428188 0.144542097 0.06129049 0.09757736
##
## Sample statistics cross-correlations:
## mutual density nodematch.sexFem
## mutual 1.0000000 0.8167622 0.8318669
## density 0.8167622 1.0000000 0.9585554
## nodematch.sexFem 0.8318669 0.9585554 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## mutual density nodematch.sexFem
## Lag 0 1.000000000 1.000000000 1.000000000
## Lag 1024 0.127117065 0.149507600 0.144550114
## Lag 2048 0.004220899 -0.012577533 -0.047749608
## Lag 3072 -0.017205366 0.006942584 -0.007301803
## Lag 4096 -0.046394173 -0.140118071 -0.095016307
## Lag 5120 -0.069267789 -0.108349274 -0.042751848
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## mutual density nodematch.sexFem
## 1.344 2.538 2.956
##
## Individual P-values (lower = worse):
## mutual density nodematch.sexFem
## 0.178803040 0.011152326 0.003121538
## Joint P-value (lower = worse): 0.06961864 .
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
par(mfrow=c(1,3))
fit_01 = gof(results_01 ~ distance + espartners )
summary(fit_01)## Length Class Mode
## network.size 1 -none- numeric
## GOF 2 formula call
## pval.model 15 -none- numeric
## summary.model 15 -none- numeric
## pobs.model 3 -none- numeric
## psim.model 300 -none- numeric
## bds.model 6 -none- numeric
## obs.model 3 -none- numeric
## sim.model 300 -none- numeric
## pval.dist 110 -none- numeric
## summary.dist 110 -none- numeric
## pobs.dist 22 -none- numeric
## psim.dist 2200 -none- numeric
## bds.dist 44 -none- numeric
## obs.dist 22 -none- numeric
## sim.dist 2200 -none- numeric
## pval.espart 105 -none- numeric
## summary.espart 105 -none- numeric
## pobs.espart 21 -none- numeric
## psim.espart 2100 -none- numeric
## bds.espart 42 -none- numeric
## obs.espart 21 -none- numeric
## sim.espart 2100 -none- numeric
plot(fit_01)Results: ERGM Mode_01
There is no reason to accept the null hypothesis at 1% significant level for both cases.
It concludes that all coefficients (mutual, density, gender) are statistically signifcant different from zero at 1% level.
ERGM Model_02 : friendship’s Network = mutual + density + transitive
library(ergm)
model_02 = formula( fri_V_network~ mutual + density + transitive )
summary(model_02)## mutual density transitive
## 16.0000000 0.1233766 56.0000000
#results_02 = ergm(model_02)Results: Model_02
Model_02 has problem of “Unconstrained MCMC sampling”.
Computation process is crashed and stopped.
The reason is mainly due to the interdependence of transitive triad with other main effects(mutuality and density)
ERGM Model_03 : friendship’s Network = reciporality + density + Gender + gwesp(triad Closure)
library(ergm)
model_03 = formula( fri_V_network~ mutual + density + nodematch("sexFem") + gwesp(.5,fixed=T) )
summary(model_03)## mutual density nodematch.sexFem gwesp.fixed.0.5
## 16.0000000 0.1233766 53.0000000 44.8791232
#results_03 = ergm(model_03)
summary(results_03)## Call:
## ergm(formula = model_03)
##
## Monte Carlo Maximum Likelihood Results:
##
## Estimate Std. Error MCMC % z value Pr(>|z|)
## mutual 2.1229 0.5224 0 4.064 < 1e-04 ***
## density -1949.5618 224.7396 0 -8.675 < 1e-04 ***
## nodematch.sexFem 1.6269 0.5360 0 3.035 0.00240 **
## gwesp.fixed.0.5 0.4648 0.1538 0 3.022 0.00251 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 640.5 on 462 degrees of freedom
## Residual Deviance: 258.1 on 458 degrees of freedom
##
## AIC: 266.1 BIC: 282.6 (Smaller is better. MC Std. Err. = 0.2022)
mcmc.diagnostics(results_03,vars.per.page=6)## Sample statistics summary:
##
## Iterations = 163840:3174400
## Thinning interval = 4096
## Number of chains = 1
## Sample size per chain = 736
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mutual 1.042120 5.4884 0.202303 0.216022
## density 0.004194 0.0279 0.001029 0.001108
## nodematch.sexFem 2.169837 12.5987 0.464393 0.498922
## gwesp.fixed.0.5 3.861931 21.8071 0.803819 0.885809
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mutual -10.00000 -3.00000 1.000000 5.00000 11.00000
## density -0.04978 -0.01515 0.004329 0.02381 0.05411
## nodematch.sexFem -22.00000 -7.00000 2.000000 11.00000 26.00000
## gwesp.fixed.0.5 -35.20902 -12.51831 3.252896 18.94685 49.26575
##
##
## Are sample statistics significantly different from observed?
## mutual density nodematch.sexFem gwesp.fixed.0.5
## diff. 1.0421195652 0.0041937229 2.169837e+00 3.861931e+00
## test stat. 4.8241380822 3.7835957967 4.349050e+00 4.359781e+00
## P-val. 0.0000014061 0.0001545788 1.367284e-05 1.301929e-05
## Overall (Chi^2)
## diff. NA
## test stat. 3.716036e+01
## P-val. 2.871370e-07
##
## Sample statistics cross-correlations:
## mutual density nodematch.sexFem gwesp.fixed.0.5
## mutual 1.0000000 0.9268320 0.9292926 0.9165619
## density 0.9268320 1.0000000 0.9867965 0.9413082
## nodematch.sexFem 0.9292926 0.9867965 1.0000000 0.9499376
## gwesp.fixed.0.5 0.9165619 0.9413082 0.9499376 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## mutual density nodematch.sexFem gwesp.fixed.0.5
## Lag 0 1.000000000 1.000000000 1.000000e+00 1.000000000
## Lag 4096 0.064839380 0.073967120 7.091922e-02 0.096147956
## Lag 8192 -0.008951965 0.005858649 9.746928e-04 0.004436311
## Lag 12288 0.007837038 -0.015206655 -9.458723e-05 -0.001382298
## Lag 16384 0.050687828 0.043655730 4.110856e-02 0.023805544
## Lag 20480 -0.050350589 -0.051514369 -5.754417e-02 -0.044932096
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## mutual density nodematch.sexFem gwesp.fixed.0.5
## -1.858 -2.017 -1.928 -1.808
##
## Individual P-values (lower = worse):
## mutual density nodematch.sexFem gwesp.fixed.0.5
## 0.06311304 0.04372666 0.05389010 0.07059113
## Joint P-value (lower = worse): 0.5130099 .
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
par(mfrow=c(1,3))
fit_03 = gof(results_03 ~ distance + espartners )
summary(fit_03)## Length Class Mode
## network.size 1 -none- numeric
## GOF 2 formula call
## pval.model 20 -none- numeric
## summary.model 20 -none- numeric
## pobs.model 4 -none- numeric
## psim.model 400 -none- numeric
## bds.model 8 -none- numeric
## obs.model 4 -none- numeric
## sim.model 400 -none- numeric
## pval.dist 110 -none- numeric
## summary.dist 110 -none- numeric
## pobs.dist 22 -none- numeric
## psim.dist 2200 -none- numeric
## bds.dist 44 -none- numeric
## obs.dist 22 -none- numeric
## sim.dist 2200 -none- numeric
## pval.espart 105 -none- numeric
## summary.espart 105 -none- numeric
## pobs.espart 21 -none- numeric
## psim.espart 2100 -none- numeric
## bds.espart 42 -none- numeric
## obs.espart 21 -none- numeric
## sim.espart 2100 -none- numeric
plot(fit_03)Results: ERGM Mode_03
There is no reason to accept the null hypothesis at 1% significant level for both cases.
It concludes that all coefficients (mutual, density, gender and triad closure measured with gwesp ) are statistically significant different from zero at 1% level.
Netlogit Model Permutation Test(Butts (2008)) : Model_04
# same gender square matrix:
fem_final = fem[omitted_V_nodes]
fem_final## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 1 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 1
same_gender = 1*outer(fem_final ,fem_final ,"==")
diag(same_gender) = 0 # remove diagonal effect and set it to zero
same_gender## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 0 1 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 1
## 2 1 0 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 1
## 3 1 1 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 1
## 4 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 0 0 1
## [ reached getOption("max.print") -- omitted 18 rows ]
# reciprocity (or mutuality )
reciprocity_effect = t(fri_V_final)
diag(reciprocity_effect) = 0 # remove diagonal effect and set it to zero
reciprocity_effect## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
## 2 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1
## 3 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [ reached getOption("max.print") -- omitted 18 rows ]
# transitivity
transitivity_effect = (fri_V_final) %*% (fri_V_final)
diag(transitivity_effect) = 0 # remove diagonal effect and set it to zero
transitivity_effect## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 2 0 0 2 0 3 0 0 1 4 0 0 2 0 1 0 0 0 1 0 0 0 2
## 3 0 1 0 0 2 0 0 0 1 0 0 2 0 1 0 0 0 0 0 0 0 2
## 4 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0 0
## [ reached getOption("max.print") -- omitted 18 rows ]
y_04 = fri_V_final
x_04 = list( same_gender, reciprocity_effect )
set.seed(1913)
(Model_04_qap = netlogit(y_04, x_04, nullhyp="qapspp",reps=1000))##
## Network Logit Model
##
## Coefficients:
## Estimate Exp(b) Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.151538 0.01574019 0.254 0.746 0.254
## x1 2.127549 8.39426525 1.000 0.000 0.000
## x2 2.398821 11.01018984 1.000 0.000 0.000
##
## Goodness of Fit Statistics:
##
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 243.4879 on 459 degrees of freedom
## Chi-Squared test of fit improvement:
## 396.9801 on 3 degrees of freedom, p-value 0
## AIC: 249.4879 BIC: 261.8946
## Pseudo-R^2 Measures:
## (Dn-Dr)/(Dn-Dr+dfn): 0.4621529
## (Dn-Dr)/Dn: 0.6198282
Netlogit Model Permutation Test : Model_05
y_05 = fri_V_final
x_05 = list( same_gender, transitivity_effect)
set.seed(1913)
(Model_05_qap = netlogit(y_05, x_05, nullhyp="qapspp",reps=1000))##
## Network Logit Model
##
## Coefficients:
## Estimate Exp(b) Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.195615 0.01506148 0.29 0.71 0.290
## x1 2.252083 9.50752361 1.00 0.00 0.001
## x2 1.187089 3.27752597 1.00 0.00 0.000
##
## Goodness of Fit Statistics:
##
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 256.2435 on 459 degrees of freedom
## Chi-Squared test of fit improvement:
## 384.2244 on 3 degrees of freedom, p-value 0
## AIC: 262.2435 BIC: 274.6502
## Pseudo-R^2 Measures:
## (Dn-Dr)/(Dn-Dr+dfn): 0.4540456
## (Dn-Dr)/Dn: 0.599912
Netlogit Model Permutation Test : Model_06
y_06 = fri_V_final
x_06 = list(same_gender, reciprocity_effect, transitivity_effect)
set.seed(1913);
(Model_06_qap = netlogit(y_06, x_06, nullhyp="qapspp",reps=1000));##
## Network Logit Model
##
## Coefficients:
## Estimate Exp(b) Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.280998 0.01382886 0.193 0.807 0.193
## x1 1.659706 5.25776345 1.000 0.000 0.001
## x2 2.275291 9.73074816 1.000 0.000 0.000
## x3 1.066596 2.90547231 1.000 0.000 0.000
##
## Goodness of Fit Statistics:
##
## Null deviance: 640.468 on 462 degrees of freedom
## Residual deviance: 218.9737 on 458 degrees of freedom
## Chi-Squared test of fit improvement:
## 421.4943 on 4 degrees of freedom, p-value 0
## AIC: 226.9737 BIC: 243.516
## Pseudo-R^2 Measures:
## (Dn-Dr)/(Dn-Dr+dfn): 0.4770764
## (Dn-Dr)/Dn: 0.6581036