This script analyzes virtual team networks across three time points using Exponential Random Graph Models (ERGMs). The analysis examines how individual attributes (Big Five personality traits, demographics) influence network tie formation within teams.
Key Features:
# Load necessary library for network analysis
library(statnet) # Suite of packages for statistical network analysis
Before running this script, please ensure you have the following files in your working directory:
T1_AttributeData.csv - Individual attributes at Time 1
T1_VoteMatrix.csv - Voting adjacency matrix at Time 1
T2_AttributeData.csv - Individual attributes at Time 2
T2_VoteMatrix.csv - Voting adjacency matrix at Time 2
T3_AttributeData.csv - Individual attributes at Time 3
T3_VoteMatrix.csv - Voting adjacency matrix at Time 3
You can set your working directory using
setwd("your/path/here") or through RStudio’s Session menu.
This function loads and preprocesses data for a given time point.
# Load and prepare the vote network for a specific time point.
#
# Reads "T<n>_AttributeData.csv" and "T<n>_VoteMatrix.csv" from the working
# directory, recodes the votes to a binary matrix (rating 5 -> 1; ratings 1-4
# and NA -> 0), builds a directed network without self-loops, and attaches
# every column of the attribute file as a vertex attribute.
#
# Args:
#   n: Time point used to build the file names (a single value, e.g. 1, 2, 3).
#
# Returns:
#   A directed `network` object with one vertex attribute per attribute column.
#
# Errors:
#   Stops if `n` is not a single non-missing value, if either file cannot be
#   read, or if the attribute file does not have one row per network vertex.
prepare_network_data <- function(n) {
  stopifnot(length(n) == 1L, !is.na(n))

  # All time points use the same upper-case "T" prefix, matching the documented
  # file names; a lower-case prefix fails on case-sensitive file systems.
  attribute_file <- paste0("T", n, "_AttributeData.csv")
  matrix_file <- paste0("T", n, "_VoteMatrix.csv")

  attribute <- read.csv(attribute_file)
  vote_matrix <- read.csv(matrix_file)

  # Number of rows read from the vote file (used as the subsetting bound below)
  endnum <- nrow(vote_matrix)

  # Recode voting data to a binary network:
  # only rating 5 (strongest endorsement) becomes 1, ratings 1-4 and NA become 0.
  # Ratings are compared as strings, exactly as in the original recode; the
  # 5 -> 1 step must run last so the new 1s are not zeroed again.
  vote_matrix[is.na(vote_matrix)] <- 0
  for (low_rating in c("1", "2", "3", "4")) {
    vote_matrix[vote_matrix == low_rating] <- 0
  }
  vote_matrix[vote_matrix == "5"] <- 1

  # Extract the adjacency cells (excluding the label row/column).
  # NOTE(review): this assumes the first data row and first column of the CSV
  # hold labels rather than votes - confirm against the file layout.
  vote_cells <- vote_matrix[2:endnum, 2:endnum]
  adjacency <- vapply(vote_cells, as.numeric, numeric(nrow(vote_cells)))

  # Create the network object in a single step so the options actually apply:
  # - directed = TRUE: votes have direction (A voting for B != B voting for A)
  # - loops = FALSE: people cannot vote for themselves
  network_obj <- as.network(adjacency,
                            directed = TRUE,
                            loops = FALSE,
                            matrix.type = "adjacency")

  # Attribute rows are matched to vertices purely by position, so a row-count
  # mismatch would attach the wrong values; fail loudly instead.
  n_vertices <- network.size(network_obj)
  if (nrow(attribute) != n_vertices) {
    stop("'", attribute_file, "' has ", nrow(attribute),
         " rows but the network built from '", matrix_file, "' has ",
         n_vertices, " vertices.", call. = FALSE)
  }

  # Attach individual attributes as vertex attributes (one per column)
  for (i in seq_len(ncol(attribute))) {
    set.vertex.attribute(network_obj, colnames(attribute)[i], attribute[, i])
  }

  network_obj
}
This function runs the full ERGM model for a given network.
# Fit the full ERGM to one time point's network and report the results.
#
# Prints a banner for the time point, estimates the model, prints its summary,
# and hands the fitted model back to the caller.
#
# Args:
#   network_obj: Network whose vertex attributes include every covariate named
#                in the formula below, plus "team_id" for the constraint.
#   time_label:  Value shown in the banner (e.g. 1, 2, 3).
#
# Returns:
#   The fitted model object produced by ergm().
run_ergm_model <- function(network_obj, time_label) {
  banner_rule <- "========================================\n"
  cat("\n", banner_rule, sep = "")
  cat(paste0("Time ", time_label, " - ERGM Analysis\n"))
  cat(banner_rule)

  # The formula, constraint and control are written inline on purpose: the
  # summary echoes the call, so moving them into variables would change the
  # printed output.
  fit <- ergm(
    network_obj ~
      # --- Structural terms ---
      edges +           # overall tendency to form a tie
      mutual +          # reciprocated pairs (A->B together with B->A)
      transitiveties +  # ties embedded in transitive paths (A->B->C with A->C)
      cyclicalties +    # ties embedded in cycles (A->B->C->A)
      # --- Receiver-side controls (".c" columns, described as centered) ---
      nodeicov("age.c") +      # age of the person receiving the vote
      nodeicov("tech.c") +     # technical skill
      nodeicov("eng.c") +      # English proficiency
      nodeicov("self_ef.c") +  # self-efficacy
      nodeicov("gender.c") +   # gender
      # --- Big Five traits: receiver (nodeicov) and sender (nodeocov) ---
      nodeicov("B5_a.c") + nodeocov("B5_a.c") +  # agreeableness
      nodeicov("B5_c.c") + nodeocov("B5_c.c") +  # conscientiousness
      nodeicov("B5_e.c") + nodeocov("B5_e.c") +  # extraversion
      nodeicov("B5_n.c") + nodeocov("B5_n.c") +  # neuroticism
      nodeicov("B5_o.c") + nodeocov("B5_o.c"),   # openness
    # Estimation settings
    eval.loglik = TRUE,
    constraints = ~blockdiag("team_id"),  # ties only within a team
    control = control.ergm(MCMLE.maxit = 10000)
  )

  cat("\nModel estimation complete!\n")
  print(summary(fit))

  fit
}
Each time point is analyzed separately below.
# Time 1: build the network from the T1 files, then fit the full ERGM to it
t1.network <- prepare_network_data(n = 1)
t1.model <- run_ergm_model(network_obj = t1.network, time_label = 1)
##
## ========================================
## Time 1 - ERGM Analysis
## ========================================
##
## Model estimation complete!
## Call:
## ergm(formula = network_obj ~ edges + mutual + transitiveties +
## cyclicalties + nodeicov("age.c") + nodeicov("tech.c") + nodeicov("eng.c") +
## nodeicov("self_ef.c") + nodeicov("gender.c") + nodeicov("B5_a.c") +
## nodeocov("B5_a.c") + nodeicov("B5_c.c") + nodeocov("B5_c.c") +
## nodeicov("B5_e.c") + nodeocov("B5_e.c") + nodeicov("B5_n.c") +
## nodeocov("B5_n.c") + nodeicov("B5_o.c") + nodeocov("B5_o.c"),
## constraints = ~blockdiag("team_id"), eval.loglik = TRUE,
## control = control.ergm(MCMLE.maxit = 10000))
##
## Monte Carlo Maximum Likelihood Results:
##
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -1.664186 0.109892 0 -15.144 < 1e-04 ***
## mutual -0.089766 0.233735 0 -0.384 0.700942
## transitiveties 1.070495 0.115778 0 9.246 < 1e-04 ***
## cyclicalties -0.360384 0.098049 0 -3.676 0.000237 ***
## nodeicov.age.c 0.008717 0.010897 0 0.800 0.423742
## nodeicov.tech.c 0.043848 0.059487 0 0.737 0.461061
## nodeicov.eng.c 0.346110 0.106836 0 3.240 0.001197 **
## nodeicov.self_ef.c 0.161739 0.102921 0 1.571 0.116069
## nodeicov.gender.c 0.370572 0.127584 0 2.905 0.003678 **
## nodeicov.B5_a.c -0.116292 0.131978 0 -0.881 0.378240
## nodeocov.B5_a.c 0.261072 0.118856 0 2.197 0.028053 *
## nodeicov.B5_c.c 0.001898 0.138137 0 0.014 0.989035
## nodeocov.B5_c.c -0.225444 0.126239 0 -1.786 0.074124 .
## nodeicov.B5_e.c 0.175988 0.101898 0 1.727 0.084149 .
## nodeocov.B5_e.c -0.010747 0.097711 0 -0.110 0.912419
## nodeicov.B5_n.c 0.161877 0.118948 0 1.361 0.173544
## nodeocov.B5_n.c -0.030082 0.105367 0 -0.285 0.775266
## nodeicov.B5_o.c 0.004704 0.126749 0 0.037 0.970392
## nodeocov.B5_o.c -0.079097 0.121300 0 -0.652 0.514350
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 1791 on 1292 degrees of freedom
## Residual Deviance: 1409 on 1273 degrees of freedom
##
## AIC: 1447 BIC: 1545 (Smaller is better. MC Std. Err. = 0.5714)
# Time 2: build the network from the T2 files, then fit the full ERGM to it
t2.network <- prepare_network_data(n = 2)
t2.model <- run_ergm_model(network_obj = t2.network, time_label = 2)
##
## ========================================
## Time 2 - ERGM Analysis
## ========================================
##
## Model estimation complete!
## Call:
## ergm(formula = network_obj ~ edges + mutual + transitiveties +
## cyclicalties + nodeicov("age.c") + nodeicov("tech.c") + nodeicov("eng.c") +
## nodeicov("self_ef.c") + nodeicov("gender.c") + nodeicov("B5_a.c") +
## nodeocov("B5_a.c") + nodeicov("B5_c.c") + nodeocov("B5_c.c") +
## nodeicov("B5_e.c") + nodeocov("B5_e.c") + nodeicov("B5_n.c") +
## nodeocov("B5_n.c") + nodeicov("B5_o.c") + nodeocov("B5_o.c"),
## constraints = ~blockdiag("team_id"), eval.loglik = TRUE,
## control = control.ergm(MCMLE.maxit = 10000))
##
## Monte Carlo Maximum Likelihood Results:
##
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -1.64129 0.11361 0 -14.446 < 1e-04 ***
## mutual -0.18046 0.23287 0 -0.775 0.438360
## transitiveties 1.16448 0.11580 0 10.056 < 1e-04 ***
## cyclicalties -0.34694 0.08986 0 -3.861 0.000113 ***
## nodeicov.age.c 0.01012 0.01066 0 0.949 0.342526
## nodeicov.tech.c 0.10942 0.05888 0 1.858 0.063100 .
## nodeicov.eng.c 0.22685 0.10299 0 2.203 0.027624 *
## nodeicov.self_ef.c 0.07266 0.10080 0 0.721 0.471018
## nodeicov.gender.c 0.29366 0.12297 0 2.388 0.016934 *
## nodeicov.B5_a.c -0.28868 0.12471 0 -2.315 0.020627 *
## nodeocov.B5_a.c 0.20332 0.11741 0 1.732 0.083313 .
## nodeicov.B5_c.c 0.26190 0.13181 0 1.987 0.046928 *
## nodeocov.B5_c.c -0.09075 0.12728 0 -0.713 0.475876
## nodeicov.B5_e.c 0.22190 0.09642 0 2.301 0.021366 *
## nodeocov.B5_e.c -0.06891 0.09598 0 -0.718 0.472761
## nodeicov.B5_n.c 0.13531 0.11662 0 1.160 0.245960
## nodeocov.B5_n.c -0.11461 0.10461 0 -1.096 0.273261
## nodeicov.B5_o.c 0.01262 0.12249 0 0.103 0.917933
## nodeocov.B5_o.c -0.10240 0.11164 0 -0.917 0.359025
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 1791 on 1292 degrees of freedom
## Residual Deviance: 1443 on 1273 degrees of freedom
##
## AIC: 1481 BIC: 1580 (Smaller is better. MC Std. Err. = 0.7572)
# Time 3: build the network from the T3 files, then fit the full ERGM to it
t3.network <- prepare_network_data(n = 3)
t3.model <- run_ergm_model(network_obj = t3.network, time_label = 3)
##
## ========================================
## Time 3 - ERGM Analysis
## ========================================
##
## Model estimation complete!
## Call:
## ergm(formula = network_obj ~ edges + mutual + transitiveties +
## cyclicalties + nodeicov("age.c") + nodeicov("tech.c") + nodeicov("eng.c") +
## nodeicov("self_ef.c") + nodeicov("gender.c") + nodeicov("B5_a.c") +
## nodeocov("B5_a.c") + nodeicov("B5_c.c") + nodeocov("B5_c.c") +
## nodeicov("B5_e.c") + nodeocov("B5_e.c") + nodeicov("B5_n.c") +
## nodeocov("B5_n.c") + nodeicov("B5_o.c") + nodeocov("B5_o.c"),
## constraints = ~blockdiag("team_id"), eval.loglik = TRUE,
## control = control.ergm(MCMLE.maxit = 10000))
##
## Monte Carlo Maximum Likelihood Results:
##
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -1.6884611 0.1066627 0 -15.830 < 1e-04 ***
## mutual -0.3987723 0.2571413 0 -1.551 0.120952
## transitiveties 1.1832188 0.1198246 0 9.875 < 1e-04 ***
## cyclicalties -0.3545615 0.0994413 0 -3.566 0.000363 ***
## nodeicov.age.c -0.0011231 0.0113471 0 -0.099 0.921154
## nodeicov.tech.c 0.0798275 0.0591427 0 1.350 0.177098
## nodeicov.eng.c 0.2474597 0.1087733 0 2.275 0.022906 *
## nodeicov.self_ef.c 0.0737805 0.0975654 0 0.756 0.449520
## nodeicov.gender.c 0.3183640 0.1260726 0 2.525 0.011562 *
## nodeicov.B5_a.c -0.3118668 0.1320420 0 -2.362 0.018183 *
## nodeocov.B5_a.c 0.3220964 0.1236904 0 2.604 0.009213 **
## nodeicov.B5_c.c 0.3569689 0.1403064 0 2.544 0.010953 *
## nodeocov.B5_c.c -0.0516081 0.1354198 0 -0.381 0.703131
## nodeicov.B5_e.c 0.2086917 0.0972247 0 2.146 0.031834 *
## nodeocov.B5_e.c -0.0607342 0.0957227 0 -0.634 0.525767
## nodeicov.B5_n.c 0.2241314 0.1218863 0 1.839 0.065936 .
## nodeocov.B5_n.c 0.0084202 0.1086520 0 0.077 0.938228
## nodeicov.B5_o.c 0.0005149 0.1222440 0 0.004 0.996639
## nodeocov.B5_o.c -0.0558455 0.1197928 0 -0.466 0.641084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 1791 on 1292 degrees of freedom
## Residual Deviance: 1374 on 1273 degrees of freedom
##
## AIC: 1412 BIC: 1510 (Smaller is better. MC Std. Err. = 0.5069)
# Document R version and package versions for reproducibility.
# Called at top level so the result is auto-printed into the report.
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] statnet_2019.6 tsna_0.3.6 sna_2.8
## [4] statnet.common_4.12.0 ergm.count_4.1.3 tergm_4.2.2
## [7] networkDynamic_0.11.5 ergm_4.10.1 network_1.19.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 robustbase_0.99-6
## [4] stringi_1.8.7 lattice_0.22-7 digest_0.6.37
## [7] magrittr_2.0.4 evaluate_1.0.5 lpSolveAPI_5.5.2.0-17.14
## [10] grid_4.5.1 networkLite_1.1.0 fastmap_1.2.0
## [13] jsonlite_2.0.0 Matrix_1.7-3 rle_0.10.0
## [16] purrr_1.1.0 jquerylib_0.1.4 Rdpack_2.6.4
## [19] cli_3.6.5 rlang_1.1.6 rbibutils_2.3
## [22] cachem_1.1.0 yaml_2.3.10 tools_4.5.1
## [25] parallel_4.5.1 memoise_2.0.1 coda_0.19-4.1
## [28] dplyr_1.1.4 vctrs_0.6.5 R6_2.6.1
## [31] lifecycle_1.0.4 stringr_1.5.2 MASS_7.3-65
## [34] trust_0.1-8 pkgconfig_2.0.3 pillar_1.11.1
## [37] bslib_0.9.0 ergm.multi_0.3.0 glue_1.8.0
## [40] DEoptimR_1.1-4 xfun_0.53 tibble_3.3.0
## [43] tidyselect_1.2.1 rstudioapi_0.17.1 knitr_1.50
## [46] htmltools_0.5.8.1 nlme_3.1-168 rmarkdown_2.29
## [49] compiler_4.5.1
ERGM Terms Explained:
Variables:
Constraints:
Code Structure:
This script uses helper functions to reduce repetition:
- prepare_network_data(n): Loads and prepares data for time point n
- run_ergm_model(): Runs the full ERGM model
- Each time point is analyzed in a single chunk for simplicity
- Models are stored as t1.model, t2.model, and t3.model