Functions For Measuring The Graph Concordance Of Locally Dependent Observations

Introduction

This R script contains functions used to replicate and apply the methodology from the paper “Measuring the Graph Concordance of Locally Dependent Observations” by Song (2017). This paper introduces the Graph Concordance (GC) statistic, a permutation-based nonparametric test for measuring whether outcomes are more similar among connected nodes than would be expected under random network structure.

The following functions were implemented as part of my bachelor’s thesis to reproduce, analyze, and extend the methods presented in the paper. These functions cover network generation, concordance calculation, inbreeding homophily, permutation inference, and Monte Carlo simulation. The functions are written with a focus on clarity and reproducibility, providing a step-by-step implementation of the statistical procedures described in the paper.

needed packages for this code

library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(network)

## 
## 'network' 1.19.0 (2024-12-08), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information

## 
## Attaching package: 'network'

## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute

library(sna)

## Loading required package: statnet.common

## 
## Attaching package: 'statnet.common'

## The following objects are masked from 'package:base':
## 
##     attr, order

## sna: Tools for Social Network Analysis
## Version 2.8 created on 2024-09-07.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.

## 
## Attaching package: 'sna'

## The following objects are masked from 'package:igraph':
## 
##     betweenness, bonpow, closeness, components, degree, dyad.census,
##     evcent, hierarchy, is.connected, neighborhood, triad.census

what to define:

# n - number of nodes
# adj_mat or A - adjacency matrix
# Y - outcome vector
# type_vector - self explanatory
# X - vector for groups (e.g. A and B)
# alpha - level
# seed - set seed for randomness

Function 1: descriptive_graph_analysis()

Description: Computes node degrees and the average outcome for each node’s neighbors and non-neighbors, providing a descriptive summary of network structure and outcome distribution. Optionally visualizes the network for exploratory analysis.

descriptive_graph_analysis <- function(adj_mat, Y, plot_graph = TRUE, max_nodes_plot = 100) {
  n <- nrow(adj_mat)
  stopifnot(length(Y) == n)
  
  # Initialize
  Y_bar <- numeric(n)
  Y_bar_c <- numeric(n)
  degree <- rowSums(adj_mat)
  
  for (i in 1:n) {
    neighbors <- which(adj_mat[i, ] == 1)
    non_neighbors <- setdiff(1:n, c(neighbors, i))
    
    if (length(neighbors) > 0) {
      Y_bar[i] <- mean(Y[neighbors])
    } else {
      Y_bar[i] <- NA
    }
    if (length(non_neighbors) > 0) {
      Y_bar_c[i] <- mean(Y[non_neighbors])
    } else {
      Y_bar_c[i] <- NA
    }
  }
  
  result_df <- data.frame(
    Node = 1:n,
    Degree = degree,
    Y = Y,
    Y_bar = Y_bar,
    Y_bar_c = Y_bar_c
  )
  
  cat("Summary of node degrees:\n")
  print(summary(degree))
  
  cat("\nSummary of Y values:\n")
  print(summary(Y))
  
  if (plot_graph) {
    plot_nodes <- min(n, max_nodes_plot)
    adj_sub <- adj_mat[1:plot_nodes, 1:plot_nodes]
    net <- as.network(adj_sub)
    
    gplot(net,
          displaylabels = TRUE,
          vertex.col = "pink",
          edge.col = "gray50",
          vertex.cex = 2,
          main = paste("Network Visualization (First", plot_nodes, "nodes)"))
  }
  
  return(head(result_df))
}

Function 2: compute_graph_concordance()

Description: Calculates the raw Graph Concordance (GC) for a given adjacency matrix and outcome vector. Measures how much more aligned outcomes are among linked nodes compared to unlinked nodes.

compute_graph_concordance <- function(adj_mat, Y, print_output = TRUE) {
  n <- length(Y)
  stopifnot(nrow(adj_mat) == n, ncol(adj_mat) == n)

  # Initialize
  Y_bar <- numeric(n)
  Y_bar_c <- numeric(n)

  # Compute neighborhood and complement means
  for (i in 1:n) {
    neighbors <- which(adj_mat[i, ] == 1)
    non_neighbors <- setdiff(1:n, c(neighbors, i))
    
    Y_bar[i] <- if (length(neighbors) > 0) mean(Y[neighbors]) else 0
    Y_bar_c[i] <- if (length(non_neighbors) > 0) mean(Y[non_neighbors]) else 0
  }

  # Compute graph concordance metrics
  Y_mean <- mean(Y)
  nu_sq <- var(Y)
  
  gamma <- mean((Y - Y_mean) * (Y_bar - Y_mean)) / nu_sq
  gamma_c <- mean((Y - Y_mean) * (Y_bar_c - Y_mean)) / nu_sq
  GC <- gamma - gamma_c

 if (print_output) {
    cat("Gamma (linked):     ", round(gamma, 4), "\n")
    cat("Gamma_c (unlinked): ", round(gamma_c, 4), "\n")
    cat("Graph Concordance:  ", round(GC, 4), "\n")
    
    # Optional interpretation
    if (GC > 0) {
      cat("→ Y is graph concordant.\n")
    } else if (GC < 0) {
      cat("→ Y is graph discordant.\n")
    } else {
      cat("→ No concordance difference detected.\n")
    }
  }
return(invisible(list(
    gamma = gamma,
    gamma_c = gamma_c,
    concordance = GC
  )))
}

apply the following for including the previous GC in Function 3

# gc_result <- compute_graph_concordance(adj_mat, Y, print_output = FALSE)
# GC <- gc_result[["concordance"]]

Function 3: compute_inbreeding_homophily()

Description: Calculates inbreeding homophily (IH) for a binary group attribute over the network. This measures whether nodes of the same type are more likely to be connected than expected by chance.

compute_inbreeding_homophily <- function(adj_mat, type_vector, GC = NULL, print_output = TRUE) {
  stopifnot(length(type_vector) == nrow(adj_mat))
  n <- length(type_vector)
  D <- type_vector  # 1 = type t, 0 = not type t
  w <- mean(D)

  # Identify linked and unlinked node pairs (upper triangle only)
  linked_pairs <- which(upper.tri(adj_mat) & adj_mat == 1, arr.ind = TRUE)
  unlinked_pairs <- which(upper.tri(adj_mat) & adj_mat == 0, arr.ind = TRUE)

  # Proportion of same-type pairs among linked and unlinked
  same_type_linked <- sum(D[linked_pairs[, 1]] == 1 & D[linked_pairs[, 2]] == 1)
  same_type_unlinked <- sum(D[unlinked_pairs[, 1]] == 1 & D[unlinked_pairs[, 2]] == 1)

  s_n <- same_type_linked / nrow(linked_pairs)
  d_n <- same_type_unlinked / nrow(unlinked_pairs)

  H <- s_n / (s_n + d_n)
  IH <- (H - w) / (1 - w)

  if (print_output) {
    cat("s_n (linked same-type prop): ", round(s_n, 3), "\n")
    cat("d_n (unlinked same-type prop): ", round(d_n, 3), "\n")
    cat("H (ratio):                   ", round(H, 3), "\n")
    cat("w (type proportion):         ", round(w, 3), "\n")
    cat("Inbreeding Homophily IH(Gn): ", round(IH, 3), "\n")
  }

  # Optional comparison with graph concordance
  if (!is.null(GC)) {
    cat("Graph Concordance C(Gn):     ", round(GC, 4), "\n")
    if (abs(GC - IH) < 1e-6) {
      cat("IH(Gn) ≈ C(Gn) — matches theory!\n")
    } else {
      cat("Difference detected: assumptions may not hold.\n")
    }
  }
return(invisible(list(
    s_n = s_n,
    d_n = d_n,
    H = H,
    w = w,
    IH = IH
  )))
}

Function 4: compute_residual_graph_concordance()

Description: Calculates the residual graph concordance (RGC) as a descriptive measure after regressing the outcome on a covariate. Provides point estimates of concordance (γ, γᶜ, and Cₓ) for each group in the covariate, without hypothesis testing or confidence intervals. Useful for exploring whether behavioral alignment persists within subgroups after accounting for covariates.

compute_residual_graph_concordance <- function(Y, X, adj_mat, print_output = TRUE) {
  stopifnot(length(Y) == length(X), nrow(adj_mat) == length(Y))
  
  # Regress Y on X and get residuals
  model <- lm(Y ~ X)
  u <- resid(model)
  
  unique_groups <- unique(X)
  residual_gc_results <- list()
  
  for (group in unique_groups) {
    group_indices <- which(X == group)
    n_group <- length(group_indices)
    
    if (n_group < 2) {
      residual_gc_results[[group]] <- list(
        gamma = NA,
        gamma_c = NA,
        Cx = NA
      )
      next
    }
    
    u_group <- u[group_indices]
    adj_group <- adj_mat[group_indices, group_indices, drop = FALSE]
    
    u_bar <- numeric(n_group)
    u_bar_c <- numeric(n_group)
    
    for (i in 1:n_group) {
      neighbors <- which(adj_group[i, ] == 1)
      non_neighbors <- setdiff(1:n_group, c(neighbors, i))
      
      if (length(neighbors) > 0) {
        u_bar[i] <- mean(u_group[neighbors])
      }
      if (length(non_neighbors) > 0) {
        u_bar_c[i] <- mean(u_group[non_neighbors])
      }
    }
    
    u_mean <- mean(u_group)
    var_u <- var(u_group)
    
    gamma <- mean((u_group - u_mean) * (u_bar - u_mean)) / var_u
    gamma_c <- mean((u_group - u_mean) * (u_bar_c - u_mean)) / var_u
    Cx <- gamma - gamma_c
    
    residual_gc_results[[group]] <- list(
      gamma = gamma,
      gamma_c = gamma_c,
      Cx = Cx
    )
  }

  # Optional output
  if (print_output) {
    for (group in names(residual_gc_results)) {
      res <- residual_gc_results[[group]]
      cat("Group:", group, "\n")
      cat("  Gamma (linked):    ", round(res$gamma, 4), "\n")
      cat("  Gamma_c (unlinked):", round(res$gamma_c, 4), "\n")
      cat("  Residual GC Cx:     ", round(res$Cx, 4), "\n\n")
    }
  }
  return(invisible(residual_gc_results))
}

Function 5: estimate_graph_concordance()

Description: Computes the standardized GC estimate using normalized residuals, providing a scaled version of GC that adjusts for variance.

estimate_graph_concordance <- function(adj_mat, Y, print_output = TRUE) {
  n <- length(Y)
  stopifnot(nrow(adj_mat) == n, ncol(adj_mat) == n)

  # Step 1: Normalize Y
  Y_bar <- mean(Y)
  v_hat_sq <- mean((Y - Y_bar)^2)
  e_hat <- (Y - Y_bar) / sqrt(v_hat_sq)

  # Step 2: Compute a_hat (average of e_hat over neighbors)
  a_hat <- numeric(n)
  for (i in 1:n) {
    neighbors <- which(adj_mat[i, ] == 1)
    if (length(neighbors) > 0) {
      a_hat[i] <- mean(e_hat[neighbors])
    }
  }

  # Step 3: Compute a_hat_c (average over non-neighbors)
  a_hat_c <- numeric(n)
  for (i in 1:n) {
    non_neighbors <- setdiff(1:n, c(which(adj_mat[i, ] == 1), i))
    if (length(non_neighbors) > 0) {
      a_hat_c[i] <- mean(e_hat[non_neighbors])
    }
  }

  # Step 4: Estimate concordance
  gamma_hat <- mean(e_hat * a_hat)
  gamma_hat_c <- mean(e_hat * a_hat_c)
  C_hat <- gamma_hat - gamma_hat_c

  if (print_output) {
    cat("Gamma_hat (linked):     ", round(gamma_hat, 4), "\n")
    cat("Gamma_hat_c (unlinked): ", round(gamma_hat_c, 4), "\n")
    cat("Estimated Concordance:  ", round(C_hat, 4), "\n")
  }
  return(invisible(C_hat))
}

Function 6: permutation_gc_inference()

Description: Performs permutation-based inference for GC, producing a GC estimate, test statistic, p-value, and confidence interval. This tests whether the observed GC is significantly greater than expected under random graph permutations.

permutation_gc_inference <- function(A, Y, B = 1000, alpha = 0.05, seed = 42, print_output = TRUE) {
  set.seed(seed)
  n <- length(Y)
  
  # Standardize Y
  Y_bar <- mean(Y)
  v_hat <- sqrt(mean((Y - Y_bar)^2))
  e_hat <- (Y - Y_bar) / v_hat
  degrees <- rowSums(A)
  
  # Compute neighbor and non-neighbor means
  a_hat <- sapply(1:n, function(i) {
    neighbors <- which(A[i, ] == 1)
    if (length(neighbors) > 0) mean(e_hat[neighbors]) else 0
  })
  
  a_hat_c <- sapply(1:n, function(i) {
    non_neighbors <- setdiff(1:n, c(which(A[i, ] == 1), i))
    if (length(non_neighbors) > 0) mean(e_hat[non_neighbors]) else 0
  })
  
  # Graph concordance estimate
  gamma_hat <- mean(e_hat * a_hat)
  gamma_hat_c <- mean(e_hat * a_hat_c)
  C_hat <- gamma_hat - gamma_hat_c
  
  # Variance estimate
  q_hat <- e_hat * (a_hat - e_hat * gamma_hat)
  q_bar <- sapply(1:n, function(i) {
    peers <- which(degrees == degrees[i])
    mean(q_hat[peers])
  })
  
  sigma_sq <- mean((q_hat - q_bar)^2)
  sigma_sq1 <- mean(q_hat^2)
  sigma_plus <- ifelse(sigma_sq > 0, sigma_sq, sigma_sq1)
  
  T_stat <- sqrt(n) * C_hat / sqrt(sigma_plus)
  
  # Permutation test statistics
  T_perm <- numeric(B)
  
  for (b in 1:B) {
    perm <- sample(1:n)
    e_perm <- e_hat[perm]
    
    a_perm <- sapply(1:n, function(i) {
      neighbors <- which(A[i, ] == 1)
      if (length(neighbors) > 0) mean(e_perm[neighbors]) else 0
    })
    
    a_perm_c <- sapply(1:n, function(i) {
      non_neighbors <- setdiff(1:n, c(which(A[i, ] == 1), i))
      if (length(non_neighbors) > 0) mean(e_perm[non_neighbors]) else 0
    })
    
    gamma_perm <- mean(e_perm * a_perm)
    gamma_perm_c <- mean(e_perm * a_perm_c)
    C_perm <- gamma_perm - gamma_perm_c
    
    q_perm <- e_perm * (a_perm - e_perm * gamma_perm)
    q_bar_perm <- sapply(1:n, function(i) {
      peers <- which(degrees == degrees[i])
      mean(q_perm[peers])
    })
    
    sigma_perm <- mean((q_perm - q_bar_perm)^2)
    sigma1_perm <- mean(q_perm^2)
    sigma_plus_perm <- ifelse(sigma_perm > 0, sigma_perm, sigma1_perm)
    
    T_perm[b] <- sqrt(n) * C_perm / sqrt(sigma_plus_perm)
  }
  
  # Confidence interval and p-value
  crit_val <- quantile(abs(T_perm), 1 - alpha)
  CI <- c(C_hat - crit_val * sqrt(sigma_plus) / sqrt(n),
          C_hat + crit_val * sqrt(sigma_plus) / sqrt(n))
  
  p_value <- max(1 / B, mean(T_perm > T_stat))
  
  # Print output if requested
  if (print_output) {
    cat("Permutation Inference Results:\n")
    cat("  GC estimate:   ", round(C_hat, 4), "\n")
    cat("  T-statistic:   ", round(T_stat, 4), "\n")
    cat("  95% CI:        [", round(CI[1], 4), ", ", round(CI[2], 4), "]\n")
    cat("  p-value:       ", round(p_value, 4), "\n")
  }
  return(invisible(list(
    GC_estimate = C_hat,
    T_stat = T_stat,
    CI = CI,
    p_value = p_value,
    perm_stats = T_perm
  )))
}

Function 7: residual_graph_concordance_inference()

Description: Extends residual graph concordance to include statistical inference for a single group. Computes the RGC point estimate along with a permutation-based hypothesis test, generating a p-value, confidence interval, and test statistic to assess whether the observed concordance is greater than expected under the null of no network alignment.

residual_graph_concordance_inference <- function(A, Y, X, group_label, B = 1000, alpha = 0.05, seed = 42, print_output = TRUE) {
  set.seed(seed)
  n <- length(Y)
  stopifnot(length(Y) == length(X), nrow(A) == n)
  
  # Step 1: Get residuals from regression
  model <- lm(Y ~ X)
  u_hat <- resid(model)
  
  # Step 2: Restrict to group of interest
  idx_group <- which(X == group_label)
  if (length(idx_group) < 2) stop("Group must have at least 2 nodes.")
  
  A_sub <- A[idx_group, idx_group]
  u_group <- u_hat[idx_group]
  n_g <- length(u_group)
  
  # Step 3: Normalize residuals
  u_bar <- mean(u_group)
  v_hat <- sqrt(mean((u_group - u_bar)^2))
  e_hat <- (u_group - u_bar) / v_hat
  degrees <- rowSums(A_sub)
  
  # Step 4: a_hat and a_hat_c
  a_hat <- sapply(1:n_g, function(i) {
    neighbors <- which(A_sub[i, ] == 1)
    if (length(neighbors) > 0) mean(e_hat[neighbors]) else 0
  })
  a_hat_c <- sapply(1:n_g, function(i) {
    non_neighbors <- setdiff(1:n_g, c(which(A_sub[i, ] == 1), i))
    if (length(non_neighbors) > 0) mean(e_hat[non_neighbors]) else 0
  })
  
  # Step 5: Compute residual concordance estimate
  gamma_hat <- mean(e_hat * a_hat)
  gamma_hat_c <- mean(e_hat * a_hat_c)
  C_hat <- gamma_hat - gamma_hat_c
  
  # Step 6: Variance estimate
  q_hat <- e_hat * (a_hat - e_hat * gamma_hat)
  q_bar <- sapply(1:n_g, function(i) {
    peers <- which(degrees == degrees[i])
    mean(q_hat[peers])
  })
  sigma_sq <- mean((q_hat - q_bar)^2)
  sigma_sq1 <- mean(q_hat^2)
  sigma_plus <- ifelse(sigma_sq > 0, sigma_sq, sigma_sq1)
  
  T_stat <- sqrt(n_g) * C_hat / sqrt(sigma_plus)
  
  # Step 7: Permutation test
  T_perm <- numeric(B)
  for (b in 1:B) {
    perm <- sample(1:n_g)
    e_perm <- e_hat[perm]
    
    a_perm <- sapply(1:n_g, function(i) {
      neighbors <- which(A_sub[i, ] == 1)
      if (length(neighbors) > 0) mean(e_perm[neighbors]) else 0
    })
    a_perm_c <- sapply(1:n_g, function(i) {
      non_neighbors <- setdiff(1:n_g, c(which(A_sub[i, ] == 1), i))
      if (length(non_neighbors) > 0) mean(e_perm[non_neighbors]) else 0
    })
    
    gamma_perm <- mean(e_perm * a_perm)
    gamma_perm_c <- mean(e_perm * a_perm_c)
    C_perm <- gamma_perm - gamma_perm_c
    
    q_perm <- e_perm * (a_perm - e_perm * gamma_perm)
    q_bar_perm <- sapply(1:n_g, function(i) {
      peers <- which(degrees == degrees[i])
      mean(q_perm[peers])
    })
    
    sigma_perm <- mean((q_perm - q_bar_perm)^2)
    sigma1_perm <- mean(q_perm^2)
    sigma_plus_perm <- ifelse(sigma_perm > 0, sigma_perm, sigma1_perm)
    
    T_perm[b] <- sqrt(n_g) * C_perm / sqrt(sigma_plus_perm)
  }
  
  T_perm <- T_perm[!is.na(T_perm)]
  crit_val <- quantile(abs(T_perm), 1 - alpha)
  CI <- c(C_hat - crit_val * sqrt(sigma_plus) / sqrt(n_g),
          C_hat + crit_val * sqrt(sigma_plus) / sqrt(n_g))
  
  p_value <- max(1 / B, mean(T_perm > T_stat))
  
  # Output
  if (print_output) {
    cat("Residual Graph Concordance Inference (Group =", group_label, ")\n")
    cat("  RGC estimate:   ", round(C_hat, 4), "\n")
    cat("  T-statistic:    ", round(T_stat, 4), "\n")
    cat("  95% CI:         [", round(CI[1], 4), ", ", round(CI[2], 4), "]\n")
    cat("  p-value:        ", round(p_value, 4), "\n")
  }
  return(invisible(list(
    RGC_estimate = C_hat,
    T_stat = T_stat,
    CI = CI,
    p_value = p_value,
    perm_stats = T_perm
  )))
}

Function 8: check_asymptotic_validity()

Description: Computes key network properties (average inverse degree, maximum degree, max 3-degree, and empirical 8th moment) to assess whether asymptotic validity conditions hold for the GC test.

check_asymptotic_validity <- function(A, Y, print_output = TRUE) {
  n <- length(Y)
  stopifnot(nrow(A) == n, ncol(A) == n)

  # 1. Average inverse degree (only valid for nodes with deg > 0)
  degrees <- rowSums(A)
  valid_deg <- degrees > 0
  d_avi_n <- mean(1 / degrees[valid_deg])

  # 2. Max degree
  d_max_n <- max(degrees)

  # 3. Max 3-degree (size of 3-hop neighborhood)
  g <- igraph::graph_from_adjacency_matrix(A, mode = "undirected")
  d_max_n3 <- max(sapply(1:n, function(i) {
    length(igraph::neighborhood(g, order = 3, nodes = i)[[1]]) - 1
  }))

  # 4. Empirical 8th moment of Y
  moment_8 <- mean(abs(Y)^8)

  if (print_output) {
    cat("Asymptotic Validity Checks:\n")
    cat("  1. Avg. inverse degree (d_avi_n):    ", round(d_avi_n, 4), "\n")
    cat("  2. Max degree (d_max_n):             ", d_max_n, "\n")
    cat("  3. Max 3-degree (d_max_n3):          ", d_max_n3, "\n")
    cat("  4. Empirical 8th moment E|Y|^8:      ", format(moment_8, digits = 4), "\n")
  }
  return(invisible(list(
    d_avi_n = d_avi_n,
    d_max_n = d_max_n,
    d_max_n3 = d_max_n3,
    moment_8 = moment_8
  )))

}

Function 9: test_graph_concordance()

Description: Conducts a one-sided permutation hypothesis test for GC (H₀: GC ≤ 0 vs. H₁: GC > 0). Outputs a test statistic, critical value, p-value, and decision to reject or not reject the null hypothesis.

test_graph_concordance <- function(A, Y, B = 1000, alpha = 0.05, seed = 42, print_output = TRUE) {
  set.seed(seed)
  n <- length(Y)
  stopifnot(nrow(A) == n, ncol(A) == n)

  # Step 1: Standardize Y
  Y_bar <- mean(Y)
  v_hat_sq <- mean((Y - Y_bar)^2)
  e_hat <- (Y - Y_bar) / sqrt(v_hat_sq)
  degrees <- rowSums(A)

  # Step 2: Compute a_hat and a_hat_c
  a_hat <- sapply(1:n, function(i) {
    neighbors <- which(A[i, ] == 1)
    if (length(neighbors) > 0) mean(e_hat[neighbors]) else 0
  })
  a_hat_c <- sapply(1:n, function(i) {
    non_neighbors <- setdiff(1:n, c(which(A[i, ] == 1), i))
    if (length(non_neighbors) > 0) mean(e_hat[non_neighbors]) else 0
  })

  # Step 3: Graph concordance estimate
  gamma_hat <- mean(e_hat * a_hat)
  gamma_hat_c <- mean(e_hat * a_hat_c)
  C_hat <- gamma_hat - gamma_hat_c

  # Step 4: Variance estimate
  q_hat <- e_hat * (a_hat - e_hat * gamma_hat)
  q_bar <- sapply(1:n, function(i) {
    peers <- which(degrees == degrees[i])
    mean(q_hat[peers])
  })
  sigma_sq <- mean((q_hat - q_bar)^2)
  sigma_sq1 <- mean(q_hat^2)
  sigma_plus <- ifelse(sigma_sq > 0, sigma_sq, sigma_sq1)

  T1 <- sqrt(n) * C_hat / sqrt(sigma_plus)

  # Step 5: Permutation distribution
  T_perm <- numeric(B)
  for (b in 1:B) {
    perm <- sample(1:n)
    e_perm <- e_hat[perm]

    a_perm <- sapply(1:n, function(i) {
      neighbors <- which(A[i, ] == 1)
      if (length(neighbors) > 0) mean(e_perm[neighbors]) else 0
    })
    a_perm_c <- sapply(1:n, function(i) {
      non_neighbors <- setdiff(1:n, c(which(A[i, ] == 1), i))
      if (length(non_neighbors) > 0) mean(e_perm[non_neighbors]) else 0
    })

    gamma_perm <- mean(e_perm * a_perm)
    gamma_perm_c <- mean(e_perm * a_perm_c)
    C_perm <- gamma_perm - gamma_perm_c

    q_perm <- e_perm * (a_perm - e_perm * gamma_perm)
    q_bar_perm <- sapply(1:n, function(i) {
      peers <- which(degrees == degrees[i])
      mean(q_perm[peers])
    })
    sigma_perm <- mean((q_perm - q_bar_perm)^2)
    sigma1_perm <- mean(q_perm^2)
    sigma_plus_perm <- ifelse(sigma_perm > 0, sigma_perm, sigma1_perm)

    T_perm[b] <- sqrt(n) * C_perm / sqrt(sigma_plus_perm)
  }

  T_perm <- T_perm[!is.na(T_perm)]
  critical_value <- quantile(T_perm, 1 - alpha)
  p_value <- max(1 / B, mean(T_perm > T1))
  reject_H0 <- T1 > critical_value

  if (print_output) {
    cat("One-Sided Graph Concordance Test\n")
    cat("  T-statistic:      ", round(T1, 4), "\n")
    cat("  Critical value:   ", round(critical_value, 4), "\n")
    cat("  p-value:          ", round(p_value, 4), "\n")
    cat("  Reject H0 (C <= 0):", ifelse(reject_H0, "YES", "NO"), "\n")
  }
  return(invisible(list(
    T_statistic = T1,
    critical_value = critical_value,
    reject_H0 = reject_H0,
    p_value = p_value,
    perm_distribution = T_perm
  )))

}

new package and definitons

library(MASS)
# R- Number of runs
# n - Number of nodes
# type - "ER" for Erdős–Rényi, "BA" for Barabási–Albert
# lambda - Average degree for ER graphs
# m - Number of edges to attach in BA
# c - Correlation parameter for outcome dependence

Function 10: simulate_network_and_outcome()

Description: Generates a network with a specified # topology (Erdős–Rényi or Barabási–Albert) and a dependent outcome variable Y with user-defined correlation between connected nodes. Provides adjacency matrix and outcome vector as output.

simulate_network_and_outcome <- function(n = 300, graph_type = "ER", lambda = 3, m = 2, c = 0.3) {
 
  # Step 1: Generate Network
  
  if (graph_type == "ER") {
    # Erdős–Rényi random graph: edges are formed randomly
    A <- generate_ER_graph(n, lambda)
    
  } else if (graph_type == "BA") {
    # Barabási–Albert graph: preferential attachment 
    A <- generate_BA_graph(n, m)
    
  } else {
    stop("Graph type must be 'ER' or 'BA'")
  }
  
  # Step 2: Simulate Outcome Data
 
  # Outcome Y is generated with controlled dependence.
  # If c = 0: outcomes are independent
  # If c increases towards 1: stronger correlation among connected nodes
  
  Y <- generate_Y_dependent(A, c)
  
  # Step 3: Return Results
  
  return(list(
    adj_matrix = A,    # The adjacency matrix of the network
    Y = Y              # Simulated outcome vector
  ))
}

Function 11: run_MC_study()

Description: Runs a Monte Carlo simulation to assess the statistical properties of GC estimates and confidence intervals under a given network model and dependence parameter. Computes coverage rates and CI lengths over R replications.

run_MC_study <- function(R = 30, n = 300, type = "ER", lambda = 3, m = 2, c = 0.3, alpha = 0.05) {
  results <- data.frame(
    run = 1:R,
    GC_estimate = numeric(R),
    CI_lower = numeric(R),
    CI_upper = numeric(R),
    covered = logical(R),
    CI_length = numeric(R)
  )
  
  for (r in 1:R) {
    data <- simulate_one_MC(n = n, type = type, lambda = lambda, m = m, c = c)
    res <- suppressMessages(
  permutation_gc_inference(data$adj_matrix, data$Y, B = 500, alpha = alpha)
)
    
    results$GC_estimate[r] <- res$GC_estimate
    results$CI_lower[r] <- res$CI[1]
    results$CI_upper[r] <- res$CI[2]
    # Use the average GC from all runs as a proxy for "true" GC
    results$covered[r] <- (results$CI_lower[r] <= results$GC_estimate[r]) &&
      (results$GC_estimate[r] <= results$CI_upper[r])
    results$CI_length[r] <- results$CI_upper[r] - results$CI_lower[r]
    
    if (r %% 10 == 0) cat("Completed", r, "runs\n")
  }
  
  # Summary
  coverage <- mean(results$covered)
  avg_length <- mean(results$CI_length)
  
  cat("\nEmpirical Coverage Rate:", round(coverage, 4), "\n")
  cat("Average CI Length:", round(avg_length, 4), "\n")
  
  return(results[1, ]) #only first output returned for the purpose of traceability
}

optional for visualization

# hist(mc_results$GC_estimate, main = "GC Estimates across Simulations", xlab = "GC Estimate")
# boxplot(mc_results$CI_length, main = "CI Lengths", ylab = "Length")