Will Tests include only results on people? or will companies also be included?
how can we include the data from the 3rd column, which includes previous connections to make inferences about probable future relations?
Preliminaries and Loading Data in R
#trim edgelist to the 3 relevant columns
BoardDataCurrent <- cleanlistCurrent[ , 1:2]
#create a graph object from the good data list (3column)
g <- graph_from_data_frame(BoardDataCurrent, directed=FALSE)
g1 <- graph_from_data_frame(BoardDataPast, directed=FALSE)
#set igraph options
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.1,
vertex.label.cex=0.85, par(mar=c(0,0,0,0)))
#plot the network for diagnostics
plot(g, vertex.label.cex = 0.35,
vertex.label.color = "black",
vertex.size = 5)
plot(g1, vertex.label.cex = 0.35,
vertex.label.color = "black",
vertex.size = 5)
Formatting the Network for Two-Mode analysis
#splits the network by type into two modes
V(g)$type <- bipartite_mapping(g)$type
V(g1)$type <- bipartite_mapping(g1)$type
# Optional: Assign colors for visualization (FALSE=People, TRUE=Companies)
V(g)$color <- ifelse(V(g)$type, "pink", "lightblue")
V(g)$shape <- ifelse(V(g)$type, "square", "circle")
V(g1)$color <- ifelse(V(g1)$type, "pink", "lightblue")
V(g1)$shape <- ifelse(V(g1)$type, "square", "circle")
#plot the network for diagnostics
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.1,
vertex.label.cex=0.000085, par(mar=c(0,0,0,0)))
plot(g,
vertex.label.cex = 0.0000065,
vertex.label.color = "black",
vertex.size = 4)
plot(g1,
vertex.label.cex = 0.0000065,
vertex.label.color = "black",
vertex.size = 4)
# coreness on people projection
coreness_people <- coreness(People_overlap, mode = "all")
V(People_overlap)$coreness <- coreness_people
max_k <- max(coreness_people, na.rm = TRUE)
max_k
## [1] 13
# nodes in top core
top_core_nodes <- V(People_overlap)[coreness == max_k]$name
length(top_core_nodes)
## [1] 40
# induced subgraph of top core
top_core_subg <- induced_subgraph(People_overlap, which(V(People_overlap)$coreness == max_k))
# induced subgraph of top core
top_core_subg <- induced_subgraph(People_overlap, which(V(People_overlap)$coreness == max_k))
components_top_core <- igraph::components(top_core_subg)
components_top_core$no # number of connected components inside the top k-core
## [1] 1
summary(top_core_subg)
## IGRAPH c4cbe76 UNW- 40 276 --
## + attr: name (v/c), coreness (v/n), weight (e/n)
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.01,
vertex.label.cex=0.85, par(mar=c(0,0,0,0)))
plot(top_core_subg,
vertex.label.color = "black",
vertex.size = 4)
# average degree in bipartite (two-mode) -- people vs companies
# V(g)$type == FALSE (or TRUE) depends on how igraph assigned; verify mapping
people_vertices <- V(g)[V(g)$type == FALSE] # check which is people in your data
company_vertices <- V(g)[V(g)$type == TRUE]
avg_degree_people_2mode <- mean(igraph::degree(g)[people_vertices])
avg_degree_companies_2mode <- mean(igraph::degree(g)[company_vertices])
list(
'average degree people two mode' = avg_degree_people_2mode,
'average degree company two mode' = avg_degree_companies_2mode
)
## $`average degree people two mode`
## [1] 2.241071
##
## $`average degree company two mode`
## [1] 1.916031
# One-mode (people projection)
# unweighted degree (# of co-directors)
deg_people_proj <- igraph::degree(People_overlap)
# weighted degree (sum of shared boards)
strength_people_proj <- igraph::strength(People_overlap)
avg_deg_people_proj <- mean(deg_people_proj)
median_deg_people_proj <- median(deg_people_proj)
sd_deg_people_proj <- sd(deg_people_proj)
graph_density_people <- edge_density(People_overlap)
list(
'average degree people only' = avg_deg_people_proj,
'median degree people only' = median_deg_people_proj,
'stdDev of Degree for people only' = sd_deg_people_proj,
'density of people only' = graph_density_people,
'weighted degree people only' = head(strength_people_proj,3)
)
## $`average degree people only`
## [1] 12.17857
##
## $`median degree people only`
## [1] 12
##
## $`stdDev of Degree for people only`
## [1] 3.722864
##
## $`density of people only`
## [1] 0.1097169
##
## $`weighted degree people only`
## Warren Buffett Kenneth Chenault Wally Weitz
## 14 14 13
# 1) compute (unweighted) coreness
coreness_people <- igraph::coreness(People_overlap, mode = "all")
V(People_overlap)$coreness <- coreness_people
max_k <- max(coreness_people, na.rm = TRUE)
# 2) identify top-core vertex indices
top_idx <- which(V(People_overlap)$coreness == max_k)
length_top_core <- length(top_idx)
message("Top k-core (k = ", max_k, ") node count: ", length_top_core)
## Top k-core (k = 13) node count: 40
# 3) prepare plotting attributes
# Base color for non-core nodes and highlight color for core nodes
base_col <- "lightgray"
core_col <- "tomato"
V(People_overlap)$color <- ifelse(seq_len(vcount(People_overlap)) %in% top_idx, core_col, base_col)
# Make core nodes larger and label them, keep others small and unlabeled for clarity
base_size <- 4
V(People_overlap)$size <- ifelse(seq_len(vcount(People_overlap)) %in% top_idx,
base_size + 4, # bigger for core
base_size)
V(People_overlap)$label <- ifelse(seq_len(vcount(People_overlap)) %in%
top_idx,
V(People_overlap)$name, NA)
V(People_overlap)$label.cex <- 0.8
V(People_overlap)$label.color <- "black"
V(People_overlap)$frame.color <- "black" # node border color
# Optionally scale edge width by weight (so core stands out if they share many boards)
if (!is.null(E(People_overlap)$weight)) {
E(People_overlap)$width <- 1 + 1 * (E(People_overlap)$weight / max(E(People_overlap)$weight, na.rm = TRUE))
} else {
E(People_overlap)$width <- 1
}
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.01,
vertex.label.cex=0.000000085, par(mar=c(0,0,0,0)))
#plot to screen
plot(People_overlap,
vertex.label.cex = 0.0000065,
vertex.label.family = "sans",
edge.color = "grey80",
main = paste0("People projection: top k-core highlighted (k = ", max_k, ")"),
margin = 0)
#add a legend
legend("topleft",
legend = c(paste0("Top k-core (k=", max_k, ")"), "Rest of network"),
pch = 21,
pt.bg = c(core_col, base_col),
pt.cex = c(2, 1.5),
bty = "n")
# Use igraph's biconnected.components (R naming)
bic <- igraph::biconnected.components(People_overlap)
## Warning: `biconnected.components()` was deprecated in igraph 2.0.0.
## ℹ Please use `biconnected_components()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# biconnected.components returns a list where $components are vectors of edge ids.
# articulation points (vertex ids) can be obtained via articulation.points()
art_pts <- igraph::articulation.points(People_overlap) # numeric vertex ids (if any)
## Warning: `articulation.points()` was deprecated in igraph 2.0.0.
## ℹ Please use `articulation_points()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
art_names <- V(People_overlap)$name[art_pts]
# Convert each bicomp (edge-id list) -> set of vertex ids
bic_vertex_sets <- lapply(bic$components, function(eids) {
if (length(eids) == 0) return(integer(0))
verts_mat <- igraph::ends(People_overlap, eids, names = FALSE)
unique(as.integer(c(verts_mat)))
})
# compute sizes (number of vertices) per bicomp
bic_sizes <- sapply(bic_vertex_sets, length)
topN <- 6 # adjust number you want to visualize on full graph
ord <- order(bic_sizes, decreasing = TRUE)
top_idx <- ord[seq_len(min(topN, length(ord)))]
# Prepare a color palette for top bicomponents
pal <- grDevices::rainbow(length(top_idx), alpha = 0.8)
# default color for nodes not in top bicomps
default_col <- "lightgray"
# For each vertex, determine membership in top bicomps (can be multiple)
vcount_total <- igraph::vcount(People_overlap)
vcols <- rep(default_col, vcount_total)
# mark articulation points with distinct border/shape later
# If vertex belongs to multiple top bicomps, use a special color (e.g. "gold") or mix
multi_col <- "gold"
# track membership counts for top bicomps only
membership_counts <- integer(vcount_total)
membership_index <- rep(0, vcount_total) # store first top component id for coloring default
for (i in seq_along(top_idx)) {
comp_id <- top_idx[i]
verts <- bic_vertex_sets[[comp_id]]
membership_counts[verts] <- membership_counts[verts] + 1
# record the index if not set
membership_index[verts][membership_index[verts] == 0] <- i
}
# assign colors: if multiple membership -> multi_col else palette by membership_index
for (v in seq_len(vcount_total)) {
if (membership_counts[v] == 0) {
vcols[v] <- default_col
} else if (membership_counts[v] > 1) {
vcols[v] <- multi_col
} else {
idx <- membership_index[v]
vcols[v] <- pal[idx]
}
}
deg <- igraph::degree(People_overlap)
vsize <- 4 + 2 * log1p(deg)
vsize[membership_counts > 0] <- vsize[membership_counts > 0] + 3
# vertex labels only for bicomponent members (optional)
vlabel <- rep(NA, vcount_total)
vlabel[membership_counts > 0] <- V(People_overlap)$name[membership_counts > 0]
# mark articulation points with a distinct frame color and shape
vframe <- rep("black", vcount_total)
vshape <- rep("circle", vcount_total)
if (length(art_pts) > 0) {
vframe[art_pts] <- "red"
vshape[art_pts] <- "square"
}
# edge gray for background; optionally make intra-top-bicomp edges bolder
E(People_overlap)$color <- "grey80"
E(People_overlap)$width <- 0.6
# make edges that connect two nodes belonging to the same top bicomp thicker/colorful
edge_ends <- igraph::ends(People_overlap, es = E(People_overlap), names = FALSE)
for (ei in seq_len(igraph::ecount(People_overlap))) {
v1 <- edge_ends[ei,1]; v2 <- edge_ends[ei,2]
# if both vertices belong to same top bicomp (single membership index and >0)
if (membership_counts[v1] == 1 && membership_counts[v2] == 1 &&
membership_index[v1] == membership_index[v2] && membership_index[v1] > 0) {
idx <- membership_index[v1]
E(People_overlap)$color[ei] <- pal[idx]
E(People_overlap)$width[ei] <- 1.5 + 1 * (E(People_overlap)$weight[ei] / max(E(People_overlap)$weight, na.rm = TRUE))
}
}
# plot
plot(People_overlap,
layout = layout_with_fr,
vertex.color = vcols,
vertex.size = vsize,
vertex.label = vlabel,
vertex.label.cex = 0.7,
margin = 0,
vertex.frame.color = vframe,
vertex.shape = vshape,
edge.color = E(People_overlap)$color,
edge.width = E(People_overlap)$width,
main = paste0("People projection — top ", length(top_idx), " bicomponents highlighted"))
# legend for bicomponents
legend_items <- c(paste0("Bicomp #", seq_along(top_idx), " (size=", bic_sizes[top_idx], ")"), "multiple membership", "non-top nodes", "articulation point")
legend_cols <- c(pal, multi_col, default_col, NA)
legend_pch <- c(rep(21, length(top_idx)), 21, 21, 22)
legend_ptbg <- c(pal, multi_col, default_col, "white")
legend("topleft", legend = legend_items, pch = legend_pch, pt.bg = legend_ptbg, pt.cex = 1.4, bty = "n")
if (length(art_pts) > 0) {
# add articulation point note
legend("bottomleft", legend = "Articulation points (square, red border)", pch = 22, pt.cex = 1.4, bty = "n")
}
##########
for (i in seq_along(top_idx)) {
comp_id <- top_idx[i]
verts <- bic_vertex_sets[[comp_id]]
if (length(verts) == 0) {
plot.new()
title(main = paste0("Bicomp #", comp_id, " (empty)"))
next
}
subg <- igraph::induced_subgraph(People_overlap, vids = verts)
set.seed(100 + i)
lay <- igraph::layout_with_kk(subg)
V(subg)$color <- "steelblue"
V(subg)$frame.color <- "black"
V(subg)$size <- 6
V(subg)$label <- V(subg)$name
V(subg)$label.cex <- 0.8
ew <- igraph::E(subg)$weight
if (!is.null(ew)) {
if (all(is.na(ew))) {
E(subg)$width <- 1
} else {
E(subg)$width <- 1 + 2 * (ew / max(ew, na.rm = TRUE))
}
} else {
E(subg)$width <- 1
}
plot(subg, layout = lay,
main = paste0("Top bicomp #", i, " (orig id=", comp_id, ", size=", length(verts), ")"),
vertex.label.family = "sans",
margin = 0.2)
}
girvan-newman, spinglass
#bicomponents
#edge betweeness algorithmn
eb <- cluster_edge_betweenness(g)
V(g)$group <- membership(eb)
sizes(eb)
## Community sizes
## 1 2 3 4 5 6 7 8 9 10 11 12
## 20 33 19 9 19 16 24 24 31 24 3 21
g_eb2 <- induced_subgraph(g, V(g)$group==2) # extract nodes in group 1
g_eb9 <- induced_subgraph(g, V(g)$group==9) # extract nodes in group 4
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.01,
vertex.label.cex=0.85, par(mar=c(0,0,0,0)))
plot(g_eb2)
plot(g_eb9)
igraph_options(plot.layout=layout_with_kk, edge.arrow.size=0.01,
vertex.label.cex=0.35, par(mar=c(0,0,0,0)))
plot(eb, g,
vertex.size = 4,
layout = layout_with_kk,
main="Max Modularity Solution")
lv <- cluster_louvain(g)
lv1 <- cluster_louvain(g1)
igraph_options(plot.layout=layout_with_fr,
edge.arrow.size=0.1,
vertex.label.cex=0.5,
par(mar=c(0,0,0,0)))
plot(lv,
g,
vertex.size = 5)
plot(lv1,
g1,
vertex.size = 5)
Plotting People in a 1 mode network
#graphs the people only 1 mode matrix
igraph_options(plot.layout=layout_with_kk,
edge.arrow.size=0.1,
vertex.label.cex=0.5,
par(mar=c(0,0,0,0)))
plot(People_overlap,
vertex.size = 5)
plot(People_overlap1,
vertex.size = 5)
## getting each vertex `type` let's us sort easily
deg <- degree(matrix_prod)
bet <- betweenness(matrix_prod)
clos <- closeness(matrix_prod)
eig <- eigen_centrality(People_overlap)$vector
cent_df <- data.frame(deg, bet, clos, eig)
cent_df %>%
arrange(desc(deg)) %>%
head()
## deg bet clos eig
## Todd Combs 54 512.4667 0 1.0000000
## Steve Burke 52 512.4667 0 0.9676020
## Chris Davis 50 549.9333 0 0.7442313
## Ana Patricia O'Shea 48 2313.0667 0 0.7396577
## Bela Bajaria 48 1630.8667 0 0.3584563
## Brad Smith 46 1310.0000 0 0.5757964
cent_df %>%
arrange(desc(bet)) %>%
head()
## deg bet clos eig
## Ana Patricia O'Shea 48 2313.0667 0 0.73965765
## Bela Bajaria 48 1630.8667 0 0.35845634
## Mark Weinburger 30 1547.0000 0 0.55641655
## Carla Harris 26 1438.0000 0 0.04540579
## Brad Smith 46 1310.0000 0 0.57579639
## Alex Gorsky 40 972.5333 0 0.54488782
cent_df %>%
arrange(desc(eig)) %>%
head()
## deg bet clos eig
## Todd Combs 54 512.4667 0 1.0000000
## Steve Burke 52 512.4667 0 0.9676020
## Chris Davis 50 549.9333 0 0.7442313
## Ana Patricia O'Shea 48 2313.0667 0 0.7396577
## Brad Smith 46 1310.0000 0 0.5757964
## Warren Buffett 28 0.0000 0 0.5667991
# centrality measures on the people projection
deg <- igraph::degree(People_overlap) # number of unique co-directors
strg <- igraph::strength(People_overlap) # weighted ties (shared boards count)
bet <- igraph::betweenness(People_overlap, weights = 1/(E(People_overlap)$weight + 1), normalized=TRUE) # invert weight to treat larger weight as shorter paths
eig <- eigen_centrality(People_overlap,
weights=E(People_overlap)$weight)$vector
pr <- page_rank(People_overlap,
weights=E(People_overlap)$weight)$vector
close <- igraph::closeness(People_overlap,
weights = 1/(E(People_overlap)$weight + 1), normalized=TRUE)
power_df <- data.frame(
name = V(People_overlap)$name,
degree = deg,
strength = strg,
betweenness = bet,
eigen = eig,
pagerank = pr,
closeness = close,
coreness = V(People_overlap)$coreness,
stringsAsFactors = FALSE
)
# show top 10 by different measures
top_by_degree <- power_df %>%
select(degree) %>%
arrange(desc(degree)) %>%
slice_head(n = 10)
top_by_betweenness <- power_df %>%
select(betweenness) %>%
arrange(desc(betweenness)) %>%
slice_head(n = 10)
top_by_eigen <- power_df %>%
select(eigen) %>%
arrange(desc(eigen)) %>%
slice_head(n = 10)
top_by_pagerank <- power_df %>%
select(pagerank) %>%
arrange(desc(pagerank)) %>%
slice_head(n = 10)
top_by_degree
## degree
## Steve Burke 25
## Todd Combs 25
## Chris Davis 24
## Ana Patricia O'Shea 24
## Bela Bajaria 24
## Brad Smith 23
## Alex Gorsky 20
## Carolyn Everson 18
## Maria Lagomasino 18
## Kenneth Chenault 14
top_by_betweenness
## betweenness
## Ana Patricia O'Shea 0.18927655
## Bela Bajaria 0.13356811
## Mark Weinburger 0.12678133
## Carla Harris 0.11785422
## Brad Smith 0.10720721
## Alex Gorsky 0.07965056
## Todd Combs 0.04543817
## Chris Davis 0.04514879
## Carolyn Everson 0.04010920
## Maria Lagomasino 0.04010920
top_by_eigen
## eigen
## Todd Combs 1.0000000
## Steve Burke 0.9676020
## Chris Davis 0.7442313
## Ana Patricia O'Shea 0.7396577
## Brad Smith 0.5757964
## Warren Buffett 0.5667991
## Mark Weinburger 0.5564165
## Daniel Pinto 0.5538212
## Alex Gorsky 0.5448878
## Jamie Dimon 0.5200885
top_by_pagerank
## pagerank
## Todd Combs 0.016096309
## Steve Burke 0.015557147
## Chris Davis 0.015351450
## Brad Smith 0.015290282
## Bela Bajaria 0.015120264
## Ana Patricia O'Shea 0.014978563
## Alex Gorsky 0.014507818
## Carolyn Everson 0.013560198
## Maria Lagomasino 0.013560198
## Mark Weinburger 0.009690035