HW5

#first I need to upload the enzymes txt 


# Folder that holds the homework inputs. It is kept in a variable and joined
# with file.path() instead of calling setwd(), so running the script does not
# change the session's working directory as a side effect.
data_dir <- "C:/Users/Rache/OneDrive/Documents/R/R folder/Stat713/Hw5"

# enzymes.txt is read as a table with a header row; each row is one edge.
data <- read.table(file.path(data_dir, "enzymes.txt"), header = TRUE)

head(data)

##   X2 X1
## 1 13  1
## 2 24  1
## 3  1  2
## 4  3  2
## 5 12  2
## 6 14  2

library(igraph)

## Warning: package 'igraph' was built under R version 4.4.3

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(igraphdata)

## Warning: package 'igraphdata' was built under R version 4.4.3

# vcount() needs an igraph object rather than a data frame, so the edge list
# is first turned into an undirected graph.
g <- graph_from_data_frame(d = data, directed = FALSE)

# Number of vertices in the enzymes network (59).
vcount(graph = g)

## [1] 59

# Clustering coefficients of the enzymes graph.

# Global clustering coefficient (a single value for the whole graph).
transitivity(g, type = "global")

## [1] 0.2183288

# Local clustering coefficient (one value per vertex).
transitivity(g, type = "local")

##        13        24         1         3        12        14        35         2 
## 0.0000000 0.5000000 0.0000000 0.0000000 0.3333333 0.1666667 0.0000000 0.1000000 
##        36         5        16        37         4        15        26        38 
## 0.0000000 0.2000000 0.2000000 0.2000000 0.1666667 0.1666667 0.1666667 0.1000000 
##         7        18        29        39         6        28        40        41 
## 0.3333333 0.1000000 0.2000000 0.2000000 0.1000000 0.2000000 0.1000000 0.3333333 
##        10        21        34         9        11        22        42        44 
## 0.0000000 0.1000000 0.1388889 0.1666667 0.0000000 0.3333333 0.4000000 0.0000000 
##        45        46        17        47        48         8        19        49 
## 0.0000000 0.3333333 0.3333333 0.3333333 0.0000000 0.0000000 0.3333333 0.3333333 
##        50        43        20        51        25        52        23        53 
## 0.0000000 0.3333333 0.1666667 0.3333333 0.6666667 0.2666667 1.0000000 0.3333333 
##        27        54        55        56        30        57        58        32 
## 0.3333333 0.3333333 0.3333333 0.3333333 0.3333333 0.3333333 0.0000000 0.6666667 
##        33        59        31 
## 0.6666667 0.4000000 0.3333333

# Average clustering coefficient.
transitivity(g, type = "average")

## [1] 0.2345574

# Assortativity coefficients.

# Degree assortativity: do high-degree enzymes tend to be linked to other
# high-degree enzymes?
assortativity_degree(g, directed = FALSE)

# NOTE(review): the call below passes the vertex sequence itself as the vertex
# values, so it measures how similar the vertex ids at the two ends of an edge
# are. Vertex ids only reflect the order in which names appear in the edge
# list -- confirm this is really the quantity the assignment asks for.
assortativity(g, V(g))  ## positive

## [1] 0.4169657

# Girvan-Newman (edge betweenness) community detection.

# On the enzymes network this yields 4 groups/communities.
detect <- cluster_edge_betweenness(graph = g)
print(detect)

## IGRAPH clustering edge betweenness, groups: 4, mod: 0.66
## + groups:
##   $`1`
##    [1] "13" "24" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45" "25" "52" "23"
##   [15] "53"
##   
##   $`2`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   $`3`
##    [1] "7"  "18" "29" "39" "6"  "28" "40" "48" "8"  "19" "49" "56" "30" "57"
##   
##   + ... omitted several groups/vertices

# Draw the dendrogram of the Girvan-Newman result.
plot_dendrogram(detect)

# Community id of every vertex.
GNmember <- membership(detect)
print(GNmember)

## 13 24  1  3 12 14 35  2 36  5 16 37  4 15 26 38  7 18 29 39  6 28 40 41 10 21 
##  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  3  3  3  3  3  3  3  4  4  4 
## 34  9 11 22 42 44 45 46 17 47 48  8 19 49 50 43 20 51 25 52 23 53 27 54 55 56 
##  4  4  4  4  4  1  1  2  2  2  3  3  3  3  4  4  4  4  1  1  1  1  2  2  2  3 
## 30 57 58 32 33 59 31 
##  3  3  4  4  4  4  4

# Colour each vertex by its Girvan-Newman community: one rainbow colour per
# distinct community id.
membership_vec <- membership(detect)
community_palette <- rainbow(length(unique(membership_vec)))
V(g)$color <- community_palette[membership_vec]

# Every vertex is drawn as a circle, except the vertex named "2", which is
# drawn as a square.
V(g)$shape <- ifelse(V(g)$name == 2, "square", "circle")

# Plot the graph with the detected communities overlaid.
plot(detect, g)

# In the lecture the karate network was cut into 2 groups. It is not obvious
# where to cut the enzymes dendrogram by hand, so it is cut into 2 and then 3
# groups to see the differences.
clustering <- cut_at(detect, no = 2)
print(clustering)

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2
## [39] 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2

# Show the 2-group manual cut as vertex shape: group 1 = circle,
# group 2 = square.
V(g)$attr <- clustering
V(g)$shape[V(g)$attr == 1] <- "circle"
V(g)$shape[V(g)$attr == 2] <- "square"

# Two partitions in one picture, to compare them:
#   colour = community detected by the Girvan-Newman algorithm
#   shape  = manual cut of the dendrogram
plot(
  g,
  main = "True Communities (Shape) vs Detected (Color)",
  layout = layout_with_fr,      # any other layout function would also work
  vertex.label = NA,            # no labels, to keep the plot readable
  vertex.color = V(g)$color,    # detected community
  vertex.shape = V(g)$shape     # manual cut
)

#at 2 you can see the manual communities (squares vs circles) cluster on two ends of the network
#but the detected communities distinguish within each group, pretty evenly


# Same comparison with the dendrogram cut into 3 groups.
clustering <- cut_at(detect, no = 3)
print(clustering)

##  [1] 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 3 3 1 1
## [39] 1 1 1 1 1 1 2 2 2 2 3 3 3 1 1 1 1 1 1 1 1

# Show the 3-group manual cut as vertex shape. Every group gets a shape that
# is actually drawn: the igraph shape "none" draws nothing at all, so vertices
# given that shape disappear from the plot.
V(g)$attr <- clustering
V(g)[attr == 1]$shape <- "circle"
V(g)[attr == 2]$shape <- "square"
V(g)[attr == 3]$shape <- "rectangle"

# Two partitions in one picture, to compare them:
#   colour = community detected by the Girvan-Newman algorithm
#   shape  = manual cut of the dendrogram
plot(g,
     layout = layout_with_fr,          # or any other layout you like
     vertex.shape = V(g)$shape,        # manual cut
     vertex.color = V(g)$color,        # detected community
     vertex.size2 = 7,                 # rectangle height, so rectangles look flatter than squares
     vertex.label = NA,                # hide labels for clarity
     main = "True Communities (Shape) vs Detected (Color)")

#when the third manual group used the shape 'none' it was not drawn at all, which made a large chunk
#of the network look undefined; drawn as rectangles, all three manual groups are visible

# Rebuild the graph from the edge list (the new graph carries none of the
# colour/shape attributes set above), then look for the partition that
# maximizes modularity.
g <- graph_from_data_frame(d = data, directed = FALSE)
optimal <- cluster_optimal(graph = g)
print(optimal)

## IGRAPH clustering optimal, groups: 5, mod: 0.66
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##   [1] "24" "45" "25" "52" "23" "53"
##   
##   $`3`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   $`4`
##   + ... omitted several groups/vertices

# 5 communities are detected here.

# Visualize them: store the community id on each vertex.
V(g)$attr <- membership(optimal)

# Every vertex is a circle, except the vertex named "2" (square).
V(g)$shape <- ifelse(V(g)$name == 2, "square", "circle")

# Map community ids 1..5 to fixed colours (modularity-detected communities).
community_colors <- c("green", "red", "blue", "yellow", "purple")
V(g)$color <- community_colors[V(g)$attr]

plot(g)

# Fast greedy method.
#
# A first attempt, cluster_fast_greedy(g), was abandoned: any_multiple(g)
# returned TRUE, i.e. the enzymes graph has multiple edges. Per the author's
# note, fast greedy needs multiple edges and loops removed, so it is run on a
# simplified copy of the graph.
g <- graph_from_data_frame(d = data, directed = FALSE)
g_simple <- simplify(g, remove.loops = TRUE, remove.multiple = TRUE)

g.fg <- cluster_fast_greedy(g_simple)
print(g.fg)

## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
##   $`1`
##    [1] "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
##   [15] "59" "31"
##   
##   $`2`
##    [1] "13" "24" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45" "25" "52" "23"
##   [15] "53"
##   
##   $`3`
##   [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
##   + ... omitted several groups/vertices

# Colour the vertices of g by their fast-greedy community (ids 1..5 map to
# fixed colours).
V(g)$attr <- membership(g.fg)
fast_greedy_colors <- c("green", "red", "blue", "yellow", "orange")
V(g)$color <- fast_greedy_colors[V(g)$attr]

plot(g)

# Leading eigenvector method, run on a freshly built graph.

g <- graph_from_data_frame(d = data, directed = FALSE)
clu <- cluster_leading_eigen(graph = g)
print(clu)

## IGRAPH clustering leading eigenvector, groups: 5, mod: 0.61
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##    [1] "41" "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32"
##   [15] "33" "59" "31"
##   
##   $`3`
##    [1] "38" "7"  "18" "29" "39" "6"  "28" "40" "17" "48" "8"  "19" "49" "27"
##   [15] "54" "55" "56" "30" "57"
##   + ... omitted several groups/vertices

# 5 communities detected.

# Colour vertices by leading-eigenvector community (ids 1..5 map to fixed
# colours).
V(g)$attr <- membership(clu)
eigen_colors <- c("green", "red", "blue", "yellow", "orange")
V(g)$color <- eigen_colors[V(g)$attr]
# Close the current graphics device before drawing the next plot.
dev.off()

## null device 
##           1

plot(g)


# Spinglass method.
#
# cluster_spinglass() is a randomised algorithm: two runs with the same gamma
# can return different communities (the two gamma = 0.5 printouts in this
# document report 6 and 5 groups). The seed is fixed so the results can be
# reproduced.
set.seed(57)
c1 <- cluster_spinglass(g, gamma = 0.5)
c1

## IGRAPH clustering spinglass, groups: 6, mod: 0.41
## + groups:
##   $`1`
##   [1] "58"
##   
##   $`2`
##    [1] "41" "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "32" "33"
##   [15] "59" "31"
##   
##   $`3`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   + ... omitted several groups/vertices

# Spinglass with gamma = 1.
c2 <- cluster_spinglass(graph = g, gamma = 1)
print(c2)

## IGRAPH clustering spinglass, groups: 5, mod: 0.39
## + groups:
##   $`1`
##    [1] "41" "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32"
##   [15] "33" "59" "31"
##   
##   $`2`
##    [1] "7"  "18" "29" "39" "6"  "28" "40" "48" "8"  "19" "49" "56" "30" "57"
##   
##   $`3`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   + ... omitted several groups/vertices

# Spinglass with gamma = 2.
c3 <- cluster_spinglass(graph = g, gamma = 2)
print(c3)

## IGRAPH clustering spinglass, groups: 10, mod: 0.31
## + groups:
##   $`1`
##   [1] "24" "45" "25" "52" "23" "53"
##   
##   $`2`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`3`
##   [1] "7"  "29" "39" "6"  "28" "40" "56" "30" "57"
##   
##   $`4`
##   + ... omitted several groups/vertices

# c3 (gamma = 2) reports 10 groups. The plot below shows c1 (gamma = 0.5),
# which reported 6 groups in the printout above. The palette is sized from
# the membership vector so that every community gets its own colour; with
# five hard-coded colours, any community beyond the fifth stayed white.
spin_membership <- as.integer(membership(c1))
V(g)$attr <- spin_membership

# Every vertex is a circle, except the vertex named "2" (square).
V(g)$shape <- "circle"
V(g)[name == 2]$shape <- "square"

# One distinct colour per detected community.
V(g)$color <- rainbow(max(spin_membership))[spin_membership]

plot(g)   ## plot Gamma=0.5




# Label propagation algorithm.

cm <- cluster_label_prop(graph = g)
print(cm)

## IGRAPH clustering label propagation, groups: 5, mod: 0.66
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##   [1] "24" "25" "52" "23" "53"
##   
##   $`3`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "45" "46" "17" "47" "27" "54" "55"
##   
##   $`4`
##   + ... omitted several groups/vertices

# Colour vertices by label-propagation community.
lp_membership <- as.integer(membership(cm))
V(g)$attr <- lp_membership

# This run found 5 groups (see the printout above); another label-propagation
# run in this document found 7. Because the number of groups changes between
# runs, the palette is sized from the result instead of being fixed at six
# colours, so no community keeps a colour left over from an earlier plot.
V(g)$color <- rainbow(max(lp_membership))[lp_membership]

plot(g)

# Check the modularity of every clustering method on the enzymes network.

g <- graph_from_data_frame(d = data, directed = FALSE)

# Vertex count of the enzymes graph (59).
vcount(graph = g)

## [1] 59

# Manual two-way split used as "ground truth" below: the vertex named "2" is
# a square, every other vertex is a circle.
V(g)$shape <- ifelse(V(g)$name == 2, "square", "circle")

# Tabulate the shapes, reporting missing values if there are any.
table(V(g)$shape, useNA = "ifany")

## 
## circle square 
##     58      1

# ---- 1. Ground Truth ----
# Circles form group 1 and every other shape forms group 2, giving a
# ground truth with 2 groups.
ground_truth_membership <- 1 + (V(g)$shape != "circle")
ground_truth <- make_clusters(g, membership = ground_truth_membership)

# ---- 2. Community Detection Methods ----
# Pair a clustering with its modularity score.
with_modularity <- function(clu) {
  list(clu = clu, modularity = modularity(clu))
}

results <- list()

# Fast greedy, on the simplified copy of the graph (g_simple).
fg <- cluster_fast_greedy(g_simple)
results[["FastGreedy"]] <- with_modularity(fg)

# Leading eigenvector.
le <- cluster_leading_eigen(g)
results[["LeadingEigen"]] <- with_modularity(le)

# Spinglass at gamma = 0.5, 1 and 2; one entry per gamma value.
spinglass_gamma <- c(0.5, 1, 2)
for (gamma in spinglass_gamma) {
  spg <- cluster_spinglass(g, gamma = gamma)
  results[[paste0("Spinglass_gamma_", gamma)]] <- with_modularity(spg)
}

# Label propagation.
lp <- cluster_label_prop(g)
results[["LabelProp"]] <- with_modularity(lp)

# Girvan-Newman (edge betweenness).
gn <- cluster_edge_betweenness(g)
results[["GirvanNewman"]] <- with_modularity(gn)

# Optimal modularity.
opt <- cluster_optimal(g)
results[["Optimal"]] <- with_modularity(opt)

# ---- 3. Add Ground Truth ----
results[["GroundTruth"]] <- with_modularity(ground_truth)

print(results)

## $FastGreedy
## $FastGreedy$clu
## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
##   $`1`
##    [1] "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
##   [15] "59" "31"
##   
##   $`2`
##    [1] "13" "24" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45" "25" "52" "23"
##   [15] "53"
##   
##   $`3`
##   [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
##   + ... omitted several groups/vertices
## 
## $FastGreedy$modularity
## [1] 0.6461248
## 
## 
## $LeadingEigen
## $LeadingEigen$clu
## IGRAPH clustering leading eigenvector, groups: 5, mod: 0.61
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##    [1] "41" "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32"
##   [15] "33" "59" "31"
##   
##   $`3`
##    [1] "38" "7"  "18" "29" "39" "6"  "28" "40" "17" "48" "8"  "19" "49" "27"
##   [15] "54" "55" "56" "30" "57"
##   + ... omitted several groups/vertices
## 
## $LeadingEigen$modularity
## [1] 0.6099426
## 
## 
## $Spinglass_gamma_0.5
## $Spinglass_gamma_0.5$clu
## IGRAPH clustering spinglass, groups: 5, mod: 0.42
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##    [1] "7"  "18" "29" "39" "6"  "28" "40" "48" "8"  "19" "49" "56" "30" "57"
##   
##   $`3`
##   [1] "24" "25" "52" "23" "53"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $Spinglass_gamma_0.5$modularity
## [1] 0.4165109
## 
## 
## $Spinglass_gamma_1
## $Spinglass_gamma_1$clu
## IGRAPH clustering spinglass, groups: 7, mod: 0.36
## + groups:
##   $`1`
##    [1] "5"  "16" "37" "4"  "15" "38" "45" "46" "17" "47"
##   
##   $`2`
##   [1] "26" "27" "54" "55"
##   
##   $`3`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $Spinglass_gamma_1$modularity
## [1] 0.3645907
## 
## 
## $Spinglass_gamma_2
## $Spinglass_gamma_2$clu
## IGRAPH clustering spinglass, groups: 10, mod: 0.3
## + groups:
##   $`1`
##   [1] "24" "45" "25" "52" "23" "53"
##   
##   $`2`
##   [1] "7"  "29" "39" "6"  "40" "56" "30" "57"
##   
##   $`3`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $Spinglass_gamma_2$modularity
## [1] 0.3039606
## 
## 
## $LabelProp
## $LabelProp$clu
## IGRAPH clustering label propagation, groups: 7, mod: 0.64
## + groups:
##   $`1`
##    [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45"
##   
##   $`2`
##   [1] "24" "25" "52" "23" "53"
##   
##   $`3`
##   [1] "5"  "16" "37" "4"  "15" "38" "46" "17" "47"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $LabelProp$modularity
## [1] 0.6392327
## 
## 
## $GirvanNewman
## $GirvanNewman$clu
## IGRAPH clustering edge betweenness, groups: 4, mod: 0.66
## + groups:
##   $`1`
##    [1] "13" "24" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45" "25" "52" "23"
##   [15] "53"
##   
##   $`2`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   $`3`
##    [1] "7"  "18" "29" "39" "6"  "28" "40" "48" "8"  "19" "49" "56" "30" "57"
##   
##   + ... omitted several groups/vertices
## 
## $GirvanNewman$modularity
## [1] 0.6589882
## 
## 
## $Optimal
## $Optimal$clu
## IGRAPH clustering optimal, groups: 5, mod: 0.66
## + groups:
##   $`1`
##   [1] "13" "1"  "3"  "12" "14" "35" "2"  "36" "44"
##   
##   $`2`
##   [1] "24" "45" "25" "52" "23" "53"
##   
##   $`3`
##    [1] "5"  "16" "37" "4"  "15" "26" "38" "46" "17" "47" "27" "54" "55"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $Optimal$modularity
## [1] 0.6599798
## 
## 
## $GroundTruth
## $GroundTruth$clu
## IGRAPH clustering unknown, groups: 2, mod: -0.00077
## + groups:
##   $`1`
##    [1]  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
##   [25] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
##   [49] 50 51 52 53 54 55 56 57 58 59
##   
##   $`2`
##   [1] 8
##   
## 
## $GroundTruth$modularity
## [1] -0.0007722965

#A key thing to note here is that my ground truth is not good at all: its modularity is -.00077 because I had to artificially make communities, as they are not obvious in this dataset
#however, in the results printed above optimal got mod = .66 and 5 groups, GN got mod = .66 and 4 groups, label prop got mod = .64 and 7 groups
#spinglass got mod = .42 and 5 groups at gamma .5, mod = .36 and 7 groups at gamma 1, and mod = .30 and 10 groups at gamma 2; eigen got mod = .61 and 5 groups
#(the spinglass and label prop numbers differ between the runs printed in this document, so they are not stable from run to run)
#fast and greedy got mod = .65 and 5 groups

#overall optimal, gn, and fast and greedy essentially tied for the best modularity (.66, .66, .65) so these were the best clustering methods

#part two do that but on my own network

#first to recreate my own network

#I've decided to analyze the Plant Pathology Department at NDSU since I work there
#I'm making 12 groups for each Plant Pathology advisor that has a student
#Each node will represent a student within that advisor's lab

set.seed(57)  # For reproducibility

# 57 students spread over 12 labs; group_sizes[i] is the head count of lab i.
students <- paste0("Student", 1:57)
group_sizes <- c(4, 6, 5, 12, 2, 6, 4, 7, 6, 2, 1, 2)
num_projects <- length(group_sizes)

# Hand out students to labs in consecutive runs, so that no student is put
# into a group twice. The result is a list named "Lab1", "Lab2", ... whose
# elements are the student names of each lab.
lab_names <- paste0("Lab", seq_len(num_projects))
lab_of_student <- factor(rep(lab_names, times = group_sizes), levels = lab_names)
group_membership <- split(students, lab_of_student)

# Start from an edgeless undirected graph that already contains every student
# as a vertex; otherwise the lone student, who has no edges, would be missing
# from the final plot.
g <- make_empty_graph(n = 0, directed = FALSE)
g <- add_vertices(g, nv = length(students), name = students)

# Every pair of students in the same lab collaborates. Labs with fewer than
# two students have no pairs and must be left out (building pairs for the lone
# student creates an error).
labs_with_pairs <- Filter(function(lab) length(lab) >= 2, group_membership)
collaborations <- unlist(
  lapply(labs_with_pairs, function(lab) combn(lab, 2, simplify = FALSE)),
  recursive = FALSE,
  use.names = FALSE
)

# Add all pairs as edges in one call. The pairs are laid out as a two-column
# matrix (one row per edge) and transposed so that each column is one edge.
if (length(collaborations) > 0) {
  edge_list <- matrix(unlist(collaborations), ncol = 2, byrow = TRUE)
  g <- add_edges(g, t(edge_list))
}

# Colour each vertex by its lab: one rainbow colour per lab, and "gray" for
# any vertex that is not listed in a lab.
lab_colors <- rainbow(num_projects)
all_members <- unlist(group_membership, use.names = FALSE)
lab_index <- rep(seq_along(group_membership), times = lengths(group_membership))
member_pos <- match(V(g)$name, all_members)
V(g)$color <- ifelse(is.na(member_pos), "gray", lab_colors[lab_index[member_pos]])

# Peek at the first rows of the graph's adjacency matrix.
head(g)

## 6 x 57 sparse Matrix of class "dgCMatrix"

##   [[ suppressing 57 column names 'Student1', 'Student2', 'Student3' ... ]]

##                                                                               
## Student1 . 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student2 1 . 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student3 1 1 . 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student4 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student5 . . . . . 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . .
## Student6 . . . . 1 . 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . .
##                                                     
## Student1 . . . . . . . . . . . . . . . . . . . . . .
## Student2 . . . . . . . . . . . . . . . . . . . . . .
## Student3 . . . . . . . . . . . . . . . . . . . . . .
## Student4 . . . . . . . . . . . . . . . . . . . . . .
## Student5 . . . . . . . . . . . . . . . . . . . . . .
## Student6 . . . . . . . . . . . . . . . . . . . . . .

# Community detection on the student network.
# Girvan-Newman (edge betweenness):
detect <- cluster_edge_betweenness(graph = g)
print(detect)

## IGRAPH clustering edge betweenness, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices

# Girvan-Newman returns the 12 groups the network was built from.

# Draw the dendrogram of the Girvan-Newman result.
plot_dendrogram(detect)

# Reading the dendrogram, 2 or 4 groups look like good base groups.


# Community id of every student.
GNmember <- membership(detect)
print(GNmember)

##  Student1  Student2  Student3  Student4  Student5  Student6  Student7  Student8 
##         1         1         1         1         2         2         2         2 
##  Student9 Student10 Student11 Student12 Student13 Student14 Student15 Student16 
##         2         2         3         3         3         3         3         4 
## Student17 Student18 Student19 Student20 Student21 Student22 Student23 Student24 
##         4         4         4         4         4         4         4         4 
## Student25 Student26 Student27 Student28 Student29 Student30 Student31 Student32 
##         4         4         4         5         5         6         6         6 
## Student33 Student34 Student35 Student36 Student37 Student38 Student39 Student40 
##         6         6         6         7         7         7         7         8 
## Student41 Student42 Student43 Student44 Student45 Student46 Student47 Student48 
##         8         8         8         8         8         8         9         9 
## Student49 Student50 Student51 Student52 Student53 Student54 Student55 Student56 
##         9         9         9         9        10        10        11        12 
## Student57 
##        12

# Colour each student by Girvan-Newman community: one rainbow colour per
# distinct community id.
membership_vec <- membership(detect)
V(g)$color <- rainbow(length(unique(membership_vec)))[membership_vec]

# All students are drawn as circles. The enzymes section additionally turned
# the vertex named "2" into a square; vertices here are named "Student1" ...
# "Student57", so that selector matched nothing and has been dropped.
V(g)$shape <- "circle"

# Plot the graph with the detected communities overlaid.
plot(detect, g)

# Unfortunately, because the labs were built with edges only inside each lab,
# the methods will most likely just stick to those groups.


# Optimal clustering: the partition that maximizes modularity.
optimal <- cluster_optimal(graph = g)
print(optimal)

## IGRAPH clustering optimal, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices

# Visualize the optimal-modularity communities (12 groups again).
optimal_membership <- as.integer(membership(optimal))
V(g)$attr <- optimal_membership

# All students are drawn as circles (no vertex is named "2" in this network,
# so the square selector used for the enzymes graph does not apply).
V(g)$shape <- "circle"

# One distinct colour per community. With only five hard-coded colours,
# communities 6..12 kept whatever colour an earlier plot had given them.
V(g)$color <- rainbow(max(optimal_membership))[optimal_membership]

plot(g)

# Fast greedy method, run directly on the student network.

g.fg <- cluster_fast_greedy(graph = g)
print(g.fg)

## IGRAPH clustering fast greedy, groups: 12, mod: 0.77
## + groups:
##   $`1`
##    [1] "Student16" "Student17" "Student18" "Student19" "Student20" "Student21"
##    [7] "Student22" "Student23" "Student24" "Student25" "Student26" "Student27"
##   
##   $`2`
##   [1] "Student40" "Student41" "Student42" "Student43" "Student44" "Student45"
##   [7] "Student46"
##   
##   $`3`
##   [1] "Student47" "Student48" "Student49" "Student50" "Student51" "Student52"
##   + ... omitted several groups/vertices

any_multiple(g)

## [1] FALSE

# This returned FALSE, which means there are no multiple edges in this
# network, so fast greedy can run on g directly.

# Colour vertices by fast-greedy community, one distinct colour per
# community. With only five hard-coded colours, communities 6..12 kept
# whatever colour an earlier plot had given them.
fg_membership <- as.integer(membership(g.fg))
V(g)$attr <- fg_membership
V(g)$color <- rainbow(max(fg_membership))[fg_membership]

plot(g)

# Leading eigenvector method.

clu <- cluster_leading_eigen(graph = g)
print(clu)

## IGRAPH clustering leading eigenvector, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices

# 12 communities detected (see the printout above).

# Colour vertices by leading-eigenvector community, one distinct colour per
# community. With only five hard-coded colours, communities 6..12 kept
# whatever colour an earlier plot had given them.
eigen_membership <- as.integer(membership(clu))
V(g)$attr <- eigen_membership
V(g)$color <- rainbow(max(eigen_membership))[eigen_membership]
# Close the current graphics device before drawing the next plot.
dev.off()

## null device 
##           1

plot(g)


#now for spinglass method

#the spinglass cannot work for the network I created because it does not work on unconnected graphs





# Label propagation algorithm.

cm <- cluster_label_prop(graph = g)
print(cm)

## IGRAPH clustering label propagation, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices

# Colour vertices by label-propagation community.
lp_membership <- as.integer(membership(cm))
V(g)$attr <- lp_membership

# Label propagation found 12 groups here (see the printout above), so the
# palette is sized from the result. With six hard-coded colours, communities
# 7..12 kept whatever colour an earlier plot had given them.
V(g)$color <- rainbow(max(lp_membership))[lp_membership]

plot(g)

# Calculate modularity for the student network.

# Vertex count of the student network (57).
vcount(graph = g)

## [1] 57

# Shape assignment: a vertex named "2" would become a square, every other
# vertex is a circle.
V(g)$shape <- ifelse(V(g)$name == 2, "square", "circle")

# Tabulate the shapes, reporting missing values if there are any.
table(V(g)$shape, useNA = "ifany")

## 
## circle 
##     57

# ---- 1. Ground Truth ----
# The true communities of this network are the labs it was built from
# (group_membership). Each vertex is looked up by name in the flattened list
# of lab members, which gives the index of its lab. Deriving the ground truth
# from vertex shape instead put all 57 students (all circles) in one group.
lab_index <- rep(seq_along(group_membership), times = lengths(group_membership))
member_pos <- match(V(g)$name, unlist(group_membership, use.names = FALSE))
ground_truth_membership <- lab_index[member_pos]
ground_truth <- make_clusters(g, ground_truth_membership)
# The ground truth has one group per lab (12).

# ---- 2. Community Detection Methods ----
# Collect every clustering of the student network with its modularity.
results <- list()

# Fast Greedy
# Run on this network's graph g. g_simple is the simplified enzymes graph from
# part one; using it here reported the enzymes result (5 groups, mod 0.65)
# among the student-network results. This network has no multiple edges
# (any_multiple(g) is FALSE), so no simplified copy is needed.
fg <- cluster_fast_greedy(g)
results$FastGreedy <- list(clu = fg, modularity = modularity(fg))

# Leading Eigenvector
le <- cluster_leading_eigen(g)
results$LeadingEigen <- list(clu = le, modularity = modularity(le))

# Spinglass (gamma = 0.5, 1, 2)
# Left out: the author notes spinglass does not work on this unconnected graph.

# Label Propagation
lp <- cluster_label_prop(g)
results$LabelProp <- list(clu = lp, modularity = modularity(lp))

# Girvan-Newman (Edge Betweenness)
gn <- cluster_edge_betweenness(g)
results$GirvanNewman <- list(clu = gn, modularity = modularity(gn))

# Optimal Modularity
opt <- cluster_optimal(g)
results$Optimal <- list(clu = opt, modularity = modularity(opt))

# ---- 3. Add Ground Truth ----
results$GroundTruth <- list(clu = ground_truth, modularity = modularity(ground_truth))

results

## $FastGreedy
## $FastGreedy$clu
## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
##   $`1`
##    [1] "10" "21" "34" "9"  "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
##   [15] "59" "31"
##   
##   $`2`
##    [1] "13" "24" "1"  "3"  "12" "14" "35" "2"  "36" "44" "45" "25" "52" "23"
##   [15] "53"
##   
##   $`3`
##   [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
##   + ... omitted several groups/vertices
## 
## $FastGreedy$modularity
## [1] 0.6461248
## 
## 
## $LeadingEigen
## $LeadingEigen$clu
## IGRAPH clustering leading eigenvector, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $LeadingEigen$modularity
## [1] 0.7709035
## 
## 
## $LabelProp
## $LabelProp$clu
## IGRAPH clustering label propagation, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $LabelProp$modularity
## [1] 0.7709035
## 
## 
## $GirvanNewman
## $GirvanNewman$clu
## IGRAPH clustering edge betweenness, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $GirvanNewman$modularity
## [1] 0.7709035
## 
## 
## $Optimal
## $Optimal$clu
## IGRAPH clustering optimal, groups: 12, mod: 0.77
## + groups:
##   $`1`
##   [1] "Student1" "Student2" "Student3" "Student4"
##   
##   $`2`
##   [1] "Student5"  "Student6"  "Student7"  "Student8"  "Student9"  "Student10"
##   
##   $`3`
##   [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##   
##   $`4`
##   + ... omitted several groups/vertices
## 
## $Optimal$modularity
## [1] 0.7709035
## 
## 
## $GroundTruth
## $GroundTruth$clu
## IGRAPH clustering unknown, groups: 1, mod: 0
## + groups:
##   $`1`
##    [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
##   [25] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
##   [49] 49 50 51 52 53 54 55 56 57
##   
## 
## $GroundTruth$modularity
## [1] 0

#the ground truth printed above puts every student into a single group (all vertices are circles), which is why its mod = 0
#the 12 labs the network was built from are the natural ground truth to compare the methods against
#for modularity optimal got .77, GN got .77, labelprop got .77, and eigen got .77; spinglass didn't work because the graph is unconnected
#this makes sense as the most obvious route for all of these algorithms was to group based on the 12 communities I made incredibly 'obvious' in my network
#the fast greedy entry printed above (.65 and 5 groups) was computed on g_simple, the simplified enzymes graph, not on this network; run directly on this network earlier, fast greedy also found the 12 groups with mod = .77
#so, besides spinglass not being usable here, I cannot say which method is best because they all grouped this network the same way

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

HW5

2025-08-05