# Load the enzymes edge list: enzymes.txt has a header row and two columns
# (X2 and X1); each row is later turned into one edge of the graph.
# The file is addressed through an explicit path instead of calling setwd(),
# so running the script no longer changes the session's working directory.
data_dir <- "C:/Users/Rache/OneDrive/Documents/R/R folder/Stat713/Hw5"
data <- read.table(file.path(data_dir, "enzymes.txt"), header = TRUE)
head(data)
## X2 X1
## 1 13 1
## 2 24 1
## 3 1 2
## 4 3 2
## 5 12 2
## 6 14 2
library(igraph)
## Warning: package 'igraph' was built under R version 4.4.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(igraphdata)
## Warning: package 'igraphdata' was built under R version 4.4.3
# vcount() needs an igraph object rather than a data frame, so the edge list
# is first converted into an undirected graph.
g <- graph_from_data_frame(d = data, directed = FALSE)
# Number of vertices in the enzymes graph (59 in the recorded run).
vcount(g)
## [1] 59
# Clustering coefficients of the enzymes graph.
# Global clustering coefficient (one value for the whole graph).
transitivity(g, type = "global")
## [1] 0.2183288
# Local clustering coefficient (one value per vertex).
transitivity(g, type = "local")
## 13 24 1 3 12 14 35 2
## 0.0000000 0.5000000 0.0000000 0.0000000 0.3333333 0.1666667 0.0000000 0.1000000
## 36 5 16 37 4 15 26 38
## 0.0000000 0.2000000 0.2000000 0.2000000 0.1666667 0.1666667 0.1666667 0.1000000
## 7 18 29 39 6 28 40 41
## 0.3333333 0.1000000 0.2000000 0.2000000 0.1000000 0.2000000 0.1000000 0.3333333
## 10 21 34 9 11 22 42 44
## 0.0000000 0.1000000 0.1388889 0.1666667 0.0000000 0.3333333 0.4000000 0.0000000
## 45 46 17 47 48 8 19 49
## 0.0000000 0.3333333 0.3333333 0.3333333 0.0000000 0.0000000 0.3333333 0.3333333
## 50 43 20 51 25 52 23 53
## 0.0000000 0.3333333 0.1666667 0.3333333 0.6666667 0.2666667 1.0000000 0.3333333
## 27 54 55 56 30 57 58 32
## 0.3333333 0.3333333 0.3333333 0.3333333 0.3333333 0.3333333 0.0000000 0.6666667
## 33 59 31
## 0.6666667 0.4000000 0.3333333
# Average of the local clustering coefficients.
transitivity(g, type = "average")
## [1] 0.2345574
# Degree assortativity: do well-connected vertices tend to attach to other
# well-connected vertices?
# The earlier call, assortativity(g, V(g)), passed the vertex sequence itself
# as the vertex values, i.e. it measured assortativity of the internal vertex
# indices, which is not a property of the network.
assortativity_degree(g, directed = FALSE)
## NOTE: the value below was recorded from assortativity(g, V(g)); re-run to refresh.
## [1] 0.4169657
# Girvan-Newman (edge betweenness) community detection.
# In the recorded run it splits enzymes.txt into 4 groups/communities.
detect <- cluster_edge_betweenness(g)
print(detect)
## IGRAPH clustering edge betweenness, groups: 4, mod: 0.66
## + groups:
## $`1`
## [1] "13" "24" "1" "3" "12" "14" "35" "2" "36" "44" "45" "25" "52" "23"
## [15] "53"
##
## $`2`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## $`3`
## [1] "7" "18" "29" "39" "6" "28" "40" "48" "8" "19" "49" "56" "30" "57"
##
## + ... omitted several groups/vertices
# Draw the dendrogram of the Girvan-Newman result.
plot_dendrogram(detect)
# Community id of every vertex.
GNmember <- membership(detect)
print(GNmember)
## 13 24 1 3 12 14 35 2 36 5 16 37 4 15 26 38 7 18 29 39 6 28 40 41 10 21
## 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 3 3 3 3 3 3 3 4 4 4
## 34 9 11 22 42 44 45 46 17 47 48 8 19 49 50 43 20 51 25 52 23 53 27 54 55 56
## 4 4 4 4 4 1 1 2 2 2 3 3 3 3 4 4 4 4 1 1 1 1 2 2 2 3
## 30 57 58 32 33 59 31
## 3 3 4 4 4 4 4
# Color every vertex by its Girvan-Newman community, using one rainbow color
# per community.
membership_vec <- membership(detect)
community_palette <- rainbow(length(unique(membership_vec)))
V(g)$color <- community_palette[membership_vec]
# Shapes: circle for every vertex, except the vertex named "2" (square).
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# Plot the graph together with the detected communities.
plot(detect, g)
# In the lecture the karate dendrogram was cut at 2 groups. It is not obvious
# where to cut the enzymes dendrogram by hand, so it is cut at 2 and then at
# 3 groups to see the differences.
clustering <- cut_at(detect, no = 2)
print(clustering)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2
## [39] 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
# Overlay the manual 2-group cut on the Girvan-Newman result:
#   shape = group from the manual cut stored in `clustering`
#   color = community stored in V(g)$color (set from the Girvan-Newman result)
V(g)$attr <- clustering
cut_shapes <- c("circle", "square")
V(g)$shape <- cut_shapes[clustering]
# The title used to say "True Communities", but the shapes come from the
# manual cut of the dendrogram, not from a known ground truth.
plot(
  g,
  layout = layout_with_fr,     # or any other layout you like
  vertex.shape = V(g)$shape,   # manual cut group
  vertex.color = V(g)$color,   # detected community
  vertex.label = NA,           # hide labels for clarity
  main = "Manual Cut (Shape) vs Detected (Color)"
)
# With the cut at 2, the manual communities (squares vs circles) cluster on
# the two ends of the network, while the detected communities split each of
# those groups further, pretty evenly.
# Next: the cut at 3 groups.
clustering <- cut_at(detect, no = 3)
print(clustering)
## [1] 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 3 3 1 1
## [39] 1 1 1 1 1 1 2 2 2 2 3 3 3 1 1 1 1 1 1 1 1
# Overlay the manual 3-group cut on the Girvan-Newman result:
#   shape = group from the manual cut stored in `clustering`
#   color = community stored in V(g)$color (set from the Girvan-Newman result)
V(g)$attr <- clustering
# Group 3 used to be given the shape "none", which draws no vertex at all, so
# every vertex of that group vanished from the plot. "sphere" keeps them
# visible while still being different from circle and square.
cut_shapes <- c("circle", "square", "sphere")
V(g)$shape <- cut_shapes[clustering]
# The title used to say "True Communities", but the shapes come from the
# manual cut of the dendrogram, not from a known ground truth.
plot(
  g,
  layout = layout_with_fr,     # or any other layout you like
  vertex.shape = V(g)$shape,   # manual cut group
  vertex.color = V(g)$color,   # detected community
  vertex.label = NA,           # hide labels for clarity
  main = "Manual Cut (Shape) vs Detected (Color)"
)
# Earlier observation: cutting at three looked odd because a large chunk of
# the network showed neither shape nor color. That was an artifact of the
# "none" shape hiding group 3, not a property of the cut itself.
# Rebuild the graph from the edge list and look for the partition that
# maximizes modularity.
g <- graph_from_data_frame(d = data, directed = FALSE)
optimal <- cluster_optimal(g)
print(optimal)
## IGRAPH clustering optimal, groups: 5, mod: 0.66
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "24" "45" "25" "52" "23" "53"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## $`4`
## + ... omitted several groups/vertices
# Five communities are detected in the recorded run; visualize them.
V(g)$attr <- membership(optimal)
# Shapes: circle for every vertex, except the vertex named "2" (square).
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# One color per community id 1..5 (modularity-detected communities).
optimal_colors <- c("green", "red", "blue", "yellow", "purple")
for (k in seq_along(optimal_colors)) {
  V(g)[attr == k]$color <- optimal_colors[k]
}
plot(g)
# Fast greedy method.
# The direct attempt is kept for reference:
#g.fg <- cluster_fast_greedy(g)
#g.fg
#any_multiple(g)
# any_multiple(g) returned TRUE, i.e. the graph has multiple edges; to run
# fast greedy the multiple edges and loops have to be removed first.
g <- graph_from_data_frame(d = data, directed = FALSE)
g_simple <- simplify(g, remove.loops = TRUE, remove.multiple = TRUE)
g.fg <- cluster_fast_greedy(g_simple)
print(g.fg)
## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
## $`1`
## [1] "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
## [15] "59" "31"
##
## $`2`
## [1] "13" "24" "1" "3" "12" "14" "35" "2" "36" "44" "45" "25" "52" "23"
## [15] "53"
##
## $`3`
## [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
## + ... omitted several groups/vertices
# Color the vertices of g by the fast greedy communities (ids 1..5) and plot.
V(g)$attr <- membership(g.fg)
fg_colors <- c("green", "red", "blue", "yellow", "orange")
for (k in seq_along(fg_colors)) {
  V(g)[attr == k]$color <- fg_colors[k]
}
plot(g)
# Leading eigenvector method, run on a freshly rebuilt graph.
g <- graph_from_data_frame(d = data, directed = FALSE)
clu <- cluster_leading_eigen(g)
print(clu)
## IGRAPH clustering leading eigenvector, groups: 5, mod: 0.61
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "41" "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32"
## [15] "33" "59" "31"
##
## $`3`
## [1] "38" "7" "18" "29" "39" "6" "28" "40" "17" "48" "8" "19" "49" "27"
## [15] "54" "55" "56" "30" "57"
## + ... omitted several groups/vertices
# Five communities are detected in the recorded run; color ids 1..5.
V(g)$attr <- membership(clu)
eigen_colors <- c("green", "red", "blue", "yellow", "orange")
for (k in seq_along(eigen_colors)) {
  V(g)[attr == k]$color <- eigen_colors[k]
}
# Close the current graphics device before drawing the next plot.
dev.off()
## null device
## 1
plot(g)
# Spinglass method, first with gamma = 0.5.
# The recorded runs of this same call disagree (6 groups here, 5 groups in
# the later comparison run), so the seed is fixed to make the spinglass
# results that follow repeatable.
# NOTE: the output recorded below comes from an unseeded run; re-run to refresh.
set.seed(713)
c1 <- cluster_spinglass(g, gamma = 0.5)
print(c1)
## IGRAPH clustering spinglass, groups: 6, mod: 0.41
## + groups:
## $`1`
## [1] "58"
##
## $`2`
## [1] "41" "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "32" "33"
## [15] "59" "31"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## + ... omitted several groups/vertices
# Spinglass with gamma = 1.
c2 <- cluster_spinglass(g, gamma = 1)
print(c2)
## IGRAPH clustering spinglass, groups: 5, mod: 0.39
## + groups:
## $`1`
## [1] "41" "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32"
## [15] "33" "59" "31"
##
## $`2`
## [1] "7" "18" "29" "39" "6" "28" "40" "48" "8" "19" "49" "56" "30" "57"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## + ... omitted several groups/vertices
# Spinglass with gamma = 2.
c3 <- cluster_spinglass(g, gamma = 2)
print(c3)
## IGRAPH clustering spinglass, groups: 10, mod: 0.31
## + groups:
## $`1`
## [1] "24" "45" "25" "52" "23" "53"
##
## $`2`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`3`
## [1] "7" "29" "39" "6" "28" "40" "56" "30" "57"
##
## $`4`
## + ... omitted several groups/vertices
# Plot the gamma = 0.5 spinglass result (c1).
# The earlier remark that spinglass "found 9 groups but only plots 5" mixed
# two things: the large group count belongs to the gamma = 2 run (10 groups
# in the recorded output), while this plot shows c1. The recorded c1 has 6
# groups but only ids 1-5 were given a color, so the sixth group stayed
# white. The palette below covers however many groups c1 contains.
V(g)$attr <- membership(c1)
# Shapes: circle for every vertex, except the vertex named "2" (square).
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
spin_groups <- V(g)$attr
spin_colors <- c("green", "red", "blue", "yellow", "purple")
if (max(spin_groups) > length(spin_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  spin_colors <- rainbow(max(spin_groups))
}
V(g)$color <- spin_colors[spin_groups]
plot(g) ## plot Gamma=0.5
# Label propagation algorithm.
cm <- cluster_label_prop(g)
print(cm)
## IGRAPH clustering label propagation, groups: 5, mod: 0.66
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "24" "25" "52" "23" "53"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "26" "38" "45" "46" "17" "47" "27" "54" "55"
##
## $`4`
## + ... omitted several groups/vertices
# Color the vertices by their label propagation community and plot.
# The number of groups differs between the recorded runs of this method
# (5 in the output above, 7 in the later comparison run), so the palette
# adapts to the number of groups found instead of assuming at most 6;
# previously any group beyond the sixth kept the color left over from the
# spinglass plot.
V(g)$attr <- membership(cm)
lp_groups <- V(g)$attr
lp_colors <- c("green", "red", "blue", "yellow", "purple", "orange")
if (max(lp_groups) > length(lp_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  lp_colors <- rainbow(max(lp_groups))
}
V(g)$color <- lp_colors[lp_groups]
plot(g)
# Check the modularity of all methods, starting from a freshly built graph.
g <- graph_from_data_frame(d = data, directed = FALSE)
# Number of vertices in the enzymes graph (59 in the recorded run).
vcount(g)
## [1] 59
# Shapes: circle by default, square assigned by hand to the vertex named "2".
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# Tabulate the shapes; NA gets its own column if any shape is missing.
table(V(g)$shape, useNA = "ifany")
##
## circle square
## 58 1
# ---- 1. Ground Truth ----
# Artificial two-group "ground truth": circle vertices form group 1 and
# square vertices form group 2 (2 groups in the recorded run).
ground_truth_membership <- as.numeric(ifelse(V(g)$shape == "circle", 1, 2))
ground_truth <- make_clusters(g, ground_truth_membership)

# ---- 2. Community Detection Methods ----
# The recorded spinglass and label propagation results change from run to
# run, so the figures quoted in the commentary could not be reproduced.
# Fixing the seed makes this comparison repeatable.
# NOTE: the output recorded below comes from an unseeded run; re-run to refresh.
set.seed(713)

# Bundle a clustering with its modularity score.
with_modularity <- function(clu) {
  list(clu = clu, modularity = modularity(clu))
}

results <- list()

# Fast Greedy: multiple edges and loops have to be removed first. The graph
# is simplified here, from the current g, instead of relying on a `g_simple`
# object left over from earlier code.
fg <- cluster_fast_greedy(
  simplify(g, remove.multiple = TRUE, remove.loops = TRUE)
)
results$FastGreedy <- with_modularity(fg)

# Leading Eigenvector
le <- cluster_leading_eigen(g)
results$LeadingEigen <- with_modularity(le)

# Spinglass (gamma = 0.5, 1, 2); one entry per gamma, named Spinglass_gamma_<value>.
spinglass_gamma <- c(0.5, 1, 2)
for (gamma in spinglass_gamma) {
  spg <- cluster_spinglass(g, gamma = gamma)
  key <- paste0("Spinglass_gamma_", gamma)
  results[[key]] <- with_modularity(spg)
}

# Label Propagation
lp <- cluster_label_prop(g)
results$LabelProp <- with_modularity(lp)

# Girvan-Newman (Edge Betweenness)
gn <- cluster_edge_betweenness(g)
results$GirvanNewman <- with_modularity(gn)

# Optimal Modularity
opt <- cluster_optimal(g)
results$Optimal <- with_modularity(opt)

# ---- 3. Add Ground Truth ----
results$GroundTruth <- with_modularity(ground_truth)
print(results)
## $FastGreedy
## $FastGreedy$clu
## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
## $`1`
## [1] "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
## [15] "59" "31"
##
## $`2`
## [1] "13" "24" "1" "3" "12" "14" "35" "2" "36" "44" "45" "25" "52" "23"
## [15] "53"
##
## $`3`
## [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
## + ... omitted several groups/vertices
##
## $FastGreedy$modularity
## [1] 0.6461248
##
##
## $LeadingEigen
## $LeadingEigen$clu
## IGRAPH clustering leading eigenvector, groups: 5, mod: 0.61
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "41" "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32"
## [15] "33" "59" "31"
##
## $`3`
## [1] "38" "7" "18" "29" "39" "6" "28" "40" "17" "48" "8" "19" "49" "27"
## [15] "54" "55" "56" "30" "57"
## + ... omitted several groups/vertices
##
## $LeadingEigen$modularity
## [1] 0.6099426
##
##
## $Spinglass_gamma_0.5
## $Spinglass_gamma_0.5$clu
## IGRAPH clustering spinglass, groups: 5, mod: 0.42
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "7" "18" "29" "39" "6" "28" "40" "48" "8" "19" "49" "56" "30" "57"
##
## $`3`
## [1] "24" "25" "52" "23" "53"
##
## $`4`
## + ... omitted several groups/vertices
##
## $Spinglass_gamma_0.5$modularity
## [1] 0.4165109
##
##
## $Spinglass_gamma_1
## $Spinglass_gamma_1$clu
## IGRAPH clustering spinglass, groups: 7, mod: 0.36
## + groups:
## $`1`
## [1] "5" "16" "37" "4" "15" "38" "45" "46" "17" "47"
##
## $`2`
## [1] "26" "27" "54" "55"
##
## $`3`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`4`
## + ... omitted several groups/vertices
##
## $Spinglass_gamma_1$modularity
## [1] 0.3645907
##
##
## $Spinglass_gamma_2
## $Spinglass_gamma_2$clu
## IGRAPH clustering spinglass, groups: 10, mod: 0.3
## + groups:
## $`1`
## [1] "24" "45" "25" "52" "23" "53"
##
## $`2`
## [1] "7" "29" "39" "6" "40" "56" "30" "57"
##
## $`3`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`4`
## + ... omitted several groups/vertices
##
## $Spinglass_gamma_2$modularity
## [1] 0.3039606
##
##
## $LabelProp
## $LabelProp$clu
## IGRAPH clustering label propagation, groups: 7, mod: 0.64
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44" "45"
##
## $`2`
## [1] "24" "25" "52" "23" "53"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "38" "46" "17" "47"
##
## $`4`
## + ... omitted several groups/vertices
##
## $LabelProp$modularity
## [1] 0.6392327
##
##
## $GirvanNewman
## $GirvanNewman$clu
## IGRAPH clustering edge betweenness, groups: 4, mod: 0.66
## + groups:
## $`1`
## [1] "13" "24" "1" "3" "12" "14" "35" "2" "36" "44" "45" "25" "52" "23"
## [15] "53"
##
## $`2`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## $`3`
## [1] "7" "18" "29" "39" "6" "28" "40" "48" "8" "19" "49" "56" "30" "57"
##
## + ... omitted several groups/vertices
##
## $GirvanNewman$modularity
## [1] 0.6589882
##
##
## $Optimal
## $Optimal$clu
## IGRAPH clustering optimal, groups: 5, mod: 0.66
## + groups:
## $`1`
## [1] "13" "1" "3" "12" "14" "35" "2" "36" "44"
##
## $`2`
## [1] "24" "45" "25" "52" "23" "53"
##
## $`3`
## [1] "5" "16" "37" "4" "15" "26" "38" "46" "17" "47" "27" "54" "55"
##
## $`4`
## + ... omitted several groups/vertices
##
## $Optimal$modularity
## [1] 0.6599798
##
##
## $GroundTruth
## $GroundTruth$clu
## IGRAPH clustering unknown, groups: 2, mod: -0.00077
## + groups:
## $`1`
## [1] 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [25] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## [49] 50 51 52 53 54 55 56 57 58 59
##
## $`2`
## [1] 8
##
##
## $GroundTruth$modularity
## [1] -0.0007722965
#A key thing to note here is that my ground truth is not good at all: its modularity is -.00077, because I had to artificially make communities as they are not obvious in this dataset
#however, in the output recorded above optimal got mod = .66 and 5 groups, GN got mod = .66 and 4 groups, label prop got mod = .64 and 7 groups
#spinglass got mod = .36 and 7 groups at gamma 1, mod = .42 and 5 groups at gamma .5, and mod = .30 and 10 groups at gamma 2; eigen got mod = .61 and 5 groups
#(spinglass and label prop give different results from run to run, so these figures can differ when the code is re-run)
#fast and greedy got mod = .65 and 5 groups
#overall optimal, GN, and fast and greedy were essentially tied for the best modularity (.66, .66, .65), so these were the best clustering methods
# Part two: repeat the analysis on a network of my own.
# The network models the Plant Pathology Department at NDSU (where I work):
# 12 groups, one for each advisor that has a student, and one node for each
# student within that advisor's lab.
set.seed(57) # For reproducibility
# Students and the number of students in each lab.
students <- paste0("Student", 1:57)
group_sizes <- c(4, 6, 5, 12, 2, 6, 4, 7, 6, 2, 1, 2)
num_projects <- length(group_sizes)
# Hand out consecutive students lab by lab, so that no student is put into
# two labs. The result is a named list (Lab1, Lab2, ...) of student names.
lab_names <- paste0("Lab", seq_len(num_projects))
lab_of_student <- factor(
  rep(lab_names, times = group_sizes),
  levels = lab_names
)
group_membership <- split(students, lab_of_student)
# Start from a graph that already holds every student as a vertex; otherwise
# the lone student would be missing from the final plot, since that student
# has no edges.
g <- make_empty_graph(n = 0, directed = FALSE)
g <- add_vertices(g, nv = length(students), name = students)
# Connect every pair of students who share a lab. Labs with fewer than two
# students are skipped: they have no pairs, and asking combn() for pairs
# from a single student creates an error.
collaborations <- unlist(
  lapply(group_membership, function(lab) {
    if (length(lab) < 2) list() else combn(lab, 2, simplify = FALSE)
  }),
  recursive = FALSE,
  use.names = FALSE
)
if (length(collaborations) > 0) {
  # One row per pair; transposing puts the two ends of each edge next to
  # each other when the matrix is read as a vector.
  edge_list <- do.call(rbind, collaborations)
  g <- add_edges(g, t(edge_list))
}
# Color each node by lab: one rainbow color per lab, gray for any vertex
# that is not listed in a lab.
lab_colors <- rainbow(num_projects)
V(g)$color <- "gray"
# Lab index of every listed student, looked up by student name.
student_lab <- rep(
  seq_along(group_membership),
  times = lengths(group_membership)
)
names(student_lab) <- unlist(group_membership, use.names = FALSE)
in_a_lab <- V(g)$name %in% names(student_lab)
V(g)$color[in_a_lab] <- lab_colors[student_lab[V(g)$name[in_a_lab]]]
head(g)
## 6 x 57 sparse Matrix of class "dgCMatrix"
## [[ suppressing 57 column names 'Student1', 'Student2', 'Student3' ... ]]
##
## Student1 . 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student2 1 . 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student3 1 1 . 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student4 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Student5 . . . . . 1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . .
## Student6 . . . . 1 . 1 1 1 1 . . . . . . . . . . . . . . . . . . . . . . . . .
##
## Student1 . . . . . . . . . . . . . . . . . . . . . .
## Student2 . . . . . . . . . . . . . . . . . . . . . .
## Student3 . . . . . . . . . . . . . . . . . . . . . .
## Student4 . . . . . . . . . . . . . . . . . . . . . .
## Student5 . . . . . . . . . . . . . . . . . . . . . .
## Student6 . . . . . . . . . . . . . . . . . . . . . .
# Community detection on my network.
# Girvan-Newman (edge betweenness).
detect <- cluster_edge_betweenness(g)
print(detect)
## IGRAPH clustering edge betweenness, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
# Girvan-Newman gave back my basic 12 groups.
# Draw the dendrogram.
plot_dendrogram(detect)
# Looking at the dendrogram there are 2 or 4 good base groups.
# Community id of every node.
GNmember <- membership(detect)
print(GNmember)
## Student1 Student2 Student3 Student4 Student5 Student6 Student7 Student8
## 1 1 1 1 2 2 2 2
## Student9 Student10 Student11 Student12 Student13 Student14 Student15 Student16
## 2 2 3 3 3 3 3 4
## Student17 Student18 Student19 Student20 Student21 Student22 Student23 Student24
## 4 4 4 4 4 4 4 4
## Student25 Student26 Student27 Student28 Student29 Student30 Student31 Student32
## 4 4 4 5 5 6 6 6
## Student33 Student34 Student35 Student36 Student37 Student38 Student39 Student40
## 6 6 6 7 7 7 7 8
## Student41 Student42 Student43 Student44 Student45 Student46 Student47 Student48
## 8 8 8 8 8 8 9 9
## Student49 Student50 Student51 Student52 Student53 Student54 Student55 Student56
## 9 9 9 9 10 10 11 12
## Student57
## 12
# Color every vertex by its Girvan-Newman community, using one rainbow color
# per community.
membership_vec <- membership(detect)
community_palette <- rainbow(length(unique(membership_vec)))
V(g)$color <- community_palette[membership_vec]
# Shapes: circle for every vertex, square for a vertex named "2".
# NOTE(review): the vertices here are named Student1..Student57, so the
# square assignment selects nothing in this network.
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# Plot the graph together with the detected communities.
plot(detect, g)
# Unfortunately, because I created a network of separate groups, I have a
# feeling every method is just going to stick to those groups.
# Optimal clustering (maximize modularity).
optimal <- cluster_optimal(g)
print(optimal)
## IGRAPH clustering optimal, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
# Visualize the optimal-modularity communities (12 groups again).
V(g)$attr <- membership(optimal)
# Shapes: circle for every vertex, square for a vertex named "2".
# NOTE(review): the vertices here are named Student1..Student57, so the
# square assignment selects nothing in this network.
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# Only community ids 1-5 used to be recolored, so the remaining groups kept
# the colors left over from earlier code. Every community now gets a color
# of its own.
optimal_groups <- V(g)$attr
optimal_colors <- c("green", "red", "blue", "yellow", "purple")
if (max(optimal_groups) > length(optimal_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  optimal_colors <- rainbow(max(optimal_groups))
}
V(g)$color <- optimal_colors[optimal_groups]
plot(g)
# Fast greedy method on my network.
g.fg <- cluster_fast_greedy(g)
print(g.fg)
## IGRAPH clustering fast greedy, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student16" "Student17" "Student18" "Student19" "Student20" "Student21"
## [7] "Student22" "Student23" "Student24" "Student25" "Student26" "Student27"
##
## $`2`
## [1] "Student40" "Student41" "Student42" "Student43" "Student44" "Student45"
## [7] "Student46"
##
## $`3`
## [1] "Student47" "Student48" "Student49" "Student50" "Student51" "Student52"
## + ... omitted several groups/vertices
any_multiple(g)
## [1] FALSE
# This returned FALSE, which means there are no multiple edges in this
# network.
V(g)$attr <- membership(g.fg)
# Only community ids 1-5 used to be recolored, so the remaining groups kept
# the colors left over from the previous plot. Every community now gets a
# color of its own.
fg_groups <- V(g)$attr
fg_colors <- c("green", "red", "blue", "yellow", "orange")
if (max(fg_groups) > length(fg_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  fg_colors <- rainbow(max(fg_groups))
}
V(g)$color <- fg_colors[fg_groups]
plot(g)
# Leading eigenvector method on my network.
clu <- cluster_leading_eigen(g)
print(clu)
## IGRAPH clustering leading eigenvector, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
# 12 communities are detected for this network in the recorded run (the
# earlier "5 communities" remark was carried over from the enzymes section).
V(g)$attr <- membership(clu)
# Only community ids 1-5 used to be recolored, so the remaining groups kept
# the colors left over from the previous plot. Every community now gets a
# color of its own.
eigen_groups <- V(g)$attr
eigen_colors <- c("green", "red", "blue", "yellow", "orange")
if (max(eigen_groups) > length(eigen_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  eigen_colors <- rainbow(max(eigen_groups))
}
V(g)$color <- eigen_colors[eigen_groups]
# Close the current graphics device before drawing the next plot.
dev.off()
## null device
## 1
plot(g)
# Spinglass method: skipped for this network, because spinglass does not
# work on unconnected graphs.
# Label propagation algorithm.
cm <- cluster_label_prop(g)
print(cm)
## IGRAPH clustering label propagation, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
# Color the vertices by their label propagation community and plot.
# 12 groups are found for this network in the recorded run (the earlier
# "only found 6 groups" remark was carried over from the enzymes section).
V(g)$attr <- membership(cm)
# Only community ids 1-6 used to be recolored, so the remaining groups kept
# the colors left over from the previous plot. Every community now gets a
# color of its own.
lp_groups <- V(g)$attr
lp_colors <- c("green", "red", "blue", "yellow", "purple", "orange")
if (max(lp_groups) > length(lp_colors)) {
  # More groups than named colors: fall back to one rainbow color per group.
  lp_colors <- rainbow(max(lp_groups))
}
V(g)$color <- lp_colors[lp_groups]
plot(g)
# Calculate modularity for my network of students.
# Number of vertices (57 students in the recorded run).
vcount(g)
## [1] 57
# Shapes: circle by default, square assigned by hand to a vertex named "2".
# NOTE(review): the vertices here are named Student1..Student57, so the
# square assignment selects nothing; the table below shows circles only.
V(g)$shape <- "circle"
V(g)[name == "2"]$shape <- "square"
# Tabulate the shapes; NA gets its own column if any shape is missing.
table(V(g)$shape, useNA = "ifany")
##
## circle
## 57
# ---- 1. Ground Truth ----
# The network was built from the labs in `group_membership`, so each
# student's lab is the true community. The shape-based rule used before put
# every vertex into a single group, because no vertex here is named "2".
student_lab <- rep(
  seq_along(group_membership),
  times = lengths(group_membership)
)
ground_truth_membership <- as.numeric(
  student_lab[match(V(g)$name, unlist(group_membership, use.names = FALSE))]
)
ground_truth <- make_clusters(g, ground_truth_membership)

# ---- 2. Community Detection Methods ----
# Bundle a clustering with its modularity score.
with_modularity <- function(clu) {
  list(clu = clu, modularity = modularity(clu))
}

results <- list()

# Fast Greedy: run on this network's graph. The previous call used
# `g_simple`, the simplified ENZYMES graph left over from part one, which is
# why the recorded FastGreedy entry below lists enzyme vertex names with
# 5 groups and mod 0.65. This network has no multiple edges
# (any_multiple(g) is FALSE), so g can be used directly.
# NOTE: the output recorded below predates this fix; re-run to refresh.
fg <- cluster_fast_greedy(g)
results$FastGreedy <- with_modularity(fg)

# Leading Eigenvector
le <- cluster_leading_eigen(g)
results$LeadingEigen <- with_modularity(le)

# Spinglass (gamma = 0.5, 1, 2): left out, this method cannot be used on
# this unconnected network.

# Label Propagation
lp <- cluster_label_prop(g)
results$LabelProp <- with_modularity(lp)

# Girvan-Newman (Edge Betweenness)
gn <- cluster_edge_betweenness(g)
results$GirvanNewman <- with_modularity(gn)

# Optimal Modularity
opt <- cluster_optimal(g)
results$Optimal <- with_modularity(opt)

# ---- 3. Add Ground Truth ----
results$GroundTruth <- with_modularity(ground_truth)
print(results)
## $FastGreedy
## $FastGreedy$clu
## IGRAPH clustering fast greedy, groups: 5, mod: 0.65
## + groups:
## $`1`
## [1] "10" "21" "34" "9" "11" "22" "42" "50" "43" "20" "51" "58" "32" "33"
## [15] "59" "31"
##
## $`2`
## [1] "13" "24" "1" "3" "12" "14" "35" "2" "36" "44" "45" "25" "52" "23"
## [15] "53"
##
## $`3`
## [1] "26" "29" "28" "27" "54" "55" "56" "30" "57"
## + ... omitted several groups/vertices
##
## $FastGreedy$modularity
## [1] 0.6461248
##
##
## $LeadingEigen
## $LeadingEigen$clu
## IGRAPH clustering leading eigenvector, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
##
## $LeadingEigen$modularity
## [1] 0.7709035
##
##
## $LabelProp
## $LabelProp$clu
## IGRAPH clustering label propagation, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
##
## $LabelProp$modularity
## [1] 0.7709035
##
##
## $GirvanNewman
## $GirvanNewman$clu
## IGRAPH clustering edge betweenness, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
##
## $GirvanNewman$modularity
## [1] 0.7709035
##
##
## $Optimal
## $Optimal$clu
## IGRAPH clustering optimal, groups: 12, mod: 0.77
## + groups:
## $`1`
## [1] "Student1" "Student2" "Student3" "Student4"
##
## $`2`
## [1] "Student5" "Student6" "Student7" "Student8" "Student9" "Student10"
##
## $`3`
## [1] "Student11" "Student12" "Student13" "Student14" "Student15"
##
## $`4`
## + ... omitted several groups/vertices
##
## $Optimal$modularity
## [1] 0.7709035
##
##
## $GroundTruth
## $GroundTruth$clu
## IGRAPH clustering unknown, groups: 1, mod: 0
## + groups:
## $`1`
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [25] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## [49] 49 50 51 52 53 54 55 56 57
##
##
## $GroundTruth$modularity
## [1] 0
#again, since I did not define a real ground truth for this network, my ground truth result is mod = 0
#this means that, with no real 'truth' to compare against, all of these values are quite subjective
#However, for modularity optimal got .77, GN got .77, labelprop got .77, eigen got .77, fast greedy got .65, and spinglass didn't work
#this makes sense, as the most obvious route for all of these algorithms was to group based on the 12 communities I made incredibly 'obvious' in my network
#interestingly, fast and greedy got .65 and 5 groups
#(caution: that FastGreedy entry lists enzyme vertex names because it was computed on g_simple, the simplified enzymes graph; when fast greedy was run directly on this network earlier it found 12 groups with mod = .77)
#besides saying spinglass and fast and greedy were poorer methods of grouping (subject to the caution above for fast greedy), I cannot say which is best among the rest, because they all grouped the same for this network
# Note that the echo = FALSE parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.