# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install('mixOmics')
# BiocManager::install('igraph')
library(mixOmics)
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: ggplot2
##
## Loaded mixOmics 6.26.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us: citation('mixOmics')
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(RColorBrewer)
Directory for all output files
setwd("C://Users//Malina//Desktop//igraphs//")
data(breast.TCGA)
mRNA = breast.TCGA$data.train$mrna
dim(mRNA)
## [1] 150 200
head(mRNA[, 1:20])
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS HLA-H SEMA4A
## A0FJ 4.362183 7.533461 3.956124 4.457170 2.256817 6.017940 5.006907 3.217812
## A13E 1.984492 7.455194 5.427623 5.440957 4.028813 4.341692 6.178668 2.864659
## A0G0 1.727323 8.079968 2.227300 5.543480 2.629855 6.363030 6.039563 5.946028
## A0SX 4.363996 5.793750 3.544866 4.737114 4.269101 4.001104 7.087633 5.007565
## A143 2.447562 7.158993 4.691256 4.808728 2.442135 7.029723 5.936138 5.901459
## A0DA 4.770798 8.748061 4.305401 5.307480 3.239909 4.236539 6.909727 6.591109
## ETS2 LIMD2 NME3 ZEB1 CDCP1 GIYD2 RTKN2 MANSC1
## A0FJ 4.734446 5.099598 4.560289 3.991982 6.416244 2.468115 4.583978 8.076607
## A13E 5.411029 4.211397 5.139333 3.761482 6.610628 4.982935 5.172496 7.298563
## A0G0 5.651670 3.304513 4.446450 3.845990 6.413640 3.202417 4.237574 7.989699
## A0SX 5.902449 5.479451 3.757740 5.226035 6.018762 4.297181 4.245987 5.414648
## A143 6.641225 5.508654 4.244347 3.541100 7.088876 5.711343 3.957202 8.490567
## A0DA 5.858016 3.766283 4.259354 4.800017 6.052282 4.006990 2.069626 5.896195
## TAGLN IFIT3 ARL4C HTRA1
## A0FJ 8.300742 3.118577 3.928634 6.432646
## A13E 7.908038 5.185933 6.268468 6.727141
## A0G0 7.192957 4.525045 4.723262 6.343885
## A0SX 7.556347 7.250850 9.427284 7.860616
## A143 6.418898 3.972342 5.881047 6.898658
## A0DA 10.889706 5.707063 7.663267 8.288076
Setting the right correlation threshold is very important
correlation_matrix <- cor(mRNA,method = "spearman")
head(correlation_matrix[,1:30])
## RTN2 NDRG2 CCDC113 FAM63A ACADS
## RTN2 1.00000000 -0.048793280 0.13298902 0.480673808 0.36124450
## NDRG2 -0.04879328 1.000000000 -0.12397529 -0.002931686 0.06635139
## CCDC113 0.13298902 -0.123975288 1.00000000 0.209433308 0.04908485
## FAM63A 0.48067381 -0.002931686 0.20943331 1.000000000 0.41252856
## ACADS 0.36124450 0.066351393 0.04908485 0.412528557 1.00000000
## GMDS -0.35020756 0.151585404 -0.10945731 -0.359623094 -0.24201787
## GMDS HLA-H SEMA4A ETS2 LIMD2 NME3
## RTN2 -0.3502076 -0.18201520 0.020809814 -0.20713276 -0.3517259 0.348895506
## NDRG2 0.1515854 0.03060580 0.217999022 0.28118405 0.1885293 -0.030748033
## CCDC113 -0.1094573 -0.09992444 -0.011831637 -0.14477621 -0.1753482 0.003209032
## FAM63A -0.3596231 -0.20136539 0.035683364 -0.16940664 -0.5446269 0.438549269
## ACADS -0.2420179 -0.08003378 0.005911374 -0.02180186 -0.2522228 0.467130095
## GMDS 1.0000000 0.28028090 -0.023515712 0.07581670 0.3524441 -0.246149607
## ZEB1 CDCP1 GIYD2 RTKN2 MANSC1 TAGLN
## RTN2 0.2837371 -0.1839460 -0.006309614 -0.4674786 0.03199253 0.09363794
## NDRG2 -0.1428810 -0.1033379 -0.005513134 0.1433575 0.30352460 0.10228543
## CCDC113 0.1061469 0.1305960 -0.208114138 -0.1492031 0.09083604 -0.10911596
## FAM63A 0.2276670 -0.1877186 0.022114761 -0.4307267 0.22959065 -0.02341260
## ACADS 0.1443282 -0.3104476 0.178953731 -0.2745633 0.14564381 -0.02516912
## GMDS -0.4571563 0.3005414 0.086345171 0.2002382 0.14159740 -0.08982621
## IFIT3 ARL4C HTRA1 KIF13B CPPED1
## RTN2 -1.260198e-01 -0.28892129 0.30295569 0.410295569 0.05916885
## NDRG2 -8.964132e-02 0.18275834 -0.15070003 0.001655185 -0.14674963
## CCDC113 -1.450144e-01 -0.09693764 0.08851416 0.239994666 -0.08134228
## FAM63A -9.257834e-02 -0.44316103 0.17354905 0.514470865 -0.02743766
## ACADS -4.158229e-02 -0.31955020 0.10732388 0.302326326 -0.04805369
## GMDS 8.000356e-05 0.08244811 -0.39703631 -0.310426241 0.04903151
## SKAP2 ASPM KDM4B TBXAS1 MT1X MED13L
## RTN2 0.025432241 -0.52937997 0.5000276 0.04533713 -0.27281746 0.4132290
## NDRG2 0.007109649 0.03112494 -0.1729268 -0.08427930 0.29178008 -0.1372701
## CCDC113 -0.027796791 -0.13243433 0.2497089 0.04886439 0.02811681 0.2863185
## FAM63A -0.129401307 -0.45335170 0.6450936 -0.01674563 -0.35489755 0.5588675
## ACADS 0.069352416 -0.36557536 0.3794640 0.08231299 -0.08518601 0.3215201
## GMDS 0.007394106 0.28782613 -0.4036642 -0.11679275 0.20686608 -0.4361776
## SNORA8 RGS1
## RTN2 -0.4231104 -0.10211120
## NDRG2 0.1694564 0.04553625
## CCDC113 -0.1565598 0.04460465
## FAM63A -0.3656998 -0.10507667
## ACADS -0.1336006 0.01559003
## GMDS 0.2257931 -0.06863772
threshold <- 0.5
# Create an adjacency matrix based on the correlation threshold
adjacency_matrix <- ifelse(abs(correlation_matrix) > threshold, 1, 0)
head(adjacency_matrix[,1:20])
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS HLA-H SEMA4A ETS2 LIMD2 NME3 ZEB1
## RTN2 1 0 0 0 0 0 0 0 0 0 0 0
## NDRG2 0 1 0 0 0 0 0 0 0 0 0 0
## CCDC113 0 0 1 0 0 0 0 0 0 0 0 0
## FAM63A 0 0 0 1 0 0 0 0 0 1 0 0
## ACADS 0 0 0 0 1 0 0 0 0 0 0 0
## GMDS 0 0 0 0 0 1 0 0 0 0 0 0
## CDCP1 GIYD2 RTKN2 MANSC1 TAGLN IFIT3 ARL4C HTRA1
## RTN2 0 0 0 0 0 0 0 0
## NDRG2 0 0 0 0 0 0 0 0
## CCDC113 0 0 0 0 0 0 0 0
## FAM63A 0 0 0 0 0 0 0 0
## ACADS 0 0 0 0 0 0 0 0
## GMDS 0 0 0 0 0 0 0 0
# Remove the diagonal values
diag(adjacency_matrix) <- 0
head(adjacency_matrix[,1:20])
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS HLA-H SEMA4A ETS2 LIMD2 NME3 ZEB1
## RTN2 0 0 0 0 0 0 0 0 0 0 0 0
## NDRG2 0 0 0 0 0 0 0 0 0 0 0 0
## CCDC113 0 0 0 0 0 0 0 0 0 0 0 0
## FAM63A 0 0 0 0 0 0 0 0 0 1 0 0
## ACADS 0 0 0 0 0 0 0 0 0 0 0 0
## GMDS 0 0 0 0 0 0 0 0 0 0 0 0
## CDCP1 GIYD2 RTKN2 MANSC1 TAGLN IFIT3 ARL4C HTRA1
## RTN2 0 0 0 0 0 0 0 0
## NDRG2 0 0 0 0 0 0 0 0
## CCDC113 0 0 0 0 0 0 0 0
## FAM63A 0 0 0 0 0 0 0 0
## ACADS 0 0 0 0 0 0 0 0
## GMDS 0 0 0 0 0 0 0 0
graph <- graph_from_adjacency_matrix(adjacency_matrix ,
mode = "undirected")
graph
## IGRAPH 704cdc0 UN-- 200 835 --
## + attr: name (v/c)
## + edges from 704cdc0 (vertex names):
## [1] RTN2 --ASPM RTN2 --KDM4B RTN2 --LASS4 RTN2 --LRIG1
## [5] RTN2 --TTC39A RTN2 --CCNA2 NDRG2 --OGFRL1 FAM63A--LIMD2
## [9] FAM63A--KIF13B FAM63A--KDM4B FAM63A--MED13L FAM63A--ZNF552
## [13] FAM63A--SLC19A2 FAM63A--LRIG1 FAM63A--ILDR1 FAM63A--STC2
## [17] FAM63A--HDAC11 FAM63A--TTC39A FAM63A--MTL5 FAM63A--FMNL2
## [21] FAM63A--CCNA2 FAM63A--PREX1 FAM63A--ELP2 HLA-H --IFIT3
## [25] HLA-H --NCF4 HLA-H --IFIH1 HLA-H --C1QB LIMD2 --KDM4B
## [29] LIMD2 --MED13L LIMD2 --ZNF552 LIMD2 --LYN LIMD2 --SLC43A3
## + ... omitted several edges
#Cluster based on edge betweenness
ceb = cluster_edge_betweenness(graph)
#Extract community memberships from the clustering
community_membership <- membership(ceb)
head(community_membership)
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS
## 1 2 3 1 4 5
length(ceb)
## [1] 68
# Count the number of nodes in each cluster
cluster_sizes <- table(community_membership)
cluster_sizes
## community_membership
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 64 2 1 1 1 5 1 1 36 1 1 1 28 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1
## 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Add cluster memberships as a vertex attribute
igraph::V(graph)$cluster_membership <- community_membership
# Map cluster memberships to colors using a color palette function
cluster_colors <- rainbow(max(community_membership))
head(cluster_colors)
## [1] "#FF0000" "#FF1700" "#FF2D00" "#FF4300" "#FF5A00" "#FF7100"
# Create a color variable based on cluster memberships
V(graph)$color_variable <- cluster_colors[community_membership]
# vetrex degree
vertex_degrees <- igraph::degree(graph)
head(vertex_degrees)
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS
## 6 1 0 16 0 0
# Add vertex degree as a vertex attribute
igraph::V(graph)$degree <- vertex_degrees
vertex.attr = list(
cluster_membership = V(graph)$cluster_membership,
name = V(graph)$name,
degrees = V(graph)$degree,
color = V(graph)$color_variable)
# Specify the file name for saving the GraphML file
graphml_file <- "graph_mRNA_with_metadata_unweighted_undirected.graphml"
# Write the igraph object to GraphML format with metadata
write_graph(
graph,
graphml_file,
format = "graphml")
# Combine vertex attributes into a data frame
vertex_data <- data.frame(vertex.attr)
head(vertex_data)
## cluster_membership name degrees color
## 1 1 RTN2 6 #FF0000
## 2 2 NDRG2 1 #FF1700
## 3 3 CCDC113 0 #FF2D00
## 4 1 FAM63A 16 #FF0000
## 5 4 ACADS 0 #FF4300
## 6 5 GMDS 0 #FF5A00
write.csv(vertex_data, "vertex_data_mRNA_with_metadata_unweighted_undirected.csv", row.names = FALSE)
# Calculate degree centrality (we have done that previously too)
degree_centrality <- degree(graph)
# Now checking the mean value of the degree centrality for all nodes
mean(degree_centrality)
## [1] 8.35
# Average path length
average.path.length(graph)
## Warning: `average.path.length()` was deprecated in igraph 2.0.0.
## ℹ Please use `mean_distance()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## [1] 2.794407
# Calculate betweenness centrality
betweenness_centrality <- betweenness(graph)
# Now checking the mean value of the betweenness centrality for all nodes
mean(betweenness_centrality)
## [1] 81.17
# Add the betweenness centrality as an attribute of the vertices
V(graph)$betweenness = betweenness_centrality
# Calculate closeness centrality and add as an attr of the vertices
closeness_centrality <- igraph::closeness(graph)
V(graph)$closeness = closeness_centrality
# Calculate eigenvector and add as an attr of the vertices
Eigenvectors = eigen_centrality(graph)$vector
V(graph)$eigen = Eigenvectors
# Add the additional node properties to an extended dataframe
vertex.attr.ext <- list(
cluster_membership = V(graph)$cluster_membership,
name = V(graph)$name,
degrees = V(graph)$degree,
color = V(graph)$color_variable,
betweeness = V(graph)$betweenness,
closeness = V(graph)$closeness,
eigenvectors = V(graph)$eigen
)
vertex_data_ext <- data.frame(vertex.attr.ext)
head(vertex_data_ext)
## cluster_membership name degrees color betweeness closeness
## 1 1 RTN2 6 #FF0000 1.613534 0.002785515
## 2 2 NDRG2 1 #FF1700 0.000000 1.000000000
## 3 3 CCDC113 0 #FF2D00 0.000000 NaN
## 4 1 FAM63A 16 #FF0000 75.612904 0.003003003
## 5 4 ACADS 0 #FF4300 0.000000 NaN
## 6 5 GMDS 0 #FF5A00 0.000000 NaN
## eigenvectors
## 1 9.697434e-02
## 2 3.547005e-17
## 3 2.883235e-17
## 4 1.905223e-01
## 5 2.883235e-17
## 6 2.883235e-17
# In case you'd like to save the extended vertex attr list you can do
write.csv(vertex_data_ext, "vertex_data_mRNA_with_metadata_EXT_unweighted_undirected.csv", row.names = FALSE)
# Calculate clustering coefficient
clustering_coefficient <- transitivity(graph, type = "global")
clustering_coefficient
## [1] 0.5199092
# Compute and plot degree distribution
ddist<- igraph::degree.distribution(graph)
## Warning: `degree.distribution()` was deprecated in igraph 2.0.0.
## ℹ Please use `degree_distribution()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Data frame needed for ggplot2
df <- data.frame(Degree = as.factor((seq_along(ddist)) - 1),
Fraction = ddist)
ggplot(data = df, aes(x = Degree, y = Fraction, group = 1)) +
geom_line() +
geom_point() +
theme_bw()
# The network has a large number of singletons and sparsely connected nodes and only a small number of nodes with a higher degree of 27 or more.
# Alternative 2: Using cluster_walktrap
cwt <- cluster_walktrap(graph)
membership2 <- membership(cwt)
table(membership2)
## membership2
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 33 30 65 2 5 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
data(breast.TCGA)
mRNA = breast.TCGA$data.train$mrna
# Calculate pairwise correlation between genes
correlation_matrix <- cor(mRNA,method = "spearman")
# Setting the right correlation threshold is very important
threshold <- 0.5
# Create an adjacency matrix based on the correlation threshold
# This matrix and graph will only serve to get directional corr scores
# Weights have to be positive values for the clustering to work
adjacency_matrix <- ifelse(abs(correlation_matrix) > threshold, as.numeric(correlation_matrix), 0)
head(adjacency_matrix[,1:40])
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS HLA-H SEMA4A ETS2 LIMD2 NME3
## RTN2 1 0 0 0 0 0 0 0 0 0.0000000 0
## NDRG2 0 1 0 0 0 0 0 0 0 0.0000000 0
## CCDC113 0 0 1 0 0 0 0 0 0 0.0000000 0
## FAM63A 0 0 0 1 0 0 0 0 0 -0.5446269 0
## ACADS 0 0 0 0 1 0 0 0 0 0.0000000 0
## GMDS 0 0 0 0 0 1 0 0 0 0.0000000 0
## ZEB1 CDCP1 GIYD2 RTKN2 MANSC1 TAGLN IFIT3 ARL4C HTRA1 KIF13B CPPED1
## RTN2 0 0 0 0 0 0 0 0 0 0.0000000 0
## NDRG2 0 0 0 0 0 0 0 0 0 0.0000000 0
## CCDC113 0 0 0 0 0 0 0 0 0 0.0000000 0
## FAM63A 0 0 0 0 0 0 0 0 0 0.5144709 0
## ACADS 0 0 0 0 0 0 0 0 0 0.0000000 0
## GMDS 0 0 0 0 0 0 0 0 0 0.0000000 0
## SKAP2 ASPM KDM4B TBXAS1 MT1X MED13L SNORA8 RGS1 CBX6 WWC2
## RTN2 0 -0.52938 0.5000276 0 0 0.0000000 0 0 0 0
## NDRG2 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## CCDC113 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## FAM63A 0 0.00000 0.6450936 0 0 0.5588675 0 0 0 0
## ACADS 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## GMDS 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## TNFRSF12A ZNF552 MAPRE2 SEMA5A STAT5A FLI1 COL15A1 C7orf55
## RTN2 0 0.0000000 0 0 0 0 0 0
## NDRG2 0 0.0000000 0 0 0 0 0 0
## CCDC113 0 0.0000000 0 0 0 0 0 0
## FAM63A 0 0.6070012 0 0 0 0 0 0
## ACADS 0 0.0000000 0 0 0 0 0 0
## GMDS 0 0.0000000 0 0 0 0 0 0
diag(adjacency_matrix) <- 0
graph1 <-graph_from_adjacency_matrix(adjacency_matrix ,
mode = "undirected", weighted = TRUE)
graph1
## IGRAPH 7514028 UNW- 200 835 --
## + attr: name (v/c), weight (e/n)
## + edges from 7514028 (vertex names):
## [1] RTN2 --ASPM RTN2 --KDM4B RTN2 --LASS4 RTN2 --LRIG1
## [5] RTN2 --TTC39A RTN2 --CCNA2 NDRG2 --OGFRL1 FAM63A--LIMD2
## [9] FAM63A--KIF13B FAM63A--KDM4B FAM63A--MED13L FAM63A--ZNF552
## [13] FAM63A--SLC19A2 FAM63A--LRIG1 FAM63A--ILDR1 FAM63A--STC2
## [17] FAM63A--HDAC11 FAM63A--TTC39A FAM63A--MTL5 FAM63A--FMNL2
## [21] FAM63A--CCNA2 FAM63A--PREX1 FAM63A--ELP2 HLA-H --IFIT3
## [25] HLA-H --NCF4 HLA-H --IFIH1 HLA-H --C1QB LIMD2 --KDM4B
## [29] LIMD2 --MED13L LIMD2 --ZNF552 LIMD2 --LYN LIMD2 --SLC43A3
## + ... omitted several edges
# Create the graph with weights (absolute (positive) corr values)
adjacency_matrix <- ifelse(abs(correlation_matrix) > threshold, as.numeric(abs(correlation_matrix)), 0)
head(adjacency_matrix[,1:40])
## RTN2 NDRG2 CCDC113 FAM63A ACADS GMDS HLA-H SEMA4A ETS2 LIMD2 NME3
## RTN2 1 0 0 0 0 0 0 0 0 0.0000000 0
## NDRG2 0 1 0 0 0 0 0 0 0 0.0000000 0
## CCDC113 0 0 1 0 0 0 0 0 0 0.0000000 0
## FAM63A 0 0 0 1 0 0 0 0 0 0.5446269 0
## ACADS 0 0 0 0 1 0 0 0 0 0.0000000 0
## GMDS 0 0 0 0 0 1 0 0 0 0.0000000 0
## ZEB1 CDCP1 GIYD2 RTKN2 MANSC1 TAGLN IFIT3 ARL4C HTRA1 KIF13B CPPED1
## RTN2 0 0 0 0 0 0 0 0 0 0.0000000 0
## NDRG2 0 0 0 0 0 0 0 0 0 0.0000000 0
## CCDC113 0 0 0 0 0 0 0 0 0 0.0000000 0
## FAM63A 0 0 0 0 0 0 0 0 0 0.5144709 0
## ACADS 0 0 0 0 0 0 0 0 0 0.0000000 0
## GMDS 0 0 0 0 0 0 0 0 0 0.0000000 0
## SKAP2 ASPM KDM4B TBXAS1 MT1X MED13L SNORA8 RGS1 CBX6 WWC2
## RTN2 0 0.52938 0.5000276 0 0 0.0000000 0 0 0 0
## NDRG2 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## CCDC113 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## FAM63A 0 0.00000 0.6450936 0 0 0.5588675 0 0 0 0
## ACADS 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## GMDS 0 0.00000 0.0000000 0 0 0.0000000 0 0 0 0
## TNFRSF12A ZNF552 MAPRE2 SEMA5A STAT5A FLI1 COL15A1 C7orf55
## RTN2 0 0.0000000 0 0 0 0 0 0
## NDRG2 0 0.0000000 0 0 0 0 0 0
## CCDC113 0 0.0000000 0 0 0 0 0 0
## FAM63A 0 0.6070012 0 0 0 0 0 0
## ACADS 0 0.0000000 0 0 0 0 0 0
## GMDS 0 0.0000000 0 0 0 0 0 0
diag(adjacency_matrix) <- 0
# Now the weighted option is going to be set to TRUE
graph2 <-graph_from_adjacency_matrix(adjacency_matrix ,
mode = "undirected", weighted = TRUE)
graph2
## IGRAPH 752372c UNW- 200 835 --
## + attr: name (v/c), weight (e/n)
## + edges from 752372c (vertex names):
## [1] RTN2 --ASPM RTN2 --KDM4B RTN2 --LASS4 RTN2 --LRIG1
## [5] RTN2 --TTC39A RTN2 --CCNA2 NDRG2 --OGFRL1 FAM63A--LIMD2
## [9] FAM63A--KIF13B FAM63A--KDM4B FAM63A--MED13L FAM63A--ZNF552
## [13] FAM63A--SLC19A2 FAM63A--LRIG1 FAM63A--ILDR1 FAM63A--STC2
## [17] FAM63A--HDAC11 FAM63A--TTC39A FAM63A--MTL5 FAM63A--FMNL2
## [21] FAM63A--CCNA2 FAM63A--PREX1 FAM63A--ELP2 HLA-H --IFIT3
## [25] HLA-H --NCF4 HLA-H --IFIH1 HLA-H --C1QB LIMD2 --KDM4B
## [29] LIMD2 --MED13L LIMD2 --ZNF552 LIMD2 --LYN LIMD2 --SLC43A3
## + ... omitted several edges
# We can have a quick sneak peak at the values of the weights
# They are assigned automatically to the edge graph property (E) as weight
# e.g E(graph2)$weight
head(E(graph2)$weight)
## [1] 0.5293800 0.5000276 0.5028899 0.5180195 0.5640731 0.5570328
# Now we will proceed to get the vertices attributes (short version)
# Calculate the degree of each vertex
vertex_degrees <- igraph::degree(graph2)
# Cluster based on cluster_fast_greedy, which is a function that considers the effect of the weights when defining clusters
cfg <- cluster_fast_greedy(graph2)
membership2 <- membership(cfg)
table(membership2)
## membership2
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 40 52 41 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Cluster 1,2,3 have the highest number of members
# Extract community memberships from the clustering
community_membership <- membership(cfg)
# Add cluster memberships as a vertex attribute
igraph::V(graph2)$cluster_membership <- community_membership
# Calculate vetrex degree
vertex_degrees <- igraph::degree(graph2)
# Add degree as a vertex attribute
igraph::V(graph2)$degree <- vertex_degrees
# Map cluster memberships to colors using a color palette function
# Using ColorBrewer palettes
# The number of colors in Set1 is limited, so not all clusters will have color, but in reality we only need colors for the first several clusters that had most of members
cluster_colors <- brewer.pal(max(community_membership), "Set1")
## Warning in brewer.pal(max(community_membership), "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
# Create a color variable based on cluster memberships
V(graph2)$color_variable <- cluster_colors[community_membership]
# Weight is already added as an attr of the edges
# Now we are going to add the corr scores from graph1 as corr_scores attr to graph2
# Corr scores from graph1 are actually exactly the same weights as the weights of graph2 but with sign (pos or neg) respectively
E(graph2)$corr_scores = E(graph1)$weight
# Add the node properties to the vertex attr list
vertex.attr = list(
cluster_membership = V(graph2)$cluster_membership,
name = V(graph2)$name,
degrees = V(graph2)$degree,
color = V(graph2)$color_variable,
size = V(graph2)$degree)
# Add the edge property to the edge attr list
edge.attr = list(
corr_scores = round(E(graph2)$corr_scores, 3 ))
# Specify the file path for saving the GraphML file
graphml_file <- "graph_mRNA_with_metadata_undirected_WEIGHTED.graphml"
# Write the igraph object to GraphML format with metadata
write_graph(
graph2,
graphml_file,
format = "graphml")
I hope the demonstration was helpful!
Tune on for more content like this! :)