I used the consensus (“intersection”) of SPIEC-EASI and SPARCC graphs methods. For the plots, I removed nodes with less than 2 connections to make the structure easier to see.
sc.se = intersection(sc.ps6Net, se.ps6Net)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.se)[igraph::degree(sc.se) < 2]
# These edges are removed from the graph
sc.se_plot <- delete.vertices(sc.se, verticesToRemove)
plot_network(sc.se_plot, ps6Top)
Yes. A significant p-value indicates that the network does NOT follow a power law.
fit_power_law(igraph::degree(sc.se))$KS.p
## [1] 0.9358293
sc.seG = intersection(sc.ps6NetG, se.ps6NetG)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.seG)[igraph::degree(sc.seG) < 2]
# These edges are removed from the graph
sc.se_plotG <- delete.vertices(sc.seG, verticesToRemove)
plot_network(sc.se_plotG, ps6TopG)
Module clustering could be improved by considering larger modules or using hierarchical clustering (like the WGCNA method at the end of this document). The results seem to mostly agree based on my limited testing. Specifically, the module associated with latitude shows up no matter what clustering method I use.
fc = cluster_fast_greedy(sc.se)
my_colors = RColorBrewer::brewer.pal(length(unique(fc$membership)),"Paired")
comps = membership(fc)
V(sc.se)$color = my_colors[comps]
V(sc.se)$comm <- fc$membership
# https://stackoverflow.com/questions/51271730/how-to-remove-small-communities-using-igraph-in-r
Small = which(table(fc$membership) < 6)
## Which nodes should be kept?
Keep = V(sc.se)[!(fc$membership %in% Small)]
## Get subgraph & plot
sc.se2 = induced_subgraph(sc.se, Keep)
fc2 = cluster_fast_greedy(sc.se2)
my_colors = RColorBrewer::brewer.pal(length(unique(fc2$membership)),"Paired")
comps = membership(fc2)
V(sc.se2)$color = my_colors[comps]
plot(sc.se2, vertex.label=NA,vertex.size=4)
I first get the module eigengenes, then plot sample correlation with module eigengene. Samples are ordered by bioclim_pca1. The module named “dodgerblue3” clearly is associated with latitude/climate
# change color hex code to name so I can see what module is identified
color_names = names(table(sapply(my_colors, color.id)))
# prep data
datExpr = data.frame(t(otu_table(ps6Top %>%
transform_sample_counts(function(x) x / sum(x)) %>%
subset_taxa(ASV %in% names(V(sc.se2)))
)))
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(datExpr, colors = factor(fc2$membership, labels = color_names))$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# arranged by merged site ID (maybe bad to merge)
#site_order = unique(arrange(data.frame(sample_data(ps6)), bioclim_pca1)$site_id)
# arrange by a variable here to get a sense of geography
site_order = rownames(arrange(data.frame(sample_data(ps6)), bioclim_pca1))
# Add treatment names
MEs0$treatment = row.names(MEs0)
# MEs0$treatment = get_variable(ps6, "latitude")
# MEs0$treatment = get_variable(ps6, "site_id")
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order),
treatment = factor(treatment, levels=site_order)
)
mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-site relationships sorted by bioclim PCA1 (ASV level)", y = "Modules", fill="corr")
library(corrplot); library(psych)
MEs_tab = MEs0 %>% select(-c(treatment)) %>% as.matrix()
dat = data.frame(sample_data(ps6Top)) %>%
cbind(MEs_tab) %>%
na.omit(.)
dat2 = data.frame(sample_data(ps6Top)) %>%
select(-c(subspec,site_id,Description,site_id.1))
ct = corr.test(dat2, MEs_tab)
corrplot(ct$r, p.mat = ct$p.adj, sig.level = 0.1 , insig = "blank")
There is a significant association between the “dodger blue” module eigengene (ME) and latitude.
ggplot(dat, aes(latitude, MEdodgerblue3)) +geom_point()
mod1 = lm(MEdodgerblue3~latitude, data=dat)
anova(mod1)
## Analysis of Variance Table
##
## Response: MEdodgerblue3
## Df Sum Sq Mean Sq F value Pr(>F)
## latitude 1 0.22228 0.22228 35.734 4.143e-08 ***
## Residuals 93 0.57849 0.00622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod1)
##
## Call:
## lm(formula = MEdodgerblue3 ~ latitude, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12282 -0.03567 -0.00345 0.01549 0.43640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.364264 0.061913 5.883 6.29e-08 ***
## latitude -0.008270 0.001383 -5.978 4.14e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07887 on 93 degrees of freedom
## Multiple R-squared: 0.2776, Adjusted R-squared: 0.2698
## F-statistic: 35.73 on 1 and 93 DF, p-value: 4.143e-08
Module “sandy brown” has a negative relationship with pitcher mouth diameter.
ggplot(dat, aes(mouth_diameter, MEsandybrown)) +geom_point()
mod2 = lm(MEsandybrown~mouth_diameter, data=dat)
anova(mod2)
## Analysis of Variance Table
##
## Response: MEsandybrown
## Df Sum Sq Mean Sq F value Pr(>F)
## mouth_diameter 1 0.06021 0.060211 7.1909 0.00867 **
## Residuals 93 0.77871 0.008373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod2)
##
## Call:
## lm(formula = MEsandybrown ~ mouth_diameter, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06564 -0.03974 -0.02101 0.00032 0.62919
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.068885 0.028312 2.433 0.01689 *
## mouth_diameter -0.003139 0.001170 -2.682 0.00867 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09151 on 93 degrees of freedom
## Multiple R-squared: 0.07177, Adjusted R-squared: 0.06179
## F-statistic: 7.191 on 1 and 93 DF, p-value: 0.00867
Module “forest green” has a positive relationship with rosette diameter.
ggplot(dat, aes(rosette_diameter_1, MEforestgreen)) +geom_point()
mod3 = lm(MEforestgreen~rosette_diameter_1, data=dat)
anova(mod3)
## Analysis of Variance Table
##
## Response: MEforestgreen
## Df Sum Sq Mean Sq F value Pr(>F)
## rosette_diameter_1 1 0.13084 0.130836 15.387 0.000159 ***
## Residuals 102 0.86728 0.008503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Module “dark sea green” has a positive relationship with volume.
ggplot(dat, aes(volume, MEdarkseagreen3)) +geom_point()
mod4 = lm(MEdarkseagreen3~volume, data=dat)
anova(mod4)
## Analysis of Variance Table
##
## Response: MEdarkseagreen3
## Df Sum Sq Mean Sq F value Pr(>F)
## volume 1 0.10457 0.104570 11.873 0.0008564 ***
## Residuals 93 0.81907 0.008807
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Keystone nodes in co-occurrence networks tend to have high degree, high closeness centrality, and low betweenness centrality values (Berry and Widder, 2014).
I’m defining a northern region consisting of the 7 north-most sites, to pair with the 7 sites in the southern subspecies range. Then I get the names of all the taxa found in that range
# set up the regions
northmost = data.frame(sample_data(ps6Top)) %>%
arrange(-latitude) %>%
distinct(site_id) %>%
slice(1:7) %>%
pull(site_id)
# get ASV names associated with each region
ps6N = ps6Top %>%
subset_samples(site_id %in% northmost) %>%
filter_taxa(function(x) sum(x) >0, T) %>%
tax_table(.) %>%
data.frame(.)%>%
pull('ASV')
ps6S = ps6Top %>%
subset_samples(subspec=='southern') %>%
filter_taxa(function(x) sum(x) >0, T) %>%
tax_table(.) %>%
data.frame(.)%>%
pull('ASV')
Each range consists of 7 sites (21 samples)
sc.seN = induced.subgraph(sc.se, ps6N)
sc.seS = induced.subgraph(sc.se, ps6S)
The highest degree taxa in the southern network is average in the northern network (i.e., this taxa is potentially a ‘keystone’ in the south and not in the north).
# full graph
btwn = names(sort(betweenness(sc.se, directed = F), decreasing = T)[1:12])
deg = names(sort(igraph::degree(sc.se), decreasing = T)[1:12])
#cc = names(sort(igraph::closeness(sc.se), decreasing = T)[1:10])
key = intersect(btwn,deg)
#tax_table(subset_taxa(ps6Top, ASV=="ASV_1729"))
# northern graph
btwnN = names(sort(betweenness(sc.seN, directed = F), decreasing = T)[1:12])
degN = names(sort(igraph::degree(sc.seN), decreasing = T)[1:12])
keyN = intersect(degN,btwnN)
# southern graph
btwnS = names(sort( betweenness(sc.seS, directed = F), decreasing = T)[1:12])
degS = names(sort(igraph::degree(sc.seS), decreasing = T)[1:12])
keyS = intersect(degS,btwnS)
The colors are consistent between the two graphs. The highest betweenness centrality and degree nodes are labeled. Differences in the modules are evident. Nodes are sized by degree.
#vertex.label = ifelse(igraph::degree(sc.seS) > 5, V(sc.seS)$name, NA)
plot(sc.se,
vertex.size=igraph::degree(sc.se)*2,
vertex.label=ifelse(names(V(sc.se)) %in% key, names(V(sc.se)), NA),
main="Entire network")
plot(sc.seS,
vertex.size=igraph::degree(sc.seS)*2,
vertex.label=ifelse(names(V(sc.seS)) %in% keyS, names(V(sc.seS)), NA),
main="Southern network")
plot(sc.seN,
vertex.size=igraph::degree(sc.seN)*2,
vertex.label=ifelse(names(V(sc.seN)) %in% keyN, names(V(sc.seN)), NA),
main = "Northern network")
Here I use function comm.phylo.cor to calculate the significance of the correlation between co-occurrence and phylogenetic distance compared to the pattern expected under a null model.
ps.network2 = ps6Top %>% subset_taxa(ASV %in% names(V(sc.se2)))
# set up data
rooted2 = phy_tree(ps.network2)
otu2 = data.frame(otu_table(ps.network2))
match2 <- match.phylo.comm(phy = rooted2, comm = t(otu2))
# correlation between co-occurrence and phylogenetic distance (cij = Schoener's index)
phylocor = comm.phylo.cor(match2$comm, match2$phy, metric = "cij", null.model = "sample.taxa.labels", runs = 999)
phylocor$obs.corr.p
## [1] 1.734625e-150
network_results = function(net) {
E(net)$weight = NA
# modularity
fc = cluster_fast_greedy(net,weights =NULL)
modularity = modularity(net,membership(fc))
# graph properties
results = data.frame(clustering.coefficient=transitivity(net), # clustering.coefficient
modularity=modularity,# modularity
mean.degree= mean(igraph::degree(net)),# mean degree
size=length(E(net)),# size - number of edges
order=length(V(net)),# order - number of vertices
mean.distance=mean_distance(net),#average path length
norm.degree=mean(igraph::degree(net)/length(V(net))),#normalized degree
betweenness.centrality=centralization.betweenness(net)$centralization #betweenness centrality
)
return(results)
}
network_results(sc.se)
## clustering.coefficient modularity mean.degree size order mean.distance
## 1 0.2757112 0.878703 2.012384 325 323 7.132944
## norm.degree betweenness.centrality
## 1 0.006230291 0.08042973
Module clustering can be improved, such as merging by by topological dissimilarity
fc = cluster_fast_greedy(sc.se)
my_colors = sample(colors(), length(unique(fc$membership)))
# Another way - using WGCNA
# https://bioinformaticsworkbook.org/dataAnalysis/RNA-Seq/RNA-SeqIntro/wgcna.html#gsc.tab=0
datExpr = data.frame(t(otu_table(ps6Top %>%
transform_sample_counts(function(x) x / sum(x)) %>%
subset_taxa(ASV %in% names(V(sc.se)))
)))
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(datExpr, colors = factor(fc$membership, labels = my_colors))$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
#site_order = unique(arrange(data.frame(sample_data(ps6)), bioclim_pca1)$site_id)
site_order = rownames(arrange(data.frame(sample_data(ps6)), bioclim_pca1))
# Add treatment names
MEs0$treatment = row.names(MEs0)
# MEs0$treatment = get_variable(ps6, "latitude")
# MEs0$treatment = get_variable(ps6, "site_id")
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order),
treatment = factor(treatment, levels=site_order)
)
mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-site relationships sorted by latitude (ASV level)", y = "Modules", fill="corr")
# Turn adjacency into topological overlap
TOM = TOMsimilarity(as_adjacency_matrix(sc.se2, sparse = F));
dissTOM = 1-TOM
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
# Plot the resulting clustering tree (dendrogram)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs0 %>% select(-treatment));
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
# choose threshold: corresponds to correlation of 1-MEDissThres
MEDissThres = 0.5
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
# Turn adjacency into topological overlap
TOM = TOMsimilarity(as_adjacency_matrix(sc.se, sparse = F));
dissTOM = 1-TOM
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
# Plot the resulting clustering tree (dendrogram)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs0 %>% select(-treatment));
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
# choose threshold: corresponds to correlation of 1-MEDissThres
MEDissThres = 0.4
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
# Get Module Eigengenes per cluster
#MEs1 <- moduleEigengenes(datExpr, colors = dynamicColors)$eigengenes
MEs1 <- mergedMEs
# Reorder modules so similar modules are next to each other
MEs1 <- orderMEs(MEs1)
module_order = names(MEs1) %>% gsub("ME","", .)
MEs1$treatment = row.names(MEs1)
#MEs1$treatment = get_variable(ps6Top, "site_id")
# tidy & plot data
mME = MEs1 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order),
treatment = factor(treatment, levels=site_order)
)
mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-site relationships sorted by bioclim_pca1 (ASV level)", y = "Modules", fill="corr")
# open Cytoscape app on desktop before running command
library(RCy3)
createNetworkFromIgraph(sc.se)