0. Construct network

I used the consensus (“intersection”) of SPIEC-EASI and SPARCC graphs methods. For the plots, I removed nodes with less than 2 connections to make the structure easier to see.

ASV

sc.se = intersection(sc.ps6Net, se.ps6Net)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.se)[igraph::degree(sc.se) < 2]
# These edges are removed from the graph
sc.se_plot <- delete.vertices(sc.se, verticesToRemove) 
plot_network(sc.se_plot, ps6Top)

Is the network scale-free?

Yes. A significant p-value indicates that the network does NOT follow a power law.

fit_power_law(igraph::degree(sc.se))$KS.p

## [1] 0.9358293

Genus

sc.seG = intersection(sc.ps6NetG, se.ps6NetG)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.seG)[igraph::degree(sc.seG) < 2]
# These edges are removed from the graph
sc.se_plotG <- delete.vertices(sc.seG, verticesToRemove) 
plot_network(sc.se_plotG, ps6TopG)

1. Associate modules with variables

Cluster ASVs into modules

Module clustering could be improved by considering larger modules or using hierarchical clustering (like the WGCNA method at the end of this document). The results seem to mostly agree based on my limited testing. Specifically, the module associated with latitude shows up no matter what clustering method I use.

fc = cluster_fast_greedy(sc.se)
my_colors = RColorBrewer::brewer.pal(length(unique(fc$membership)),"Paired")

comps = membership(fc)
V(sc.se)$color = my_colors[comps]

V(sc.se)$comm <- fc$membership
# https://stackoverflow.com/questions/51271730/how-to-remove-small-communities-using-igraph-in-r
Small = which(table(fc$membership) < 6)
## Which nodes should be kept?
Keep = V(sc.se)[!(fc$membership %in% Small)]

## Get subgraph & plot
sc.se2  = induced_subgraph(sc.se, Keep)
fc2 = cluster_fast_greedy(sc.se2)
my_colors = RColorBrewer::brewer.pal(length(unique(fc2$membership)),"Paired")
comps = membership(fc2)
V(sc.se2)$color = my_colors[comps]

plot(sc.se2, vertex.label=NA,vertex.size=4)

Calcaulte module eigengenes

I first get the module eigengenes, then plot sample correlation with module eigengene. Samples are ordered by bioclim_pca1. The module named “dodgerblue3” clearly is associated with latitude/climate

# change color hex code to name so I can see what module is identified
color_names = names(table(sapply(my_colors, color.id)))

# prep data
datExpr = data.frame(t(otu_table(ps6Top %>% 
                                   transform_sample_counts(function(x) x / sum(x)) %>%
                                   subset_taxa(ASV %in% names(V(sc.se2)))
                                   )))
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(datExpr, colors = factor(fc2$membership, labels = color_names))$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# arranged by merged site ID (maybe bad to merge)
#site_order = unique(arrange(data.frame(sample_data(ps6)), bioclim_pca1)$site_id) 

# arrange by a variable here to get a sense of geography
site_order = rownames(arrange(data.frame(sample_data(ps6)), bioclim_pca1))

# Add treatment names
MEs0$treatment = row.names(MEs0)
# MEs0$treatment = get_variable(ps6, "latitude")
# MEs0$treatment = get_variable(ps6, "site_id")

# tidy & plot data
mME = MEs0 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order),
    treatment = factor(treatment, levels=site_order)
  )

mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-site relationships sorted by bioclim PCA1 (ASV level)", y = "Modules", fill="corr")

Regression of modules with environmental data

First check an exploratory plot for possible relationships

library(corrplot); library(psych)

MEs_tab = MEs0 %>% select(-c(treatment)) %>% as.matrix()

dat = data.frame(sample_data(ps6Top)) %>%
  cbind(MEs_tab) %>%
  na.omit(.)

dat2 = data.frame(sample_data(ps6Top)) %>%
  select(-c(subspec,site_id,Description,site_id.1))

ct = corr.test(dat2, MEs_tab)

corrplot(ct$r, p.mat = ct$p.adj, sig.level = 0.1 , insig = "blank")

Dodger blue

There is a significant association between the “dodger blue” module eigengene (ME) and latitude.

ggplot(dat, aes(latitude, MEdodgerblue3)) +geom_point()

mod1 = lm(MEdodgerblue3~latitude, data=dat)
anova(mod1)

## Analysis of Variance Table
## 
## Response: MEdodgerblue3
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## latitude   1 0.22228 0.22228  35.734 4.143e-08 ***
## Residuals 93 0.57849 0.00622                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mod1)

## 
## Call:
## lm(formula = MEdodgerblue3 ~ latitude, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12282 -0.03567 -0.00345  0.01549  0.43640 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.364264   0.061913   5.883 6.29e-08 ***
## latitude    -0.008270   0.001383  -5.978 4.14e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07887 on 93 degrees of freedom
## Multiple R-squared:  0.2776, Adjusted R-squared:  0.2698 
## F-statistic: 35.73 on 1 and 93 DF,  p-value: 4.143e-08

Sandy brown

Module “sandy brown” has a negative relationship with pitcher mouth diameter.

ggplot(dat, aes(mouth_diameter, MEsandybrown)) +geom_point()

mod2 = lm(MEsandybrown~mouth_diameter, data=dat)
anova(mod2)

## Analysis of Variance Table
## 
## Response: MEsandybrown
##                Df  Sum Sq  Mean Sq F value  Pr(>F)   
## mouth_diameter  1 0.06021 0.060211  7.1909 0.00867 **
## Residuals      93 0.77871 0.008373                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mod2)

## 
## Call:
## lm(formula = MEsandybrown ~ mouth_diameter, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.06564 -0.03974 -0.02101  0.00032  0.62919 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     0.068885   0.028312   2.433  0.01689 * 
## mouth_diameter -0.003139   0.001170  -2.682  0.00867 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09151 on 93 degrees of freedom
## Multiple R-squared:  0.07177,    Adjusted R-squared:  0.06179 
## F-statistic: 7.191 on 1 and 93 DF,  p-value: 0.00867

Forest green

Module “forest green” has a positive relationship with rosette diameter.

ggplot(dat, aes(rosette_diameter_1, MEforestgreen)) +geom_point()

mod3 = lm(MEforestgreen~rosette_diameter_1, data=dat)
anova(mod3)

## Analysis of Variance Table
## 
## Response: MEforestgreen
##                     Df  Sum Sq  Mean Sq F value   Pr(>F)    
## rosette_diameter_1   1 0.13084 0.130836  15.387 0.000159 ***
## Residuals          102 0.86728 0.008503                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dark sea green

Module “dark sea green” has a positive relationship with volume.

ggplot(dat, aes(volume, MEdarkseagreen3)) +geom_point()

mod4 = lm(MEdarkseagreen3~volume, data=dat)
anova(mod4)

## Analysis of Variance Table
## 
## Response: MEdarkseagreen3
##           Df  Sum Sq  Mean Sq F value    Pr(>F)    
## volume     1 0.10457 0.104570  11.873 0.0008564 ***
## Residuals 93 0.81907 0.008807                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2. Do node-level centrality values differ across regions?

Keystone nodes in co-occurrence networks tend to have high degree, high closeness centrality, and low betweenness centrality values (Berry and Widder, 2014).

I’m defining a northern region consisting of the 7 north-most sites, to pair with the 7 sites in the southern subspecies range. Then I get the names of all the taxa found in that range

# set up the regions
northmost = data.frame(sample_data(ps6Top)) %>% 
  arrange(-latitude) %>%
  distinct(site_id) %>%
  slice(1:7) %>%
  pull(site_id)
# get ASV names associated with each region
ps6N = ps6Top %>% 
  subset_samples(site_id %in% northmost) %>%
  filter_taxa(function(x) sum(x) >0, T) %>%
  tax_table(.) %>%
  data.frame(.)%>%
  pull('ASV')
ps6S = ps6Top %>% 
  subset_samples(subspec=='southern') %>%
  filter_taxa(function(x) sum(x) >0, T) %>%
  tax_table(.) %>%
  data.frame(.)%>%
  pull('ASV')

Subset the networks by northern and southern range

Each range consists of 7 sites (21 samples)

sc.seN = induced.subgraph(sc.se, ps6N)
sc.seS = induced.subgraph(sc.se, ps6S)

Calculate degree, betweenness centrality, and closeness centrality values for nodes in each subgraph

The highest degree taxa in the southern network is average in the northern network (i.e., this taxa is potentially a ‘keystone’ in the south and not in the north).

# full graph
btwn = names(sort(betweenness(sc.se, directed = F), decreasing = T)[1:12])
deg = names(sort(igraph::degree(sc.se), decreasing = T)[1:12])
#cc = names(sort(igraph::closeness(sc.se), decreasing = T)[1:10])

key = intersect(btwn,deg)
#tax_table(subset_taxa(ps6Top, ASV=="ASV_1729"))

# northern graph
btwnN = names(sort(betweenness(sc.seN, directed = F), decreasing = T)[1:12])
degN = names(sort(igraph::degree(sc.seN), decreasing = T)[1:12])
keyN = intersect(degN,btwnN)
# southern graph
btwnS = names(sort( betweenness(sc.seS, directed = F), decreasing = T)[1:12])
degS = names(sort(igraph::degree(sc.seS), decreasing = T)[1:12])
keyS = intersect(degS,btwnS)

Plot the regional networks (subset from the larger network)

The colors are consistent between the two graphs. The highest betweenness centrality and degree nodes are labeled. Differences in the modules are evident. Nodes are sized by degree.

#vertex.label = ifelse(igraph::degree(sc.seS) > 5, V(sc.seS)$name, NA)

plot(sc.se, 
     vertex.size=igraph::degree(sc.se)*2, 
     vertex.label=ifelse(names(V(sc.se)) %in% key, names(V(sc.se)), NA), 
     main="Entire network")

plot(sc.seS, 
     vertex.size=igraph::degree(sc.seS)*2, 
     vertex.label=ifelse(names(V(sc.seS)) %in% keyS, names(V(sc.seS)), NA), 
     main="Southern network")

plot(sc.seN, 
     vertex.size=igraph::degree(sc.seN)*2, 
     vertex.label=ifelse(names(V(sc.seN)) %in% keyN, names(V(sc.seN)), NA),  
     main = "Northern network")

3. Is relatedness within modules greater than null model? (Treat modules like sites)

I made a new “ASV table” based on ASVs presence/absence in modules. Then I ran ses.mpd and ses.pd on the modules. Phylogenetic clustering and phylogenetic diversity are significantly greater in most modules compared to a null model.

## Assign ASVs to modules

# add a "module" column to the tax table
ps.network = ps6Top %>% subset_taxa(ASV %in% names(V(sc.se2)))
tax_table(ps.network) = cbind(tax_table(ps.network), Module=fc2$membership)

# subset the tax table to the Module variable
mods = data.frame(tax_table(ps.network)[,"Module"])

# manually add module ID to taxa
mods = mods %>% mutate(mod1 = ifelse(Module=="1",1,0),
                mod2 = ifelse(Module=="2",1,0),
                mod3 = ifelse(Module=="3",1,0),
                mod4 = ifelse(Module=="4",1,0),
                mod5 = ifelse(Module=="5",1,0),
                mod6 = ifelse(Module=="6",1,0),
                mod7 = ifelse(Module=="7",1,0),
                mod8 = ifelse(Module=="8",1,0),
                mod9 = ifelse(Module=="9",1,0),
                mod10 = ifelse(Module=="10",1,0),
                mod11 = ifelse(Module=="11",1,0),) %>%
  select(-Module)
names(mods) = color_names

## Calcaulte correlation between SES MPD and SES PD

# set up data 
rooted = phy_tree(ps.network)
# otu = data.frame(otu_table(ps.network))
otu = mods
match <- match.phylo.comm(phy = rooted, comm = t(otu))

# SES MPD of modules
mod_SESMPD = ses.mpd(match$comm, cophenetic(match$phy), null.model = "taxa.labels")

# SES PD of modules
mod_SESPD = ses.pd(samp = match$comm, tree = match$phy, null.model = "taxa.labels")

SES MPD of modules

kable(mod_SESMPD %>% select(c(ntaxa,mpd.obs.z,mpd.obs.p)))

	ntaxa	mpd.obs.z	mpd.obs.p
darkorange1	19	-1.6416680	0.047
darkseagreen3	23	1.5667183	0.937
dodgerblue3	22	-6.0598965	0.001
firebrick2	12	-0.7305460	0.242
forestgreen	16	1.1358868	0.870
khaki1	12	-3.6572040	0.001
lightpink2	13	-2.7791104	0.007
lightskyblue2	8	-2.6400066	0.010
mediumpurple4	7	-3.4354137	0.002
sandybrown	22	-2.3419511	0.011
thistle3	8	-0.1421932	0.462

SES PD of modules

kable(mod_SESPD %>% select(c(ntaxa,pd.obs.z,pd.obs.p)))

	ntaxa	pd.obs.z	pd.obs.p
darkorange1	19	-4.4672756	0.001
darkseagreen3	23	-0.0776583	0.481
dodgerblue3	22	-5.5934294	0.001
firebrick2	12	-2.7855522	0.004
forestgreen	16	-2.3221085	0.011
khaki1	12	-4.0682895	0.001
lightpink2	13	-4.4907785	0.001
lightskyblue2	8	-3.5802738	0.001
mediumpurple4	7	-3.9597377	0.001
sandybrown	22	-4.8649093	0.001
thistle3	8	-1.7310178	0.044

4. Is there a significant correlation between species co-occurrence and phylogenetic distance in the network overall?

Here I use function comm.phylo.cor to calculate the significance of the correlation between co-occurrence and phylogenetic distance compared to the pattern expected under a null model.

ps.network2 = ps6Top %>% subset_taxa(ASV %in% names(V(sc.se2)))

# set up data
rooted2 = phy_tree(ps.network2)
otu2 = data.frame(otu_table(ps.network2))
match2 <- match.phylo.comm(phy = rooted2, comm = t(otu2))

# correlation between co-occurrence and phylogenetic distance (cij = Schoener's index)
phylocor = comm.phylo.cor(match2$comm, match2$phy, metric = "cij", null.model = "sample.taxa.labels", runs = 999)
phylocor$obs.corr.p

## [1] 1.734625e-150

5. Network parameters

network_results = function(net) {
  E(net)$weight = NA
  #  modularity
  fc = cluster_fast_greedy(net,weights =NULL)
  modularity = modularity(net,membership(fc))
  # graph properties
  results = data.frame(clustering.coefficient=transitivity(net), # clustering.coefficient
                modularity=modularity,# modularity
                mean.degree= mean(igraph::degree(net)),# mean degree
                size=length(E(net)),# size - number of edges
                order=length(V(net)),# order - number of vertices
                mean.distance=mean_distance(net),#average path length
                norm.degree=mean(igraph::degree(net)/length(V(net))),#normalized degree
                betweenness.centrality=centralization.betweenness(net)$centralization #betweenness centrality
                )
  return(results)
}


network_results(sc.se)

##   clustering.coefficient modularity mean.degree size order mean.distance
## 1              0.2757112   0.878703    2.012384  325   323      7.132944
##   norm.degree betweenness.centrality
## 1 0.006230291             0.08042973

Optional: Code for refining modules (do this later!)

Module clustering can be improved, such as merging by by topological dissimilarity

Assign modules (ASV level)

fc = cluster_fast_greedy(sc.se)
my_colors = sample(colors(), length(unique(fc$membership)))

# Another way - using WGCNA
# https://bioinformaticsworkbook.org/dataAnalysis/RNA-Seq/RNA-SeqIntro/wgcna.html#gsc.tab=0
datExpr = data.frame(t(otu_table(ps6Top %>% 
                                   transform_sample_counts(function(x) x / sum(x)) %>%
                                   subset_taxa(ASV %in% names(V(sc.se)))
                                   )))
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(datExpr, colors = factor(fc$membership, labels = my_colors))$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
#site_order = unique(arrange(data.frame(sample_data(ps6)), bioclim_pca1)$site_id)
site_order = rownames(arrange(data.frame(sample_data(ps6)), bioclim_pca1))

# Add treatment names
MEs0$treatment = row.names(MEs0)
# MEs0$treatment = get_variable(ps6, "latitude")
# MEs0$treatment = get_variable(ps6, "site_id")

# tidy & plot data
mME = MEs0 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order),
    treatment = factor(treatment, levels=site_order)
  )

mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-site relationships sorted by latitude (ASV level)", y = "Modules", fill="corr")

Merge similar modules

# Turn adjacency into topological overlap
TOM = TOMsimilarity(as_adjacency_matrix(sc.se2, sparse = F));
dissTOM = 1-TOM
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, 
                            deepSplit = 2, pamRespectsDendro = FALSE,
                            minClusterSize = minModuleSize);
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
# Plot the resulting clustering tree (dendrogram)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs0 %>% select(-treatment));
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")

# choose threshold: corresponds to correlation of 1-MEDissThres
MEDissThres = 0.5
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;

plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

# Turn adjacency into topological overlap
TOM = TOMsimilarity(as_adjacency_matrix(sc.se, sparse = F));
dissTOM = 1-TOM
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, 
                            deepSplit = 2, pamRespectsDendro = FALSE,
                            minClusterSize = minModuleSize);
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
# Plot the resulting clustering tree (dendrogram)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

Module dissimilarity

# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs0 %>% select(-treatment));
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average")
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")

# choose threshold: corresponds to correlation of 1-MEDissThres
MEDissThres = 0.4
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;

plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

Visualize modules resulting from WGCNA method

# Get Module Eigengenes per cluster
#MEs1 <- moduleEigengenes(datExpr, colors = dynamicColors)$eigengenes
MEs1 <- mergedMEs
# Reorder modules so similar modules are next to each other
MEs1 <- orderMEs(MEs1)
module_order = names(MEs1) %>% gsub("ME","", .)

MEs1$treatment = row.names(MEs1)
#MEs1$treatment = get_variable(ps6Top, "site_id")

# tidy & plot data
mME = MEs1 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order),
    treatment = factor(treatment, levels=site_order)
  )
mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-site relationships sorted by bioclim_pca1 (ASV level)", y = "Modules", fill="corr")

Load into Cytoscape

# open Cytoscape app on desktop before running command
library(RCy3)
createNetworkFromIgraph(sc.se)

Modules

2023-02-22