tree_file = '~/Dropbox/RoL/pitcher-16s/qiime2-stuff/rooted-tree/tree.nwk'
df = '~/Dropbox/RoL/pitcher-16s/site-data-bioclim-pca.txt'

ps = merge_phyloseq(import_biom('~/Dropbox/RoL/pitcher-16s/table-with-taxonomy-converted.biom', parseFunction = parse_taxonomy_greengenes), 
                    sample_data(read.csv(df, sep = '\t', row.names = 1)), 
                    read_tree(tree_file))
tax_table(ps) = tax_table(ps)[,1:7]
colnames(tax_table(ps)) = c('Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species')

# add ASV column
tax_table(ps) = cbind(tax_table(ps), ASV=paste0("ASV_",1:length(taxa_names(ps))) )

# filter out taxa unassigned at Kingdom level
ps = ps %>% subset_taxa( Kingdom != "Unassigned")

#try dropping florida

ps = ps %>% subset_samples(site_id != 'FLK')

Test throwing out unclassified Genera

Binning taxa by taxonomy means any unclassified taxa will be thrown out.

num = (sum(is.na(tax_table(ps))[,"Genus"]) + ntaxa(subset_taxa(ps, Genus=="uncultured"))) / ntaxa(ps) * 100

print(paste("NA/uncultured Genus overall:", round(num, 2),"%" ))

## [1] "NA/uncultured Genus overall: 18.75 %"

# go ahead and filter out NA or "uncultured" Genus
ps = ps %>% subset_taxa( !is.na(Genus) & Genus!= "uncultured")

Make taxa tables

ASV level tables

The SPIEC-EASI method and permutation method require untransformed numbers

ps4 = filter_taxa(ps, function(x) sum(x >= 1) >= 4, TRUE) %>% rarefy_even_depth(rngseed = 777) 
ps6 = filter_taxa(ps, function(x) sum(x >= 1) >= 6, TRUE) %>% rarefy_even_depth(rngseed = 777)
ps8 = filter_taxa(ps, function(x) sum(x >= 1) >= 8, TRUE) %>% rarefy_even_depth(rngseed = 777) 
ps10 = filter_taxa(ps, function(x) sum(x >= 1) >= 10, TRUE) %>% rarefy_even_depth(rngseed = 777)
# keep taxa with a relative abundance of 1% in at least 1 sample
flist = ps4 %>% transform_sample_counts(function(x) x / sum(x)) %>% filter_taxa(function(x) sum(x>= 0.01) > 0)
ps4Top = prune_taxa(flist, ps4)

flist = ps6 %>% transform_sample_counts(function(x) x / sum(x)) %>% filter_taxa(function(x) sum(x>= 0.01) > 0)
ps6Top = prune_taxa(flist, ps6) 

flist = ps8 %>% transform_sample_counts(function(x) x / sum(x)) %>% filter_taxa(function(x) sum(x>= 0.01) > 0)
ps8Top = prune_taxa(flist, ps8)

flist = ps10 %>% transform_sample_counts(function(x) x / sum(x)) %>% filter_taxa(function(x) sum(x>= 0.01) > 0)
ps10Top = prune_taxa(flist, ps10)

Genus level tables

Merging the previous tables at the Genus level.

ps4.genus = ps4 %>% tax_glom("Genus")
ps6.genus = ps6 %>% tax_glom('Genus')
ps8.genus = ps8 %>% tax_glom('Genus')
ps10.genus = ps10 %>% tax_glom('Genus')
ps4TopG = ps4Top %>% tax_glom("Genus")
ps6TopG = ps6Top %>% tax_glom("Genus")
ps8TopG = ps8Top %>% tax_glom("Genus")
ps10TopG = ps10Top %>% tax_glom("Genus")

Summarize filtering thresholds

kable(data.frame(NumSamples=c(4,6,8,10), 
                 TotalASVs=c(ntaxa(ps4),ntaxa(ps6),ntaxa(ps8),ntaxa(ps10)),
                 TopASVs=c(ntaxa(ps4Top),ntaxa(ps6Top),ntaxa(ps8Top),ntaxa(ps10Top)),
                 TotalGenus=c(ntaxa(ps4.genus), ntaxa(ps6.genus), ntaxa(ps8.genus), ntaxa(ps10.genus)),
                 TopGenus=c(ntaxa(ps4TopG), ntaxa(ps6TopG), ntaxa(ps8TopG), ntaxa(ps10TopG))) )

NumSamples	TotalASVs	TopASVs	TotalGenus	TopGenus
4	557	418	121	95
6	361	323	95	85
8	260	248	76	73
10	198	196	66	65

Network construction

SPIEC-EASI

ASV

se.ps6Top <- spiec.easi(ps6Top, method='mb', lambda.min.ratio=1e-1,
                           nlambda=30, pulsar.params=list(rep.num=99, ncores=4))
se.ps6Net <- adj2igraph(getRefit(se.ps6Top),  vertex.attr=list(name=as.vector(tax_table(ps6Top)[,'ASV'])))
plot_network(se.ps6Net, ps6Top, type='taxa')

getStability(se.ps6Top)

## [1] 0.04922884

Genus

se.ps6TopG <- spiec.easi(ps6TopG, method='mb', lambda.min.ratio=1e-1,
                           nlambda=20, pulsar.params=list(rep.num=99, ncores=4))
se.ps6NetG <- adj2igraph(getRefit(se.ps6TopG),  vertex.attr=list(name=as.vector(tax_table(ps6TopG)[,'Genus'])))
plot_network(se.ps6NetG, ps6TopG, type='taxa')

getStability(se.ps6TopG)

## [1] 0.03927639

Pearson

ASV

p.ps6Net = make_co_occurrence_network(ps6Top %>% transform_sample_counts(function(x) x / sum(x)), method = "pearson", sig = 0.05, cutoff = 0.6)
vertex_attr(p.ps6Net, "name") <- as.vector(tax_table(ps6Top)[,'ASV'])
# verticesToRemove <- V(p.ps6Net)[degree(p.ps6Net) < 2]
# p.ps6Net2 = delete_vertices(p.ps6Net, verticesToRemove )
plot_network(p.ps6Net, ps6Top)

Genus

p.ps6NetG = make_co_occurrence_network(ps6TopG %>% transform_sample_counts(function(x) x / sum(x)), method = "pearson", sig = 0.05, cutoff = 0.5)
vertex_attr(p.ps6NetG, "name") <-as.vector(tax_table(ps6TopG)[,'Genus'])
plot_network(p.ps6NetG, ps6TopG)

Spearman

ASV

sp.ps6Net = make_co_occurrence_network(ps6Top %>% transform_sample_counts(function(x) x / sum(x)), method = "spearman", sig = 0.05, cutoff = 0.5)
vertex_attr(sp.ps6Net, "name") <- as.vector(tax_table(ps6Top)[,'ASV'])
plot_network(sp.ps6Net, ps6Top)

Genus

sp.ps6NetG = make_co_occurrence_network(ps6TopG %>% transform_sample_counts(function(x) x / sum(x)), method = "spearman", sig = 0.05, cutoff = 0.5)
vertex_attr(sp.ps6NetG, "name") <-as.vector(tax_table(ps6TopG)[,'Genus'])
plot_network(sp.ps6NetG, ps6TopG)

SPARCC

ASV

sc.ps6Net =make_sparcc_net(ps6Top, 0.25)
vertex_attr(sc.ps6Net, "name") <- as.vector(tax_table(ps6Top)[,'ASV'])
plot_network(sc.ps6Net, ps6Top)

Genus

sc.ps6NetG =make_sparcc_net(ps6TopG, 0.25)
vertex_attr(sc.ps6NetG, "name") <- as.vector(tax_table(ps6TopG)[,'Genus'])
plot_network(sc.ps6NetG, ps6TopG)

How many edges are shared?

ASV

set1 = data.frame(as_edgelist(se.ps6Net)) %>% mutate(join = paste(X1, X2, sep = "-"))
set2 = data.frame(as_edgelist(sp.ps6Net)) %>% mutate(join = paste(X1, X2, sep = "-"))
set3 = data.frame(as_edgelist(sc.ps6Net)) %>% mutate(join = paste(X1, X2, sep = "-"))

display_venn(list(set1$join,set2$join,set3$join),
             category.names = c("SPIEC-EASI" , "Spearman", "SPARCC"))

After adjusting the parameters on Spearman and SparCC, SPIEC-EASI is no longer the most conservative network.

Genus

set1 = data.frame(as_edgelist(se.ps6NetG)) %>% mutate(join = paste(X1, X2, sep = "-"))
set2 = data.frame(as_edgelist(sp.ps6NetG)) %>% mutate(join = paste(X1, X2, sep = "-"))
set3 = data.frame(as_edgelist(sc.ps6NetG)) %>% mutate(join = paste(X1, X2, sep = "-"))

display_venn(list(set1$join,set2$join,set3$join),
             category.names = c("SPIEC-EASI" , "Spearman", "SPARCC"))

Use intersection

I’m removing nodes with less than 2 connections to make the plot easier to see

ASV

sc.se = intersection(sc.ps6Net, se.ps6Net)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.se)[degree(sc.se) < 2]
# These edges are removed from the graph
sc.se_plot <- delete.vertices(sc.se, verticesToRemove) 
plot_network(sc.se_plot, ps6Top)

Genus

sc.seG = intersection(sc.ps6NetG, se.ps6NetG)
# remove some verticies to make it easier to see
verticesToRemove <- V(sc.seG)[degree(sc.seG) < 2]
# These edges are removed from the graph
sc.se_plotG <- delete.vertices(sc.seG, verticesToRemove) 
plot_network(sc.se_plotG, ps6TopG)

Mega-network re-analysis

2023-02-07

Test throwing out unclassified Genera

Make taxa tables

ASV level tables

Genus level tables

Summarize filtering thresholds

Network construction

SPIEC-EASI

ASV

Genus

Pearson

ASV

Genus

Spearman

ASV

Genus

SPARCC

ASV

Genus

How many edges are shared?

ASV

Genus

Use intersection

ASV

Genus