# Set the global options
knitr::opts_chunk$set(echo= TRUE, eval = TRUE, fig.width=6, fig.height=5, fig.align = 'center',
warning=FALSE, message=FALSE)
# Set the seed
set.seed(15879966)
# Load packages
necessary_packages <- c("phyloseq", "ggplot2", "ape", "vegan", "plyr", "scales", "grid", "reshape2",
"data.table","DESeq2", "dplyr","tidyr", "sciplot","scales", "gtable","pgirmess",
"multcompView", "pander")
# Install the necessary packages
packages <- lapply(necessary_packages, library, character.only = TRUE)
# Set the working Directory
setwd("~/Final_PAFL_Trophicstate")
### Source written functions in the file Functions_PAFL.R that is housed within the "Final_PAFL_Trophicstate"
source("Functions_PAFL.R")
### Set our theme for our plots down the road
theme_set(theme_bw() + theme(plot.title = element_text(face="bold", size = 12), #Set the plot title
strip.text.x = element_text(size=10, face="bold"), #Set Y facet titles
strip.text.y = element_text(size=10, face="bold"), #Set X facet titles
strip.background = element_blank(), #No facet background
axis.title.x = element_text(face="bold", size=12), #Set the x-axis title
axis.title.y = element_text(face="bold", size=12), #Set the y-axis title
axis.text.x = element_text(colour = "black", size=8), #Set the x-axis labels
axis.text.y = element_text(colour = "black", size=8), #Set the y-axis labels
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
legend.position="right", #Default the legend position to the right
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"))) #top, right, bottom, left
## Do not show scientific notation
options(scipen = 999, digits = 7)
## Set pander (table) output functions
panderOptions('table.split.table', Inf)
panderOptions('table.style', 'rmarkdown')
# Load in the shared file from the mothur FWDB analysis
FWWDB_shared <- "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/SoMiLakes_copy_FWDB.files.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.unique_list.shared"
# Load in the taxonomy file from the mothur FWDB analysis
FWDB_tax <- "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/SoMiLakes_copy_FWDB.files.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.unique_list.0.03.cons.taxonomy"
#Create FWDB phyloseq object
FWDB_data <- import_mothur(mothur_shared_file = FWWDB_shared, mothur_constaxonomy_file = FWDB_tax )
# Change the taxonomy names
#length(colnames(tax_table(FWDB_data))) # 8 columns
colnames(tax_table(FWDB_data)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species", "Strain")
#sum(row.names(otu_table(FWDB_data)) == row.names(tax_table(FWDB_data))) # OTU names are the same in the tax and otu tables.
#Let's check out the taxonomy table
tax <- data.frame(tax_table(FWDB_data))
tax$OTU <- row.names(tax)
tax_table(FWDB_data) <- tax_table(as.matrix(tax))
# Change some of the sample names to match our metadata sample names
samplenames_FWDB <- sample_names(FWDB_data)
names2_FWDB <- gsub("Gull", "GUL", samplenames_FWDB)
names3_FWDB <- gsub("Long", "LON", names2_FWDB)
sample_names(FWDB_data) <- names3_FWDB
# Import metadata file and merge with FWDB_data object
mapfile <- "~/Final_PAFL_Trophicstate/raw_data/metadata"
map <- import_qiime_sample_data(mapfile)
# Check the sequencing depth of each sample
sums_FWDB <- data.frame(colSums(otu_table(FWDB_data)))
colnames(sums_FWDB) <- "Sample_TotalSeqs"
sums_FWDB$sample <- row.names(sums_FWDB)
sums_FWDB <- arrange(sums_FWDB, Sample_TotalSeqs)
#### Create a plot of the number of sequences per sample
ggplot(sums_FWDB, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) +
ylab("Number of Sequences per Sample") +
geom_bar(stat = "identity", colour="black",fill="cornflowerblue") + xlab("Sample Name") +
ggtitle("Total Number of Sequences per Sample") +
theme(axis.text.x = element_text(colour = "black", size=6, angle=45, hjust = 1, vjust = 1))
## Prune out samples that have a sequencing depth of less than 10,000 sequences
pruned_FWDB <- prune_samples(sample_sums(FWDB_data) > 10000, FWDB_data)
# Combine FWDB and mapfiles
FWDB_no_scale <- merge_phyloseq(pruned_FWDB,map)
# lets look at only samples (removing blanks and mock and samples that didn't amplify)
FW_pruned <- prune_taxa(taxa_sums(FWDB_no_scale) > 0, FWDB_no_scale)
FW_samples <-subset_taxa(FW_pruned, Kingdom == "Bacteria")
FW_samples <-subset_taxa(FW_samples, Class != "Chloroplast")
### Time to (1) Sum between replicates and then (2) scale
# Step 1. Sum the reads
FW_sum_reps <- merge_samples(x = FW_samples, group = "dups", fun = "sum") # Sum between replicate samples
FW_sum_reps <- prune_taxa(taxa_sums(FW_sum_reps) > 0, FW_sum_reps) # Remove any OTUs that are 0
## merge_samples messes up the sample_data so we will need to fix this below.
# Step 2: Scale the reads
FWDB_sum_scale_matround <- scale_reads(physeq = FW_sum_reps, n = min(sample_sums(FW_sum_reps)), round = "matround")
data_dup_FWDB_sum_scale_matround <- read.table("~/Final_PAFL_Trophicstate/raw_data/duplicate_metadata.txt", header = TRUE, sep = "\t")
row.names(data_dup_FWDB_sum_scale_matround) <- data_dup_FWDB_sum_scale_matround$names
data_dup_FWDB_sum_scale_matround$quadrant <- factor(data_dup_FWDB_sum_scale_matround$quadrant,levels = c("Free Epilimnion", "Free Mixed", "Free Hypolimnion", "Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))
# Fix the sample data
# Add a column of Total Sequence sums to data_dup
TotalSeqs_FWDB_sum_reps <- data.frame(rowSums(otu_table(FW_sum_reps)))
TotalSeqs_FWDB_sum_reps$names <- as.factor(row.names(TotalSeqs_FWDB_sum_reps))
colnames(TotalSeqs_FWDB_sum_reps)[1] <- "TotalSeqs_Summed"
data_dup_FWDB_sum_scale_matround_join<- left_join(data_dup_FWDB_sum_scale_matround, TotalSeqs_FWDB_sum_reps, by = "names")
TotalSeqs_FWDB_sum_scale_matround <- data.frame(rowSums(otu_table(FWDB_sum_scale_matround)))
TotalSeqs_FWDB_sum_scale_matround$names <- as.factor(row.names(TotalSeqs_FWDB_sum_scale_matround))
colnames(TotalSeqs_FWDB_sum_scale_matround)[1] <- "TotalSeqs_Scaled"
data_dup_FWDB_sum_scale_matround_join<- left_join(data_dup_FWDB_sum_scale_matround_join, TotalSeqs_FWDB_sum_scale_matround, by = "names")
#sample_names(FWDB_sum_scale_matround) <- as.character(data_dup_FWDB_sum_scale_matround_join$names)
row.names(data_dup_FWDB_sum_scale_matround_join) <- sample_names(FWDB_sum_scale_matround)
dup_data <- sample_data(data_dup_FWDB_sum_scale_matround_join) # Metadata in our phyloseq object
sum_scale_matround_otu <- otu_table(FWDB_sum_scale_matround) # OTU table in our phyloseq object
sum_scale_matround_tax <- tax_table(FWDB_sum_scale_matround) # Taxonomy Table in our phyloseq object
FWDB_sum_scale_matround <- merge_phyloseq(sum_scale_matround_otu, sum_scale_matround_tax, dup_data)
## Our phyloseq object!
nowin_FWDB_scaled <- subset_samples(FWDB_sum_scale_matround, names != "WINH" & names != "WINH3um") # Remove the two samples that are not a true hypolimnion
nowin_FWDB_scaled <- prune_taxa(taxa_sums(nowin_FWDB_scaled) > 0, nowin_FWDB_scaled) # Remove any OTUs that are 0
nowin_data <- data.frame(sample_data(nowin_FWDB_scaled))
mean_scaled_dat <- ddply(nowin_data, "filter", summarise, scaled.mean=mean(TotalSeqs_Scaled))
# First plot for figure S2
scaled_historgram <- ggplot(nowin_data, aes(x = TotalSeqs_Scaled, fill = filter)) + ylab("Count") +
geom_histogram(alpha=.75, position="identity", binwidth = 20) +
xlab("Total Sequences per Sample") +
ggtitle(expression(atop(bold("Scaled Samples"), atop("Bin-Width = 20"), ""))) +
scale_fill_manual(name="Filter Fraction",
breaks = c("Free", "Particle"),
values = c("orangered", "orangered4")) +
scale_color_manual(breaks = c("Free", "Particle"),
values = c("orangered", "orangered4")) +
scale_x_continuous(breaks = seq(14600, 15200, 100), limits = c(14600, 15200), expand = c(0,0)) +
scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 4.5), expand = c(0,0)) +
geom_vline(data=mean_scaled_dat, aes(xintercept=scaled.mean, colour=filter),
linetype="longdash", size=1) +
theme(legend.position = c(0.2, 0.83),
plot.margin = unit(c(0.1, 0.5, 0.1, 0.1), "cm")) #top, right, bottom, left
mean_summed_dat <- ddply(nowin_data, "filter", summarise, summed.mean=mean(TotalSeqs_Summed))
# Second plot for Figure S2
merged_histogram <- ggplot(nowin_data, aes(x = TotalSeqs_Summed, fill = filter)) + ylab("Count") +
geom_histogram(position = "identity", alpha = 0.75, binwidth = 1500) +
xlab("Total Sequences per Sample") +
scale_fill_manual(name="Filter Fraction",
breaks = c("Free", "Particle"),
values = c("orangered", "orangered4")) +
ggtitle(expression(atop(bold("Summed Replicate Samples"), atop("Bin-Width = 1500"), ""))) +
scale_color_manual(breaks = c("Free", "Particle"),
values = c("orangered", "orangered4")) +
geom_vline(data=mean_summed_dat, aes(xintercept=summed.mean, colour=filter),
linetype="longdash", size=1) +
scale_x_continuous(breaks = seq(10000, 90000, 10000), limits = c(10000, 75000), expand = c(0,0)) +
scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 3.5), expand = c(0,0)) +
theme(legend.position = c(0.2, 0.83),
plot.margin = unit(c(0.1, 0.5, 0.1, 0.1), "cm")) #top, right, bottom, left
##### Plotting FIGURE S2 ##### Plotting FIGURE S2 ##### Plotting FIGURE S2
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS2_histogram.pdf", width= 6.5, height=3.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS2_histogram.tiff", width= 6.5, height=3.5, units = "in", res = 175)
multiplot(merged_histogram, scaled_historgram, cols = 2); #dev.off()
Figure S2: Histogram of the total number of sequencing reads per sample. (Left) Sequencing reads per sample after summing the raw number of sequencing reads between replicate samples. (Right) Sequencing reads per sample after transforming the sequence depth of each of the summed replicate samples by taking the proportion of each OTU in samples and scaling it to the minimum sequence depth in the data set (14,925 sequences), and then rounding to the nearest integer. Dotted vertical lines represent the mean sequencing read depth of the free-living (orange) and particle-associated (brown) samples. The bin-width represents the number of sequencing reads that are binned within each bar within the histogram
########## ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(nowin_FWDB_scaled))
Phylum <- as.character(phy$Phylum)
Class <- as.character(phy$Class)
for (i in 1:length(Phylum)){
if (Phylum[i] == "Proteobacteria"){
Phylum[i] <- Class[i]
}
}
phy$Phylum <- Phylum
t <- tax_table(as.matrix(phy))
tax_table(nowin_FWDB_scaled) <- t
# The phyloseq object!
nowin_FWDB_scaled
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8823 taxa and 41 samples ]
## sample_data() Sample Data: [ 41 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8823 taxa by 9 taxonomic ranks ]
## The phyloseq object without the mixed lake
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEH" &
names != "SHEE3um" & names != "SHEH3um")
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled)
#### Obtain the top 100 most abundant OTUs
topN <- 300
most_abundant_taxa = sort(taxa_sums(nowin_FWDB_scaled), TRUE)[1:topN]
#print(most_abundant_taxa)
FWDB_300_OTUs <- prune_taxa(names(most_abundant_taxa), nowin_FWDB_scaled)
FWDB_sums <- data.frame(taxa_sums(FWDB_300_OTUs))
ggplot(FWDB_sums,aes(x=row.names(FWDB_sums), y=taxa_sums.FWDB_300_OTUs.)) +
ylab("Number of Sequences per OTU") + scale_x_discrete(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0)) + theme_classic() +
geom_bar(stat="identity",colour="black",fill="darkturquoise") + xlab("OTU Rank") +
ggtitle("Rank Abundance Curve of the Top 300 OTUs") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
#Create to subsampled dataset
rare_FW <- rarefy_even_depth(FW_sum_reps, sample.size = (min(sample_sums(FW_sum_reps))-1), rngseed = 15879966)
###
fwdb_list <- c( "FW_samples", "FW_sum_reps", "FWDB_sum_scale_matround", "rare_FW")
compare_fwdb <- data.frame(matrix(ncol = 6, nrow = 4))
# Assign the row names
row.names(compare_fwdb) <- c("FWDB+Silva Raw", "Merged FWDB+Silva",
"FWDB+Silva Scaled", "Subsampled FWDB+Silva")
# Assign the column names
colnames(compare_fwdb) <- c("MinSeqs", "MeanSeqs", "MedianSeqs", "MaxSeqs", "RangeSeqs", "NumOTUs")
# Create the data frame
compare_fwdb[1,] <- physeq_stats(FW_samples)
## [1] 10737.00 24876.32 25395.00 35842.00 25105.00 14294.00
compare_fwdb[2,] <- physeq_stats(FW_sum_reps)
## [1] 14925 45703 46975 69584 54659 43
compare_fwdb[2,6] <- ncol(otu_table(FW_sum_reps))
compare_fwdb[3,] <- physeq_stats(FWDB_sum_scale_matround)
## [1] 14635.00 14883.12 14906.00 15156.00 521.00 43.00
compare_fwdb[3,6] <- ncol(otu_table(FWDB_sum_scale_matround))
compare_fwdb[4,] <- physeq_stats(rare_FW)
## [1] 14924 14924 14924 14924 0 43
compare_fwdb[4,6] <- ncol(otu_table(rare_FW))
# Set the values to numbers and then will round to the whole number
panderOptions("digits", 1)
# Align all cells with "center" alignment and the row.names with "right" alignment
set.alignment('center', row.names = 'right')
pander(compare_fwdb, caption="Statistics on the sequencing depth of the entire data set.")
| Â | MinSeqs | MeanSeqs | MedianSeqs | MaxSeqs | RangeSeqs | NumOTUs |
|---|---|---|---|---|---|---|
| FWDB+Silva Raw | 10737 | 24876 | 25395 | 35842 | 25105 | 14294 |
| Merged FWDB+Silva | 14925 | 45703 | 46975 | 69584 | 54659 | 14294 |
| FWDB+Silva Scaled | 14635 | 14883 | 14906 | 15156 | 521 | 8992 |
| Subsampled FWDB+Silva | 14924 | 14924 | 14924 | 14924 | 0 | 8344 |
profile <- read.csv(file = "~/Final_PAFL_Trophicstate/raw_data/AllLakes_depthprofile.csv", header = TRUE);
profile <- subset(profile, select = c('lakename',"trophicstate", "depth", "temp", "DO", "pH", "SpC"))
profile$DO. = NULL
profile$trophicstate <- as.character(profile$trophicstate)
profile$trophicstate[profile$trophicstate == "Eutrophic"] <- "Productive"
profile$trophicstate[profile$trophicstate == "Mesotrophic"] <- "Productive"
profile$trophicstate[profile$trophicstate == "Oligotrophic"] <- "Unproductive"
profile$trophicstate[profile$trophicstate == "Mixed"] <- "Mixed"
# ALL LAKES
profile_all <- profile %>% gather(Variable, Value, 4:7)
profile_all$lakename <- as.character(profile_all$lakename)
profile_all$lakename[profile_all$lakename == "WIN"] <- "Wintergreen"
profile_all$lakename[profile_all$lakename == "SIX"] <- "Sixteen"
profile_all$lakename[profile_all$lakename == "SHE"] <- "Sherman"
profile_all$lakename[profile_all$lakename == "PAY"] <- "Payne"
profile_all$lakename[profile_all$lakename == "LON"] <- "Little Long"
profile_all$lakename[profile_all$lakename == "LEE"] <- "Lee"
profile_all$lakename[profile_all$lakename == "GUL"] <- "Gull"
profile_all$lakename[profile_all$lakename == "BRI"] <- "Bristol"
profile_all$lakename[profile_all$lakename == "BAK"] <- "Baker"
profile_all$lakename[profile_all$lakename == "BAS"] <- "Baseline"
profile_all$lakename[profile_all$lakename == "BST"] <- "Bassett"
profile_all$lakename[profile_all$lakename == "BST"] <- "Bassett"
profile_all$trophicstate[profile_all$trophicstate == "Productive"] <- "High-Nutrient"
profile_all$trophicstate[profile_all$trophicstate == "Unproductive"] <- "Low-Nutrient"
profile_all$trophicstate[profile_all$trophicstate == "Mixed"] <- "High-Nutrient"
profile_all <- subset(profile_all, Variable != "SpC")
profile_labeller <- function(var, value){
value <- as.character(value)
if (var=="Variable") {
value[value=="temp"] <- "Temperature \n (Celsius)"
value[value=="DO"] <- "Dissolved Oxygen \n (mg/L)"
value[value=="pH"] <- "pH"
value[value=="SpC"] <- "Specific Conductivity \n (uS/cm)"
}
return(value)
}
##### Plotting FIGURE S1 ##### Plotting FIGURE S1 ##### Plotting FIGURE S1
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS1_depth_profile.pdf", width= 6.5, height=6.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS1_depth_profile.tiff", width= 6.5, height=6.5, units = "in", res = 175)
ggplot(profile_all, aes(x=Value, y = depth, color = lakename)) +
geom_path(size=1, alpha = 0.8) + ylab("Depth (m)") + xlab("") +
theme_bw() + geom_point(size=2, alpha = 0.8) + geom_hline(h=0) +
scale_y_reverse(breaks=seq(0, 32, 4), expand = c(0, 0)) +
scale_color_manual(name = "Lake Name",
labels = c("Baker", "Baseline", "Bassett", "Bristol", "Gull", "Lee",
"Little Long", "Payne", "Sherman", "Sixteen", "Wintergreen"),
values = c("blue", "red", "black", "forestgreen","violet","limegreen",
"purple", "orange", "maroon1", "turquoise3", "thistle4")) +
facet_grid(trophicstate ~ Variable, scales = "free",labeller = profile_labeller, space = "free_y") +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_blank(),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
plot.title = element_text(size = 12, face = "bold", colour = "black"),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
legend.position="right"); #dev.off()
Figure S1: Full lake profiles for each lake. (Top) High-nutrient and (Bottom) low-nutrient lake profiles. Colored points represent the raw value at each depth. Lines connect the discrete points within each lake. Sherman Lake was the mixed lake.
# 1st: Get the abundance of each phylum for ALL PA/FL/High-Nutrient/Low-Nutrient/Epilimnion/Hypolimnion
phy_glom <- tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
phy_melt <- psmelt(phy_glom) %>%
subset(select = c("Sample", "ProdLevel", "limnion", "filter", "Phylum","Abundance"))
# No sherman samples
phy_melt_nosher <- subset(phy_melt, Sample != "SHEE" & Sample != "SHEH" &
Sample != "SHEE3um" & Sample != "SHEH3um")
## Free-Living - 21 samples
phy_abund_free <- subset(phy_melt_nosher, filter == "Free") %>% # Subset the Free-Living samples
phy_abund_in_group("Free-Living")
#Particle-Associated - 20 Samples
phy_abund_particle <- subset(phy_melt_nosher, filter == "Particle") %>% # Subset the particle samples
phy_abund_in_group("Particle-Associated")
# Epilimnion - 19 samples
phy_abund_epi <- subset(phy_melt_nosher, limnion == "Epilimnion") %>% # Subset the Epilimnion samples
phy_abund_in_group("Epilimnion")
# Hypolimnion - 18 Samples
phy_abund_hypo <- subset(phy_melt_nosher, limnion == "Hypolimnion") %>% # Subset the Hypolimnion samples
phy_abund_in_group("Hypolimnion")
# High-Nutrient - 26 Samples
phy_abund_high <- subset(phy_melt_nosher, ProdLevel == "Productive") %>% # Subset the Productive samples
phy_abund_in_group("Productive")
# Low-Nutrient - 15 samples
phy_abund_low <- subset(phy_melt_nosher, ProdLevel == "Unproductive") %>% # Subset the Unproductive samples
phy_abund_in_group("Unproductive")
## WARNING: This command takes a long time to run!
# it calculates the abundance of each otu within each samples
otu_glom <- tax_glom(nowin_FWDB_scaled, taxrank = "OTU")
otu_melt <- psmelt(otu_glom) %>%
subset(select = c("Sample", "ProdLevel", "limnion", "filter", "OTU","Abundance"))
#write.table(otu_melt, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/otu_melt", row.names = FALSE)
# Let's load in a previously run version to make this process faster.
#otu_melt <- read.table( "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/otu_melt", header = TRUE)
otu_melt_nosherwin <- subset(otu_melt, Sample != "SHEE" & Sample != "SHEH" &
Sample != "SHEE3um" & Sample != "SHEH3um")
# The below data frames do not include SHERMAN (Mixed lake) OR Wintergreen "hypolimnion" samples
## Free-Living - 21 samples
# All OTUs and their abundance in the FREE LIVING
otu_abund_free <- subset(otu_melt_nosherwin, filter == "Free") %>% # Subset the free-living samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_free$SampleType <- "Free-Living"
# Particle-Associated - 20 Samples
# All OTUs and their abundance in the PARTICLE-ASSOCIATED
otu_abund_particle <- subset(otu_melt_nosherwin, filter == "Particle") %>% # Subset the particle-associated samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_particle$SampleType <- "Particle-Associated"
# Epilimnion - 19 samples
# All OTUs and their abundance in the EPILIMNION
otu_abund_epi <- subset(otu_melt_nosherwin, limnion == "Epilimnion") %>% # Subset the Epilimnion samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_epi$SampleType <- "Epilimnion"
# Hypolimnion - 18 Samples
# All OTUs and their abundance in the HYPOLIMNION
otu_abund_hypo <- subset(otu_melt_nosherwin, limnion == "Hypolimnion") %>% # Subset the Hypolimnion samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_hypo$SampleType <- "Hypolimnion"
# High-Nutrient - 26 Samples
# All OTUs and their abundance in the HIGH-NUTRIENT
otu_abund_high <- subset(otu_melt_nosherwin, ProdLevel == "Productive") %>% # Subset the Productive samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_high$SampleType <- "Productive"
# Low-Nutrient - 15 samples
# All OTUs and their abundance in the LOW-NUTRIENT
otu_abund_low <- subset(otu_melt_nosherwin, ProdLevel == "Unproductive") %>% # Subset the Unproductive samples
select(Sample, Abundance, OTU) %>% # select only the important columns
group_by(OTU) %>% # Combine all the OTUs
summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_low$SampleType <- "Unproductive"
## Get rid of OTUs that are 0.
otu_abund_free_pruned <- filter(otu_abund_free, OTU_sum > 0)
colnames(otu_abund_free_pruned)[2] <- "OTU_sum_Free"
otu_abund_particle_pruned <- filter(otu_abund_particle, OTU_sum > 0)
colnames(otu_abund_particle_pruned)[2] <- "OTU_sum_Particle"
otu_abund_epi_pruned <- filter(otu_abund_epi, OTU_sum > 0)
colnames(otu_abund_epi_pruned)[2] <- "OTU_sum_Epi"
otu_abund_hypo_pruned <- filter(otu_abund_hypo, OTU_sum > 0)
colnames(otu_abund_hypo_pruned)[2] <- "OTU_sum_Hypo"
otu_abund_high_pruned <- filter(otu_abund_high, OTU_sum > 0)
colnames(otu_abund_high_pruned)[2] <- "OTU_sum_High"
otu_abund_low_pruned <- filter(otu_abund_low, OTU_sum > 0)
colnames(otu_abund_low_pruned)[2] <- "OTU_sum_Low"
### Combing get all of the
### Need to add the taxonomy to the OTU information
taxa <- data.frame(tax_table(nosherwin_FWDB_scaled)) %>%
select(Phylum, OTU)
free_living_all_otus <- inner_join(otu_abund_free_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Free) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(free_living_all_otus)[2] <- "AllOTU_Total"
particle_associated_all_otus <- inner_join(otu_abund_particle_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Particle) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(particle_associated_all_otus)[2] <- "AllOTU_Total"
epilimnion_all_otus <- inner_join(otu_abund_epi_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Epi) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(epilimnion_all_otus)[2] <- "AllOTU_Total"
hypolimnion_all_otus <- inner_join(otu_abund_hypo_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Hypo) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(hypolimnion_all_otus)[2] <- "AllOTU_Total"
high_nutrient_all_otus <- inner_join(otu_abund_high_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_High) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(high_nutrient_all_otus)[2] <- "AllOTU_Total"
low_nutrient_all_otus <- inner_join(otu_abund_low_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Low) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(low_nutrient_all_otus)[2] <- "AllOTU_Total"
nowinOTU <- as.matrix(otu_table(nowin_FWDB_scaled))
#weighted
norm_bray <- vegdist(nowinOTU, method = "bray", binary = FALSE) # calculates the Bray-Curtis Distances
nowinOTU_df <- data.frame(otu_table(nowinOTU))
norm_soren <- vegdist(nowinOTU_df, method = "bray", binary = TRUE) # SORENSEN INDEX
set.seed(15879966) # To be safe
nmds_bray <- metaMDS(nowinOTU, distance="bray") #, autotransform = FALSE
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1722805
## Run 1 stress 0.1703942
## ... New best solution
## ... procrustes: rmse 0.06318353 max resid 0.1885027
## Run 2 stress 0.1706441
## ... procrustes: rmse 0.007538099 max resid 0.03135936
## Run 3 stress 0.17228
## Run 4 stress 0.170546
## ... procrustes: rmse 0.006356934 max resid 0.03176699
## Run 5 stress 0.170546
## ... procrustes: rmse 0.006343347 max resid 0.03162104
## Run 6 stress 0.1722825
## Run 7 stress 0.1722807
## Run 8 stress 0.1722844
## Run 9 stress 0.1704897
## ... procrustes: rmse 0.004210585 max resid 0.01908924
## Run 10 stress 0.1895625
## Run 11 stress 0.1894888
## Run 12 stress 0.172284
## Run 13 stress 0.1703948
## ... procrustes: rmse 0.0003323431 max resid 0.001914146
## *** Solution reached
nmds_bray <- data.frame(nmds_bray$points) #http://strata.uga.edu/software/pdf/mdsTutorial.pdf
nmds_bray$names<-row.names(nmds_bray) #new names column
nmds_bray <- makeCategories_dups(nmds_bray) #will add our categorical information: lakenames, limnion, filter, quadrant and trophicstate
nmds_bray$ProdLevel <- as.character(nmds_bray$trophicstate)
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Eutrophic"] <- "Productive"
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Mesotrophic"] <- "Productive"
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Oligotrophic"] <- "Unproductive"
nmds_bray$quadrant <- factor(nmds_bray$quadrant,
levels = c("Free Epilimnion", "Free Mixed", "Free Hypolimnion",
"Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))
nmds_bray$ProdLevel <- factor(nmds_bray$ProdLevel,levels = c("Productive", "Unproductive"))
nmds_bc_quad <- ggplot(nmds_bray, aes(MDS1*-100, MDS2*-100, color = filter,
shape = limnion, fill = ProdLevel)) +
annotate("text", label = "B", x = (min(nmds_bray$MDS1*100) + 0.7), y = (max(nmds_bray$MDS2*100) + 0.1),
face = "bold",size = 6, colour = "black") +
annotate("text", label = "Stress = 0.17", x = (max(nmds_bray$MDS1*100)+0.1), y = (max(nmds_bray$MDS2*100) + 0.1),
size = 4, colour = "black") +
xlab("NMDS1") + ylab("NMDS2") + ggtitle("Bray-Curtis Dissimilarity") +
geom_point(size = 4, alpha=0.9) + theme_bw() + # Draw the points
geom_point(aes(size=6, shape = limnion), show_guide = FALSE )+ # Make the points have a thicker outline
scale_color_manual(name = "Filter Fraction", breaks=c("Free", "Particle"),
labels = c("Free-Living", "Particle-Associated"),
values = c("orangered", "orangered4")) +
scale_fill_manual(name = "Nutrient Level", breaks = c("Productive", "Unproductive"),
labels = c("High-Nutrient", "Low-Nutrient"),
values = c("gray", "white")) +
scale_shape_manual(name = "Lake Layer", breaks = c("Epilimnion", "Hypolimnion", "Mixed"),
values = c(21, 22, 24)) +
guides(shape = guide_legend(order=1, override.aes=list(size=4)), # Manually set the legends
fill = guide_legend(order=2, override.aes=list(size=4, fill = c("gray", "white"))),
color = guide_legend(order=3, override.aes=list(size=4))) +
theme(axis.text.x = element_blank(), # text(colour="black", vjust=0.5, size=10),
axis.text.y = element_blank(), #text(colour="black", vjust=0.5, size=10),
axis.title.x = element_text(face="bold", size=10),
axis.title.y = element_text(face="bold", size=10),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size = 8),
axis.ticks = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
plot.title = element_text(size = 12, face="bold"),
###LEGEND TOP RIGHT CORNER
legend.position = "right");
# UNWEIGHTED
set.seed(15879966)
nmds_soren <- metaMDS(nowinOTU, distance="bray", binary = TRUE)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1490234
## Run 1 stress 0.1708672
## Run 2 stress 0.1490236
## ... procrustes: rmse 0.0000749495 max resid 0.0002596153
## *** Solution reached
nmds_soren <- data.frame(nmds_soren$points) #http://strata.uga.edu/software/pdf/mdsTutorial.pdf
nmds_soren$names<-row.names(nmds_soren) #new names column
nmds_soren <- makeCategories_dups(nmds_soren) #will add our categorical information: lakenames, limnion, filter, quadrant and trophicstate
nmds_soren$ProdLevel <- as.character(nmds_soren$trophicstate)
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Eutrophic"] <- "Productive"
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Mesotrophic"] <- "Productive"
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Oligotrophic"] <- "Unproductive"
nmds_soren$quadrant <- factor(nmds_soren$quadrant,
levels = c("Free Epilimnion", "Free Mixed", "Free Hypolimnion",
"Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))
nmds_soren_quad <- ggplot(nmds_soren, aes(MDS1, MDS2, color = filter, shape = limnion, fill = ProdLevel)) +
annotate("text", label = "A", x = (min(nmds_soren$MDS1) + 0.05),
y = (max(nmds_soren$MDS2) -0.01), face = "bold",size = 6, colour = "black") +
annotate("text", label = "Stress = 0.15", x = (max(nmds_soren$MDS1) -0.2),
y = (max(nmds_soren$MDS2) -0.01), size = 4, colour = "black") +
xlab("NMDS1") + ylab("NMDS2") + ggtitle("Sørensen Dissimilarity") +
geom_point(size = 4, alpha=0.9) + theme_bw() + #geom_point(colour="white", size = 1) +
geom_point(aes(size=6), show_guide = FALSE )+
scale_color_manual(name = "Filter Fraction", breaks=c("Free", "Particle"),
labels = c("Free-Living", "Particle-Associated"),
values = c("orangered", "orangered4")) +
scale_fill_manual(name = "Nutrient Level", breaks = c("Productive", "Unproductive"),
labels = c("High-Nutrient", "Low-Nutrient"),
values = c("gray", "white")) +
scale_shape_manual(name = "Lake Strata", breaks = c("Epilimnion", "Hypolimnion", "Mixed"),
values = c(21, 22, 24)) +
guides(shape = guide_legend(order=1, override.aes=list(size=4)),
fill = guide_legend(order=2, override.aes=list(size=4)),
color = guide_legend(order=3, override.aes=list(size=4))) +
theme(axis.text.x = element_blank(), # text(colour="black", vjust=0.5, size=10),
axis.text.y = element_blank(), #text(colour="black", vjust=0.5, size=10),
axis.title.x = element_text(face="bold", size=10),
axis.title.y = element_text(face="bold", size=10),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size = 8),
axis.ticks = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
plot.title = element_text(size = 12, face="bold"),
###LEGEND TOP RIGHT CORNER
legend.position = "none");
##### Plotting FIGURE 2 ##### Plotting FIGURE 2 ##### Plotting FIGURE 2
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_NMDS_binary_color2.pdf", width= 10, height=4)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.41,0.59))))
print(nmds_soren_quad, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(nmds_bc_quad, vp=viewport(layout.pos.row=1,layout.pos.col=2)); #dev.off()
Figure 2: Non-Metric Multidimensional Scaling (NMDS) Ordinations visualizing differences in bacterial community composition based on (A) Sørensen dissimilarity (OTU presence or absence) and (B) Bray-Curtis dissimilarity (OTU relative abundance). Data points are colored by filter fraction, shaped by lake layer, and filled in by the nutrient level of each sample.
# First we need to organize our environmental data
environ <- sample_data(nosherwin_FWDB_scaled)
environ <- subset(environ, select = c("names", "lakenames", "limnion", "filter", "trophicstate","ProdLevel",
"totaldepth", "DO", "SpC", "temp", "pH"))
environ <- data.frame(environ)
## Calculate the BC dissimilarity and the Sorensen Dissimilarity
nosherwinOTU <- otu_table(nosherwin_FWDB_scaled) # This is our OTU table that we will use for Adonis
set.seed(15879966)
## Bray-Curtis Distance --> OTU relative abundance
bc_dist <- vegdist(nosherwinOTU, method = "bray", binary = FALSE)
adonis_bc <- adonis(bc_dist ~ limnion+filter +ProdLevel+ DO+temp + pH, data=environ); adonis_bc
##
## Call:
## adonis(formula = bc_dist ~ limnion + filter + ProdLevel + DO + temp + pH, data = environ)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 1.7475 1.74746 9.0680 0.15837 0.001 ***
## filter 1 1.4075 1.40755 7.3041 0.12757 0.001 ***
## ProdLevel 1 1.0334 1.03336 5.3623 0.09365 0.001 ***
## DO 1 0.4315 0.43152 2.2392 0.03911 0.010 **
## temp 1 0.3452 0.34521 1.7913 0.03129 0.056 .
## pH 1 0.2876 0.28757 1.4922 0.02606 0.110
## Residuals 30 5.7812 0.19271 0.52395
## Total 36 11.0339 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
soren_dist <- vegdist(data.frame(nosherwinOTU), method = "bray", binary = TRUE)
adonis_soren <- adonis(soren_dist ~ limnion+filter +ProdLevel+ DO+temp + pH, data=environ); adonis_soren
##
## Call:
## adonis(formula = soren_dist ~ limnion + filter + ProdLevel + DO + temp + pH, data = environ)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 1.2078 1.20782 6.6377 0.13291 0.001 ***
## filter 1 0.3956 0.39557 2.1739 0.04353 0.013 *
## ProdLevel 1 1.0405 1.04046 5.7180 0.11449 0.001 ***
## DO 1 0.4885 0.48847 2.6844 0.05375 0.003 **
## temp 1 0.2473 0.24729 1.3590 0.02721 0.115
## pH 1 0.2493 0.24931 1.3701 0.02743 0.122
## Residuals 30 5.4589 0.18196 0.60068
## Total 36 9.0878 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Particle-Associated!
part <- subset_samples(nosherwin_FWDB_scaled, filter == "Particle")
partOTU <- otu_table(part)
set.seed(15879966)
part_env <- subset(environ, filter == "Particle") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
part_BC <- vegdist(partOTU, method = "bray", binary = FALSE)
part_adon_bc <- adonis(part_BC ~ limnion+ProdLevel+ DO + temp + pH, data=part_env); part_adon_bc
##
## Call:
## adonis(formula = part_BC ~ limnion + ProdLevel + DO + temp + pH, data = part_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 0.9511 0.95111 3.9073 0.18067 0.002 **
## ProdLevel 1 0.6783 0.67830 2.7865 0.12885 0.007 **
## DO 1 0.3058 0.30582 1.2564 0.05809 0.227
## temp 1 0.2206 0.22059 0.9062 0.04190 0.592
## pH 1 0.1874 0.18739 0.7698 0.03560 0.654
## Residuals 12 2.9210 0.24342 0.55488
## Total 17 5.2643 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
part_soren <- vegdist(data.frame(partOTU), method = "bray", binary = TRUE)
part_adon_soren <- adonis(part_soren ~ limnion+ProdLevel+ DO + temp + pH, data=part_env); part_adon_soren
##
## Call:
## adonis(formula = part_soren ~ limnion + ProdLevel + DO + temp + pH, data = part_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 0.6181 0.61808 3.2661 0.15065 0.002 **
## ProdLevel 1 0.6066 0.60660 3.2054 0.14785 0.005 **
## DO 1 0.2746 0.27457 1.4509 0.06692 0.102
## temp 1 0.1716 0.17159 0.9067 0.04182 0.589
## pH 1 0.1611 0.16106 0.8511 0.03926 0.604
## Residuals 12 2.2709 0.18924 0.55350
## Total 17 4.1028 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# FREE-LIVING!!!
free <- subset_samples(nosherwin_FWDB_scaled, filter == "Free")
set.seed(3)
freeOTU <- otu_table(free)
free_env <- subset(environ, filter == "Free") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
free_BC <- vegdist(freeOTU, method = "bray", binary = FALSE)
free_adon_bc <- adonis(free_BC ~ limnion+ProdLevel+ DO + temp + pH, data=free_env); free_adon_bc
##
## Call:
## adonis(formula = free_BC ~ limnion + ProdLevel + DO + temp + pH, data = free_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 1.0207 1.02074 5.8396 0.23353 0.001 ***
## ProdLevel 1 0.4824 0.48237 2.7596 0.11036 0.006 **
## DO 1 0.2471 0.24706 1.4134 0.05652 0.155
## temp 1 0.1863 0.18626 1.0656 0.04261 0.375
## pH 1 0.1622 0.16216 0.9277 0.03710 0.498
## Residuals 13 2.2723 0.17480 0.51988
## Total 18 4.3709 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
free_soren <- vegdist(data.frame(freeOTU), method = "bray", binary = TRUE)
free_adon_soren <- adonis(free_soren ~ limnion+ProdLevel+ DO + temp + pH, data=free_env); free_adon_soren
##
## Call:
## adonis(formula = free_soren ~ limnion + ProdLevel + DO + temp + pH, data = free_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 0.7425 0.74255 3.6951 0.16182 0.001 ***
## ProdLevel 1 0.5605 0.56048 2.7891 0.12214 0.001 ***
## DO 1 0.3417 0.34171 1.7004 0.07447 0.046 *
## temp 1 0.1564 0.15638 0.7782 0.03408 0.825
## pH 1 0.1753 0.17527 0.8722 0.03820 0.558
## Residuals 13 2.6124 0.20095 0.56930
## Total 18 4.5888 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Epilimnion!
epi <- subset_samples(nosherwin_FWDB_scaled, limnion == "Epilimnion")
epiOTU <- otu_table(epi)
set.seed(15879966)
epi_env <- subset(environ, limnion == "Epilimnion") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
epi_BC <- vegdist(epiOTU, method = "bray", binary = FALSE)
epi_adon_bc <- adonis(epi_BC ~ filter+ProdLevel+ DO + temp + pH, data=epi_env); epi_adon_bc
##
## Call:
## adonis(formula = epi_BC ~ filter + ProdLevel + DO + temp + pH, data = epi_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## filter 1 0.8092 0.80923 4.5733 0.18180 0.001 ***
## ProdLevel 1 0.4387 0.43866 2.4790 0.09855 0.013 *
## DO 1 0.3191 0.31908 1.8032 0.07168 0.060 .
## temp 1 0.1882 0.18821 1.0636 0.04228 0.380
## pH 1 0.3957 0.39571 2.2363 0.08890 0.024 *
## Residuals 13 2.3003 0.17695 0.51679
## Total 18 4.4512 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
epi_soren <- vegdist(data.frame(epiOTU), method = "bray", binary = TRUE)
epi_adon_soren <- adonis(epi_soren ~ filter+ProdLevel+ DO + temp + pH, data=epi_env); epi_adon_soren
##
## Call:
## adonis(formula = epi_soren ~ filter + ProdLevel + DO + temp + pH, data = epi_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## filter 1 0.2576 0.25760 1.4201 0.06543 0.037 *
## ProdLevel 1 0.4403 0.44031 2.4273 0.11184 0.001 ***
## DO 1 0.3222 0.32222 1.7763 0.08184 0.007 **
## temp 1 0.2520 0.25202 1.3893 0.06401 0.054 .
## pH 1 0.3068 0.30679 1.6912 0.07792 0.006 **
## Residuals 13 2.3582 0.18140 0.59896
## Total 18 3.9371 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# hypolimnion!
hypo <- subset_samples(nosherwin_FWDB_scaled, limnion == "Hypolimnion")
hypoOTU <- otu_table(hypo)
set.seed(15879966)
hypo_env <- subset(environ, limnion == "Hypolimnion") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
hypo_BC <- vegdist(hypoOTU, method = "bray", binary = FALSE)
hypo_adon_bc <- adonis(hypo_BC ~ filter+ProdLevel+DO + temp + pH, data=hypo_env); hypo_adon_bc
##
## Call:
## adonis(formula = hypo_BC ~ filter + ProdLevel + DO + temp + pH, data = hypo_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## filter 1 0.8138 0.81384 6.2042 0.16831 0.001 ***
## ProdLevel 1 1.5496 1.54960 11.8133 0.32048 0.001 ***
## DO 1 0.3124 0.31239 2.3815 0.06461 0.027 *
## temp 1 0.3469 0.34695 2.6449 0.07175 0.008 **
## pH 1 0.2383 0.23833 1.8169 0.04929 0.081 .
## Residuals 12 1.5741 0.13117 0.32555
## Total 17 4.8352 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
hypo_soren <- vegdist(data.frame(hypoOTU), method = "bray", binary = TRUE)
hypo_adon_soren <- adonis(hypo_soren ~ filter+ProdLevel+DO + temp + pH, data=hypo_env); hypo_adon_soren
##
## Call:
## adonis(formula = hypo_soren ~ filter + ProdLevel + DO + temp + pH, data = hypo_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## filter 1 0.2914 0.29143 2.2133 0.07391 0.031 *
## ProdLevel 1 1.2793 1.27933 9.7161 0.32446 0.001 ***
## DO 1 0.2982 0.29815 2.2644 0.07562 0.022 *
## temp 1 0.2520 0.25200 1.9139 0.06391 0.048 *
## pH 1 0.2419 0.24192 1.8373 0.06136 0.056 .
## Residuals 12 1.5801 0.13167 0.40074
## Total 17 3.9429 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Oligotrophic!!
oligo <- subset_samples(nosherwin_FWDB_scaled, trophicstate == "Oligotrophic")
oligoOTU <- otu_table(oligo)
set.seed(15879966)
oligo_env <- subset(environ, trophicstate == "Oligotrophic") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
oligo_BC <- vegdist(oligoOTU, method = "bray", binary = FALSE)
oligo_adon_bc <- adonis(oligo_BC ~ limnion+filter+DO + temp + pH, data=oligo_env); oligo_adon_bc
##
## Call:
## adonis(formula = oligo_BC ~ limnion + filter + DO + temp + pH, data = oligo_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 0.7302 0.73015 4.3187 0.18873 0.001 ***
## filter 1 0.6656 0.66555 3.9366 0.17203 0.002 **
## DO 1 0.3199 0.31986 1.8919 0.08268 0.048 *
## temp 1 0.3413 0.34128 2.0186 0.08821 0.027 *
## pH 1 0.2903 0.29032 1.7172 0.07504 0.063 .
## Residuals 9 1.5216 0.16907 0.39331
## Total 14 3.8688 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
oligo_soren <- vegdist(data.frame(oligoOTU), method = "bray", binary = TRUE)
oligo_adon_soren <- adonis(oligo_soren ~ limnion+filter+DO + temp + pH, data=oligo_env); oligo_adon_soren
##
## Call:
## adonis(formula = oligo_soren ~ limnion + filter + DO + temp + pH, data = oligo_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 0.45172 0.45172 2.7101 0.14749 0.001 ***
## filter 1 0.25213 0.25213 1.5127 0.08232 0.040 *
## DO 1 0.29544 0.29544 1.7725 0.09646 0.010 **
## temp 1 0.25909 0.25909 1.5544 0.08459 0.032 *
## pH 1 0.30432 0.30432 1.8258 0.09936 0.007 **
## Residuals 9 1.50011 0.16668 0.48978
## Total 14 3.06281 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MESO + EUTROPHIC!!
prod <- subset_samples(nosherwin_FWDB_scaled, ProdLevel == "Productive")
prodOTU <- otu_table(prod)
set.seed(15879966)
prod_env <- subset(environ, ProdLevel == "Productive") #Load Environmental Data
## Bray-Curtis Distance --> Tests OTU relative abundance
prod_BC <- vegdist(prodOTU, method = "bray", binary = FALSE)
prod_adon_bc <- adonis(prod_BC ~ limnion+filter+DO + temp + pH, data=prod_env); prod_adon_bc
##
## Call:
## adonis(formula = prod_BC ~ limnion + filter + DO + temp + pH, data = prod_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 2.0802 2.08018 13.2443 0.33340 0.001 ***
## filter 1 0.8607 0.86070 5.4800 0.13795 0.001 ***
## DO 1 0.3050 0.30504 1.9422 0.04889 0.043 *
## temp 1 0.2161 0.21610 1.3759 0.03463 0.168
## pH 1 0.2643 0.26435 1.6831 0.04237 0.109
## Residuals 16 2.5130 0.15706 0.40277
## Total 21 6.2394 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Sørensen Distance --> Tests OTU presence/absence
prod_soren <- vegdist(data.frame(prodOTU), method = "bray", binary = TRUE)
prod_adon_soren <- adonis(prod_soren ~ limnion+filter+DO + temp + pH, data=prod_env); prod_adon_soren
##
## Call:
## adonis(formula = prod_soren ~ limnion + filter + DO + temp + pH, data = prod_env)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## limnion 1 1.5264 1.52640 9.6322 0.30071 0.001 ***
## filter 1 0.2690 0.26902 1.6977 0.05300 0.072 .
## DO 1 0.3218 0.32180 2.0307 0.06340 0.027 *
## temp 1 0.2234 0.22336 1.4095 0.04400 0.151
## pH 1 0.1999 0.19992 1.2616 0.03938 0.183
## Residuals 16 2.5355 0.15847 0.49951
## Total 21 5.0760 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#BC Distance
nowinOTU <- as.matrix(otu_table(nowin_FWDB_scaled))
norm_bray <- vegdist(nowinOTU, method = "bray", binary = FALSE) # calculates the Bray-Curtis Distances
bray <- melt(as.matrix(norm_bray), varnames = c("samp1", "samp2"))
bray <- subset(bray, value > 0)
bray$lakenames1 <- substr(bray$samp1,1,3) # Create a new column called lakenames1 with first 3 letters of string
bray$lakenames2 <- substr(bray$samp2,1,3) # Create a new column called lakenames2 with first 3 letters of string
bray$limnion1 <- substr(bray$samp1, 4, 4) # Create a column called limnon1 with hypo or epi
bray$limnion2 <- substr(bray$samp2, 4, 4) # Create a column called limnon2 with hypo or epi
bray$filter1 <- substr(bray$samp1, 5, 7) # Create a column called filter1 with PA or FL
bray$filter2 <- substr(bray$samp2, 5, 7) # Create a column called filter2 with PA or FL
bray$lakenames1 <- as.character(bray$lakenames1)
bray$lakenames1[bray$lakenames1 == "WIN"] <- "Wintergreen"
bray$lakenames1[bray$lakenames1 == "SIX"] <- "Sixteen"
bray$lakenames1[bray$lakenames1 == "SHE"] <- "Sherman"
bray$lakenames1[bray$lakenames1 == "PAY"] <- "Payne"
bray$lakenames1[bray$lakenames1 == "LON"] <- "LittleLong"
bray$lakenames1[bray$lakenames1 == "LEE"] <- "Lee"
bray$lakenames1[bray$lakenames1 == "GUL"] <- "Gull"
bray$lakenames1[bray$lakenames1 == "BRI"] <- "Bristol"
bray$lakenames1[bray$lakenames1 == "BAK"] <- "Baker"
bray$lakenames1[bray$lakenames1 == "BAS"] <- "Baseline"
bray$lakenames1[bray$lakenames1 == "BST"] <- "Bassett"
bray$lakenames2 <- as.character(bray$lakenames2)
bray$lakenames2[bray$lakenames2 == "WIN"] <- "Wintergreen"
bray$lakenames2[bray$lakenames2 == "SIX"] <- "Sixteen"
bray$lakenames2[bray$lakenames2 == "SHE"] <- "Sherman"
bray$lakenames2[bray$lakenames2 == "PAY"] <- "Payne"
bray$lakenames2[bray$lakenames2 == "LON"] <- "LittleLong"
bray$lakenames2[bray$lakenames2 == "LEE"] <- "Lee"
bray$lakenames2[bray$lakenames2 == "GUL"] <- "Gull"
bray$lakenames2[bray$lakenames2 == "BRI"] <- "Bristol"
bray$lakenames2[bray$lakenames2 == "BAK"] <- "Baker"
bray$lakenames2[bray$lakenames2 == "BAS"] <- "Baseline"
bray$lakenames2[bray$lakenames2 == "BST"] <- "Bassett"
#Add Trophic State column by using the name of the lake
bray <- data.table(bray)
library(data.table)
bray[, trophicstate1 := ifelse(lakenames1 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
ifelse(lakenames1 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
ifelse(lakenames1 %in% c("Sherman"), "Mixed",
ifelse(lakenames1 %in% c("Gull", "Sixteen", "LittleLong", "Lee"),
"Oligotrophic", NA))))]
bray[, trophicstate2 := ifelse(lakenames2 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
ifelse(lakenames2 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
ifelse(lakenames2 %in% c("Sherman"), "Mixed",
ifelse(lakenames2 %in% c("Gull", "Sixteen", "LittleLong", "Lee"),
"Oligotrophic", NA))))]
bray$limnion1[bray$limnion1 == "E"] <- "Epilimnion"
bray$limnion1[bray$limnion1 == "H"] <- "Hypolimnion"
bray$limnion2[bray$limnion2 == "E"] <- "Epilimnion"
bray$limnion2[bray$limnion2 == "H"] <- "Hypolimnion"
### Pull out the SHERMAN LAKE SAMPLES
bray$limnion1[bray$lakenames1 == "Sherman" & bray$limnion1 == "Epilimnion"] <- "Mixed"
bray$limnion1[bray$lakenames1 == "Sherman" & bray$limnion1 == "Hypolimnion"] <- "Mixed"
bray$limnion2[bray$lakenames2 == "Sherman" & bray$limnion2 == "Epilimnion"] <- "Mixed"
bray$limnion2[bray$lakenames2 == "Sherman" & bray$limnion2 == "Hypolimnion"] <- "Mixed"
bray$filter1[bray$filter1 == "3um"] <- "Particle"
bray$filter1[bray$filter1 == ""] <- "Free"
bray$filter2[bray$filter2 == "3um"] <- "Particle"
bray$filter2[bray$filter2 == ""] <- "Free"
#Add the combined lake layer
bray$combined<-rep(NA,length(bray$limnion1))
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Epilimnion"]<-"Epilimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Hypolimnion"]<-"Hypolimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Epilimnion"]<-"EH"
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Hypolimnion"]<-"EH"
bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Mixed"]<-"Mixed"
bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Hypolimnion"]<-"Mixed Hypolimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Mixed"]<-"Mixed Hypolimnion"
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Mixed"]<-"Mixed Epilimnion"
bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Epilimnion"]<-"Mixed Epilimnion"
## Add the commbined filter.
bray$filt_comb<-rep(NA,length(bray$filter1))
bray$filt_comb[bray$filter1=="Free"&bray$filter2=="Free"]<-"Free"
bray$filt_comb[bray$filter1=="Particle"&bray$filter2=="Particle"]<-"Particle"
bray$filt_comb[bray$filter1=="Particle"&bray$filter2=="Free"]<-"PF"
bray$filt_comb[bray$filter1=="Free"&bray$filter2=="Particle"]<-"PF"
#creating a column for each combination of top bottom particle free
for(i in 1:length(bray$limnion1)){
bray$cat[i]<-paste(as.character(bray$filt_comb[i]),as.character(bray$combined[i]))}
# Add for trophicstate combintions
bray$troph_comb<-rep(NA,length(bray$lakenames2))
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Mesotrophic"]<-"Eutrophic-Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Oligotrophic"]<-"Eutrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Mixed"]<-"Eutrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" &
bray$trophicstate2=="Oligotrophic"]<-"Mesotrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" &
bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Mixed"]<-"Mesotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Oligotrophic"]<-"Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Oligotrophic"]<-"Oligotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Mixed"]<-"Oligotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Mixed"]<-"Mixed"
## Add combined trophicstate, filter, and limnion
for(i in 1:length(bray$limnion1)){
bray$troph_lim1[i]<-paste(as.character(bray$trophicstate1[i]),
as.character(bray$limnion1[i]),
as.character(bray$filter1[i]))}
for(i in 1:length(bray$limnion2)){
bray$troph_lim2[i]<-paste(as.character(bray$trophicstate2[i]),
as.character(bray$limnion2[i]),
as.character(bray$filter2[i]))}
bray$samp1 <- as.character(bray$samp1)
bray$samp2 <- as.character(bray$samp2)
#creating a column for each combination of top bottom particle free
for(i in 1:length(bray$limnion1)){
bray$cat[i]<-paste(as.character(bray$filt_comb[i]),as.character(bray$combined[i]))}
### Subsetting out the samples that are in the same categories as each other
beta <- subset(bray, troph_lim1 == troph_lim2)
### Put everything in the order we would like.
beta$trophicstate1 <-factor(beta$trophicstate1,
levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))
beta$trophicstate2 <-factor(beta$trophicstate2,
levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))
beta$troph_lim2 <-factor(beta$troph_lim2,
levels=c("Eutrophic Epilimnion Particle", "Eutrophic Epilimnion Free", "Eutrophic Hypolimnion Particle", "Eutrophic Hypolimnion Free","Mesotrophic Epilimnion Particle", "Mesotrophic Epilimnion Free", "Mesotrophic Hypolimnion Particle", "Mesotrophic Hypolimnion Free","Oligotrophic Epilimnion Particle", "Oligotrophic Epilimnion Free", "Oligotrophic Hypolimnion Particle", "Oligotrophic Hypolimnion Free","Mixed Mixed Particle", "Mixed Mixed Free"))
#### Subsetting out the mixed lake!
nomix_beta <- subset(beta, trophicstate1 != "Mixed")
nomix_beta2 <- subset(nomix_beta, trophicstate2 != "Mixed")
prod_beta <- nomix_beta2
prod_beta$troph_lim1 <- as.character(prod_beta$troph_lim1)
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Epilimnion Free" |
prod_beta$troph_lim1 =="Mesotrophic Epilimnion Free"] <- "Productive Epilimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Epilimnion Particle" |
prod_beta$troph_lim1 =="Mesotrophic Epilimnion Particle"] <- "Productive Epilimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Hypolimnion Free" |
prod_beta$troph_lim1 =="Mesotrophic Hypolimnion Free"] <- "Productive Hypolimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Hypolimnion Particle" |
prod_beta$troph_lim1 =="Mesotrophic Hypolimnion Particle"] <- "Productive Hypolimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Epilimnion Free"] <- "Unproductive Epilimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Epilimnion Particle"] <- "Unproductive Epilimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Hypolimnion Free"] <- "Unproductive Hypolimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Hypolimnion Particle"] <- "Unproductive Hypolimnion Particle"
prod_beta$trophicstate1 <- as.character(prod_beta$trophicstate1)
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Eutrophic"] <- "Productive"
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Mesotrophic"] <- "Productive"
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Oligotrophic"] <- "Unproductive"
prod_beta <- prod_beta %>%
group_by(troph_lim1) %>%
mutate(mean.bray = mean(value),
sd.bray = sd(value))
############ Kruskal-Wallis Rank Sum Test: Bray-Curtis
#hist(prod_beta$value, breaks = 30) # Not normally distributed
prod_beta$value <- as.numeric(prod_beta$value)
prod_beta$troph_lim1 <- as.factor(prod_beta$troph_lim1)
prod_bray_beta <- prod_beta
## Run the Kruskal Wallis test
bray_prod_KW <- kruskal.test(prod_bray_beta$value ~ prod_bray_beta$troph_lim1)
print(bray_prod_KW) # show Kruskal Wallis result
##
## Kruskal-Wallis rank sum test
##
## data: prod_bray_beta$value by prod_bray_beta$troph_lim1
## Kruskal-Wallis chi-squared = 32.341, df = 7, p-value = 0.00003511
### Which samples are significantly different from each other?
bray_prod_KW_MC <- kruskalmc(prod_bray_beta$value ~ prod_bray_beta$troph_lim1) ## Defaults to P < 0.05
#print(bray_prod_KW_MC)
### Test to find the letters to represent significance in a plot
bray_test <- bray_prod_KW_MC$dif.com$difference # select logical vector
names(bray_test) <- row.names(bray_prod_KW_MC$dif.com) # add comparison names
# create a list with "homogenous groups" coded by letter
bray_letters <- multcompLetters(bray_test, compare="<", threshold=0.05,
Letters=c(letters, LETTERS, "."), reversed = FALSE)
### Extract the values from the multcompLetters object
bray_sigs_dataframe <- data.frame(as.vector(names(bray_letters$Letters)), as.vector(bray_letters$Letters))
colnames(bray_sigs_dataframe) <- c("troph_lim1", "siglabel")
bray_try <- merge(prod_beta, bray_sigs_dataframe, by = "troph_lim1")
bray_try$troph_lim1 <-factor(bray_try$troph_lim1,
levels=c( "Productive Epilimnion Free", "Productive Epilimnion Particle", "Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"))
bray_sigs <- bray_try %>%
select(trophicstate1, troph_lim1, siglabel) %>%
distinct()
bray_boxplot <- ggplot(bray_try, aes(x = troph_lim1, y = value, fill = troph_lim1)) + geom_point(size = 2) +
geom_boxplot() +
geom_text(data = bray_sigs, aes(label = siglabel, x = troph_lim1, y = 0.92), size =3) +
scale_fill_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free",
"Productive Hypolimnion Particle", "Productive Hypolimnion Free",
"Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
"Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free"),
values = c("royalblue4", "royalblue4", "royalblue4", "royalblue4",
"cornflowerblue","cornflowerblue","cornflowerblue","cornflowerblue"))+
scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle",
"Productive Hypolimnion Free","Productive Hypolimnion Particle",
"Unproductive Epilimnion Free", "Unproductive Epilimnion Particle",
"Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"),
labels=c("Epilimnion \nFree-Living (12)", "Epilimnion \nParticle-Associated (12)",
"Hypolimnion \nFree-Living (8)", "Hypolimnion \nParticle-Associated (8)",
"Epilimnion \nFree-Living (12)", "Epilimnion \nParticle-Associated (6)",
"Hypolimnion \nFree-Living (12)", "Hypolimnion \nParticle-Associated (12)")) +
xlab("Habitat") + ylab("Bray-Curtis \nDissimilarity") + theme_bw() + #scale_fill_brewer(palette="Paired") +
facet_grid(. ~ trophicstate1, scale = "free", space = "free") +
theme(plot.title = element_text(face="bold", size = 12), #Set the plot title
strip.text.x = element_blank(), #Set the facet titles on x-axis
strip.text.y = element_blank(), #Set the facet titles on x-axis
strip.background = element_blank(), #Set the facet background to no background
axis.title.x = element_text(face="bold", size=10), #Set the x-axis title
axis.title.y = element_text(face="bold", size=10), #Set the y-axis title
axis.text.x = element_text(angle=30, colour = "black",
vjust=1, hjust = 1, size=8), #Set the x-axis labels
axis.text.y = element_text(colour = "black", size=8), #Set the y-axis labels
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
legend.position="none", #Default the legend position to the right
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")); #top, right, bottom, left
## Calculate the sorensen dissimiliarity matrix
nowinOTU_df <- data.frame(otu_table(nowin_FWDB_scaled))
norm_soren <- vegdist(nowinOTU_df, method = "bray", binary = TRUE) # SORENSEN INDEX
### Melt all dissimilarities into one data frame
soren <- melt(as.matrix(norm_soren), varnames = c("samp1", "samp2"))
soren <- subset(soren, value > 0)
soren$lakenames1 <- substr(soren$samp1,1,3) # Create a new row called "lakenames" with first 3 letters of string
soren$lakenames2 <- substr(soren$samp2,1,3) # Create a new row called "lakenames" with first 3 letters of string
soren$limnion1 <- substr(soren$samp1, 4, 4) # Create a column called limnon with hypo or epi
soren$limnion2 <- substr(soren$samp2, 4, 4) # Create a column called limnon with hypo or epi
soren$filter1 <- substr(soren$samp1, 5, 7)
soren$filter2 <- substr(soren$samp2, 5, 7)
soren$lakenames1 <- as.character(soren$lakenames1)
soren$lakenames1[soren$lakenames1 == "WIN"] <- "Wintergreen"
soren$lakenames1[soren$lakenames1 == "SIX"] <- "Sixteen"
soren$lakenames1[soren$lakenames1 == "SHE"] <- "Sherman"
soren$lakenames1[soren$lakenames1 == "PAY"] <- "Payne"
soren$lakenames1[soren$lakenames1 == "LON"] <- "LittleLong"
soren$lakenames1[soren$lakenames1 == "LEE"] <- "Lee"
soren$lakenames1[soren$lakenames1 == "GUL"] <- "Gull"
soren$lakenames1[soren$lakenames1 == "BRI"] <- "Bristol"
soren$lakenames1[soren$lakenames1 == "BAK"] <- "Baker"
soren$lakenames1[soren$lakenames1 == "BAS"] <- "Baseline"
soren$lakenames1[soren$lakenames1 == "BST"] <- "Bassett"
soren$lakenames2 <- as.character(soren$lakenames2)
soren$lakenames2[soren$lakenames2 == "WIN"] <- "Wintergreen"
soren$lakenames2[soren$lakenames2 == "SIX"] <- "Sixteen"
soren$lakenames2[soren$lakenames2 == "SHE"] <- "Sherman"
soren$lakenames2[soren$lakenames2 == "PAY"] <- "Payne"
soren$lakenames2[soren$lakenames2 == "LON"] <- "LittleLong"
soren$lakenames2[soren$lakenames2 == "LEE"] <- "Lee"
soren$lakenames2[soren$lakenames2 == "GUL"] <- "Gull"
soren$lakenames2[soren$lakenames2 == "BRI"] <- "Bristol"
soren$lakenames2[soren$lakenames2 == "BAK"] <- "Baker"
soren$lakenames2[soren$lakenames2 == "BAS"] <- "Baseline"
soren$lakenames2[soren$lakenames2 == "BST"] <- "Bassett"
#Add Trophic State column by using the name of the lake
soren <- data.table(soren)
library(data.table)
soren[, trophicstate1 := ifelse(lakenames1 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
ifelse(lakenames1 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
ifelse(lakenames1 %in% c("Sherman"), "Mixed",
ifelse(lakenames1 %in% c("Gull", "Sixteen", "LittleLong", "Lee"),
"Oligotrophic", NA))))]
soren[, trophicstate2 := ifelse(lakenames2 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
ifelse(lakenames2 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
ifelse(lakenames2 %in% c("Sherman"), "Mixed",
ifelse(lakenames2 %in% c("Gull", "Sixteen", "LittleLong", "Lee"),
"Oligotrophic", NA))))]
soren$limnion1[soren$limnion1 == "E"] <- "Epilimnion"
soren$limnion1[soren$limnion1 == "H"] <- "Hypolimnion"
soren$limnion2[soren$limnion2 == "E"] <- "Epilimnion"
soren$limnion2[soren$limnion2 == "H"] <- "Hypolimnion"
### Pull out the SHERMAN LAKE SAMPLES
soren$limnion1[soren$lakenames1 == "Sherman" & soren$limnion1 == "Epilimnion"] <- "Mixed"
soren$limnion1[soren$lakenames1 == "Sherman" & soren$limnion1 == "Hypolimnion"] <- "Mixed"
soren$limnion2[soren$lakenames2 == "Sherman" & soren$limnion2 == "Epilimnion"] <- "Mixed"
soren$limnion2[soren$lakenames2 == "Sherman" & soren$limnion2 == "Hypolimnion"] <- "Mixed"
soren$filter1[soren$filter1 == "3um"] <- "Particle"
soren$filter1[soren$filter1 == ""] <- "Free"
soren$filter2[soren$filter2 == "3um"] <- "Particle"
soren$filter2[soren$filter2 == ""] <- "Free"
#Add the combined lake layer
soren$combined<-rep(NA,length(soren$limnion1))
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Epilimnion"]<-"Epilimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Hypolimnion"]<-"Hypolimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Epilimnion"]<-"EH"
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Hypolimnion"]<-"EH"
soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Mixed"]<-"Mixed"
soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Hypolimnion"]<-"Mixed Hypolimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Mixed"]<-"Mixed Hypolimnion"
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Mixed"]<-"Mixed Epilimnion"
soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Epilimnion"]<-"Mixed Epilimnion"
## Add the commbined filter.
soren$filt_comb<-rep(NA,length(soren$filter1))
soren$filt_comb[soren$filter1=="Free"&soren$filter2=="Free"]<-"Free"
soren$filt_comb[soren$filter1=="Particle"&soren$filter2=="Particle"]<-"Particle"
soren$filt_comb[soren$filter1=="Particle"&soren$filter2=="Free"]<-"PF"
soren$filt_comb[soren$filter1=="Free"&soren$filter2=="Particle"]<-"PF"
#creating a column for each combination of top bottom particle free
for(i in 1:length(soren$limnion1)){
soren$cat[i]<-paste(as.character(soren$filt_comb[i]),as.character(soren$combined[i]))}
# Add for trophicstate combintions
soren$troph_comb<-rep(NA,length(soren$lakenames2))
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Mesotrophic"]<-"Eutrophic-Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" &
soren$trophicstate2=="Oligotrophic"]<-"Eutrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" &
soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Mixed"]<-"Eutrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" &
soren$trophicstate2=="Oligotrophic"]<-"Mesotrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" &
soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Mixed"]<-"Mesotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" & soren$trophicstate2=="Oligotrophic"]<-"Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Oligotrophic"]<-"Oligotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" & soren$trophicstate2=="Mixed"]<-"Oligotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Mixed"]<-"Mixed"
## Add combined trophicstate, filter, and limnion
for(i in 1:length(soren$limnion1)){
soren$troph_lim1[i]<-paste(as.character(soren$trophicstate1[i]),
as.character(soren$limnion1[i]),
as.character(soren$filter1[i]))}
for(i in 1:length(soren$limnion2)){
soren$troph_lim2[i]<-paste(as.character(soren$trophicstate2[i]),
as.character(soren$limnion2[i]),
as.character(soren$filter2[i]))}
soren$samp1 <- as.character(soren$samp1)
soren$samp2 <- as.character(soren$samp2)
#creating a column for each combination of top bottom particle free
for(i in 1:length(soren$limnion1)){
soren$cat[i]<-paste(as.character(soren$filt_comb[i]),as.character(soren$combined[i]))}
### Subsetting out the samples that are in the same categories as each other
soren_beta <- subset(soren, troph_lim1 == troph_lim2)
### Put everything in the order we would like.
soren_beta$trophicstate1 <-factor(soren_beta$trophicstate1,
levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))
soren_beta$trophicstate2 <-factor(soren_beta$trophicstate2,
levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))
soren_beta$troph_lim2 <-factor(soren_beta$troph_lim2,
levels=c("Eutrophic Epilimnion Particle", "Eutrophic Epilimnion Free", "Eutrophic Hypolimnion Particle", "Eutrophic Hypolimnion Free","Mesotrophic Epilimnion Particle", "Mesotrophic Epilimnion Free", "Mesotrophic Hypolimnion Particle", "Mesotrophic Hypolimnion Free","Oligotrophic Epilimnion Particle", "Oligotrophic Epilimnion Free", "Oligotrophic Hypolimnion Particle", "Oligotrophic Hypolimnion Free","Mixed Mixed Particle", "Mixed Mixed Free"))
#### Subsetting out the mixed lake!
nomix_soren_beta <- subset(soren_beta, trophicstate1 != "Mixed")
nomix_soren_beta2 <- subset(nomix_soren_beta, trophicstate2 != "Mixed")
prod_soren <- nomix_soren_beta2
prod_soren$troph_lim1 <- as.character(prod_soren$troph_lim1)
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Epilimnion Free" |
prod_soren$troph_lim1 =="Mesotrophic Epilimnion Free"] <- "Productive Epilimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Epilimnion Particle" |
prod_soren$troph_lim1 =="Mesotrophic Epilimnion Particle"] <- "Productive Epilimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Hypolimnion Free" |
prod_soren$troph_lim1 =="Mesotrophic Hypolimnion Free"] <- "Productive Hypolimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Hypolimnion Particle" |
prod_soren$troph_lim1 =="Mesotrophic Hypolimnion Particle"] <- "Productive Hypolimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Epilimnion Free"] <- "Unproductive Epilimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Epilimnion Particle"] <- "Unproductive Epilimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Hypolimnion Free"] <- "Unproductive Hypolimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Hypolimnion Particle"] <- "Unproductive Hypolimnion Particle"
prod_soren$trophicstate1 <- as.character(prod_soren$trophicstate1)
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Eutrophic"] <- "High-Nutrient"
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Mesotrophic"] <- "High-Nutrient"
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Oligotrophic"] <- "Low-Nutrient"
prod_soren <- prod_soren %>%
group_by(troph_lim1) %>%
mutate(mean.soren = mean(value),
sd.soren = sd(value))
############ Kruskal-Wallis Rank Sum Test: Sørensen
#### Run the test on ALL the data that goes into the mean
#hist(prod_soren$value, breaks = 30) # Not normally distributed
prod_soren$value <- as.numeric(prod_soren$value)
prod_soren$troph_lim1 <- as.factor(prod_soren$troph_lim1)
## Do the KW test
soren_prod_KW <- kruskal.test(prod_soren$value ~ prod_soren$troph_lim1) # Kruskal Wallis test on sorensen
print(soren_prod_KW) # show Kruskal Wallis result
##
## Kruskal-Wallis rank sum test
##
## data: prod_soren$value by prod_soren$troph_lim1
## Kruskal-Wallis chi-squared = 43.741, df = 7, p-value =
## 0.0000002399
### Which samples are significantly different from each other?
soren_prod_KW_MC <- kruskalmc(prod_soren$value ~ prod_soren$troph_lim1) ## Defaults to P < 0.05
#print(soren_prod_KW_MC)
### Time to figure out letters to represent significance in a plot
soren_test <- soren_prod_KW_MC$dif.com$difference # select logical vector
names(soren_test) <- row.names(soren_prod_KW_MC$dif.com) # add comparison names
# create a list with "homogenous groups" coded by letter
soren_letters <- multcompLetters(soren_test, compare="<", threshold=0.05,
Letters=c(letters, LETTERS, "."), reversed = FALSE)
### Extract the values from the multcompLetters object
soren_sigs_dataframe <- data.frame(as.vector(names(soren_letters$Letters)), as.vector(soren_letters$Letters))
colnames(soren_sigs_dataframe) <- c("troph_lim1", "siglabel")
soren_try <- merge(prod_soren, soren_sigs_dataframe, by = "troph_lim1")
soren_try$troph_lim1 <-factor(soren_try$troph_lim1,
levels=c( "Productive Epilimnion Free", "Productive Epilimnion Particle", "Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"))
soren_sigs <- soren_try %>%
select(trophicstate1, troph_lim1, siglabel) %>%
distinct()
soren_boxplot <- ggplot(soren_try, aes(x = troph_lim1, y = value, fill = troph_lim1)) + geom_point(size = 2) +
geom_boxplot() +
geom_text(data = soren_sigs, aes(label = siglabel, x = troph_lim1, y = 0.74), size =3) +
scale_fill_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free",
"Productive Hypolimnion Particle", "Productive Hypolimnion Free",
"Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
"Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free"),
values = c("royalblue4", "royalblue4", "royalblue4", "royalblue4",
"cornflowerblue","cornflowerblue","cornflowerblue","cornflowerblue"))+
scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle",
"Productive Hypolimnion Free","Productive Hypolimnion Particle",
"Unproductive Epilimnion Free", "Unproductive Epilimnion Particle",
"Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"),
labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated",
"Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
"Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated",
"Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated")) +
xlab("Habitat") + ylab("Sørensen \nDissimilarity") + theme_bw() + #scale_fill_brewer(palette="Paired") +
facet_grid(. ~ trophicstate1, scale = "free", space = "free") +
theme(plot.title = element_text(face="bold", size = 12), #Set the plot title
strip.text.x = element_text(face="bold", size=10), #Set X facet titles
strip.text.y = element_blank(), #Set the facet titles on x-axis
strip.background = element_blank(), #Set the facet background to no background
axis.title.x = element_text(face="bold", size=10), #Set the x-axis title
axis.title.y = element_text(face="bold", size=10), #Set the y-axis title
axis.text.x = element_text(angle=30, colour = "black",
vjust=1, hjust = 1, size=8),
axis.text.y = element_text(colour = "black", size=8), #Set the y-axis labels
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
legend.position="none", #Default the legend position to the right
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")); #top, right, bottom, left
soren_boxplot2 <- soren_boxplot + theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
#top, right, bottom, left
plot.margin = unit(c(0.1, 0.1, 0.35, 0.1), "cm"))
bray_boxplot2 <- bray_boxplot + theme(plot.margin = unit(c(-1, 0.1, 0.1, 0.26), "cm")) #top, right, bottom, left
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_beta_variance.pdf", width= 5, height=4.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_beta_variance.tiff", width= 5, height=4.5, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(2,1,height=c(0.46,0.54))))
print(soren_boxplot2, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(bray_boxplot2, vp=viewport(layout.pos.row=2,layout.pos.col=1)); #dev.off()
Figure 3: Box and whisker plots of the variance in the Sørensen and Bray-Curtis dissimilarity metrics between lakes. (Top) Sørensen dissimilarity (OTU presence or absence; KW: p = 2.4 x 10-7) and (Bottom) Bray-Curtis dissimilarity (OTU relative abundance; KW: p = 3.5 x 10-5) between samples within the same habitat in different lakes. Letter(s) next to data points indicate groups of samples that are significantly different in their degree of lake-to-lake dissimilarity. Numbers in parentheses along the x-axis represent sample sizes of each lake habitat.
### Calculate the mean relative abundance based on ProdLevel + Quadrant for each PHYLUM
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEE3um" & names !="SHEH" & names != "SHEH3um")
# Remove any OTUs that are 0
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled)
################################################################################# OVERALL ABUNDANCE PLOT
############### NO ISOTHERMAL LAKE (SHERMAN) SAMPLES AND NO WINTERGREEN HYPOLIMNION SAMPLES
good_phylum <-tax_glom(nosherwin_FWDB_scaled,taxrank = "Phylum")
## Melt it into a dataframe
goodsamps_phy_melt <- psmelt(good_phylum)
sub_goodsamps_phy_melt <- subset(goodsamps_phy_melt, select = c("Sample", "ProdLevel", "quadrant", "Phylum","Abundance"))
TOTALS <- ddply(sub_goodsamps_phy_melt, c("Sample"), summarise, total = sum(Abundance))
### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
sub_phy_melt_totals <- merge(sub_goodsamps_phy_melt, TOTALS, by = "Sample")
## Calculate the relative abundance
sub_phy_melt_totals$RelAbundance <- sub_phy_melt_totals$Abundance/sub_phy_melt_totals$total
## Calculate the Percent Abundance
sub_phy_melt_totals$PercentAbund <- sub_phy_melt_totals$RelAbundance * 100
#### Make a new dataframe with the percent abudance within the entire dataset!
phy_stats <- ddply(sub_phy_melt_totals, c("Phylum"), summarise,
N = length(PercentAbund),
PercentPhy = mean(PercentAbund),
sd = sd(PercentAbund),
se = sd / sqrt(N))
abund <- subset(phy_stats,PercentPhy > 0.001) # Only take the phyla that are more than 0.01% abundant
abund$Phylum <- factor(abund$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Firmicutes", "Chlorobi", "Armatimonadetes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae", "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "Gemmatimonadetes", "WCHB1-60", "Deferribacteres", "Fusobacteria", "Candidate_division_TM7", "SPOTSOCT00m83", "Candidate_division_OP8", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11", "unclassified"))
abund$Phylum = with(abund, factor(Phylum, levels = rev(levels(Phylum))))
#Relative abundance plot
ggplot(abund, aes(y=PercentPhy , x=Phylum)) +
geom_bar(stat="identity", position=position_dodge(), fill = "cornflowerblue", colour = "black") +
theme_bw() + ggtitle("Phyla Above 0.1% in All Samples") +
xlab("Phylum") + ylab("Mean Relative Abundance (%)") +
geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25) +
coord_flip()
# sub_phy_melt_totals # This data frame has no Wintergreen hypo or Mixed lake samples!
## Calculate the Phylum abundance based on the ProdLevel and the filter fraction.
prod_phylum_FWDB_stats <- ddply(sub_phy_melt_totals, c("ProdLevel","quadrant", "Phylum"), summarise,
N = length(PercentAbund),
mean_abundance = mean(PercentAbund),
sd = sd(PercentAbund),
se = sd / sqrt(N))
### Only the phylum
abund_only_by_phylum <- ddply(prod_phylum_FWDB_stats, c("Phylum"), summarise,
N = length(mean_abundance),
phylum_mean = mean(mean_abundance))
abund_only_by_phylum <-arrange(abund_only_by_phylum, desc(phylum_mean))
# Create a vector of the phyla in the order that we would like
phy_order <- as.character(abund_only_by_phylum$Phylum)
### TOP 25 PHYLA
top15phy <- prod_phylum_FWDB_stats[prod_phylum_FWDB_stats$Phylum %in% phy_order[1:17], ]
phy15_order <- as.character(abund_only_by_phylum$Phylum)[1:17]
#### Make sure there's no sherman lake in this part of the analysis
good_phylum_with_sher <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
phy_melt_with_sher <- psmelt(good_phylum_with_sher)
good_phylum_no_sher <-tax_glom(nosherwin_FWDB_scaled,taxrank = "Phylum")
######################################################## DESeq on the six habitats
########################################################### HIGH vs LOWnutrient lakes
DESeq_prodLevel <- deSEQ(good_phylum_no_sher, ~ ProdLevel)
########################################################### Epilimnion vs Hypolimnion
DESeq_limnion <- deSEQ(good_phylum_no_sher, ~ limnion)
########################################################### Particle vs Free
DESeq_filter <- deSEQ(good_phylum_no_sher, ~ filter)
DESeq_hab1 <- subset(DESeq_filter, select = c(Phylum, log2FoldChange, padj)) %>%
mutate(Habitat = "PA vs. FL",
sig_lab = "*")
DESeq_hab2 <- subset(DESeq_limnion, select = c(Phylum, log2FoldChange, padj)) %>%
mutate(Habitat = "Top vs. Bottom",
sig_lab = "*")
DESeq_hab3 <- subset(DESeq_prodLevel, select = c(Phylum, log2FoldChange, padj)) %>%
mutate(Habitat = "Prod vs. UnProd",
log2FoldChange = log2FoldChange *-1,
sig_lab = "*")#
# Prep the data frame
bicompare_phy <- subset(phy_melt_with_sher, select = c("Sample", "filter", "ProdLevel", "limnion","quadrant", "Phylum","Abundance"))
## Phylum Total for each sample
bi_TOTALS <- ddply(bicompare_phy, c("Sample"), summarise,
total = sum(Abundance))
### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
bicompare_totals <- merge(bicompare_phy, bi_TOTALS, by = "Sample")
bicompare_phy$RelAbundance <- bicompare_phy$Abundance/bicompare_totals$total ## Calculate the relative abundance
bicompare_phy$PercentAbund <- bicompare_phy$RelAbundance * 100 ## Calculate the Percent Abundance
## Only provide top 17 phyla
bicompare_phy <- bicompare_phy[bicompare_phy$Phylum %in% phy_order[1:17], ]
bicompare_phy$Phylum <- factor(bicompare_phy$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae"))
## PROD LEVEL
bicompare_phy$ProdLevel <- as.character(bicompare_phy$ProdLevel)
bicompare_phy$ProdLevel[bicompare_phy$ProdLevel == "Productive"] <- "High-Nutrient"
bicompare_phy$ProdLevel[bicompare_phy$ProdLevel == "Unproductive"] <- "Low-Nutrient"
# FILTER
bicompare_phy$filter <- as.character(bicompare_phy$filter)
bicompare_phy$filter[bicompare_phy$filter == "Free"] <- "Free-Living"
bicompare_phy$filter[bicompare_phy$filter == "Particle"] <- "Particle-Associated"
bicompare_phy$ProdLevel<- factor(bicompare_phy$ProdLevel,levels = c("Low-Nutrient", "High-Nutrient"))
bicompare_phy_nowin <- subset(bicompare_phy, Sample != "WINH" & Sample != "WINH3um")
bicompare_phy_nosherwin <- subset(bicompare_phy_nowin, Sample != "SHEE" & Sample != "SHEH" &
Sample != "SHEE3um" & Sample != "SHEH3um")
##### Prepare the data frame for significance values for ProdLevel
filter_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab1, by = "Phylum")
filter_phy_boxplot <- filter_phy_boxplot[filter_phy_boxplot$Phylum %in% phy_order[1:17], ]
## Order the phyla to match original data frame
filter_phy_boxplot$Phylum <- factor(filter_phy_boxplot$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae"))
g1 <- ggplot(filter_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=filter)) +
ggtitle("") + xlab("") + geom_boxplot() + ylab("Mean Percent \nRelative Abundance") +
scale_fill_manual(name = "Filter Fraction", breaks=c("Free-Living", "Particle-Associated"),
labels = c("Free-Living (21)","Particle-Associated (20)"),
values = c("orangered", "orangered4")) +
theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
scale_colour_hue(guide = "none") +
theme(axis.text.x = element_blank(),
plot.title = element_text(face="bold", size = 12),
axis.title.y = element_text(face="bold", size=10),
axis.ticks.x = element_blank(),
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8),
plot.margin = unit(c(0.1, 0.1, -1, 0.1), "cm"), #top, right, bottom, left
legend.position = c(0.85, 0.67))
##### Prepare the data frame for significance values for ProdLevel
prodlevel_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab3, by = "Phylum")
prodlevel_phy_boxplot <- prodlevel_phy_boxplot[prodlevel_phy_boxplot$Phylum %in% phy_order[1:17], ]
## Order the phyla to match original data frame
prodlevel_phy_boxplot$Phylum <- factor(prodlevel_phy_boxplot$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae"))
g2 <- ggplot(prodlevel_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=ProdLevel)) +
ggtitle("") + xlab("") + geom_boxplot() +
ylab("Mean Percent \nRelative Abundance") +
scale_fill_manual(name = "Productivity Level", breaks=c("Low-Nutrient", "High-Nutrient"),
labels = c("Low-Nutrient (15)","High-Nutrient (26)"),
values = c("cornflowerblue", "royalblue4")) +
theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
scale_colour_hue(guide = "none") +
theme(axis.text.x = element_blank(),
axis.title.y = element_text(face="bold", size=10),
axis.ticks.x = element_blank(),
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8),
plot.margin = unit(c(-0.25, 0.1, -0.5, 0.1), "cm"), #top, right, bottom, left
legend.position = c(0.88, 0.67))
##### Prepare the data frame for significance values for ProdLevel
limnion_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab2, by = "Phylum")
limnion_phy_boxplot <- limnion_phy_boxplot[limnion_phy_boxplot$Phylum %in% phy_order[1:17], ]
## Order the phyla to match original data frame
limnion_phy_boxplot$Phylum <- factor(limnion_phy_boxplot$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae"))
g3 <- ggplot(limnion_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=limnion)) +
ggtitle("") + xlab("") + geom_boxplot() + ylab("Mean Percent \nRelative Abundance") + xlab("Phylum") +
scale_fill_manual(name = "Lake Layer", breaks=c("Epilimnion", "Hypolimnion"),
labels = c("Epilimnion (19)","Hypolimnion (18)"),
values = c("limegreen", "darkgreen")) +
theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
scale_colour_hue(guide = "none") +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1, size = 8),
axis.title.x = element_text(face="bold", size=10),
axis.title.y = element_text(face="bold", size=10),
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
plot.margin = unit(c(-0.75, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
legend.position = c(0.88, 0.67));
# Store ggplot graphical parameters
g1 <- ggplotGrob(g1)
g2 <- ggplotGrob(g2)
g3 <- ggplotGrob(g3)
# Add columns to plots without legends
#ncol(g1); ncol(g2); ncol(g3)
#g1 <- gtable_add_cols(g1, g2$widths[6])
#### DRAW FIGURE 5 ### DRAW FIGURE 5 #### DRAW FIGURE 5 ### DRAW FIGURE 5 ####
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig4_phy17_comparison_sigs.pdf", width= 6.5, height=6.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig4_phy17_comparison_sigs.tiff", width= 6.5, height=6.5, units = "in", res = 175)
grid.draw(rbind(g1, g2, g3, size = "last")); #dev.off()
Figure 4: Phylum abundance of the six lake habitats of the 17 most abundant phyla and classes of Proteobacteria. Box and whisker plots of the phylum abundance across all samples grouped based on (top) filter fraction, (middle) nutrient level, or (bottom) lake layer. Numbers in parentheses within the legend represent sample sizes of each lake habitat. Red stars represent significant differentially abundant phyla between filter fraction, productivity level, or lake layers as calculated by DESeq.
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS4_mixed_green.pdf", width= 6.5, height=3)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS4_mixed_green.tiff", width= 6.5, height=3, units = "in", res = 175)
ggplot(bicompare_phy_nowin, aes(y=PercentAbund , x=Phylum, fill=limnion)) + #coord_cartesian(xlim = c(0, 30)) +
ggtitle("") + xlab("") + geom_boxplot() + ylab("Mean Percent \nRelative Abundance") + xlab("Phylum") +
scale_fill_manual(name = "Lake Strata", breaks=c("Epilimnion", "Hypolimnion", "Mixed"),
labels = c("Epilimnion (19)","Hypolimnion (18)", "Mixed (4)"),
values = c("limegreen", "darkgreen", "seagreen1")) +
theme_bw() + theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1, size = 8),
axis.title.x = element_text(face="bold", size=10),
axis.title.y = element_text(face="bold", size=10),
plot.title = element_text(face = "bold", size = 12),
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
legend.position = c(0.88, 0.7)); #dev.off()
Figure S4: Phylum abundance of the 3 lake layer habitats, including the mixed lake, of the 17 most abundant phyla and classes of Proteobacteria. Box and whisker plots of the phylum abundance across the epilimnion, hypolimnion and mixed lake samples. Numbers in parentheses within the legend represent sample sizes of each lake habitat.
## Calculate the Phylum abundance based on the ProdLevel and the filter fraction.
prod_phylum_FWDB_stats <- ddply(sub_phy_melt_totals, c("ProdLevel","quadrant", "Phylum"), summarise,
N = length(PercentAbund),
mean_abundance = mean(PercentAbund),
sd = sd(PercentAbund),
se = sd / sqrt(N))
### Only the phylum
abund_only_by_phylum <- ddply(prod_phylum_FWDB_stats, c("Phylum"), summarise,
N = length(mean_abundance),
phylum_mean = mean(mean_abundance))
abund_only_by_phylum <-arrange(abund_only_by_phylum, desc(phylum_mean))
# Create a vector of the phyla we'd like
phy_order <- as.character(abund_only_by_phylum$Phylum)
### Top 17 most abundant phyla
top15phy <- prod_phylum_FWDB_stats[prod_phylum_FWDB_stats$Phylum %in% phy_order[1:17], ]
phy15_order <- as.character(abund_only_by_phylum$Phylum)[1:17]
## Put the phyla in the right order in terms of total relative abundance
top15phy$Phylum <- factor(top15phy$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae"))
top15phy <- subset(top15phy, quadrant != "Free Mixed" & quadrant != "Particle Mixed")
top15phy$quadrant <- factor(top15phy$quadrant,
levels = c("Free Epilimnion", "Particle Epilimnion",
"Free Hypolimnion", "Particle Hypolimnion"))
top15phy$Phylum = with(top15phy, factor(Phylum, levels = rev(levels(Phylum))))
#### ADDING NO SPACES FOR FREE LIVING and ADDING 2 SPACE FOR PARICLE ASSOCIATED AT THE END
top15phy$quadrant <- as.character(top15phy$quadrant)
top15phy$quadrant[top15phy$quadrant=="Particle Epilimnion"] <- "Epilimnion "
top15phy$quadrant[top15phy$quadrant=="Free Epilimnion"] <- "Epilimnion"
top15phy$quadrant[top15phy$quadrant=="Particle Hypolimnion"] <- " Hypolimnion "
top15phy$quadrant[top15phy$quadrant=="Free Hypolimnion"] <- "Hypolimnion"
#### ADDING NO SPACES FOR PRODUCTIVE and ADDING 1 SPACE FOR UNPRODUCTIVE
top15phy$prod <- top15phy$ProdLevel
top15phy$prod <- as.character(top15phy$prod)
top15phy$prod[top15phy$prod =="Productive"] <- ""
top15phy$prod[top15phy$prod =="Unproductive"] <- " "
for(i in 1:length(top15phy$prod)){
top15phy$prod_quad [i]<-paste(as.character(top15phy$prod[i]),as.character(top15phy$quadrant[i]))}
top15phy$prod_quad <- factor(top15phy$prod_quad,
levels = c(" Epilimnion", " Hypolimnion", " Epilimnion ",
" Hypolimnion ", " Epilimnion", " Hypolimnion",
" Epilimnion ", " Hypolimnion "))
phy.colors.whew <- c(Acidobacteria = "grey26", Actinobacteria = "palevioletred2", Alphaproteobacteria = "steelblue4", Armatimonadetes = "red", Bacteroidetes = "darkorange","BD1-5" = "chartreuse", Betaproteobacteria = "royalblue", Caldiserica = "black","Candidate_division_BRC1" = "red","Candidate_division_JS1" = "aquamarine1","Candidate_division_OD1" = "#6DDE88", "Candidate_division_OP3" = "yellow1", "Candidate_division_OP8" = "goldenrod1", "Candidate_division_OP11" = "chocolate4","Candidate_division_SR1" = "tan3", "Candidate_division_TM7" = "skyblue1", "Candidate_division_WS3" = "magenta",Chlamydiae="violet", Chlorobi="cyan2", Chloroflexi="darkgreen", Cyanobacteria = "chartreuse3", Deferribacteres = "slateblue3", "Deinococcus-Thermus" = "violetred", Dictyoglomi = "cornsilk4", Deltaproteobacteria = "deepskyblue", Elusimicrobia = "violetred4", Epsilonproteobacteria = "lightskyblue", Fibrobacteres = "hotpink", Firmicutes = "blue4", FGL7S = "palevioletred1",Fusobacteria = "slateblue1", Gammaproteobacteria = "plum2", Gemmatimonadetes="black", GOUTA4 = "plum1", "Hyd24-12" = "sienna2", JTB23 = "seashell2",Lentisphaerae = "yellow1", "NPL-UPA2"="royalblue", OC31 = "mediumpurple4", Planctomycetes = "mediumorchid3", Proteobacteria = "deepskyblue","SHA-109" = "lightsalmon3", SM2F11 = "lightskyblue2", SPOTSOCT00m83 = "orangered",Spirochaetae = "gold3", Tenericutes="pink", Thermotogae = "chocolate1", TA06 = "lightslateblue",TA18 = "rosybrown3", TM6 = "olivedrab",unclassified = "grey", Verrucomicrobia = "purple4", "WCHB1-60" = "palegreen")
########################### Here we will use GTable to manually create the labels.
# Create the gtable object
top15plot <- ggplot(top15phy, aes(y=mean_abundance , x=Phylum, fill=Phylum)) + #coord_cartesian(xlim = c(0, 30)) +
guides(fill = guide_legend(reverse=TRUE)) + ggtitle("") +
geom_bar(stat="identity", position=position_dodge()) + #theme_classic() +
geom_bar(stat="identity", position=position_dodge(), colour = "black", show_guide = FALSE) +
facet_wrap(~ prod_quad, ncol = 8) + xlab("Top 15 Most Abundant Phyla") +
scale_fill_manual(values = phy.colors.whew,name="Phylum") +
geom_errorbar(aes(ymin = mean_abundance -se, ymax = mean_abundance +se), width = 0.25, color = "black") +
ylab("Mean Percent Relative Abundance (%)") + coord_flip() + theme_bw() +
guides(fill = guide_legend(keywidth = 0.7, keyheight = 0.7)) +
theme(plot.title = element_text(face="bold", size = 12), #Set the plot title
strip.text.x = element_text(size=8), #Set the facet titles on X
strip.text.y = element_text(size=8, face="bold"), #Set the facet titles Y
axis.title.x = element_text(face="bold", size=12), #Set the x-axis title
axis.title.y = element_text(face="bold", size=12), #Set the y-axis title
axis.text.x = element_text(colour = "black", size=8), #Set the x-axis labels
axis.text.y = element_text(colour = "black", size=8), #Set the y-axis labels
legend.title = element_text(size = 8, face="bold"), #Set the legend title
legend.text = element_text(size = 8), #Set the legend text
legend.position="right")
z <- ggplot_gtable(ggplot_build(top15plot))
# add label for top strip
z <- gtable_add_rows(z, z$heights[[3]], 2)
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
textGrob("Free-Living", gp = gpar(fontsize = 8, col = "black"))),
3, 4, 3, 7, name = paste(runif(2)))
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
textGrob("Particle-Associated", gp = gpar(fontsize = 8, col = "black"))),
3, 9, 3, 13, name = paste(runif(2)))
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.6))),
textGrob("High-Nutrient", gp = gpar(fontsize = 8, col = "black"))),
2, 4, 2, 13, name = paste(runif(2)))
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
textGrob("Free-Living", gp = gpar(fontsize = 8, col = "black"))),
3, 15, 3, 19, name = paste(runif(2)))
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
textGrob("Particle-Associated", gp = gpar(fontsize = 8, col = "black"))),
3, 22, 3, 25, name = paste(runif(2)))
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype=1, fill = gray(0.6))),
textGrob("Low-Nutrient", gp = gpar(fontsize = 8, col = "black"))),
2, 15, 2, 25, name = paste(runif(2)))
# add margins
z <- gtable_add_cols(z, unit(1/8, "line"), 7)
z <- gtable_add_rows(z, unit(1/8, "line"), 3)
##### Plotting FIGURE S3 ##### Plotting FIGURE S3 ##### Plotting FIGURE S3
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS5_phyabund_quadrants.pdf", width= 10, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS5_phyabund_quadrants.tiff", width= 10, height=6, units = "in", res = 175)
# draw it
grid.newpage()
grid.draw(z); #dev.off()
Figure S5: Mean percent relative abundance of the 17 most abundant phyla or classes of the Proteobacteria in each habitat within high- and low-nutrient lakes. Mean abundance of each phylum or class is plotted with error bars representing the standard error of the mean.
good_phylum <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
### Subsetting Sherman lake for differences between particle and free living
phy_sherm <- subset_samples(good_phylum, lakenames == "Sherman")
phy_shemeister <- deSEQ(phy_sherm, ~ filter)
########## SUBSET OUT SHERMAN AND WINTERGREEN HYPOLIMNION
good_phylum_nosherwin <- subset_samples(good_phylum, lakenames != "Sherman")
############################
########################################################### PA VS FL
#1. Top prod
phytopProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & limnion == "Epilimnion")
de_phytopProd<- deSEQ(phytopProd, ~ filter)
#2 Top Oligo --> No significant difference
#phytopUNProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & limnion == "Epilimnion")
#de_phytopUNProd <- deSEQ(phytopUNProd, ~ filter) ## No significant changes
#3 Bottom Productive
phybotProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & limnion == "Hypolimnion")
de_phybotProd <- deSEQ(phybotProd, ~ filter)
#4 Bottom Unproductive
phybotUNProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & limnion == "Hypolimnion")
de_phybotUNProd <- deSEQ(phybotUNProd, ~ filter)
############################
########################################################### EPILIMNION VS HYPOLIMNION
#1. PA prod
phyprodPA <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & filter == "Particle")
de_phyprodPA <- deSEQ(phyprodPA, ~ limnion)
#2 PA Oligo
phyunprodPA <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & filter == "Particle")
de_phyunprodPA <- deSEQ(phyunprodPA, ~ limnion)
#3 FL Productive
phyprodFL <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & filter == "Free")
de_phyprodFL <- deSEQ(phyprodFL, ~ limnion)
#4 FL Unproductive
phyunprodFL <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & filter == "Free")
de_phyunprodFL <- deSEQ(phyunprodFL, ~ limnion)
############################
########################################################### HIGH VS LOW-NUTRIENT
#1. Top PA --> No significant difference
#phytopPA <- subset_samples(good_phylum_nosherwin, limnion == "Epilimnion" & filter == "Particle") ## DOES NOT WORK!!!!!!!!!!
#de_phytopPA <- deSEQ(phytopPA, ~ ProdLevel) ## No significant changes
#2 BOTTOM PA
phybotPA <- subset_samples(good_phylum_nosherwin, limnion == "Hypolimnion" & filter == "Particle")
de_phybotPA <- deSEQ(phybotPA, ~ ProdLevel)
#3 TOP FL --> No significant difference
#phytopFL <- subset_samples(good_phylum_nosherwin, limnion == "Epilimnion" & filter == "Free")
#de_phytopFL <- deSEQ(phytopFL, ~ ProdLevel) ## No significant changes
#4 Bottom FL
phybotFL <- subset_samples(good_phylum_nosherwin, limnion == "Hypolimnion" & filter == "Free")
de_phybotFL <- deSEQ(phybotFL, ~ ProdLevel)
#PA VS FL:
#Surface productive
pafl_topprod <- subset(de_phytopProd, select = c(Phylum, log2FoldChange, padj))
pafl_topprod$Habitat <- "PA vs. FL: Top Productive"
# NOTHING SIGNIFICANT HERE: Surface unproductive
#pafl_topUNprod <- subset(de_phytopUNProd, select = c(Phylum, log2FoldChange, padj))
#pafl_topUNprod$Habitat <- "PA vs. FL: Top Unproductive"
#Bottom Productive
pafl_botprod <- subset(de_phybotProd, select = c(Phylum, log2FoldChange, padj))
pafl_botprod$Habitat <- "PA vs. FL: Bottom Productive"
#Bottom Unproductive
pafl_botUNprod <- subset(de_phybotUNProd, select = c(Phylum, log2FoldChange, padj))
pafl_botUNprod$Habitat <- "PA vs. FL: Bottom Unproductive"
## SHERMAN
pafl_isothermal <- subset(phy_shemeister, select = c(Phylum, log2FoldChange, padj))
pafl_isothermal$Habitat <- "PA vs. FL: Mixed"
###Top vs Bottom:
topbot1 <- subset(de_phyprodPA, select = c(Phylum, log2FoldChange, padj))
topbot1$Habitat <- "Top vs. Bottom: PA Productive"
# Particle-Associated UNproductive
topbot2 <- subset(de_phyunprodPA, select = c(Phylum, log2FoldChange, padj))
topbot2$Habitat <- "Top vs. Bottom: PA Unproductive"
# Free-Living Productive
topbot3 <- subset(de_phyprodFL, select = c(Phylum, log2FoldChange, padj))
topbot3$Habitat <- "Top vs. Bottom: FL Productive"
# Free-Living UNproductive
topbot4 <- subset(de_phyunprodFL, select = c(Phylum, log2FoldChange, padj))
topbot4$Habitat <- "Top vs. Bottom: FL Unproductive"
#Prod vs Oligo:
#troph1 <- subset(de_phytopPA, select = c(Phylum, log2FoldChange, padj)) ## No significant changes
#troph1$Habitat <- "Prod vs. Unprod: PA Epilimnion"
# Particle-Associated BOTTOM
troph2 <- subset(de_phybotPA, select = c(Phylum, log2FoldChange, padj))
troph2$Habitat <- "Prod vs. Unprod: PA Bottom"
# Free-Living TOP
#troph3 <- subset(de_phytopFL, select = c(Phylum, log2FoldChange, padj)) ## No significant changes
#troph3$Habitat <- "Prod vs. Unprod: FL Top"
# Free-Living BOTTOM
troph4 <- subset(de_phybotFL, select = c(Phylum, log2FoldChange, padj))
troph4$Habitat <- "Prod vs. Unprod: FL Bottom"
trophs <- rbind(troph2, troph4)
newlog2foldchange <- as.numeric(trophs$log2FoldChange * -1) ## To make high-nutrient to POSITIVE
trophs$log2FoldChange <- newlog2foldchange
df_ratio <-rbind(pafl_topprod, pafl_botprod, pafl_botUNprod,pafl_isothermal,
topbot1, topbot2, topbot3, topbot4,
trophs)
paste(c("The range of the log2foldChange is",min(df_ratio$log2FoldChange), "to", max(df_ratio$log2FoldChange)))
## [1] "The range of the log2foldChange is"
## [2] "-5.4994449553491"
## [3] "to"
## [4] "6.76043158519451"
#Split of the habitat column to 2 columns named comparison and habitat
split_cols <- colsplit(df_ratio$Habitat, ":", c("Comparison", "Habitat"))
df_ratio$Habitat = NULL
dfrat <- cbind(df_ratio, split_cols)
uncla <- subset(dfrat, Phylum == "unclassified")
dfrat$Phylum <- factor(dfrat$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia","Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Chlorobi", "Armatimonadetes", "Firmicutes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "TM6", "Chlamydiae", "Epsilonproteobacteria", "TA18", "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Gemmatimonadetes", "Fusobacteria", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "WCHB1-60", "Deferribacteres", "Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "unclassified"))
dfrat$Phylum = with(dfrat, factor(Phylum, levels = rev(levels(Phylum))))
################################################################################# OVERALL ABUNDANCE PLOT
good_phylum_proteo <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
goodsamps_phylum <- tax_glom(good_phylum_proteo,taxrank = "Phylum")
goodsamps_phy_melt <- psmelt(goodsamps_phylum) ## Melt it into a dataframe
sub_goodsamps_phy_melt <- subset(goodsamps_phy_melt, select = c("Sample", "ProdLevel", "quadrant",
"Phylum","Abundance"))
## Calculate the total for each sample
TOTALS <- ddply(sub_goodsamps_phy_melt, c("Sample"), summarise,
total = sum(Abundance))
### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
sub_phy_melt_totals <- merge(sub_goodsamps_phy_melt, TOTALS, by = "Sample")
## Calculate the relative abundance
sub_phy_melt_totals$RelAbundance <- sub_phy_melt_totals$Abundance/sub_phy_melt_totals$total
## Calculate the Percent Abundance
sub_phy_melt_totals$PercentAbund <- sub_phy_melt_totals$RelAbundance * 100
#### Make a new dataframe with the percent abudance within the entire dataset!
phy_stats <- ddply(sub_phy_melt_totals, c("Phylum"), summarise,
N = length(PercentAbund),
PercentPhy = mean(PercentAbund),
sd = sd(PercentAbund),
se = sd / sqrt(N))
abund <- subset(phy_stats,PercentPhy > 0.001) # Only take the phyla that are more than 0.01% abundant
abund_ordered <- arrange(abund, desc(PercentPhy))
abund$Phylum <- factor(abund$Phylum,levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Chlorobi", "Armatimonadetes", "Firmicutes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "TM6", "Chlamydiae", "Epsilonproteobacteria", "TA18", "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Gemmatimonadetes", "Fusobacteria", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "WCHB1-60", "Deferribacteres", "Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "unclassified"))
abund$Phylum = with(abund, factor(Phylum, levels = rev(levels(Phylum))))
#Relative abundance plot
abund_plot <- ggplot(abund, aes(y=PercentPhy , x=Phylum)) +
#geom_boxplot(fill = "magenta4", colour = "black") +
geom_bar(stat="identity", position=position_dodge(), fill = "magenta4", colour = "black") +
theme_bw() + ggtitle("Phyla Above 0.1% in All Samples") +
xlab("Phylum") + ylab("Mean Relative Abundance (%)") +
geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25) + coord_flip() +
theme(axis.title.x = element_text(face="bold", size=16),
axis.text.x = element_text(angle=0, colour = "black", size=14),
axis.text.y = element_text(colour = "black", size=14),
axis.title.y = element_text(face="bold", size=16),
plot.title = element_text(face="bold", size = 20),
legend.title = element_text(size=12, face="bold"),
legend.text = element_text(size = 12),
legend.position="none");
#################################################################################
#################################################################################
dfrat$Habitat <- as.character(dfrat$Habitat)
dfrat$Habitat[dfrat$Habitat == " Mixed"] <- "Mixed"
dfrat$Habitat[dfrat$Habitat == " Top Productive"] <- "Epilimnion \nHigh-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Top Unproductive"] <- "Epilimnion \nLow-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Bottom Productive"] <- "Hypolimnion \nHigh-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Bottom Unproductive"] <- "Hypolimnion \nLow-Nutrient"
dfrat$Habitat[dfrat$Habitat == " PA Productive"] <- "High-Nutrient \nParticle-Associated"
dfrat$Habitat[dfrat$Habitat == " PA Unproductive"] <- "Low-Nutrient \nParticle-Associated"
dfrat$Habitat[dfrat$Habitat == " FL Productive"] <- "High-Nutrient \nFree-Living"
dfrat$Habitat[dfrat$Habitat == " FL Unproductive"] <- "Low-Nutrient \nFree-Living"
dfrat$Habitat[dfrat$Habitat == " PA Top"] <- "Particle-Associated \nEpilimnion"
dfrat$Habitat[dfrat$Habitat == " FL Top"] <- "Free-Living \nEpilimnion"
dfrat$Habitat[dfrat$Habitat == " FL Bottom"] <- "Free-Living \nHypolimnion"
dfrat$Habitat[dfrat$Habitat == " PA Bottom"] <- "Particle-Associated \nHypolimnion"
dfrat$Habitat <- factor(dfrat$Habitat,levels = c("Epilimnion \nHigh-Nutrient", "Hypolimnion \nHigh-Nutrient", "Mixed", "Epilimnion \nLow-Nutrient", "Hypolimnion \nLow-Nutrient","Particle-Associated \nHypolimnion", "Free-Living \nEpilimnion", "Free-Living \nHypolimnion", "High-Nutrient \nParticle-Associated", "High-Nutrient \nFree-Living", "Low-Nutrient \nParticle-Associated", "Low-Nutrient \nFree-Living"))
heat <- ggplot(dfrat, aes(Habitat, Phylum)) + geom_tile(aes(fill = log2FoldChange)) +
scale_fill_gradient2(name = "Odds\nRatio", mid = "gray", low = "darkorange", high = "blue4",
na.value = "white", guide = guide_colorbar(barwidth = 1.5, barheight = 4.5)) +
theme_bw(base_size = 12) + scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
ylab("Phylum") + xlab("Habitat") +
facet_grid(. ~ Comparison, scales = "free", space = "free", labeller=mf_labeller) +
theme(axis.text.x = element_text(colour="black", size=8, angle = 30, hjust = 1, vjust = 1),
axis.text.y = element_text(colour="black", vjust=0.5, size=8),
axis.title.x = element_text(face="bold", size=8),
legend.title = element_text(face="bold", size=6),
legend.text = element_text(size = 6),
legend.position = c(0.93, 0.14),#"left",
axis.title.y = element_text(face="bold", size=8),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
#plot.margin = unit(c(0.5, 1, 0.5, 0.5), "cm"), #top, right, bottom, left
strip.text.x = element_text(size=8, face = "bold", colour = "black"),
strip.background = element_blank());
# Discover the phyla that are in the abundance plot but NOT differentially abundant at the phylum level
#setdiff(abund$Phylum, dfrat$Phylum)
phylum_delete <- c("Candidate_division_OP11", "Candidate_division_OP8", "Candidate_division_TM7", "Dictyoglomi", "SPOTSOCT00m83", "Thermotogae", "TM6", "unclassified", "WCHB1-60")
subset_abundPhylum <- subset(abund, !(Phylum %in% phylum_delete))
#setdiff(subset_abundPhylum$Phylum, dfrat$Phylum) # Sanity check
#setdiff(dfrat$Phylum, subset_abundPhylum$Phylum) # Double sanity check
relabun_plot <- ggplot(subset_abundPhylum, aes(y=PercentPhy , x=Phylum)) + #coord_cartesian(xlim = c(0, 30)) +
geom_bar(stat="identity", position=position_dodge(),fill = "gray", colour = "black") +
ylab("Mean Percent \n Relative \n Abundance (%)") + coord_flip() + theme_bw() +
geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25, color = "black") +
scale_y_continuous(expand= c(0,0), limits = c(0,25)) +
theme(axis.title.x = element_text(face="bold", size=8),
axis.text.x = element_text(angle=0, colour = "black", size=8),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
strip.background = element_rect(colour="black", fill = "black"),
plot.margin = unit(c(1.15, 0.3, 1, -0.4), "cm"), #top, right, bottom, left
#plot.margin = unit(c(1.65, 2, 1.35, -1.35), "cm"), #top, right, bottom, left
legend.position="none"); #relabun_plot
##### Plotting FIGURE 5 ##### Plotting FIGURE 5 ##### Plotting FIGURE 5
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS6_phyheat.pdf", width= 7.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS6_phyheat.tiff", width= 7.5, height=6, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.83,0.17))))
print(heat, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(relabun_plot, vp=viewport(layout.pos.row=1,layout.pos.col=2)); #dev.off()
Figure S6: Differentially abundant phyla across all specific lake habitats. (Left) Heat map of the significantly differentially abundant (based on log2-ratios of relative abundance data) bacterial phyla and classes of the Proteobacteria between filter fractions, nutrient levels, and lake layers (titles). Blue represents a significant differential abundance of a phylum or class in the particle-associated fraction, high-nutrient lakes, or hypolimnion. Yellow represents a significant differential abundance of a phylum or class in the free-living fraction, low-nutrient lakes, or epilimnion. Only significant phyla identified were included. (Right) Bar graph of the mean relative abundance of the bacterial phylum or class of the Proteobacteria across the entire data set sorted from high to low relative abundance. Error bars represent the standard error of the mean.
sherm <- subset_samples(nowin_FWDB_scaled, lakenames == "Sherman")
shemeister <- deSEQ(sherm, ~ filter, cutoff = 0, alpha = 0.01)
########## SUBSET OUT SHERMAN (mixed) Lake
good_samps_nosherwin <- subset_samples(nowin_FWDB_scaled, lakenames != "Sherman")
############################
########################################################### PA VS FL
#1. Top prod --> Model fit not typical
topProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & limnion == "Epilimnion")
de_topProd <- deSEQ(topProd, ~ filter, cutoff = 0, alpha = 0.01) # fitType = "local"
#2 Top Oligo --> No significant difference
topUNProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & limnion == "Epilimnion")
#de_topUNProd <- deSEQ(topUNProd, ~ filter, cutoff = 0, alpha = 0.01) # No significant difference
#3 Bottom Productive
botProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & limnion == "Hypolimnion")
de_botProd <- deSEQ(botProd, ~ filter, cutoff = 0, alpha = 0.01)
#4 Bottom Unproductive
botUNProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & limnion == "Hypolimnion")
de_botUNProd <- deSEQ(botUNProd, ~ filter, cutoff = 0, alpha = 0.01)
############################
########################################################### EPILIMNION VS HYPOLIMNION
#1. Top prod
prodPA <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & filter == "Particle")
de_prodPA <- deSEQ(prodPA, ~ limnion, cutoff = 0, alpha = 0.01)
#2 Top Oligo
unprodPA <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & filter == "Particle")
de_unprodPA <- deSEQ(unprodPA, ~ limnion, cutoff = 0, alpha = 0.01)
#3 Bottom Productive
prodFL <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & filter == "Free")
de_prodFL <- deSEQ(prodFL, ~ limnion, cutoff = 0, alpha = 0.01)
#4 Bottom Unproductive
unprodFL <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & filter == "Free")
de_unprodFL <- deSEQ(unprodFL, ~ limnion, cutoff = 0, alpha = 0.01)
############################
########################################################### HIGH- VS LOW-NUTRIENT
#1. Top prod
topPA <- subset_samples(good_samps_nosherwin, limnion == "Epilimnion" & filter == "Particle")
de_topPA <- deSEQ(topPA, ~ ProdLevel, cutoff = 0, alpha = 0.01)
#2 Top Oligo
botPA <- subset_samples(good_samps_nosherwin, limnion == "Hypolimnion" & filter == "Particle")
de_botPA <- deSEQ(botPA, ~ ProdLevel, cutoff = 0, alpha = 0.01)
#3 Bottom Productive
topFL <- subset_samples(good_samps_nosherwin, limnion == "Epilimnion" & filter == "Free")
de_topFL <- deSEQ(topFL, ~ ProdLevel, cutoff = 0, alpha = 0.01)
#4 Bottom Unproductive
botFL <- subset_samples(good_samps_nosherwin, limnion == "Hypolimnion" & filter == "Free")
de_botFL <- deSEQ(botFL, ~ ProdLevel, cutoff = 0, alpha = 0.01)
#PA VS FL:
#Surface Productive
pafl1 <- subset(de_topProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl1$Habitat <- "PA vs. FL: Epilimnion \nHigh-Nutrient"
#Bottom High-Nutrient
pafl2 <- subset(de_botProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl2$Habitat <- "PA vs. FL: Epilimnion \nLow-Nutrient"
#Surface Low-Nutrient --> No significant difference
#pafl3 <- subset(de_topUNProd, select = c(Phylum, Genus, Species,OTU, log2FoldChange, padj))
#pafl3$Habitat <- "PA vs. FL: Hypolimnion \nHigh-Nutrient"
#Bottom Low-Nutrient
pafl4 <- subset(de_botUNProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl4$Habitat <- "PA vs. FL: Hypolimnion \nLow-Nutrient"
###Top vs Bottom:
otu_topbot1 <- subset(de_prodPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot1$Habitat <- "Top vs. Bottom: High-Nutrient \n Particle-Associated"
# Particle-Associated Low-Nutrient
otu_topbot2 <- subset(de_unprodPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot2$Habitat <- "Top vs. Bottom: Low-Nutrient \n Particle-Associated"
# Free-Living High-Nutrient
otu_topbot3 <- subset(de_prodFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot3$Habitat <- "Top vs. Bottom: High-Nutrient \n Free-Living"
# Free-Living Low-Nutrient
otu_topbot4 <- subset(de_unprodFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot4$Habitat <- "Top vs. Bottom: Low-Nutrient \n Free-Living"
#Prod vs Oligo:
otu_troph1 <- subset(de_topPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph1$Habitat <- "Prod vs. Unprod: Particle-Associated \n Epilimnion"
# Particle-Associated BOTTOM
otu_troph2 <- subset(de_botPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph2$Habitat <- "Prod vs. Unprod: Particle-Associated \n Hypolimnion"
# Free-Living TOP
otu_troph3 <- subset(de_topFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph3$Habitat <- "Prod vs. Unprod: Free-Living \nEpilimnion"
# Free-Living BOTTOM
otu_troph4 <- subset(de_botFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph4$Habitat <- "Prod vs. Unprod: Free-Living \nHypolimnion"
###### Combing all into one dataframe named out_ratio
otu_trophs <- rbind(otu_troph1, otu_troph2, otu_troph3, otu_troph4)
newlog2foldchange <- as.numeric(otu_trophs$log2FoldChange * -1) # To correlate with Top vs bottom
otu_trophs$log2FoldChange <- newlog2foldchange
otu_ratio <-rbind(pafl1, pafl2, pafl4,
otu_topbot1, otu_topbot2, otu_topbot3, otu_topbot4,
otu_trophs) #otu_troph1, otu_troph2, otu_troph3, otu_troph4)
#What's the min and max ratios?
paste(c("The range of the log2foldChange is", min(otu_ratio$log2FoldChange), "to",
max(otu_ratio$log2FoldChange)))
## [1] "The range of the log2foldChange is"
## [2] "-10.6132280899782"
## [3] "to"
## [4] "11.8805004022321"
#Split of the habitat column to 2 columns named comparison and habitat
otu_cols <- colsplit(otu_ratio$Habitat, ":", c("Comparison", "Habitat"))
otu_ratio$Habitat = NULL
otu_ratios <- cbind(otu_ratio, otu_cols)
######## Summed-OTU Plot
### PA (positive) vs FL (negative)
### Top (negative) vs Bottom (positive)
### Prod (positive) vs Unprod (negative)
#head(otu_ratios)
#subset out prod vs unprod
sig_oturats <- otu_ratios
pvu <- subset(sig_oturats, Comparison == "Prod vs. Unprod")
tvb <- subset(sig_oturats, Comparison == "Top vs. Bottom")
pvf <- subset(sig_oturats, Comparison == "PA vs. FL")
#### High vs Low nutrient
pvu$Preference <- "NA"
pvu_high <- filter(pvu, log2FoldChange > 0)
pvu_high$Preference <- "High-Nutrient" # High-Nutrient is POSITIVE
pvu_low <- filter(pvu, log2FoldChange < 0)
pvu_low$Preference <- "Low-Nutrient" # Low-Nutrient is NEGATIVE
pvu_pref <- rbind(pvu_low, pvu_high) %>% # Combine the PA and FL data frames
group_by(Phylum, Preference) %>% # group by preference and phylum
tally() # count how many in each
pvu_pref$Comparison <- "Prod vs. Unprod"
# Add a new column where the Low nutrient values are negative
pvu_pref$Count <- ifelse(pvu_pref$Preference == "Low-Nutrient", pvu_pref$n*-1, pvu_pref$n)
#### Top (Epilimnion) vs Bottom (Hypolimnion)
tvb$Preference <- "NA"
tvb_bottom <- filter(tvb, log2FoldChange > 0)
tvb_bottom$Preference <- "Hypolimnion" # Hypolimnion is POSITIVE
tvb_top <- filter(tvb, log2FoldChange < 0)
tvb_top$Preference <- "Epilimnion" # Epilimnion is NEGATIVE
tvb_pref <- rbind(tvb_top, tvb_bottom) %>% # Combine the PA and FL data frames
group_by(Phylum, Preference) %>% # group by preference and phylum
tally() # count how many in each
tvb_pref$Comparison <- "Top vs. Bottom"
# Add a column where the Epilimnion values are negative in a new column name count
tvb_pref$Count <- ifelse(tvb_pref$Preference == "Epilimnion", tvb_pref$n*-1, tvb_pref$n)
# Add a new column called count
### Particle vs Free living
pvf$Preference <- "NA"
pvf_particle <- filter(pvf, log2FoldChange > 0)
pvf_particle$Preference <- "Particle-Associated"
pvf_free <- filter(pvf, log2FoldChange < 0)
pvf_free$Preference <- "Free-Living"
pvf_pref <- rbind(pvf_free, pvf_particle) %>% # Combine the PA and FL data frames
group_by(Phylum, Preference) %>% # group by preference and phylum
tally() # count how many in each
pvf_pref$Comparison <- "PA vs. FL"
# Add a new column called "count" where if the count is free-living we make it negative
pvf_pref$Count <- ifelse(pvf_pref$Preference == "Free-Living", pvf_pref$n*-1, pvf_pref$n)
# Combine into one data frame
otu_preference <- data.frame(rbind(pvf_pref, pvu_pref, tvb_pref))
# Determine factor order
otu_preference$Preference <- factor(otu_preference$Preference, levels=c("Particle-Associated","Free-Living",
"Low-Nutrient","High-Nutrient",
"Epilimnion", "Hypolimnion"))
otu_preference$Phylum <- factor(otu_preference$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia","Betaproteobacteria","Actinobacteria", "Planctomycetes","Alphaproteobacteria","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae","Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria","Spirochaetae","Candidate_division_OD1","NPL-UPA2","Deinococcus-Thermus","Candidate_division_OP3","Epsilonproteobacteria", "TM6", "TA18","Chlamydiae","Fibrobacteres", "Candidate_division_SR1","Candidate_division_BRC1", "BD1-5", "Candidate_division_WS3","Tenericutes", "Elusimicrobia","Gemmatimonadetes","WCHB1-60","Fusobacteria", "Deferribacteres","Candidate_division_TM7","Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae","Candidate_division_OP11","Dictyoglomi", "SM2F11", "Hyd24-12", "Nitrospirae","SHA-109","TA06", "OC31", "GOUTA4", "Caldiserica","Candidate_division_JS1","FGL7S", "unclassified"))
otu_preference$Comparison <- factor(otu_preference$Comparison,
levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom"))
OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria",
"Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria",
"Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria",
"Spirochaetae", "unclassified")
summed_16 <- otu_preference[otu_preference$Phylum %in% OTUtop16, ]
summed_16$Phylum = with(summed_16, factor(Phylum, levels = rev(levels(Phylum))))
##### Plotting FIGURE S8 ##### Plotting FIGURE S8 ##### Plotting FIGURE S8
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS8_summed_ALL_otus.pdf", width= 7.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS8_summed_ALL_otus.tiff", width= 7.5, height=6, units = "in", res = 175)
ggplot(summed_16, aes(y=Count, x=Phylum, fill=Preference)) +
geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
theme_gray() + scale_y_continuous(breaks=seq(-30, 200, 30)) +
ggtitle("All OTUs in the Data") +
facet_grid(. ~ Comparison, scales = "free_x", labeller = mf_labeller) + theme_bw() +
ylab("Total Number of Significant OTUs") + xlab("Phylum") +
scale_fill_manual(name = "", limits=c("Particle-Associated","Free-Living","Low-Nutrient",
"High-Nutrient","Epilimnion", "Hypolimnion"),
values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
"limegreen", "darkgreen")) +
guides(fill = guide_legend(keywidth = 0.75, keyheight = 0.75)) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_blank(),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
plot.title = element_text(size = 12, face = "bold", colour = "black"),
legend.position="right"); #dev.off()
Figure S8: Significant differentially abundant OTUs in the 17 most abundant phyla and classes of Proteobacteria. Total number of OTUs that were significantly differentially abundant between filter fractions, nutrient levels, and lake layers. For each comparison, the number of differentially abundant OTUs in each habitat combination was summed.
# Get the significant OTUs
# summed relative to its abundance
### Free-living
sigotu_free_unique <- left_join(pvf_free, otu_abund_free, by = "OTU") %>%
left_join(phy_abund_free, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) *-1) %>%
mutate(Comparison = "PA vs. FL",
Preference = "Free-Living")
# Particle-Associated
sigotu_particle_unique <- left_join(pvf_particle, otu_abund_particle, by = "OTU") %>%
left_join(phy_abund_particle, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy))%>%
mutate(Comparison = "PA vs. FL",
Preference = "Particle-Associated")
# Epilimnion
sigotu_epilimnion_unique <- left_join(tvb_top, otu_abund_epi, by = "OTU") %>%
left_join(phy_abund_epi, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) * -1) %>%
mutate(Comparison = "Top vs. Bottom",
Preference = "Epilimnion")
# Hypolimnion
sigotu_hypolimnion_unique <- left_join(tvb_bottom, otu_abund_hypo, by = "OTU") %>%
left_join(phy_abund_hypo, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy)) %>%
mutate(Comparison = "Top vs. Bottom",
Preference = "Hypolimnion")
# High-Nutrient
sigotu_high_nutrient_unique <- left_join(pvu_high, otu_abund_high, by = "OTU") %>%
left_join(phy_abund_high, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy)) %>%
mutate(Comparison = "Prod vs. Unprod",
Preference = "High-Nutrient")
# Low-Nutrient
sigotu_low_nutrient_unique <- left_join(pvu_low, otu_abund_low, by = "OTU") %>%
left_join(phy_abund_low, by = "Phylum") %>%
select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum
distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
group_by(Phylum) %>%
summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) *-1) %>%
mutate(Comparison = "Prod vs. Unprod",
Preference = "Low-Nutrient")
sig_otus_relabund_0 <- rbind(sigotu_free_unique, sigotu_particle_unique, sigotu_epilimnion_unique,
sigotu_hypolimnion_unique, sigotu_low_nutrient_unique, sigotu_high_nutrient_unique)
# Determine factor order
sig_otus_relabund_0$Preference <- factor(sig_otus_relabund_0$Preference,
levels = c("Particle-Associated","Free-Living",
"Low-Nutrient","High-Nutrient",
"Epilimnion", "Hypolimnion"))
sig_otus_relabund_0$Phylum <- factor(sig_otus_relabund_0$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria","Actinobacteria", "Planctomycetes", "Alphaproteobacteria","unclassified", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi","Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi","Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3","Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae", "Fibrobacteres","Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5","Candidate_division_WS3", "Tenericutes", "Elusimicrobia","Gemmatimonadetes", "WCHB1-60","Fusobacteria", "Deferribacteres","Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83","Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11","Hyd24-12", "Nitrospirae","SHA-109", "TA06", "OC31", "GOUTA4","Caldiserica", "Candidate_division_JS1", "FGL7S"))
sig_otus_relabund_0$Comparison <- factor(sig_otus_relabund_0$Comparison,
levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom"))
# Reverse the order of the phyla so it plots logically
sig_otus_relabund_0$Phylum = with(sig_otus_relabund_0, factor(Phylum, levels = rev(levels(Phylum))))
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/SigOTUs_ALL_Relabund.pdf", width= 7.5, height=8)
sig_phy_relabund_plot <- ggplot(sig_otus_relabund_0, aes(y=sig_phylum_relabund, x=Phylum, fill=Preference)) +
geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
theme_gray() + #scale_y_continuous(breaks=seq(-30, 200, 30)) +
ggtitle("All Phyla and OTUs in the Data") +
facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
ylab("Within Phylum Relative Abundance of Significant OTUs") + xlab("Phylum") +
scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
"High-Nutrient","Epilimnion", "Hypolimnion"),
values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
"limegreen", "darkgreen")) +
guides(fill = guide_legend(keywidth = 0.75, keyheight = 0.75)) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_blank(),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
plot.title = element_text(size = 12, face = "bold", colour = "black"),
legend.position="right"); #dev.off()
OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria","Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae")
sigotu_relabund_16 <- sig_otus_relabund_0[sig_otus_relabund_0$Phylum %in% OTUtop16, ]
# Reverse the order of the phyla so it plots logically
sig_otus_relabund_0$Phylum = with(sig_otus_relabund_0, factor(Phylum, levels = rev(levels(Phylum))))
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/SigOTUs_Top16_Relabund.pdf", width= 7.5, height=6)
sigotu_phy_relabund_plot <- ggplot(sigotu_relabund_16, aes(y=sig_phylum_relabund, x=Phylum, fill=Preference)) +
geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
theme_gray() + #scale_y_continuous(breaks=seq(-30, 200, 30)) +
#ggtitle("All OTUs in the Top 16 Phyla in the Data") +
facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
ylab("Within Phylum Relative Abundance of Significant OTUs") + xlab("Phylum") +
scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
"High-Nutrient","Epilimnion", "Hypolimnion"),
labels = c("Free-Living (21)","Particle-Associated (20)","Low-Nutrient (15)",
"High-Nutrient (26)","Epilimnion (19)", "Hypolimnion (18)"),
values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
"limegreen", "darkgreen")) +
guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_blank(),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
plot.title = element_text(size = 12, face = "bold", colour = "black"),
legend.position="right"); #dev.off()
# Left Panel
#### Number of significant OTUs versus the total number of OTUs by habitat and phylum
# Calculate a data frame with a tally of the significant OTUs by habitat and by phylum
free_living_sig_otus <- pvf %>% # Make a data frame with significant OTUs unique to free living
filter(log2FoldChange < 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "Free-Living", Comparison = "PA vs. FL")
colnames(free_living_sig_otus)[2] <- "SigOTU_Total"
particle_associated_sig_otus <- pvf %>% # Make a data frame with significant OTUs unique to free living
filter(log2FoldChange > 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "Particle-Associated", Comparison = "PA vs. FL")
colnames(particle_associated_sig_otus)[2] <- "SigOTU_Total"
epilimnion_sig_otus <- tvb %>% # Make a data frame with significant OTUs unique to Epilimnion
filter(log2FoldChange < 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "Epilimnion", Comparison = "Top vs. Bottom")
colnames(epilimnion_sig_otus)[2] <- "SigOTU_Total"
hypolimnion_sig_otus <- tvb %>% # Make a data frame with significant OTUs unique to Hypolimnion
filter(log2FoldChange > 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "Hypolimnion", Comparison = "Top vs. Bottom")
colnames(hypolimnion_sig_otus)[2] <- "SigOTU_Total"
high_nutrient_sig_otus <- pvu %>% # Make a data frame with significant OTUs unique to high-nutrient
filter(log2FoldChange > 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "High-Nutrient", Comparison = "Prod vs. Unprod")
colnames(high_nutrient_sig_otus)[2] <- "SigOTU_Total"
low_nutrient_sig_otus <- pvu %>% # Make a data frame with significant OTUs unique to low-nutrient
filter(log2FoldChange < 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct() %>%
group_by(Phylum) %>%
tally() %>%
mutate(Preference = "Low-Nutrient", Comparison = "Prod vs. Unprod")
colnames(low_nutrient_sig_otus)[2] <- "SigOTU_Total"
### Need to add the taxonomy to the OTU information
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEH" &
names != "SHEE3um" & names != "SHEH3um")
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled)
taxa <- data.frame(tax_table(nosherwin_FWDB_scaled)) %>%
select(Phylum, OTU)
free_living_all_otus <- inner_join(otu_abund_free_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Free) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(free_living_all_otus)[2] <- "AllOTU_Total"
particle_associated_all_otus <- inner_join(otu_abund_particle_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Particle) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(particle_associated_all_otus)[2] <- "AllOTU_Total"
epilimnion_all_otus <- inner_join(otu_abund_epi_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Epi) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(epilimnion_all_otus)[2] <- "AllOTU_Total"
hypolimnion_all_otus <- inner_join(otu_abund_hypo_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Hypo) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(hypolimnion_all_otus)[2] <- "AllOTU_Total"
high_nutrient_all_otus <- inner_join(otu_abund_high_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_High) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(high_nutrient_all_otus)[2] <- "AllOTU_Total"
low_nutrient_all_otus <- inner_join(otu_abund_low_pruned, taxa, by = "OTU") %>%
select(-OTU_sum_Low) %>%
distinct() %>%
group_by(Phylum) %>%
tally()
colnames(low_nutrient_all_otus)[2] <- "AllOTU_Total"
##################### Combine the taxonomic information with the significant OTUs
#### Multiply free-living, low-nutrient, and epilimnion samples by -1 for plotting
otu_join_free <- inner_join(free_living_sig_otus, free_living_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))
otu_join_particle <- inner_join(particle_associated_sig_otus, particle_associated_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))
otu_join_epi <- inner_join(epilimnion_sig_otus, epilimnion_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))
otu_join_hypo <- inner_join(hypolimnion_sig_otus, hypolimnion_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))
otu_join_high <- inner_join(high_nutrient_sig_otus, high_nutrient_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))
otu_join_low <- inner_join(low_nutrient_sig_otus, low_nutrient_all_otus, by = "Phylum") %>%
mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))
otu_joined <- rbind(otu_join_free, otu_join_particle, otu_join_epi, otu_join_hypo, otu_join_high, otu_join_low)
otu_joined$Preference <- factor(otu_joined$Preference,
levels = c("Particle-Associated","Free-Living",
"Low-Nutrient","High-Nutrient",
"Epilimnion", "Hypolimnion"))
otu_joined$Phylum <- factor(otu_joined$Phylum,
levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria","Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae","Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae","Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus","Candidate_division_OP3", "Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae","Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5","Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "Gemmatimonadetes","WCHB1-60","Fusobacteria", "Deferribacteres", "Candidate_division_TM7","Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11", "Hyd24-12","Nitrospirae","SHA-109", "TA06", "OC31", "GOUTA4", "Caldiserica","Candidate_division_JS1", "FGL7S"))
otu_joined$Comparison <- factor(otu_joined$Comparison,
levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom"))
OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria","Planctomycetes", "Alphaproteobacteria","unclassified", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae")
otu_joined_16 <- otu_joined[otu_joined$Phylum %in% OTUtop16, ]
# Reverse the order of the phyla so it plots logically
otu_joined_16$Phylum = with(otu_joined_16, factor(Phylum, levels = rev(levels(Phylum))))
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/PROPORTION_Top16_Relabund.pdf", width= 7.5, height=6)
sigotu_proportion_plot <- ggplot(otu_joined_16, aes(y=Proportion_sig2total, x=Phylum, fill=Preference)) +
geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
theme_gray() + scale_y_continuous(breaks=seq(-0.5, 1, 0.1), lim = c(-0.1, 0.3)) +
#ggtitle("All OTUs in the Top 16 Phyla in the Data") +
facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
#scale_y_continuous(breaks=seq(-0.3, 1, 0.1)) +
ylab("Proportion of Significant to Total OTUs by Phylum") + xlab("Phylum") +
scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
"High-Nutrient","Epilimnion", "Hypolimnion"),
labels = c("Free-Living (21)","Particle-Associated (20)","Low-Nutrient (15)",
"High-Nutrient (26)","Epilimnion (19)", "Hypolimnion (18)"),
values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
"limegreen", "darkgreen")) +
guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_blank(),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
plot.title = element_text(size = 12, face = "bold", colour = "black"),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
legend.position="right"); #dev.off()
#### OVERLAP BETWEEN HYPO AND HIGH NUTRIENT
hypo_otus <- tvb %>% # Make a data frame with significant OTUs unique to Hypolimnion
filter(log2FoldChange > 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct()
high_otus <- pvu %>% # Make a data frame with significant OTUs unique to high-nutrient
filter(log2FoldChange > 0) %>%
select(Phylum, Genus, Species, OTU, Comparison) %>%
distinct()
nrow(hypo_otus) # Number of Sig OTUs in hypolimnion
## [1] 486
nrow(high_otus) # Number of sig OTUs in the high-nutrient lakes
## [1] 258
shared_hypo_high <- inner_join(hypo_otus, high_otus, by = "OTU")
nrow(shared_hypo_high) # Number of sig OTUs that overlap between hypo & high-nutrient
## [1] 250
# Which OTUs are significant in productive lakes that are also NOT significant in the hypolimnion?
# There's only 8!
anti_join(high_otus, hypo_otus, by = "OTU")
## Phylum Genus Species OTU Comparison
## 1 Betaproteobacteria Iodobacter <NA> Otu00442 Prod vs. Unprod
## 2 Bacteroidetes unclassified <NA> Otu00240 Prod vs. Unprod
## 3 Verrucomicrobia unclassified <NA> Otu00077 Prod vs. Unprod
## 4 Gammaproteobacteria unclassified <NA> Otu00210 Prod vs. Unprod
## 5 Verrucomicrobia unclassified unclassified Otu00053 Prod vs. Unprod
## 6 Chloroflexi unclassified <NA> Otu00598 Prod vs. Unprod
## 7 Verrucomicrobia unclassified <NA> Otu00005 Prod vs. Unprod
## 8 Bacteroidetes unclassified unclassified Otu00168 Prod vs. Unprod
# COMBINE THE TWO PLOTS
b1 <- sigotu_proportion_plot + theme(legend.position="none",
plot.margin = unit(c(0.1, 0.05, 0.1, 0.1), "cm"))
b2 <- sigotu_phy_relabund_plot + xlab("") +theme(axis.text.y = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.05), "cm"))
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig5_proportion_relabund_Top16.pdf", width= 12, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig5_proportion_relabund_Top16.tiff", width= 12, height=6, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.46,0.54))))
print(b1, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(b2, vp=viewport(layout.pos.row=1,layout.pos.col=2)); #dev.off()
Figure 5: Significant differentially abundant OTUs in the 17 most abundant phyla and classes of Proteobacteria. (Left) Proportion of the total number of OTUs that were significantly differentially abundant divided by the total number of OTUs present within each phylum and habitat. (Right) Within-phylum relative abundance of the significant OTUs within each phylum and habitat. Numbers in parentheses within the legend represent sample sizes of each lake habitat.
######## GENUS LEVEL PLOT ORGANIZED BY PHYLUM
## GENUS LEVEL HEAT PLOT
sub_abund <- subset(abund, select = c("Phylum", "PercentPhy"))
oturats_abund <- merge(otu_ratios, sub_abund, by = "Phylum")
ordered_otu_ratios <- arrange(oturats_abund, desc(PercentPhy))
# manually place order in the next line to make sure the Genera are in the right order
#unique(ordered_otu_ratios$Genus)
ordered_otu_ratios$Genus <- factor(ordered_otu_ratios$Genus,
levels = c("unclassified", "bacIV-B", "vadinBC27_wastewater-sludge_group", "bacII-A","Paludibacter","bacIV-A","bacIII-A", "bacI-A", "Emticicia", "bacVI-B","Solitalea", "bacIII-B", "Synechococcus", "Anabaena","Snowella","Planktothrix","Pseudanabaena","Luteolibacter","CandidatusXiphinematobacter", "Pnec","betIV-A", "betI-A", "Dechloromonas","Iodobacter", "betVII-B", "Sterolibacterium", "Sideroxydans","Sulfuritalea","PRD01a011B", "Candidatus_Branchiomonas", "betIII-A", "Nitrosospira","acIV-A", "acIV-D", "acTH1-A", "acI-A", "acI-B", "Myco", "acI-C", "Luna1-A", "acSTL-A", "Pirellula", "Candidatus_Nostocoida", "CL500-3","Planctomyces", "Candidatus_Anammoximicrobium", "Pir4_lineage","Phenylobacterium", "Rickettsia", "Azospirillum", "alfI-B", "alfII-A","Haematobacter", "Magnetospirillum", "Rubellimicrobium", "Roseomonas","Rhizomicrobium", "alfIV-A", "Roseospirillum","OM27_clade", "Desulfomonile","Peredibacter", "Desulfatirhabdium","Syntrophus", "Desulfocapsa", "Sorangium", "Desulfobulbus", "Smithella", "Desulfobacula", "Phaselicystis","Desulfovibrio", "Methylobacter", "Thiocystis", "Coxiella", "Crenothrix","Thiodictyon", "Lamprocystis", "Candidatus_Competibacter", "Oscillochloris","Chloronema", "Anaerolinea", "Leptolinea", "Victivallis","Chlorobium", "Armatimonas", "Incertae_Sedis", "Fastidiosipila","Fonticella", "Clostridium_sensu_stricto_9", "Anaerofustis", "Geothrix","Treponema", "Spirochaeta", "Leptospira", "Brevinema", "Sulfurospirillum","Sulfurimonas", "possible_genus_03", "Gemmatimonas", "SC103"))
ordered_otu_ratios$Genus = with(ordered_otu_ratios, factor(Genus, levels = rev(levels(Genus))))
sub_ordered_oturats <- subset(ordered_otu_ratios, Genus != "unclassified")
##### Plotting FIGURE S7 ##### Plotting FIGURE S7 ##### Plotting FIGURE S7
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig.S7_genus_heat.pdf", width= 7.5, height=11)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig.S7_genus_heat.tiff", width= 7.5, height=11, units = "in", res = 175)
ggplot(sub_ordered_oturats, aes(Habitat, Genus)) + geom_tile(aes(fill = log2FoldChange)) +
scale_fill_gradient2(name = "Odds-\nRatio", mid = "gray", low = "darkorange", high = "blue4",
na.value = "white", guide = guide_colorbar(barwidth = 1, barheight = 4.5)) +
theme_bw(base_size = 12) + scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) + ylab(NULL) +
xlab("Habitat") + ylab("Genus or Freshwater Tribe") +
facet_grid(. ~ Comparison, scales = "free", space = "free", labeller=mf_labeller) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
strip.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
strip.background = element_blank(),
legend.position = c(0.94, 0.07), #c(0.1, 0.93),
plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")); #dev.off()
Figure S7: Differentially abundant Genera or Freshwater Tribes, based on the average Log2-ratio of significant OTUs, across lake habitats. Heat map of the significantly differentially abundant (based on log2-ratios of relative abundance data within each genera) bacterial OTUs within each genera between filter fractions, nutrient levels, and lake layers (titles). Blue represents a significant differential abundance of a genus or freshwater tribe in the particle-associated fraction, high-nutrient lakes, or hypolimnion. Yellow represents a significant differential abundance of a genus in the free-living fraction, low-nutrient lakes, or epilimnion. Only habitats with significant differentially abundant OTUs identified were included. Only genera or freshwater tribes with significant differentially abundant OTUs within the dataset are included.
# Let's see what our alpha diversity looks like for a rarefied data set using the summed replicate samples.
#FW_sum_reps # Raw Merged Samples, we have 14,294 OTUs
# Fix the metadata from merge_samples function
#sample_data(FW_sum_reps) <- sample_data(FWDB_sum_scale_matround)
# Remove the 2 wintergreen samples that are actually from the metalimnion
#FW_sum_reps_nowin <- subset_samples(FW_sum_reps, names != "WINH" & names != "WINH3um")
#FW_sum_reps_nowin <- prune_taxa(taxa_sums(FW_sum_reps_nowin) > 0, FW_sum_reps_nowin)
# Now, we have 14,105 OTUs !
#Data Import for rarefied data
#FW_sum_reps_nowin
#sum_reps_nowin_min_minus1 <- min(sample_sums(FW_sum_reps_nowin)) - 1 # 14,924
# The following code was developed by Michelle Berry:
# Rarefy to 14924 reads with replacement 100 times to estimate species richness
# Since we are rarefying to 14924 reads we need to remove samples with less than 14924 reads
#raredata14924 <- prune_samples(sample_sums(FW_sum_reps_nowin) > sum_reps_nowin_min_minus1, FW_sum_reps_nowin)
# There should still be 41 samples
# Initialize matrices to store richness and evenness estimates
#richness <- matrix(nrow = 41,ncol = 100) # Store richness: We have 41 samples
#row.names(richness) <- sample_names(raredata14924)
#evenness <- matrix(nrow = 41,ncol = 100) #Store evenness
#row.names(evenness) <- sample_names(raredata14924)
# We want to be reproducible - so let's set the seed.
#set.seed(15879966) # Same seed as in the first chunk
# For 100 replications, rarefy the OTU table to 14854 reads and store the richness and evennes estimates in our 2 matrices we just created. The default for the rarefy_even_depth command is to pick with replacement so I set it to false. Picking without replacement is more computationally intensive
#for (i in 1:100) {
# r <- rarefy_even_depth(raredata14924, sample.size = sum_reps_nowin_min_minus1, verbose = FALSE, replace = FALSE)
# rich <- as.numeric(as.matrix(estimate_richness(r, measures="Observed")))
# richness[,i] <- rich
# even <- as.numeric(as.matrix(estimate_richness(r, measures="InvSimpson")))
# evenness[,i] <- even
#}
#write.table(richness, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/ObservedRichness100_summed",
# row.names = TRUE)
#write.table(evenness, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/InvSimpson100_summed",
# row.names = TRUE)
richness_summed <- read.table(
"~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/ObservedRichness100_summed", header = TRUE)
evenness_summed <- read.table(
"~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/InvSimpson100_summed", header = TRUE)
# Create a new matrix to hold the means and standard deviations of all the richness_summed estimates
rich_stats_summed <- matrix(nrow= nrow(richness_summed), ncol=1)
rich_stats_summed[,1] <- apply(richness_summed, 1, mean)
#rich_stats_summed[,2] = apply(richness_summed, 1, sd)
rich_stats_summed = data.frame(row.names(richness_summed), rich_stats_summed)
colnames(rich_stats_summed) <- c("samples","mean_value")
rich_stats_summed$Test <- "Observed Richness"
# Create a new matrix to hold the means and standard deviations of the evenness_summed estimates
even_stats_summed <- matrix(nrow = nrow(evenness_summed), ncol = 1)
even_stats_summed[,1] <- apply(evenness_summed, 1, mean)
#even_stats[,2] = apply(evenness_summed, 1, sd)
even_stats_summed <- data.frame(row.names(evenness_summed), even_stats_summed)
colnames(even_stats_summed) <- c("samples","mean_value")
even_stats_summed$Test <- "Inverse Simpson"
### Add SIMPSON'S EVENNESS!
simps_even_summed <- data.frame(matrix(nrow = nrow(evenness_summed), ncol = 3))
colnames(simps_even_summed) = c("samples","mean_value", "Test")
simps_even_summed$samples <- even_stats_summed$samples
simps_even_summed$mean_value <- even_stats_summed$mean_value/rich_stats_summed$mean_value
simps_even_summed$Test <- "Simpson's Evenness"
#Combine the rich.stats and the even.stats dataframes
alpha_stats_summed <- rbind(rich_stats_summed, even_stats_summed, simps_even_summed)
alpha_stats_summed$names <- alpha_stats_summed$samples
alpha_stats_summed <- makeCategories_dups(alpha_stats_summed)
alpha_stats_summed$ProdLevel <- as.character(alpha_stats_summed$trophicstate)
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Eutrophic"] <- "Productive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Mesotrophic"] <- "Productive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Oligotrophic"] <- "Unproductive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$lakenames == "Sherman"] <- "Mixed"
alpha_stats_summed$trophicstate[alpha_stats_summed$lakenames == "Sherman"] <- "Mixed"
#### Add all information together!
for(i in 1:length(alpha_stats_summed$limnion)){
alpha_stats_summed$prod_lim[i]<-paste(as.character(alpha_stats_summed$ProdLevel[i]),
as.character(alpha_stats_summed$limnion[i]),
as.character(alpha_stats_summed$filter[i]))}
summed_stats <- alpha_stats_summed %>%
group_by(prod_lim, Test) %>%
summarize(mean = mean(mean_value),
SE = se(mean_value),
SD = sd(mean_value),
median = median(mean_value))
final_summed_alpha_stats <- left_join(alpha_stats_summed, summed_stats, by = c("prod_lim", "Test"))
final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Eutrophic"] <- "High-Nutrient"
final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Mesotrophic"] <- "High-Nutrient"
final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Oligotrophic"] <- "Low-Nutrient"
final_summed_alpha_stats$ProdLevel <-factor(final_summed_alpha_stats$ProdLevel,
levels=c("High-Nutrient", "Low-Nutrient", "Mixed"))
final_summed_alpha_stats$Test <-factor(final_summed_alpha_stats$Test,
levels=c("Inverse Simpson", "Simpson's Evenness","Observed Richness"))
final_summed_alpha_stats$prod_lim <-factor(final_summed_alpha_stats$prod_lim,levels=c("Productive Epilimnion Free", "Productive Epilimnion Particle","Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle","Mixed Mixed Particle", "Mixed Mixed Free"))
rare_alpha_stats <- final_summed_alpha_stats %>%
group_by(Test) %>%
summarize(max = max(mean_value),
min = min(mean_value))
final_summed_alpha_stats_plot <-
ggplot(final_summed_alpha_stats, aes(x = prod_lim, y = mean_value, color = prod_lim)) +
geom_point(size = 3, alpha = 0.7) +
ggtitle("Sub-Sampled at 14,924 Sequences") + theme_bw() + xlab("Habitat") + ylab("") +
geom_errorbar(aes(ymin= mean-SD, ymax=mean+SD), color = "black", width=.1, position=position_dodge(.9)) +
layer(data = final_summed_alpha_stats, mapping = aes(x = prod_lim, y = median),
geom = "point", pch = 95, size = 7, color = "black") +
layer(data = final_summed_alpha_stats, mapping = aes(x = prod_lim, y = mean),
geom = "point", pch = 126, size = 7, color = "black") +
facet_grid(Test ~ ProdLevel, scales="free", space="free_x") +
scale_color_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free",
"Productive Hypolimnion Particle", "Productive Hypolimnion Free",
"Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
"Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free",
"Mixed Mixed Particle", "Mixed Mixed Free"),
values = c("gray60", "gray60", "gray60", "gray60","gray60","gray60",
"gray60","gray60", "gray60", "gray60"))+
scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle",
"Productive Hypolimnion Free","Productive Hypolimnion Particle",
"Unproductive Epilimnion Free", "Unproductive Epilimnion Particle",
"Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle",
"Mixed Mixed Particle", "Mixed Mixed Free"),
labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated","Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated","Particle-Associated", "Free-Living")) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=8),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
plot.title = element_text(face="bold", size = 12),
strip.text.x = element_text(size=10, face="bold"),
strip.text.y = element_text(size=10, face="bold"),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size = 8),
plot.margin = unit(c(0.1, -0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
strip.background = element_blank(),
legend.position="none")
### Using the sum and scaled data
## remove the 2 wintergreen "hypolimnion" samples
nowin_FWDB_scaled # 8,823 OTUs
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8823 taxa and 41 samples ]
## sample_data() Sample Data: [ 41 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8823 taxa by 9 taxonomic ranks ]
names <- sample_names(nowin_FWDB_scaled)
sum_scaled_matround_rich <- as.numeric(as.matrix(estimate_richness(nowin_FWDB_scaled, measures="Observed")))
sum_scaled_matround_even <- as.numeric(as.matrix(estimate_richness(nowin_FWDB_scaled, measures="InvSimpson")))
sum_scaled_matround_simpseven <- sum_scaled_matround_even/sum_scaled_matround_rich
#Create new data frame
sum_scaled_matround_alpha <- data.frame(matrix(ncol = 0, nrow = length(names)))
sum_scaled_matround_alpha$names <- names
sum_scaled_matround_alpha$ObsRichness <- sum_scaled_matround_rich
sum_scaled_matround_alpha$InvSimpson <- sum_scaled_matround_even
sum_scaled_matround_alpha$Simps_Even <- sum_scaled_matround_simpseven
# Reshape the data
sum_scaled_matround_alpha <- gather(sum_scaled_matround_alpha, "Test", "Value", 2:4)
sum_scaled_matround_alpha <- makeCategories_dups(sum_scaled_matround_alpha)
sum_scaled_matround_alpha$ProdLevel <- as.character(sum_scaled_matround_alpha$trophicstate)
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Eutrophic"] <- "Productive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Mesotrophic"] <- "Productive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Oligotrophic"] <- "Unproductive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$lakenames == "Sherman"] <- "Mixed"
sum_scaled_matround_alpha$trophicstate[sum_scaled_matround_alpha$lakenames == "Sherman"] <- "Mixed"
# Change test
sum_scaled_matround_alpha$Test <- as.character(sum_scaled_matround_alpha$Test)
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "ObsRichness"] <- "Observed Richness"
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "InvSimpson"] <- "Inverse Simpson"
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "Simps_Even"] <- "Simpson's Evenness"
for(i in 1:length(sum_scaled_matround_alpha$limnion)){
sum_scaled_matround_alpha$prod_lim[i]<-paste(as.character(sum_scaled_matround_alpha$ProdLevel[i]),
as.character(sum_scaled_matround_alpha$limnion[i]),
as.character(sum_scaled_matround_alpha$filter[i]))}
# Calculate summary statistics
sum_scaled_matround_prod_stats <- sum_scaled_matround_alpha %>%
group_by(prod_lim, Test) %>%
summarize(mean = mean(Value),
SE = se(Value),
SD = sd(Value),
median = median(Value))
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Eutrophic"] <- "High-Nutrient"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Mesotrophic"] <- "High-Nutrient"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Oligotrophic"] <- "Low-Nutrient"
final_sum_scaled_matround_prod_stats <- left_join(sum_scaled_matround_alpha, sum_scaled_matround_prod_stats,
by = c("prod_lim", "Test"))
final_sum_scaled_matround_prod_stats$ProdLevel <-factor(final_sum_scaled_matround_prod_stats$ProdLevel,
levels=c("High-Nutrient", "Low-Nutrient", "Mixed"))
final_sum_scaled_matround_prod_stats$Test <-factor(final_sum_scaled_matround_prod_stats$Test,
levels=c("Inverse Simpson", "Simpson's Evenness",
"Observed Richness"))
final_sum_scaled_matround_prod_stats$prod_lim <-factor(final_sum_scaled_matround_prod_stats$prod_lim,
levels=c("Productive Epilimnion Free", "Productive Epilimnion Particle","Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle","Mixed Mixed Particle", "Mixed Mixed Free"))
scaled_alpha_stats <- final_sum_scaled_matround_prod_stats %>%
group_by(Test) %>%
summarize(max = max(Value),
min = min(Value))
sum_scaled_matround_prod_alpha_plot <-
ggplot(final_sum_scaled_matround_prod_stats, aes(x = prod_lim, y = Value, color = prod_lim)) +
geom_point(size = 3, alpha = 0.7) +
ggtitle("Scaled to 14,925 Sequences and Rounded") + theme_bw() + xlab("Habitat") + ylab("") +
geom_errorbar(aes(ymin= mean-SD, ymax=mean+SD), color = "black", width=.1, position=position_dodge(.9)) +
layer(data = final_sum_scaled_matround_prod_stats, mapping = aes(x = prod_lim, y = median),
geom = "point", pch = 95, size = 7, color = "black") +
layer(data = final_sum_scaled_matround_prod_stats, mapping = aes(x = prod_lim, y = mean),
geom = "point", pch = 126, size = 7, color = "black") +
facet_grid(Test ~ ProdLevel, scales="free", space="free_x") +
scale_color_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free",
"Productive Hypolimnion Particle", "Productive Hypolimnion Free",
"Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
"Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free",
"Mixed Mixed Particle", "Mixed Mixed Free"),
values = c("gray60", "gray60", "gray60", "gray60","gray60",
"gray60","gray60","gray60", "gray60", "gray60"))+
scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle",
"Productive Hypolimnion Free","Productive Hypolimnion Particle",
"Unproductive Epilimnion Free", "Unproductive Epilimnion Particle",
"Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle",
"Mixed Mixed Particle", "Mixed Mixed Free"),
labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated",
"Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
"Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated",
"Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
"Particle-Associated", "Free-Living")) +
theme(axis.title.x = element_text(face="bold", size=12),
axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=8),
axis.text.y = element_text(colour = "black", size=8),
axis.title.y = element_text(face="bold", size=12),
plot.title = element_text(face="bold", size = 12),
strip.text.x = element_text(size=10, face="bold"),
strip.text.y = element_text(size=10, face="bold"),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size = 8),
strip.background = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, -0.1), "cm"), #top, right, bottom, left
legend.position="none")
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS3_alpha_diversity.pdf", width= 10.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS3_alpha_diversity.tiff", width= 10.5, height=6, units = "in", res = 175)
multiplot(final_summed_alpha_stats_plot, sum_scaled_matround_prod_alpha_plot, cols = 2); #dev.off()
Figure S3: Within-sample OTU richness and evenness impacted by (Left panel) sub-sampling (rarefy-ing) the data 100 times without replacement at 14,924 sequencing reads and (right panel) by scaling the reads to 14,925 sequencing reads as describe within the methods and Figure S2, right panel legend (sequencing depth range is 521 sequences). As the scaling method (right panel) is not the standard method for alpha diversity metrics, caution must be taken when interpreting the plot. Raw (gray points), mean (black tilde) and median (black horizontal bar) values and standard deviation (error bars) for (Top panel) the Inverse Simpson (Middle panel) Simpson’s measure of evenness and (Bottom panel) OTU richness within each lake habitat sampled.
# Provide the Session Information for reproducibility
devtools::session_info()
## setting value
## version R version 3.2.2 (2015-08-14)
## system x86_64, darwin13.4.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Detroit
## date 2015-12-08
##
## package * version date source
## acepack 1.3-3.3 2014-11-24 CRAN (R 3.2.0)
## ade4 1.7-2 2015-04-14 CRAN (R 3.2.0)
## annotate 1.46.1 2015-07-11 Bioconductor
## AnnotationDbi 1.30.1 2015-04-26 Bioconductor
## ape * 3.3 2015-05-29 CRAN (R 3.2.0)
## assertthat 0.1 2013-12-06 CRAN (R 3.2.0)
## Biobase 2.28.0 2015-04-17 Bioconductor
## BiocGenerics * 0.14.0 2015-04-17 Bioconductor
## BiocParallel 1.2.22 2015-09-30 Bioconductor
## biom 0.3.12 2014-02-24 CRAN (R 3.2.0)
## Biostrings 2.36.4 2015-08-21 Bioconductor
## boot 1.3-17 2015-06-29 CRAN (R 3.2.2)
## chron 2.3-47 2015-06-24 CRAN (R 3.2.0)
## cluster 2.0.3 2015-07-21 CRAN (R 3.2.2)
## coda 0.18-1 2015-10-16 CRAN (R 3.2.0)
## codetools 0.2-14 2015-07-15 CRAN (R 3.2.2)
## colorspace 1.2-6 2015-03-11 CRAN (R 3.2.0)
## data.table * 1.9.6 2015-09-19 CRAN (R 3.2.0)
## DBI 0.3.1 2014-09-24 CRAN (R 3.2.0)
## deldir 0.1-9 2015-03-09 CRAN (R 3.2.0)
## DESeq2 * 1.8.2 2015-10-05 Bioconductor
## devtools 1.9.1 2015-09-11 CRAN (R 3.2.0)
## digest 0.6.8 2014-12-31 CRAN (R 3.2.0)
## dplyr * 0.4.3 2015-09-01 CRAN (R 3.2.0)
## evaluate 0.8 2015-09-18 CRAN (R 3.2.0)
## foreach 1.4.3 2015-10-13 CRAN (R 3.2.0)
## foreign 0.8-66 2015-08-19 CRAN (R 3.2.2)
## formatR 1.2.1 2015-09-18 CRAN (R 3.2.0)
## Formula 1.2-1 2015-04-07 CRAN (R 3.2.0)
## futile.logger 1.4.1 2015-04-20 CRAN (R 3.2.0)
## futile.options 1.0.0 2010-04-06 CRAN (R 3.2.0)
## genefilter 1.50.0 2015-04-17 Bioconductor
## geneplotter 1.46.0 2015-04-17 Bioconductor
## GenomeInfoDb * 1.4.3 2015-09-23 Bioconductor
## GenomicRanges * 1.20.8 2015-09-21 Bioconductor
## ggplot2 * 1.0.1 2015-03-17 CRAN (R 3.2.0)
## gridExtra 2.0.0 2015-07-14 CRAN (R 3.2.0)
## gtable * 0.1.2 2012-12-05 CRAN (R 3.2.0)
## Hmisc 3.17-0 2015-09-21 CRAN (R 3.2.0)
## htmltools 0.2.6 2014-09-08 CRAN (R 3.2.0)
## igraph 1.0.1 2015-06-26 CRAN (R 3.2.0)
## IRanges * 2.2.9 2015-10-02 Bioconductor
## iterators 1.0.8 2015-10-13 CRAN (R 3.2.0)
## knitr 1.11 2015-08-14 CRAN (R 3.2.2)
## labeling 0.3 2014-08-23 CRAN (R 3.2.0)
## lambda.r 1.1.7 2015-03-20 CRAN (R 3.2.0)
## lattice * 0.20-33 2015-07-14 CRAN (R 3.2.2)
## latticeExtra 0.6-26 2013-08-15 CRAN (R 3.2.0)
## lazyeval 0.1.10 2015-01-02 CRAN (R 3.2.0)
## LearnBayes 2.15 2014-05-29 CRAN (R 3.2.0)
## locfit 1.5-9.1 2013-04-20 CRAN (R 3.2.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.2.0)
## maptools 0.8-37 2015-09-29 CRAN (R 3.2.0)
## MASS 7.3-44 2015-08-30 CRAN (R 3.2.0)
## Matrix 1.2-2 2015-07-08 CRAN (R 3.2.2)
## memoise 0.2.1 2014-04-22 CRAN (R 3.2.0)
## mgcv 1.8-8 2015-10-24 CRAN (R 3.2.0)
## multcompView * 0.1-7 2015-07-31 CRAN (R 3.2.0)
## multtest 2.24.0 2015-04-17 Bioconductor
## munsell 0.4.2 2013-07-11 CRAN (R 3.2.0)
## nlme 3.1-122 2015-08-19 CRAN (R 3.2.0)
## nnet 7.3-11 2015-08-30 CRAN (R 3.2.0)
## pander * 0.5.2 2015-05-18 CRAN (R 3.2.0)
## permute * 0.8-4 2015-05-19 CRAN (R 3.2.0)
## pgirmess * 1.6.2 2015-06-07 CRAN (R 3.2.0)
## phyloseq * 1.12.2 2015-04-27 Bioconductor
## plyr * 1.8.3 2015-06-12 CRAN (R 3.2.0)
## proto 0.3-10 2012-12-22 CRAN (R 3.2.0)
## R6 2.1.1 2015-08-19 CRAN (R 3.2.2)
## RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.2.0)
## Rcpp * 0.12.1 2015-09-10 CRAN (R 3.2.0)
## RcppArmadillo * 0.6.100.0.0 2015-10-04 CRAN (R 3.2.0)
## reshape2 * 1.4.1 2014-12-06 CRAN (R 3.2.0)
## rgdal 1.0-4 2015-06-23 CRAN (R 3.2.0)
## rgeos 0.3-11 2015-05-29 CRAN (R 3.2.0)
## RJSONIO 1.3-0 2014-07-28 CRAN (R 3.2.0)
## rmarkdown 0.8.1 2015-10-10 CRAN (R 3.2.2)
## rpart 4.1-10 2015-06-29 CRAN (R 3.2.2)
## RSQLite 1.0.0 2014-10-25 CRAN (R 3.2.0)
## S4Vectors * 0.6.6 2015-09-21 Bioconductor
## scales * 0.3.0 2015-08-25 CRAN (R 3.2.2)
## sciplot * 1.1-0 2012-11-20 CRAN (R 3.2.0)
## sp 1.2-1 2015-10-18 CRAN (R 3.2.2)
## spdep 0.5-88 2015-03-17 CRAN (R 3.2.0)
## splancs 2.01-38 2015-09-29 CRAN (R 3.2.0)
## stringi 1.0-1 2015-10-22 CRAN (R 3.2.0)
## stringr 1.0.0 2015-04-30 CRAN (R 3.2.0)
## survival 2.38-3 2015-07-02 CRAN (R 3.2.2)
## tidyr * 0.3.1 2015-09-10 CRAN (R 3.2.0)
## vegan * 2.3-1 2015-09-25 CRAN (R 3.2.0)
## XML 3.98-1.3 2015-06-30 CRAN (R 3.2.0)
## xtable 1.7-4 2014-09-12 CRAN (R 3.2.0)
## XVector 0.8.0 2015-04-17 Bioconductor
## yaml 2.1.13 2014-06-12 CRAN (R 3.2.0)
## zlibbioc 1.14.0 2015-04-17 Bioconductor