Phylogenetic conservation of freshwater lake habitat preference varies between abundant bacterioplankton phyla

Accepted by Environmental Microbiology on November 19th, 2015

Link to the article: http://onlinelibrary.wiley.com/doi/10.1111/1462-2920.13143/abstract

Set the Global Knitr Options

# Set the global options
knitr::opts_chunk$set(echo= TRUE, eval = TRUE, fig.width=6, fig.height=5, fig.align = 'center', 
                      warning=FALSE, message=FALSE)
# Set the seed
set.seed(15879966)

Load in R-Packages and Set the Working Environment

# Load packages
necessary_packages <-  c("phyloseq", "ggplot2", "ape", "vegan", "plyr", "scales", "grid", "reshape2", 
                         "data.table","DESeq2", "dplyr","tidyr", "sciplot","scales", "gtable","pgirmess",
                         "multcompView", "pander")

# Install the necessary packages
packages <- lapply(necessary_packages, library, character.only = TRUE)

# Set the working Directory
setwd("~/Final_PAFL_Trophicstate")

### Source written functions in the file Functions_PAFL.R that is housed within the "Final_PAFL_Trophicstate"
source("Functions_PAFL.R")

Set the Plotting and Table Options

### Set our theme for our plots down the road
theme_set(theme_bw() + theme(plot.title = element_text(face="bold", size = 12),  #Set the plot title
                                 strip.text.x = element_text(size=10, face="bold"),  #Set Y facet titles 
                                 strip.text.y = element_text(size=10, face="bold"),  #Set X facet titles 
                                 strip.background = element_blank(),  #No facet background
                                 axis.title.x = element_text(face="bold", size=12),  #Set the x-axis title
                                 axis.title.y = element_text(face="bold", size=12),  #Set the y-axis title
                                 axis.text.x = element_text(colour = "black", size=8),  #Set the x-axis labels
                                 axis.text.y = element_text(colour = "black", size=8),  #Set the y-axis labels
                                 legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                                 legend.text = element_text(size = 8),  #Set the legend text
                                 legend.position="right", #Default the legend position to the right
                                 plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"))) #top, right, bottom, left
  
##  Do not show scientific notation 
options(scipen = 999, digits = 7)

## Set pander (table) output functions 
panderOptions('table.split.table', Inf)
panderOptions('table.style', 'rmarkdown')

Import Data & Remove Non-Bacterial and Chloroplasts Sequences

# Load in the shared file from the mothur FWDB analysis
FWWDB_shared <- "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/SoMiLakes_copy_FWDB.files.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.unique_list.shared"
# Load in the taxonomy file from the mothur FWDB analysis
FWDB_tax <- "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/SoMiLakes_copy_FWDB.files.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.unique_list.0.03.cons.taxonomy"
#Create FWDB phyloseq object
FWDB_data <- import_mothur(mothur_shared_file = FWWDB_shared, mothur_constaxonomy_file = FWDB_tax )

# Change the taxonomy names
#length(colnames(tax_table(FWDB_data))) # 8 columns 
colnames(tax_table(FWDB_data)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species", "Strain")
#sum(row.names(otu_table(FWDB_data)) == row.names(tax_table(FWDB_data))) # OTU names are the same in the tax and otu tables.

#Let's check out the taxonomy table 
tax <- data.frame(tax_table(FWDB_data))
tax$OTU <- row.names(tax)
tax_table(FWDB_data) <- tax_table(as.matrix(tax))

# Change some of the sample names to match our metadata sample names
samplenames_FWDB <- sample_names(FWDB_data)
names2_FWDB <- gsub("Gull", "GUL", samplenames_FWDB)
names3_FWDB <- gsub("Long", "LON", names2_FWDB)
sample_names(FWDB_data) <- names3_FWDB

# Import metadata file and merge with FWDB_data object
mapfile <- "~/Final_PAFL_Trophicstate/raw_data/metadata"
map <- import_qiime_sample_data(mapfile)

# Check the sequencing depth of each sample 
sums_FWDB <- data.frame(colSums(otu_table(FWDB_data)))
colnames(sums_FWDB) <- "Sample_TotalSeqs"
sums_FWDB$sample <- row.names(sums_FWDB)
sums_FWDB <- arrange(sums_FWDB, Sample_TotalSeqs)

####  Create a plot of the number of sequences per sample
ggplot(sums_FWDB, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) + 
  ylab("Number of Sequences per Sample") +
  geom_bar(stat = "identity", colour="black",fill="cornflowerblue")  + xlab("Sample Name") + 
  ggtitle("Total Number of Sequences per Sample") + 
  theme(axis.text.x = element_text(colour = "black", size=6, angle=45, hjust = 1, vjust = 1))

##  Prune out samples that have a sequencing depth of less than 10,000 sequences
pruned_FWDB <- prune_samples(sample_sums(FWDB_data) > 10000, FWDB_data)

# Combine FWDB and mapfiles 
FWDB_no_scale <- merge_phyloseq(pruned_FWDB,map)

# lets look at only samples (removing blanks and mock and samples that didn't amplify)
FW_pruned <- prune_taxa(taxa_sums(FWDB_no_scale) > 0, FWDB_no_scale)
FW_samples <-subset_taxa(FW_pruned, Kingdom == "Bacteria")
FW_samples <-subset_taxa(FW_samples, Class != "Chloroplast")

Merge Replicate Sample and Scale the Sequencing Depth

###  Time to (1) Sum between replicates and then (2) scale 
# Step 1.  Sum the reads 
FW_sum_reps <- merge_samples(x = FW_samples, group = "dups", fun = "sum") # Sum between replicate samples
FW_sum_reps <- prune_taxa(taxa_sums(FW_sum_reps) > 0, FW_sum_reps) # Remove any OTUs that are 0
## merge_samples messes up the sample_data so we will need to fix this below.  


# Step 2:  Scale the reads 
FWDB_sum_scale_matround <- scale_reads(physeq = FW_sum_reps, n = min(sample_sums(FW_sum_reps)), round = "matround")

data_dup_FWDB_sum_scale_matround <- read.table("~/Final_PAFL_Trophicstate/raw_data/duplicate_metadata.txt", header = TRUE, sep = "\t")
row.names(data_dup_FWDB_sum_scale_matround) <- data_dup_FWDB_sum_scale_matround$names
data_dup_FWDB_sum_scale_matround$quadrant <- factor(data_dup_FWDB_sum_scale_matround$quadrant,levels = c("Free Epilimnion", "Free Mixed",  "Free Hypolimnion", "Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))

# Fix the sample data 
# Add a column of Total Sequence sums to data_dup
TotalSeqs_FWDB_sum_reps <- data.frame(rowSums(otu_table(FW_sum_reps)))
TotalSeqs_FWDB_sum_reps$names <- as.factor(row.names(TotalSeqs_FWDB_sum_reps))
colnames(TotalSeqs_FWDB_sum_reps)[1] <- "TotalSeqs_Summed"
data_dup_FWDB_sum_scale_matround_join<- left_join(data_dup_FWDB_sum_scale_matround, TotalSeqs_FWDB_sum_reps, by = "names") 

TotalSeqs_FWDB_sum_scale_matround <- data.frame(rowSums(otu_table(FWDB_sum_scale_matround)))
TotalSeqs_FWDB_sum_scale_matround$names <- as.factor(row.names(TotalSeqs_FWDB_sum_scale_matround))
colnames(TotalSeqs_FWDB_sum_scale_matround)[1] <- "TotalSeqs_Scaled"
data_dup_FWDB_sum_scale_matround_join<- left_join(data_dup_FWDB_sum_scale_matround_join, TotalSeqs_FWDB_sum_scale_matround, by = "names") 
#sample_names(FWDB_sum_scale_matround) <- as.character(data_dup_FWDB_sum_scale_matround_join$names)

row.names(data_dup_FWDB_sum_scale_matround_join) <- sample_names(FWDB_sum_scale_matround)

dup_data <- sample_data(data_dup_FWDB_sum_scale_matround_join)  # Metadata in our phyloseq object
sum_scale_matround_otu <- otu_table(FWDB_sum_scale_matround)  # OTU table in our phyloseq object
sum_scale_matround_tax <- tax_table(FWDB_sum_scale_matround)  # Taxonomy Table in our phyloseq object
FWDB_sum_scale_matround <- merge_phyloseq(sum_scale_matround_otu, sum_scale_matround_tax, dup_data)

Figure S2: Histogram of Sequencing Depth of Merged and Scaled Samples

##  Our phyloseq object!
nowin_FWDB_scaled <- subset_samples(FWDB_sum_scale_matround, names != "WINH" & names != "WINH3um") # Remove the two samples that are not a true hypolimnion
nowin_FWDB_scaled <- prune_taxa(taxa_sums(nowin_FWDB_scaled) > 0, nowin_FWDB_scaled) # Remove any OTUs that are 0
nowin_data <- data.frame(sample_data(nowin_FWDB_scaled))

mean_scaled_dat <- ddply(nowin_data, "filter", summarise, scaled.mean=mean(TotalSeqs_Scaled))

# First plot for figure S2
scaled_historgram <- ggplot(nowin_data, aes(x = TotalSeqs_Scaled, fill = filter)) + ylab("Count") +
  geom_histogram(alpha=.75, position="identity", binwidth = 20)  + 
  xlab("Total Sequences per Sample") + 
  ggtitle(expression(atop(bold("Scaled Samples"), atop("Bin-Width = 20"), ""))) +
  scale_fill_manual(name="Filter Fraction", 
                    breaks = c("Free", "Particle"),
                    values = c("orangered", "orangered4")) +
  scale_color_manual(breaks = c("Free", "Particle"),
                    values = c("orangered", "orangered4")) +
  scale_x_continuous(breaks = seq(14600, 15200, 100), limits = c(14600, 15200), expand = c(0,0)) +
  scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 4.5), expand = c(0,0)) +
  geom_vline(data=mean_scaled_dat, aes(xintercept=scaled.mean,  colour=filter),
               linetype="longdash", size=1) +
  theme(legend.position = c(0.2, 0.83),
        plot.margin = unit(c(0.1, 0.5, 0.1, 0.1), "cm")) #top, right, bottom, left


mean_summed_dat <- ddply(nowin_data, "filter", summarise, summed.mean=mean(TotalSeqs_Summed))

# Second plot for Figure S2
merged_histogram <- ggplot(nowin_data, aes(x = TotalSeqs_Summed, fill = filter)) + ylab("Count") + 
  geom_histogram(position = "identity", alpha = 0.75, binwidth = 1500)  + 
  xlab("Total Sequences per Sample") + 
    scale_fill_manual(name="Filter Fraction", 
                    breaks = c("Free", "Particle"),
                    values = c("orangered", "orangered4")) +
  ggtitle(expression(atop(bold("Summed Replicate Samples"), atop("Bin-Width = 1500"), ""))) +
  scale_color_manual(breaks = c("Free", "Particle"),
                    values = c("orangered", "orangered4")) +
  geom_vline(data=mean_summed_dat, aes(xintercept=summed.mean,  colour=filter),
               linetype="longdash", size=1) +
  scale_x_continuous(breaks = seq(10000, 90000, 10000), limits = c(10000, 75000), expand = c(0,0)) +
  scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 3.5), expand = c(0,0)) +
  theme(legend.position = c(0.2, 0.83),
        plot.margin = unit(c(0.1, 0.5, 0.1, 0.1), "cm")) #top, right, bottom, left



#####  Plotting FIGURE S2  #####  Plotting FIGURE S2  #####  Plotting FIGURE S2  
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS2_histogram.pdf",  width= 6.5, height=3.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS2_histogram.tiff", width= 6.5, height=3.5, units = "in", res = 175)
multiplot(merged_histogram, scaled_historgram, cols = 2);  #dev.off()

Figure S2: Histogram of the total number of sequencing reads per sample. (Left) Sequencing reads per sample after summing the raw number of sequencing reads between replicate samples. (Right) Sequencing reads per sample after transforming the sequence depth of each of the summed replicate samples by taking the proportion of each OTU in samples and scaling it to the minimum sequence depth in the data set (14,925 sequences), and then rounding to the nearest integer. Dotted vertical lines represent the mean sequencing read depth of the free-living (orange) and particle-associated (brown) samples. The bin-width represents the number of sequencing reads that are binned within each bar within the histogram

Working phyloseq object: nowin_FWDB_scaled

########## ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(nowin_FWDB_scaled))
Phylum <- as.character(phy$Phylum)
Class <- as.character(phy$Class)

for  (i in 1:length(Phylum)){ 
  if (Phylum[i] == "Proteobacteria"){
    Phylum[i] <- Class[i]
  } 
}

phy$Phylum <- Phylum
t <- tax_table(as.matrix(phy))

tax_table(nowin_FWDB_scaled) <- t

# The phyloseq object!
nowin_FWDB_scaled

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8823 taxa and 41 samples ]
## sample_data() Sample Data:       [ 41 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8823 taxa by 9 taxonomic ranks ]

## The phyloseq object without the mixed lake
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEH" & 
                                          names != "SHEE3um" & names != "SHEH3um")
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled)

Rank Abundance curve of the 300 Most Abundant OTUs

####  Obtain the top 100 most abundant OTUs
topN <- 300
most_abundant_taxa = sort(taxa_sums(nowin_FWDB_scaled), TRUE)[1:topN]
#print(most_abundant_taxa)
FWDB_300_OTUs <- prune_taxa(names(most_abundant_taxa), nowin_FWDB_scaled)

FWDB_sums <- data.frame(taxa_sums(FWDB_300_OTUs))

ggplot(FWDB_sums,aes(x=row.names(FWDB_sums), y=taxa_sums.FWDB_300_OTUs.)) + 
  ylab("Number of Sequences per OTU") + scale_x_discrete(expand = c(0,0)) + 
  scale_y_continuous(expand = c(0,0)) + theme_classic() +
  geom_bar(stat="identity",colour="black",fill="darkturquoise")  + xlab("OTU Rank") + 
  ggtitle("Rank Abundance Curve of the Top 300 OTUs") + 
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

Statistics on Merging Samples, Sub-Sampling, and Scaling Sequencing Reads

#Create to subsampled dataset
rare_FW <- rarefy_even_depth(FW_sum_reps, sample.size = (min(sample_sums(FW_sum_reps))-1), rngseed = 15879966)

###
fwdb_list <- c( "FW_samples", "FW_sum_reps", "FWDB_sum_scale_matround", "rare_FW")
compare_fwdb <- data.frame(matrix(ncol = 6, nrow = 4))
# Assign the row names
row.names(compare_fwdb) <- c("FWDB+Silva Raw", "Merged FWDB+Silva", 
                             "FWDB+Silva Scaled", "Subsampled FWDB+Silva")
# Assign the column names 
colnames(compare_fwdb) <- c("MinSeqs", "MeanSeqs", "MedianSeqs", "MaxSeqs", "RangeSeqs", "NumOTUs")
# Create the data frame
compare_fwdb[1,] <- physeq_stats(FW_samples)

## [1] 10737.00 24876.32 25395.00 35842.00 25105.00 14294.00

compare_fwdb[2,] <- physeq_stats(FW_sum_reps)

## [1] 14925 45703 46975 69584 54659    43

compare_fwdb[2,6] <- ncol(otu_table(FW_sum_reps))
compare_fwdb[3,] <- physeq_stats(FWDB_sum_scale_matround)

## [1] 14635.00 14883.12 14906.00 15156.00   521.00    43.00

compare_fwdb[3,6] <- ncol(otu_table(FWDB_sum_scale_matround))
compare_fwdb[4,] <- physeq_stats(rare_FW)

## [1] 14924 14924 14924 14924     0    43

compare_fwdb[4,6] <- ncol(otu_table(rare_FW))

# Set the values to numbers and then will round to the whole number
panderOptions("digits", 1) 
# Align all cells with "center" alignment and the row.names with "right" alignment
set.alignment('center', row.names = 'right') 
pander(compare_fwdb, caption="Statistics on the sequencing depth of the entire data set.")

Statistics on the sequencing depth of the entire data set.
	MinSeqs	MeanSeqs	MedianSeqs	MaxSeqs	RangeSeqs	NumOTUs
FWDB+Silva Raw	10737	24876	25395	35842	25105	14294
Merged FWDB+Silva	14925	45703	46975	69584	54659	14294
FWDB+Silva Scaled	14635	14883	14906	15156	521	8992
Subsampled FWDB+Silva	14924	14924	14924	14924	0	8344

profile <- read.csv(file = "~/Final_PAFL_Trophicstate/raw_data/AllLakes_depthprofile.csv", header = TRUE); 
profile <- subset(profile, select = c('lakename',"trophicstate", "depth", "temp", "DO", "pH", "SpC"))
profile$DO. = NULL

profile$trophicstate <- as.character(profile$trophicstate)
profile$trophicstate[profile$trophicstate == "Eutrophic"] <- "Productive"
profile$trophicstate[profile$trophicstate == "Mesotrophic"] <- "Productive"
profile$trophicstate[profile$trophicstate == "Oligotrophic"] <- "Unproductive"
profile$trophicstate[profile$trophicstate == "Mixed"] <- "Mixed"

# ALL LAKES 
profile_all <- profile %>%  gather(Variable, Value, 4:7)
profile_all$lakename <- as.character(profile_all$lakename)
profile_all$lakename[profile_all$lakename == "WIN"] <- "Wintergreen"
profile_all$lakename[profile_all$lakename == "SIX"] <- "Sixteen"
profile_all$lakename[profile_all$lakename == "SHE"] <- "Sherman"
profile_all$lakename[profile_all$lakename == "PAY"] <- "Payne"
profile_all$lakename[profile_all$lakename == "LON"] <- "Little Long"
profile_all$lakename[profile_all$lakename == "LEE"] <- "Lee"
profile_all$lakename[profile_all$lakename == "GUL"] <- "Gull"
profile_all$lakename[profile_all$lakename == "BRI"] <- "Bristol"
profile_all$lakename[profile_all$lakename == "BAK"] <- "Baker"
profile_all$lakename[profile_all$lakename == "BAS"] <- "Baseline"
profile_all$lakename[profile_all$lakename == "BST"] <- "Bassett"
profile_all$lakename[profile_all$lakename == "BST"] <- "Bassett"
profile_all$trophicstate[profile_all$trophicstate == "Productive"] <- "High-Nutrient"
profile_all$trophicstate[profile_all$trophicstate == "Unproductive"] <- "Low-Nutrient"
profile_all$trophicstate[profile_all$trophicstate == "Mixed"] <- "High-Nutrient"

profile_all <- subset(profile_all, Variable != "SpC")

profile_labeller <- function(var, value){
  value <- as.character(value)
  if (var=="Variable") { 
    value[value=="temp"] <- "Temperature \n (Celsius)"
    value[value=="DO"]   <- "Dissolved Oxygen \n (mg/L)"
    value[value=="pH"]   <- "pH"
    value[value=="SpC"]   <- "Specific Conductivity \n (uS/cm)"
  }
  return(value)
}

#####  Plotting FIGURE S1  #####  Plotting FIGURE S1  #####  Plotting FIGURE S1  
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS1_depth_profile.pdf",  width= 6.5, height=6.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS1_depth_profile.tiff", width= 6.5, height=6.5, units = "in", res = 175)
ggplot(profile_all, aes(x=Value, y = depth, color = lakename)) +   
  geom_path(size=1, alpha = 0.8) + ylab("Depth (m)") + xlab("") + 
  theme_bw() +  geom_point(size=2, alpha = 0.8) + geom_hline(h=0) +
  scale_y_reverse(breaks=seq(0, 32, 4), expand = c(0, 0)) + 
  scale_color_manual(name = "Lake Name", 
                     labels = c("Baker", "Baseline", "Bassett", "Bristol", "Gull", "Lee", 
                                "Little Long", "Payne", "Sherman", "Sixteen", "Wintergreen"),
                     values = c("blue", "red", "black", "forestgreen","violet","limegreen", 
                                "purple", "orange", "maroon1", "turquoise3", "thistle4")) +
  facet_grid(trophicstate ~ Variable, scales = "free",labeller = profile_labeller, space = "free_y") +  
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        legend.position="right");  #dev.off()

Figure S1: Full lake profiles for each lake. (Top) High-nutrient and (Bottom) low-nutrient lake profiles. Colored points represent the raw value at each depth. Lines connect the discrete points within each lake. Sherman Lake was the mixed lake.

Calculate the Phylum and OTU abundance for each Lake Habitat

# 1st:  Get the abundance of each phylum for ALL PA/FL/High-Nutrient/Low-Nutrient/Epilimnion/Hypolimnion
phy_glom <- tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
phy_melt <- psmelt(phy_glom) %>%
  subset(select = c("Sample", "ProdLevel", "limnion", "filter", "Phylum","Abundance"))


# No sherman samples 
phy_melt_nosher <- subset(phy_melt, Sample != "SHEE" & Sample != "SHEH" & 
                            Sample != "SHEE3um" & Sample != "SHEH3um")

## Free-Living - 21 samples
phy_abund_free <- subset(phy_melt_nosher, filter == "Free") %>% # Subset the Free-Living samples 
  phy_abund_in_group("Free-Living")

#Particle-Associated - 20 Samples
phy_abund_particle <- subset(phy_melt_nosher, filter == "Particle") %>% # Subset the particle samples 
  phy_abund_in_group("Particle-Associated")

# Epilimnion - 19 samples 
phy_abund_epi <- subset(phy_melt_nosher, limnion == "Epilimnion") %>% # Subset the Epilimnion samples 
  phy_abund_in_group("Epilimnion")

# Hypolimnion - 18 Samples 
phy_abund_hypo <- subset(phy_melt_nosher, limnion == "Hypolimnion") %>% # Subset the Hypolimnion samples 
  phy_abund_in_group("Hypolimnion")

# High-Nutrient - 26 Samples 
phy_abund_high <- subset(phy_melt_nosher, ProdLevel == "Productive") %>% # Subset the Productive samples
  phy_abund_in_group("Productive")

# Low-Nutrient - 15 samples 
phy_abund_low <- subset(phy_melt_nosher, ProdLevel == "Unproductive") %>% # Subset the Unproductive samples 
  phy_abund_in_group("Unproductive")



## WARNING: This command takes a long time to run! 
# it calculates the abundance of each otu within each samples
otu_glom <- tax_glom(nowin_FWDB_scaled, taxrank = "OTU")   
otu_melt <- psmelt(otu_glom) %>%
  subset(select = c("Sample", "ProdLevel", "limnion", "filter", "OTU","Abundance"))
#write.table(otu_melt, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/otu_melt", row.names = FALSE)
# Let's load in a previously run version to make this process faster.
#otu_melt <- read.table( "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/otu_melt", header = TRUE)
otu_melt_nosherwin <- subset(otu_melt, Sample != "SHEE" & Sample != "SHEH" & 
                               Sample != "SHEE3um" & Sample != "SHEH3um")

# The below data frames do not include SHERMAN (Mixed lake) OR Wintergreen "hypolimnion" samples 
## Free-Living - 21 samples
# All OTUs and their abundance in the FREE LIVING 
otu_abund_free <- subset(otu_melt_nosherwin, filter == "Free") %>% # Subset the free-living samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_free$SampleType <- "Free-Living"

# Particle-Associated - 20 Samples
# All OTUs and their abundance in the PARTICLE-ASSOCIATED
otu_abund_particle <- subset(otu_melt_nosherwin, filter == "Particle") %>% # Subset the particle-associated samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_particle$SampleType <- "Particle-Associated"

# Epilimnion - 19 samples 
# All OTUs and their abundance in the EPILIMNION
otu_abund_epi <- subset(otu_melt_nosherwin, limnion == "Epilimnion") %>% # Subset the Epilimnion samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_epi$SampleType <- "Epilimnion"

# Hypolimnion - 18 Samples 
# All OTUs and their abundance in the HYPOLIMNION 
otu_abund_hypo <- subset(otu_melt_nosherwin, limnion == "Hypolimnion") %>% # Subset the Hypolimnion samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_hypo$SampleType <- "Hypolimnion"

# High-Nutrient - 26 Samples 
# All OTUs and their abundance in the HIGH-NUTRIENT 
otu_abund_high <- subset(otu_melt_nosherwin, ProdLevel == "Productive") %>% # Subset the Productive samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_high$SampleType <- "Productive"

# Low-Nutrient - 15 samples 
# All OTUs and their abundance in the LOW-NUTRIENT
otu_abund_low <- subset(otu_melt_nosherwin, ProdLevel == "Unproductive") %>% # Subset the Unproductive samples 
  select(Sample, Abundance, OTU) %>% # select only the important columns 
  group_by(OTU) %>% # Combine all the OTUs
  summarize(OTU_sum = sum(Abundance)) # Sum each OTU abundance count
otu_abund_low$SampleType <- "Unproductive"


## Get rid of OTUs that are 0.
otu_abund_free_pruned <- filter(otu_abund_free, OTU_sum > 0)
colnames(otu_abund_free_pruned)[2] <- "OTU_sum_Free"

otu_abund_particle_pruned <- filter(otu_abund_particle, OTU_sum > 0)
colnames(otu_abund_particle_pruned)[2] <- "OTU_sum_Particle"

otu_abund_epi_pruned <- filter(otu_abund_epi, OTU_sum > 0)
colnames(otu_abund_epi_pruned)[2] <- "OTU_sum_Epi"

otu_abund_hypo_pruned <- filter(otu_abund_hypo, OTU_sum > 0)
colnames(otu_abund_hypo_pruned)[2] <- "OTU_sum_Hypo"

otu_abund_high_pruned <- filter(otu_abund_high, OTU_sum > 0)
colnames(otu_abund_high_pruned)[2] <- "OTU_sum_High"

otu_abund_low_pruned <- filter(otu_abund_low, OTU_sum > 0)
colnames(otu_abund_low_pruned)[2] <- "OTU_sum_Low"


### Combing get all of the 
###  Need to add the taxonomy to the OTU information
taxa <- data.frame(tax_table(nosherwin_FWDB_scaled)) %>%
  select(Phylum, OTU)

free_living_all_otus <- inner_join(otu_abund_free_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Free) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(free_living_all_otus)[2] <- "AllOTU_Total"

particle_associated_all_otus <- inner_join(otu_abund_particle_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Particle) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(particle_associated_all_otus)[2] <- "AllOTU_Total"


epilimnion_all_otus <- inner_join(otu_abund_epi_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Epi) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(epilimnion_all_otus)[2] <- "AllOTU_Total"

hypolimnion_all_otus <- inner_join(otu_abund_hypo_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Hypo) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(hypolimnion_all_otus)[2] <- "AllOTU_Total"

high_nutrient_all_otus <- inner_join(otu_abund_high_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_High) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(high_nutrient_all_otus)[2] <- "AllOTU_Total"

low_nutrient_all_otus <- inner_join(otu_abund_low_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Low) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(low_nutrient_all_otus)[2] <- "AllOTU_Total"

Figure 1: Shared and Unique OTUs

# Free-living and Particle-associated
free_only <- anti_join(otu_abund_free_pruned, otu_abund_particle_pruned, by = "OTU") # Unique to FL
singletons_free <- subset(free_only, OTU_sum_Free < 2.1)
nrow(singletons_free) # How many single- & double-tons are unique to free-living?

## [1] 2741

part_only <- anti_join(otu_abund_particle_pruned, otu_abund_free_pruned, by = "OTU") # Unique to PA
singletons_part <- subset(part_only, OTU_sum_Particle < 2.1)
nrow(singletons_part) # How many single- & double-tons are unique to particle-associated?

## [1] 2252

pvf_both <- inner_join(otu_abund_free_pruned, otu_abund_particle_pruned, by = "OTU") %>% # Shared between PA & FL 
  mutate(Comparison = "PA vs. FL",
         Overlap = "Both PA and FL",
         TotalSum = OTU_sum_Free + OTU_sum_Particle) %>%
  select(-SampleType.x, -SampleType.y) # Delete unnecessary Columns
  

#  Epilimnion and Hypolimnion 
epi_only <- anti_join(otu_abund_epi_pruned, otu_abund_hypo_pruned, by = "OTU") # Unique to epilimnion
singletons_epi <- subset(epi_only, OTU_sum_Epi < 2.1)
nrow(singletons_epi) # How many single- & double-tons are unique to the epilimnion?

## [1] 2315

hypo_only <- anti_join(otu_abund_hypo_pruned, otu_abund_epi_pruned, by = "OTU") # Unique to hypolimnion 
singletons_hypo <- subset(hypo_only, OTU_sum_Hypo < 2.1)
nrow(singletons_hypo) # How many single- & double-tons are unique to the hypolimnion?

## [1] 2829

tvb_both <- inner_join(otu_abund_epi_pruned, otu_abund_hypo_pruned, by = "OTU") %>%  # Shared between epi & hypo 
  mutate(Comparison = "Top vs. Bottom",
         Overlap = "Both Epi and Hypo",
         TotalSum = OTU_sum_Epi + OTU_sum_Hypo) %>%
  select(-SampleType.x, -SampleType.y) 

#  High and Low-Nutrient 
low_only <- anti_join(otu_abund_low_pruned, otu_abund_high_pruned, by = "OTU") # Unique to low-nutrient
singletons_low <- subset(low_only, OTU_sum_Low < 2.1)
nrow(singletons_low) # How many single- & double-tons are unique to the low-nutrient lakes?

## [1] 1625

high_only <- anti_join(otu_abund_high_pruned, otu_abund_low_pruned, by = "OTU") # Unique to high-nutrient 
singletons_high <- subset(high_only, OTU_sum_High < 2.1)
nrow(singletons_high)# How many single- & double-tons are unique to the high-nutrient lakes?

## [1] 3500

hvl_both <- inner_join(otu_abund_low_pruned, otu_abund_high_pruned, by = "OTU") %>%  # Shared between high & low 
  mutate(Comparison = "Prod vs. Unprod",
         Overlap = "Both Epi and Hypo",
         TotalSum = OTU_sum_High + OTU_sum_Low) %>%
  select(-SampleType.x, -SampleType.y) 


###  Make an empty data frame for plotting this information
otu_overlap <- data.frame(matrix(ncol = 1, nrow = 12))

##  Manually make all the columns 

### Column for the thype of sample
SampleType <- c("Particle-Associated", "Particle-Associated", "Free-Living", "Free-Living", 
                "Epilimnion", "Epilimnion", "Hypolimnion", "Hypolimnion", 
                "High-Nutrient", "High-Nutrient", "Low-Nutrient", "Low-Nutrient")

## Column for Unique and Shared Information
Type <- c("Particle-Associated Only", "Shared", "Free-Living Only", "Shared", 
          "Epilimnion Only", "Shared", "Hypolimnion Only", "Shared", 
          "High-Nutrient Only", "Shared", "Low-Nutrient Only", "Shared")

##  Column for the number of OTUs that are shared/unique
NumOTUs <- c(nrow(part_only), nrow(pvf_both), nrow(free_only), nrow(pvf_both), 
             nrow(epi_only), nrow(tvb_both), nrow(hypo_only), nrow(tvb_both), 
             nrow(high_only), nrow(hvl_both), nrow(low_only), nrow(hvl_both))

##  Provide the facet label information 
Comparison <- c("PA vs. FL", "PA vs. FL", "PA vs. FL", "PA vs. FL", 
                "Top vs. Bottom","Top vs. Bottom", "Top vs. Bottom", "Top vs. Bottom",
                "Prod vs. Unprod","Prod vs. Unprod","Prod vs. Unprod","Prod vs. Unprod")

## Provide the number of OTUs within each sampletype# abundance of each sample type
Relative_OTU_Abund <- c(sum(part_only$OTU_sum_Particle), sum(pvf_both$OTU_sum_Particle), 
                        sum(free_only$OTU_sum_Free),  sum(pvf_both$OTU_sum_Free), 
                        sum(epi_only$OTU_sum_Epi), sum(tvb_both$OTU_sum_Epi), 
                        sum(hypo_only$OTU_sum_Hypo), sum(tvb_both$OTU_sum_Hypo), 
                        sum(high_only$OTU_sum_High), sum(hvl_both$OTU_sum_High), 
                        sum(low_only$OTU_sum_Low), sum(hvl_both$OTU_sum_Low))

## Provide the total number of reads within each sample
Total_reads <- c(sum(sum(part_only$OTU_sum_Particle) + sum(pvf_both$OTU_sum_Particle)), 
                 sum(sum(part_only$OTU_sum_Particle) + sum(pvf_both$OTU_sum_Particle)), 
                 sum(sum(free_only$OTU_sum_Free) + sum(pvf_both$OTU_sum_Free)), 
                 sum(sum(free_only$OTU_sum_Free) + sum(pvf_both$OTU_sum_Free)),
                 sum(sum(epi_only$OTU_sum_Epi) + sum(tvb_both$OTU_sum_Epi)), 
                 sum(sum(epi_only$OTU_sum_Epi) + sum(tvb_both$OTU_sum_Epi)),
                 sum(sum(hypo_only$OTU_sum_Hypo) + sum(tvb_both$OTU_sum_Hypo)),  
                 sum(sum(hypo_only$OTU_sum_Hypo) + sum(tvb_both$OTU_sum_Hypo)),
                 sum(sum(high_only$OTU_sum_High) + sum(hvl_both$OTU_sum_High)),  
                 sum(sum(high_only$OTU_sum_High) + sum(hvl_both$OTU_sum_High)),
                 sum(sum(low_only$OTU_sum_Low) + sum(hvl_both$OTU_sum_Low)),  
                 sum(sum(low_only$OTU_sum_Low) + sum(hvl_both$OTU_sum_Low)))

# Make the columns 
otu_overlap$SampleType<- SampleType
otu_overlap$Type <- Type
otu_overlap$NumOTUs <- NumOTUs
otu_overlap$Comparison <- Comparison
otu_overlap$Relative_OTU_Abund <- Relative_OTU_Abund
otu_overlap$Total_reads <- Total_reads
otu_overlap <- otu_overlap[,-1]

otu_overlap$SampleType <- factor(otu_overlap$SampleType,levels = c("Free-Living", "Particle-Associated", 
                                                                   "Low-Nutrient", "High-Nutrient",
                                                                   "Epilimnion", "Hypolimnion"))

otu_overlap$Type <- factor(otu_overlap$Type, levels = c("Shared","Free-Living Only", "Particle-Associated Only", 
                                                        "Low-Nutrient Only", "High-Nutrient Only", 
                                                        "Epilimnion Only", "Hypolimnion Only"))



otu_overlap <- otu_overlap %>%
  mutate(proportion = Relative_OTU_Abund/Total_reads)

otu_proportions <- otu_overlap$proportion

####  Test for Significance of OVERLAPPING OTUS 
PAFL_chisq <-matrix(c(nrow(part_only), nrow(pvf_both), nrow(free_only), nrow(pvf_both)),nrow=2)
PAFL_chisq_res <- chisq.test(PAFL_chisq, correct = FALSE); PAFL_chisq_res

## 
##  Pearson's Chi-squared test
## 
## data:  PAFL_chisq
## X-squared = 22.518, df = 1, p-value = 0.000002082

prod_chisq <-matrix(c(nrow(high_only), nrow(hvl_both), nrow(low_only), nrow(hvl_both)),nrow=2)
prod_chisq_res <- chisq.test(prod_chisq, correct = FALSE); prod_chisq_res

## 
##  Pearson's Chi-squared test
## 
## data:  prod_chisq
## X-squared = 376.67, df = 1, p-value < 0.00000000000000022

limnion_chisq <-matrix(c(nrow(epi_only), nrow(tvb_both), nrow(hypo_only), nrow(tvb_both)),nrow=2)
limnion_chisq_res <- chisq.test(limnion_chisq, correct = FALSE); limnion_chisq_res

## 
##  Pearson's Chi-squared test
## 
## data:  limnion_chisq
## X-squared = 49.006, df = 1, p-value = 0.000000000002552

shared_chisq <- matrix(c(nrow(pvf_both),nrow(hvl_both), nrow(tvb_both)),nrow=1)
shared_chisq_res <- chisq.test(shared_chisq, correct = FALSE); shared_chisq_res

## 
##  Chi-squared test for given probabilities
## 
## data:  shared_chisq
## X-squared = 306.75, df = 2, p-value < 0.00000000000000022

otu_sum_plot <- ggplot(otu_overlap, aes(y=NumOTUs , x=SampleType, fill=Type, order=Type)) +
  facet_grid(. ~ Comparison,scales = "free", labeller = mf_labeller) + #geom_text(x = 2, y = 8750, label = "***") +
  xlab("Habitat") + ylab("Sum of Shared and Unique OTUs") + 
  geom_bar(stat="identity") +   geom_bar(stat="identity", colour="black", show_guide=FALSE) +  
  scale_y_continuous(expand = c(0,0), breaks=seq(0, 7000, 1000), lim = c(0, 6500)) + 
  guides(fill = guide_legend(keywidth = 1.5, keyheight = 1.5)) + 
  scale_x_discrete(breaks=c("Free-Living", "Particle-Associated", "Low-Nutrient", 
                            "High-Nutrient", "Epilimnion", "Hypolimnion"),
                      labels=c("Free-Living (21)", "Particle-Associated (20)", "Low-Nutrient (15)",
                               "High-Nutrient (26)", "Epilimnion (19)", "Hypolimnion (18)")) + 
  scale_fill_manual(limits = c("Free-Living Only", "Particle-Associated Only", "Low-Nutrient Only", 
                               "High-Nutrient Only", "Epilimnion Only", "Hypolimnion Only", "Shared"),
                    breaks = c("Free-Living Only", "Particle-Associated Only", "Low-Nutrient Only", 
                               "High-Nutrient Only", "Epilimnion Only", "Hypolimnion Only", "Shared"),
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4","limegreen", 
                                "darkgreen", "gray39")) +
  theme_classic() +  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
  annotate("text",x = 1.5, y = 6300, label = "***") +
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=10, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=10),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        legend.position="none"); 
#dev.off()

###  RELATIVIZE IT!!!!  
otu_relative_plot <- ggplot(otu_overlap, aes(y=proportion , x=SampleType, fill=Type, order=Type)) +
  facet_grid(. ~ Comparison,scales = "free", labeller = mf_labeller) + #geom_text(x = 2, y = 8750, label = "***") +
  xlab("Habitat") + ylab("Relative Abundance of Shared and Unique OTUs") + 
  geom_bar(stat="identity", position = "fill") +   geom_bar(stat="identity", position = "fill", colour="black", show_guide=FALSE) +  
  scale_y_continuous(expand = c(0,0), breaks=seq(0, 1, 0.1)) + 
  guides(fill = guide_legend(keywidth = 1.5, keyheight = 1.5)) + 
  scale_x_discrete(breaks=c("Free-Living", "Particle-Associated", "Low-Nutrient", 
                            "High-Nutrient", "Epilimnion", "Hypolimnion"),
                      labels=c("Free-Living (21)", "Particle-Associated (20)", "Low-Nutrient (15)", 
                               "High-Nutrient (26)", "Epilimnion (19)", "Hypolimnion (18)")) + 
  scale_fill_manual(limits = c("Free-Living Only", "Particle-Associated Only", "Low-Nutrient Only", 
                               "High-Nutrient Only", "Epilimnion Only", "Hypolimnion Only", "Shared"),
                    breaks = c("Free-Living Only", "Particle-Associated Only", "Low-Nutrient Only", 
                               "High-Nutrient Only", "Epilimnion Only", "Hypolimnion Only", "Shared"),
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4","limegreen", 
                                "darkgreen", "gray39")) +
  theme_classic() +  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=10, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=10),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        legend.position="right"); 

# Store ggplot graphical parameters
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig1_OTUoverlap_relative.pdf", width= 12, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig1_OTUoverlap_relative.tiff", width= 12, height=6, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.43,0.57))))
print(otu_sum_plot, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(otu_relative_plot, vp=viewport(layout.pos.row=1,layout.pos.col=2)); #dev.off()

Figure 1: Unique and shared OTUs between lake habitats. (Left) Cumulative number of unique and shared OTUs detected across all lake habitats after transformation of the sequence depth to 14,925 sequences based on the scaling method mentioned in the methods. (Right) The relative abundance of the unique and shared OTUs based on their abundance within each habitat. Three asterisks ndicates a significant difference in the proportion of unique OTUs based on a Chi-squared test (p < 0.0001). Numbers in parentheses along the x-axis represent sample sizes of each lake habitat.

Figure 2: NMDS Ordinations

nowinOTU <- as.matrix(otu_table(nowin_FWDB_scaled))
#weighted
norm_bray <- vegdist(nowinOTU, method = "bray", binary = FALSE)  # calculates the Bray-Curtis Distances
nowinOTU_df <- data.frame(otu_table(nowinOTU))  
norm_soren <- vegdist(nowinOTU_df, method = "bray", binary = TRUE)  # SORENSEN INDEX

set.seed(15879966) # To be safe
nmds_bray <- metaMDS(nowinOTU, distance="bray")  #, autotransform = FALSE

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1722805 
## Run 1 stress 0.1703942 
## ... New best solution
## ... procrustes: rmse 0.06318353  max resid 0.1885027 
## Run 2 stress 0.1706441 
## ... procrustes: rmse 0.007538099  max resid 0.03135936 
## Run 3 stress 0.17228 
## Run 4 stress 0.170546 
## ... procrustes: rmse 0.006356934  max resid 0.03176699 
## Run 5 stress 0.170546 
## ... procrustes: rmse 0.006343347  max resid 0.03162104 
## Run 6 stress 0.1722825 
## Run 7 stress 0.1722807 
## Run 8 stress 0.1722844 
## Run 9 stress 0.1704897 
## ... procrustes: rmse 0.004210585  max resid 0.01908924 
## Run 10 stress 0.1895625 
## Run 11 stress 0.1894888 
## Run 12 stress 0.172284 
## Run 13 stress 0.1703948 
## ... procrustes: rmse 0.0003323431  max resid 0.001914146 
## *** Solution reached

nmds_bray <- data.frame(nmds_bray$points) #http://strata.uga.edu/software/pdf/mdsTutorial.pdf
nmds_bray$names<-row.names(nmds_bray) #new names column
nmds_bray <- makeCategories_dups(nmds_bray) #will add our categorical information:  lakenames, limnion, filter, quadrant and trophicstate
nmds_bray$ProdLevel <- as.character(nmds_bray$trophicstate)
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Eutrophic"] <- "Productive"
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Mesotrophic"] <- "Productive"
nmds_bray$ProdLevel[nmds_bray$trophicstate == "Oligotrophic"] <- "Unproductive"
nmds_bray$quadrant <- factor(nmds_bray$quadrant,
                             levels = c("Free Epilimnion", "Free Mixed",  "Free Hypolimnion",
                                        "Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))

nmds_bray$ProdLevel <- factor(nmds_bray$ProdLevel,levels = c("Productive", "Unproductive"))
                             
nmds_bc_quad <- ggplot(nmds_bray, aes(MDS1*-100, MDS2*-100, color = filter, 
                                      shape = limnion, fill = ProdLevel)) +
  annotate("text", label = "B", x = (min(nmds_bray$MDS1*100) + 0.7), y = (max(nmds_bray$MDS2*100) + 0.1), 
           face = "bold",size = 6, colour = "black") +
  annotate("text", label = "Stress = 0.17", x = (max(nmds_bray$MDS1*100)+0.1), y = (max(nmds_bray$MDS2*100) + 0.1), 
           size = 4, colour = "black") +
  xlab("NMDS1") + ylab("NMDS2") + ggtitle("Bray-Curtis Dissimilarity") +
  geom_point(size = 4, alpha=0.9) + theme_bw() + # Draw the points
  geom_point(aes(size=6, shape = limnion), show_guide = FALSE )+ # Make the points have a thicker outline 
  scale_color_manual(name = "Filter Fraction", breaks=c("Free", "Particle"),
                     labels = c("Free-Living", "Particle-Associated"), 
                     values = c("orangered", "orangered4")) +
  scale_fill_manual(name = "Nutrient Level", breaks = c("Productive", "Unproductive"), 
                     labels = c("High-Nutrient", "Low-Nutrient"),
                     values = c("gray", "white")) +
  scale_shape_manual(name = "Lake Layer", breaks = c("Epilimnion", "Hypolimnion", "Mixed"), 
                     values = c(21, 22, 24)) +
  guides(shape = guide_legend(order=1, override.aes=list(size=4)), # Manually set the legends 
         fill = guide_legend(order=2, override.aes=list(size=4, fill = c("gray", "white"))),
         color = guide_legend(order=3, override.aes=list(size=4))) +
  theme(axis.text.x = element_blank(), # text(colour="black", vjust=0.5, size=10), 
        axis.text.y = element_blank(), #text(colour="black", vjust=0.5, size=10),
        axis.title.x = element_text(face="bold", size=10),
        axis.title.y = element_text(face="bold", size=10),
        legend.title = element_text(size=8, face="bold"),
        legend.text = element_text(size = 8),
        axis.ticks = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        plot.title = element_text(size = 12, face="bold"),
        ###LEGEND TOP RIGHT CORNER
        legend.position = "right");


# UNWEIGHTED
set.seed(15879966)
nmds_soren <- metaMDS(nowinOTU, distance="bray", binary = TRUE)

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1490234 
## Run 1 stress 0.1708672 
## Run 2 stress 0.1490236 
## ... procrustes: rmse 0.0000749495  max resid 0.0002596153 
## *** Solution reached

nmds_soren <- data.frame(nmds_soren$points) #http://strata.uga.edu/software/pdf/mdsTutorial.pdf
nmds_soren$names<-row.names(nmds_soren) #new names column
nmds_soren <- makeCategories_dups(nmds_soren) #will add our categorical information:  lakenames, limnion, filter, quadrant and trophicstate
nmds_soren$ProdLevel <- as.character(nmds_soren$trophicstate)
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Eutrophic"] <- "Productive"
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Mesotrophic"] <- "Productive"
nmds_soren$ProdLevel[nmds_soren$trophicstate == "Oligotrophic"] <- "Unproductive"
nmds_soren$quadrant <- factor(nmds_soren$quadrant,
                              levels = c("Free Epilimnion", "Free Mixed",  "Free Hypolimnion", 
                                         "Particle Epilimnion", "Particle Mixed", "Particle Hypolimnion"))

nmds_soren_quad <- ggplot(nmds_soren, aes(MDS1, MDS2, color = filter, shape = limnion, fill = ProdLevel)) +
  annotate("text", label = "A", x = (min(nmds_soren$MDS1) + 0.05), 
           y = (max(nmds_soren$MDS2) -0.01), face = "bold",size = 6, colour = "black") +
  annotate("text", label = "Stress = 0.15", x = (max(nmds_soren$MDS1) -0.2), 
           y = (max(nmds_soren$MDS2) -0.01), size = 4, colour = "black") +
  xlab("NMDS1") + ylab("NMDS2") + ggtitle("Sørensen Dissimilarity") +
  geom_point(size = 4, alpha=0.9) + theme_bw() +   #geom_point(colour="white", size = 1) +
  geom_point(aes(size=6), show_guide = FALSE )+
  scale_color_manual(name = "Filter Fraction", breaks=c("Free", "Particle"),
                     labels = c("Free-Living", "Particle-Associated"), 
                     values = c("orangered", "orangered4")) +
  scale_fill_manual(name = "Nutrient Level", breaks = c("Productive", "Unproductive"), 
                     labels = c("High-Nutrient", "Low-Nutrient"),
                     values = c("gray", "white")) +
  scale_shape_manual(name = "Lake Strata", breaks = c("Epilimnion", "Hypolimnion", "Mixed"), 
                     values = c(21, 22, 24)) +
  guides(shape = guide_legend(order=1, override.aes=list(size=4)),
         fill = guide_legend(order=2, override.aes=list(size=4)),
         color = guide_legend(order=3, override.aes=list(size=4))) +
  theme(axis.text.x = element_blank(), # text(colour="black", vjust=0.5, size=10), 
        axis.text.y = element_blank(), #text(colour="black", vjust=0.5, size=10),
        axis.title.x = element_text(face="bold", size=10),
        axis.title.y = element_text(face="bold", size=10),
        legend.title = element_text(size=8, face="bold"),
        legend.text = element_text(size = 8),
        axis.ticks = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        plot.title = element_text(size = 12, face="bold"),
        ###LEGEND TOP RIGHT CORNER
        legend.position = "none");  

#####  Plotting FIGURE 2  #####  Plotting FIGURE 2  #####  Plotting FIGURE 2  
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_NMDS_binary_color2.pdf",  width= 10, height=4)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.41,0.59))))
print(nmds_soren_quad, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(nmds_bc_quad, vp=viewport(layout.pos.row=1,layout.pos.col=2));  #dev.off()

Figure 2: Non-Metric Multidimensional Scaling (NMDS) Ordinations visualizing differences in bacterial community composition based on (A) Sørensen dissimilarity (OTU presence or absence) and (B) Bray-Curtis dissimilarity (OTU relative abundance). Data points are colored by filter fraction, shaped by lake layer, and filled in by the nutrient level of each sample.

PERMANOVA Results

#  First we need to organize our environmental data
environ <- sample_data(nosherwin_FWDB_scaled)
environ <- subset(environ, select = c("names", "lakenames", "limnion", "filter", "trophicstate","ProdLevel", 
                                      "totaldepth", "DO", "SpC", "temp", "pH")) 
environ <- data.frame(environ)


##  Calculate the BC dissimilarity and the Sorensen Dissimilarity
nosherwinOTU <- otu_table(nosherwin_FWDB_scaled)  # This is our OTU table that we will use for Adonis
set.seed(15879966)
## Bray-Curtis Distance --> OTU relative abundance
bc_dist <- vegdist(nosherwinOTU, method = "bray", binary = FALSE)  
adonis_bc <- adonis(bc_dist ~ limnion+filter +ProdLevel+ DO+temp + pH, data=environ); adonis_bc

## 
## Call:
## adonis(formula = bc_dist ~ limnion + filter + ProdLevel + DO +      temp + pH, data = environ) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    1.7475 1.74746  9.0680 0.15837  0.001 ***
## filter     1    1.4075 1.40755  7.3041 0.12757  0.001 ***
## ProdLevel  1    1.0334 1.03336  5.3623 0.09365  0.001 ***
## DO         1    0.4315 0.43152  2.2392 0.03911  0.010 ** 
## temp       1    0.3452 0.34521  1.7913 0.03129  0.056 .  
## pH         1    0.2876 0.28757  1.4922 0.02606  0.110    
## Residuals 30    5.7812 0.19271         0.52395           
## Total     36   11.0339                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
soren_dist <- vegdist(data.frame(nosherwinOTU), method = "bray", binary = TRUE)  
adonis_soren <- adonis(soren_dist ~ limnion+filter +ProdLevel+ DO+temp + pH, data=environ); adonis_soren

## 
## Call:
## adonis(formula = soren_dist ~ limnion + filter + ProdLevel +      DO + temp + pH, data = environ) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    1.2078 1.20782  6.6377 0.13291  0.001 ***
## filter     1    0.3956 0.39557  2.1739 0.04353  0.013 *  
## ProdLevel  1    1.0405 1.04046  5.7180 0.11449  0.001 ***
## DO         1    0.4885 0.48847  2.6844 0.05375  0.003 ** 
## temp       1    0.2473 0.24729  1.3590 0.02721  0.115    
## pH         1    0.2493 0.24931  1.3701 0.02743  0.122    
## Residuals 30    5.4589 0.18196         0.60068           
## Total     36    9.0878                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Particle-Associated!
part <- subset_samples(nosherwin_FWDB_scaled, filter == "Particle")
partOTU <- otu_table(part)
set.seed(15879966)
part_env <- subset(environ, filter == "Particle")  #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
part_BC <- vegdist(partOTU, method = "bray", binary = FALSE)  
part_adon_bc <- adonis(part_BC ~ limnion+ProdLevel+ DO + temp + pH, data=part_env); part_adon_bc

## 
## Call:
## adonis(formula = part_BC ~ limnion + ProdLevel + DO + temp +      pH, data = part_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## limnion    1    0.9511 0.95111  3.9073 0.18067  0.002 **
## ProdLevel  1    0.6783 0.67830  2.7865 0.12885  0.007 **
## DO         1    0.3058 0.30582  1.2564 0.05809  0.227   
## temp       1    0.2206 0.22059  0.9062 0.04190  0.592   
## pH         1    0.1874 0.18739  0.7698 0.03560  0.654   
## Residuals 12    2.9210 0.24342         0.55488          
## Total     17    5.2643                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
part_soren <- vegdist(data.frame(partOTU), method = "bray", binary = TRUE)  
part_adon_soren <- adonis(part_soren ~ limnion+ProdLevel+ DO + temp + pH, data=part_env); part_adon_soren

## 
## Call:
## adonis(formula = part_soren ~ limnion + ProdLevel + DO + temp +      pH, data = part_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## limnion    1    0.6181 0.61808  3.2661 0.15065  0.002 **
## ProdLevel  1    0.6066 0.60660  3.2054 0.14785  0.005 **
## DO         1    0.2746 0.27457  1.4509 0.06692  0.102   
## temp       1    0.1716 0.17159  0.9067 0.04182  0.589   
## pH         1    0.1611 0.16106  0.8511 0.03926  0.604   
## Residuals 12    2.2709 0.18924         0.55350          
## Total     17    4.1028                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# FREE-LIVING!!!
free <- subset_samples(nosherwin_FWDB_scaled, filter == "Free")
set.seed(3)
freeOTU <- otu_table(free)
free_env <- subset(environ, filter == "Free")  #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
free_BC <- vegdist(freeOTU, method = "bray", binary = FALSE)  
free_adon_bc <- adonis(free_BC ~ limnion+ProdLevel+ DO + temp + pH, data=free_env); free_adon_bc

## 
## Call:
## adonis(formula = free_BC ~ limnion + ProdLevel + DO + temp +      pH, data = free_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    1.0207 1.02074  5.8396 0.23353  0.001 ***
## ProdLevel  1    0.4824 0.48237  2.7596 0.11036  0.006 ** 
## DO         1    0.2471 0.24706  1.4134 0.05652  0.155    
## temp       1    0.1863 0.18626  1.0656 0.04261  0.375    
## pH         1    0.1622 0.16216  0.9277 0.03710  0.498    
## Residuals 13    2.2723 0.17480         0.51988           
## Total     18    4.3709                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
free_soren <- vegdist(data.frame(freeOTU), method = "bray", binary = TRUE)  
free_adon_soren <- adonis(free_soren ~ limnion+ProdLevel+ DO + temp + pH, data=free_env); free_adon_soren

## 
## Call:
## adonis(formula = free_soren ~ limnion + ProdLevel + DO + temp +      pH, data = free_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    0.7425 0.74255  3.6951 0.16182  0.001 ***
## ProdLevel  1    0.5605 0.56048  2.7891 0.12214  0.001 ***
## DO         1    0.3417 0.34171  1.7004 0.07447  0.046 *  
## temp       1    0.1564 0.15638  0.7782 0.03408  0.825    
## pH         1    0.1753 0.17527  0.8722 0.03820  0.558    
## Residuals 13    2.6124 0.20095         0.56930           
## Total     18    4.5888                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Epilimnion!
epi <- subset_samples(nosherwin_FWDB_scaled, limnion == "Epilimnion")
epiOTU <- otu_table(epi)
set.seed(15879966)
epi_env <- subset(environ, limnion == "Epilimnion")  #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
epi_BC <- vegdist(epiOTU, method = "bray", binary = FALSE)
epi_adon_bc <- adonis(epi_BC ~ filter+ProdLevel+ DO + temp + pH, data=epi_env); epi_adon_bc

## 
## Call:
## adonis(formula = epi_BC ~ filter + ProdLevel + DO + temp + pH,      data = epi_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## filter     1    0.8092 0.80923  4.5733 0.18180  0.001 ***
## ProdLevel  1    0.4387 0.43866  2.4790 0.09855  0.013 *  
## DO         1    0.3191 0.31908  1.8032 0.07168  0.060 .  
## temp       1    0.1882 0.18821  1.0636 0.04228  0.380    
## pH         1    0.3957 0.39571  2.2363 0.08890  0.024 *  
## Residuals 13    2.3003 0.17695         0.51679           
## Total     18    4.4512                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
epi_soren <- vegdist(data.frame(epiOTU), method = "bray", binary = TRUE)  
epi_adon_soren <- adonis(epi_soren ~ filter+ProdLevel+ DO + temp + pH, data=epi_env); epi_adon_soren

## 
## Call:
## adonis(formula = epi_soren ~ filter + ProdLevel + DO + temp +      pH, data = epi_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## filter     1    0.2576 0.25760  1.4201 0.06543  0.037 *  
## ProdLevel  1    0.4403 0.44031  2.4273 0.11184  0.001 ***
## DO         1    0.3222 0.32222  1.7763 0.08184  0.007 ** 
## temp       1    0.2520 0.25202  1.3893 0.06401  0.054 .  
## pH         1    0.3068 0.30679  1.6912 0.07792  0.006 ** 
## Residuals 13    2.3582 0.18140         0.59896           
## Total     18    3.9371                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# hypolimnion!
hypo <- subset_samples(nosherwin_FWDB_scaled, limnion == "Hypolimnion")
hypoOTU <- otu_table(hypo)
set.seed(15879966)
hypo_env <- subset(environ, limnion == "Hypolimnion") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
hypo_BC <- vegdist(hypoOTU, method = "bray", binary = FALSE)
hypo_adon_bc <- adonis(hypo_BC ~ filter+ProdLevel+DO + temp + pH, data=hypo_env); hypo_adon_bc

## 
## Call:
## adonis(formula = hypo_BC ~ filter + ProdLevel + DO + temp + pH,      data = hypo_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## filter     1    0.8138 0.81384  6.2042 0.16831  0.001 ***
## ProdLevel  1    1.5496 1.54960 11.8133 0.32048  0.001 ***
## DO         1    0.3124 0.31239  2.3815 0.06461  0.027 *  
## temp       1    0.3469 0.34695  2.6449 0.07175  0.008 ** 
## pH         1    0.2383 0.23833  1.8169 0.04929  0.081 .  
## Residuals 12    1.5741 0.13117         0.32555           
## Total     17    4.8352                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
hypo_soren <- vegdist(data.frame(hypoOTU), method = "bray", binary = TRUE)  
hypo_adon_soren <- adonis(hypo_soren ~ filter+ProdLevel+DO + temp + pH, data=hypo_env); hypo_adon_soren

## 
## Call:
## adonis(formula = hypo_soren ~ filter + ProdLevel + DO + temp +      pH, data = hypo_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## filter     1    0.2914 0.29143  2.2133 0.07391  0.031 *  
## ProdLevel  1    1.2793 1.27933  9.7161 0.32446  0.001 ***
## DO         1    0.2982 0.29815  2.2644 0.07562  0.022 *  
## temp       1    0.2520 0.25200  1.9139 0.06391  0.048 *  
## pH         1    0.2419 0.24192  1.8373 0.06136  0.056 .  
## Residuals 12    1.5801 0.13167         0.40074           
## Total     17    3.9429                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Oligotrophic!!
oligo <- subset_samples(nosherwin_FWDB_scaled, trophicstate == "Oligotrophic") 
oligoOTU <- otu_table(oligo)
set.seed(15879966)
oligo_env <- subset(environ, trophicstate == "Oligotrophic") #Load Environmental Data
## Bray-Curtis Distance --> OTU relative abundance
oligo_BC <- vegdist(oligoOTU, method = "bray", binary = FALSE)
oligo_adon_bc <- adonis(oligo_BC ~ limnion+filter+DO + temp + pH, data=oligo_env); oligo_adon_bc

## 
## Call:
## adonis(formula = oligo_BC ~ limnion + filter + DO + temp + pH,      data = oligo_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    0.7302 0.73015  4.3187 0.18873  0.001 ***
## filter     1    0.6656 0.66555  3.9366 0.17203  0.002 ** 
## DO         1    0.3199 0.31986  1.8919 0.08268  0.048 *  
## temp       1    0.3413 0.34128  2.0186 0.08821  0.027 *  
## pH         1    0.2903 0.29032  1.7172 0.07504  0.063 .  
## Residuals  9    1.5216 0.16907         0.39331           
## Total     14    3.8688                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
oligo_soren <- vegdist(data.frame(oligoOTU), method = "bray", binary = TRUE)   
oligo_adon_soren <- adonis(oligo_soren ~ limnion+filter+DO + temp + pH, data=oligo_env); oligo_adon_soren

## 
## Call:
## adonis(formula = oligo_soren ~ limnion + filter + DO + temp +      pH, data = oligo_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1   0.45172 0.45172  2.7101 0.14749  0.001 ***
## filter     1   0.25213 0.25213  1.5127 0.08232  0.040 *  
## DO         1   0.29544 0.29544  1.7725 0.09646  0.010 ** 
## temp       1   0.25909 0.25909  1.5544 0.08459  0.032 *  
## pH         1   0.30432 0.30432  1.8258 0.09936  0.007 ** 
## Residuals  9   1.50011 0.16668         0.48978           
## Total     14   3.06281                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# MESO + EUTROPHIC!!
prod <- subset_samples(nosherwin_FWDB_scaled, ProdLevel == "Productive")
prodOTU <- otu_table(prod)
set.seed(15879966)
prod_env <- subset(environ, ProdLevel == "Productive") #Load Environmental Data
## Bray-Curtis Distance --> Tests OTU relative abundance
prod_BC <- vegdist(prodOTU, method = "bray", binary = FALSE) 
prod_adon_bc <- adonis(prod_BC ~ limnion+filter+DO + temp + pH, data=prod_env); prod_adon_bc

## 
## Call:
## adonis(formula = prod_BC ~ limnion + filter + DO + temp + pH,      data = prod_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    2.0802 2.08018 13.2443 0.33340  0.001 ***
## filter     1    0.8607 0.86070  5.4800 0.13795  0.001 ***
## DO         1    0.3050 0.30504  1.9422 0.04889  0.043 *  
## temp       1    0.2161 0.21610  1.3759 0.03463  0.168    
## pH         1    0.2643 0.26435  1.6831 0.04237  0.109    
## Residuals 16    2.5130 0.15706         0.40277           
## Total     21    6.2394                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Sørensen Distance --> Tests OTU presence/absence 
prod_soren <- vegdist(data.frame(prodOTU), method = "bray", binary = TRUE)  
prod_adon_soren <- adonis(prod_soren ~ limnion+filter+DO + temp + pH, data=prod_env); prod_adon_soren

## 
## Call:
## adonis(formula = prod_soren ~ limnion + filter + DO + temp +      pH, data = prod_env) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## limnion    1    1.5264 1.52640  9.6322 0.30071  0.001 ***
## filter     1    0.2690 0.26902  1.6977 0.05300  0.072 .  
## DO         1    0.3218 0.32180  2.0307 0.06340  0.027 *  
## temp       1    0.2234 0.22336  1.4095 0.04400  0.151    
## pH         1    0.1999 0.19992  1.2616 0.03938  0.183    
## Residuals 16    2.5355 0.15847         0.49951           
## Total     21    5.0760                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Prepping for Figure 3: Bray-Curtis

#BC Distance 
nowinOTU <- as.matrix(otu_table(nowin_FWDB_scaled))
norm_bray <- vegdist(nowinOTU, method = "bray", binary = FALSE)  # calculates the Bray-Curtis Distances

bray <- melt(as.matrix(norm_bray), varnames = c("samp1", "samp2"))
bray <- subset(bray, value > 0)

bray$lakenames1 <- substr(bray$samp1,1,3) # Create a new column called lakenames1 with first 3 letters of string
bray$lakenames2 <- substr(bray$samp2,1,3) # Create a new column called lakenames2 with first 3 letters of string
bray$limnion1 <- substr(bray$samp1, 4, 4) # Create a column called limnon1 with hypo or epi
bray$limnion2 <- substr(bray$samp2, 4, 4) # Create a column called limnon2 with hypo or epi
bray$filter1 <- substr(bray$samp1, 5, 7)  # Create a column called filter1 with PA or FL
bray$filter2 <- substr(bray$samp2, 5, 7) # Create a column called filter2 with PA or FL


bray$lakenames1 <- as.character(bray$lakenames1)
bray$lakenames1[bray$lakenames1 == "WIN"] <- "Wintergreen"
bray$lakenames1[bray$lakenames1 == "SIX"] <- "Sixteen"
bray$lakenames1[bray$lakenames1 == "SHE"] <- "Sherman"
bray$lakenames1[bray$lakenames1 == "PAY"] <- "Payne"
bray$lakenames1[bray$lakenames1 == "LON"] <- "LittleLong"
bray$lakenames1[bray$lakenames1 == "LEE"] <- "Lee"
bray$lakenames1[bray$lakenames1 == "GUL"] <- "Gull"
bray$lakenames1[bray$lakenames1 == "BRI"] <- "Bristol"
bray$lakenames1[bray$lakenames1 == "BAK"] <- "Baker"
bray$lakenames1[bray$lakenames1 == "BAS"] <- "Baseline"
bray$lakenames1[bray$lakenames1 == "BST"] <- "Bassett"

bray$lakenames2 <- as.character(bray$lakenames2)
bray$lakenames2[bray$lakenames2 == "WIN"] <- "Wintergreen"
bray$lakenames2[bray$lakenames2 == "SIX"] <- "Sixteen"
bray$lakenames2[bray$lakenames2 == "SHE"] <- "Sherman"
bray$lakenames2[bray$lakenames2 == "PAY"] <- "Payne"
bray$lakenames2[bray$lakenames2 == "LON"] <- "LittleLong"
bray$lakenames2[bray$lakenames2 == "LEE"] <- "Lee"
bray$lakenames2[bray$lakenames2 == "GUL"] <- "Gull"
bray$lakenames2[bray$lakenames2 == "BRI"] <- "Bristol"
bray$lakenames2[bray$lakenames2 == "BAK"] <- "Baker"
bray$lakenames2[bray$lakenames2 == "BAS"] <- "Baseline"
bray$lakenames2[bray$lakenames2 == "BST"] <- "Bassett"



#Add Trophic State column by using the name of the lake
bray <- data.table(bray)
library(data.table)
bray[, trophicstate1 := ifelse(lakenames1 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
                               ifelse(lakenames1 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
                                      ifelse(lakenames1 %in% c("Sherman"), "Mixed",
                                             ifelse(lakenames1 %in% c("Gull", "Sixteen", "LittleLong", "Lee"), 
                                                    "Oligotrophic", NA))))]
bray[, trophicstate2 := ifelse(lakenames2 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
                               ifelse(lakenames2 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
                                      ifelse(lakenames2 %in% c("Sherman"), "Mixed",
                                             ifelse(lakenames2 %in% c("Gull", "Sixteen", "LittleLong", "Lee"), 
                                                    "Oligotrophic", NA))))]



bray$limnion1[bray$limnion1 == "E"] <- "Epilimnion"
bray$limnion1[bray$limnion1 == "H"] <- "Hypolimnion"
bray$limnion2[bray$limnion2 == "E"] <- "Epilimnion"
bray$limnion2[bray$limnion2 == "H"] <- "Hypolimnion"


###  Pull out the SHERMAN LAKE SAMPLES
bray$limnion1[bray$lakenames1 == "Sherman" & bray$limnion1 == "Epilimnion"] <- "Mixed"
bray$limnion1[bray$lakenames1 == "Sherman" & bray$limnion1 == "Hypolimnion"] <- "Mixed"
bray$limnion2[bray$lakenames2 == "Sherman" & bray$limnion2 == "Epilimnion"] <- "Mixed"
bray$limnion2[bray$lakenames2 == "Sherman" & bray$limnion2 == "Hypolimnion"] <- "Mixed"


bray$filter1[bray$filter1 == "3um"] <- "Particle"
bray$filter1[bray$filter1 == ""] <- "Free"
bray$filter2[bray$filter2 == "3um"] <- "Particle"
bray$filter2[bray$filter2 == ""] <- "Free"

#Add the combined lake layer
bray$combined<-rep(NA,length(bray$limnion1))
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Epilimnion"]<-"Epilimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Hypolimnion"]<-"Hypolimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Epilimnion"]<-"EH"
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Hypolimnion"]<-"EH"

bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Mixed"]<-"Mixed"
bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Hypolimnion"]<-"Mixed Hypolimnion"
bray$combined[bray$limnion1=="Hypolimnion"&bray$limnion2=="Mixed"]<-"Mixed Hypolimnion"
bray$combined[bray$limnion1=="Epilimnion"&bray$limnion2=="Mixed"]<-"Mixed Epilimnion"
bray$combined[bray$limnion1=="Mixed"&bray$limnion2=="Epilimnion"]<-"Mixed Epilimnion"


##  Add the commbined filter.
bray$filt_comb<-rep(NA,length(bray$filter1))
bray$filt_comb[bray$filter1=="Free"&bray$filter2=="Free"]<-"Free"
bray$filt_comb[bray$filter1=="Particle"&bray$filter2=="Particle"]<-"Particle"
bray$filt_comb[bray$filter1=="Particle"&bray$filter2=="Free"]<-"PF"
bray$filt_comb[bray$filter1=="Free"&bray$filter2=="Particle"]<-"PF"

#creating a column for each combination of top bottom particle free
for(i in 1:length(bray$limnion1)){
  bray$cat[i]<-paste(as.character(bray$filt_comb[i]),as.character(bray$combined[i]))}


#  Add for trophicstate combintions
bray$troph_comb<-rep(NA,length(bray$lakenames2))
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Mesotrophic"]<-"Eutrophic-Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Oligotrophic"]<-"Eutrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Eutrophic" & bray$trophicstate2=="Mixed"]<-"Eutrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Eutrophic"]<-"Eutrophic-Mixed"

bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & 
                  bray$trophicstate2=="Oligotrophic"]<-"Mesotrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & 
                  bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Mesotrophic" & bray$trophicstate2=="Mixed"]<-"Mesotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Mixed"

bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Oligotrophic"]<-"Oligotrophic"
bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Oligotrophic"]<-"Oligotrophic-Mixed"
bray$troph_comb[bray$trophicstate1=="Oligotrophic" & bray$trophicstate2=="Mixed"]<-"Oligotrophic-Mixed"

bray$troph_comb[bray$trophicstate1=="Mixed" & bray$trophicstate2=="Mixed"]<-"Mixed"

##  Add combined trophicstate, filter, and limnion 
for(i in 1:length(bray$limnion1)){
  bray$troph_lim1[i]<-paste(as.character(bray$trophicstate1[i]),
                            as.character(bray$limnion1[i]),
                            as.character(bray$filter1[i]))}
for(i in 1:length(bray$limnion2)){
  bray$troph_lim2[i]<-paste(as.character(bray$trophicstate2[i]),
                            as.character(bray$limnion2[i]),
                            as.character(bray$filter2[i]))}

bray$samp1 <- as.character(bray$samp1)
bray$samp2 <- as.character(bray$samp2)

#creating a column for each combination of top bottom particle free
for(i in 1:length(bray$limnion1)){
  bray$cat[i]<-paste(as.character(bray$filt_comb[i]),as.character(bray$combined[i]))}

###  Subsetting out the samples that are in the same categories as each other
beta <- subset(bray, troph_lim1 == troph_lim2)

### Put everything in the order we would like.
beta$trophicstate1 <-factor(beta$trophicstate1,
                            levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))

beta$trophicstate2 <-factor(beta$trophicstate2,
                            levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))

beta$troph_lim2 <-factor(beta$troph_lim2,
                         levels=c("Eutrophic Epilimnion Particle", "Eutrophic Epilimnion Free", "Eutrophic Hypolimnion Particle", "Eutrophic Hypolimnion Free","Mesotrophic Epilimnion Particle", "Mesotrophic Epilimnion Free", "Mesotrophic Hypolimnion Particle", "Mesotrophic Hypolimnion Free","Oligotrophic Epilimnion Particle", "Oligotrophic Epilimnion Free", "Oligotrophic Hypolimnion Particle", "Oligotrophic Hypolimnion Free","Mixed Mixed Particle", "Mixed Mixed Free"))

#### Subsetting out the mixed lake!
nomix_beta <- subset(beta, trophicstate1 != "Mixed")
nomix_beta2 <- subset(nomix_beta, trophicstate2 != "Mixed")

prod_beta <- nomix_beta2

prod_beta$troph_lim1 <- as.character(prod_beta$troph_lim1)
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Epilimnion Free" | 
                       prod_beta$troph_lim1 =="Mesotrophic Epilimnion Free"] <- "Productive Epilimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Epilimnion Particle" | 
                       prod_beta$troph_lim1 =="Mesotrophic Epilimnion Particle"] <- "Productive Epilimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Hypolimnion Free" | 
                       prod_beta$troph_lim1 =="Mesotrophic Hypolimnion Free"] <- "Productive Hypolimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Eutrophic Hypolimnion Particle" | 
                       prod_beta$troph_lim1 =="Mesotrophic Hypolimnion Particle"] <- "Productive Hypolimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Epilimnion Free"] <- "Unproductive Epilimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Epilimnion Particle"] <- "Unproductive Epilimnion Particle"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Hypolimnion Free"] <- "Unproductive Hypolimnion Free"
prod_beta$troph_lim1[prod_beta$troph_lim1 == "Oligotrophic Hypolimnion Particle"] <- "Unproductive Hypolimnion Particle"
prod_beta$trophicstate1 <- as.character(prod_beta$trophicstate1)
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Eutrophic"] <- "Productive"
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Mesotrophic"] <- "Productive"
prod_beta$trophicstate1[prod_beta$trophicstate1 == "Oligotrophic"] <- "Unproductive"


prod_beta <- prod_beta %>% 
  group_by(troph_lim1) %>%
  mutate(mean.bray = mean(value),
         sd.bray = sd(value))


############ Kruskal-Wallis Rank Sum Test: Bray-Curtis
#hist(prod_beta$value, breaks = 30)  # Not normally distributed
prod_beta$value <- as.numeric(prod_beta$value)
prod_beta$troph_lim1 <- as.factor(prod_beta$troph_lim1)
prod_bray_beta <- prod_beta
## Run the Kruskal Wallis test 
bray_prod_KW <- kruskal.test(prod_bray_beta$value ~ prod_bray_beta$troph_lim1) 
print(bray_prod_KW)  # show Kruskal Wallis result

## 
##  Kruskal-Wallis rank sum test
## 
## data:  prod_bray_beta$value by prod_bray_beta$troph_lim1
## Kruskal-Wallis chi-squared = 32.341, df = 7, p-value = 0.00003511

### Which samples are significantly different from each other?  
bray_prod_KW_MC <- kruskalmc(prod_bray_beta$value ~ prod_bray_beta$troph_lim1)  ## Defaults to P < 0.05
#print(bray_prod_KW_MC)
### Test to find the letters to represent significance in a plot
bray_test <- bray_prod_KW_MC$dif.com$difference # select logical vector
names(bray_test) <- row.names(bray_prod_KW_MC$dif.com) # add comparison names
# create a list with "homogenous groups" coded by letter
bray_letters <- multcompLetters(bray_test, compare="<", threshold=0.05, 
                                Letters=c(letters, LETTERS, "."), reversed = FALSE)
###  Extract the values from the multcompLetters object
bray_sigs_dataframe <-  data.frame(as.vector(names(bray_letters$Letters)), as.vector(bray_letters$Letters))
colnames(bray_sigs_dataframe) <- c("troph_lim1", "siglabel")
bray_try <- merge(prod_beta, bray_sigs_dataframe, by = "troph_lim1")


bray_try$troph_lim1 <-factor(bray_try$troph_lim1,
                             levels=c( "Productive Epilimnion Free", "Productive Epilimnion Particle", "Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"))


bray_sigs <- bray_try %>%
  select(trophicstate1, troph_lim1, siglabel) %>%
  distinct()

bray_boxplot <- ggplot(bray_try, aes(x = troph_lim1, y = value, fill = troph_lim1)) + geom_point(size = 2) +
  geom_boxplot() +
  geom_text(data = bray_sigs, aes(label = siglabel, x = troph_lim1, y = 0.92), size =3) +
  scale_fill_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free", 
                                        "Productive Hypolimnion Particle", "Productive Hypolimnion Free",
                                         "Unproductive Epilimnion Particle", "Unproductive Epilimnion Free", 
                                        "Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free"), 
                     values = c("royalblue4", "royalblue4", "royalblue4", "royalblue4",
                                "cornflowerblue","cornflowerblue","cornflowerblue","cornflowerblue"))+
  scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle", 
                            "Productive Hypolimnion Free","Productive Hypolimnion Particle",
                            "Unproductive Epilimnion Free", "Unproductive Epilimnion Particle",
                            "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"),
                   labels=c("Epilimnion \nFree-Living (12)", "Epilimnion \nParticle-Associated (12)", 
                            "Hypolimnion \nFree-Living (8)", "Hypolimnion \nParticle-Associated (8)",
                            "Epilimnion \nFree-Living (12)", "Epilimnion \nParticle-Associated (6)", 
                            "Hypolimnion \nFree-Living (12)", "Hypolimnion \nParticle-Associated (12)")) + 
  xlab("Habitat") + ylab("Bray-Curtis \nDissimilarity") + theme_bw() + #scale_fill_brewer(palette="Paired") + 
  facet_grid(. ~ trophicstate1, scale = "free", space = "free") +
  theme(plot.title = element_text(face="bold", size = 12),  #Set the plot title
                                 strip.text.x = element_blank(),  #Set the facet titles on x-axis 
                                 strip.text.y = element_blank(),  #Set the facet titles on x-axis 
                                 strip.background = element_blank(),  #Set the facet background to no background
                                 axis.title.x = element_text(face="bold", size=10),  #Set the x-axis title
                                 axis.title.y = element_text(face="bold", size=10),  #Set the y-axis title
                                 axis.text.x = element_text(angle=30, colour = "black", 
                                                            vjust=1, hjust = 1, size=8),  #Set the x-axis labels
                                 axis.text.y = element_text(colour = "black", size=8),  #Set the y-axis labels
                                 legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                                 legend.text = element_text(size = 8),  #Set the legend text
                                 legend.position="none", #Default the legend position to the right
                                 plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"));  #top, right, bottom, left

Prepping for Figure 3: Sorensen

##  Calculate the sorensen dissimiliarity matrix 
nowinOTU_df <- data.frame(otu_table(nowin_FWDB_scaled))  
norm_soren <- vegdist(nowinOTU_df, method = "bray", binary = TRUE)  # SORENSEN INDEX
###  Melt all dissimilarities into one data frame
soren <- melt(as.matrix(norm_soren), varnames = c("samp1", "samp2"))
soren <- subset(soren, value > 0)

soren$lakenames1 <- substr(soren$samp1,1,3) # Create a new row called "lakenames" with first 3 letters of string
soren$lakenames2 <- substr(soren$samp2,1,3) # Create a new row called "lakenames" with first 3 letters of string
soren$limnion1 <- substr(soren$samp1, 4, 4) # Create a column called limnon with hypo or epi
soren$limnion2 <- substr(soren$samp2, 4, 4) # Create a column called limnon with hypo or epi
soren$filter1 <- substr(soren$samp1, 5, 7) 
soren$filter2 <- substr(soren$samp2, 5, 7) 


soren$lakenames1 <- as.character(soren$lakenames1)
soren$lakenames1[soren$lakenames1 == "WIN"] <- "Wintergreen"
soren$lakenames1[soren$lakenames1 == "SIX"] <- "Sixteen"
soren$lakenames1[soren$lakenames1 == "SHE"] <- "Sherman"
soren$lakenames1[soren$lakenames1 == "PAY"] <- "Payne"
soren$lakenames1[soren$lakenames1 == "LON"] <- "LittleLong"
soren$lakenames1[soren$lakenames1 == "LEE"] <- "Lee"
soren$lakenames1[soren$lakenames1 == "GUL"] <- "Gull"
soren$lakenames1[soren$lakenames1 == "BRI"] <- "Bristol"
soren$lakenames1[soren$lakenames1 == "BAK"] <- "Baker"
soren$lakenames1[soren$lakenames1 == "BAS"] <- "Baseline"
soren$lakenames1[soren$lakenames1 == "BST"] <- "Bassett"

soren$lakenames2 <- as.character(soren$lakenames2)
soren$lakenames2[soren$lakenames2 == "WIN"] <- "Wintergreen"
soren$lakenames2[soren$lakenames2 == "SIX"] <- "Sixteen"
soren$lakenames2[soren$lakenames2 == "SHE"] <- "Sherman"
soren$lakenames2[soren$lakenames2 == "PAY"] <- "Payne"
soren$lakenames2[soren$lakenames2 == "LON"] <- "LittleLong"
soren$lakenames2[soren$lakenames2 == "LEE"] <- "Lee"
soren$lakenames2[soren$lakenames2 == "GUL"] <- "Gull"
soren$lakenames2[soren$lakenames2 == "BRI"] <- "Bristol"
soren$lakenames2[soren$lakenames2 == "BAK"] <- "Baker"
soren$lakenames2[soren$lakenames2 == "BAS"] <- "Baseline"
soren$lakenames2[soren$lakenames2 == "BST"] <- "Bassett"



#Add Trophic State column by using the name of the lake
soren <- data.table(soren)
library(data.table)
soren[, trophicstate1 := ifelse(lakenames1 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
                                ifelse(lakenames1 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
                                       ifelse(lakenames1 %in% c("Sherman"), "Mixed",
                                              ifelse(lakenames1 %in% c("Gull", "Sixteen", "LittleLong", "Lee"), 
                                                     "Oligotrophic", NA))))]
soren[, trophicstate2 := ifelse(lakenames2 %in% c("Wintergreen", "Baker", "Baseline"), "Eutrophic",
                                ifelse(lakenames2 %in% c("Bassett", "Bristol", "Payne"), "Mesotrophic",
                                       ifelse(lakenames2 %in% c("Sherman"), "Mixed",
                                              ifelse(lakenames2 %in% c("Gull", "Sixteen", "LittleLong", "Lee"),
                                                     "Oligotrophic", NA))))]

soren$limnion1[soren$limnion1 == "E"] <- "Epilimnion"
soren$limnion1[soren$limnion1 == "H"] <- "Hypolimnion"
soren$limnion2[soren$limnion2 == "E"] <- "Epilimnion"
soren$limnion2[soren$limnion2 == "H"] <- "Hypolimnion"


###  Pull out the SHERMAN LAKE SAMPLES
soren$limnion1[soren$lakenames1 == "Sherman" & soren$limnion1 == "Epilimnion"] <- "Mixed"
soren$limnion1[soren$lakenames1 == "Sherman" & soren$limnion1 == "Hypolimnion"] <- "Mixed"
soren$limnion2[soren$lakenames2 == "Sherman" & soren$limnion2 == "Epilimnion"] <- "Mixed"
soren$limnion2[soren$lakenames2 == "Sherman" & soren$limnion2 == "Hypolimnion"] <- "Mixed"


soren$filter1[soren$filter1 == "3um"] <- "Particle"
soren$filter1[soren$filter1 == ""] <- "Free"
soren$filter2[soren$filter2 == "3um"] <- "Particle"
soren$filter2[soren$filter2 == ""] <- "Free"

#Add the combined lake layer
soren$combined<-rep(NA,length(soren$limnion1))
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Epilimnion"]<-"Epilimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Hypolimnion"]<-"Hypolimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Epilimnion"]<-"EH"
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Hypolimnion"]<-"EH"

soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Mixed"]<-"Mixed"
soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Hypolimnion"]<-"Mixed Hypolimnion"
soren$combined[soren$limnion1=="Hypolimnion"&soren$limnion2=="Mixed"]<-"Mixed Hypolimnion"
soren$combined[soren$limnion1=="Epilimnion"&soren$limnion2=="Mixed"]<-"Mixed Epilimnion"
soren$combined[soren$limnion1=="Mixed"&soren$limnion2=="Epilimnion"]<-"Mixed Epilimnion"


##  Add the commbined filter.
soren$filt_comb<-rep(NA,length(soren$filter1))
soren$filt_comb[soren$filter1=="Free"&soren$filter2=="Free"]<-"Free"
soren$filt_comb[soren$filter1=="Particle"&soren$filter2=="Particle"]<-"Particle"
soren$filt_comb[soren$filter1=="Particle"&soren$filter2=="Free"]<-"PF"
soren$filt_comb[soren$filter1=="Free"&soren$filter2=="Particle"]<-"PF"

#creating a column for each combination of top bottom particle free
for(i in 1:length(soren$limnion1)){
  soren$cat[i]<-paste(as.character(soren$filt_comb[i]),as.character(soren$combined[i]))}


#  Add for trophicstate combintions
soren$troph_comb<-rep(NA,length(soren$lakenames2))
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Mesotrophic"]<-"Eutrophic-Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" & 
                   soren$trophicstate2=="Oligotrophic"]<-"Eutrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" &
                   soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Eutrophic" & soren$trophicstate2=="Mixed"]<-"Eutrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Eutrophic"]<-"Eutrophic-Mixed"

soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & 
                   soren$trophicstate2=="Oligotrophic"]<-"Mesotrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" & 
                   soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Mesotrophic" & soren$trophicstate2=="Mixed"]<-"Mesotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Mesotrophic"]<-"Mesotrophic-Mixed"

soren$troph_comb[soren$trophicstate1=="Oligotrophic" & soren$trophicstate2=="Oligotrophic"]<-"Oligotrophic"
soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Oligotrophic"]<-"Oligotrophic-Mixed"
soren$troph_comb[soren$trophicstate1=="Oligotrophic" & soren$trophicstate2=="Mixed"]<-"Oligotrophic-Mixed"

soren$troph_comb[soren$trophicstate1=="Mixed" & soren$trophicstate2=="Mixed"]<-"Mixed"

##  Add combined trophicstate, filter, and limnion 
for(i in 1:length(soren$limnion1)){
  soren$troph_lim1[i]<-paste(as.character(soren$trophicstate1[i]),
                             as.character(soren$limnion1[i]),
                             as.character(soren$filter1[i]))}
for(i in 1:length(soren$limnion2)){
  soren$troph_lim2[i]<-paste(as.character(soren$trophicstate2[i]),
                             as.character(soren$limnion2[i]),
                             as.character(soren$filter2[i]))}

soren$samp1 <- as.character(soren$samp1)
soren$samp2 <- as.character(soren$samp2)

#creating a column for each combination of top bottom particle free
for(i in 1:length(soren$limnion1)){
  soren$cat[i]<-paste(as.character(soren$filt_comb[i]),as.character(soren$combined[i]))}

###  Subsetting out the samples that are in the same categories as each other
soren_beta <- subset(soren, troph_lim1 == troph_lim2)

### Put everything in the order we would like.
soren_beta$trophicstate1 <-factor(soren_beta$trophicstate1,
                                  levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))

soren_beta$trophicstate2 <-factor(soren_beta$trophicstate2,
                                  levels=c("Eutrophic", "Mesotrophic", "Oligotrophic", "Mixed"))

soren_beta$troph_lim2 <-factor(soren_beta$troph_lim2,
                               levels=c("Eutrophic Epilimnion Particle", "Eutrophic Epilimnion Free", "Eutrophic Hypolimnion Particle", "Eutrophic Hypolimnion Free","Mesotrophic Epilimnion Particle", "Mesotrophic Epilimnion Free", "Mesotrophic Hypolimnion Particle", "Mesotrophic Hypolimnion Free","Oligotrophic Epilimnion Particle", "Oligotrophic Epilimnion Free", "Oligotrophic Hypolimnion Particle", "Oligotrophic Hypolimnion Free","Mixed Mixed Particle", "Mixed Mixed Free"))

#### Subsetting out the mixed lake!
nomix_soren_beta <- subset(soren_beta, trophicstate1 != "Mixed")
nomix_soren_beta2 <- subset(nomix_soren_beta, trophicstate2 != "Mixed")


prod_soren <- nomix_soren_beta2

prod_soren$troph_lim1 <- as.character(prod_soren$troph_lim1)
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Epilimnion Free" | 
                        prod_soren$troph_lim1 =="Mesotrophic Epilimnion Free"] <- "Productive Epilimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Epilimnion Particle" | 
                        prod_soren$troph_lim1 =="Mesotrophic Epilimnion Particle"] <- "Productive Epilimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Hypolimnion Free" | 
                        prod_soren$troph_lim1 =="Mesotrophic Hypolimnion Free"] <- "Productive Hypolimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Eutrophic Hypolimnion Particle" | 
                        prod_soren$troph_lim1 =="Mesotrophic Hypolimnion Particle"] <- "Productive Hypolimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Epilimnion Free"] <- "Unproductive Epilimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Epilimnion Particle"] <- "Unproductive Epilimnion Particle"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Hypolimnion Free"] <- "Unproductive Hypolimnion Free"
prod_soren$troph_lim1[prod_soren$troph_lim1 == "Oligotrophic Hypolimnion Particle"] <- "Unproductive Hypolimnion Particle"
prod_soren$trophicstate1 <- as.character(prod_soren$trophicstate1)
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Eutrophic"] <- "High-Nutrient"
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Mesotrophic"] <- "High-Nutrient"
prod_soren$trophicstate1[prod_soren$trophicstate1 == "Oligotrophic"] <- "Low-Nutrient"

prod_soren <- prod_soren %>% 
  group_by(troph_lim1) %>%
  mutate(mean.soren = mean(value),
         sd.soren = sd(value))

############ Kruskal-Wallis Rank Sum Test: Sørensen
####  Run the test on ALL the data that goes into the mean  
#hist(prod_soren$value, breaks = 30)  # Not normally distributed
prod_soren$value <- as.numeric(prod_soren$value)
prod_soren$troph_lim1 <- as.factor(prod_soren$troph_lim1)
## Do the KW test
soren_prod_KW <- kruskal.test(prod_soren$value ~ prod_soren$troph_lim1) # Kruskal Wallis test on sorensen
print(soren_prod_KW)  # show Kruskal Wallis result

## 
##  Kruskal-Wallis rank sum test
## 
## data:  prod_soren$value by prod_soren$troph_lim1
## Kruskal-Wallis chi-squared = 43.741, df = 7, p-value =
## 0.0000002399

### Which samples are significantly different from each other?  
soren_prod_KW_MC <- kruskalmc(prod_soren$value ~ prod_soren$troph_lim1)  ## Defaults to P < 0.05
#print(soren_prod_KW_MC)
### Time to figure out letters to represent significance in a plot
soren_test <- soren_prod_KW_MC$dif.com$difference # select logical vector
names(soren_test) <- row.names(soren_prod_KW_MC$dif.com) # add comparison names
# create a list with "homogenous groups" coded by letter
soren_letters <- multcompLetters(soren_test, compare="<", threshold=0.05, 
                                 Letters=c(letters, LETTERS, "."), reversed = FALSE)
###  Extract the values from the multcompLetters object
soren_sigs_dataframe <-  data.frame(as.vector(names(soren_letters$Letters)), as.vector(soren_letters$Letters))
colnames(soren_sigs_dataframe) <- c("troph_lim1", "siglabel")
soren_try <- merge(prod_soren, soren_sigs_dataframe, by = "troph_lim1")

soren_try$troph_lim1 <-factor(soren_try$troph_lim1,
                              levels=c( "Productive Epilimnion Free", "Productive Epilimnion Particle", "Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"))

soren_sigs <- soren_try %>%
  select(trophicstate1, troph_lim1, siglabel) %>%
  distinct()

soren_boxplot <- ggplot(soren_try, aes(x = troph_lim1, y = value, fill = troph_lim1)) + geom_point(size = 2) +
  geom_boxplot() +
  geom_text(data = soren_sigs, aes(label = siglabel, x = troph_lim1, y = 0.74), size =3) +
  scale_fill_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free", 
                                        "Productive Hypolimnion Particle", "Productive Hypolimnion Free",
                                         "Unproductive Epilimnion Particle", "Unproductive Epilimnion Free", 
                                        "Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free"), 
                     values = c("royalblue4", "royalblue4", "royalblue4", "royalblue4",
                                "cornflowerblue","cornflowerblue","cornflowerblue","cornflowerblue"))+
  scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle", 
                            "Productive Hypolimnion Free","Productive Hypolimnion Particle",
                            "Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", 
                            "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle"),
                   labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", 
                            "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
                            "Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", 
                            "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated")) + 
  xlab("Habitat") + ylab("Sørensen \nDissimilarity") + theme_bw() + #scale_fill_brewer(palette="Paired") + 
  facet_grid(. ~ trophicstate1, scale = "free", space = "free") +
  theme(plot.title = element_text(face="bold", size = 12),  #Set the plot title
                                 strip.text.x = element_text(face="bold", size=10),  #Set X facet titles  
                                 strip.text.y = element_blank(),  #Set the facet titles on x-axis 
                                 strip.background = element_blank(),  #Set the facet background to no background
                                 axis.title.x = element_text(face="bold", size=10),  #Set the x-axis title
                                 axis.title.y = element_text(face="bold", size=10),  #Set the y-axis title
                                 axis.text.x = element_text(angle=30, colour = "black", 
                                                            vjust=1, hjust = 1, size=8),
                                 axis.text.y = element_text(colour = "black", size=8),  #Set the y-axis labels
                                 legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                                 legend.text = element_text(size = 8),  #Set the legend text
                                 legend.position="none", #Default the legend position to the right
                                 plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"));   #top, right, bottom, left

Figure 3: Sørensen and Bray-Curtis Inter-Lake Variation

soren_boxplot2 <- soren_boxplot + theme(axis.title.x = element_blank(),
                                        axis.text.x = element_blank(),
                                        axis.ticks.x = element_blank(),
                                                            #top, right, bottom, left
                                        plot.margin = unit(c(0.1, 0.1, 0.35, 0.1), "cm")) 


bray_boxplot2 <- bray_boxplot + theme(plot.margin = unit(c(-1, 0.1, 0.1, 0.26), "cm")) #top, right, bottom, left


#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_beta_variance.pdf", width= 5, height=4.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig3_beta_variance.tiff", width= 5, height=4.5, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(2,1,height=c(0.46,0.54))))
print(soren_boxplot2, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(bray_boxplot2, vp=viewport(layout.pos.row=2,layout.pos.col=1)); #dev.off()

Figure 3: Box and whisker plots of the variance in the Sørensen and Bray-Curtis dissimilarity metrics between lakes. (Top) Sørensen dissimilarity (OTU presence or absence; KW: p = 2.4 x 10-7) and (Bottom) Bray-Curtis dissimilarity (OTU relative abundance; KW: p = 3.5 x 10-5) between samples within the same habitat in different lakes. Letter(s) next to data points indicate groups of samples that are significantly different in their degree of lake-to-lake dissimilarity. Numbers in parentheses along the x-axis represent sample sizes of each lake habitat.

Overall Abundances of Phyla throughout the Dataset (without mixed lake)

### Calculate the mean relative abundance based on ProdLevel + Quadrant for each PHYLUM 
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEE3um" & names !="SHEH" & names != "SHEH3um") 
# Remove any OTUs that are 0
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled) 


#################################################################################  OVERALL ABUNDANCE PLOT
###############  NO ISOTHERMAL LAKE (SHERMAN) SAMPLES AND NO WINTERGREEN HYPOLIMNION SAMPLES 
good_phylum <-tax_glom(nosherwin_FWDB_scaled,taxrank = "Phylum")
##  Melt it into a dataframe 
goodsamps_phy_melt <- psmelt(good_phylum)  
sub_goodsamps_phy_melt <- subset(goodsamps_phy_melt, select = c("Sample", "ProdLevel", "quadrant", "Phylum","Abundance"))
TOTALS <- ddply(sub_goodsamps_phy_melt, c("Sample"), summarise, total = sum(Abundance))   

### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
sub_phy_melt_totals <- merge(sub_goodsamps_phy_melt, TOTALS, by = "Sample")  
## Calculate the relative abundance
sub_phy_melt_totals$RelAbundance <- sub_phy_melt_totals$Abundance/sub_phy_melt_totals$total  
##  Calculate the Percent Abundance
sub_phy_melt_totals$PercentAbund <- sub_phy_melt_totals$RelAbundance * 100  

####  Make a new dataframe with the percent abudance within the entire dataset!
phy_stats <- ddply(sub_phy_melt_totals, c("Phylum"), summarise, 
                   N = length(PercentAbund),
                   PercentPhy = mean(PercentAbund),
                   sd   = sd(PercentAbund),
                   se   = sd / sqrt(N))

abund <- subset(phy_stats,PercentPhy > 0.001)  # Only take the phyla that are more than 0.01% abundant

abund$Phylum <- factor(abund$Phylum,
                       levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes",  "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Firmicutes", "Chlorobi", "Armatimonadetes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae",   "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "Gemmatimonadetes", "WCHB1-60", "Deferribacteres", "Fusobacteria",  "Candidate_division_TM7",  "SPOTSOCT00m83", "Candidate_division_OP8", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11", "unclassified"))   

abund$Phylum = with(abund, factor(Phylum, levels = rev(levels(Phylum)))) 


#Relative abundance plot 
ggplot(abund, aes(y=PercentPhy , x=Phylum))  +
  geom_bar(stat="identity", position=position_dodge(),  fill = "cornflowerblue", colour = "black") +
  theme_bw() + ggtitle("Phyla Above 0.1% in All Samples") +
  xlab("Phylum") + ylab("Mean Relative Abundance (%)") +
  geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25) + 
  coord_flip()

Figure 4: Abundance of Phyla within each Lake Habitat

# sub_phy_melt_totals #  This data frame has no Wintergreen hypo or Mixed lake samples!
##  Calculate the Phylum abundance based on the ProdLevel and the filter fraction.  
prod_phylum_FWDB_stats <- ddply(sub_phy_melt_totals, c("ProdLevel","quadrant", "Phylum"), summarise, 
                                N = length(PercentAbund),
                                mean_abundance = mean(PercentAbund),
                                sd   = sd(PercentAbund),
                                se   = sd / sqrt(N))
###  Only the phylum 
abund_only_by_phylum <- ddply(prod_phylum_FWDB_stats, c("Phylum"), summarise, 
                              N = length(mean_abundance),
                              phylum_mean = mean(mean_abundance))

abund_only_by_phylum <-arrange(abund_only_by_phylum, desc(phylum_mean)) 
# Create a vector of the phyla in the order that we would like
phy_order <- as.character(abund_only_by_phylum$Phylum)  

### TOP 25 PHYLA
top15phy <- prod_phylum_FWDB_stats[prod_phylum_FWDB_stats$Phylum %in% phy_order[1:17], ]

phy15_order <- as.character(abund_only_by_phylum$Phylum)[1:17]

#### Make sure there's no sherman lake in this part of the analysis
good_phylum_with_sher <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
phy_melt_with_sher <- psmelt(good_phylum_with_sher)  
good_phylum_no_sher <-tax_glom(nosherwin_FWDB_scaled,taxrank = "Phylum")

######################################################## DESeq on the six habitats 
###########################################################  HIGH vs LOWnutrient lakes
DESeq_prodLevel <- deSEQ(good_phylum_no_sher, ~ ProdLevel)

###########################################################  Epilimnion vs Hypolimnion
DESeq_limnion <- deSEQ(good_phylum_no_sher, ~ limnion)

###########################################################  Particle vs Free
DESeq_filter <- deSEQ(good_phylum_no_sher, ~ filter)

DESeq_hab1 <- subset(DESeq_filter, select = c(Phylum, log2FoldChange, padj)) %>%
  mutate(Habitat = "PA vs. FL",
         sig_lab = "*")
DESeq_hab2 <- subset(DESeq_limnion, select = c(Phylum, log2FoldChange, padj)) %>%
  mutate(Habitat = "Top vs. Bottom", 
         sig_lab = "*")
DESeq_hab3 <- subset(DESeq_prodLevel, select = c(Phylum, log2FoldChange, padj)) %>%
  mutate(Habitat = "Prod vs. UnProd", 
         log2FoldChange = log2FoldChange *-1,
         sig_lab = "*")#


#  Prep the data frame 
bicompare_phy <- subset(phy_melt_with_sher, select = c("Sample", "filter", "ProdLevel", "limnion","quadrant", "Phylum","Abundance"))

##  Phylum Total for each sample
bi_TOTALS <- ddply(bicompare_phy, c("Sample"), summarise,  
                total   = sum(Abundance))   
### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
bicompare_totals <- merge(bicompare_phy, bi_TOTALS, by = "Sample")  
bicompare_phy$RelAbundance <- bicompare_phy$Abundance/bicompare_totals$total  ## Calculate the relative abundance
bicompare_phy$PercentAbund <- bicompare_phy$RelAbundance * 100  ##  Calculate the Percent Abundance

##  Only provide top 17 phyla 
bicompare_phy <- bicompare_phy[bicompare_phy$Phylum %in% phy_order[1:17], ]

bicompare_phy$Phylum <- factor(bicompare_phy$Phylum,
                               levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes",  "Firmicutes",  "Chlorobi", "Acidobacteria", "Spirochaetae"))

## PROD LEVEL
bicompare_phy$ProdLevel <- as.character(bicompare_phy$ProdLevel)
bicompare_phy$ProdLevel[bicompare_phy$ProdLevel == "Productive"] <- "High-Nutrient" 
bicompare_phy$ProdLevel[bicompare_phy$ProdLevel == "Unproductive"] <- "Low-Nutrient" 
# FILTER 
bicompare_phy$filter <- as.character(bicompare_phy$filter)
bicompare_phy$filter[bicompare_phy$filter == "Free"] <- "Free-Living"
bicompare_phy$filter[bicompare_phy$filter == "Particle"] <- "Particle-Associated"

bicompare_phy$ProdLevel<- factor(bicompare_phy$ProdLevel,levels = c("Low-Nutrient", "High-Nutrient"))

bicompare_phy_nowin <- subset(bicompare_phy, Sample != "WINH" & Sample != "WINH3um")

bicompare_phy_nosherwin <- subset(bicompare_phy_nowin, Sample != "SHEE" & Sample != "SHEH" &
                                    Sample != "SHEE3um" & Sample != "SHEH3um")

#####  Prepare the data frame for significance values for ProdLevel
filter_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab1, by = "Phylum")
filter_phy_boxplot <- filter_phy_boxplot[filter_phy_boxplot$Phylum %in% phy_order[1:17], ]
##  Order the phyla to match original data frame 
filter_phy_boxplot$Phylum <- factor(filter_phy_boxplot$Phylum,
                                    levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes",  "Firmicutes",  "Chlorobi", "Acidobacteria", "Spirochaetae"))

g1 <- ggplot(filter_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=filter)) + 
  ggtitle("") + xlab("") + geom_boxplot() +  ylab("Mean Percent \nRelative Abundance") + 
  scale_fill_manual(name = "Filter Fraction", breaks=c("Free-Living", "Particle-Associated"),
                     labels = c("Free-Living (21)","Particle-Associated (20)"), 
                      values = c("orangered", "orangered4")) +
  theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
  scale_colour_hue(guide = "none") +
  theme(axis.text.x = element_blank(),
                     plot.title = element_text(face="bold", size = 12),
                     axis.title.y = element_text(face="bold", size=10),
                     axis.ticks.x = element_blank(), 
                     legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                     legend.text = element_text(size = 8),
                     plot.margin = unit(c(0.1, 0.1, -1, 0.1), "cm"), #top, right, bottom, left
                     legend.position = c(0.85, 0.67)) 

#####  Prepare the data frame for significance values for ProdLevel
prodlevel_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab3, by = "Phylum")
prodlevel_phy_boxplot <- prodlevel_phy_boxplot[prodlevel_phy_boxplot$Phylum %in% phy_order[1:17], ]
##  Order the phyla to match original data frame 
prodlevel_phy_boxplot$Phylum <- factor(prodlevel_phy_boxplot$Phylum,
                                       levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes",  "Firmicutes",  "Chlorobi", "Acidobacteria", "Spirochaetae"))

g2 <- ggplot(prodlevel_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=ProdLevel)) + 
  ggtitle("")  + xlab("") + geom_boxplot() +  
  ylab("Mean Percent \nRelative Abundance") +
  scale_fill_manual(name = "Productivity Level", breaks=c("Low-Nutrient", "High-Nutrient"),
                     labels = c("Low-Nutrient (15)","High-Nutrient (26)"), 
                     values = c("cornflowerblue", "royalblue4")) +
  theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
  scale_colour_hue(guide = "none") +
  theme(axis.text.x = element_blank(),
                     axis.title.y = element_text(face="bold", size=10),
                     axis.ticks.x = element_blank(), 
                     legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                     legend.text = element_text(size = 8),
                     plot.margin = unit(c(-0.25, 0.1, -0.5, 0.1), "cm"), #top, right, bottom, left
                     legend.position = c(0.88, 0.67)) 

#####  Prepare the data frame for significance values for ProdLevel
limnion_phy_boxplot <- full_join(bicompare_phy_nosherwin, DESeq_hab2, by = "Phylum")
limnion_phy_boxplot <- limnion_phy_boxplot[limnion_phy_boxplot$Phylum %in% phy_order[1:17], ]
##  Order the phyla to match original data frame 
limnion_phy_boxplot$Phylum <- factor(limnion_phy_boxplot$Phylum,
                                     levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes",  "Firmicutes",  "Chlorobi", "Acidobacteria", "Spirochaetae"))

g3 <- ggplot(limnion_phy_boxplot, aes(y=PercentAbund , x=Phylum, fill=limnion)) + 
  ggtitle("") + xlab("") + geom_boxplot() +  ylab("Mean Percent \nRelative Abundance") + xlab("Phylum") +
   scale_fill_manual(name = "Lake Layer", breaks=c("Epilimnion", "Hypolimnion"),
                     labels = c("Epilimnion (19)","Hypolimnion (18)"), 
                     values = c("limegreen", "darkgreen")) +
  theme_bw() + geom_text(aes(label = sig_lab, x = Phylum, y = 50, color = "red"), size =4) +
  scale_colour_hue(guide = "none") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1, size = 8),
                     axis.title.x = element_text(face="bold", size=10),
                     axis.title.y = element_text(face="bold", size=10),
                     legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                     legend.text = element_text(size = 8),  #Set the legend text
                     plot.margin = unit(c(-0.75, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
                     legend.position = c(0.88, 0.67)); 

# Store ggplot graphical parameters
g1 <- ggplotGrob(g1)
g2 <- ggplotGrob(g2)
g3 <- ggplotGrob(g3)

# Add columns to plots without legends
#ncol(g1); ncol(g2); ncol(g3)
#g1 <- gtable_add_cols(g1, g2$widths[6])

####  DRAW FIGURE 5 ### DRAW FIGURE 5  ####  DRAW FIGURE 5 ### DRAW FIGURE 5  #### 
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig4_phy17_comparison_sigs.pdf",  width= 6.5, height=6.5)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig4_phy17_comparison_sigs.tiff", width= 6.5, height=6.5, units = "in", res = 175)
grid.draw(rbind(g1, g2, g3, size = "last")); #dev.off()

Figure 4: Phylum abundance of the six lake habitats of the 17 most abundant phyla and classes of Proteobacteria. Box and whisker plots of the phylum abundance across all samples grouped based on (top) filter fraction, (middle) nutrient level, or (bottom) lake layer. Numbers in parentheses within the legend represent sample sizes of each lake habitat. Red stars represent significant differentially abundant phyla between filter fraction, productivity level, or lake layers as calculated by DESeq.

Figure S4: Including the Mixed Lake

#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS4_mixed_green.pdf",  width= 6.5, height=3)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS4_mixed_green.tiff", width= 6.5, height=3, units = "in", res = 175)
ggplot(bicompare_phy_nowin, aes(y=PercentAbund , x=Phylum, fill=limnion)) + #coord_cartesian(xlim = c(0, 30)) + 
  ggtitle("") + xlab("") + geom_boxplot() +  ylab("Mean Percent \nRelative Abundance") + xlab("Phylum") +
  scale_fill_manual(name = "Lake Strata", breaks=c("Epilimnion", "Hypolimnion", "Mixed"),
                     labels = c("Epilimnion (19)","Hypolimnion (18)", "Mixed (4)"), 
                     values = c("limegreen", "darkgreen", "seagreen1")) +
  theme_bw() + theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1, size = 8),
                     axis.title.x = element_text(face="bold", size=10),
                     axis.title.y = element_text(face="bold", size=10),
                     plot.title = element_text(face = "bold", size = 12),
                     legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                     legend.text = element_text(size = 8),  #Set the legend text
                     plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
                     legend.position = c(0.88, 0.7)); #dev.off()

Figure S4: Phylum abundance of the 3 lake layer habitats, including the mixed lake, of the 17 most abundant phyla and classes of Proteobacteria. Box and whisker plots of the phylum abundance across the epilimnion, hypolimnion and mixed lake samples. Numbers in parentheses within the legend represent sample sizes of each lake habitat.

Figure S5: Phylum abundance withing each of the sub-lake habitats.

##  Calculate the Phylum abundance based on the ProdLevel and the filter fraction.  
prod_phylum_FWDB_stats <- ddply(sub_phy_melt_totals, c("ProdLevel","quadrant", "Phylum"), summarise, 
                                N = length(PercentAbund),
                                mean_abundance = mean(PercentAbund),
                                sd   = sd(PercentAbund),
                                se   = sd / sqrt(N))
###  Only the phylum 
abund_only_by_phylum <- ddply(prod_phylum_FWDB_stats, c("Phylum"), summarise, 
                              N = length(mean_abundance),
                              phylum_mean = mean(mean_abundance))

abund_only_by_phylum <-arrange(abund_only_by_phylum, desc(phylum_mean))

# Create a vector of the phyla we'd like
phy_order <- as.character(abund_only_by_phylum$Phylum) 

### Top 17 most abundant phyla
top15phy <- prod_phylum_FWDB_stats[prod_phylum_FWDB_stats$Phylum %in% phy_order[1:17], ]

phy15_order <- as.character(abund_only_by_phylum$Phylum)[1:17]

##  Put the phyla in the right order in terms of total relative abundance
top15phy$Phylum <- factor(top15phy$Phylum,
                          levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria", "Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes",  "Firmicutes",  "Chlorobi", "Acidobacteria", "Spirochaetae"))
  
top15phy <- subset(top15phy, quadrant != "Free Mixed" & quadrant != "Particle Mixed")
top15phy$quadrant <- factor(top15phy$quadrant,
                            levels = c("Free Epilimnion", "Particle Epilimnion", 
                                       "Free Hypolimnion", "Particle Hypolimnion"))

top15phy$Phylum = with(top15phy, factor(Phylum, levels = rev(levels(Phylum))))

#### ADDING NO SPACES FOR FREE LIVING and ADDING 2 SPACE FOR PARICLE ASSOCIATED AT THE END
top15phy$quadrant <- as.character(top15phy$quadrant)
top15phy$quadrant[top15phy$quadrant=="Particle Epilimnion"] <- "Epilimnion   "
top15phy$quadrant[top15phy$quadrant=="Free Epilimnion"]   <- "Epilimnion"
top15phy$quadrant[top15phy$quadrant=="Particle Hypolimnion"]   <- " Hypolimnion  "
top15phy$quadrant[top15phy$quadrant=="Free Hypolimnion"]   <- "Hypolimnion"

#### ADDING NO SPACES FOR PRODUCTIVE and ADDING 1 SPACE FOR UNPRODUCTIVE
top15phy$prod <- top15phy$ProdLevel
top15phy$prod <- as.character(top15phy$prod)
top15phy$prod[top15phy$prod =="Productive"]   <- ""
top15phy$prod[top15phy$prod =="Unproductive"]   <- " "

for(i in 1:length(top15phy$prod)){
  top15phy$prod_quad [i]<-paste(as.character(top15phy$prod[i]),as.character(top15phy$quadrant[i]))}

top15phy$prod_quad <- factor(top15phy$prod_quad,
                             levels = c(" Epilimnion", " Hypolimnion", " Epilimnion   ",
                                        "  Hypolimnion  ", "  Epilimnion", "  Hypolimnion", 
                                        "  Epilimnion   ",  "   Hypolimnion  "))

phy.colors.whew <- c(Acidobacteria = "grey26", Actinobacteria = "palevioletred2", Alphaproteobacteria = "steelblue4", Armatimonadetes = "red", Bacteroidetes = "darkorange","BD1-5" = "chartreuse", Betaproteobacteria = "royalblue", Caldiserica = "black","Candidate_division_BRC1" = "red","Candidate_division_JS1" = "aquamarine1","Candidate_division_OD1" = "#6DDE88", "Candidate_division_OP3" = "yellow1", "Candidate_division_OP8" = "goldenrod1", "Candidate_division_OP11" = "chocolate4","Candidate_division_SR1" = "tan3", "Candidate_division_TM7" = "skyblue1", "Candidate_division_WS3" = "magenta",Chlamydiae="violet", Chlorobi="cyan2", Chloroflexi="darkgreen", Cyanobacteria = "chartreuse3", Deferribacteres = "slateblue3", "Deinococcus-Thermus" = "violetred", Dictyoglomi = "cornsilk4", Deltaproteobacteria = "deepskyblue", Elusimicrobia = "violetred4", Epsilonproteobacteria = "lightskyblue", Fibrobacteres = "hotpink", Firmicutes = "blue4", FGL7S = "palevioletred1",Fusobacteria = "slateblue1", Gammaproteobacteria = "plum2", Gemmatimonadetes="black", GOUTA4 = "plum1", "Hyd24-12" = "sienna2", JTB23 = "seashell2",Lentisphaerae = "yellow1", "NPL-UPA2"="royalblue", OC31 = "mediumpurple4", Planctomycetes = "mediumorchid3", Proteobacteria = "deepskyblue","SHA-109" = "lightsalmon3", SM2F11 = "lightskyblue2", SPOTSOCT00m83 = "orangered",Spirochaetae = "gold3", Tenericutes="pink", Thermotogae = "chocolate1", TA06 = "lightslateblue",TA18 = "rosybrown3", TM6 = "olivedrab",unclassified = "grey", Verrucomicrobia = "purple4", "WCHB1-60" = "palegreen")


###########################  Here we will use GTable to manually create the labels. 
# Create the gtable object
top15plot <- ggplot(top15phy, aes(y=mean_abundance , x=Phylum, fill=Phylum)) + #coord_cartesian(xlim = c(0, 30)) + 
  guides(fill = guide_legend(reverse=TRUE)) + ggtitle("") + 
  geom_bar(stat="identity", position=position_dodge()) + #theme_classic() +
  geom_bar(stat="identity", position=position_dodge(), colour = "black", show_guide = FALSE) +
  facet_wrap(~ prod_quad, ncol = 8) + xlab("Top 15 Most Abundant Phyla") +
  scale_fill_manual(values = phy.colors.whew,name="Phylum") + 
  geom_errorbar(aes(ymin = mean_abundance -se, ymax = mean_abundance +se), width = 0.25, color = "black") + 
  ylab("Mean Percent Relative Abundance (%)") + coord_flip() + theme_bw() +
  guides(fill = guide_legend(keywidth = 0.7, keyheight = 0.7)) + 
  theme(plot.title = element_text(face="bold", size = 12),  #Set the plot title
                                 strip.text.x = element_text(size=8),  #Set the facet titles on X 
                                 strip.text.y = element_text(size=8, face="bold"),  #Set the facet titles Y 
                                 axis.title.x = element_text(face="bold", size=12),  #Set the x-axis title
                                 axis.title.y = element_text(face="bold", size=12),  #Set the y-axis title
                                 axis.text.x = element_text(colour = "black", size=8),  #Set the x-axis labels
                                 axis.text.y = element_text(colour = "black", size=8),  #Set the y-axis labels
                                 legend.title = element_text(size = 8, face="bold"),  #Set the legend title 
                                 legend.text = element_text(size = 8),  #Set the legend text
                                 legend.position="right")


z <- ggplot_gtable(ggplot_build(top15plot))

# add label for top strip
z <- gtable_add_rows(z, z$heights[[3]], 2)
z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
                             textGrob("Free-Living", gp = gpar(fontsize = 8, col = "black"))),
                     3, 4, 3, 7, name = paste(runif(2)))

z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
                             textGrob("Particle-Associated", gp = gpar(fontsize = 8, col = "black"))),
                     3, 9, 3, 13, name = paste(runif(2)))

z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.6))),
                             textGrob("High-Nutrient", gp = gpar(fontsize = 8, col = "black"))),
                     2, 4, 2, 13, name = paste(runif(2)))

z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
                             textGrob("Free-Living", gp = gpar(fontsize = 8, col = "black"))),
                     3, 15, 3, 19, name = paste(runif(2)))

z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype = 1, fill = gray(0.7))),
                             textGrob("Particle-Associated", gp = gpar(fontsize = 8, col = "black"))),
                     3, 22, 3, 25, name = paste(runif(2)))

z <- gtable_add_grob(z, list(rectGrob(gp = gpar(col = NA, linetype=1, fill = gray(0.6))),
                             textGrob("Low-Nutrient", gp = gpar(fontsize = 8, col = "black"))),
                     2, 15, 2, 25, name = paste(runif(2)))

# add margins
z <- gtable_add_cols(z, unit(1/8, "line"), 7)
z <- gtable_add_rows(z, unit(1/8, "line"), 3)

#####  Plotting FIGURE S3  #####  Plotting FIGURE S3  #####  Plotting FIGURE S3  
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS5_phyabund_quadrants.pdf", width= 10, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS5_phyabund_quadrants.tiff", width= 10, height=6, units = "in", res = 175)
# draw it
grid.newpage()
grid.draw(z); #dev.off()

Figure S5: Mean percent relative abundance of the 17 most abundant phyla or classes of the Proteobacteria in each habitat within high- and low-nutrient lakes. Mean abundance of each phylum or class is plotted with error bars representing the standard error of the mean.

Figure S6: Phylum Heat Plot with Log2-Fold Odds Ratio

good_phylum <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")

### Subsetting Sherman lake for differences between particle and free living 
phy_sherm <- subset_samples(good_phylum, lakenames == "Sherman")  
phy_shemeister <- deSEQ(phy_sherm, ~ filter)

########## SUBSET OUT SHERMAN AND WINTERGREEN HYPOLIMNION
good_phylum_nosherwin <- subset_samples(good_phylum, lakenames != "Sherman")


############################
########################################################### PA VS FL
#1. Top prod 
phytopProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & limnion == "Epilimnion") 
de_phytopProd<- deSEQ(phytopProd, ~ filter)

#2 Top Oligo -->  No significant difference 
#phytopUNProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & limnion == "Epilimnion") 
#de_phytopUNProd <- deSEQ(phytopUNProd, ~ filter)  ## No significant changes

#3 Bottom Productive 
phybotProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & limnion == "Hypolimnion")
de_phybotProd <- deSEQ(phybotProd, ~ filter)

#4 Bottom Unproductive 
phybotUNProd <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & limnion == "Hypolimnion") 
de_phybotUNProd <- deSEQ(phybotUNProd, ~ filter)


############################
########################################################### EPILIMNION VS HYPOLIMNION
#1. PA prod 
phyprodPA <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & filter == "Particle") 
de_phyprodPA <- deSEQ(phyprodPA, ~ limnion)

#2 PA Oligo 
phyunprodPA <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & filter == "Particle") 
de_phyunprodPA <- deSEQ(phyunprodPA, ~ limnion)

#3 FL Productive 
phyprodFL <- subset_samples(good_phylum_nosherwin, ProdLevel == "Productive" & filter == "Free") 
de_phyprodFL <- deSEQ(phyprodFL, ~ limnion)

#4 FL Unproductive 
phyunprodFL <- subset_samples(good_phylum_nosherwin, ProdLevel == "Unproductive" & filter == "Free") 
de_phyunprodFL <- deSEQ(phyunprodFL, ~ limnion)


############################
########################################################### HIGH VS LOW-NUTRIENT
#1. Top PA -->  No significant difference 
#phytopPA <- subset_samples(good_phylum_nosherwin, limnion == "Epilimnion" & filter == "Particle") ## DOES NOT WORK!!!!!!!!!!
#de_phytopPA <- deSEQ(phytopPA, ~ ProdLevel)   ## No significant changes

#2 BOTTOM PA 
phybotPA <- subset_samples(good_phylum_nosherwin, limnion == "Hypolimnion" & filter == "Particle") 
de_phybotPA <- deSEQ(phybotPA, ~ ProdLevel)

#3 TOP FL -->  No significant difference 
#phytopFL <- subset_samples(good_phylum_nosherwin, limnion == "Epilimnion" & filter == "Free") 
#de_phytopFL <- deSEQ(phytopFL, ~ ProdLevel)  ## No significant changes

#4 Bottom FL 
phybotFL <- subset_samples(good_phylum_nosherwin, limnion == "Hypolimnion" & filter == "Free") 
de_phybotFL <- deSEQ(phybotFL, ~ ProdLevel)


#PA VS FL:  
#Surface productive
pafl_topprod <- subset(de_phytopProd, select = c(Phylum, log2FoldChange, padj))
pafl_topprod$Habitat <- "PA vs. FL: Top Productive"
# NOTHING SIGNIFICANT HERE:  Surface unproductive
#pafl_topUNprod <- subset(de_phytopUNProd, select = c(Phylum, log2FoldChange, padj))
#pafl_topUNprod$Habitat <- "PA vs. FL: Top Unproductive"
#Bottom Productive
pafl_botprod <- subset(de_phybotProd, select = c(Phylum, log2FoldChange, padj))
pafl_botprod$Habitat <- "PA vs. FL: Bottom Productive"
#Bottom Unproductive
pafl_botUNprod <- subset(de_phybotUNProd, select = c(Phylum, log2FoldChange, padj))
pafl_botUNprod$Habitat <- "PA vs. FL: Bottom Unproductive"
## SHERMAN
pafl_isothermal <- subset(phy_shemeister, select = c(Phylum, log2FoldChange, padj))
pafl_isothermal$Habitat <- "PA vs. FL: Mixed"

###Top vs Bottom:  
topbot1 <- subset(de_phyprodPA, select = c(Phylum, log2FoldChange, padj))
topbot1$Habitat <- "Top vs. Bottom: PA Productive"
# Particle-Associated UNproductive
topbot2 <- subset(de_phyunprodPA, select = c(Phylum, log2FoldChange, padj))
topbot2$Habitat <- "Top vs. Bottom: PA Unproductive"
# Free-Living Productive 
topbot3 <- subset(de_phyprodFL, select = c(Phylum, log2FoldChange, padj))
topbot3$Habitat <- "Top vs. Bottom: FL Productive"
# Free-Living UNproductive
topbot4 <- subset(de_phyunprodFL, select = c(Phylum, log2FoldChange, padj))
topbot4$Habitat <- "Top vs. Bottom: FL Unproductive"


#Prod vs Oligo:  
#troph1 <- subset(de_phytopPA, select = c(Phylum, log2FoldChange, padj))  ## No significant changes
#troph1$Habitat <- "Prod vs. Unprod: PA Epilimnion"
# Particle-Associated BOTTOM
troph2 <- subset(de_phybotPA, select = c(Phylum, log2FoldChange, padj))
troph2$Habitat <- "Prod vs. Unprod: PA Bottom"
# Free-Living TOP 
#troph3 <- subset(de_phytopFL, select = c(Phylum, log2FoldChange, padj))   ## No significant changes
#troph3$Habitat <- "Prod vs. Unprod: FL Top"
# Free-Living BOTTOM
troph4 <- subset(de_phybotFL, select = c(Phylum, log2FoldChange, padj))
troph4$Habitat <- "Prod vs. Unprod: FL Bottom"
trophs <- rbind(troph2, troph4)
newlog2foldchange <- as.numeric(trophs$log2FoldChange * -1)  ##  To make high-nutrient to POSITIVE 
trophs$log2FoldChange <- newlog2foldchange

df_ratio <-rbind(pafl_topprod, pafl_botprod, pafl_botUNprod,pafl_isothermal,
                 topbot1, topbot2, topbot3, topbot4,
                 trophs)

paste(c("The range of the log2foldChange is",min(df_ratio$log2FoldChange), "to", max(df_ratio$log2FoldChange)))

## [1] "The range of the log2foldChange is"
## [2] "-5.4994449553491"                  
## [3] "to"                                
## [4] "6.76043158519451"

#Split of the habitat column to 2 columns named comparison and habitat
split_cols <- colsplit(df_ratio$Habitat, ":", c("Comparison", "Habitat"))
df_ratio$Habitat = NULL
dfrat <- cbind(df_ratio, split_cols)


uncla <- subset(dfrat, Phylum == "unclassified")

dfrat$Phylum <- factor(dfrat$Phylum,
                       levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia","Betaproteobacteria", "Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Chlorobi", "Armatimonadetes", "Firmicutes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "TM6", "Chlamydiae", "Epsilonproteobacteria", "TA18", "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Gemmatimonadetes", "Fusobacteria", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "WCHB1-60", "Deferribacteres", "Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "unclassified"))

dfrat$Phylum = with(dfrat, factor(Phylum, levels = rev(levels(Phylum))))


#################################################################################  OVERALL ABUNDANCE PLOT
good_phylum_proteo <-tax_glom(nowin_FWDB_scaled,taxrank = "Phylum")
goodsamps_phylum <- tax_glom(good_phylum_proteo,taxrank = "Phylum")
goodsamps_phy_melt <- psmelt(goodsamps_phylum)  ##  Melt it into a dataframe 
sub_goodsamps_phy_melt <- subset(goodsamps_phy_melt, select = c("Sample", "ProdLevel", "quadrant", 
                                                                "Phylum","Abundance"))
##  Calculate the total for each sample
TOTALS <- ddply(sub_goodsamps_phy_melt, c("Sample"), summarise,  
                total   = sum(Abundance))   
### Merge phylum sums and the total numbers -> so we can calculate the Relative Abundance
sub_phy_melt_totals <- merge(sub_goodsamps_phy_melt, TOTALS, by = "Sample")  
## Calculate the relative abundance
sub_phy_melt_totals$RelAbundance <- sub_phy_melt_totals$Abundance/sub_phy_melt_totals$total  
##  Calculate the Percent Abundance
sub_phy_melt_totals$PercentAbund <- sub_phy_melt_totals$RelAbundance * 100 

####  Make a new dataframe with the percent abudance within the entire dataset!
phy_stats <- ddply(sub_phy_melt_totals, c("Phylum"), summarise, 
                   N = length(PercentAbund),
                   PercentPhy = mean(PercentAbund),
                   sd   = sd(PercentAbund),
                   se   = sd / sqrt(N))
abund <- subset(phy_stats,PercentPhy > 0.001)  # Only take the phyla that are more than 0.01% abundant

abund_ordered <- arrange(abund, desc(PercentPhy))

abund$Phylum <- factor(abund$Phylum,levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria", "Planctomycetes",  "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Chlorobi", "Armatimonadetes", "Firmicutes", "Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3", "TM6", "Chlamydiae", "Epsilonproteobacteria", "TA18", "Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5", "Gemmatimonadetes", "Fusobacteria", "Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "WCHB1-60", "Deferribacteres", "Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "unclassified"))   

abund$Phylum = with(abund, factor(Phylum, levels = rev(levels(Phylum)))) 

#Relative abundance plot 
abund_plot <- ggplot(abund, aes(y=PercentPhy , x=Phylum))  +
  #geom_boxplot(fill = "magenta4", colour = "black") + 
  geom_bar(stat="identity", position=position_dodge(),  fill = "magenta4", colour = "black") +
  theme_bw() + ggtitle("Phyla Above 0.1% in All Samples") +
  xlab("Phylum") + ylab("Mean Relative Abundance (%)") +
  geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25) + coord_flip() +
  theme(axis.title.x = element_text(face="bold", size=16),
        axis.text.x = element_text(angle=0, colour = "black", size=14),
        axis.text.y = element_text(colour = "black", size=14),
        axis.title.y = element_text(face="bold", size=16),
        plot.title = element_text(face="bold", size = 20),
        legend.title = element_text(size=12, face="bold"),
        legend.text = element_text(size = 12),
        legend.position="none"); 


#################################################################################
#################################################################################
dfrat$Habitat <- as.character(dfrat$Habitat)
dfrat$Habitat[dfrat$Habitat == " Mixed"] <- "Mixed"
dfrat$Habitat[dfrat$Habitat == " Top Productive"] <- "Epilimnion \nHigh-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Top Unproductive"] <- "Epilimnion \nLow-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Bottom Productive"] <- "Hypolimnion \nHigh-Nutrient"
dfrat$Habitat[dfrat$Habitat == " Bottom Unproductive"] <- "Hypolimnion \nLow-Nutrient"
dfrat$Habitat[dfrat$Habitat == " PA Productive"] <- "High-Nutrient \nParticle-Associated"
dfrat$Habitat[dfrat$Habitat == " PA Unproductive"] <- "Low-Nutrient \nParticle-Associated"
dfrat$Habitat[dfrat$Habitat == " FL Productive"] <- "High-Nutrient \nFree-Living"
dfrat$Habitat[dfrat$Habitat == " FL Unproductive"] <- "Low-Nutrient \nFree-Living"
dfrat$Habitat[dfrat$Habitat == " PA Top"] <- "Particle-Associated \nEpilimnion"
dfrat$Habitat[dfrat$Habitat == " FL Top"] <- "Free-Living \nEpilimnion"
dfrat$Habitat[dfrat$Habitat == " FL Bottom"] <- "Free-Living \nHypolimnion"
dfrat$Habitat[dfrat$Habitat == " PA Bottom"] <- "Particle-Associated \nHypolimnion"

dfrat$Habitat <- factor(dfrat$Habitat,levels = c("Epilimnion \nHigh-Nutrient", "Hypolimnion \nHigh-Nutrient", "Mixed", "Epilimnion \nLow-Nutrient", "Hypolimnion \nLow-Nutrient","Particle-Associated \nHypolimnion", "Free-Living \nEpilimnion", "Free-Living \nHypolimnion", "High-Nutrient \nParticle-Associated", "High-Nutrient \nFree-Living", "Low-Nutrient \nParticle-Associated", "Low-Nutrient \nFree-Living"))

heat <- ggplot(dfrat, aes(Habitat, Phylum)) + geom_tile(aes(fill = log2FoldChange)) + 
  scale_fill_gradient2(name = "Odds\nRatio", mid = "gray", low = "darkorange", high = "blue4",  
                       na.value = "white", guide = guide_colorbar(barwidth = 1.5, barheight = 4.5)) + 
  theme_bw(base_size = 12) + scale_x_discrete(expand = c(0, 0)) + 
  scale_y_discrete(expand = c(0, 0)) + 
  ylab("Phylum") + xlab("Habitat") + 
  facet_grid(. ~ Comparison, scales = "free", space = "free", labeller=mf_labeller) + 
  theme(axis.text.x = element_text(colour="black", size=8, angle = 30, hjust = 1, vjust = 1), 
        axis.text.y = element_text(colour="black", vjust=0.5, size=8),
        axis.title.x = element_text(face="bold", size=8),
        legend.title = element_text(face="bold", size=6),
        legend.text = element_text(size = 6),
        legend.position = c(0.93, 0.14),#"left",
        axis.title.y = element_text(face="bold", size=8),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        #plot.margin = unit(c(0.5, 1, 0.5, 0.5), "cm"), #top, right, bottom, left
        strip.text.x = element_text(size=8, face = "bold", colour = "black"),
        strip.background = element_blank()); 

# Discover the phyla that are in the abundance plot but NOT differentially abundant at the phylum level
#setdiff(abund$Phylum, dfrat$Phylum)  
phylum_delete <- c("Candidate_division_OP11", "Candidate_division_OP8", "Candidate_division_TM7", "Dictyoglomi", "SPOTSOCT00m83", "Thermotogae", "TM6", "unclassified", "WCHB1-60")

subset_abundPhylum <- subset(abund, !(Phylum %in% phylum_delete))
#setdiff(subset_abundPhylum$Phylum, dfrat$Phylum)  # Sanity check
#setdiff(dfrat$Phylum, subset_abundPhylum$Phylum)  # Double sanity check


relabun_plot <- ggplot(subset_abundPhylum, aes(y=PercentPhy , x=Phylum)) + #coord_cartesian(xlim = c(0, 30)) + 
  geom_bar(stat="identity", position=position_dodge(),fill = "gray", colour = "black") +
  ylab("Mean Percent \n Relative \n Abundance (%)") + coord_flip() + theme_bw() + 
  geom_errorbar(aes(ymin = PercentPhy -se, ymax = PercentPhy +se), width = 0.25, color = "black") + 
  scale_y_continuous(expand= c(0,0), limits = c(0,25)) +
  theme(axis.title.x = element_text(face="bold", size=8),
        axis.text.x = element_text(angle=0, colour = "black", size=8),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        strip.background = element_rect(colour="black", fill = "black"),
        plot.margin = unit(c(1.15, 0.3, 1, -0.4), "cm"), #top, right, bottom, left
        #plot.margin = unit(c(1.65, 2, 1.35, -1.35), "cm"), #top, right, bottom, left
        legend.position="none"); #relabun_plot

#####  Plotting FIGURE 5  #####  Plotting FIGURE 5  #####  Plotting FIGURE 5  
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS6_phyheat.pdf",  width= 7.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS6_phyheat.tiff", width= 7.5, height=6, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.83,0.17))))
print(heat, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(relabun_plot, vp=viewport(layout.pos.row=1,layout.pos.col=2)); #dev.off()

Figure S6: Differentially abundant phyla across all specific lake habitats. (Left) Heat map of the significantly differentially abundant (based on log2-ratios of relative abundance data) bacterial phyla and classes of the Proteobacteria between filter fractions, nutrient levels, and lake layers (titles). Blue represents a significant differential abundance of a phylum or class in the particle-associated fraction, high-nutrient lakes, or hypolimnion. Yellow represents a significant differential abundance of a phylum or class in the free-living fraction, low-nutrient lakes, or epilimnion. Only significant phyla identified were included. (Right) Bar graph of the mean relative abundance of the bacterial phylum or class of the Proteobacteria across the entire data set sorted from high to low relative abundance. Error bars represent the standard error of the mean.

Working up data for Figure 5 - run DESeq

sherm <- subset_samples(nowin_FWDB_scaled, lakenames == "Sherman")  
shemeister <- deSEQ(sherm, ~ filter, cutoff = 0, alpha = 0.01)

########## SUBSET OUT SHERMAN (mixed) Lake
good_samps_nosherwin <- subset_samples(nowin_FWDB_scaled, lakenames != "Sherman")


############################
########################################################### PA VS FL
#1. Top prod --> Model fit not typical
topProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & limnion == "Epilimnion")
de_topProd <- deSEQ(topProd, ~ filter, cutoff = 0, alpha = 0.01) # fitType = "local"

#2 Top Oligo --> No significant difference 
topUNProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & limnion == "Epilimnion")
#de_topUNProd <- deSEQ(topUNProd, ~ filter, cutoff = 0, alpha = 0.01) # No significant difference 

#3 Bottom Productive 
botProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & limnion == "Hypolimnion") 
de_botProd <- deSEQ(botProd, ~ filter, cutoff = 0, alpha = 0.01)

#4 Bottom Unproductive 
botUNProd <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & limnion == "Hypolimnion") 
de_botUNProd <- deSEQ(botUNProd, ~ filter, cutoff = 0, alpha = 0.01)

############################
########################################################### EPILIMNION VS HYPOLIMNION
#1. Top prod 
prodPA <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & filter == "Particle") 
de_prodPA <- deSEQ(prodPA, ~ limnion, cutoff = 0, alpha = 0.01)

#2 Top Oligo 
unprodPA <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & filter == "Particle") 
de_unprodPA <- deSEQ(unprodPA, ~ limnion, cutoff = 0, alpha = 0.01)

#3 Bottom Productive 
prodFL <- subset_samples(good_samps_nosherwin, ProdLevel == "Productive" & filter == "Free") 
de_prodFL <- deSEQ(prodFL, ~ limnion, cutoff = 0, alpha = 0.01)

#4 Bottom Unproductive 
unprodFL <- subset_samples(good_samps_nosherwin, ProdLevel == "Unproductive" & filter == "Free") 
de_unprodFL <- deSEQ(unprodFL, ~ limnion, cutoff = 0, alpha = 0.01)

############################
########################################################### HIGH- VS LOW-NUTRIENT
#1. Top prod 
topPA <- subset_samples(good_samps_nosherwin, limnion == "Epilimnion" & filter == "Particle") 
de_topPA <- deSEQ(topPA, ~ ProdLevel, cutoff = 0, alpha = 0.01)

#2 Top Oligo 
botPA <- subset_samples(good_samps_nosherwin, limnion == "Hypolimnion" & filter == "Particle") 
de_botPA <- deSEQ(botPA, ~ ProdLevel, cutoff = 0, alpha = 0.01)

#3 Bottom Productive 
topFL <- subset_samples(good_samps_nosherwin, limnion == "Epilimnion" & filter == "Free") 
de_topFL <- deSEQ(topFL, ~ ProdLevel, cutoff = 0, alpha = 0.01)

#4 Bottom Unproductive 
botFL <- subset_samples(good_samps_nosherwin, limnion == "Hypolimnion" & filter == "Free") 
de_botFL <- deSEQ(botFL, ~ ProdLevel, cutoff = 0, alpha = 0.01)


#PA VS FL:  
#Surface Productive
pafl1 <- subset(de_topProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl1$Habitat <- "PA vs. FL: Epilimnion \nHigh-Nutrient"
#Bottom High-Nutrient 
pafl2 <- subset(de_botProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl2$Habitat <- "PA vs. FL: Epilimnion \nLow-Nutrient"
#Surface Low-Nutrient --> No significant difference 
#pafl3 <- subset(de_topUNProd, select = c(Phylum, Genus, Species,OTU, log2FoldChange, padj))
#pafl3$Habitat <- "PA vs. FL: Hypolimnion \nHigh-Nutrient"
#Bottom Low-Nutrient
pafl4 <- subset(de_botUNProd, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
pafl4$Habitat <- "PA vs. FL: Hypolimnion \nLow-Nutrient"



###Top vs Bottom:  
otu_topbot1 <- subset(de_prodPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot1$Habitat <- "Top vs. Bottom: High-Nutrient \n Particle-Associated"
# Particle-Associated Low-Nutrient
otu_topbot2 <- subset(de_unprodPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot2$Habitat <- "Top vs. Bottom: Low-Nutrient \n Particle-Associated"
# Free-Living High-Nutrient 
otu_topbot3 <- subset(de_prodFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot3$Habitat <- "Top vs. Bottom: High-Nutrient \n Free-Living"
# Free-Living Low-Nutrient
otu_topbot4 <- subset(de_unprodFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_topbot4$Habitat <- "Top vs. Bottom: Low-Nutrient \n Free-Living"



#Prod vs Oligo:  
otu_troph1 <- subset(de_topPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj)) 
otu_troph1$Habitat <- "Prod vs. Unprod: Particle-Associated \n Epilimnion"
# Particle-Associated BOTTOM
otu_troph2 <- subset(de_botPA, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph2$Habitat <- "Prod vs. Unprod: Particle-Associated \n Hypolimnion"
# Free-Living TOP 
otu_troph3 <- subset(de_topFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph3$Habitat <- "Prod vs. Unprod: Free-Living \nEpilimnion"
# Free-Living BOTTOM
otu_troph4 <- subset(de_botFL, select = c(Phylum, Genus, Species, OTU, log2FoldChange, padj))
otu_troph4$Habitat <- "Prod vs. Unprod: Free-Living \nHypolimnion"


######  Combing all into one dataframe named out_ratio
otu_trophs <- rbind(otu_troph1, otu_troph2, otu_troph3, otu_troph4)
newlog2foldchange <- as.numeric(otu_trophs$log2FoldChange * -1) # To correlate with Top vs bottom
otu_trophs$log2FoldChange <- newlog2foldchange

otu_ratio <-rbind(pafl1, pafl2, pafl4,
                  otu_topbot1, otu_topbot2, otu_topbot3, otu_topbot4,
                  otu_trophs) #otu_troph1, otu_troph2, otu_troph3, otu_troph4)

#What's the min and max ratios?
paste(c("The range of the log2foldChange is", min(otu_ratio$log2FoldChange), "to", 
        max(otu_ratio$log2FoldChange)))

## [1] "The range of the log2foldChange is"
## [2] "-10.6132280899782"                 
## [3] "to"                                
## [4] "11.8805004022321"

#Split of the habitat column to 2 columns named comparison and habitat
otu_cols <- colsplit(otu_ratio$Habitat, ":", c("Comparison", "Habitat"))
otu_ratio$Habitat = NULL
otu_ratios <- cbind(otu_ratio, otu_cols)

Figure S8: Summed Significant OTUs

########   Summed-OTU Plot 
### PA (positive) vs FL (negative)
### Top (negative) vs Bottom (positive)
### Prod (positive) vs Unprod (negative)

#head(otu_ratios)
#subset out prod vs unprod
sig_oturats <- otu_ratios
pvu <- subset(sig_oturats, Comparison == "Prod vs. Unprod")
tvb <- subset(sig_oturats, Comparison == "Top vs. Bottom")
pvf <- subset(sig_oturats, Comparison == "PA vs. FL")

####  High vs Low nutrient
pvu$Preference <- "NA"
pvu_high <- filter(pvu, log2FoldChange > 0)
pvu_high$Preference <- "High-Nutrient" # High-Nutrient is POSITIVE  
pvu_low <- filter(pvu, log2FoldChange < 0)
pvu_low$Preference <- "Low-Nutrient" # Low-Nutrient is NEGATIVE 
pvu_pref <- rbind(pvu_low, pvu_high) %>% # Combine the PA and FL data frames
  group_by(Phylum, Preference) %>%  # group by preference and phylum
  tally() # count how many in each 
pvu_pref$Comparison <- "Prod vs. Unprod"
# Add a new column where the Low nutrient values are negative  
pvu_pref$Count <-  ifelse(pvu_pref$Preference == "Low-Nutrient", pvu_pref$n*-1, pvu_pref$n)

  
#### Top (Epilimnion) vs Bottom (Hypolimnion) 
tvb$Preference <- "NA"
tvb_bottom <- filter(tvb, log2FoldChange > 0)
tvb_bottom$Preference <- "Hypolimnion" # Hypolimnion is POSITIVE  
tvb_top <- filter(tvb, log2FoldChange < 0)
tvb_top$Preference <- "Epilimnion" # Epilimnion is NEGATIVE 
tvb_pref <- rbind(tvb_top, tvb_bottom) %>% # Combine the PA and FL data frames
  group_by(Phylum, Preference) %>%  # group by preference and phylum
  tally() # count how many in each 
tvb_pref$Comparison <- "Top vs. Bottom"
#  Add a column where the Epilimnion values are negative in a new column name count
tvb_pref$Count <-  ifelse(tvb_pref$Preference == "Epilimnion", tvb_pref$n*-1, tvb_pref$n)

# Add a new column called count
  
###  Particle vs Free living 
pvf$Preference <- "NA"
pvf_particle <- filter(pvf, log2FoldChange > 0)
pvf_particle$Preference <- "Particle-Associated"
pvf_free <- filter(pvf, log2FoldChange < 0)
pvf_free$Preference <- "Free-Living"
pvf_pref <- rbind(pvf_free, pvf_particle) %>% # Combine the PA and FL data frames
  group_by(Phylum, Preference) %>%  # group by preference and phylum
  tally() # count how many in each 
pvf_pref$Comparison <- "PA vs. FL"
#  Add a new column called "count" where if the count is free-living we make it negative 
pvf_pref$Count <-  ifelse(pvf_pref$Preference == "Free-Living", pvf_pref$n*-1, pvf_pref$n)


#  Combine into one data frame 
otu_preference <- data.frame(rbind(pvf_pref, pvu_pref, tvb_pref))

# Determine factor order  
otu_preference$Preference <- factor(otu_preference$Preference, levels=c("Particle-Associated","Free-Living",
                                                                        "Low-Nutrient","High-Nutrient",
                                                                        "Epilimnion", "Hypolimnion")) 

otu_preference$Phylum <- factor(otu_preference$Phylum,
                                levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia","Betaproteobacteria","Actinobacteria", "Planctomycetes","Alphaproteobacteria","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae","Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria","Spirochaetae","Candidate_division_OD1","NPL-UPA2","Deinococcus-Thermus","Candidate_division_OP3","Epsilonproteobacteria", "TM6", "TA18","Chlamydiae","Fibrobacteres", "Candidate_division_SR1","Candidate_division_BRC1", "BD1-5", "Candidate_division_WS3","Tenericutes", "Elusimicrobia","Gemmatimonadetes","WCHB1-60","Fusobacteria", "Deferribacteres","Candidate_division_TM7","Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae","Candidate_division_OP11","Dictyoglomi", "SM2F11", "Hyd24-12", "Nitrospirae","SHA-109","TA06", "OC31", "GOUTA4", "Caldiserica","Candidate_division_JS1","FGL7S", "unclassified"))


otu_preference$Comparison <- factor(otu_preference$Comparison,
                                    levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom")) 

OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria",
              "Planctomycetes", "Alphaproteobacteria", "Deltaproteobacteria","Gammaproteobacteria", 
              "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", 
              "Spirochaetae", "unclassified")


summed_16 <- otu_preference[otu_preference$Phylum %in% OTUtop16, ]

summed_16$Phylum = with(summed_16, factor(Phylum, levels = rev(levels(Phylum))))


#####  Plotting FIGURE S8  #####  Plotting FIGURE S8  #####  Plotting FIGURE S8
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS8_summed_ALL_otus.pdf",  width= 7.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS8_summed_ALL_otus.tiff", width= 7.5, height=6, units = "in", res = 175)
ggplot(summed_16, aes(y=Count, x=Phylum, fill=Preference)) + 
  geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
  geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
  theme_gray() +  scale_y_continuous(breaks=seq(-30, 200, 30)) +
  ggtitle("All OTUs in the Data") +
  facet_grid(. ~ Comparison, scales = "free_x", labeller = mf_labeller) + theme_bw() +
  ylab("Total Number of Significant OTUs") +  xlab("Phylum") + 
  scale_fill_manual(name = "", limits=c("Particle-Associated","Free-Living","Low-Nutrient",
                                        "High-Nutrient","Epilimnion", "Hypolimnion"), 
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
                                "limegreen", "darkgreen")) +
  guides(fill = guide_legend(keywidth = 0.75, keyheight = 0.75)) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        legend.position="right"); #dev.off()

Figure S8: Significant differentially abundant OTUs in the 17 most abundant phyla and classes of Proteobacteria. Total number of OTUs that were significantly differentially abundant between filter fractions, nutrient levels, and lake layers. For each comparison, the number of differentially abundant OTUs in each habitat combination was summed.

Working up right panel of Figure 5: Proportion of significant OTUs

# Get the significant OTUs 
# summed relative to its abundance 
### Free-living
sigotu_free_unique <- left_join(pvf_free, otu_abund_free, by = "OTU") %>%
  left_join(phy_abund_free, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) *-1) %>%
  mutate(Comparison = "PA vs. FL",
         Preference = "Free-Living")

# Particle-Associated 
sigotu_particle_unique <- left_join(pvf_particle, otu_abund_particle, by = "OTU") %>%
  left_join(phy_abund_particle, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy))%>%
  mutate(Comparison = "PA vs. FL",
         Preference = "Particle-Associated")

# Epilimnion 
sigotu_epilimnion_unique <- left_join(tvb_top, otu_abund_epi, by = "OTU") %>%
  left_join(phy_abund_epi, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) * -1) %>%
  mutate(Comparison = "Top vs. Bottom",
         Preference = "Epilimnion")

# Hypolimnion 
sigotu_hypolimnion_unique <- left_join(tvb_bottom, otu_abund_hypo, by = "OTU") %>%
  left_join(phy_abund_hypo, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy)) %>%
  mutate(Comparison = "Top vs. Bottom",
         Preference = "Hypolimnion")

# High-Nutrient 
sigotu_high_nutrient_unique <- left_join(pvu_high, otu_abund_high, by = "OTU") %>%
  left_join(phy_abund_high, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy)) %>%
  mutate(Comparison = "Prod vs. Unprod",
         Preference = "High-Nutrient")

# Low-Nutrient 
sigotu_low_nutrient_unique <- left_join(pvu_low, otu_abund_low, by = "OTU") %>%
  left_join(phy_abund_low, by = "Phylum") %>%
  select(Preference, Phylum, Genus, Species, OTU, Comparison, OTU_sum, phylum_sum) %>% # Select columns
  mutate(OTU_rel_abund_byphy = OTU_sum/phylum_sum) %>% # Calculate OTU relative abundance by phylum 
  distinct() %>% # Take only the rows that are unique (no repeat of OTUs)
  group_by(Phylum) %>%
  summarize(sig_phylum_relabund = sum(OTU_rel_abund_byphy) *-1) %>%
  mutate(Comparison = "Prod vs. Unprod",
         Preference = "Low-Nutrient")



sig_otus_relabund_0 <- rbind(sigotu_free_unique, sigotu_particle_unique, sigotu_epilimnion_unique,
                             sigotu_hypolimnion_unique, sigotu_low_nutrient_unique, sigotu_high_nutrient_unique)


# Determine factor order  
sig_otus_relabund_0$Preference <- factor(sig_otus_relabund_0$Preference,
                                    levels = c("Particle-Associated","Free-Living",
                                               "Low-Nutrient","High-Nutrient",
                                               "Epilimnion", "Hypolimnion")) 

sig_otus_relabund_0$Phylum <- factor(sig_otus_relabund_0$Phylum,
                                     levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria","Actinobacteria", "Planctomycetes", "Alphaproteobacteria","unclassified", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi","Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi","Acidobacteria", "Spirochaetae", "Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus", "Candidate_division_OP3","Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae", "Fibrobacteres","Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5","Candidate_division_WS3", "Tenericutes", "Elusimicrobia","Gemmatimonadetes", "WCHB1-60","Fusobacteria", "Deferribacteres","Candidate_division_TM7", "Candidate_division_OP8", "SPOTSOCT00m83","Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11","Hyd24-12", "Nitrospirae","SHA-109", "TA06", "OC31", "GOUTA4","Caldiserica", "Candidate_division_JS1", "FGL7S"))


sig_otus_relabund_0$Comparison <- factor(sig_otus_relabund_0$Comparison,
                                    levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom")) 

# Reverse the order of the phyla so it plots logically 
sig_otus_relabund_0$Phylum = with(sig_otus_relabund_0, factor(Phylum, levels = rev(levels(Phylum)))) 


#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/SigOTUs_ALL_Relabund.pdf",  width= 7.5, height=8)
sig_phy_relabund_plot <- ggplot(sig_otus_relabund_0, aes(y=sig_phylum_relabund, x=Phylum, fill=Preference)) + 
  geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
  geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
  theme_gray() +  #scale_y_continuous(breaks=seq(-30, 200, 30)) +
  ggtitle("All Phyla and OTUs in the Data") +
  facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
  ylab("Within Phylum Relative Abundance of Significant OTUs") +  xlab("Phylum") + 
  scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
                                        "High-Nutrient","Epilimnion", "Hypolimnion"), 
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
                                "limegreen", "darkgreen")) +
  guides(fill = guide_legend(keywidth = 0.75, keyheight = 0.75)) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        legend.position="right");  #dev.off()

Code for Figure 5, right panel

OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria","Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae")


sigotu_relabund_16 <- sig_otus_relabund_0[sig_otus_relabund_0$Phylum %in% OTUtop16, ]
# Reverse the order of the phyla so it plots logically 
sig_otus_relabund_0$Phylum = with(sig_otus_relabund_0, factor(Phylum, levels = rev(levels(Phylum)))) 


#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/SigOTUs_Top16_Relabund.pdf",  width= 7.5, height=6)
sigotu_phy_relabund_plot <- ggplot(sigotu_relabund_16, aes(y=sig_phylum_relabund, x=Phylum, fill=Preference)) + 
  geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
  geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
  theme_gray() +  #scale_y_continuous(breaks=seq(-30, 200, 30)) +
  #ggtitle("All OTUs in the Top 16 Phyla in the Data") +
  facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
  ylab("Within Phylum Relative Abundance of Significant OTUs") +  xlab("Phylum") + 
  scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
                                        "High-Nutrient","Epilimnion", "Hypolimnion"), 
                    labels = c("Free-Living (21)","Particle-Associated (20)","Low-Nutrient (15)",
                               "High-Nutrient (26)","Epilimnion (19)", "Hypolimnion (18)"), 
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
                                "limegreen", "darkgreen")) +
  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        legend.position="right"); #dev.off()

Working up left panel of Figure 5: Relative abundance of OTUs

# Left Panel
#### Number of significant OTUs versus the total number of OTUs by habitat and phylum

#  Calculate a data frame with a tally of the significant OTUs by habitat and by phylum 
free_living_sig_otus <- pvf %>% # Make a data frame with significant OTUs unique to free living 
  filter(log2FoldChange < 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "Free-Living", Comparison =  "PA vs. FL")
colnames(free_living_sig_otus)[2] <- "SigOTU_Total"


particle_associated_sig_otus <- pvf %>% # Make a data frame with significant OTUs unique to free living 
  filter(log2FoldChange > 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "Particle-Associated", Comparison =  "PA vs. FL")
colnames(particle_associated_sig_otus)[2] <- "SigOTU_Total"


epilimnion_sig_otus <- tvb %>% # Make a data frame with significant OTUs unique to Epilimnion
  filter(log2FoldChange < 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "Epilimnion", Comparison =  "Top vs. Bottom")
colnames(epilimnion_sig_otus)[2] <- "SigOTU_Total"


hypolimnion_sig_otus <- tvb %>% # Make a data frame with significant OTUs unique to Hypolimnion
  filter(log2FoldChange > 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "Hypolimnion", Comparison =  "Top vs. Bottom")
colnames(hypolimnion_sig_otus)[2] <- "SigOTU_Total"


high_nutrient_sig_otus <- pvu %>% # Make a data frame with significant OTUs unique to high-nutrient
  filter(log2FoldChange > 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "High-Nutrient", Comparison =  "Prod vs. Unprod")
colnames(high_nutrient_sig_otus)[2] <- "SigOTU_Total"


low_nutrient_sig_otus <- pvu %>% # Make a data frame with significant OTUs unique to low-nutrient
  filter(log2FoldChange < 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() %>%
  group_by(Phylum) %>%
  tally() %>%
  mutate(Preference = "Low-Nutrient", Comparison =  "Prod vs. Unprod")
colnames(low_nutrient_sig_otus)[2] <- "SigOTU_Total"

###  Need to add the taxonomy to the OTU information
nosherwin_FWDB_scaled <- subset_samples(nowin_FWDB_scaled, names != "SHEE" & names != "SHEH" & 
                                          names != "SHEE3um" & names != "SHEH3um")
nosherwin_FWDB_scaled <- prune_taxa(taxa_sums(nosherwin_FWDB_scaled) > 0, nosherwin_FWDB_scaled)

taxa <- data.frame(tax_table(nosherwin_FWDB_scaled)) %>%
  select(Phylum, OTU)

free_living_all_otus <- inner_join(otu_abund_free_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Free) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(free_living_all_otus)[2] <- "AllOTU_Total"


particle_associated_all_otus <- inner_join(otu_abund_particle_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Particle) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(particle_associated_all_otus)[2] <- "AllOTU_Total"


epilimnion_all_otus <- inner_join(otu_abund_epi_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Epi) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(epilimnion_all_otus)[2] <- "AllOTU_Total"


hypolimnion_all_otus <- inner_join(otu_abund_hypo_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Hypo) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(hypolimnion_all_otus)[2] <- "AllOTU_Total"


high_nutrient_all_otus <- inner_join(otu_abund_high_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_High) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(high_nutrient_all_otus)[2] <- "AllOTU_Total"


low_nutrient_all_otus <- inner_join(otu_abund_low_pruned, taxa, by = "OTU") %>%
  select(-OTU_sum_Low) %>% 
  distinct() %>%
  group_by(Phylum) %>%
  tally() 
colnames(low_nutrient_all_otus)[2] <- "AllOTU_Total"


#####################  Combine the taxonomic information with the significant OTUs
####  Multiply free-living, low-nutrient, and epilimnion samples by -1 for plotting
otu_join_free <- inner_join(free_living_sig_otus, free_living_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))

otu_join_particle <- inner_join(particle_associated_sig_otus, particle_associated_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))

otu_join_epi <- inner_join(epilimnion_sig_otus, epilimnion_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))

otu_join_hypo <- inner_join(hypolimnion_sig_otus, hypolimnion_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))

otu_join_high <- inner_join(high_nutrient_sig_otus, high_nutrient_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = 1* (SigOTU_Total/AllOTU_Total))

otu_join_low <- inner_join(low_nutrient_sig_otus, low_nutrient_all_otus, by = "Phylum") %>%
  mutate(Proportion_sig2total = -1* (SigOTU_Total/AllOTU_Total))

otu_joined <- rbind(otu_join_free, otu_join_particle, otu_join_epi, otu_join_hypo, otu_join_high, otu_join_low)

otu_joined$Preference <- factor(otu_joined$Preference,
                                    levels = c("Particle-Associated","Free-Living",
                                               "Low-Nutrient","High-Nutrient",
                                               "Epilimnion", "Hypolimnion")) 

otu_joined$Phylum <- factor(otu_joined$Phylum,
                            levels = c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria","Actinobacteria", "Planctomycetes", "Alphaproteobacteria", "unclassified","Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae","Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae","Candidate_division_OD1","NPL-UPA2", "Deinococcus-Thermus","Candidate_division_OP3", "Epsilonproteobacteria", "TM6", "TA18", "Chlamydiae","Fibrobacteres", "Candidate_division_SR1", "Candidate_division_BRC1", "BD1-5","Candidate_division_WS3", "Tenericutes", "Elusimicrobia", "Gemmatimonadetes","WCHB1-60","Fusobacteria", "Deferribacteres", "Candidate_division_TM7","Candidate_division_OP8", "SPOTSOCT00m83", "Thermotogae", "Candidate_division_OP11", "Dictyoglomi", "SM2F11", "Hyd24-12","Nitrospirae","SHA-109", "TA06", "OC31", "GOUTA4", "Caldiserica","Candidate_division_JS1", "FGL7S"))


otu_joined$Comparison <- factor(otu_joined$Comparison, 
                                levels = c("PA vs. FL", "Prod vs. Unprod","Top vs. Bottom")) 


OTUtop16 <- c("Bacteroidetes", "Cyanobacteria", "Verrucomicrobia", "Betaproteobacteria", "Actinobacteria","Planctomycetes", "Alphaproteobacteria","unclassified", "Deltaproteobacteria","Gammaproteobacteria", "Chloroflexi", "Lentisphaerae", "Armatimonadetes", "Firmicutes", "Chlorobi", "Acidobacteria", "Spirochaetae")


otu_joined_16 <- otu_joined[otu_joined$Phylum %in% OTUtop16, ]
# Reverse the order of the phyla so it plots logically 
otu_joined_16$Phylum = with(otu_joined_16, factor(Phylum, levels = rev(levels(Phylum)))) 



#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/PROPORTION_Top16_Relabund.pdf",  width= 7.5, height=6)
sigotu_proportion_plot <- ggplot(otu_joined_16, aes(y=Proportion_sig2total, x=Phylum, fill=Preference)) + 
  geom_bar(stat="identity", position="identity") + coord_flip() + #ggtitle("Summed OTUs") +
  geom_bar(stat="identity", colour = "black", show_guide = FALSE, position="identity") +
  theme_gray() +  scale_y_continuous(breaks=seq(-0.5, 1, 0.1), lim = c(-0.1, 0.3)) + 
  #ggtitle("All OTUs in the Top 16 Phyla in the Data") +
  facet_grid(. ~ Comparison, labeller = mf_labeller) + theme_bw() +
  #scale_y_continuous(breaks=seq(-0.3, 1, 0.1)) +
  ylab("Proportion of Significant to Total OTUs by Phylum") +  xlab("Phylum") + 
  scale_fill_manual(name = "", limits=c("Free-Living","Particle-Associated","Low-Nutrient",
                                        "High-Nutrient","Epilimnion", "Hypolimnion"), 
                    labels = c("Free-Living (21)","Particle-Associated (20)","Low-Nutrient (15)",
                               "High-Nutrient (26)","Epilimnion (19)", "Hypolimnion (18)"), 
                    values = c( "orangered", "orangered4", "cornflowerblue", "royalblue4",
                                "limegreen", "darkgreen")) +
  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_blank(),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        legend.position="right");  #dev.off()


####  OVERLAP BETWEEN HYPO AND HIGH NUTRIENT 
hypo_otus <- tvb %>% # Make a data frame with significant OTUs unique to Hypolimnion
  filter(log2FoldChange > 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct()

high_otus <- pvu %>% # Make a data frame with significant OTUs unique to high-nutrient
  filter(log2FoldChange > 0) %>%
  select(Phylum, Genus, Species, OTU, Comparison) %>%
  distinct() 

nrow(hypo_otus) # Number of Sig OTUs in hypolimnion

## [1] 486

nrow(high_otus) # Number of sig OTUs in the high-nutrient lakes

## [1] 258

shared_hypo_high <- inner_join(hypo_otus, high_otus, by = "OTU")
nrow(shared_hypo_high) # Number of sig OTUs that overlap between hypo & high-nutrient

## [1] 250

#  Which OTUs are significant in productive lakes that are also NOT significant in the hypolimnion?
# There's only 8!
anti_join(high_otus, hypo_otus, by = "OTU")

##                Phylum        Genus      Species      OTU      Comparison
## 1  Betaproteobacteria   Iodobacter         <NA> Otu00442 Prod vs. Unprod
## 2       Bacteroidetes unclassified         <NA> Otu00240 Prod vs. Unprod
## 3     Verrucomicrobia unclassified         <NA> Otu00077 Prod vs. Unprod
## 4 Gammaproteobacteria unclassified         <NA> Otu00210 Prod vs. Unprod
## 5     Verrucomicrobia unclassified unclassified Otu00053 Prod vs. Unprod
## 6         Chloroflexi unclassified         <NA> Otu00598 Prod vs. Unprod
## 7     Verrucomicrobia unclassified         <NA> Otu00005 Prod vs. Unprod
## 8       Bacteroidetes unclassified unclassified Otu00168 Prod vs. Unprod

Figure 5: Significant OTUs

# COMBINE THE TWO PLOTS 
b1 <- sigotu_proportion_plot + theme(legend.position="none", 
                                     plot.margin = unit(c(0.1, 0.05, 0.1, 0.1), "cm")) 

b2 <- sigotu_phy_relabund_plot + xlab("") +theme(axis.text.y = element_blank(), 
                                                 plot.margin = unit(c(0.1, 0.1, 0.1, 0.05), "cm")) 

#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig5_proportion_relabund_Top16.pdf",  width= 12, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig5_proportion_relabund_Top16.tiff", width= 12, height=6, units = "in", res = 175)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1,2,width=c(0.46,0.54))))
print(b1, vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(b2, vp=viewport(layout.pos.row=1,layout.pos.col=2));  #dev.off()

Figure 5: Significant differentially abundant OTUs in the 17 most abundant phyla and classes of Proteobacteria. (Left) Proportion of the total number of OTUs that were significantly differentially abundant divided by the total number of OTUs present within each phylum and habitat. (Right) Within-phylum relative abundance of the significant OTUs within each phylum and habitat. Numbers in parentheses within the legend represent sample sizes of each lake habitat.

Figure S7: Log2-fold Odds Ratio of OTUs at the Genus Level

########  GENUS LEVEL PLOT ORGANIZED BY PHYLUM
## GENUS LEVEL HEAT PLOT
sub_abund <- subset(abund, select = c("Phylum", "PercentPhy"))
oturats_abund <- merge(otu_ratios, sub_abund, by = "Phylum")
ordered_otu_ratios <- arrange(oturats_abund, desc(PercentPhy)) 

# manually place order in the next line to make sure the Genera are in the right order
#unique(ordered_otu_ratios$Genus) 
ordered_otu_ratios$Genus <- factor(ordered_otu_ratios$Genus,
                                   levels = c("unclassified", "bacIV-B", "vadinBC27_wastewater-sludge_group", "bacII-A","Paludibacter","bacIV-A","bacIII-A", "bacI-A", "Emticicia", "bacVI-B","Solitalea", "bacIII-B", "Synechococcus", "Anabaena","Snowella","Planktothrix","Pseudanabaena","Luteolibacter","CandidatusXiphinematobacter", "Pnec","betIV-A", "betI-A", "Dechloromonas","Iodobacter", "betVII-B", "Sterolibacterium", "Sideroxydans","Sulfuritalea","PRD01a011B", "Candidatus_Branchiomonas", "betIII-A", "Nitrosospira","acIV-A", "acIV-D", "acTH1-A", "acI-A", "acI-B", "Myco", "acI-C", "Luna1-A", "acSTL-A", "Pirellula", "Candidatus_Nostocoida", "CL500-3","Planctomyces", "Candidatus_Anammoximicrobium", "Pir4_lineage","Phenylobacterium", "Rickettsia", "Azospirillum", "alfI-B", "alfII-A","Haematobacter", "Magnetospirillum", "Rubellimicrobium", "Roseomonas","Rhizomicrobium", "alfIV-A", "Roseospirillum","OM27_clade", "Desulfomonile","Peredibacter", "Desulfatirhabdium","Syntrophus", "Desulfocapsa", "Sorangium", "Desulfobulbus", "Smithella", "Desulfobacula", "Phaselicystis","Desulfovibrio", "Methylobacter", "Thiocystis", "Coxiella", "Crenothrix","Thiodictyon", "Lamprocystis", "Candidatus_Competibacter", "Oscillochloris","Chloronema", "Anaerolinea", "Leptolinea", "Victivallis","Chlorobium", "Armatimonas", "Incertae_Sedis", "Fastidiosipila","Fonticella", "Clostridium_sensu_stricto_9", "Anaerofustis", "Geothrix","Treponema", "Spirochaeta", "Leptospira", "Brevinema", "Sulfurospirillum","Sulfurimonas", "possible_genus_03", "Gemmatimonas", "SC103"))


ordered_otu_ratios$Genus = with(ordered_otu_ratios, factor(Genus, levels = rev(levels(Genus)))) 

sub_ordered_oturats <- subset(ordered_otu_ratios, Genus != "unclassified")


#####  Plotting FIGURE S7  #####  Plotting FIGURE S7  #####  Plotting FIGURE S7 
#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig.S7_genus_heat.pdf",  width= 7.5, height=11)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/Fig.S7_genus_heat.tiff", width= 7.5, height=11, units = "in", res = 175)
ggplot(sub_ordered_oturats, aes(Habitat, Genus)) + geom_tile(aes(fill = log2FoldChange)) + 
  scale_fill_gradient2(name = "Odds-\nRatio", mid = "gray", low = "darkorange", high = "blue4",  
                       na.value = "white", guide = guide_colorbar(barwidth = 1, barheight = 4.5)) +
  theme_bw(base_size = 12) + scale_x_discrete(expand = c(0, 0)) + 
  scale_y_discrete(expand = c(0, 0)) + ylab(NULL) + 
  xlab("Habitat") + ylab("Genus or Freshwater Tribe") + 
  facet_grid(. ~ Comparison, scales = "free", space = "free", labeller=mf_labeller) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(colour = "black", size=8, hjust = 1, vjust=1, angle = 30),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 8),
        strip.text.y = element_blank(),
        strip.text.x = element_text(size = 9, face = "bold", colour = "black"),
        strip.background = element_blank(),
        legend.position = c(0.94, 0.07), #c(0.1, 0.93),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")); #dev.off()

Figure S7: Differentially abundant Genera or Freshwater Tribes, based on the average Log2-ratio of significant OTUs, across lake habitats. Heat map of the significantly differentially abundant (based on log2-ratios of relative abundance data within each genera) bacterial OTUs within each genera between filter fractions, nutrient levels, and lake layers (titles). Blue represents a significant differential abundance of a genus or freshwater tribe in the particle-associated fraction, high-nutrient lakes, or hypolimnion. Yellow represents a significant differential abundance of a genus in the free-living fraction, low-nutrient lakes, or epilimnion. Only habitats with significant differentially abundant OTUs identified were included. Only genera or freshwater tribes with significant differentially abundant OTUs within the dataset are included.

Figure S3, Left Panel: Sub-Sampled Alpha Diversity

# Let's see what our alpha diversity looks like for a rarefied data set using the summed replicate samples.
#FW_sum_reps  # Raw Merged Samples, we have 14,294 OTUs  
# Fix the metadata from merge_samples function 
#sample_data(FW_sum_reps) <- sample_data(FWDB_sum_scale_matround) 
# Remove the 2 wintergreen samples that are actually from the metalimnion 
#FW_sum_reps_nowin <- subset_samples(FW_sum_reps, names != "WINH" & names != "WINH3um") 
#FW_sum_reps_nowin <- prune_taxa(taxa_sums(FW_sum_reps_nowin) > 0, FW_sum_reps_nowin)
# Now, we have 14,105 OTUs !

#Data Import for rarefied data 
#FW_sum_reps_nowin  
#sum_reps_nowin_min_minus1 <- min(sample_sums(FW_sum_reps_nowin)) - 1 # 14,924

# The following code was developed by Michelle Berry:
# Rarefy to 14924 reads with replacement 100 times to estimate species richness
# Since we are rarefying to 14924 reads we need to remove samples with less than 14924 reads
#raredata14924 <- prune_samples(sample_sums(FW_sum_reps_nowin) > sum_reps_nowin_min_minus1, FW_sum_reps_nowin)
#  There should still be 41 samples 

# Initialize matrices to store richness and evenness estimates
#richness <- matrix(nrow = 41,ncol = 100)  # Store richness:  We have 41 samples 
#row.names(richness) <- sample_names(raredata14924)
#evenness <- matrix(nrow = 41,ncol = 100)  #Store evenness
#row.names(evenness) <- sample_names(raredata14924)

# We want to be reproducible - so let's set the seed.
#set.seed(15879966) # Same seed as in the first chunk

# For 100 replications, rarefy the OTU table to 14854 reads and store the richness and evennes estimates in our 2 matrices we just created. The default for the rarefy_even_depth command is to pick with replacement so I set it to false. Picking without replacement is more computationally intensive 
#for (i in 1:100) {
#  r <- rarefy_even_depth(raredata14924, sample.size = sum_reps_nowin_min_minus1, verbose = FALSE, replace = FALSE)
#  rich <- as.numeric(as.matrix(estimate_richness(r, measures="Observed")))
#  richness[,i] <- rich
#  even <- as.numeric(as.matrix(estimate_richness(r, measures="InvSimpson")))
#  evenness[,i] <- even
#}

#write.table(richness, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/ObservedRichness100_summed", 
#            row.names = TRUE)
#write.table(evenness, "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/InvSimpson100_summed", 
#            row.names = TRUE)


richness_summed <- read.table(
  "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/ObservedRichness100_summed",  header = TRUE)

evenness_summed <- read.table(
  "~/Final_PAFL_Trophicstate/Nov6_FWDB_Silva/Nov_alpha_data/InvSimpson100_summed", header = TRUE)

# Create a new matrix to hold the means and standard deviations of all the richness_summed estimates
rich_stats_summed <- matrix(nrow= nrow(richness_summed), ncol=1)
rich_stats_summed[,1] <- apply(richness_summed, 1, mean)
#rich_stats_summed[,2] = apply(richness_summed, 1, sd)
rich_stats_summed = data.frame(row.names(richness_summed), rich_stats_summed)
colnames(rich_stats_summed) <- c("samples","mean_value")
rich_stats_summed$Test <- "Observed Richness"

# Create a new matrix to hold the means and standard deviations of the evenness_summed estimates
even_stats_summed <- matrix(nrow = nrow(evenness_summed), ncol = 1)
even_stats_summed[,1] <- apply(evenness_summed, 1, mean)
#even_stats[,2] = apply(evenness_summed, 1, sd)
even_stats_summed <- data.frame(row.names(evenness_summed), even_stats_summed)
colnames(even_stats_summed) <- c("samples","mean_value")
even_stats_summed$Test <- "Inverse Simpson"


###  Add SIMPSON'S EVENNESS!
simps_even_summed <- data.frame(matrix(nrow = nrow(evenness_summed), ncol = 3))
colnames(simps_even_summed) = c("samples","mean_value", "Test")
simps_even_summed$samples <- even_stats_summed$samples
simps_even_summed$mean_value <-  even_stats_summed$mean_value/rich_stats_summed$mean_value
simps_even_summed$Test <- "Simpson's Evenness"


#Combine the rich.stats and the even.stats dataframes
alpha_stats_summed <- rbind(rich_stats_summed, even_stats_summed, simps_even_summed)
alpha_stats_summed$names <- alpha_stats_summed$samples
alpha_stats_summed <- makeCategories_dups(alpha_stats_summed)
alpha_stats_summed$ProdLevel <- as.character(alpha_stats_summed$trophicstate)
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Eutrophic"] <- "Productive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Mesotrophic"] <- "Productive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$trophicstate == "Oligotrophic"] <- "Unproductive"
alpha_stats_summed$ProdLevel[alpha_stats_summed$lakenames == "Sherman"] <- "Mixed"
alpha_stats_summed$trophicstate[alpha_stats_summed$lakenames == "Sherman"] <- "Mixed"


####  Add all information together!
for(i in 1:length(alpha_stats_summed$limnion)){
  alpha_stats_summed$prod_lim[i]<-paste(as.character(alpha_stats_summed$ProdLevel[i]),
                                  as.character(alpha_stats_summed$limnion[i]),
                                  as.character(alpha_stats_summed$filter[i]))}


summed_stats <- alpha_stats_summed %>%
  group_by(prod_lim, Test) %>%
  summarize(mean = mean(mean_value), 
            SE = se(mean_value), 
            SD = sd(mean_value), 
            median = median(mean_value))

final_summed_alpha_stats <- left_join(alpha_stats_summed, summed_stats, by = c("prod_lim", "Test"))

final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Eutrophic"] <- "High-Nutrient"
final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Mesotrophic"] <- "High-Nutrient"
final_summed_alpha_stats$ProdLevel[final_summed_alpha_stats$trophicstate == "Oligotrophic"] <- "Low-Nutrient"

final_summed_alpha_stats$ProdLevel <-factor(final_summed_alpha_stats$ProdLevel,
                                            levels=c("High-Nutrient", "Low-Nutrient", "Mixed"))
final_summed_alpha_stats$Test <-factor(final_summed_alpha_stats$Test,
                                       levels=c("Inverse Simpson", "Simpson's Evenness","Observed Richness"))

final_summed_alpha_stats$prod_lim <-factor(final_summed_alpha_stats$prod_lim,levels=c("Productive Epilimnion Free", "Productive Epilimnion Particle","Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle","Mixed Mixed Particle", "Mixed Mixed Free"))

rare_alpha_stats <- final_summed_alpha_stats %>%
  group_by(Test) %>%
  summarize(max = max(mean_value),
            min = min(mean_value))

final_summed_alpha_stats_plot <- 
  ggplot(final_summed_alpha_stats, aes(x = prod_lim, y = mean_value, color = prod_lim)) + 
  geom_point(size = 3, alpha = 0.7) +
  ggtitle("Sub-Sampled at 14,924 Sequences") + theme_bw() +   xlab("Habitat") + ylab("") + 
  geom_errorbar(aes(ymin= mean-SD, ymax=mean+SD), color = "black", width=.1, position=position_dodge(.9)) +
  layer(data = final_summed_alpha_stats, mapping = aes(x = prod_lim, y = median), 
        geom = "point", pch = 95, size = 7, color = "black") + 
  layer(data = final_summed_alpha_stats, mapping = aes(x = prod_lim, y = mean), 
        geom = "point", pch = 126, size = 7, color = "black") + 
  facet_grid(Test ~ ProdLevel, scales="free", space="free_x") +
  scale_color_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free", 
                                         "Productive Hypolimnion Particle", "Productive Hypolimnion Free",
                                         "Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
                                         "Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free",
                                         "Mixed Mixed Particle", "Mixed Mixed Free"), 
                     values = c("gray60", "gray60", "gray60", "gray60","gray60","gray60",
                                "gray60","gray60", "gray60", "gray60"))+
    scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle", 
                              "Productive Hypolimnion Free","Productive Hypolimnion Particle",
                              "Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", 
                              "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle",
                              "Mixed Mixed Particle", "Mixed Mixed Free"),
                   labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated","Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated","Particle-Associated", "Free-Living")) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=8),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        plot.title = element_text(face="bold", size = 12),
        strip.text.x = element_text(size=10, face="bold"),
        strip.text.y = element_text(size=10, face="bold"),
        legend.title = element_text(size=8, face="bold"),
        legend.text = element_text(size = 8),
        plot.margin = unit(c(0.1, -0.1, 0.1, 0.1), "cm"), #top, right, bottom, left
        strip.background = element_blank(),
        legend.position="none")

Figure S3, Right Panel: Scaled Alpha Diversity

### Using the sum and scaled data
## remove the 2 wintergreen "hypolimnion" samples
nowin_FWDB_scaled # 8,823 OTUs

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8823 taxa and 41 samples ]
## sample_data() Sample Data:       [ 41 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8823 taxa by 9 taxonomic ranks ]

names <- sample_names(nowin_FWDB_scaled)
sum_scaled_matround_rich <- as.numeric(as.matrix(estimate_richness(nowin_FWDB_scaled, measures="Observed")))
sum_scaled_matround_even <- as.numeric(as.matrix(estimate_richness(nowin_FWDB_scaled, measures="InvSimpson")))
sum_scaled_matround_simpseven <- sum_scaled_matround_even/sum_scaled_matround_rich

#Create new data frame
sum_scaled_matround_alpha <- data.frame(matrix(ncol = 0, nrow = length(names)))
sum_scaled_matround_alpha$names <- names
sum_scaled_matround_alpha$ObsRichness <- sum_scaled_matround_rich
sum_scaled_matround_alpha$InvSimpson <- sum_scaled_matround_even
sum_scaled_matround_alpha$Simps_Even <- sum_scaled_matround_simpseven

# Reshape the data
sum_scaled_matround_alpha <- gather(sum_scaled_matround_alpha, "Test", "Value", 2:4)
sum_scaled_matround_alpha <- makeCategories_dups(sum_scaled_matround_alpha)
sum_scaled_matround_alpha$ProdLevel <- as.character(sum_scaled_matround_alpha$trophicstate)
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Eutrophic"] <- "Productive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Mesotrophic"] <- "Productive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Oligotrophic"] <- "Unproductive"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$lakenames == "Sherman"] <- "Mixed"
sum_scaled_matround_alpha$trophicstate[sum_scaled_matround_alpha$lakenames == "Sherman"] <- "Mixed"
# Change test 
sum_scaled_matround_alpha$Test <- as.character(sum_scaled_matround_alpha$Test)
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "ObsRichness"] <- "Observed Richness"
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "InvSimpson"] <- "Inverse Simpson"
sum_scaled_matround_alpha$Test[sum_scaled_matround_alpha$Test == "Simps_Even"] <- "Simpson's Evenness"

for(i in 1:length(sum_scaled_matround_alpha$limnion)){
  sum_scaled_matround_alpha$prod_lim[i]<-paste(as.character(sum_scaled_matround_alpha$ProdLevel[i]),
                                  as.character(sum_scaled_matround_alpha$limnion[i]),
                                  as.character(sum_scaled_matround_alpha$filter[i]))}

# Calculate summary statistics 
sum_scaled_matround_prod_stats <- sum_scaled_matround_alpha %>%
  group_by(prod_lim, Test) %>%
  summarize(mean = mean(Value), 
            SE = se(Value), 
            SD = sd(Value), 
            median = median(Value))


sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Eutrophic"] <- "High-Nutrient"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Mesotrophic"] <- "High-Nutrient"
sum_scaled_matround_alpha$ProdLevel[sum_scaled_matround_alpha$trophicstate == "Oligotrophic"] <- "Low-Nutrient"

final_sum_scaled_matround_prod_stats <- left_join(sum_scaled_matround_alpha, sum_scaled_matround_prod_stats, 
                                                  by = c("prod_lim", "Test"))

final_sum_scaled_matround_prod_stats$ProdLevel <-factor(final_sum_scaled_matround_prod_stats$ProdLevel,
                                                        levels=c("High-Nutrient", "Low-Nutrient", "Mixed"))

final_sum_scaled_matround_prod_stats$Test <-factor(final_sum_scaled_matround_prod_stats$Test,
                                                   levels=c("Inverse Simpson", "Simpson's Evenness",
                                                            "Observed Richness"))

final_sum_scaled_matround_prod_stats$prod_lim <-factor(final_sum_scaled_matround_prod_stats$prod_lim,
                                                       levels=c("Productive Epilimnion Free", "Productive Epilimnion Particle","Productive Hypolimnion Free","Productive Hypolimnion Particle","Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle","Mixed Mixed Particle", "Mixed Mixed Free"))

scaled_alpha_stats <- final_sum_scaled_matround_prod_stats %>%
  group_by(Test) %>%
  summarize(max = max(Value),
            min = min(Value))

sum_scaled_matround_prod_alpha_plot <- 
  ggplot(final_sum_scaled_matround_prod_stats, aes(x = prod_lim, y = Value, color = prod_lim)) + 
  geom_point(size = 3, alpha = 0.7) +
  ggtitle("Scaled to 14,925 Sequences and Rounded") + theme_bw() +   xlab("Habitat") + ylab("") + 
  geom_errorbar(aes(ymin= mean-SD, ymax=mean+SD), color = "black", width=.1, position=position_dodge(.9)) +
  layer(data = final_sum_scaled_matround_prod_stats, mapping = aes(x = prod_lim, y = median), 
        geom = "point", pch = 95, size = 7, color = "black") + 
  layer(data = final_sum_scaled_matround_prod_stats, mapping = aes(x = prod_lim, y = mean), 
        geom = "point", pch = 126, size = 7, color = "black") + 
  facet_grid(Test ~ ProdLevel, scales="free", space="free_x") +
  scale_color_manual(name = "", limits=c("Productive Epilimnion Particle", "Productive Epilimnion Free", 
                                         "Productive Hypolimnion Particle", "Productive Hypolimnion Free",
                                         "Unproductive Epilimnion Particle", "Unproductive Epilimnion Free",
                                         "Unproductive Hypolimnion Particle", "Unproductive Hypolimnion Free",
                                         "Mixed Mixed Particle", "Mixed Mixed Free"), 
                     values = c("gray60", "gray60", "gray60", "gray60","gray60",
                                "gray60","gray60","gray60", "gray60", "gray60"))+
    scale_x_discrete(breaks=c("Productive Epilimnion Free", "Productive Epilimnion Particle", 
                              "Productive Hypolimnion Free","Productive Hypolimnion Particle",
                              "Unproductive Epilimnion Free", "Unproductive Epilimnion Particle", 
                              "Unproductive Hypolimnion Free", "Unproductive Hypolimnion Particle",
                              "Mixed Mixed Particle", "Mixed Mixed Free"),
                   labels=c("Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", 
                            "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
                            "Epilimnion \nFree-Living", "Epilimnion \nParticle-Associated", 
                            "Hypolimnion \nFree-Living", "Hypolimnion \nParticle-Associated",
                            "Particle-Associated", "Free-Living")) + 
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=8),
        axis.text.y = element_text(colour = "black", size=8),
        axis.title.y = element_text(face="bold", size=12),
        plot.title = element_text(face="bold", size = 12),
        strip.text.x = element_text(size=10, face="bold"),
        strip.text.y = element_text(size=10, face="bold"),
        legend.title = element_text(size=8, face="bold"),
        legend.text = element_text(size = 8),
        strip.background = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, -0.1), "cm"), #top, right, bottom, left
        legend.position="none")

#pdf(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS3_alpha_diversity.pdf",  width= 10.5, height=6)
#tiff(file="~/Final_PAFL_Trophicstate/FWDB_Figures/FigS3_alpha_diversity.tiff", width= 10.5, height=6, units = "in", res = 175)
multiplot(final_summed_alpha_stats_plot, sum_scaled_matround_prod_alpha_plot, cols = 2); #dev.off()

Figure S3: Within-sample OTU richness and evenness impacted by (Left panel) sub-sampling (rarefy-ing) the data 100 times without replacement at 14,924 sequencing reads and (right panel) by scaling the reads to 14,925 sequencing reads as describe within the methods and Figure S2, right panel legend (sequencing depth range is 521 sequences). As the scaling method (right panel) is not the standard method for alpha diversity metrics, caution must be taken when interpreting the plot. Raw (gray points), mean (black tilde) and median (black horizontal bar) values and standard deviation (error bars) for (Top panel) the Inverse Simpson (Middle panel) Simpson’s measure of evenness and (Bottom panel) OTU richness within each lake habitat sampled.

# Provide the Session Information for reproducibility
devtools::session_info()

##  setting  value                       
##  version  R version 3.2.2 (2015-08-14)
##  system   x86_64, darwin13.4.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/Detroit             
##  date     2015-12-08                  
## 
##  package        * version     date       source        
##  acepack          1.3-3.3     2014-11-24 CRAN (R 3.2.0)
##  ade4             1.7-2       2015-04-14 CRAN (R 3.2.0)
##  annotate         1.46.1      2015-07-11 Bioconductor  
##  AnnotationDbi    1.30.1      2015-04-26 Bioconductor  
##  ape            * 3.3         2015-05-29 CRAN (R 3.2.0)
##  assertthat       0.1         2013-12-06 CRAN (R 3.2.0)
##  Biobase          2.28.0      2015-04-17 Bioconductor  
##  BiocGenerics   * 0.14.0      2015-04-17 Bioconductor  
##  BiocParallel     1.2.22      2015-09-30 Bioconductor  
##  biom             0.3.12      2014-02-24 CRAN (R 3.2.0)
##  Biostrings       2.36.4      2015-08-21 Bioconductor  
##  boot             1.3-17      2015-06-29 CRAN (R 3.2.2)
##  chron            2.3-47      2015-06-24 CRAN (R 3.2.0)
##  cluster          2.0.3       2015-07-21 CRAN (R 3.2.2)
##  coda             0.18-1      2015-10-16 CRAN (R 3.2.0)
##  codetools        0.2-14      2015-07-15 CRAN (R 3.2.2)
##  colorspace       1.2-6       2015-03-11 CRAN (R 3.2.0)
##  data.table     * 1.9.6       2015-09-19 CRAN (R 3.2.0)
##  DBI              0.3.1       2014-09-24 CRAN (R 3.2.0)
##  deldir           0.1-9       2015-03-09 CRAN (R 3.2.0)
##  DESeq2         * 1.8.2       2015-10-05 Bioconductor  
##  devtools         1.9.1       2015-09-11 CRAN (R 3.2.0)
##  digest           0.6.8       2014-12-31 CRAN (R 3.2.0)
##  dplyr          * 0.4.3       2015-09-01 CRAN (R 3.2.0)
##  evaluate         0.8         2015-09-18 CRAN (R 3.2.0)
##  foreach          1.4.3       2015-10-13 CRAN (R 3.2.0)
##  foreign          0.8-66      2015-08-19 CRAN (R 3.2.2)
##  formatR          1.2.1       2015-09-18 CRAN (R 3.2.0)
##  Formula          1.2-1       2015-04-07 CRAN (R 3.2.0)
##  futile.logger    1.4.1       2015-04-20 CRAN (R 3.2.0)
##  futile.options   1.0.0       2010-04-06 CRAN (R 3.2.0)
##  genefilter       1.50.0      2015-04-17 Bioconductor  
##  geneplotter      1.46.0      2015-04-17 Bioconductor  
##  GenomeInfoDb   * 1.4.3       2015-09-23 Bioconductor  
##  GenomicRanges  * 1.20.8      2015-09-21 Bioconductor  
##  ggplot2        * 1.0.1       2015-03-17 CRAN (R 3.2.0)
##  gridExtra        2.0.0       2015-07-14 CRAN (R 3.2.0)
##  gtable         * 0.1.2       2012-12-05 CRAN (R 3.2.0)
##  Hmisc            3.17-0      2015-09-21 CRAN (R 3.2.0)
##  htmltools        0.2.6       2014-09-08 CRAN (R 3.2.0)
##  igraph           1.0.1       2015-06-26 CRAN (R 3.2.0)
##  IRanges        * 2.2.9       2015-10-02 Bioconductor  
##  iterators        1.0.8       2015-10-13 CRAN (R 3.2.0)
##  knitr            1.11        2015-08-14 CRAN (R 3.2.2)
##  labeling         0.3         2014-08-23 CRAN (R 3.2.0)
##  lambda.r         1.1.7       2015-03-20 CRAN (R 3.2.0)
##  lattice        * 0.20-33     2015-07-14 CRAN (R 3.2.2)
##  latticeExtra     0.6-26      2013-08-15 CRAN (R 3.2.0)
##  lazyeval         0.1.10      2015-01-02 CRAN (R 3.2.0)
##  LearnBayes       2.15        2014-05-29 CRAN (R 3.2.0)
##  locfit           1.5-9.1     2013-04-20 CRAN (R 3.2.0)
##  magrittr         1.5         2014-11-22 CRAN (R 3.2.0)
##  maptools         0.8-37      2015-09-29 CRAN (R 3.2.0)
##  MASS             7.3-44      2015-08-30 CRAN (R 3.2.0)
##  Matrix           1.2-2       2015-07-08 CRAN (R 3.2.2)
##  memoise          0.2.1       2014-04-22 CRAN (R 3.2.0)
##  mgcv             1.8-8       2015-10-24 CRAN (R 3.2.0)
##  multcompView   * 0.1-7       2015-07-31 CRAN (R 3.2.0)
##  multtest         2.24.0      2015-04-17 Bioconductor  
##  munsell          0.4.2       2013-07-11 CRAN (R 3.2.0)
##  nlme             3.1-122     2015-08-19 CRAN (R 3.2.0)
##  nnet             7.3-11      2015-08-30 CRAN (R 3.2.0)
##  pander         * 0.5.2       2015-05-18 CRAN (R 3.2.0)
##  permute        * 0.8-4       2015-05-19 CRAN (R 3.2.0)
##  pgirmess       * 1.6.2       2015-06-07 CRAN (R 3.2.0)
##  phyloseq       * 1.12.2      2015-04-27 Bioconductor  
##  plyr           * 1.8.3       2015-06-12 CRAN (R 3.2.0)
##  proto            0.3-10      2012-12-22 CRAN (R 3.2.0)
##  R6               2.1.1       2015-08-19 CRAN (R 3.2.2)
##  RColorBrewer     1.1-2       2014-12-07 CRAN (R 3.2.0)
##  Rcpp           * 0.12.1      2015-09-10 CRAN (R 3.2.0)
##  RcppArmadillo  * 0.6.100.0.0 2015-10-04 CRAN (R 3.2.0)
##  reshape2       * 1.4.1       2014-12-06 CRAN (R 3.2.0)
##  rgdal            1.0-4       2015-06-23 CRAN (R 3.2.0)
##  rgeos            0.3-11      2015-05-29 CRAN (R 3.2.0)
##  RJSONIO          1.3-0       2014-07-28 CRAN (R 3.2.0)
##  rmarkdown        0.8.1       2015-10-10 CRAN (R 3.2.2)
##  rpart            4.1-10      2015-06-29 CRAN (R 3.2.2)
##  RSQLite          1.0.0       2014-10-25 CRAN (R 3.2.0)
##  S4Vectors      * 0.6.6       2015-09-21 Bioconductor  
##  scales         * 0.3.0       2015-08-25 CRAN (R 3.2.2)
##  sciplot        * 1.1-0       2012-11-20 CRAN (R 3.2.0)
##  sp               1.2-1       2015-10-18 CRAN (R 3.2.2)
##  spdep            0.5-88      2015-03-17 CRAN (R 3.2.0)
##  splancs          2.01-38     2015-09-29 CRAN (R 3.2.0)
##  stringi          1.0-1       2015-10-22 CRAN (R 3.2.0)
##  stringr          1.0.0       2015-04-30 CRAN (R 3.2.0)
##  survival         2.38-3      2015-07-02 CRAN (R 3.2.2)
##  tidyr          * 0.3.1       2015-09-10 CRAN (R 3.2.0)
##  vegan          * 2.3-1       2015-09-25 CRAN (R 3.2.0)
##  XML              3.98-1.3    2015-06-30 CRAN (R 3.2.0)
##  xtable           1.7-4       2014-09-12 CRAN (R 3.2.0)
##  XVector          0.8.0       2015-04-17 Bioconductor  
##  yaml             2.1.13      2014-06-12 CRAN (R 3.2.0)
##  zlibbioc         1.14.0      2015-04-17 Bioconductor