According to Libretexts (2022), soil formation involves 45% inorganic minerals, mostly sand, silt, and clay, allowing elements such as ammonia, calcium, and zinc to be available. The organic matter in soil constitutes 5% (Libretexts, 2022) of macroorganisms and microorganisms, which may include dead or decaying matter. These are crucial for completing the ecosystem with continuous nutrient cycles such as phosphorus, carbon, and nitrogen (Bormann & Likens, 1970). Soil also contains 25% water (Libretexts, 2022), which provides essential hydrocarbons for cellular processes for macro and microorganisms, and 25% air (Libretexts, 2022), which is ideal for aerobic organisms. The diversity of microbial populations in soil fluctuates and adapts quickly due to the quick reproduction of generations, DNA mutations and genome or epigenetic modifications (Elena & Lenski, 2003) due to changes in PH, temperature, and nutrient availability in soil. (Daniel, 2005). These environmental factors affect the microbial’s functional genes, adaptations, species distributions, interactions, and evolutionary relationships (Elena & Lenski, 2003). These complex microscopic communities’ genotypes and phenotypes can be observed and compared with other abundant microbial communities in various habitats (Fierer & Jackson, 2006). Originating from discovering microorganisms under a microscope (Gest, 2004) by Robert Hooke and Antoni Van Leeuwenhoek. The continuous modern advances of inspection and deeper understanding of the biodiversity of prokaryotic species today uncover their function, purpose and possibilities, as well as resemblance from DNA and RNA sequencing and data analysis. (Burke et al., 2011). In particular, the “Third-generation sequencing” (Petersen et al., 2019) Oxford Nanopore sequencing 16S Barcoding Kit is specifically designed to target the rRNA gene, allowing for identifying bacterial communities. The technology relies on DNA strands passing through nanopores, altering electrical currents to determine base sequences (Petersen et al., 2019). Unlike Next Generation Sequencing, where DNA fragments are short-reads (Soliman et al., 2017), nanopore technology can sequence long-reads, generating additional DNA sequences for further analysis; this makes it an optimal choice for observing microorganisms in soil samples. This study investigates and compares soil samples from three locations: the lake (on campus), the Wivenhoe trail (outside campus), and the garden (outside campus). To explore what species are present in soil samples in the various locations, critical functional genes in the microorganisms found in soil samples and any unique DNA adaptations or traits in the samples that evolved in response to habitat conditions. In addition, metagenomic analysis of soil DNA provides insights into community interactions, for example, ecological competition or symbiosis. Hypothesis 1: The study aims to show that the conserved functional genes in microorganisms found in soil differ significantly in different environmental habitats.Hypothesis 2: Microorganisms in soil samples show unique genetic adaptations and traits within different locations and environmental conditions.
install.packages("readr")
## Installing package into '/home/catherinetaylor35/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library("readr")
data = read_tsv("data.tsv")
## Rows: 3432 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): tax
## dbl (26): barcode01, barcode02, barcode03, barcode04, barcode05, barcode06, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 27
## tax barcode01 barcode02 barcode03 barcode04 barcode05 barcode06 barcode07
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bacteri… 28213 345 21467 62888 664 0 0
## 2 Bacteri… 6073 52 36614 87607 168 0 0
## 3 Bacteri… 7734 31 16041 44558 302 0 0
## 4 Bacteri… 8797 92 8632 43361 269 0 0
## 5 Bacteri… 7951 31 13659 31858 85 0 0
## 6 Bacteri… 39851 113 1628 2193 199 0 0
## # ℹ 19 more variables: barcode08 <dbl>, barcode09 <dbl>, barcode10 <dbl>,
## # barcode11 <dbl>, barcode12 <dbl>, barcode13 <dbl>, barcode14 <dbl>,
## # barcode15 <dbl>, barcode16 <dbl>, barcode17 <dbl>, barcode18 <dbl>,
## # barcode19 <dbl>, barcode20 <dbl>, barcode21 <dbl>, barcode22 <dbl>,
## # barcode23 <dbl>, barcode24 <dbl>, unclassified <dbl>, total <dbl>
if (!require(dplyr)) {
install.packages("dplyr")
library(dplyr)
}
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Select specific columns
df_cleaned <- data %>%
select(`tax`, `barcode01`, `barcode02`, `barcode03`, `barcode04`, `barcode05`, `barcode10`, unclassified, total)
# View the cleaned dataframe
head(df_cleaned)
## # A tibble: 6 × 9
## tax barcode01 barcode02 barcode03 barcode04 barcode05 barcode10 unclassified
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bact… 28213 345 21467 62888 664 23479 3543
## 2 Bact… 6073 52 36614 87607 168 1646 3789
## 3 Bact… 7734 31 16041 44558 302 6639 2094
## 4 Bact… 8797 92 8632 43361 269 1264 1666
## 5 Bact… 7951 31 13659 31858 85 1043 1631
## 6 Bact… 39851 113 1628 2193 199 5357 1170
## # ℹ 1 more variable: total <dbl>
# Aggregate data by species
species_aggregate <- df_cleaned %>%
group_by(tax) %>%
summarise(across(starts_with("barcode"), sum, na.rm = TRUE)) # Summing up all barcode columns for each species
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(starts_with("barcode"), sum, na.rm = TRUE)`.
## ℹ In group 1: `tax =
## "Archaea;Archaea_none;Euryarchaeota;Halobacteria;Halobacteriales;Haloarculaceae;Haloarcula"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
# View the aggregated data
head(species_aggregate)
## # A tibble: 6 × 7
## tax barcode01 barcode02 barcode03 barcode04 barcode05 barcode10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Archaea;Archaea_n… 1 0 0 0 0 0
## 2 Bacteria;Bacteria… 91 12 145 66 4 28
## 3 Bacteria;Bacteria… 529 22 2299 1237 22 35
## 4 Bacteria;Bacteria… 124 5 767 236 2 5
## 5 Bacteria;Bacteria… 171 2 654 848 4 8
## 6 Bacteria;Bacteria… 350 9 2456 1023 6 15
# Install the phyloseq package
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
## Bioconductor version '3.12' is out-of-date; the current release version '3.18'
## is available with R version '4.3'; see https://bioconductor.org/install
BiocManager::install("phyloseq")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cloud.r-project.org
## Bioconductor version 3.12 (BiocManager 1.30.22), R 4.0.4 (2021-02-15)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'phyloseq'
## Installation paths not writeable, unable to update packages
## path: /usr/lib/R/library
## packages:
## boot, class, cluster, codetools, foreign, KernSmooth, lattice, MASS,
## Matrix, mgcv, nlme, nnet, rpart, spatial, survival
install.packages("lattice")
## Installing package into '/home/catherinetaylor35/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(phyloseq)
library(lattice)
install.packages(c("ggplot2", "dplyr", "tidyr", "readr", "purrr", "tibble", "stringr", "forcats", "magrittr" ))
## Installing packages into '/home/catherinetaylor35/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(magrittr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
df_cleaned <- species_aggregate %>% separate(tax, sep = ";", into = c("Domain", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus"))
head(df_cleaned)
## # A tibble: 6 × 13
## Domain Kingdom Phylum Class Order Family Genus barcode01 barcode02 barcode03
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Archaea Archae… Eurya… Halo… Halo… Haloa… Halo… 1 0 0
## 2 Bacteria Bacter… Abdit… Abdi… Abdi… Abiti… Abdi… 91 12 145
## 3 Bacteria Bacter… Acido… Acid… Acid… Acido… Acid… 529 22 2299
## 4 Bacteria Bacter… Acido… Acid… Acid… Acido… Acid… 124 5 767
## 5 Bacteria Bacter… Acido… Acid… Acid… Acido… Acid… 171 2 654
## 6 Bacteria Bacter… Acido… Acid… Acid… Acido… Acid… 350 9 2456
## # ℹ 3 more variables: barcode04 <dbl>, barcode05 <dbl>, barcode10 <dbl>
tax_data <- df_cleaned %>% select(c("Phylum", "Class", "Order", "Family", "Genus"))
tax_ranks <- colnames(tax_data)
tax_data <- tax_table(tax_data)
## Warning in .local(object): Coercing from data.frame class to character matrix
## prior to building taxonomyTable.
## This could introduce artifacts.
## Check your taxonomyTable, or coerce to matrix manually.
dimnames(tax_data)[[2]] <- tax_ranks
head(tax_data)
## Taxonomy Table: [6 taxa by 5 taxonomic ranks]:
## Phylum Class Order Family
## sp1 "Euryarchaeota" "Halobacteria" "Halobacteriales" "Haloarculaceae"
## sp2 "Abditibacteriota" "Abditibacteria" "Abditibacteriales" "Abitibacteriaceae"
## sp3 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp4 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp5 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp6 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## Genus
## sp1 "Haloarcula"
## sp2 "Abditibacterium"
## sp3 "Acidicapsa"
## sp4 "Acidipila"
## sp5 "Acidisarcina"
## sp6 "Acidobacterium"
# first, extract all columns starting with 'barcode'
otu_tab <- df_cleaned %>% select(starts_with("barcode"))
# now we convert it to an otuTable object
# the 'taxa_are_rows' argument tells phyloseq which orientation our data are in
otu_tab <- otu_table(otu_tab, taxa_are_rows = T)
head(otu_tab)
## OTU Table: [6 taxa and 6 samples]
## taxa are rows
## barcode01 barcode02 barcode03 barcode04 barcode05 barcode10
## sp1 1 0 0 0 0 0
## sp2 91 12 145 66 4 28
## sp3 529 22 2299 1237 22 35
## sp4 124 5 767 236 2 5
## sp5 171 2 654 848 4 8
## sp6 350 9 2456 1023 6 15
microbiome <- phyloseq(otu_tab, tax_data)
microbiome # prints a nice summary of the data
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3432 taxa and 6 samples ]
## tax_table() Taxonomy Table: [ 3432 taxa by 5 taxonomic ranks ]
head(otu_table(microbiome))
## OTU Table: [6 taxa and 6 samples]
## taxa are rows
## barcode01 barcode02 barcode03 barcode04 barcode05 barcode10
## sp1 1 0 0 0 0 0
## sp2 91 12 145 66 4 28
## sp3 529 22 2299 1237 22 35
## sp4 124 5 767 236 2 5
## sp5 171 2 654 848 4 8
## sp6 350 9 2456 1023 6 15
head(tax_table(microbiome))
## Taxonomy Table: [6 taxa by 5 taxonomic ranks]:
## Phylum Class Order Family
## sp1 "Euryarchaeota" "Halobacteria" "Halobacteriales" "Haloarculaceae"
## sp2 "Abditibacteriota" "Abditibacteria" "Abditibacteriales" "Abitibacteriaceae"
## sp3 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp4 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp5 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## sp6 "Acidobacteria" "Acidobacteriia" "Acidobacteriales" "Acidobacteriaceae"
## Genus
## sp1 "Haloarcula"
## sp2 "Abditibacterium"
## sp3 "Acidicapsa"
## sp4 "Acidipila"
## sp5 "Acidisarcina"
## sp6 "Acidobacterium"
# Can change which taxonomic rank the plot shows
plot_bar(microbiome, fill = "Phylum")
sample_sums(microbiome) # shows the number of sequences in each sample
## barcode01 barcode02 barcode03 barcode04 barcode05 barcode10
## 408376 395805 1160754 881449 12167 362660
rarefiedData <- rarefy_even_depth(microbiome,
sample.size = min(sample_sums(microbiome)), replace = F)
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 1535OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
sample_sums(rarefiedData)
## barcode01 barcode02 barcode03 barcode04 barcode05 barcode10
## 12167 12167 12167 12167 12167 12167
## barcode01 barcode02 barcode03 barcode04 barcode10
## 50947 50947 50947 50947 50947
estimate_richness(rarefiedData) # summarises the diversity of each sample
## Observed Chao1 se.chao1 ACE se.ACE Shannon Simpson
## barcode01 856 1325.624 70.73702 1301.527 19.76205 5.143159 0.9796983
## barcode02 1030 1502.500 62.87449 1567.793 22.48096 5.211883 0.9826293
## barcode03 911 1243.244 52.12622 1240.648 18.50681 5.673057 0.9933013
## barcode04 725 1157.618 72.21158 1096.914 18.30009 4.798410 0.9732326
## barcode05 748 1092.443 58.26081 1071.254 17.39766 5.250453 0.9886867
## barcode10 812 1262.591 73.62322 1176.418 18.08791 5.374851 0.9889792
## InvSimpson Fisher
## barcode01 49.25708 209.9798
## barcode02 57.56825 268.5627
## barcode03 149.28315 227.9933
## barcode04 37.35881 168.9776
## barcode05 88.39119 175.9806
## barcode10 90.73745 195.9075
# Calculate species richness
richness_data <- estimate_richness(rarefiedData, measures = c("ACE", "Shannon"))
# View the first few lines of the diversity data
head(richness_data)
## ACE se.ACE Shannon
## barcode01 1301.527 19.76205 5.143159
## barcode02 1567.793 22.48096 5.211883
## barcode03 1240.648 18.50681 5.673057
## barcode04 1096.914 18.30009 4.798410
## barcode05 1071.254 17.39766 5.250453
## barcode10 1176.418 18.08791 5.374851
# Biodiversity metrics
barcode_metrics <- data.frame(
Barcode = c("barcode01", "barcode03"),
ACE = c(1214.884, 1304.168),
Shannon = c(5.139960, 5.683512)
)
# Descriptive comparison
difference_ace <- barcode_metrics$ACE[barcode_metrics$Barcode == "barcode03"] -
barcode_metrics$ACE[barcode_metrics$Barcode == "barcode01"]
difference_shannon <- barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode03"] -
barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode01"]
# Print the observed differences
print(paste("Difference in ACE between barcode03 and barcode01:", difference_ace))
## [1] "Difference in ACE between barcode03 and barcode01: 89.2839999999999"
print(paste("Difference in Shannon index between barcode03 and barcode01:", difference_shannon))
## [1] "Difference in Shannon index between barcode03 and barcode01: 0.543552"
# Biodiversity metrics
barcode_metrics <- data.frame(
Barcode = c("barcode04", "barcode10"),
ACE = c(1121.175, 1206.746), # ACE values for barcode04 and barcode10
Shannon = c(4.840777, 5.355481) # Shannon values for barcode04 and barcode10
)
# Descriptive comparison
difference_ace <- barcode_metrics$ACE[barcode_metrics$Barcode == "barcode10"] -
barcode_metrics$ACE[barcode_metrics$Barcode == "barcode04"]
difference_shannon <- barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode10"] -
barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode04"]
# Print the observed differences
print(paste("Difference in ACE between barcode10 and barcode04:", difference_ace))
## [1] "Difference in ACE between barcode10 and barcode04: 85.5710000000001"
print(paste("Difference in Shannon index between barcode10 and barcode04:", difference_shannon))
## [1] "Difference in Shannon index between barcode10 and barcode04: 0.514704"
# Biodiversity metrics
barcode_metrics <- data.frame(
Barcode = c("barcode02", "barcode05"),
ACE = c(1460.731, 1071.254), # ACE values for barcode02 and barcode05
Shannon = c(5.199304, 5.250453) # Shannon values for barcode02 and barcode05
)
# Descriptive comparison
difference_ace <- barcode_metrics$ACE[barcode_metrics$Barcode == "barcode05"] -
barcode_metrics$ACE[barcode_metrics$Barcode == "barcode02"]
difference_shannon <- barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode05"] -
barcode_metrics$Shannon[barcode_metrics$Barcode == "barcode02"]
# Print the observed differences
print(paste("Difference in ACE between barcode05 and barcode02:", difference_ace))
## [1] "Difference in ACE between barcode05 and barcode02: -389.477"
print(paste("Difference in Shannon index between barcode05 and barcode02:", difference_shannon))
## [1] "Difference in Shannon index between barcode05 and barcode02: 0.0511490000000006"
# Load the ggplot2 package
library(ggplot2)
# Create a data frame with your data
data <- data.frame(
Barcode = c("barcode01", "barcode02", "barcode03", "barcode04", "barcode05", "barcode10"),
ACE = c(1240.594, 1538.493, 1251.353, 1055.012, 1071.254, 1130.701),
se.ACE = c(18.95278, 22.25332, 18.53425, 17.85359, 17.39766, 17.79527),
Shannon = c(5.147862, 5.196808, 5.693757, 4.823278, 5.250453, 5.345040)
)
# Calculate the upper and lower limits of the error bars for ACE
data$ACE_upper <- data$ACE + data$se.ACE
data$ACE_lower <- data$ACE - data$se.ACE
# Create the plot with error bars for ACE
ggplot(data, aes(x = Barcode)) +
geom_errorbar(aes(ymin = ACE_lower, ymax = ACE_upper), width = 0.2) +
geom_point(aes(y = ACE), color = 'red', size = 3) +
geom_line(aes(y = ACE, group = 1), color = 'red') +
geom_point(aes(y = Shannon * 100), color = 'blue', size = 3) + # Multiplying by 100 for visualization purposes
geom_line(aes(y = Shannon * 100, group = 1), color = 'blue') +
theme_minimal() +
labs(title = "ACE and Shannon across Barcodes", x = "Barcode", y = "Value") +
scale_y_continuous(sec.axis = sec_axis(~ . / 100, name = "Shannon")) # Adding a secondary axis for Shannon
# ... (previous code to create the data frame)
# Use ggplot2 to create the plot with ACE and Shannon
ggplot(data, aes(x = Barcode)) +
# Add ACE points
geom_point(aes(y = ACE), color = "black") +
# Add error bars for ACE
geom_errorbar(aes(ymin = ACE_lower, ymax = ACE_upper), width = 0.2) +
# Add Shannon points
geom_point(aes(y = Shannon * 200), color = "blue", shape = 17) + # Multiplying Shannon by a factor to scale appropriately
# Customizations
theme_minimal() +
labs(title = "Alpha Diversity Measures for Samples", x = "Barcode", y = "ACE") +
# Adjust the secondary axis for the Shannon index
scale_y_continuous(
name = "ACE",
sec.axis = sec_axis(~ . / 200, name="Shannon") # Adjust this transformation to match the scale of Shannon
)
# Display the plot
ggsave("alpha_diversity_plot.png", width = 10, height = 6, dpi = 300)
# First, create the data frame with your data
data <- data.frame(
Barcode = c("barcode01", "barcode02", "barcode03", "barcode04", "barcode05", "barcode10"),
ACE = c(1240.594, 1538.493, 1251.353, 1055.012, 1071.254, 1130.701),
Shannon = c(5.147862, 5.196808, 5.693757, 4.823278, 5.250453, 5.345040)
)
# Descriptive statistics for ACE
ace_mean <- mean(data$ACE)
ace_sd <- sd(data$ACE)
ace_se <- ace_sd / sqrt(length(data$ACE))
# Descriptive statistics for Shannon
shannon_mean <- mean(data$Shannon)
shannon_sd <- sd(data$Shannon)
shannon_se <- shannon_sd / sqrt(length(data$Shannon))
# Output the descriptive statistics
cat("Descriptive Statistics for ACE:\n")
## Descriptive Statistics for ACE:
cat("Mean:", ace_mean, "SD:", ace_sd, "SE:", ace_se, "\n\n")
## Mean: 1214.568 SD: 178.8791 SE: 73.02707
cat("Descriptive Statistics for Shannon:\n")
## Descriptive Statistics for Shannon:
cat("Mean:", shannon_mean, "SD:", shannon_sd, "SE:", shannon_se, "\n")
## Mean: 5.242866 SD: 0.2831964 SE: 0.1156144
nmdsResult <- ordinate(rarefiedData, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 7.029424e-05
## Run 1 stress 0.0004254833
## ... Procrustes: rmse 0.1602638 max resid 0.3501461
## Run 2 stress 8.50768e-05
## ... Procrustes: rmse 0.07133946 max resid 0.117697
## Run 3 stress 9.507923e-05
## ... Procrustes: rmse 0.1896866 max resid 0.4084741
## Run 4 stress 8.445916e-05
## ... Procrustes: rmse 0.05165806 max resid 0.07086816
## Run 5 stress 0.0001923588
## ... Procrustes: rmse 0.04872978 max resid 0.0887314
## Run 6 stress 9.766649e-05
## ... Procrustes: rmse 0.1966351 max resid 0.4371813
## Run 7 stress 0.0001375064
## ... Procrustes: rmse 0.05990147 max resid 0.07850161
## Run 8 stress 9.215626e-05
## ... Procrustes: rmse 0.1150951 max resid 0.2367959
## Run 9 stress 8.987859e-05
## ... Procrustes: rmse 0.1945409 max resid 0.4293269
## Run 10 stress 8.659425e-05
## ... Procrustes: rmse 0.130185 max resid 0.2827591
## Run 11 stress 9.012014e-05
## ... Procrustes: rmse 0.06935276 max resid 0.1167231
## Run 12 stress 9.998257e-05
## ... Procrustes: rmse 0.08221663 max resid 0.1726101
## Run 13 stress 9.010178e-05
## ... Procrustes: rmse 0.08921111 max resid 0.1886263
## Run 14 stress 0.000414585
## ... Procrustes: rmse 0.1070806 max resid 0.1394237
## Run 15 stress 8.155764e-05
## ... Procrustes: rmse 0.02890601 max resid 0.05291149
## Run 16 stress 9.945727e-05
## ... Procrustes: rmse 0.199066 max resid 0.4442956
## Run 17 stress 9.950418e-05
## ... Procrustes: rmse 0.08023292 max resid 0.1604198
## Run 18 stress 9.802258e-05
## ... Procrustes: rmse 0.2076122 max resid 0.4585529
## Run 19 stress 8.021272e-05
## ... Procrustes: rmse 0.06814345 max resid 0.1386575
## Run 20 stress 0.001349485
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 5: no. of iterations >= maxit
## 15: stress < smin
## Warning in metaMDS(veganifyOTU(physeq), distance, ...): stress is (nearly)
## zero: you may have insufficient data
plot_ordination(rarefiedData, nmdsResult, type = "sample")
## No available covariate data to map on the points for this plot `type`
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Labilitrichaceae;Labilithrix",
"Bacteria;Bacteria_none;Acidobacteria;Blastocatellia;Blastocatellales;Pyrinomonadaceae;Brevitalea",
"Bacteria;Bacteria_none;Proteobacteria;Betaproteobacteria;Burkholderiales;Burkholderiales_Incertae_sedis;Aquabacterium",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Syntrophales;Syntrophaceae;Desulfomonile",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Bacillaceae;Peribacillus",
"Bacteria;Bacteria_none;Bacteroidota;Bacteroidia;Marinilabiliales;Prolixibacteraceae;Maribellus",
"Bacteria;Bacteria_none;Proteobacteria;Gammaproteobacteria;Xanthomonadales;Xanthomonadaceae;Stenotrophomonas",
"Bacteria;Bacteria_none;Acidobacteria;Thermoanaerobaculia;Thermoanaerobaculales;Thermoanaerobaculaceae;Thermoanaerobaculum",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Polyangiaceae;Chondromyces",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Rhizobiaceae;Rhizobium"
),
barcode01 = c(974, 3053, 353, 592, 2524, 0, 17, 725, 528, 365),
barcode03 = c(3261, 1356, 3797, 4568, 2483, 326, 7595, 763, 1822, 0)
)
# Define a function to extract the taxonomic level
get_taxonomic_level <- function(tax) {
components <- unlist(strsplit(tax, ";"))
if (length(components) >= 4) {
return(components[4])
} else {
return("Unclassified")
}
}
# Add a column for taxonomic level
df <- df %>%
mutate(taxonomic_level = sapply(df$tax, get_taxonomic_level))
# Aggregate data at the taxonomic level
aggregated_data <- df %>%
group_by(taxonomic_level) %>%
summarize(abundance_01 = sum(barcode01), abundance_03 = sum(barcode03)) %>%
ungroup()
# Create the bar plot
ggplot(aggregated_data, aes(x = reorder(taxonomic_level, -abundance_01), fill = taxonomic_level)) +
geom_bar(aes(y = abundance_01), stat = "identity", position = "dodge", width = 0.7) +
geom_bar(aes(y = abundance_03), stat = "identity", position = "dodge", width = 0.5) +
scale_fill_manual(values = rainbow(length(unique(aggregated_data$taxonomic_level)))) +
theme_minimal() +
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level for Barcodes 1 and 3") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Diagonal x-axis labels for better readability
library(tidyr)
# New sample data with barcode01 and barcode03
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Labilitrichaceae;Labilithrix",
"Bacteria;Bacteria_none;Acidobacteria;Blastocatellia;Blastocatellales;Pyrinomonadaceae;Brevitalea",
"Bacteria;Bacteria_none;Proteobacteria;Betaproteobacteria;Burkholderiales;Burkholderiales_Incertae_sedis;Aquabacterium",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Syntrophales;Syntrophaceae;Desulfomonile",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Bacillaceae;Peribacillus",
"Bacteria;Bacteria_none;Bacteroidota;Bacteroidia;Marinilabiliales;Prolixibacteraceae;Maribellus",
"Bacteria;Bacteria_none;Proteobacteria;Gammaproteobacteria;Xanthomonadales;Xanthomonadaceae;Stenotrophomonas",
"Bacteria;Bacteria_none;Acidobacteria;Thermoanaerobaculia;Thermoanaerobaculales;Thermoanaerobaculaceae;Thermoanaerobaculum",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Polyangiaceae;Chondromyces",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Rhizobiaceae;Rhizobium"
),
barcode01 = c(974, 3053, 353, 592, 2524, 0, 17, 725, 528, 365),
barcode03 = c(3261, 1356, 3797, 4568, 2483, 326, 7595, 763, 1822, 0)
)
# Prepare the data for plotting
df_long <- df %>%
gather(key = "barcode", value = "abundance", -tax) %>%
mutate(taxonomic_level = sapply(strsplit(as.character(tax), ";"), function(x) x[4]))
# Adjusting the colors to match the number of taxonomic levels
color_palette <- c("blue", "orange", "green", "red", "purple", "brown", "pink", "grey", "yellow", "cyan")
# Create the bar plot
ggplot(df_long, aes(x = taxonomic_level, y = abundance, fill = barcode)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = color_palette[1:length(unique(df_long$taxonomic_level))]) +
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level for Barcodes 1 and 3") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Sample data
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Acidobacteria;Vicinamibacteria;Vicinamibacterales;Vicinamibacteraceae;Vicinamibacter",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Xanthobacteraceae;Pseudolabrys",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Nitrobacteraceae;Bradyrhizobium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Rhodoplanes",
"Bacteria;Bacteria_none;Actinobacteria;Rubrobacteria;Gaiellales;Gaiellaceae;Gaiella",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Planococcaceae;Sporosarcina",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Kofleriaceae;Haliangium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Methylothermalis",
"Bacteria;Bacteria_none;Proteobacteria;Epsilonproteobacteria;Campylobacterales;Thiovulaceae;Sulfurimonas",
"Bacteria;Bacteria_none;Acidobacteria;Acidobacteriia;Bryobacterales;Bryobacteraceae;Paludibaculum"
),
barcode04 = c(62888, 87607, 44558, 43361, 31858, 2193, 11515, 30520, 8, 4425),
barcode10 = c(23479, 1646, 6639, 1264, 1043, 5357, 9403, 1257, 2, 4746)
)
# Define a function to extract the taxonomic level
get_taxonomic_level <- function(tax) {
components <- unlist(strsplit(tax, ";"))
if (length(components) >= 4) {
return(components[4])
} else {
return("Unclassified")
}
}
# Add a column for taxonomic level
df <- df %>%
mutate(taxonomic_level = sapply(df$tax, get_taxonomic_level))
# Aggregate data at the taxonomic level
aggregated_data <- df %>%
group_by(taxonomic_level) %>%
summarize(abundance_04 = sum(barcode04), abundance_10 = sum(barcode10)) %>%
ungroup()
# Create the bar plot
ggplot(aggregated_data, aes(x = reorder(taxonomic_level, -abundance_04), fill = taxonomic_level)) +
geom_bar(aes(y = abundance_04), stat = "identity", position = "dodge", width = 0.7) +
geom_bar(aes(y = abundance_10), stat = "identity", position = "dodge", width = 0.5) +
scale_fill_manual(values = rainbow(length(unique(aggregated_data$taxonomic_level)))) +
theme_minimal() +
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level for Barcodes 4 and 10") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Diagonal x-axis labels for better readability
# Updated sample data with barcode04 and barcode10
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Acidobacteria;Vicinamibacteria;Vicinamibacterales;Vicinamibacteraceae;Vicinamibacter",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Xanthobacteraceae;Pseudolabrys",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Nitrobacteraceae;Bradyrhizobium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Rhodoplanes",
"Bacteria;Bacteria_none;Actinobacteria;Rubrobacteria;Gaiellales;Gaiellaceae;Gaiella",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Planococcaceae;Sporosarcina",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Kofleriaceae;Haliangium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Methylothermalis",
"Bacteria;Bacteria_none;Proteobacteria;Epsilonproteobacteria;Campylobacterales;Thiovulaceae;Sulfurimonas",
"Bacteria;Bacteria_none;Acidobacteria;Acidobacteriia;Bryobacterales;Bryobacteraceae;Paludibaculum"
),
barcode04 = c(62888, 87607, 44558, 43361, 31858, 2193, 11515, 30520, 8, 4425),
barcode10 = c(23479, 1646, 6639, 1264, 1043, 5357, 9403, 1257, 2, 4746)
)
# Prepare the data for plotting
df_long <- df %>%
gather(key = "barcode", value = "abundance", -tax) %>%
mutate(taxonomic_level = sapply(strsplit(as.character(tax), ";"), function(x) x[4]))
# Create the bar plot
ggplot(df_long, aes(x = taxonomic_level, y = abundance, fill = barcode)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("blue", "orange")) +
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Updated sample data with the new abundance data for barcode02 and barcode05
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Acidobacteria;Vicinamibacteria;Vicinamibacterales;Vicinamibacteraceae;Vicinamibacter",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Xanthobacteraceae;Pseudolabrys",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Nitrobacteraceae;Bradyrhizobium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Rhodoplanes",
"Bacteria;Bacteria_none;Actinobacteria;Rubrobacteria;Gaiellales;Gaiellaceae;Gaiella",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Planococcaceae;Sporosarcina",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Kofleriaceae;Haliangium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Methylothermalis",
"Bacteria;Bacteria_none;Proteobacteria;Epsilonproteobacteria;Campylobacterales;Thiovulaceae;Sulfurimonas",
"Bacteria;Bacteria_none;Acidobacteria;Acidobacteriia;Bryobacterales;Bryobacteraceae;Paludibaculum"
),
barcode02 = c(345, 52, 31, 92, 31, 113, 1011, 75, 33300, 325),
barcode05 = c(664, 168, 302, 269, 85, 199, 407, 158, 0, 99)
)
# Add a column for taxonomic level
df <- df %>%
mutate(taxonomic_level = sapply(df$tax, get_taxonomic_level))
# Aggregate data at the taxonomic level for barcode02 and barcode05
aggregated_data <- df %>%
group_by(taxonomic_level) %>%
summarize(abundance_02 = sum(barcode02), abundance_05 = sum(barcode05)) %>%
ungroup()
# Define a color palette with enough colors for each taxonomic level
num_of_tax_levels <- length(unique(aggregated_data$taxonomic_level))
color_palette <- rainbow(num_of_tax_levels)
# Create the bar plot for barcode02 and barcode05
ggplot(aggregated_data, aes(x = reorder(taxonomic_level, -abundance_02), fill = taxonomic_level)) +
geom_bar(aes(y = abundance_02), stat = "identity", position = "dodge", width = 0.7) +
geom_bar(aes(y = abundance_05), stat = "identity", position = "dodge", width = 0.5) +
scale_fill_manual(values = color_palette) + # Use the defined color palette
theme_minimal() +
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level for Barcodes 2 and 5") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Diagonal x-axis labels for better readability
# Updated sample data with barcode02 and barcode05
df <- data.frame(
tax = c(
"Bacteria;Bacteria_none;Acidobacteria;Vicinamibacteria;Vicinamibacterales;Vicinamibacteraceae;Vicinamibacter",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Xanthobacteraceae;Pseudolabrys",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Nitrobacteraceae;Bradyrhizobium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Rhodoplanes",
"Bacteria;Bacteria_none;Actinobacteria;Rubrobacteria;Gaiellales;Gaiellaceae;Gaiella",
"Bacteria;Bacteria_none;Firmicutes;Bacilli;Bacillales;Planococcaceae;Sporosarcina",
"Bacteria;Bacteria_none;Proteobacteria;Deltaproteobacteria;Myxococcales;Kofleriaceae;Haliangium",
"Bacteria;Bacteria_none;Proteobacteria;Alphaproteobacteria;Hyphomicrobiales;Hyphomicrobiaceae;Methylothermalis",
"Bacteria;Bacteria_none;Proteobacteria;Epsilonproteobacteria;Campylobacterales;Thiovulaceae;Sulfurimonas",
"Bacteria;Bacteria_none;Acidobacteria;Acidobacteriia;Bryobacterales;Bryobacteraceae;Paludibaculum"
),
barcode02 = c(345, 52, 31, 92, 31, 113, 1011, 75, 33300, 325),
barcode05 = c(664, 168, 302, 269, 85, 199, 407, 158, 0, 99)
)
# Prepare the data for plotting for barcodes 2 and 5
df_long <- df %>%
gather(key = "barcode", value = "abundance", -tax) %>%
mutate(taxonomic_level = sapply(strsplit(as.character(tax), ";"), function(x) x[4]))
# Adjusting the colors to match the number of taxonomic levels
# Ensure the color palette has enough colors for each level
color_palette <- rainbow(length(unique(df_long$taxonomic_level)))
# Create the bar plot for barcodes 2 and 5
ggplot(df_long, aes(x = taxonomic_level, y = abundance, fill = barcode)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = color_palette) + # Use the defined color palette
labs(x = "Taxonomic Level", y = "Abundance", title = "Abundance of Bacteria by Taxonomic Level for Barcodes 2 and 5") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Subset the microbiome data for the different groups of barcodes
microbiome_1_3 <- prune_samples(sample_names(microbiome) %in% c("barcode01", "barcode03"), microbiome)
microbiome_4_10 <- prune_samples(sample_names(microbiome) %in% c("barcode04", "barcode10"), microbiome)
microbiome_2_5 <- prune_samples(sample_names(microbiome) %in% c("barcode02", "barcode05"), microbiome)
# Function to plot bar plots for a phyloseq object
plot_microbiome <- function(physeq_obj, fill_rank = "Phylum") {
p <- plot_bar(physeq_obj, fill = fill_rank) +
theme_minimal() +
labs(x = "Sample", y = "Abundance") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
return(p)
}
# Create the plots
p_1_3 <- plot_microbiome(microbiome_1_3)
p_4_10 <- plot_microbiome(microbiome_4_10)
p_2_5 <- plot_microbiome(microbiome_2_5)
p_all <- plot_microbiome(microbiome)
# To display the plots
print(p_1_3)
print(p_4_10)
print(p_2_5)
print(p_all)
df_cleaned <- tax_glom(rarefiedData, taxrank = "Class")
df_cleaned <- tax_glom(rarefiedData, taxrank = "Class")
corResult <- cor(t(otu_table(df_cleaned)))
heatmap(corResult)