This R script was written to take fungal OTU data gathered through ITS amplicon sequencing and metabolomics data gathered through LC-MS/MS. Both analyses were carried out on subsets from the same tissue. This markdown will guide the user through — # Load libraries

library(vegan)

## Loading required package: permute

## Loading required package: lattice

## This is vegan 2.6-4

library(ggplot2)
library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.5      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Navigate to files and read in microbial data

#Navigate to directory
setwd("~/Desktop/Stats_with_R/Project/")

#Read in microbial data
fungi <- read_csv("Run8both_ITS1-otutable-mosses_stats_proj.csv")

## Rows: 80 Columns: 1752
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (9): SampleID, Sterilized, Decomp_Status, Metabolomics, Correlate, Si...
## dbl (1743): Site_number, Microsite, total, OTU_1011, OTU_1017, OTU_102, OTU_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Filter microbial data to only show information for the living layer with metabolome data

#Filter data to only have the living layer for samples with metabolomics info
fungi_living <- fungi %>%
  filter(Decomp_Status == "Living") %>%
  filter(Metabolomics == "Y")

#Remove extra columns to aid in creation of distance matrices
fungi_living_OTU <- fungi_living %>%
  select("SampleID", contains("OTU"))

Create table that includes metadata

Later on, we will need metadata to include in data visualization

fungi_meta <- fungi_living %>%
  select(SampleID, Site_Code, HostGenus)

Prepare data to feed into a distance matrix

#Remove samples with fewer than 2000 reads
fungi_sums <- fungi_living_OTU %>%
  mutate(total = rowSums(fungi_living_OTU[c(2:1741)])) %>%
  filter(total > 1300)

#Remove 'total' column 
fungi_input <- fungi_sums %>%
  mutate(total = NULL)

#Convert to a dataframe that will work with as.matrix
fungi_dataframe <- as.data.frame(fungi_input)
rownames(fungi_dataframe) <- fungi_dataframe$SampleID
fungi_dataframe <- fungi_dataframe[, -1]

Combine metadata created previously with the input with samples removed

fungi_input_meta <- fungi_input %>%
  left_join(fungi_meta, fungi_input, by = "SampleID")

Convert to distance matrix

#convert to matrix
fungi_mat <- as.matrix(fungi_dataframe)

#calculate distances
fungi.distmat <- vegdist(fungi_mat, method = "bray")

NMDS

set.seed(2023)
fungi_nmds <- metaMDS(fungi.distmat, distance="bray", k=2, trymax = 100)

## Run 0 stress 0.221533 
## Run 1 stress 0.2125934 
## ... New best solution
## ... Procrustes: rmse 0.1199497  max resid 0.4216615 
## Run 2 stress 0.2181455 
## Run 3 stress 0.2510435 
## Run 4 stress 0.2446197 
## Run 5 stress 0.2181453 
## Run 6 stress 0.248204 
## Run 7 stress 0.2323909 
## Run 8 stress 0.2192584 
## Run 9 stress 0.2542132 
## Run 10 stress 0.2406459 
## Run 11 stress 0.236481 
## Run 12 stress 0.2225414 
## Run 13 stress 0.2519353 
## Run 14 stress 0.2385031 
## Run 15 stress 0.2570512 
## Run 16 stress 0.2125934 
## ... Procrustes: rmse 5.891161e-05  max resid 0.0001910962 
## ... Similar to previous best
## Run 17 stress 0.2280101 
## Run 18 stress 0.2351187 
## Run 19 stress 0.2125934 
## ... Procrustes: rmse 5.400257e-05  max resid 0.000169177 
## ... Similar to previous best
## Run 20 stress 0.2252341 
## *** Best solution repeated 2 times

fungi_nmds$stress

## [1] 0.2125934

stressplot(fungi_nmds) #Shepard plot

Plot NMDS

scores(fungi_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2)) +
  geom_point()

Plot with Metadata

scores(fungi_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  inner_join(., fungi_meta, by = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=HostGenus, shape=Site_Code)) +
  geom_point()

Alter aesthetics

scores(fungi_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  inner_join(., fungi_meta, by = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=HostGenus, shape=Site_Code)) +
  geom_point(size=3)+
  scale_color_manual(values = c('brown', 'purple', 'blue'))+
  theme_bw()

PERMANOVA to quantify variation

adonis2(vegdist(fungi_mat, method="bray") ~ HostGenus * Site_Code,
        data = fungi_input_meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(fungi_mat, method = "bray") ~ HostGenus * Site_Code, data = fungi_input_meta)
##                     Df SumOfSqs      R2      F Pr(>F)    
## HostGenus            2   1.7676 0.16694 2.5668  0.001 ***
## Site_Code            3   1.6402 0.15491 1.5879  0.002 ** 
## HostGenus:Site_Code  3   1.3269 0.12532 1.2846  0.028 *  
## Residual            17   5.8534 0.55283                  
## Total               25  10.5882 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Repeat for metabolomics information

The end goal of this script is to visualize differences within the datasets and correlate the differences. Here we are looking to correlate metabolites with fungal OTU. Therefore, the next steps are to produce a distance matrix and NMDS for the metabolite data.

Read in metabolomics data

metabolite <- read_csv("HILIC_not_gap_filled_abundance_7APR23.csv", show_col_types = FALSE)
metadata <- read_csv("Phyloseq_AK2021_Moss_Metadata-v2.csv", show_col_types = FALSE)
metabolite_w_metadata <- left_join(metadata, metabolite, by = 'SampleID')

Filter for samples in the fungal matrix made previously

#Filter for living samples we have fungal info for
metab_living <- metabolite_w_metadata %>%
  filter(Decomp_Status == 'Living') %>%
  filter(Metabolomics == 'Y')

Prepare table with metadata to be incorporated later

metab_meta <- metab_living %>%
  select(SampleID, Site_Code, HostGenus)

Prepare data to calculate distances

#Remove extra columns
metab_living_feat <- metab_living %>%
  select("SampleID", contains("Feature")) %>%
  as.data.frame()

#Create matrix
rownames(metab_living_feat) <- metab_living_feat$SampleID
metab_living_feat <- metab_living_feat[, -1]
metab_mat <- as.matrix(metab_living_feat)

Calculate distances

metabolite.distmat <- vegdist(metab_mat, method = "bray")

NMDS

set.seed(2023)
metab_nmds <- metaMDS(metabolite.distmat, distance = "bray", k=2, trymax = 100)

## Run 0 stress 0.1371134 
## Run 1 stress 0.1285196 
## ... New best solution
## ... Procrustes: rmse 0.1254962  max resid 0.4975257 
## Run 2 stress 0.1500502 
## Run 3 stress 0.1298576 
## Run 4 stress 0.147465 
## Run 5 stress 0.09747497 
## ... New best solution
## ... Procrustes: rmse 0.1095758  max resid 0.5231563 
## Run 6 stress 0.1500502 
## Run 7 stress 0.09745198 
## ... New best solution
## ... Procrustes: rmse 0.006277238  max resid 0.02625014 
## Run 8 stress 0.1454295 
## Run 9 stress 0.1294097 
## Run 10 stress 0.1001021 
## Run 11 stress 0.136602 
## Run 12 stress 0.1294096 
## Run 13 stress 0.1248569 
## Run 14 stress 0.1068059 
## Run 15 stress 0.1320659 
## Run 16 stress 0.1372901 
## Run 17 stress 0.1068059 
## Run 18 stress 0.1294096 
## Run 19 stress 0.1591385 
## Run 20 stress 0.1519634 
## Run 21 stress 0.1295195 
## Run 22 stress 0.1311882 
## Run 23 stress 0.166473 
## Run 24 stress 0.1068059 
## Run 25 stress 0.1383764 
## Run 26 stress 0.0974766 
## ... Procrustes: rmse 0.01017251  max resid 0.03950491 
## Run 27 stress 0.1692503 
## Run 28 stress 0.1470267 
## Run 29 stress 0.1228293 
## Run 30 stress 0.09751364 
## ... Procrustes: rmse 0.01184083  max resid 0.03884074 
## Run 31 stress 0.1002069 
## Run 32 stress 0.1632436 
## Run 33 stress 0.09753365 
## ... Procrustes: rmse 0.01052189  max resid 0.03899888 
## Run 34 stress 0.1584984 
## Run 35 stress 0.1371134 
## Run 36 stress 0.1421129 
## Run 37 stress 0.1634359 
## Run 38 stress 0.147465 
## Run 39 stress 0.1513647 
## Run 40 stress 0.09743099 
## ... New best solution
## ... Procrustes: rmse 0.002357681  max resid 0.009158309 
## ... Similar to previous best
## *** Best solution repeated 1 times

metab_nmds$stress

## [1] 0.09743099

stressplot(metab_nmds) #Shepard plot

Plot NMDS

scores(metab_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2)) +
  geom_point()

Plot with metadata

#Add metadata
scores(metab_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  inner_join(., metab_meta, by = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=HostGenus, shape=Site_Code)) +
  geom_point()

Change aesthetics

scores(metab_nmds) %>%
  as_tibble(rownames = "SampleID") %>%
  inner_join(., metab_meta, by = "SampleID") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=HostGenus, shape=Site_Code)) +
  geom_point(size = 3)+
  scale_color_manual(values = c('brown', 'purple', 'blue'))+
  theme_bw()

PERMANOVA

Use PERMANOVA to quantify variation

adonis2(vegdist(metab_mat, method="bray") ~ HostGenus + Site_Code + HostGenus * Site_Code,
       data = metab_meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(metab_mat, method = "bray") ~ HostGenus + Site_Code + HostGenus * Site_Code, data = metab_meta)
##                     Df SumOfSqs      R2       F Pr(>F)    
## HostGenus            2  0.22764 0.45093 12.1090  0.001 ***
## Site_Code            3  0.07409 0.14676  2.6273  0.003 ** 
## HostGenus:Site_Code  3  0.03391 0.06716  1.2024  0.261    
## Residual            18  0.16919 0.33515                   
## Total               26  0.50483 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlate the Data!

Make matrices the same size

#Make matrices the same size- we have one Aulacomnium sample that gets removed from microbial data bc too few reads
metab_living_filtered <- metab_living %>%
  filter(SampleID != "S2.M1.S6.L1")
metab_living_feat_filtered <- metab_living_filtered %>%
  select("SampleID", contains("Feature")) 

metab_living_df <- as.data.frame(metab_living_feat_filtered)
#Create matrix
rownames(metab_living_df) <- metab_living_df$SampleID
metab_living_df <- metab_living_df[, -1]
metab_mat_corr <- as.matrix(metab_living_df)
metabolite_filtered.distmat <- vegdist(metab_mat_corr, method = "bray")

Mantel Test

Use Mantel test to see the overall correlations.

correlation <- mantel(fungi.distmat, metabolite_filtered.distmat, method = "spearman", permutations = 9999, na.rm = TRUE)
correlation

## 
## Mantel statistic based on Spearman's rank correlation rho 
## 
## Call:
## mantel(xdis = fungi.distmat, ydis = metabolite_filtered.distmat,      method = "spearman", permutations = 9999, na.rm = TRUE) 
## 
## Mantel statistic r: 0.5231 
##       Significance: 1e-04 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.137 0.174 0.208 0.253 
## Permutation: free
## Number of permutations: 9999

Pairwise correlations

I wanted to see if there were significant correlations between specific metabolites and specific OTU.

#Pairwise Correlations
library(Hmisc)

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

#Create pairwise matrix
pairwise <- left_join(metab_living_feat_filtered, fungi_input, by ="SampleID") %>%
  as.data.frame()

rownames(pairwise) <- pairwise$SampleID
pairwise <- pairwise[, -1]
pairwise_mat <- as.matrix(pairwise)

pairwise_corr <- rcorr(pairwise_mat)
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
  )
}
flat_pairwise <- flattenCorrMatrix(pairwise_corr$r, pairwise_corr$P)

Filter and prepare for visualization

This performed all pairwise correlations and therefore is a large file. I want to visualize only significant correlations between metabolites and OTU. This creates an input file for the app MetScape which is part of Cytoscape. This app has some particular quirks, so the code represents the exact input file format it requires.

##Show only correlations between metabolites
correlation_metab <- flat_pairwise %>%
  filter(str_detect(row, "^Feature"))

##Show only correlations between metabolites and OTU
correlation_metab_OTU <- correlation_metab %>%
  filter(str_detect(column, "^OTU"))

##Show only significant correlations
correlation_sig <- correlation_metab_OTU %>%
  filter(p <= 0.05) %>%
  filter(p != 0)

##Replace <0.0001 with 0.0001 because metscape doesn't like "<"
correlation_sig[correlation_sig == '<.0001'] <- "0.0001"

##Add adjusted p val of 1 so metscape will read
correlation_sig$'adj pval' <- 1

##change column names to match metscape example
colnames(correlation_sig)[1] = 'metab1'
colnames(correlation_sig)[2] = 'metab2'
colnames(correlation_sig)[3] = 'pcor'
colnames(correlation_sig)[4] = 'pval'

write_csv(correlation_sig, 'AK_moss2021_correlation_MetscapeInput_StatsWRProj.csv')

#Conclusion By the end of this, you should have ordinations for microbial and metabolite data, statistics on the driving factors behind the variation, statistics on the correlation between the datasets, and an input file to visualize the networks.

MeganNickerson_Advanced_stats2023

2023-05-03

Navigate to files and read in microbial data

Filter microbial data to only show information for the living layer with metabolome data

Create table that includes metadata

Prepare data to feed into a distance matrix

Combine metadata created previously with the input with samples removed

Convert to distance matrix

NMDS

Plot NMDS

Plot with Metadata

Alter aesthetics

PERMANOVA to quantify variation

Repeat for metabolomics information

Read in metabolomics data

Filter for samples in the fungal matrix made previously

Prepare table with metadata to be incorporated later

Prepare data to calculate distances

Calculate distances

NMDS

Plot NMDS

Plot with metadata

Change aesthetics

PERMANOVA

Correlate the Data!

Make matrices the same size

Mantel Test

Pairwise correlations

Filter and prepare for visualization