Custom script

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

Load necessary libraries

setwd(dir = "/Users/qcvp/R")
list.files()
##  [1] "baothinh_rel.txt"         "Baothinh.rmd"            
##  [3] "baothinh.txt"             "code intern"             
##  [5] "Contaminant.txt"          "custom script"           
##  [7] "mẫu plume báo trong nước" "Mẫu RIVS"                
##  [9] "metabar"                  "Nemesis code"            
## [11] "Nemesis-data"             "Others"                  
## [13] "outputs"                  "Plumecode"               
## [15] "Transect_baothinh.txt"
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(datarium)
library(patchwork)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggfortify)
library(ggcorrplot)
library(FactoMineR)
library(corrr)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(Hmisc)
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(PCAtest)
library(microbiome)
## Loading required package: phyloseq
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2022 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## 
## Attaching package: 'microbiome'
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## 
## The following object is masked from 'package:vegan':
## 
##     diversity
## 
## The following object is masked from 'package:base':
## 
##     transform
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
## 
## The following object is masked from 'package:ggmap':
## 
##     theme_nothing
## 
## The following object is masked from 'package:patchwork':
## 
##     align_plots
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## 
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## 
## The following object is masked from 'package:datasets':
## 
##     sleep
library(missMDA)
library(naniar)
library(RColorBrewer)
library(writexl)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

Relative abundance

Load the data

data_rel <- read.table("/Users/qcvp/R/baothinh_rel.txt", header = TRUE, sep = "\t")

# Transform the data to long format for plotting
data_rel_long <- data_rel %>%
  gather(key = "Gene", value = "Abundance", -Sample.name)

Checking distribution and add Group column

#Add a column to identify sample groups
data_rel_long$Group <- substr(data_rel_long$Sample.name, 1, 2)
data_rel_long$Cite <- substr(data_rel_long$Sample.name, 1, 3)

#Create a gene list  
genes <- c("Sul1", "Sul2", "TetQ", "TetM", "Intl1")

# Checking sample distribution for each Gene
for (gene in genes) {
  # Filter data for the current gene
  gene_rel <- data_rel_long[data_rel_long$Gene == gene, ]
  
  # Checking distribution
  gene_rel |>
    dplyr::select(Abundance) |>
    indices_normality(nrow = 1, ncol =1)}

##Test between cite

# Loop through each gene
for (gene in genes) {
 
  # Calculate statistical significance based on Kruskal-Wallis test
  kruskal_test <- function(gene) {
    KW_result <- kruskal.test(Abundance ~ Cite, data = filter(data_rel_long, Gene == gene))
    return(KW_result)  }

  KW_result_abs <- lapply(unique(data_rel_long$Gene), kruskal_test)
  
  # Posthoc Dunn test for each gene
  test_dunn <- function(gene) {
    dunn_result <- FSA::dunnTest(Abundance ~ Cite,   
                             data = filter(data_rel_long, Gene == gene),
                             method = "bh")
    return(dunn_result)  }

  dunn_result_rell <- lapply(unique(data_rel_long$Gene), test_dunn)     
}
## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
# Kruskal-Wallis
KW_result_abs
## [[1]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.4615, df = 5, p-value = 0.3622
## 
## 
## [[2]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.8462, df = 5, p-value = 0.3215
## 
## 
## [[3]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.5414, df = 5, p-value = 0.3534
## 
## 
## [[4]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 7.6154, df = 5, p-value = 0.1787
## 
## 
## [[5]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.5385, df = 5, p-value = 0.6176
# Dunn test
dunn_result_rell
## [[1]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -1.5254255 0.12715299 0.6357650
## 2   BD1 - BD3 -0.5547002 0.57909974 0.8686496
## 3   BD2 - BD3  0.9707253 0.33168507 0.5528084
## 4   BD1 - NP1 -1.8027756 0.07142346 1.0000000
## 5   BD2 - NP1 -0.2773501 0.78151129 0.9017438
## 6   BD3 - NP1 -1.2480754 0.21200343 0.6360103
## 7   BD1 - NP2 -1.5254255 0.12715299 0.9536474
## 8   BD2 - NP2  0.0000000 1.00000000 1.0000000
## 9   BD3 - NP2 -0.9707253 0.33168507 0.6219095
## 10  NP1 - NP2  0.2773501 0.78151129 0.9768891
## 11  BD1 - NP3 -0.4160251 0.67739160 0.9237158
## 12  BD2 - NP3  1.1094004 0.26725749 0.5726946
## 13  BD3 - NP3  0.1386750 0.88970694 0.9532574
## 14  NP1 - NP3  1.3867505 0.16551786 0.6206920
## 15  NP2 - NP3  1.1094004 0.26725749 0.6681437
## 
## [[2]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.5547002 0.57909974 0.7896815
## 2   BD1 - BD3  0.0000000 1.00000000 1.0000000
## 3   BD2 - BD3  0.5547002 0.57909974 0.8686496
## 4   BD1 - NP1 -1.9414507 0.05220364 0.3915273
## 5   BD2 - NP1 -1.3867505 0.16551786 0.6206920
## 6   BD3 - NP1 -1.9414507 0.05220364 0.7830545
## 7   BD1 - NP2 -1.2480754 0.21200343 0.5300086
## 8   BD2 - NP2 -0.6933752 0.48807409 0.8134568
## 9   BD3 - NP2 -1.2480754 0.21200343 0.6360103
## 10  NP1 - NP2  0.6933752 0.48807409 0.9151389
## 11  BD1 - NP3 -0.4160251 0.67739160 0.7816057
## 12  BD2 - NP3  0.1386750 0.88970694 0.9532574
## 13  BD3 - NP3 -0.4160251 0.67739160 0.8467395
## 14  NP1 - NP3  1.5254255 0.12715299 0.6357650
## 15  NP2 - NP3  0.8320503 0.40538056 0.8686726
## 
## [[3]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2  0.1437939 0.88566320 0.9489249
## 2   BD1 - BD3 -1.4379392 0.15045130 0.7522565
## 3   BD2 - BD3 -1.5817331 0.11371051 0.8528288
## 4   BD1 - NP1 -0.5751757 0.56517249 0.6521221
## 5   BD2 - NP1 -0.7189696 0.47215965 0.7082395
## 6   BD3 - NP1  0.8627635 0.38826750 0.8320018
## 7   BD1 - NP2 -0.5751757 0.56517249 0.7064656
## 8   BD2 - NP2 -0.7189696 0.47215965 0.7869327
## 9   BD3 - NP2  0.8627635 0.38826750 0.9706687
## 10  NP1 - NP2  0.0000000 1.00000000 1.0000000
## 11  BD1 - NP3  0.7189696 0.47215965 0.8852993
## 12  BD2 - NP3  0.5751757 0.56517249 0.7706898
## 13  BD3 - NP3  2.1569088 0.03101277 0.4651915
## 14  NP1 - NP3  1.2941453 0.19561524 0.5868457
## 15  NP2 - NP3  1.2941453 0.19561524 0.7335572
## 
## [[4]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2  1.2480754 0.21200343 0.5300086
## 2   BD1 - BD3  0.8320503 0.40538056 0.6080708
## 3   BD2 - BD3 -0.4160251 0.67739160 0.7257767
## 4   BD1 - NP1  2.2188008 0.02650028 0.1987521
## 5   BD2 - NP1  0.9707253 0.33168507 0.5528084
## 6   BD3 - NP1  1.3867505 0.16551786 0.4965536
## 7   BD1 - NP2  1.8027756 0.07142346 0.3571173
## 8   BD2 - NP2  0.5547002 0.57909974 0.7896815
## 9   BD3 - NP2  0.9707253 0.33168507 0.6219095
## 10  NP1 - NP2 -0.4160251 0.67739160 0.7816057
## 11  BD1 - NP3  2.2188008 0.02650028 0.3975042
## 12  BD2 - NP3  0.9707253 0.33168507 0.7107537
## 13  BD3 - NP3  1.3867505 0.16551786 0.6206920
## 14  NP1 - NP3  0.0000000 1.00000000 1.0000000
## 15  NP2 - NP3  0.4160251 0.67739160 0.8467395
## 
## [[5]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.8320503 0.40538056 1.0000000
## 2   BD1 - BD3 -0.4160251 0.67739160 0.8467395
## 3   BD2 - BD3  0.4160251 0.67739160 0.9237158
## 4   BD1 - NP1 -0.9707253 0.33168507 1.0000000
## 5   BD2 - NP1 -0.1386750 0.88970694 0.8897069
## 6   BD3 - NP1 -0.5547002 0.57909974 0.8686496
## 7   BD1 - NP2 -1.6641006 0.09609233 1.0000000
## 8   BD2 - NP2 -0.8320503 0.40538056 1.0000000
## 9   BD3 - NP2 -1.2480754 0.21200343 1.0000000
## 10  NP1 - NP2 -0.6933752 0.48807409 0.9151389
## 11  BD1 - NP3 -0.2773501 0.78151129 0.9017438
## 12  BD2 - NP3  0.5547002 0.57909974 0.9651662
## 13  BD3 - NP3  0.1386750 0.88970694 0.9532574
## 14  NP1 - NP3  0.6933752 0.48807409 1.0000000
## 15  NP2 - NP3  1.3867505 0.16551786 1.0000000

Test between group

# Loop through each gene
for (gene in genes) {

  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ Group,
                                       data = filter(data_rel_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

  wilcox_test_rel <- lapply(unique(data_rel_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_rel
## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.394  0.39 0.39     ns       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.0931 0.093 0.093    ns       Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.678  0.68 0.68     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.0152 0.015 0.015    *        Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.394  0.39 0.39     ns       Wilcoxon

Test between substrates for each gene

#Adding sample type
my_sample_col <- data.frame(sample = rep(c("S", "W"), c(3,3)))
data_rel_long$type <- substr(my_sample_col$sample, 1, 1)

# Loop through each gene
for (gene in genes) {

  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ type,
                                       data = filter(data_rel_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

  wilcox_test_rel <- lapply(unique(data_rel_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_rel
## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.485  0.48 0.48     ns       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.310  0.31 0.31     ns       Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.934  0.93 0.93     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.699   0.7 0.7      ns       Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.485  0.48 0.48     ns       Wilcoxon

Log 10 Absolute abundance Statistical test

Load the data

data_log10 <- read.table("/Users/qcvp/R/baothinh.txt", header = TRUE, sep = "\t")

# Transform the data to long format for plotting
data_log10_long <- data_log10 %>%
  gather(key = "Gene", value = "Abundance", -Sample)

Checking distribution and add Group column

#Add a column to identify sample groups
data_log10_long$Group <- substr(data_log10_long$Sample, 1, 2)
data_log10_long$Cite <- substr(data_log10_long$Sample, 1, 3)

#Create a gene list  
genes <- c("Sul1", "Sul2", "TetQ", "TetM", "Intl1")

# Checking sample distribution for each Gene
for (gene in genes) {
  # Filter data for the current gene
  gene_log10 <- data_log10_long[data_log10_long$Gene == gene, ]
  
  # Checking distribution
  gene_log10 |>
    dplyr::select(Abundance) |>
    indices_normality(nrow = 1, ncol =1)}

Test between cite

# Loop through each gene
for (gene in genes) {
 
  # Calculate statistical significance based on Kruskal-Wallis test
  kruskal_test <- function(gene) {
    KW_result <- kruskal.test(Abundance ~ Cite, data = filter(data_log10_long, Gene == gene))
    return(KW_result)  }

  KW_result_log10 <- lapply(unique(data_log10_long$Gene), kruskal_test)
  
  # Posthoc Dunn test for each gene
  test_dunn <- function(gene) {
    dunn_result <- FSA::dunnTest(Abundance ~ Cite,   
                             data = filter(data_log10_long, Gene == gene),
                             method = "bh")
    return(dunn_result)  }

  dunn_result_log10 <- lapply(unique(data_log10_long$Gene), test_dunn)        
}
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.

Statistical test result

# Kruskal-Wallis
KW_result_log10
## [[1]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 4.8252, df = 5, p-value = 0.4376
## 
## 
## [[2]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.5385, df = 5, p-value = 0.6176
## 
## 
## [[3]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 13.143, df = 5, p-value = 0.02208
## 
## 
## [[4]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 11.741, df = 5, p-value = 0.03851
## 
## 
## [[5]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.0559, df = 5, p-value = 0.6914
# Dunn test
dunn_result_log10
## [[1]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.6015714 0.54745949 0.7465357
## 2   BD1 - BD3  0.2005238 0.84107095 0.8410710
## 3   BD2 - BD3  0.8020952 0.42249792 0.7921836
## 4   BD1 - NP1 -1.6041903 0.10867211 0.8150408
## 5   BD2 - NP1 -1.0026189 0.31604475 0.7901119
## 6   BD3 - NP1 -1.8047141 0.07111943 1.0000000
## 7   BD1 - NP2 -1.2031427 0.22892109 0.6867633
## 8   BD2 - NP2 -0.6015714 0.54745949 0.8211892
## 9   BD3 - NP2 -1.4036665 0.16041817 0.8020909
## 10  NP1 - NP2  0.4010476 0.68838509 0.7942905
## 11  BD1 - NP3 -0.4010476 0.68838509 0.8604814
## 12  BD2 - NP3  0.2005238 0.84107095 0.9011475
## 13  BD3 - NP3 -0.6015714 0.54745949 0.9124325
## 14  NP1 - NP3  1.2031427 0.22892109 0.8584541
## 15  NP2 - NP3  0.8020952 0.42249792 0.9053527
## 
## [[2]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.2005238 0.84107095 0.9011475
## 2   BD1 - BD3  1.0026189 0.31604475 0.9481342
## 3   BD2 - BD3  1.2031427 0.22892109 1.0000000
## 4   BD1 - NP1 -0.8020952 0.42249792 0.9053527
## 5   BD2 - NP1 -0.6015714 0.54745949 0.9124325
## 6   BD3 - NP1 -1.8047141 0.07111943 1.0000000
## 7   BD1 - NP2 -0.2005238 0.84107095 0.9704665
## 8   BD2 - NP2  0.0000000 1.00000000 1.0000000
## 9   BD3 - NP2 -1.2031427 0.22892109 1.0000000
## 10  NP1 - NP2  0.6015714 0.54745949 1.0000000
## 11  BD1 - NP3  0.2005238 0.84107095 1.0000000
## 12  BD2 - NP3  0.4010476 0.68838509 0.9387069
## 13  BD3 - NP3 -0.8020952 0.42249792 1.0000000
## 14  NP1 - NP3  1.0026189 0.31604475 1.0000000
## 15  NP2 - NP3  0.4010476 0.68838509 1.0000000
## 
## [[3]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z     P.unadj      P.adj
## 1   BD1 - BD2  0.2079257 0.835287008 0.83528701
## 2   BD1 - BD3 -2.0792567 0.037593766 0.14097662
## 3   BD2 - BD3 -2.2871823 0.022185183 0.11092591
## 4   BD1 - NP1 -1.0396283 0.298512609 0.44776891
## 5   BD2 - NP1 -1.2475540 0.212194434 0.39786456
## 6   BD3 - NP1  1.0396283 0.298512609 0.49752101
## 7   BD1 - NP2 -1.6634053 0.096231327 0.20620999
## 8   BD2 - NP2 -1.8713310 0.061299217 0.15324804
## 9   BD3 - NP2  0.4158513 0.677518794 0.72591299
## 10  NP1 - NP2 -0.6237770 0.532774047 0.61473929
## 11  BD1 - NP3  0.8317027 0.405576796 0.55305927
## 12  BD2 - NP3  0.6237770 0.532774047 0.66596756
## 13  BD3 - NP3  2.9109593 0.003603209 0.05404814
## 14  NP1 - NP3  1.8713310 0.061299217 0.18389765
## 15  NP2 - NP3  2.4951080 0.012591880 0.09443910
## 
## [[4]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z     P.unadj      P.adj
## 1   BD1 - BD2  1.6041903 0.108672108 0.32601633
## 2   BD1 - BD3  2.8073331 0.004995356 0.03746517
## 3   BD2 - BD3  1.2031427 0.228921089 0.49054519
## 4   BD1 - NP1  2.8073331 0.004995356 0.07493035
## 5   BD2 - NP1  1.2031427 0.228921089 0.57230272
## 6   BD3 - NP1  0.0000000 1.000000000 1.00000000
## 7   BD1 - NP2  2.2057617 0.027400690 0.10275259
## 8   BD2 - NP2  0.6015714 0.547459492 0.74653567
## 9   BD3 - NP2 -0.6015714 0.547459492 0.82118924
## 10  NP1 - NP2 -0.6015714 0.547459492 0.91243249
## 11  BD1 - NP3  2.6068093 0.009139025 0.04569513
## 12  BD2 - NP3  1.0026189 0.316044750 0.59258391
## 13  BD3 - NP3 -0.2005238 0.841070954 0.90114745
## 14  NP1 - NP3 -0.2005238 0.841070954 0.97046649
## 15  NP2 - NP3  0.4010476 0.688385094 0.86048137
## 
## [[5]]
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z   P.unadj     P.adj
## 1   BD1 - BD2  0.4010476 0.6883851 0.7942905
## 2   BD1 - BD3  0.8020952 0.4224979 0.7921836
## 3   BD2 - BD3  0.4010476 0.6883851 0.8604814
## 4   BD1 - NP1 -0.4010476 0.6883851 0.9387069
## 5   BD2 - NP1 -0.8020952 0.4224979 0.9053527
## 6   BD3 - NP1 -1.2031427 0.2289211 0.8584541
## 7   BD1 - NP2 -0.4010476 0.6883851 1.0000000
## 8   BD2 - NP2 -0.8020952 0.4224979 1.0000000
## 9   BD3 - NP2 -1.2031427 0.2289211 1.0000000
## 10  NP1 - NP2  0.0000000 1.0000000 1.0000000
## 11  BD1 - NP3  0.8020952 0.4224979 1.0000000
## 12  BD2 - NP3  0.4010476 0.6883851 1.0000000
## 13  BD3 - NP3  0.0000000 1.0000000 1.0000000
## 14  NP1 - NP3  1.2031427 0.2289211 1.0000000
## 15  NP2 - NP3  1.2031427 0.2289211 1.0000000

Test between substrates for each gene

#Adding sample type
my_sample_col <- data.frame(sample = rep(c("S", "W"), c(3,3)))
data_log10_long$type <- substr(my_sample_col$sample, 1, 1)

# Loop through each gene
for (gene in genes) {
  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ type,
                                       data = filter(data_log10_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

   wilcox_test_log10 <- lapply(unique(data_log10_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_log10
## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2       p  p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>    <dbl>  <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.00417 0.0042 0.0042   **       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.0118 0.012 0.012    *        Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.741  0.74 0.74     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.582  0.58 0.58     ns       Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.0163 0.016 0.016    *        Wilcoxon

PCOA

#Import file input
Relative_abun_data <- read.csv("/Users/qcvp/R/baothinh_rel.txt", header = T, sep = "\t")

Relative_abun_data2 <- as.matrix(Relative_abun_data[,-1])
rownames(Relative_abun_data2) <- Relative_abun_data[,1]

library(vegan)
library(ape)
## 
## Attaching package: 'ape'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:Hmisc':
## 
##     zoom
## The following object is masked from 'package:dplyr':
## 
##     where
rel.bray <- vegdist(Relative_abun_data2, method = "bray")
pcoa.sub <- pcoa(rel.bray)

pcoa_coord <- pcoa.sub$vectors[,1:2]
trans_data <- read.table("/Users/qcvp/R/Transect_baothinh.txt", header = T, sep = "\t")

rel_pcoa <- cbind(pcoa_coord, trans_data)

#Plot
a2 <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y =Axis.2, shape = Type, color = Substrate), size = 5) +
  scale_shape_manual(values = c(15, 16, 17)) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2), colour = "white") +
  xlab(paste("PCo1 (", round(pcoa.sub$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  coord_equal() +
  labs(color = "Transects", shape = "Sample types") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
a2