Custom script

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

Load necessary libraries

setwd(dir = "/Users/qcvp/R")
list.files()

##  [1] "baothinh_rel.txt"         "Baothinh.rmd"            
##  [3] "baothinh.txt"             "code intern"             
##  [5] "Contaminant.txt"          "custom script"           
##  [7] "mẫu plume báo trong nước" "Mẫu RIVS"                
##  [9] "metabar"                  "Nemesis code"            
## [11] "Nemesis-data"             "Others"                  
## [13] "outputs"                  "Plumecode"               
## [15] "Transect_baothinh.txt"

library(vegan)

## Loading required package: permute

## Loading required package: lattice

## This is vegan 2.6-8

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(datarium)
library(patchwork)
library(ggmap)

## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tibble    3.2.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggfortify)
library(ggcorrplot)
library(FactoMineR)
library(corrr)
library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(Hmisc)

## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(PCAtest)
library(microbiome)

## Loading required package: phyloseq
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2022 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## 
## Attaching package: 'microbiome'
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## 
## The following object is masked from 'package:vegan':
## 
##     diversity
## 
## The following object is masked from 'package:base':
## 
##     transform

library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
## 
## The following object is masked from 'package:ggmap':
## 
##     theme_nothing
## 
## The following object is masked from 'package:patchwork':
## 
##     align_plots

library(ggpubr)

## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(VIM)

## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## 
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## 
## The following object is masked from 'package:datasets':
## 
##     sleep

library(missMDA)
library(naniar)
library(RColorBrewer)
library(writexl)
library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(gridExtra)

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

Relative abundance

Load the data

data_rel <- read.table("/Users/qcvp/R/baothinh_rel.txt", header = TRUE, sep = "\t")

# Transform the data to long format for plotting
data_rel_long <- data_rel %>%
  gather(key = "Gene", value = "Abundance", -Sample.name)

Checking distribution and add Group column

#Add a column to identify sample groups
data_rel_long$Group <- substr(data_rel_long$Sample.name, 1, 2)
data_rel_long$Cite <- substr(data_rel_long$Sample.name, 1, 3)

#Create a gene list  
genes <- c("Sul1", "Sul2", "TetQ", "TetM", "Intl1")

# Checking sample distribution for each Gene
for (gene in genes) {
  # Filter data for the current gene
  gene_rel <- data_rel_long[data_rel_long$Gene == gene, ]
  
  # Checking distribution
  gene_rel |>
    dplyr::select(Abundance) |>
    indices_normality(nrow = 1, ncol =1)}

##Test between cite

# Loop through each gene
for (gene in genes) {
 
  # Calculate statistical significance based on Kruskal-Wallis test
  kruskal_test <- function(gene) {
    KW_result <- kruskal.test(Abundance ~ Cite, data = filter(data_rel_long, Gene == gene))
    return(KW_result)  }

  KW_result_abs <- lapply(unique(data_rel_long$Gene), kruskal_test)
  
  # Posthoc Dunn test for each gene
  test_dunn <- function(gene) {
    dunn_result <- FSA::dunnTest(Abundance ~ Cite,   
                             data = filter(data_rel_long, Gene == gene),
                             method = "bh")
    return(dunn_result)  }

  dunn_result_rell <- lapply(unique(data_rel_long$Gene), test_dunn)     
}

## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car

## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.

# Kruskal-Wallis
KW_result_abs

## [[1]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.4615, df = 5, p-value = 0.3622
## 
## 
## [[2]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.8462, df = 5, p-value = 0.3215
## 
## 
## [[3]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 5.5414, df = 5, p-value = 0.3534
## 
## 
## [[4]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 7.6154, df = 5, p-value = 0.1787
## 
## 
## [[5]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.5385, df = 5, p-value = 0.6176

# Dunn test
dunn_result_rell

## [[1]]

## Dunn (1964) Kruskal-Wallis multiple comparison

##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -1.5254255 0.12715299 0.6357650
## 2   BD1 - BD3 -0.5547002 0.57909974 0.8686496
## 3   BD2 - BD3  0.9707253 0.33168507 0.5528084
## 4   BD1 - NP1 -1.8027756 0.07142346 1.0000000
## 5   BD2 - NP1 -0.2773501 0.78151129 0.9017438
## 6   BD3 - NP1 -1.2480754 0.21200343 0.6360103
## 7   BD1 - NP2 -1.5254255 0.12715299 0.9536474
## 8   BD2 - NP2  0.0000000 1.00000000 1.0000000
## 9   BD3 - NP2 -0.9707253 0.33168507 0.6219095
## 10  NP1 - NP2  0.2773501 0.78151129 0.9768891
## 11  BD1 - NP3 -0.4160251 0.67739160 0.9237158
## 12  BD2 - NP3  1.1094004 0.26725749 0.5726946
## 13  BD3 - NP3  0.1386750 0.88970694 0.9532574
## 14  NP1 - NP3  1.3867505 0.16551786 0.6206920
## 15  NP2 - NP3  1.1094004 0.26725749 0.6681437
## 
## [[2]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.5547002 0.57909974 0.7896815
## 2   BD1 - BD3  0.0000000 1.00000000 1.0000000
## 3   BD2 - BD3  0.5547002 0.57909974 0.8686496
## 4   BD1 - NP1 -1.9414507 0.05220364 0.3915273
## 5   BD2 - NP1 -1.3867505 0.16551786 0.6206920
## 6   BD3 - NP1 -1.9414507 0.05220364 0.7830545
## 7   BD1 - NP2 -1.2480754 0.21200343 0.5300086
## 8   BD2 - NP2 -0.6933752 0.48807409 0.8134568
## 9   BD3 - NP2 -1.2480754 0.21200343 0.6360103
## 10  NP1 - NP2  0.6933752 0.48807409 0.9151389
## 11  BD1 - NP3 -0.4160251 0.67739160 0.7816057
## 12  BD2 - NP3  0.1386750 0.88970694 0.9532574
## 13  BD3 - NP3 -0.4160251 0.67739160 0.8467395
## 14  NP1 - NP3  1.5254255 0.12715299 0.6357650
## 15  NP2 - NP3  0.8320503 0.40538056 0.8686726
## 
## [[3]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2  0.1437939 0.88566320 0.9489249
## 2   BD1 - BD3 -1.4379392 0.15045130 0.7522565
## 3   BD2 - BD3 -1.5817331 0.11371051 0.8528288
## 4   BD1 - NP1 -0.5751757 0.56517249 0.6521221
## 5   BD2 - NP1 -0.7189696 0.47215965 0.7082395
## 6   BD3 - NP1  0.8627635 0.38826750 0.8320018
## 7   BD1 - NP2 -0.5751757 0.56517249 0.7064656
## 8   BD2 - NP2 -0.7189696 0.47215965 0.7869327
## 9   BD3 - NP2  0.8627635 0.38826750 0.9706687
## 10  NP1 - NP2  0.0000000 1.00000000 1.0000000
## 11  BD1 - NP3  0.7189696 0.47215965 0.8852993
## 12  BD2 - NP3  0.5751757 0.56517249 0.7706898
## 13  BD3 - NP3  2.1569088 0.03101277 0.4651915
## 14  NP1 - NP3  1.2941453 0.19561524 0.5868457
## 15  NP2 - NP3  1.2941453 0.19561524 0.7335572
## 
## [[4]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2  1.2480754 0.21200343 0.5300086
## 2   BD1 - BD3  0.8320503 0.40538056 0.6080708
## 3   BD2 - BD3 -0.4160251 0.67739160 0.7257767
## 4   BD1 - NP1  2.2188008 0.02650028 0.1987521
## 5   BD2 - NP1  0.9707253 0.33168507 0.5528084
## 6   BD3 - NP1  1.3867505 0.16551786 0.4965536
## 7   BD1 - NP2  1.8027756 0.07142346 0.3571173
## 8   BD2 - NP2  0.5547002 0.57909974 0.7896815
## 9   BD3 - NP2  0.9707253 0.33168507 0.6219095
## 10  NP1 - NP2 -0.4160251 0.67739160 0.7816057
## 11  BD1 - NP3  2.2188008 0.02650028 0.3975042
## 12  BD2 - NP3  0.9707253 0.33168507 0.7107537
## 13  BD3 - NP3  1.3867505 0.16551786 0.6206920
## 14  NP1 - NP3  0.0000000 1.00000000 1.0000000
## 15  NP2 - NP3  0.4160251 0.67739160 0.8467395
## 
## [[5]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.8320503 0.40538056 1.0000000
## 2   BD1 - BD3 -0.4160251 0.67739160 0.8467395
## 3   BD2 - BD3  0.4160251 0.67739160 0.9237158
## 4   BD1 - NP1 -0.9707253 0.33168507 1.0000000
## 5   BD2 - NP1 -0.1386750 0.88970694 0.8897069
## 6   BD3 - NP1 -0.5547002 0.57909974 0.8686496
## 7   BD1 - NP2 -1.6641006 0.09609233 1.0000000
## 8   BD2 - NP2 -0.8320503 0.40538056 1.0000000
## 9   BD3 - NP2 -1.2480754 0.21200343 1.0000000
## 10  NP1 - NP2 -0.6933752 0.48807409 0.9151389
## 11  BD1 - NP3 -0.2773501 0.78151129 0.9017438
## 12  BD2 - NP3  0.5547002 0.57909974 0.9651662
## 13  BD3 - NP3  0.1386750 0.88970694 0.9532574
## 14  NP1 - NP3  0.6933752 0.48807409 1.0000000
## 15  NP2 - NP3  1.3867505 0.16551786 1.0000000

Test between group

# Loop through each gene
for (gene in genes) {

  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ Group,
                                       data = filter(data_rel_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

  wilcox_test_rel <- lapply(unique(data_rel_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_rel

## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.394  0.39 0.39     ns       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.0931 0.093 0.093    ns       Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.678  0.68 0.68     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.0152 0.015 0.015    *        Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance NP     BD     0.394  0.39 0.39     ns       Wilcoxon

Test between substrates for each gene

#Adding sample type
my_sample_col <- data.frame(sample = rep(c("S", "W"), c(3,3)))
data_rel_long$type <- substr(my_sample_col$sample, 1, 1)

# Loop through each gene
for (gene in genes) {

  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ type,
                                       data = filter(data_rel_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

  wilcox_test_rel <- lapply(unique(data_rel_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_rel

## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.485  0.48 0.48     ns       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.310  0.31 0.31     ns       Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.934  0.93 0.93     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.699   0.7 0.7      ns       Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.485  0.48 0.48     ns       Wilcoxon

Log 10 Absolute abundance Statistical test

Load the data

data_log10 <- read.table("/Users/qcvp/R/baothinh.txt", header = TRUE, sep = "\t")

# Transform the data to long format for plotting
data_log10_long <- data_log10 %>%
  gather(key = "Gene", value = "Abundance", -Sample)

Checking distribution and add Group column

#Add a column to identify sample groups
data_log10_long$Group <- substr(data_log10_long$Sample, 1, 2)
data_log10_long$Cite <- substr(data_log10_long$Sample, 1, 3)

#Create a gene list  
genes <- c("Sul1", "Sul2", "TetQ", "TetM", "Intl1")

# Checking sample distribution for each Gene
for (gene in genes) {
  # Filter data for the current gene
  gene_log10 <- data_log10_long[data_log10_long$Gene == gene, ]
  
  # Checking distribution
  gene_log10 |>
    dplyr::select(Abundance) |>
    indices_normality(nrow = 1, ncol =1)}

Test between cite

# Loop through each gene
for (gene in genes) {
 
  # Calculate statistical significance based on Kruskal-Wallis test
  kruskal_test <- function(gene) {
    KW_result <- kruskal.test(Abundance ~ Cite, data = filter(data_log10_long, Gene == gene))
    return(KW_result)  }

  KW_result_log10 <- lapply(unique(data_log10_long$Gene), kruskal_test)
  
  # Posthoc Dunn test for each gene
  test_dunn <- function(gene) {
    dunn_result <- FSA::dunnTest(Abundance ~ Cite,   
                             data = filter(data_log10_long, Gene == gene),
                             method = "bh")
    return(dunn_result)  }

  dunn_result_log10 <- lapply(unique(data_log10_long$Gene), test_dunn)        
}

## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.
## Warning: Cite was coerced to a factor.

Statistical test result

# Kruskal-Wallis
KW_result_log10

## [[1]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 4.8252, df = 5, p-value = 0.4376
## 
## 
## [[2]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.5385, df = 5, p-value = 0.6176
## 
## 
## [[3]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 13.143, df = 5, p-value = 0.02208
## 
## 
## [[4]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 11.741, df = 5, p-value = 0.03851
## 
## 
## [[5]]
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Cite
## Kruskal-Wallis chi-squared = 3.0559, df = 5, p-value = 0.6914

# Dunn test
dunn_result_log10

## [[1]]

## Dunn (1964) Kruskal-Wallis multiple comparison

##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.6015714 0.54745949 0.7465357
## 2   BD1 - BD3  0.2005238 0.84107095 0.8410710
## 3   BD2 - BD3  0.8020952 0.42249792 0.7921836
## 4   BD1 - NP1 -1.6041903 0.10867211 0.8150408
## 5   BD2 - NP1 -1.0026189 0.31604475 0.7901119
## 6   BD3 - NP1 -1.8047141 0.07111943 1.0000000
## 7   BD1 - NP2 -1.2031427 0.22892109 0.6867633
## 8   BD2 - NP2 -0.6015714 0.54745949 0.8211892
## 9   BD3 - NP2 -1.4036665 0.16041817 0.8020909
## 10  NP1 - NP2  0.4010476 0.68838509 0.7942905
## 11  BD1 - NP3 -0.4010476 0.68838509 0.8604814
## 12  BD2 - NP3  0.2005238 0.84107095 0.9011475
## 13  BD3 - NP3 -0.6015714 0.54745949 0.9124325
## 14  NP1 - NP3  1.2031427 0.22892109 0.8584541
## 15  NP2 - NP3  0.8020952 0.42249792 0.9053527
## 
## [[2]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z    P.unadj     P.adj
## 1   BD1 - BD2 -0.2005238 0.84107095 0.9011475
## 2   BD1 - BD3  1.0026189 0.31604475 0.9481342
## 3   BD2 - BD3  1.2031427 0.22892109 1.0000000
## 4   BD1 - NP1 -0.8020952 0.42249792 0.9053527
## 5   BD2 - NP1 -0.6015714 0.54745949 0.9124325
## 6   BD3 - NP1 -1.8047141 0.07111943 1.0000000
## 7   BD1 - NP2 -0.2005238 0.84107095 0.9704665
## 8   BD2 - NP2  0.0000000 1.00000000 1.0000000
## 9   BD3 - NP2 -1.2031427 0.22892109 1.0000000
## 10  NP1 - NP2  0.6015714 0.54745949 1.0000000
## 11  BD1 - NP3  0.2005238 0.84107095 1.0000000
## 12  BD2 - NP3  0.4010476 0.68838509 0.9387069
## 13  BD3 - NP3 -0.8020952 0.42249792 1.0000000
## 14  NP1 - NP3  1.0026189 0.31604475 1.0000000
## 15  NP2 - NP3  0.4010476 0.68838509 1.0000000
## 
## [[3]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z     P.unadj      P.adj
## 1   BD1 - BD2  0.2079257 0.835287008 0.83528701
## 2   BD1 - BD3 -2.0792567 0.037593766 0.14097662
## 3   BD2 - BD3 -2.2871823 0.022185183 0.11092591
## 4   BD1 - NP1 -1.0396283 0.298512609 0.44776891
## 5   BD2 - NP1 -1.2475540 0.212194434 0.39786456
## 6   BD3 - NP1  1.0396283 0.298512609 0.49752101
## 7   BD1 - NP2 -1.6634053 0.096231327 0.20620999
## 8   BD2 - NP2 -1.8713310 0.061299217 0.15324804
## 9   BD3 - NP2  0.4158513 0.677518794 0.72591299
## 10  NP1 - NP2 -0.6237770 0.532774047 0.61473929
## 11  BD1 - NP3  0.8317027 0.405576796 0.55305927
## 12  BD2 - NP3  0.6237770 0.532774047 0.66596756
## 13  BD3 - NP3  2.9109593 0.003603209 0.05404814
## 14  NP1 - NP3  1.8713310 0.061299217 0.18389765
## 15  NP2 - NP3  2.4951080 0.012591880 0.09443910
## 
## [[4]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z     P.unadj      P.adj
## 1   BD1 - BD2  1.6041903 0.108672108 0.32601633
## 2   BD1 - BD3  2.8073331 0.004995356 0.03746517
## 3   BD2 - BD3  1.2031427 0.228921089 0.49054519
## 4   BD1 - NP1  2.8073331 0.004995356 0.07493035
## 5   BD2 - NP1  1.2031427 0.228921089 0.57230272
## 6   BD3 - NP1  0.0000000 1.000000000 1.00000000
## 7   BD1 - NP2  2.2057617 0.027400690 0.10275259
## 8   BD2 - NP2  0.6015714 0.547459492 0.74653567
## 9   BD3 - NP2 -0.6015714 0.547459492 0.82118924
## 10  NP1 - NP2 -0.6015714 0.547459492 0.91243249
## 11  BD1 - NP3  2.6068093 0.009139025 0.04569513
## 12  BD2 - NP3  1.0026189 0.316044750 0.59258391
## 13  BD3 - NP3 -0.2005238 0.841070954 0.90114745
## 14  NP1 - NP3 -0.2005238 0.841070954 0.97046649
## 15  NP2 - NP3  0.4010476 0.688385094 0.86048137
## 
## [[5]]

## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.

##    Comparison          Z   P.unadj     P.adj
## 1   BD1 - BD2  0.4010476 0.6883851 0.7942905
## 2   BD1 - BD3  0.8020952 0.4224979 0.7921836
## 3   BD2 - BD3  0.4010476 0.6883851 0.8604814
## 4   BD1 - NP1 -0.4010476 0.6883851 0.9387069
## 5   BD2 - NP1 -0.8020952 0.4224979 0.9053527
## 6   BD3 - NP1 -1.2031427 0.2289211 0.8584541
## 7   BD1 - NP2 -0.4010476 0.6883851 1.0000000
## 8   BD2 - NP2 -0.8020952 0.4224979 1.0000000
## 9   BD3 - NP2 -1.2031427 0.2289211 1.0000000
## 10  NP1 - NP2  0.0000000 1.0000000 1.0000000
## 11  BD1 - NP3  0.8020952 0.4224979 1.0000000
## 12  BD2 - NP3  0.4010476 0.6883851 1.0000000
## 13  BD3 - NP3  0.0000000 1.0000000 1.0000000
## 14  NP1 - NP3  1.2031427 0.2289211 1.0000000
## 15  NP2 - NP3  1.2031427 0.2289211 1.0000000

Test between substrates for each gene

#Adding sample type
my_sample_col <- data.frame(sample = rep(c("S", "W"), c(3,3)))
data_log10_long$type <- substr(my_sample_col$sample, 1, 1)

# Loop through each gene
for (gene in genes) {
  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ type,
                                       data = filter(data_log10_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

   wilcox_test_log10 <- lapply(unique(data_log10_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_log10

## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2       p  p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>    <dbl>  <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.00417 0.0042 0.0042   **       Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.0118 0.012 0.012    *        Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.741  0.74 0.74     ns       Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2     p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>  <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.582  0.58 0.58     ns       Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance S      W      0.0163 0.016 0.016    *        Wilcoxon

PCOA

#Import file input
Relative_abun_data <- read.csv("/Users/qcvp/R/baothinh_rel.txt", header = T, sep = "\t")

Relative_abun_data2 <- as.matrix(Relative_abun_data[,-1])
rownames(Relative_abun_data2) <- Relative_abun_data[,1]

library(vegan)
library(ape)

## 
## Attaching package: 'ape'

## The following object is masked from 'package:ggpubr':
## 
##     rotate

## The following object is masked from 'package:Hmisc':
## 
##     zoom

## The following object is masked from 'package:dplyr':
## 
##     where

rel.bray <- vegdist(Relative_abun_data2, method = "bray")
pcoa.sub <- pcoa(rel.bray)

pcoa_coord <- pcoa.sub$vectors[,1:2]
trans_data <- read.table("/Users/qcvp/R/Transect_baothinh.txt", header = T, sep = "\t")

rel_pcoa <- cbind(pcoa_coord, trans_data)

#Plot
a2 <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y =Axis.2, shape = Type, color = Substrate), size = 5) +
  scale_shape_manual(values = c(15, 16, 17)) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2), colour = "white") +
  xlab(paste("PCo1 (", round(pcoa.sub$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  coord_equal() +
  labs(color = "Transects", shape = "Sample types") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
a2

Final_Script

Hoang et al

2024-08-07

Custom script

Load necessary libraries

Relative abundance

Load the data

Checking distribution and add Group column

Test between group

Test between substrates for each gene

Log 10 Absolute abundance Statistical test

Load the data

Checking distribution and add Group column

Test between cite

Statistical test result

Test between substrates for each gene

PCOA