Custom script

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

#ARGs Abundance

library(pheatmap)
#Import file input
log_abun_data <- read.csv("/Users/qcvp/R/mẫu plume báo trong nước/log10abso.txt", header = T, sep = "\t")

log_abun_data2 <- as.matrix(log_abun_data[,-1])
rownames(log_abun_data2) <- log_abun_data[,1]

log_abun_data3 <- as.matrix(log_abun_data2[,-c(1,6)])
log_abun_data4 <- as.matrix(log_abun_data2[,c(1,6)])

#Heatmap with Intl
pheatmap::pheatmap(log_abun_data3, cluster_rows = TRUE,cluster_cols = FALSE)

#Heatmap without Intl
pheatmap::pheatmap(log_abun_data4, cluster_rows = TRUE,cluster_cols = FALSE)

#ARG resistance heatmap

library(reshape)
library(ggplot2)

ksddata <- read.csv("/Users/qcvp/R/mẫu plume báo trong nước/ksd plume.txt", header = T, sep = "\t")

for (x in 3:16) {
       ksddata[,x] <- gsub( 'R', '1', ksddata[,x])
       ksddata[,x] <- gsub( 'S', '0', ksddata[,x])
       ksddata[,x] <- gsub( 'I', '0.5', ksddata[,x]) 
       ksddata[,x] <- gsub( 'NA', '2', ksddata[,x])}

ksddata2 <- as.matrix(ksddata[,-1])
rownames(ksddata2) <- ksddata2[,1]
ksddata2 <- as.matrix(ksddata2[,-1])

data_long <- melt(ksddata2)

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

#Add group of antitiobics
data_long$group <- NA

data_long$group[data_long$X2 %in% c('TCC','PTZ')] <- 'Penicillin'
data_long$group[data_long$X2 %in% c('FEP', 'CAZ' , 'CTX')] <- 'Cephalosporins'
data_long$group[data_long$X2 %in% c('IPM', 'MEM')] <- 'Carbapanems'
data_long$group[data_long$X2 %in% c('CN', 'AN', 'TOB')] <- 'Aminoglycosides'
data_long$group[data_long$X2 %in% c('CIP', 'LEV')] <- 'Fluoroquiolones'
data_long$group[data_long$X2 %in% c('ATM')] <- 'Monobactams'
data_long$group[data_long$X2 %in% c('SXT')] <- 'Miscellaneous Antibiotics'



#Plotting 
data_long$group <- factor(data_long$group, levels = c("Penicillin", "Cephalosporins", "Carbapanems", "Aminoglycosides", "Fluoroquiolones", "Monobactams", "Miscellaneous Antibiotics"))

data_long$X2 <- factor(data_long$X2, levels = c("PTZ", "TCC", "CAZ", "CTX", "FEP",  "IPM", "AN", "CN", "TOB", "CIP", "LEV", "MEM", "ATM",  "SXT"))


kd_plot <- ggplot(data_long, aes(x = X2, y = X1, fill = value)) +
  geom_tile(colour = "black") +
  scale_fill_manual(values = c('1' = 'red', '0.5' = 'yellow', '0' = 'green', '2' = 'gray')) +
  theme_minimal() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        axis.text.x = element_text(size=13, angle = 45, hjust = 1), 
        axis.text.y = element_text(size=13),
        legend.title = element_blank(),
        legend.text = element_blank(),
        panel.spacing = unit(0, "lines"),
        strip.text =  element_text(size=13))+
  facet_grid(~group, scales = "free", space = "free")

kd_plot

#MAR plot ##Preparing data

library(tidyr)

## 
## Attaching package: 'tidyr'

## The following objects are masked from 'package:reshape':
## 
##     expand, smiths

MAR_data <- read.table("/Users/qcvp/R/mẫu plume báo trong nước/ksd1 plume.txt", sep = "\t", header = T)
names(MAR_data)

##  [1] "Sample"   "Taxonomy" "TCC"      "PTZ"      "FEP"      "IPM"     
##  [7] "CN"       "CIP"      "LEV"      "ATM"      "CAZ"      "SXT"     
## [13] "CTX"      "MEM"      "AN"       "TOB"

row.names(MAR_data) <- MAR_data$SampleID
MAR_data2 <- MAR_data[,-1]
MAR_data2 <- MAR_data2 %>% separate(Taxonomy, c("Genus", "Species"))

##Plotting

library(ggplot2)
# Adding color for plotting
custom_colors <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
                    "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", 
                    "#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")

# MAR calculate
MAR_data2$MAR <- apply(MAR_data2[,3:16], 1, function(x) (length(which(x == "R"))/length(which(x %in% c("R", "S", "I")))))

# Plotting
box3 = ggplot(MAR_data2, aes(x = Species , y = MAR, color = Species)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3) + theme_bw() +
  geom_boxplot(aes(colour = Species), alpha=0.1, outlier.colour = NA) + 
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size=20),
        axis.text.x = element_blank()) + 
    ylab("MAR index") + scale_fill_manual(values = custom_colors) +   geom_hline(yintercept = 0.2, linetype="dashed") +
  facet_wrap(~Genus, nrow=1, scales = "free_x")
box3

#Bacterial composition plot

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:reshape':
## 
##     rename

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(reshape)
library(stringr)
library(ggplot2)
MAR_data  <- MAR_data  %>%
            mutate(Abundance = 1/nrow(MAR_data)) %>%
            mutate(Transect = "Isolated Bacteria")

#Plotting
b11 <- ggplot(MAR_data, aes(x = Transect, fill = Taxonomy)) + 
  geom_bar(position ="fill") + scale_color_manual(custom_colors) +
  ylab("Isolated bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
b11

PCOA

Plot

library(vegan)

## Loading required package: permute

## Loading required package: lattice

library(ape)

## 
## Attaching package: 'ape'

## The following object is masked from 'package:dplyr':
## 
##     where

rel.bray <- vegdist(log_abun_data2, method = "bray")

## Warning in vegdist(log_abun_data2, method = "bray"): results may be meaningless because data have negative entries
##                  in method "bray"

pcoa.sub <- pcoa(rel.bray)
pcoa_coord <- pcoa.sub$vectors[,1:2]

my_sample_col <- data.frame(sample = rep(c("Water", "Sediment"), c(18,18)))
row.names(my_sample_col) <- rownames(pcoa_coord)
rel_pcoa <- cbind(pcoa_coord, my_sample_col)


#Plot
a1 <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2, color = sample), alpha = 0.7, size = 5, shape=18) +
  geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.5) +
  xlab(paste("PCo1 (", round(pcoa.sub$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  coord_equal() +
  labs(color = "Sample types") +
  scale_colour_manual(breaks =c ("Sediment", "Water"), 
                      values = c("lightgoldenrod1", "blue")) +
  theme(axis.title.x = element_text(size=15), # remove x-axis labels
        axis.title.y = element_text(size=15), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
a1

##Permanova test

adonis2(rel.bray ~ sample, data = rel_pcoa,permutations = 1000, method ="bray")

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = rel.bray ~ sample, data = rel_pcoa, permutations = 1000, method = "bray")
##          Df SumOfSqs      R2      F   Pr(>F)    
## Model     1  0.88823 0.73822 95.882 0.000999 ***
## Residual 34  0.31497 0.26178                    
## Total    35  1.20320 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Barplot of abundances

# Load data
Abs_abun_arg <- read.table("/Users/qcvp/R/mẫu plume báo trong nước/log10abso.txt", header = TRUE, sep = "\t")

# Reshape the data to long format
Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "ARG", value.name = "Abundance")
Abs_abun_arg_long <- Abs_abun_arg_long %>%
  mutate(Type = str_sub(Sample.name, -1, -1))

#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
  group_by(variable, Type) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    SD = sd(value, na.rm = TRUE))

## `summarise()` has grouped output by 'variable'. You can override using the
## `.groups` argument.

Abs_abun_summary

## # A tibble: 12 × 4
## # Groups:   variable [6]
##    variable  Type   Mean    SD
##    <fct>     <chr> <dbl> <dbl>
##  1 X16S.qPCR S     5.09  0.223
##  2 X16S.qPCR W     5.57  0.343
##  3 Sul1      S     2.05  0.203
##  4 Sul1      W     2.50  1.16 
##  5 Sul2      S     2.50  0.244
##  6 Sul2      W     2.67  0.880
##  7 tetQ      S     0.536 0.928
##  8 tetQ      W     1.77  0.652
##  9 tetM      S     4.86  0.636
## 10 tetM      W     1.21  0.617
## 11 Intl      S     6.28  0.442
## 12 Intl      W     1.60  0.688

#Only Sed
Abs_abun_arg_sed <-Abs_abun_summary[Abs_abun_summary$Type == "S",]
Abs_abun_arg_wat <-Abs_abun_summary[Abs_abun_summary$Type== "W",]

# Plot using ggplot2
# Plot for sediment data
s <- ggplot(Abs_abun_arg_sed, aes(x = Type, y = Mean, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin =  Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "none",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_blank()) +
  labs(x = "ARGs", y = "Mean Absolute Abundance (Log10)", title = "Sediment") +
  facet_grid(~ variable, scales = "free")

s

# Plot for water data
w <- ggplot(Abs_abun_arg_wat, aes(x = Type, y = Mean, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin =  Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "none",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_blank()) +
  labs(x = "ARGs", y = "Mean Absolute Abundance (Log10)", title = "Water column") +
  facet_grid(~ variable, scales = "free") 
w

#Statistical tests ## Load the data

# Transform the data to long format for plotting
Abs_abun_arg_long <- Abs_abun_arg %>%
  gather(key = "Gene", value = "Abundance", -Sample.name)

Checking distribution and add Group column

#Add a column to identify sample groups
Abs_abun_arg_long <- Abs_abun_arg_long %>%
  mutate(Type = str_sub(Sample.name, -1, -1))

#Create a gene list  
genes <- c("Sul1", "Sul2", "tetQ", "tetM", "Intl")

# Checking sample distribution for each Gene
for (gene in genes) {
  # Filter data for the current gene
  gene_log10 <- Abs_abun_arg_long[Abs_abun_arg_long$Gene == gene, ]
  
  # Checking distribution
  gene_log10 |>
    dplyr::select(Abundance) |>
    indices_normality(nrow = 1, ncol =1)}

Test between substrates for each gene

# Loop through each gene
for (gene in genes) {
  # Posthoc Dunn test for each gene
  test_wilcox <- function(gene) {
    
    test <- ggpubr::compare_means(Abundance ~ Type,
                                       data = filter(Abs_abun_arg_long, Gene == gene),
                                       method = "wilcox.test")
      return(test)
  }

   wilcox_test_log10 <- lapply(unique(Abs_abun_arg_long$Gene), test_wilcox)        

}

# Display results
 wilcox_test_log10

## [[1]]
## # A tibble: 1 × 8
##   .y.       group1 group2        p  p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>     <dbl>  <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.000200 0.0002 2e-04    ***      Wilcoxon
## 
## [[2]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.0846 0.085 0.085    ns       Wilcoxon
## 
## [[3]]
## # A tibble: 1 × 8
##   .y.       group1 group2      p p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.0327 0.033 0.033    *        Wilcoxon
## 
## [[4]]
## # A tibble: 1 × 8
##   .y.       group1 group2         p    p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>      <dbl>    <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.0000515 0.000052 5.2e-05  ****     Wilcoxon
## 
## [[5]]
## # A tibble: 1 × 8
##   .y.       group1 group2           p      p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>        <dbl>      <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.000000322 0.00000032 3.2e-07  ****     Wilcoxon
## 
## [[6]]
## # A tibble: 1 × 8
##   .y.       group1 group2           p      p.adj p.format p.signif method  
##   <chr>     <chr>  <chr>        <dbl>      <dbl> <chr>    <chr>    <chr>   
## 1 Abundance W      S      0.000000322 0.00000032 3.2e-07  ****     Wilcoxon

Final_Script

Hoang et al

2024-08-07

Custom script

PCOA

Plot

Barplot of abundances

Checking distribution and add Group column

Test between substrates for each gene