indices_normality <- function(rich, nrow, ncol) {
### p-value < 0.05 means data failed normality test
par(mfrow = c(nrow, ncol))
for (i in names(rich)) {
shap <- shapiro.test(rich[, i])
qqnorm(rich[, i], main = i, sub = shap$p.value)
qqline(rich[, i])
}
par(mfrow = c(1, 1))
}
#ARGs Abundance
library(pheatmap)
#Import file input
log_abun_data <- read.csv("/Users/qcvp/R/mẫu plume báo trong nước/log10abso.txt", header = T, sep = "\t")
log_abun_data2 <- as.matrix(log_abun_data[,-1])
rownames(log_abun_data2) <- log_abun_data[,1]
log_abun_data3 <- as.matrix(log_abun_data2[,-c(1,6)])
log_abun_data4 <- as.matrix(log_abun_data2[,c(1,6)])
#Heatmap with Intl
pheatmap::pheatmap(log_abun_data3, cluster_rows = TRUE,cluster_cols = FALSE)
#Heatmap without Intl
pheatmap::pheatmap(log_abun_data4, cluster_rows = TRUE,cluster_cols = FALSE)
#ARG resistance heatmap
library(reshape)
library(ggplot2)
ksddata <- read.csv("/Users/qcvp/R/mẫu plume báo trong nước/ksd plume.txt", header = T, sep = "\t")
for (x in 3:16) {
ksddata[,x] <- gsub( 'R', '1', ksddata[,x])
ksddata[,x] <- gsub( 'S', '0', ksddata[,x])
ksddata[,x] <- gsub( 'I', '0.5', ksddata[,x])
ksddata[,x] <- gsub( 'NA', '2', ksddata[,x])}
ksddata2 <- as.matrix(ksddata[,-1])
rownames(ksddata2) <- ksddata2[,1]
ksddata2 <- as.matrix(ksddata2[,-1])
data_long <- melt(ksddata2)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
#Add group of antitiobics
data_long$group <- NA
data_long$group[data_long$X2 %in% c('TCC','PTZ')] <- 'Penicillin'
data_long$group[data_long$X2 %in% c('FEP', 'CAZ' , 'CTX')] <- 'Cephalosporins'
data_long$group[data_long$X2 %in% c('IPM', 'MEM')] <- 'Carbapanems'
data_long$group[data_long$X2 %in% c('CN', 'AN', 'TOB')] <- 'Aminoglycosides'
data_long$group[data_long$X2 %in% c('CIP', 'LEV')] <- 'Fluoroquiolones'
data_long$group[data_long$X2 %in% c('ATM')] <- 'Monobactams'
data_long$group[data_long$X2 %in% c('SXT')] <- 'Miscellaneous Antibiotics'
#Plotting
data_long$group <- factor(data_long$group, levels = c("Penicillin", "Cephalosporins", "Carbapanems", "Aminoglycosides", "Fluoroquiolones", "Monobactams", "Miscellaneous Antibiotics"))
data_long$X2 <- factor(data_long$X2, levels = c("PTZ", "TCC", "CAZ", "CTX", "FEP", "IPM", "AN", "CN", "TOB", "CIP", "LEV", "MEM", "ATM", "SXT"))
kd_plot <- ggplot(data_long, aes(x = X2, y = X1, fill = value)) +
geom_tile(colour = "black") +
scale_fill_manual(values = c('1' = 'red', '0.5' = 'yellow', '0' = 'green', '2' = 'gray')) +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none",
axis.text.x = element_text(size=13, angle = 45, hjust = 1),
axis.text.y = element_text(size=13),
legend.title = element_blank(),
legend.text = element_blank(),
panel.spacing = unit(0, "lines"),
strip.text = element_text(size=13))+
facet_grid(~group, scales = "free", space = "free")
kd_plot
#MAR plot ##Preparing data
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:reshape':
##
## expand, smiths
MAR_data <- read.table("/Users/qcvp/R/mẫu plume báo trong nước/ksd1 plume.txt", sep = "\t", header = T)
names(MAR_data)
## [1] "Sample" "Taxonomy" "TCC" "PTZ" "FEP" "IPM"
## [7] "CN" "CIP" "LEV" "ATM" "CAZ" "SXT"
## [13] "CTX" "MEM" "AN" "TOB"
row.names(MAR_data) <- MAR_data$SampleID
MAR_data2 <- MAR_data[,-1]
MAR_data2 <- MAR_data2 %>% separate(Taxonomy, c("Genus", "Species"))
##Plotting
library(ggplot2)
# Adding color for plotting
custom_colors <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")
# MAR calculate
MAR_data2$MAR <- apply(MAR_data2[,3:16], 1, function(x) (length(which(x == "R"))/length(which(x %in% c("R", "S", "I")))))
# Plotting
box3 = ggplot(MAR_data2, aes(x = Species , y = MAR, color = Species)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3) + theme_bw() +
geom_boxplot(aes(colour = Species), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size=20),
axis.text.x = element_blank()) +
ylab("MAR index") + scale_fill_manual(values = custom_colors) + geom_hline(yintercept = 0.2, linetype="dashed") +
facet_wrap(~Genus, nrow=1, scales = "free_x")
box3
#Bacterial composition plot
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:reshape':
##
## rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
library(stringr)
library(ggplot2)
MAR_data <- MAR_data %>%
mutate(Abundance = 1/nrow(MAR_data)) %>%
mutate(Transect = "Isolated Bacteria")
#Plotting
b11 <- ggplot(MAR_data, aes(x = Transect, fill = Taxonomy)) +
geom_bar(position ="fill") + scale_color_manual(custom_colors) +
ylab("Isolated bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
b11
library(vegan)
## Loading required package: permute
## Loading required package: lattice
library(ape)
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
rel.bray <- vegdist(log_abun_data2, method = "bray")
## Warning in vegdist(log_abun_data2, method = "bray"): results may be meaningless because data have negative entries
## in method "bray"
pcoa.sub <- pcoa(rel.bray)
pcoa_coord <- pcoa.sub$vectors[,1:2]
my_sample_col <- data.frame(sample = rep(c("Water", "Sediment"), c(18,18)))
row.names(my_sample_col) <- rownames(pcoa_coord)
rel_pcoa <- cbind(pcoa_coord, my_sample_col)
#Plot
a1 <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2, color = sample), alpha = 0.7, size = 5, shape=18) +
geom_point(data = rel_pcoa, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.5) +
xlab(paste("PCo1 (", round(pcoa.sub$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub$values$Relative_eig[2]*100, 1), "%)")) +
theme_bw() +
coord_equal() +
labs(color = "Sample types") +
scale_colour_manual(breaks =c ("Sediment", "Water"),
values = c("lightgoldenrod1", "blue")) +
theme(axis.title.x = element_text(size=15), # remove x-axis labels
axis.title.y = element_text(size=15), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank())
a1
##Permanova test
adonis2(rel.bray ~ sample, data = rel_pcoa,permutations = 1000, method ="bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = rel.bray ~ sample, data = rel_pcoa, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.88823 0.73822 95.882 0.000999 ***
## Residual 34 0.31497 0.26178
## Total 35 1.20320 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Load data
Abs_abun_arg <- read.table("/Users/qcvp/R/mẫu plume báo trong nước/log10abso.txt", header = TRUE, sep = "\t")
# Reshape the data to long format
Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "ARG", value.name = "Abundance")
Abs_abun_arg_long <- Abs_abun_arg_long %>%
mutate(Type = str_sub(Sample.name, -1, -1))
#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
group_by(variable, Type) %>%
summarise(
Mean = mean(value, na.rm = TRUE),
SD = sd(value, na.rm = TRUE))
## `summarise()` has grouped output by 'variable'. You can override using the
## `.groups` argument.
Abs_abun_summary
## # A tibble: 12 × 4
## # Groups: variable [6]
## variable Type Mean SD
## <fct> <chr> <dbl> <dbl>
## 1 X16S.qPCR S 5.09 0.223
## 2 X16S.qPCR W 5.57 0.343
## 3 Sul1 S 2.05 0.203
## 4 Sul1 W 2.50 1.16
## 5 Sul2 S 2.50 0.244
## 6 Sul2 W 2.67 0.880
## 7 tetQ S 0.536 0.928
## 8 tetQ W 1.77 0.652
## 9 tetM S 4.86 0.636
## 10 tetM W 1.21 0.617
## 11 Intl S 6.28 0.442
## 12 Intl W 1.60 0.688
#Only Sed
Abs_abun_arg_sed <-Abs_abun_summary[Abs_abun_summary$Type == "S",]
Abs_abun_arg_wat <-Abs_abun_summary[Abs_abun_summary$Type== "W",]
# Plot using ggplot2
# Plot for sediment data
s <- ggplot(Abs_abun_arg_sed, aes(x = Type, y = Mean, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "none",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_blank()) +
labs(x = "ARGs", y = "Mean Absolute Abundance (Log10)", title = "Sediment") +
facet_grid(~ variable, scales = "free")
s
# Plot for water data
w <- ggplot(Abs_abun_arg_wat, aes(x = Type, y = Mean, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "none",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_blank()) +
labs(x = "ARGs", y = "Mean Absolute Abundance (Log10)", title = "Water column") +
facet_grid(~ variable, scales = "free")
w
#Statistical tests ## Load the data
# Transform the data to long format for plotting
Abs_abun_arg_long <- Abs_abun_arg %>%
gather(key = "Gene", value = "Abundance", -Sample.name)
#Add a column to identify sample groups
Abs_abun_arg_long <- Abs_abun_arg_long %>%
mutate(Type = str_sub(Sample.name, -1, -1))
#Create a gene list
genes <- c("Sul1", "Sul2", "tetQ", "tetM", "Intl")
# Checking sample distribution for each Gene
for (gene in genes) {
# Filter data for the current gene
gene_log10 <- Abs_abun_arg_long[Abs_abun_arg_long$Gene == gene, ]
# Checking distribution
gene_log10 |>
dplyr::select(Abundance) |>
indices_normality(nrow = 1, ncol =1)}
# Loop through each gene
for (gene in genes) {
# Posthoc Dunn test for each gene
test_wilcox <- function(gene) {
test <- ggpubr::compare_means(Abundance ~ Type,
data = filter(Abs_abun_arg_long, Gene == gene),
method = "wilcox.test")
return(test)
}
wilcox_test_log10 <- lapply(unique(Abs_abun_arg_long$Gene), test_wilcox)
}
# Display results
wilcox_test_log10
## [[1]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.000200 0.0002 2e-04 *** Wilcoxon
##
## [[2]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.0846 0.085 0.085 ns Wilcoxon
##
## [[3]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.0327 0.033 0.033 * Wilcoxon
##
## [[4]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.0000515 0.000052 5.2e-05 **** Wilcoxon
##
## [[5]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.000000322 0.00000032 3.2e-07 **** Wilcoxon
##
## [[6]]
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Abundance W S 0.000000322 0.00000032 3.2e-07 **** Wilcoxon