cat("Muestras totales:", n_distinct(Tabla_inicial$dataset), "\n\n")
## Muestras totales: 91
Tabla_inicial %>%
distinct(dataset, batch, Tipo_label) %>%
count(batch, Tipo_label) %>%
pivot_wider(names_from = Tipo_label, values_from = n, values_fill = 0) %>%
knitr::kable(caption = "Muestras por lote y tipo de paciente")
| batch | Entrantes (T0) | Estancia (T1) |
|---|---|---|
| LIB1 | 12 | 12 |
| LIB6 | 10 | 11 |
| RUN6 | 11 | 12 |
| RUN7 | 12 | 11 |
Tabla_inicial %>%
count(database, batch) %>%
pivot_wider(names_from = batch, values_from = n, values_fill = 0) %>%
knitr::kable(caption = "Hits filtrados por base de datos y lote")
| database | LIB1 | LIB6 | RUN6 | RUN7 |
|---|---|---|---|---|
| ABR | 2269 | 1904 | 2027 | 1804 |
| BAC | 9226 | 10118 | 11794 | 9287 |
| REL | 3128 | 3198 | 3489 | 3081 |
diver_long %>%
filter(!is.na(Tipo_label), !is.na(value)) %>%
group_by(database2, index, Tipo_label) %>%
filter(n() >= 3) %>%
do(tidy(shapiro.test(.$value))) %>%
datatable(rownames = FALSE, filter = "top",
options = list(pageLength = 20, scrollX = TRUE))
diver_long %>%
filter(!is.na(Tipo_label), !is.na(value)) %>%
group_by(database2, index) %>%
filter(n_distinct(Tipo_label) > 1) %>%
do(tidy(kruskal.test(.$value ~ .$Tipo_label))) %>%
datatable(rownames = FALSE, filter = "top",
options = list(pageLength = 20, scrollX = TRUE))
diver_long %>%
filter(index == "diversity_shannon", !is.na(Tipo_label)) %>%
ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
geom_violin() + geom_boxplot(width = 0.2) +
facet_wrap(~database2, scales = "free_y") +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Shannon diversity", x = NULL, y = "Shannon Index") +
theme_classic() +
theme(legend.position = "none", strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
grouped_ggbetweenstats(
data = diver_long %>%
filter(index == "diversity_shannon", !is.na(Tipo_label),
!database2 %in% c("gral","BAC")),
x = Tipo_label,
y = value,
type = "np",
grouping.var = database2,
ggplot.component = ggplot2::theme(
legend.position = "none", strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
)
diver_long %>%
filter(index == "chao1", !is.na(Tipo_label)) %>%
ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
geom_violin() + geom_boxplot(width = 0.2) +
facet_wrap(~database2, scales = "free_y") +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Chao1 index", x = NULL, y = "Chao1") +
theme_classic() +
theme(legend.position = "none", strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
grouped_ggbetweenstats(
data = diver_long %>%
filter(index == "chao1", !is.na(Tipo_label),
!database2 %in% c("gral","BAC")),
x = Tipo_label, y = value, type = "np",
grouping.var = database2
)
diver_long %>%
filter(index == "dominance_simpson", !is.na(Tipo_label)) %>%
ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
geom_violin() + geom_boxplot(width = 0.2) +
facet_wrap(~database2, scales = "free_y") +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Dominance Simpson", x = NULL, y = "Simpson") +
theme_classic() +
theme(legend.position = "none", strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
grouped_ggbetweenstats(
data = diver_long %>%
filter(index == "dominance_simpson", !is.na(Tipo_label),
!database2 %in% c("gral","BAC")),
x = Tipo_label, y = value, type = "np",
grouping.var = database2
)
diver_long %>%
filter(index == "diversity_shannon", !is.na(batch)) %>%
ggplot(aes(x = batch, y = value, fill = batch)) +
geom_violin() + geom_boxplot(width = 0.2) +
facet_wrap(~database2, scales = "free_y") +
scale_fill_brewer(palette = "Set2") +
labs(title = "Shannon por lote de secuenciación", x = NULL, y = "Shannon Index") +
theme_classic() +
theme(legend.position = "none", strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
rare_div %>%
filter(!is.na(Tipo_label)) %>%
ggplot(aes(x = Tipo_label, y = shannon_rare, fill = Tipo_label)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.5) +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = paste0("Shannon rarefied (depth = ", min_depth, " reads)"),
y = "Shannon index", x = NULL) +
theme_classic() + theme(legend.position = "none")
rare_div %>%
filter(!is.na(Tipo_label)) %>%
ggplot(aes(x = Tipo_label, y = richness_rare, fill = Tipo_label)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.5) +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = paste0("ARG richness rarefied (depth = ", min_depth, ")"),
y = "N ARGs", x = NULL) +
theme_classic() + theme(legend.position = "none")
rare_div %>%
ggplot(aes(x = pool_id, y = richness_rare, color = Tipo_label, group = Tipo_label)) +
geom_line() + geom_point(size = 2) +
facet_wrap(~batch, scales = "free_x") +
scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Riqueza rarefied por semana/pool y lote",
y = "N ARGs", x = "Pool ID", color = NULL) +
theme_classic() +
theme(strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7))
arg_prevalence %>%
count(resistome_class) %>%
mutate(resistome_class = factor(resistome_class, levels = c("Core","Shell","Rare"))) %>%
ggplot(aes(x = resistome_class, y = n, fill = resistome_class)) +
geom_col(width = 0.6) +
geom_text(aes(label = n), vjust = -0.4, size = 4) +
scale_fill_manual(values = c("Core"="#E64B35","Shell"="#4DBBD5","Rare"="#B0B0B0")) +
labs(title = "Clasificación del resistoma ABR",
subtitle = paste0("Core ≥90% | Shell 15–89% | Rare <15% (n=", n_samples, " muestras)"),
x = NULL, y = "N templates ARG") +
theme_classic() + theme(legend.position = "none")
arg_prevalence %>%
filter(resistome_class == "Core") %>%
count(Class) %>%
arrange(desc(n)) %>%
ggplot(aes(x = reorder(Class, n), y = n, fill = n)) +
geom_col() + coord_flip() +
scale_fill_viridis_c(option = "C", direction = -1) +
labs(title = "Clases en el Core resistome", x = NULL, y = "N templates") +
theme_classic() + theme(legend.position = "none")
arg_prevalence %>%
ggplot(aes(x = prevalence_pct, fill = resistome_class)) +
geom_histogram(binwidth = 5, color = "white") +
geom_vline(xintercept = c(15, 90), linetype = "dashed", color = "grey40") +
scale_fill_manual(values = c("Core"="#E64B35","Shell"="#4DBBD5","Rare"="#B0B0B0")) +
labs(title = "Distribución de prevalencia de ARGs",
x = "Prevalencia (% muestras)", y = "N templates", fill = NULL) +
theme_classic()
arg_prevalence %>%
filter(resistome_class == "Core") %>%
arrange(desc(prevalence_pct)) %>%
select(template, Class, prevalence_pct, n_samples_detected, mean_depth) %>%
mutate(across(where(is.numeric), ~round(., 1))) %>%
datatable(rownames = FALSE, filter = "top",
options = list(pageLength = 20, scrollX = TRUE))
pca_df(PCA) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA Global", x = paste0("PC1 ", pct(PCA)[1], "%"),
y = paste0("PC2 ", pct(PCA)[2], "%"), color = NULL) +
theme_classic()
pca_df(PCA) %>% select(dataset, PC1, PC2, batch) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = batch)) +
geom_point(size = 3) + scale_color_brewer(palette = "Set1") +
labs(title = "PCA Global (por lote)", x = paste0("PC1 ", pct(PCA)[1], "%"),
y = paste0("PC2 ", pct(PCA)[2], "%"), color = "Lote") +
theme_classic()
pca_df(PCA_ABR) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA ARGs", x = paste0("PC1 ", pct(PCA_ABR)[1], "%"),
y = paste0("PC2 ", pct(PCA_ABR)[2], "%"), color = NULL) +
theme_classic()
pca_df(PCA_BAC) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA BacMet", x = paste0("PC1 ", pct(PCA_BAC)[1], "%"),
y = paste0("PC2 ", pct(PCA_BAC)[2], "%"), color = NULL) +
theme_classic()
pca_df(PCA_REL) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA Relaxasas", x = paste0("PC1 ", pct(PCA_REL)[1], "%"),
y = paste0("PC2 ", pct(PCA_REL)[2], "%"), color = NULL) +
theme_classic()
pca_df(PCA_BAC_Metal) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA BAC Metales", x = paste0("PC1 ", pct(PCA_BAC_Metal)[1], "%"),
y = paste0("PC2 ", pct(PCA_BAC_Metal)[2], "%"), color = NULL) +
theme_classic()
pca_df(PCA_BAC_Biocide) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
ggplot(aes(PC1, PC2, color = Tipo_label)) +
geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "PCA BAC Biocides", x = paste0("PC1 ", pct(PCA_BAC_Biocide)[1], "%"),
y = paste0("PC2 ", pct(PCA_BAC_Biocide)[2], "%"), color = NULL) +
theme_classic()
umap_plot %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "UMAP General", color = NULL) + theme_classic()
umap_plot_ABR %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "UMAP ARG", color = NULL) + theme_classic()
umap_plot_BAC %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "UMAP BAC", color = NULL) + theme_classic()
umap_plot_REL %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "grey60") +
labs(title = "UMAP REL", color = NULL) + theme_classic()
tmp <- rownames_to_column(Guayana_spread, "dataset") %>%
left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
column_to_rownames("dataset")
adonis2(select_if(tmp, is.numeric) ~ Tipo_label, data = tmp, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = select_if(tmp, is.numeric) ~ Tipo_label, data = tmp, permutations = 999, na.action = na.omit)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6012 0.02149 1.9547 0.003 **
## Residual 89 27.3723 0.97851
## Total 90 27.9735 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(select_if(tmp, is.numeric) ~ batch, data = tmp, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = select_if(tmp, is.numeric) ~ batch, data = tmp, permutations = 999, na.action = na.omit)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 1.5228 0.05444 1.6696 0.002 **
## Residual 87 26.4507 0.94556
## Total 90 27.9735 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmp_abr <- rownames_to_column(Guayana_spread_ABR, "dataset") %>%
left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
column_to_rownames("dataset")
adonis2(select_if(tmp_abr, is.numeric) ~ Tipo_label, data = tmp_abr, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = select_if(tmp_abr, is.numeric) ~ Tipo_label, data = tmp_abr, permutations = 999, na.action = na.omit)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4679 0.02495 2.277 0.009 **
## Residual 89 18.2893 0.97505
## Total 90 18.7573 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmp_bac <- rownames_to_column(Guayana_spread_BAC, "dataset") %>%
left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
column_to_rownames("dataset")
adonis2(select_if(tmp_bac, is.numeric) ~ Tipo_label, data = tmp_bac, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = select_if(tmp_bac, is.numeric) ~ Tipo_label, data = tmp_bac, permutations = 999, na.action = na.omit)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.745 0.01934 1.7553 0.005 **
## Residual 89 37.789 0.98066
## Total 90 38.535 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmp_rel <- rownames_to_column(Guayana_spread_REL, "dataset") %>%
left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
column_to_rownames("dataset")
adonis2(select_if(tmp_rel, is.numeric) ~ Tipo_label, data = tmp_rel, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = select_if(tmp_rel, is.numeric) ~ Tipo_label, data = tmp_rel, permutations = 999, na.action = na.omit)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7388 0.03118 2.8646 0.001 ***
## Residual 89 22.9544 0.96882
## Total 90 23.6932 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
top_args <- Tabla_inicial_info %>%
filter(database == "ABR") %>%
group_by(template, Class) %>%
summarise(n_muestras = n_distinct(dataset),
mean_depth = round(mean(depth), 2), .groups = "drop") %>%
arrange(desc(n_muestras)) %>%
slice_head(n = 40)
datatable(top_args, rownames = FALSE, filter = "top",
caption = "Top 40 ARGs por prevalencia",
options = list(pageLength = 15, scrollX = TRUE))
Tabla_inicial_info %>%
filter(database == "ABR", !is.na(Class)) %>%
left_join(metadata_nobatch, by = "dataset") %>%
filter(!is.na(Tipo_label)) %>%
group_by(Class, Tipo_label) %>%
summarise(n = n_distinct(dataset), .groups = "drop") %>%
mutate(pct = n / n_distinct(Tabla_inicial$dataset) * 100) %>%
ggplot(aes(x = reorder(Class, -pct), y = pct, fill = Tipo_label)) +
geom_col(position = position_dodge(0.8), width = 0.7) +
scale_fill_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35")) +
labs(title = "Prevalencia por clase de antibiótico", x = NULL, y = "% muestras", fill = NULL) +
theme_classic() +
theme(axis.text.x = element_text(angle = 50, hjust = 1, size = 9))
top40_genes <- top_args$template[1:min(40, nrow(top_args))]
heat_data <- Guayana_spread_ABR[, colnames(Guayana_spread_ABR) %in% top40_genes, drop = FALSE]
if (ncol(heat_data) > 1) {
heat_pa <- (heat_data > 0) * 1
ann_col <- metadata %>%
filter(dataset %in% rownames(heat_pa)) %>%
select(dataset, batch, Tipo_label) %>%
column_to_rownames("dataset")
pheatmap(
t(heat_pa[rownames(ann_col), , drop = FALSE]),
annotation_col = ann_col,
show_colnames = FALSE,
color = c("white", "#1565C0"),
main = "Presencia/ausencia top 40 ARGs",
fontsize_row = 7,
annotation_colors = list(
batch = setNames(RColorBrewer::brewer.pal(4,"Set1"), c("LIB1","LIB6","RUN6","RUN7")),
Tipo_label = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35")
)
)
}
bac_prev <- Tabla_inicial_info %>%
filter(database == "BAC") %>%
left_join(metadata_nobatch, by = "dataset") %>%
filter(!is.na(Compound)) %>%
group_by(Compound, Class_compound, Tipo_label) %>%
summarise(n = n_distinct(dataset), .groups = "drop")
if (nrow(bac_prev) > 0) {
bac_prev %>%
group_by(Compound) %>% mutate(total = sum(n)) %>% ungroup() %>%
filter(dense_rank(desc(total)) <= 20) %>%
ggplot(aes(x = reorder(Compound, -total), y = n, fill = Tipo_label)) +
geom_col(position = position_dodge(0.8)) +
scale_fill_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
na.value = "gray60") +
labs(title = "Top 20 compuestos BacMet", x = NULL, y = "N muestras", fill = NULL) +
theme_classic() +
theme(axis.text.x = element_text(angle = 50, hjust = 1, size = 8))
} else {
cat("Sin hits BAC.\n")
}
rel_summary <- Tabla_inicial_info %>%
filter(database == "REL") %>%
left_join(metadata_nobatch, by = "dataset") %>%
group_by(template_gene) %>%
summarise(n_muestras = n_distinct(dataset),
batches = paste(sort(unique(batch)), collapse = ", "),
.groups = "drop") %>%
arrange(desc(n_muestras))
if (nrow(rel_summary) > 0) {
datatable(rel_summary, rownames = FALSE,
caption = "Genes de movilización (REL) detectados")
} else {
cat("Sin hits REL.\n")
}
Tabla_inicial_info %>%
filter(database == "ABR") %>%
left_join(metadata_nobatch, by = "dataset") %>%
group_by(dataset, batch, pool_id, Tipo_label) %>%
summarise(n_ARGs = n_distinct(template),
n_clases = n_distinct(Class, na.rm = TRUE),
mean_depth = round(mean(depth), 2),
.groups = "drop") %>%
arrange(batch, pool_id, Tipo_label) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Resumen ARGs por muestra",
options = list(pageLength = 15, scrollX = TRUE))
bind_rows(
arg_prev_T1 %>% mutate(grupo = "Estancia T1"),
arg_prev_T0 %>% rename(prev_pct = prev_pct_T0) %>%
mutate(resistome_class = case_when(
prev_pct >= 90 ~ "Core", prev_pct >= 15 ~ "Shell", TRUE ~ "Rare"
), grupo = "Entrantes T0")
) %>%
count(grupo, resistome_class) %>%
mutate(resistome_class = factor(resistome_class, levels = c("Core","Shell","Rare"))) %>%
ggplot(aes(x = resistome_class, y = n, fill = grupo)) +
geom_col(position = position_dodge(0.7), width = 0.6) +
geom_text(aes(label = n), position = position_dodge(0.7), vjust = -0.3, size = 3.5) +
scale_fill_manual(values = c("Estancia T1" = "#E64B35", "Entrantes T0" = "#4DBBD5")) +
labs(title = "Clasificación del resistoma: T1 (UCI) vs T0 (entrantes)",
x = NULL, y = "N genes", fill = NULL) +
theme_classic() + theme(legend.position = "top")
cat("Genes Core T1:", length(core_T1_genes), "\n")
## Genes Core T1: 22
cat("Genes Core T0:", length(core_T0_genes), "\n")
## Genes Core T0: 28
cat("Genes endémicos UCI (Core T1, NO Core T0):", length(endemic_genes), "\n\n")
## Genes endémicos UCI (Core T1, NO Core T0): 1
arg_prev_T1 %>%
filter(template %in% endemic_genes) %>%
left_join(arg_prev_T0, by = "template") %>%
arrange(desc(prev_pct)) %>%
mutate(across(where(is.numeric), ~round(., 1))) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Genes endémicos UCI: Core en T1, no Core en T0",
options = list(pageLength = 15, scrollX = TRUE))
# Bray-Curtis entre semanas consecutivas de T1 (orden global por week_abs)
meta_T1_ord <- metadata %>%
filter(Tipo == "1") %>%
arrange(week_abs) %>%
mutate(semana_rank = row_number())
# Matriz de distancias T1
dist_T1 <- vegdist(spread_T1_abr[meta_T1_ord$dataset[meta_T1_ord$dataset %in% rownames(spread_T1_abr)], ], method = "bray")
# Comparar distancia entre semanas consecutivas vs no consecutivas
dist_T1_df <- as.data.frame(as.matrix(dist_T1)) %>%
rownames_to_column("ds1") %>%
pivot_longer(-ds1, names_to = "ds2", values_to = "bray") %>%
filter(ds1 < ds2) %>%
left_join(meta_T1_ord %>% select(dataset, semana_rank, week_abs), by = c("ds1" = "dataset")) %>%
left_join(meta_T1_ord %>% select(dataset, semana_rank, week_abs), by = c("ds2" = "dataset"),
suffix = c("_1","_2")) %>%
mutate(consecutivas = abs(semana_rank_1 - semana_rank_2) == 1,
tipo_par = ifelse(consecutivas, "Semanas consecutivas", "Semanas no consecutivas"))
ggplot(dist_T1_df, aes(x = tipo_par, y = bray, fill = tipo_par)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.4, size = 1) +
scale_fill_manual(values = c("Semanas consecutivas" = "#4CAF50",
"Semanas no consecutivas" = "#9E9E9E")) +
stat_compare_means(method = "wilcox.test", label.x.npc = 0.35) +
labs(title = "Estabilidad T1: distancia Bray-Curtis entre semanas",
subtitle = "Distancia baja = resistoma estable",
x = NULL, y = "Bray-Curtis", fill = NULL) +
theme_classic() + theme(legend.position = "none")
bind_rows(
same_week_dist %>% select(bray = bray_T0_T1) %>% mutate(tipo = "Misma semana"),
random_pairs %>% select(bray) %>% mutate(tipo = "Par aleatorio")
) %>%
ggplot(aes(x = tipo, y = bray, fill = tipo)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +
scale_fill_manual(values = c("Misma semana" = "#9C27B0", "Par aleatorio" = "#9E9E9E")) +
stat_compare_means(method = "wilcox.test", label.x.npc = 0.35) +
labs(title = "¿Se parece T0 al T1 de la misma semana?",
subtitle = "Distancia baja = T0 similar a T1 (influencia de entrantes)",
x = NULL, y = "Bray-Curtis", fill = NULL) +
theme_classic() + theme(legend.position = "none")
# Solo pools con par completo
common_pools_T0 <- pairs_same$ds_0[pairs_same$ds_0 %in% rownames(spread_T0_abr)]
common_pools_T1 <- pairs_same$ds_1[pairs_same$ds_1 %in% rownames(spread_T1_abr)]
if (length(common_pools_T0) >= 5) {
mat_T0 <- spread_T0_abr[common_pools_T0, , drop = FALSE]
mat_T1 <- spread_T1_abr[common_pools_T1, , drop = FALSE]
# Asegurar mismas columnas
common_cols <- intersect(colnames(mat_T0), colnames(mat_T1))
if (length(common_cols) == 0) {
all_cols <- union(colnames(mat_T0), colnames(mat_T1))
mat_T0 <- mat_T0[, all_cols[all_cols %in% colnames(mat_T0)], drop = FALSE]
mat_T1 <- mat_T1[, all_cols[all_cols %in% colnames(mat_T1)], drop = FALSE]
}
pca_T0_p <- prcomp(decostand(mat_T0, "hellinger"))
pca_T1_p <- prcomp(decostand(mat_T1, "hellinger"))
prot <- protest(pca_T0_p$x, pca_T1_p$x, permutations = 999)
cat("Procrustes T0 ↔ T1 misma semana\n")
cat("m² =", round(prot$ss, 4), " | p =", prot$signif, "\n")
cat("Correlación =", round(sqrt(1 - prot$ss), 4), "\n\n")
proc_df <- data.frame(
T1_x = prot$Yrot[,1], T1_y = prot$Yrot[,2],
T0_x = prot$X[,1], T0_y = prot$X[,2],
ds_T1 = rownames(prot$Yrot)
) %>%
left_join(pairs_same %>% select(ds_0, ds_1, batch_T0), by = c("ds_T1" = "ds_1"))
ggplot(proc_df) +
geom_segment(aes(x = T0_x, y = T0_y, xend = T1_x, yend = T1_y, color = batch_T0),
arrow = arrow(length = unit(0.2,"cm")), alpha = 0.7) +
geom_point(aes(T0_x, T0_y, color = batch_T0), shape = 1, size = 3) +
geom_point(aes(T1_x, T1_y, color = batch_T0), shape = 16, size = 3) +
scale_color_brewer(palette = "Set1") +
annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5, size = 3.5,
label = paste0("m² = ", round(prot$ss, 3), " p = ", prot$signif)) +
labs(title = "Procrustes: T0 (círculo) → T1 (punto) misma semana",
subtitle = "Flechas cortas = alta similitud T0–T1",
x = "PC1", y = "PC2", color = "Lote") +
theme_classic()
} else {
cat("Insuficientes pares T0-T1 para Procrustes.\n")
}
## Procrustes T0 ↔ T1 misma semana
## m² = 0.4442 | p = 0.882
## Correlación = 0.7455
overlap_stats <- pairs_same %>%
pmap_dfr(function(pool_id, ds_0, ds_1, batch_T0, ...) {
g0 <- Tabla_inicial %>% filter(dataset == ds_0, database == "ABR") %>% pull(template)
g1 <- Tabla_inicial %>% filter(dataset == ds_1, database == "ABR") %>% pull(template)
tibble(
batch = batch_T0,
pool_id = pool_id,
n_T0 = length(g0),
n_T1 = length(g1),
n_shared = length(intersect(g0, g1)),
n_only_T0 = length(setdiff(g0, g1)),
n_only_T1 = length(setdiff(g1, g0)),
jaccard = length(intersect(g0,g1)) / length(union(g0,g1))
)
})
overlap_stats %>%
select(batch, pool_id, n_T0, n_T1, n_shared, n_only_T0, n_only_T1, jaccard) %>%
mutate(jaccard = round(jaccard, 3)) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Overlap génico T0-T1 por semana/pool",
options = list(pageLength = 15, scrollX = TRUE))
overlap_stats %>%
pivot_longer(cols = c(n_shared, n_only_T0, n_only_T1),
names_to = "categoria", values_to = "n") %>%
mutate(categoria = factor(categoria,
levels = c("n_only_T0","n_shared","n_only_T1"),
labels = c("Solo T0","Compartido","Solo T1"))) %>%
ggplot(aes(x = pool_id, y = n, fill = categoria)) +
geom_col(position = "stack") +
facet_wrap(~batch, scales = "free_x") +
scale_fill_manual(values = c("Solo T0"="#4DBBD5","Compartido"="#91D1C2",
"Solo T1"="#E64B35")) +
labs(title = "Overlap de ARGs T0–T1 por semana", x = "Pool/semana",
y = "N genes", fill = NULL) +
theme_classic() +
theme(strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7))
bind_rows(
lagged_pairs %>% select(bray = bray_lagged) %>% mutate(tipo = "T0(W) → T1(W+1)"),
same_week_dist %>% select(bray = bray_T0_T1) %>% mutate(tipo = "T0 → T1 misma semana"),
random_pairs %>% select(bray) %>% mutate(tipo = "Par aleatorio")
) %>%
mutate(tipo = factor(tipo, levels = c("T0(W) → T1(W+1)",
"T0 → T1 misma semana",
"Par aleatorio"))) %>%
ggplot(aes(x = tipo, y = bray, fill = tipo)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +
scale_fill_manual(values = c("T0(W) → T1(W+1)" = "#FF9800",
"T0 → T1 misma semana" = "#9C27B0",
"Par aleatorio" = "#9E9E9E")) +
stat_compare_means(comparisons = list(c("T0(W) → T1(W+1)","Par aleatorio"),
c("T0(W) → T1(W+1)","T0 → T1 misma semana")),
method = "wilcox.test") +
labs(title = "¿Predice T0(W) el resistoma de T1(W+1)?",
x = NULL, y = "Bray-Curtis (distancia)", fill = NULL) +
theme_classic() + theme(legend.position = "none",
axis.text.x = element_text(angle = 20, hjust = 1))
lag_T0 <- lagged_pairs$dataset_T0[lagged_pairs$dataset_T0 %in% rownames(spread_T0_abr)]
lag_T1 <- lagged_pairs$dataset_T1[lagged_pairs$dataset_T1 %in% rownames(spread_T1_abr)]
if (length(lag_T0) >= 5) {
mat_lag_T0 <- spread_T0_abr[lag_T0, , drop = FALSE]
mat_lag_T1 <- spread_T1_abr[lag_T1, , drop = FALSE]
pca_lag_T0 <- prcomp(decostand(mat_lag_T0, "hellinger"))
pca_lag_T1 <- prcomp(decostand(mat_lag_T1, "hellinger"))
prot_lag <- protest(pca_lag_T0$x, pca_lag_T1$x, permutations = 999)
cat("Procrustes retardado T0(W) → T1(W+1)\n")
cat("m² =", round(prot_lag$ss, 4), " | p =", prot_lag$signif, "\n")
cat("Correlación =", round(sqrt(1 - prot_lag$ss), 4), "\n\n")
proc_lag_df <- data.frame(
T1_x = prot_lag$Yrot[,1], T1_y = prot_lag$Yrot[,2],
T0_x = prot_lag$X[,1], T0_y = prot_lag$X[,2],
ds_T0 = rownames(prot_lag$X)
) %>%
left_join(lagged_pairs %>% select(dataset_T0, batch_T0, pool_id_T0), by = c("ds_T0" = "dataset_T0"))
ggplot(proc_lag_df) +
geom_segment(aes(x = T0_x, y = T0_y, xend = T1_x, yend = T1_y, color = batch_T0),
arrow = arrow(length = unit(0.2,"cm")), alpha = 0.7) +
geom_point(aes(T0_x, T0_y, color = batch_T0), shape = 1, size = 3) +
geom_point(aes(T1_x, T1_y, color = batch_T0), shape = 16, size = 3) +
scale_color_brewer(palette = "Set1") +
annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5, size = 3.5,
label = paste0("m² = ", round(prot_lag$ss, 3), " p = ", prot_lag$signif)) +
labs(title = "Procrustes: T0(W) (círculo) → T1(W+1) (punto)",
subtitle = "Flechas cortas = T0 predice T1 siguiente semana",
x = "PC1", y = "PC2", color = "Lote") +
theme_classic()
} else {
cat("Insuficientes pares retardados para Procrustes.\n")
}
## Procrustes retardado T0(W) → T1(W+1)
## m² = 0.3895 | p = 0.033
## Correlación = 0.7813
# Construir tabla de T1 de los pares retardados
if (nrow(lagged_pairs) >= 6) {
mat_T1_lag_full <- spread_T1_abr[lagged_pairs$dataset_T1, , drop = FALSE]
mat_T1_lag_full[is.na(mat_T1_lag_full)] <- 0
# Vector de diversidad Shannon de T0 como covariable
sha_T0 <- vegan::diversity(
spread_T0_abr[lagged_pairs$dataset_T0, , drop = FALSE],
index = "shannon"
)
meta_lag_perm <- lagged_pairs %>%
mutate(shannon_T0 = sha_T0)
dist_T1_lag <- vegdist(mat_T1_lag_full, method = "bray")
cat("**PERMANOVA: varianza de T1(W+1) explicada por Shannon_T0(W) + batch**\n\n")
print(adonis2(dist_T1_lag ~ shannon_T0 + batch_T0,
data = meta_lag_perm, permutations = 999))
}
PERMANOVA: varianza de T1(W+1) explicada por Shannon_T0(W) + batch
Permutation test for adonis under reduced model Permutation: free Number of permutations: 999
adonis2(formula = dist_T1_lag ~ shannon_T0 + batch_T0, data =
meta_lag_perm, permutations = 999) Df SumOfSqs R2 F Pr(>F)
Model 4 1.2111 0.13035 1.4614 0.035 * Residual 39 8.0801 0.86965
Total 43 9.2912 1.00000
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
sha_T0_df <- tibble(
dataset_T0 = rownames(spread_T0_abr),
shannon_T0 = vegan::diversity(spread_T0_abr, "shannon")
)
sha_T1_df <- tibble(
dataset_T1 = rownames(spread_T1_abr),
shannon_T1 = vegan::diversity(spread_T1_abr, "shannon")
)
corr_df <- lagged_pairs %>%
left_join(sha_T0_df, by = "dataset_T0") %>%
left_join(sha_T1_df, by = "dataset_T1")
ggplot(corr_df, aes(x = shannon_T0, y = shannon_T1, color = batch_T0)) +
geom_point(size = 3) +
geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
stat_cor(method = "spearman", label.x.npc = 0.05, label.y.npc = 0.95) +
scale_color_brewer(palette = "Set1") +
labs(title = "¿Predice la diversidad de entrantes (T0_W) la diversidad de ingresados (T1_{W+1})?",
x = "Shannon T0 (semana W)", y = "Shannon T1 (semana W+1)", color = "Lote") +
theme_classic()
persistence <- lagged_pairs %>%
pmap_dfr(function(pool_id_T0, pool_id_T1, dataset_T0, dataset_T1, batch_T0, ...) {
g0 <- Tabla_inicial %>% filter(dataset == dataset_T0, database == "ABR") %>% pull(template)
g1 <- Tabla_inicial %>% filter(dataset == dataset_T1, database == "ABR") %>% pull(template)
tibble(
batch = batch_T0,
pool_T0 = pool_id_T0,
pool_T1 = pool_id_T1,
n_T0 = length(g0),
n_T1 = length(g1),
n_persist = length(intersect(g0, g1)),
pct_T0_en_T1 = ifelse(length(g0) > 0, length(intersect(g0,g1)) / length(g0) * 100, NA),
pct_T1_de_T0 = ifelse(length(g1) > 0, length(intersect(g0,g1)) / length(g1) * 100, NA)
)
})
persistence %>%
pivot_longer(cols = c(pct_T0_en_T1, pct_T1_de_T0), names_to = "metrica", values_to = "pct") %>%
mutate(metrica = recode(metrica,
pct_T0_en_T1 = "% genes T0(W) encontrados en T1(W+1)",
pct_T1_de_T0 = "% genes T1(W+1) que vienen de T0(W)")) %>%
ggplot(aes(x = batch, y = pct, fill = metrica)) +
geom_boxplot(position = position_dodge(0.8)) +
scale_fill_manual(values = c("#4DBBD5","#E64B35")) +
labs(title = "Persistencia génica T0(W) → T1(W+1)",
x = NULL, y = "% genes", fill = NULL) +
theme_classic() +
theme(legend.position = "top",
axis.text.x = element_text(angle = 20, hjust = 1))
gene_pred %>%
filter(p_adj < 0.05) %>%
mutate(across(where(is.numeric), ~round(., 3))) %>%
datatable(rownames = FALSE, filter = "top",
caption = "ARGs cuya presencia en T0(W) predice su presencia en T1(W+1) (FDR < 0.05)",
options = list(pageLength = 15, scrollX = TRUE))
gene_pred %>%
filter(!is.na(spearman_r), !is.na(p_adj)) %>%
mutate(
sig = p_adj < 0.05 & spearman_r > 0.3,
label_gen = ifelse(sig & rank(p_adj) <= 15, template, NA)
) %>%
ggplot(aes(x = spearman_r, y = -log10(p_adj + 1e-10),
color = sig, label = label_gen)) +
geom_point(alpha = 0.7, size = 2) +
geom_text_repel(size = 2.5, max.overlaps = 20) +
geom_vline(xintercept = 0.3, linetype = "dashed", color = "gray50") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("FALSE" = "gray70", "TRUE" = "#E64B35")) +
labs(title = "Poder predictivo gen a gen: T0(W) → T1(W+1)",
x = "Spearman r (correlación T0 → T1)", y = "-log10(FDR)",
caption = "Umbral: r > 0.3, FDR < 0.05") +
theme_classic() + theme(legend.position = "none")
gene_pred %>%
filter(!is.na(Class)) %>%
group_by(Class) %>%
summarise(
n_genes = n(),
n_predictivos = sum(p_adj < 0.05 & spearman_r > 0.3, na.rm = TRUE),
pct_pred = n_predictivos / n_genes * 100,
mean_r = mean(spearman_r, na.rm = TRUE),
.groups = "drop"
) %>%
filter(n_genes >= 3) %>%
arrange(desc(pct_pred)) %>%
ggplot(aes(x = reorder(Class, pct_pred), y = pct_pred, fill = mean_r)) +
geom_col() + coord_flip() +
scale_fill_gradient2(low = "#4DBBD5", mid = "white", high = "#E64B35", midpoint = 0) +
labs(title = "% genes predictivos por clase (T0 → T1 siguiente semana)",
x = NULL, y = "% genes con r > 0.3 y FDR < 0.05", fill = "Mean r") +
theme_classic()
gene_pred %>%
filter(p_adj < 0.05, spearman_r > 0.3) %>%
group_by(Class) %>%
summarise(
n_genes_predictivos = n(),
top_gen = template[which.max(spearman_r)],
max_r = round(max(spearman_r), 3),
mean_prev_T0 = round(mean(prev_T0), 3),
mean_prev_T1 = round(mean(prev_T1), 3),
.groups = "drop"
) %>%
arrange(desc(n_genes_predictivos)) %>%
datatable(rownames = FALSE,
caption = "Clases con genes predictivos: monitorizar en T0 para anticipar T1",
options = list(pageLength = 15, scrollX = TRUE))
temporal_div %>%
mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
ggplot(aes(x = week_abs, y = shannon, color = Tipo_label)) +
geom_point(alpha = 0.7, size = 2) +
geom_smooth(method = "loess", span = 0.4, se = TRUE, alpha = 0.15) +
geom_vline(xintercept = c(
metadata %>% filter(periodo == "P1_Primavera-Verano2019") %>% pull(week_abs) %>% max(),
metadata %>% filter(periodo == "P2_Otoño2019") %>% pull(week_abs) %>% max()
), linetype = "dashed", color = "grey50", alpha = 0.6) +
scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Evolución temporal de la diversidad ARG",
subtitle = "Líneas punteadas = cambio de período (P1→P2→P3)",
x = "Semana absoluta (week_abs)", y = "Shannon H'", color = NULL) +
theme_classic() + theme(legend.position = "top")
temporal_div %>%
mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
ggplot(aes(x = week_abs, y = n_genes, color = Tipo_label)) +
geom_line(alpha = 0.6) +
geom_point(size = 2, alpha = 0.8) +
geom_smooth(method = "loess", span = 0.4, se = FALSE, linetype = "dashed") +
scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Riqueza de ARGs por semana epidemiológica",
x = "Semana absoluta", y = "N genes ARG", color = NULL) +
theme_classic() + theme(legend.position = "top")
if (length(endemic_genes) > 0 && nrow(core_T1_weekly) > 0) {
core_T1_weekly %>%
ggplot(aes(x = week_abs, y = n_endemic, fill = periodo)) +
geom_col(alpha = 0.8) +
scale_fill_manual(values = c("P1_Primavera-Verano2019" = "#4DBBD5",
"P2_Otoño2019" = "#E64B35",
"P3_Invierno2019-2020" = "#F39B7F")) +
labs(title = "Genes endémicos UCI (Core T1) presentes por semana",
subtitle = paste0("N genes endémicos = ", length(endemic_genes)),
x = "Semana absoluta", y = "N genes endémicos detectados",
fill = "Período") +
theme_classic()
} else {
cat("No hay suficientes genes endémicos o datos para este gráfico.\n")
}
Tabla_inicial %>%
filter(database == "ABR", !is.na(week_abs), !is.na(Tipo)) %>%
left_join(Resfinder_phenotype %>% select(template, Class) %>% distinct(), by = "template") %>%
filter(!is.na(Class)) %>%
mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
group_by(week_abs, Tipo_label, Class) %>%
summarise(n = n_distinct(template), .groups = "drop") %>%
group_by(week_abs, Tipo_label) %>%
mutate(pct = n / sum(n) * 100) %>%
ungroup() %>%
filter(Class %in% (Resfinder_phenotype %>% count(Class) %>% slice_max(n, n=8) %>% pull(Class))) %>%
ggplot(aes(x = week_abs, y = pct, fill = Class)) +
geom_area(position = "stack", alpha = 0.8) +
facet_wrap(~Tipo_label, ncol = 1) +
scale_fill_brewer(palette = "Set3") +
labs(title = "Composición ARG por clase a lo largo del tiempo",
x = "Semana absoluta", y = "% genes", fill = "Clase") +
theme_classic() +
theme(strip.background = element_blank(), legend.position = "bottom")
if (nrow(critical_df) > 0) {
critical_df %>%
mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
group_by(week_abs, grupo_clinico, Tipo_label) %>%
summarise(detected = n_distinct(template), .groups = "drop") %>%
ggplot(aes(x = week_abs, y = detected, color = Tipo_label, group = Tipo_label)) +
geom_line(alpha = 0.7) +
geom_point(size = 2) +
facet_wrap(~grupo_clinico, scales = "free_y", ncol = 2) +
scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Genes de alto impacto clínico — presencia por semana",
x = "Semana absoluta", y = "N genes detectados", color = NULL) +
theme_classic() +
theme(strip.background = element_blank(), legend.position = "top")
} else {
cat("No se detectaron genes en las categorías clínicas definidas.\n")
}
meca_df <- Tabla_inicial %>%
filter(database == "ABR", grepl("mecA", template, ignore.case = TRUE),
!is.na(week_abs)) %>%
mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)"))
if (nrow(meca_df) > 0) {
p1 <- ggplot(meca_df, aes(x = week_abs, y = depth, color = Tipo_label)) +
geom_point(size = 2, alpha = 0.8) +
geom_smooth(method = "loess", se = FALSE, span = 0.5, linetype = "dashed") +
scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
labs(title = "Profundidad mecA por semana",
x = "Semana absoluta", y = "Depth (KMA)", color = NULL) +
theme_classic()
p2 <- ggplot(meca_df, aes(x = Tipo_label, y = depth, fill = Tipo_label)) +
geom_boxplot(outlier.size = 1) +
geom_jitter(width = 0.1, alpha = 0.4) +
scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
"Estancia (T1)" = "#E64B35")) +
stat_compare_means(method = "wilcox.test") +
labs(title = "Depth mecA: T0 vs T1", x = NULL, y = "Depth", fill = NULL) +
theme_classic() + theme(legend.position = "none")
cowplot::plot_grid(p1, p2, rel_widths = c(2, 1))
} else {
cat("mecA no detectado en los datos filtrados.\n")
}
if (nrow(critical_df) > 0) {
critical_df %>%
group_by(grupo_clinico, template) %>%
summarise(
n_muestras = n_distinct(dataset),
n_T0 = n_distinct(dataset[Tipo == "0"]),
n_T1 = n_distinct(dataset[Tipo == "1"]),
mean_depth = round(mean(depth), 1),
max_depth = round(max(depth), 1),
semanas_det = n_distinct(week_abs),
.groups = "drop"
) %>%
arrange(grupo_clinico, desc(n_muestras)) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Genes clínicamente críticos detectados",
options = list(pageLength = 20, scrollX = TRUE))
} else {
cat("No se detectaron genes críticos.\n")
}
depth_comparison %>%
filter(!is.na(log2_ratio)) %>%
mutate(
categoria = case_when(
amplificado_UCI ~ "Amplificado UCI",
importado ~ "Importado (T0)",
TRUE ~ "Neutro"
),
label = ifelse(abs(log2_ratio) > 2 & (n_det_T0 + n_det_T1) > 10, template, NA)
) %>%
ggplot(aes(
x = log2((median_depth_T0 + median_depth_T1) / 2),
y = log2_ratio,
color = categoria,
label = label
)) +
geom_point(alpha = 0.7, size = 2) +
geom_hline(yintercept = c(-1, 1), linetype = "dashed", color = "grey50") +
geom_hline(yintercept = 0, color = "black", alpha = 0.5) +
geom_text_repel(size = 2.5, max.overlaps = 15, show.legend = FALSE) +
scale_color_manual(values = c(
"Amplificado UCI" = "#E64B35",
"Importado (T0)" = "#4DBBD5",
"Neutro" = "#B0B0B0"
)) +
labs(
title = "Presión de selección intraUCI: depth T1 vs T0",
subtitle = "Arriba = amplificado en UCI | Abajo = más abundante en entrantes",
x = "log2(depth promedio)",
y = "log2(depth T1 / depth T0)",
color = NULL
) +
theme_classic() + theme(legend.position = "top")
depth_comparison %>%
filter(!is.na(Class), !is.na(log2_ratio)) %>%
group_by(Class) %>%
filter(n() >= 3) %>%
ungroup() %>%
mutate(Class = reorder(Class, log2_ratio, FUN = median)) %>%
ggplot(aes(x = Class, y = log2_ratio, fill = median(log2_ratio) > 0)) +
geom_boxplot(outlier.size = 1, show.legend = FALSE) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
coord_flip() +
scale_fill_manual(values = c("TRUE" = "#E64B35", "FALSE" = "#4DBBD5")) +
labs(
title = "Ratio de amplificación UCI por clase de antibiótico",
subtitle = "Rojo = más abundante en T1 (UCI amplifica) | Azul = más en T0 (importado)",
x = NULL, y = "log2(depth T1 / depth T0)"
) +
theme_classic()
depth_comparison %>%
filter(amplificado_UCI | importado) %>%
mutate(
categoria = ifelse(amplificado_UCI, "Amplificado UCI", "Importado (T0)"),
across(where(is.numeric), ~round(., 2))
) %>%
select(template, Class, categoria, median_depth_T0, median_depth_T1,
log2_ratio, n_det_T0, n_det_T1) %>%
arrange(desc(abs(log2_ratio))) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Genes con presión de selección diferencial T0 vs T1",
options = list(pageLength = 20, scrollX = TRUE))
gene_origin %>%
count(origen) %>%
mutate(origen = reorder(origen, n)) %>%
ggplot(aes(x = origen, y = n, fill = origen)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = n), hjust = -0.2, size = 4) +
coord_flip() +
scale_fill_manual(values = c(
"Endémico UCI" = "#E64B35",
"Amplificado intraUCI"= "#F39B7F",
"Importado" = "#4DBBD5",
"Fugaz (solo T0)" = "#91D1C2",
"Transitorio" = "#B0B0B0"
)) +
labs(title = "Origen de los genes ARG detectados en la UCI",
subtitle = "Basado en prevalencia relativa en T0 (entrantes) vs T1 (estancia UCI)",
x = NULL, y = "N genes") +
theme_classic() +
expand_limits(y = max(table(gene_origin$origen)) * 1.15)
gene_origin %>%
filter(prev_T0 > 0 | prev_T1 > 0) %>%
mutate(label = ifelse(
(prev_T1 > 60 | prev_T0 > 60 | abs(prev_T1 - prev_T0) > 40),
template, NA
)) %>%
ggplot(aes(x = prev_T0, y = prev_T1, color = origen, label = label)) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
geom_point(alpha = 0.7, size = 2.5) +
geom_text_repel(size = 2.5, max.overlaps = 15, show.legend = FALSE) +
scale_color_manual(values = c(
"Endémico UCI" = "#E64B35",
"Amplificado intraUCI"= "#F39B7F",
"Importado" = "#4DBBD5",
"Fugaz (solo T0)" = "#91D1C2",
"Transitorio" = "#B0B0B0"
)) +
labs(
title = "Prevalencia T0 vs T1 por gen",
subtitle = "Línea diagonal = igual prevalencia | Sobre la línea = amplificado en UCI",
x = "Prevalencia en T0 (%)", y = "Prevalencia en T1 (%)", color = "Origen"
) +
theme_classic() + theme(legend.position = "right")
gene_origin %>%
filter(origen %in% c("Endémico UCI", "Amplificado intraUCI"), !is.na(Class)) %>%
count(origen, Class) %>%
ggplot(aes(x = reorder(Class, n), y = n, fill = origen)) +
geom_col(position = "dodge") +
coord_flip() +
scale_fill_manual(values = c("Endémico UCI" = "#E64B35",
"Amplificado intraUCI" = "#F39B7F")) +
labs(title = "Clases de antibiótico: genes endémicos y amplificados",
x = NULL, y = "N genes", fill = NULL) +
theme_classic() + theme(legend.position = "top")
gene_origin %>%
filter(prev_T0 > 0 | prev_T1 > 0) %>%
mutate(across(where(is.numeric), ~round(., 1))) %>%
arrange(origen, desc(prev_T1)) %>%
datatable(rownames = FALSE, filter = "top",
caption = "Clasificación de origen de todos los ARGs detectados",
options = list(pageLength = 20, scrollX = TRUE))
sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=es_ES.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=es_ES.UTF-8
## [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=es_ES.UTF-8
## [7] LC_PAPER=es_ES.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Madrid
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggstatsplot_0.13.4 ggrepel_0.9.6 broom_1.0.11 microbiome_1.30.0
## [5] ggpubr_0.6.2 reshape2_1.4.5 microbial_0.0.22 phyloseq_1.52.0
## [9] ggsci_4.2.0 viridis_0.6.5 viridisLite_0.4.2 DT_0.34.0
## [13] plotly_4.11.0 lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [17] dplyr_1.1.4 purrr_1.2.1 readr_2.1.6 tidyr_1.3.2
## [21] tibble_3.3.1 tidyverse_2.0.0 pheatmap_1.0.13 umap_0.2.10.0
## [25] factoextra_1.0.7 ggplot2_4.0.1 vegan_2.7-2 permute_0.9-8
## [29] MASS_7.3-65
##
## loaded via a namespace (and not attached):
## [1] splines_4.5.3 prismatic_1.1.2
## [3] datawizard_1.3.0 lifecycle_1.0.5
## [5] rstatix_0.7.3 edgeR_4.6.3
## [7] lattice_0.22-7 vroom_1.6.7
## [9] crosstalk_1.2.2 insight_1.4.4
## [11] backports_1.5.0 magrittr_2.0.4
## [13] limma_3.64.3 sass_0.4.10
## [15] rmarkdown_2.30 jquerylib_0.1.4
## [17] yaml_2.3.12 otel_0.2.0
## [19] askpass_1.2.1 reticulate_1.44.1
## [21] cowplot_1.2.0 RColorBrewer_1.1-3
## [23] ade4_1.7-23 abind_1.4-8
## [25] Rtsne_0.17 quadprog_1.5-8
## [27] GenomicRanges_1.60.0 BiocGenerics_0.54.1
## [29] GenomeInfoDbData_1.2.14 IRanges_2.42.0
## [31] S4Vectors_0.46.0 correlation_0.8.8
## [33] RSpectra_0.16-2 codetools_0.2-20
## [35] DelayedArray_0.34.1 tidyselect_1.2.1
## [37] UCSC.utils_1.4.0 farver_2.1.2
## [39] effectsize_1.0.1 matrixStats_1.5.0
## [41] stats4_4.5.3 jsonlite_2.0.0
## [43] multtest_2.64.0 Formula_1.2-5
## [45] survival_3.8-6 iterators_1.0.14
## [47] emmeans_2.0.1 foreach_1.5.2
## [49] tools_4.5.3 Rcpp_1.1.1
## [51] glue_1.8.0 gridExtra_2.3
## [53] SparseArray_1.8.1 xfun_0.55
## [55] mgcv_1.9-4 DESeq2_1.48.2
## [57] MatrixGenerics_1.20.0 GenomeInfoDb_1.44.3
## [59] withr_3.0.2 fastmap_1.2.0
## [61] boot_1.3-32 rhdf5filters_1.20.0
## [63] openssl_2.3.4 digest_0.6.39
## [65] timechange_0.3.0 R6_2.6.1
## [67] estimability_1.5.1 generics_0.1.4
## [69] data.table_1.18.0 httr_1.4.7
## [71] htmlwidgets_1.6.4 S4Arrays_1.8.1
## [73] parameters_0.28.3 pkgconfig_2.0.3
## [75] gtable_0.3.6 statsExpressions_1.7.2
## [77] S7_0.2.1 XVector_0.48.0
## [79] htmltools_0.5.9 carData_3.0-5
## [81] biomformat_1.36.0 scales_1.4.0
## [83] Biobase_2.68.0 png_0.1-8
## [85] knitr_1.51 rstudioapi_0.17.1
## [87] tzdb_0.5.0 coda_0.19-4.1
## [89] nlme_3.1-168 cachem_1.1.0
## [91] rhdf5_2.52.1 parallel_4.5.3
## [93] pillar_1.11.1 grid_4.5.3
## [95] vctrs_0.6.5 randomForest_4.7-1.2
## [97] car_3.1-3 xtable_1.8-4
## [99] cluster_2.1.8.2 paletteer_1.7.0
## [101] evaluate_1.0.5 zeallot_0.2.0
## [103] mvtnorm_1.3-3 cli_3.6.5
## [105] locfit_1.5-9.12 compiler_4.5.3
## [107] rlang_1.1.7 crayon_1.5.3
## [109] rstantools_2.6.0 ggsignif_0.6.4
## [111] labeling_0.4.3 rematch2_2.1.2
## [113] plyr_1.8.9 stringi_1.8.7
## [115] BiocParallel_1.42.2 Biostrings_2.76.0
## [117] lazyeval_0.2.2 bayestestR_0.17.0
## [119] Matrix_1.7-4 hms_1.1.4
## [121] patchwork_1.3.2 bit64_4.6.0-1
## [123] Rhdf5lib_1.30.0 statmod_1.5.1
## [125] SummarizedExperiment_1.38.1 igraph_2.2.1
## [127] RcppParallel_5.1.11-1 bslib_0.9.0
## [129] phangorn_2.12.1 fastmatch_1.1-6
## [131] bit_4.6.0 ape_5.8-1